#!/usr/bin/env python3 import subprocess import os from datetime import datetime, timedelta from collections import defaultdict NOTES_DIR = "/var/www/nextcloud/data/user/files/Notes" TEMPLATE_PATH = "/usr/local/bin/notes-template.html" OUTPUT_PATH = "/var/www/website/notes.html" def run_cmd(cmd): result = subprocess.run(cmd, shell=True, capture_output=True, text=True, cwd=NOTES_DIR) return result.stdout.strip() def count_pattern(pattern): cmd = f"grep -roh '{pattern}' --include='*.md' . | wc -l" return int(run_cmd(cmd) or 0) def get_file_list(): cmd = "find . -type f -name '*.md'" files = run_cmd(cmd).split('\n') return [f for f in files if f] def calculate_basic_stats(): stats = {} stats['total_notes'] = int(run_cmd("find . -type f -name '*.md' | wc -l")) stats['total_words'] = int(run_cmd("find . -type f -name '*.md' -exec wc -w {} + | tail -1 | awk '{print $1}'")) stats['total_lines'] = int(run_cmd("find . -type f -name '*.md' -exec wc -l {} + | tail -1 | awk '{print $1}'")) du_output = run_cmd("du -sh .") stats['disk_usage'] = du_output.split()[0] stats['avg_words'] = stats['total_words'] // stats['total_notes'] stats['avg_lines'] = stats['total_lines'] // stats['total_notes'] stats['total_vaults'] = int(run_cmd("find . -maxdepth 1 -mindepth 1 -type d ! -name '.*' | wc -l")) return stats def calculate_content_stats(): stats = {} stats['internal_links'] = count_pattern('\\[\\[[^]]*\\]\\]') stats['external_urls'] = count_pattern('https\\?://[^[:space:]]\\+') stats['images'] = count_pattern('!\\[\\[[^]]*\\]\\]') + count_pattern('!\\[[^]]*\\]([^)]*)') stats['code_blocks'] = count_pattern('```') // 2 stats['math_expr'] = count_pattern('\\$\\$[^$]*\\$\\$') + count_pattern('\\$[^$]*\\$') return stats def calculate_markdown_stats(): stats = {} stats['h1'] = count_pattern('^# ') stats['h2'] = count_pattern('^## ') stats['h3'] = count_pattern('^### ') stats['h4'] = count_pattern('^#### ') stats['lists'] = count_pattern('^[[:space:]]*[-*] ') + count_pattern('^[[:space:]]*[0-9]\\+\\. ') stats['blockquotes'] = count_pattern('^> ') stats['tables'] = count_pattern('^|.*|$') // 3 # Estimate: header, separator, data stats['hr'] = count_pattern('^---$') + count_pattern('^\\*\\*\\*$') return stats def calculate_task_stats(): stats = {} total = count_pattern('\\- \\[[ x]\\]') stats['total_tasks'] = total completed = count_pattern('\\- \\[x\\]') stats['tasks_completed'] = completed stats['tasks_unchecked'] = total - completed if total > 0: stats['task_completion'] = (completed * 100) // total else: stats['task_completion'] = 0 stats['task_completion_angle'] = (stats['task_completion'] * 360) // 100 return stats def calculate_temporal_stats(): stats = {} cmd = "find . -type f -name '*.md' -printf '%T@\\n' | sort -n | tail -1" last_edit_ts = run_cmd(cmd) if last_edit_ts: last_edit = datetime.fromtimestamp(float(last_edit_ts)) stats['days_since_last_edit'] = (datetime.now() - last_edit).days else: stats['days_since_last_edit'] = 0 monthly_counts = [] now = datetime.now() for i in range(5, -1, -1): target_month = now - timedelta(days=30*i) month_start = target_month.replace(day=1, hour=0, minute=0, second=0, microsecond=0) if month_start.month == 12: month_end = month_start.replace(year=month_start.year+1, month=1) else: month_end = month_start.replace(month=month_start.month+1) start_ts = int(month_start.timestamp()) end_ts = int(month_end.timestamp()) cmd = f"find . -type f -name '*.md' -newermt '@{start_ts}' ! -newermt '@{end_ts}' | wc -l" count = int(run_cmd(cmd) or 0) monthly_counts.append({'month': month_start.strftime('%b %Y'), 'count': count}) stats['monthly_activity'] = monthly_counts dow_counts = defaultdict(int) files = get_file_list() for file in files: if not file: continue cmd = f"stat -c %Y '{file}'" ts = run_cmd(cmd) if ts: dt = datetime.fromtimestamp(int(ts)) dow = dt.strftime('%a') dow_counts[dow] += 1 days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] stats['day_of_week'] = [{'day': day, 'count': dow_counts.get(day, 0)} for day in days] return stats def calculate_length_distribution(): files = get_file_list() buckets = [0, 0, 0, 0, 0, 0] # 0-100, 100-500, 500-1k, 1k-2k, 2k-5k, 5k+ for file in files: if not file: continue cmd = f"wc -w '{file}' | awk '{{print $1}}'" words = int(run_cmd(cmd) or 0) if words < 100: buckets[0] += 1 elif words < 500: buckets[1] += 1 elif words < 1000: buckets[2] += 1 elif words < 2000: buckets[3] += 1 elif words < 5000: buckets[4] += 1 else: buckets[5] += 1 return buckets def generate_html(): with open(TEMPLATE_PATH, 'r') as f: html = f.read() basic = calculate_basic_stats() if basic is None: print("No markdown files found") return content = calculate_content_stats() markdown = calculate_markdown_stats() tasks = calculate_task_stats() temporal = calculate_temporal_stats() length_dist = calculate_length_distribution() # Basic stats html = html.replace('{{TOTAL_NOTES}}', str(basic['total_notes'])) html = html.replace('{{TOTAL_WORDS}}', f"{basic['total_words']:,}") html = html.replace('{{TOTAL_LINES}}', f"{basic['total_lines']:,}") html = html.replace('{{DISK_USAGE}}', basic['disk_usage']) html = html.replace('{{AVG_WORDS}}', str(basic['avg_words'])) html = html.replace('{{AVG_LINES}}', str(basic['avg_lines'])) html = html.replace('{{TOTAL_VAULTS}}', str(basic['total_vaults'])) # Task stats with ASCII progress bar html = html.replace('{{TOTAL_TASKS}}', str(tasks['total_tasks'])) html = html.replace('{{TASKS_COMPLETED}}', str(tasks['tasks_completed'])) html = html.replace('{{TASKS_UNCHECKED}}', str(tasks['tasks_unchecked'])) html = html.replace('{{TASK_COMPLETION}}', str(tasks['task_completion'])) # ASCII progress bar (20 chars) filled = (tasks['task_completion'] * 20) // 100 progress_bar = '=' * filled + '-' * (20 - filled) html = html.replace('{{TASK_PROGRESS_BAR}}', progress_bar) # Content stats html = html.replace('{{INTERNAL_LINKS}}', str(content['internal_links'])) html = html.replace('{{EXTERNAL_URLS}}', str(content['external_urls'])) html = html.replace('{{IMAGES}}', str(content['images'])) html = html.replace('{{CODE_BLOCKS}}', str(content['code_blocks'])) html = html.replace('{{MATH_EXPR}}', str(content['math_expr'])) # Markdown stats html = html.replace('{{H1_COUNT}}', str(markdown['h1'])) html = html.replace('{{H2_COUNT}}', str(markdown['h2'])) html = html.replace('{{H3_COUNT}}', str(markdown['h3'])) html = html.replace('{{H4_COUNT}}', str(markdown['h4'])) html = html.replace('{{LISTS}}', str(markdown['lists'])) html = html.replace('{{BLOCKQUOTES}}', str(markdown['blockquotes'])) html = html.replace('{{TABLES}}', str(markdown['tables'])) html = html.replace('{{HR_COUNT}}', str(markdown['hr'])) # Temporal stats html = html.replace('{{DAYS_SINCE_LAST_EDIT}}', str(temporal['days_since_last_edit'])) # Monthly activity calendar grid monthly_html = "" for month_data in temporal['monthly_activity']: monthly_html += f'''