#!/usr/bin/env python3
"""Render a static HTML statistics page for a tree of Markdown notes.

The script scans ``NOTES_DIR`` for ``*.md`` files, gathers size, content,
Markdown-structure, task and modification-time statistics, substitutes them
for ``{{PLACEHOLDER}}`` tokens in the template at ``TEMPLATE_PATH`` and
writes the result to ``OUTPUT_PATH``.

Pattern counts still shell out to ``grep`` (patterns are POSIX basic regular
expressions) and disk usage to ``du``; everything that touches individual
file names is done in Python so unusual names cannot break shell quoting.
"""
# Provenance: recovered from commit f11dbc9d3f9de9da6a7ef20395128f7b873ce744
# ("primitive and working version of code", n0tori, 2026-01-10), which
# created notes-stats.py.

import bisect
import os
import shlex
import subprocess
from collections import defaultdict
from datetime import datetime, timedelta

NOTES_DIR = "/var/www/nextcloud/data/user/files/Notes"
TEMPLATE_PATH = "/usr/local/bin/notes-template.html"
OUTPUT_PATH = "/var/www/website/notes.html"

# Display order for the day-of-week chart; index matches datetime.weekday().
WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')

# Labels for the word-count buckets and the upper bound (exclusive) of every
# bucket except the last, open-ended one.
LENGTH_RANGES = ['0-100', '100-500', '500-1k', '1k-2k', '2k-5k', '5k+']
LENGTH_THRESHOLDS = (100, 500, 1000, 2000, 5000)


def run_cmd(cmd, cwd=None):
    """Run *cmd* through the shell and return its stripped stdout.

    Args:
        cmd: Shell command line. Callers must quote any interpolated value.
        cwd: Working directory for the command; defaults to ``NOTES_DIR``.

    Returns:
        Standard output with surrounding whitespace removed. The exit status
        and stderr are not inspected, so a failing command yields ``""``.
    """
    result = subprocess.run(
        cmd,
        shell=True,
        capture_output=True,
        text=True,
        cwd=cwd or NOTES_DIR,
    )
    return result.stdout.strip()


def count_pattern(pattern, notes_dir=None):
    """Count matches of a grep basic-regex *pattern* across all ``*.md`` notes.

    Every non-overlapping match is counted (``grep -o``), not every matching
    line. The pattern is shell-quoted and passed with ``-e`` so that quotes
    or a leading ``-`` in it cannot alter the command.

    Returns:
        The number of matches, or 0 when the pipeline prints nothing.
    """
    cmd = f"grep -roh -e {shlex.quote(pattern)} --include='*.md' . | wc -l"
    return int(run_cmd(cmd, cwd=notes_dir) or 0)


def _note_paths(root):
    """Yield the full path of every regular ``*.md`` file below *root*."""
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            if not name.endswith('.md'):
                continue
            path = os.path.join(dirpath, name)
            # Symlinks are skipped, matching ``find -type f``.
            if os.path.isfile(path) and not os.path.islink(path):
                yield path


def _count_words_and_lines(path):
    """Return ``(words, lines)`` for *path*; ``(0, 0)`` if it cannot be read.

    Words are runs of non-whitespace bytes and lines are newline bytes,
    which mirrors what ``wc -w`` / ``wc -l`` report for ordinary text.
    """
    try:
        with open(path, 'rb') as handle:
            data = handle.read()
    except OSError:
        return 0, 0
    return len(data.split()), data.count(b'\n')


def _note_mtimes(root):
    """Return the modification times (epoch seconds) of all notes in *root*."""
    mtimes = []
    for path in _note_paths(root):
        try:
            mtimes.append(os.path.getmtime(path))
        except OSError:
            # The file vanished or is unreadable; leave it out of the stats.
            continue
    return mtimes


def _next_month_start(month_start):
    """Return midnight on the first day of the month after *month_start*."""
    if month_start.month == 12:
        return month_start.replace(year=month_start.year + 1, month=1)
    return month_start.replace(month=month_start.month + 1)


def _recent_month_starts(now, months=6):
    """Return the first instants of the last *months* calendar months.

    The list is ordered oldest first and ends with the month containing
    *now*. Stepping is done month by month; subtracting multiples of 30 days
    would repeat or skip months near the end of long months.
    """
    start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    starts = [start]
    for _ in range(months - 1):
        # One day before the 1st is always inside the previous month.
        start = (start - timedelta(days=1)).replace(day=1)
        starts.append(start)
    starts.reverse()
    return starts


def get_file_list(notes_dir=None):
    """Return the sorted ``./``-prefixed relative paths of all ``*.md`` notes.

    Args:
        notes_dir: Directory to scan; defaults to ``NOTES_DIR``.
    """
    root = notes_dir or NOTES_DIR
    return sorted(
        os.path.join('.', os.path.relpath(path, root))
        for path in _note_paths(root)
    )


def calculate_basic_stats(notes_dir=None):
    """Collect note, word, line, disk-usage and vault totals.

    Args:
        notes_dir: Directory to scan; defaults to ``NOTES_DIR``.

    Returns:
        A dict with the keys ``total_notes``, ``total_words``,
        ``total_lines``, ``disk_usage``, ``avg_words``, ``avg_lines`` and
        ``total_vaults``, or ``None`` when no Markdown file exists (the
        averages would otherwise divide by zero).
    """
    root = notes_dir or NOTES_DIR
    paths = list(_note_paths(root))
    if not paths:
        return None

    total_words = 0
    total_lines = 0
    for path in paths:
        words, lines = _count_words_and_lines(path)
        total_words += words
        total_lines += lines

    # ``du -sh`` prints "<size>\t<path>"; keep only the size column.
    du_fields = run_cmd("du -sh .", cwd=root).split()

    # A "vault" is a non-hidden directory directly below the notes root.
    with os.scandir(root) as entries:
        total_vaults = sum(
            1
            for entry in entries
            if entry.is_dir(follow_symlinks=False)
            and not entry.name.startswith('.')
        )

    total_notes = len(paths)
    return {
        'total_notes': total_notes,
        'total_words': total_words,
        'total_lines': total_lines,
        'disk_usage': du_fields[0] if du_fields else 'n/a',
        'avg_words': total_words // total_notes,
        'avg_lines': total_lines // total_notes,
        'total_vaults': total_vaults,
    }


def calculate_content_stats(notes_dir=None):
    """Count wiki links, URLs, images, code blocks and math expressions.

    Returns:
        A dict with the keys ``internal_links``, ``external_urls``,
        ``images``, ``code_blocks`` and ``math_expr``.
    """
    stats = {}
    stats['internal_links'] = count_pattern(r'\[\[[^]]*\]\]', notes_dir)
    stats['external_urls'] = count_pattern(
        r'https\?://[^[:space:]]\+', notes_dir
    )
    # Both ``![[embed]]`` and ``![alt](target)`` forms count as images.
    stats['images'] = (
        count_pattern(r'!\[\[[^]]*\]\]', notes_dir)
        + count_pattern(r'!\[[^]]*\]([^)]*)', notes_dir)
    )
    # Every fenced block contributes an opening and a closing fence.
    stats['code_blocks'] = count_pattern('```', notes_dir) // 2
    # NOTE(review): the single-dollar pattern also matches inside ``$$...$$``
    # spans, so display math is counted more than once.
    stats['math_expr'] = (
        count_pattern(r'\$\$[^$]*\$\$', notes_dir)
        + count_pattern(r'\$[^$]*\$', notes_dir)
    )
    return stats


def calculate_markdown_stats(notes_dir=None):
    """Count headings, list items, blockquotes, tables and horizontal rules.

    Returns:
        A dict with the keys ``h1``-``h4``, ``lists``, ``blockquotes``,
        ``tables`` and ``hr``.
    """
    stats = {}
    stats['h1'] = count_pattern('^# ', notes_dir)
    stats['h2'] = count_pattern('^## ', notes_dir)
    stats['h3'] = count_pattern('^### ', notes_dir)
    stats['h4'] = count_pattern('^#### ', notes_dir)
    stats['lists'] = (
        count_pattern('^[[:space:]]*[-*] ', notes_dir)
        + count_pattern(r'^[[:space:]]*[0-9]\+\. ', notes_dir)
    )
    stats['blockquotes'] = count_pattern('^> ', notes_dir)
    # Estimate: header, separator, data
    stats['tables'] = count_pattern('^|.*|$', notes_dir) // 3
    stats['hr'] = (
        count_pattern('^---$', notes_dir)
        + count_pattern(r'^\*\*\*$', notes_dir)
    )
    return stats


def calculate_task_stats(notes_dir=None):
    """Count ``- [ ]`` / ``- [x]`` task items and derive completion figures.

    Returns:
        A dict with the keys ``total_tasks``, ``tasks_completed``,
        ``tasks_unchecked``, ``task_completion`` (integer percent) and
        ``task_completion_angle`` (the same share expressed in degrees).
    """
    total = count_pattern(r'- \[[ x]\]', notes_dir)
    completed = count_pattern(r'- \[x\]', notes_dir)
    percent = (completed * 100) // total if total > 0 else 0
    # NOTE(review): task_completion_angle is computed but generate_html()
    # never substitutes it into the template - confirm whether it is needed.
    return {
        'total_tasks': total,
        'tasks_completed': completed,
        'tasks_unchecked': total - completed,
        'task_completion': percent,
        'task_completion_angle': (percent * 360) // 100,
    }


def calculate_temporal_stats(notes_dir=None, now=None):
    """Summarise when notes were last modified.

    Args:
        notes_dir: Directory to scan; defaults to ``NOTES_DIR``.
        now: Reference time (naive local ``datetime``); defaults to
            ``datetime.now()``.

    Returns:
        A dict with

        * ``days_since_last_edit`` - whole days since the newest
          modification time (0 when there are no notes),
        * ``monthly_activity`` - six ``{'month', 'count'}`` dicts, oldest
          first, for the six calendar months ending with the current one,
        * ``day_of_week`` - seven ``{'day', 'count'}`` dicts, Mon to Sun.
    """
    root = notes_dir or NOTES_DIR
    if now is None:
        now = datetime.now()
    mtimes = _note_mtimes(root)

    stats = {}
    if mtimes:
        last_edit = datetime.fromtimestamp(max(mtimes))
        stats['days_since_last_edit'] = (now - last_edit).days
    else:
        stats['days_since_last_edit'] = 0

    monthly_counts = []
    for month_start in _recent_month_starts(now):
        start_ts = int(month_start.timestamp())
        end_ts = int(_next_month_start(month_start).timestamp())
        # Same window as ``find -newermt @start ! -newermt @end``:
        # strictly newer than the start, not newer than the end.
        count = sum(1 for mtime in mtimes if start_ts < mtime <= end_ts)
        monthly_counts.append(
            {'month': month_start.strftime('%b %Y'), 'count': count}
        )
    stats['monthly_activity'] = monthly_counts

    # Bucket by weekday index rather than strftime('%a'), whose output
    # depends on the active locale.
    dow_counts = defaultdict(int)
    for mtime in mtimes:
        dow_counts[datetime.fromtimestamp(int(mtime)).weekday()] += 1
    stats['day_of_week'] = [
        {'day': day, 'count': dow_counts.get(index, 0)}
        for index, day in enumerate(WEEKDAYS)
    ]

    return stats


def calculate_length_distribution(notes_dir=None):
    """Return how many notes fall into each word-count bucket.

    The six buckets are 0-100, 100-500, 500-1k, 1k-2k, 2k-5k and 5k+; each
    lower bound is inclusive and each upper bound exclusive.
    """
    root = notes_dir or NOTES_DIR
    buckets = [0] * len(LENGTH_RANGES)
    for path in _note_paths(root):
        words, _lines = _count_words_and_lines(path)
        buckets[bisect.bisect_right(LENGTH_THRESHOLDS, words)] += 1
    return buckets


def generate_html(notes_dir=None, template_path=None, output_path=None):
    """Fill the HTML template with fresh statistics and write the page.

    Args:
        notes_dir: Notes directory; defaults to ``NOTES_DIR``.
        template_path: Template file; defaults to ``TEMPLATE_PATH``.
        output_path: Destination file; defaults to ``OUTPUT_PATH``.

    Prints a message and writes nothing when no Markdown notes exist.
    """
    root = notes_dir or NOTES_DIR
    template = template_path or TEMPLATE_PATH
    destination = output_path or OUTPUT_PATH

    basic = calculate_basic_stats(root)
    if basic is None:
        print("No markdown files found")
        return

    content = calculate_content_stats(root)
    markdown = calculate_markdown_stats(root)
    tasks = calculate_task_stats(root)
    temporal = calculate_temporal_stats(root)
    length_dist = calculate_length_distribution(root)

    # ASCII progress bar (20 chars)
    filled = (tasks['task_completion'] * 20) // 100
    progress_bar = '=' * filled + '-' * (20 - filled)

    # Monthly activity calendar grid
    # NOTE(review): the HTML tags of this fragment were lost in the copy of
    # the script under review; only the text content survives. Restore the
    # wrapper markup expected by the template before deploying.
    monthly_html = "".join(
        f"\n{entry['month']}\n{entry['count']}\n\n"
        for entry in temporal['monthly_activity']
    )

    # Day of week ASCII bars, scaled so the busiest day is 30 cells wide.
    day_rows = temporal['day_of_week']
    dow_max = max(row['count'] for row in day_rows) or 1
    most_active_day = max(day_rows, key=lambda row: row['count'])
    dow_bars = "".join(
        f"{row['day']}: {'█' * ((row['count'] * 30) // dow_max)} "
        f"{row['count']}\n"
        for row in day_rows
    )

    # Length distribution with metrics
    # NOTE(review): list-item markup reconstructed from bullet artifacts in
    # the reviewed copy; any inner tags or attributes of the original were
    # not recoverable - verify against the template.
    length_html = "".join(
        f'<li>{label} words {count} notes</li>\n'
        for label, count in zip(LENGTH_RANGES, length_dist)
    )
    max_idx = length_dist.index(max(length_dist))

    # Find longest and shortest with actual notes
    last = len(length_dist) - 1
    longest_idx = next(
        (i for i in range(last, -1, -1) if length_dist[i] > 0), 0
    )
    shortest_idx = next(
        (i for i in range(last + 1) if length_dist[i] > 0), 0
    )

    replacements = {
        # Basic stats
        '{{TOTAL_NOTES}}': str(basic['total_notes']),
        '{{TOTAL_WORDS}}': f"{basic['total_words']:,}",
        '{{TOTAL_LINES}}': f"{basic['total_lines']:,}",
        '{{DISK_USAGE}}': basic['disk_usage'],
        '{{AVG_WORDS}}': str(basic['avg_words']),
        '{{AVG_LINES}}': str(basic['avg_lines']),
        '{{TOTAL_VAULTS}}': str(basic['total_vaults']),
        # Task stats
        '{{TOTAL_TASKS}}': str(tasks['total_tasks']),
        '{{TASKS_COMPLETED}}': str(tasks['tasks_completed']),
        '{{TASKS_UNCHECKED}}': str(tasks['tasks_unchecked']),
        '{{TASK_COMPLETION}}': str(tasks['task_completion']),
        '{{TASK_PROGRESS_BAR}}': progress_bar,
        # Content stats
        '{{INTERNAL_LINKS}}': str(content['internal_links']),
        '{{EXTERNAL_URLS}}': str(content['external_urls']),
        '{{IMAGES}}': str(content['images']),
        '{{CODE_BLOCKS}}': str(content['code_blocks']),
        '{{MATH_EXPR}}': str(content['math_expr']),
        # Markdown stats
        '{{H1_COUNT}}': str(markdown['h1']),
        '{{H2_COUNT}}': str(markdown['h2']),
        '{{H3_COUNT}}': str(markdown['h3']),
        '{{H4_COUNT}}': str(markdown['h4']),
        '{{LISTS}}': str(markdown['lists']),
        '{{BLOCKQUOTES}}': str(markdown['blockquotes']),
        '{{TABLES}}': str(markdown['tables']),
        '{{HR_COUNT}}': str(markdown['hr']),
        # Temporal stats
        '{{DAYS_SINCE_LAST_EDIT}}': str(temporal['days_since_last_edit']),
        '{{MONTHLY_ACTIVITY}}': monthly_html,
        '{{DAY_OF_WEEK_BARS}}': dow_bars.strip(),
        '{{MOST_ACTIVE_DAY}}': most_active_day['day'],
        '{{MOST_ACTIVE_DAY_COUNT}}': str(most_active_day['count']),
        # Length distribution
        '{{LENGTH_DISTRIBUTION}}': length_html,
        '{{MOST_COMMON_BRACKET}}': LENGTH_RANGES[max_idx],
        '{{MOST_COMMON_COUNT}}': str(length_dist[max_idx]),
        '{{LONGEST_BRACKET}}': LENGTH_RANGES[longest_idx],
        '{{LONGEST_COUNT}}': str(length_dist[longest_idx]),
        '{{SHORTEST_BRACKET}}': LENGTH_RANGES[shortest_idx],
        '{{SHORTEST_COUNT}}': str(length_dist[shortest_idx]),
        # Footer metadata
        '{{LAST_UPDATED}}': datetime.now().strftime('%d/%m/%Y'),
    }

    # Explicit UTF-8: the page contains non-ASCII bar characters, and the
    # default encoding under a C/POSIX locale (e.g. cron) cannot write them.
    with open(template, 'r', encoding='utf-8') as f:
        html = f.read()
    for placeholder, value in replacements.items():
        html = html.replace(placeholder, value)

    with open(destination, 'w', encoding='utf-8') as f:
        f.write(html)

    # The reported size is that of the page before the size itself is
    # inserted, so the file has to exist on disk first.
    file_size = run_cmd(
        f"ls -lh {shlex.quote(destination)} | awk '{{print $5}}'", cwd=root
    )
    html = html.replace('{{FILE_SIZE}}', file_size)
    with open(destination, 'w', encoding='utf-8') as f:
        f.write(html)

    print(f"Generated {destination}")


if __name__ == "__main__":
    generate_html()