diff options
| author | n0tori <188390306+n0tori@users.noreply.github.com> | 2026-01-10 23:04:07 +0000 |
|---|---|---|
| committer | n0tori <188390306+n0tori@users.noreply.github.com> | 2026-01-10 23:04:07 +0000 |
| commit | f11dbc9d3f9de9da6a7ef20395128f7b873ce744 (patch) | |
| tree | 18134d87ab5774fe1a5968dd35115929201df807 | |
| parent | 7197c2972ccd2566073173b27d152b6ee0f4ec00 (diff) | |
primitive and working version of code
| -rw-r--r-- | notes-stats.py | 262 | ||||
| -rw-r--r-- | notes-template.html | 250 |
2 files changed, 512 insertions, 0 deletions
#!/usr/bin/env python3
# Recovered from: diff --git a/notes-stats.py b/notes-stats.py new file mode 100644 index 0000000..275c311 --- /dev/null +++ b/notes-stats.py @@ -0,0 +1,262 @@
"""Render a statistics page for a directory tree of Markdown notes.

The script walks NOTES_DIR, gathers size, content, task and modification-time
statistics for every ``*.md`` file, substitutes them for the ``{{PLACEHOLDER}}``
tokens found in TEMPLATE_PATH and writes the result to OUTPUT_PATH.

Pattern counts are delegated to ``grep`` (basic regular expressions); everything
that touches individual file names is done in-process so that unusual file
names (quotes, spaces, ...) can never reach a shell.
"""
import bisect
import os
import subprocess
from collections import defaultdict
from datetime import datetime, timedelta

NOTES_DIR = "/var/www/nextcloud/data/user/files/Notes"
TEMPLATE_PATH = "/usr/local/bin/notes-template.html"
OUTPUT_PATH = "/var/www/website/notes.html"

# Exclusive upper word-count limits of the first five length buckets; a note at
# or above the last limit lands in the open-ended sixth bucket.
LENGTH_BUCKET_LIMITS = (100, 500, 1000, 2000, 5000)
LENGTH_BUCKET_LABELS = ['0-100', '100-500', '500-1k', '1k-2k', '2k-5k', '5k+']
WEEKDAYS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']


def _root(notes_dir):
    """Return *notes_dir*, falling back to NOTES_DIR when it is None."""
    return NOTES_DIR if notes_dir is None else notes_dir


def run_cmd(cmd, cwd=None):
    """Run a constant shell command line and return its stripped stdout.

    *cmd* is interpreted by the shell, so it must only ever be a fixed string;
    never interpolate file names or note content into it.

    Args:
        cmd: Shell command line.
        cwd: Working directory; defaults to NOTES_DIR.
    """
    result = subprocess.run(
        cmd, shell=True, capture_output=True, text=True, cwd=_root(cwd)
    )
    return result.stdout.strip()


def count_pattern(pattern, notes_dir=None):
    """Count every match of a grep basic regex across all ``*.md`` files.

    grep is invoked without a shell and the pattern is passed through ``-e``,
    so patterns may start with ``-`` and need no quoting. ``-o`` prints each
    match on its own line, hence the number of newlines is the match count.
    Output is handled as bytes so notes that are not valid UTF-8 cannot raise.
    """
    result = subprocess.run(
        ['grep', '-roh', '-e', pattern, '--include=*.md', '.'],
        capture_output=True,
        cwd=_root(notes_dir),
    )
    return result.stdout.count(b'\n')


def get_file_list(notes_dir=None):
    """Return the sorted ``./relative/path.md`` names of all regular note files.

    Symbolic links are skipped, and hidden directories are included. A missing
    directory simply yields an empty list.
    """
    root = _root(notes_dir)
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            if not name.endswith('.md'):
                continue
            full = os.path.join(dirpath, name)
            if os.path.islink(full) or not os.path.isfile(full):
                continue
            found.append(os.path.join('.', os.path.relpath(full, root)))
    return sorted(found)


def _read_note(root, rel_path):
    """Return the raw bytes of a note, or ``b''`` when it cannot be read."""
    try:
        with open(os.path.join(root, rel_path), 'rb') as handle:
            return handle.read()
    except OSError:
        return b''


def calculate_basic_stats(notes_dir=None):
    """Return headline totals for the collection, or None when it has no notes.

    Words are runs of non-whitespace bytes and lines are newline characters
    (the same definitions ``wc -w`` / ``wc -l`` use for ASCII text). Vaults are
    the non-hidden directories directly below the notes root.
    """
    root = _root(notes_dir)
    files = get_file_list(root)
    if not files:
        # Nothing to average over; the caller reports this instead of crashing.
        return None

    total_words = 0
    total_lines = 0
    for rel_path in files:
        data = _read_note(root, rel_path)
        total_words += len(data.split())
        total_lines += data.count(b'\n')

    with os.scandir(root) as entries:
        total_vaults = sum(
            1
            for entry in entries
            if entry.is_dir(follow_symlinks=False)
            and not entry.name.startswith('.')
        )

    du_fields = run_cmd("du -sh .", cwd=root).split()

    total_notes = len(files)
    return {
        'total_notes': total_notes,
        'total_words': total_words,
        'total_lines': total_lines,
        'disk_usage': du_fields[0] if du_fields else '0',
        'avg_words': total_words // total_notes,
        'avg_lines': total_lines // total_notes,
        'total_vaults': total_vaults,
    }


def calculate_content_stats(notes_dir=None):
    """Count links, URLs, images, fenced code blocks and math expressions."""

    def count(pattern):
        return count_pattern(pattern, notes_dir)

    return {
        'internal_links': count(r'\[\[[^]]*\]\]'),
        'external_urls': count(r'https\?://[^[:space:]]\+'),
        # Wiki-style embeds plus standard Markdown images.
        'images': count(r'!\[\[[^]]*\]\]') + count(r'!\[[^]]*\]([^)]*)'),
        # Every block has an opening and a closing fence.
        'code_blocks': count('```') // 2,
        'math_expr': count(r'\$\$[^$]*\$\$') + count(r'\$[^$]*\$'),
    }


def calculate_markdown_stats(notes_dir=None):
    """Count headings, list items, blockquotes, tables and horizontal rules."""

    def count(pattern):
        return count_pattern(pattern, notes_dir)

    return {
        'h1': count('^# '),
        'h2': count('^## '),
        'h3': count('^### '),
        'h4': count('^#### '),
        'lists': count('^[[:space:]]*[-*] ') + count(r'^[[:space:]]*[0-9]\+\. '),
        'blockquotes': count('^> '),
        # Estimate: a table is assumed to be header, separator and one data row.
        'tables': count('^|.*|$') // 3,
        'hr': count('^---$') + count(r'^\*\*\*$'),
    }


def summarize_tasks(total, completed):
    """Derive the task statistics from raw checkbox counts.

    Percentages use integer division; with no tasks at all the completion is
    reported as 0 rather than dividing by zero.
    """
    completion = (completed * 100) // total if total > 0 else 0
    return {
        'total_tasks': total,
        'tasks_completed': completed,
        'tasks_unchecked': total - completed,
        'task_completion': completion,
        'task_completion_angle': (completion * 360) // 100,
    }


def calculate_task_stats(notes_dir=None):
    """Count ``- [ ]`` / ``- [x]`` checkboxes and summarize their completion."""
    total = count_pattern(r'- \[[ x]\]', notes_dir)
    completed = count_pattern(r'- \[x\]', notes_dir)
    return summarize_tasks(total, completed)


def month_starts(now, months=6):
    """Return the first instant of the last *months* calendar months, oldest first.

    The current month is included. Stepping is done month by month (one day
    before the 1st is always in the previous month), so no month is skipped or
    repeated regardless of the day of the month *now* falls on.
    """
    current = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    starts = [current]
    for _ in range(months - 1):
        current = (current - timedelta(days=1)).replace(day=1)
        starts.append(current)
    starts.reverse()
    return starts


def calculate_temporal_stats(notes_dir=None, now=None):
    """Summarize note modification times.

    Args:
        notes_dir: Notes root; defaults to NOTES_DIR.
        now: Reference time (naive local datetime); defaults to the current time.

    Returns:
        dict with ``days_since_last_edit`` (0 when there are no notes),
        ``monthly_activity`` (six ``{'month', 'count'}`` entries, oldest first)
        and ``day_of_week`` (seven ``{'day', 'count'}`` entries, Mon..Sun).
    """
    root = _root(notes_dir)
    if now is None:
        now = datetime.now()

    modified = []
    for rel_path in get_file_list(root):
        try:
            mtime = os.stat(os.path.join(root, rel_path)).st_mtime
        except OSError:
            # The note disappeared between listing and stat; ignore it.
            continue
        modified.append(datetime.fromtimestamp(mtime))

    per_month = defaultdict(int)
    per_weekday = defaultdict(int)
    for stamp in modified:
        per_month[(stamp.year, stamp.month)] += 1
        per_weekday[stamp.weekday()] += 1

    return {
        'days_since_last_edit': (now - max(modified)).days if modified else 0,
        'monthly_activity': [
            {
                'month': start.strftime('%b %Y'),
                'count': per_month.get((start.year, start.month), 0),
            }
            for start in month_starts(now)
        ],
        'day_of_week': [
            {'day': day, 'count': per_weekday.get(index, 0)}
            for index, day in enumerate(WEEKDAYS)
        ],
    }


def bucket_index(words):
    """Return the index of the length bucket a note with *words* words belongs to."""
    return bisect.bisect_right(LENGTH_BUCKET_LIMITS, words)


def calculate_length_distribution(notes_dir=None):
    """Return six counts of notes per word-count bucket (see LENGTH_BUCKET_LABELS)."""
    root = _root(notes_dir)
    buckets = [0] * len(LENGTH_BUCKET_LABELS)
    for rel_path in get_file_list(root):
        words = len(_read_note(root, rel_path).split())
        buckets[bucket_index(words)] += 1
    return buckets


def render_progress_bar(percent, width=20):
    """Return an ASCII bar of *width* characters, ``=`` filled and ``-`` empty."""
    filled = (percent * width) // 100
    return '=' * filled + '-' * (width - filled)


def generate_html():
    """Fill the HTML template with fresh statistics and write it to OUTPUT_PATH.

    Prints a message and writes nothing when the notes directory contains no
    Markdown files.
    """
    basic = calculate_basic_stats()
    if basic is None:
        print("No markdown files found")
        return

    with open(TEMPLATE_PATH, 'r', encoding='utf-8') as f:
        html = f.read()

    content = calculate_content_stats()
    markdown = calculate_markdown_stats()
    tasks = calculate_task_stats()
    temporal = calculate_temporal_stats()
    length_dist = calculate_length_distribution()

    # Monthly activity calendar grid.
    monthly_html = "".join(
        f'''                <div class="calendar-month">
                    <div class="month-label">{month_data['month']}</div>
                    <div class="month-value">{month_data['count']}</div>
                </div>\n'''
        for month_data in temporal['monthly_activity']
    )

    # Day-of-week bars, scaled so the busiest day is 30 characters wide.
    weekdays = temporal['day_of_week']
    most_active_day = max(weekdays, key=lambda entry: entry['count'])
    scale = most_active_day['count'] or 1
    dow_bars = "\n".join(
        f"{entry['day']}: {'█' * ((entry['count'] * 30) // scale)} {entry['count']}"
        for entry in weekdays
    )

    # Length distribution list and its derived "metrics".
    ranges = LENGTH_BUCKET_LABELS
    length_html = "".join(
        f'                <li><span class="label">{label} words</span> '
        f'<span class="value">{count} notes</span></li>\n'
        for label, count in zip(ranges, length_dist)
    )
    max_idx = length_dist.index(max(length_dist))
    # Highest / lowest bucket that actually contains notes.
    longest_idx = next((i for i in range(5, -1, -1) if length_dist[i] > 0), 0)
    shortest_idx = next((i for i in range(6) if length_dist[i] > 0), 0)

    replacements = {
        # Basic stats
        '{{TOTAL_NOTES}}': str(basic['total_notes']),
        '{{TOTAL_WORDS}}': f"{basic['total_words']:,}",
        '{{TOTAL_LINES}}': f"{basic['total_lines']:,}",
        '{{DISK_USAGE}}': basic['disk_usage'],
        '{{AVG_WORDS}}': str(basic['avg_words']),
        '{{AVG_LINES}}': str(basic['avg_lines']),
        '{{TOTAL_VAULTS}}': str(basic['total_vaults']),
        # Task stats with ASCII progress bar
        '{{TOTAL_TASKS}}': str(tasks['total_tasks']),
        '{{TASKS_COMPLETED}}': str(tasks['tasks_completed']),
        '{{TASKS_UNCHECKED}}': str(tasks['tasks_unchecked']),
        '{{TASK_COMPLETION}}': str(tasks['task_completion']),
        '{{TASK_PROGRESS_BAR}}': render_progress_bar(tasks['task_completion']),
        # Content stats
        '{{INTERNAL_LINKS}}': str(content['internal_links']),
        '{{EXTERNAL_URLS}}': str(content['external_urls']),
        '{{IMAGES}}': str(content['images']),
        '{{CODE_BLOCKS}}': str(content['code_blocks']),
        '{{MATH_EXPR}}': str(content['math_expr']),
        # Markdown stats
        '{{H1_COUNT}}': str(markdown['h1']),
        '{{H2_COUNT}}': str(markdown['h2']),
        '{{H3_COUNT}}': str(markdown['h3']),
        '{{H4_COUNT}}': str(markdown['h4']),
        '{{LISTS}}': str(markdown['lists']),
        '{{BLOCKQUOTES}}': str(markdown['blockquotes']),
        '{{TABLES}}': str(markdown['tables']),
        '{{HR_COUNT}}': str(markdown['hr']),
        # Temporal stats
        '{{DAYS_SINCE_LAST_EDIT}}': str(temporal['days_since_last_edit']),
        '{{MONTHLY_ACTIVITY}}': monthly_html,
        '{{DAY_OF_WEEK_BARS}}': dow_bars,
        '{{MOST_ACTIVE_DAY}}': most_active_day['day'],
        '{{MOST_ACTIVE_DAY_COUNT}}': str(most_active_day['count']),
        # Length distribution
        '{{LENGTH_DISTRIBUTION}}': length_html,
        '{{MOST_COMMON_BRACKET}}': ranges[max_idx],
        '{{MOST_COMMON_COUNT}}': str(length_dist[max_idx]),
        '{{LONGEST_BRACKET}}': ranges[longest_idx],
        '{{LONGEST_COUNT}}': str(length_dist[longest_idx]),
        '{{SHORTEST_BRACKET}}': ranges[shortest_idx],
        '{{SHORTEST_COUNT}}': str(length_dist[shortest_idx]),
        # Footer metadata
        '{{LAST_UPDATED}}': datetime.now().strftime('%d/%m/%Y'),
    }
    for placeholder, value in replacements.items():
        html = html.replace(placeholder, value)

    # The page reports its own size: write it once, measure it, then write the
    # final version with the size filled in.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        f.write(html)

    file_size = run_cmd(f"ls -lh '{OUTPUT_PATH}' | awk '{{print $5}}'")

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        f.write(html.replace('{{FILE_SIZE}}', file_size))

    print(f"Generated {OUTPUT_PATH}")


if __name__ == "__main__":
    generate_html()

# NOTE: the collapsed source line that ends this script also carries the start of the
# notes-template.html diff. That fragment is kept verbatim on the next comment line.
# diff --git a/notes-template.html b/notes-template.html new file mode 100644 index 0000000..e390c5e --- /dev/null +++ b/notes-template.html @@ -0,0 +1,250 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <title>notes</title> + <link rel="icon" type="image/x-icon" href="favicon.ico"> + <link rel="stylesheet" href="base.css"> + <style> + .big-stat { + text-align: center; + margin: 30px 0; + padding: 20px; + background-color: #ddd9c8; + border: 3px double #333; + } + + .big-stat-value { + font-size: 4em; + font-weight: bold; + color: #2a2a2a; + line-height: 1; + } + + .big-stat-label { + font-size: 1.2em; + color: #555; + margin-top: 10px; + } + + .inline-stats { + margin: 20px 0; + line-height: 2; + } + + .inline-stats p { + margin: 5px 0; + } + + .stat-highlight { + background-color: #ddd9c8; + padding: 2px 8px; + font-weight: bold; + color: #2a2a2a; + } + + .ascii-bar { + font-family: monospace; + margin: 8px 0; + white-space: pre; + } + + .task-progress { + margin: 20px 0; + font-family: monospace; + font-size: 1.1em; + } + + .progress-bar-ascii { + display: inline-block; + background-color: #ddd9c8; + padding: 5px 10px; + border: 2px solid #333; + } + + .simple-list { + list-style: none; +
padding: 0; + margin: 15px 0; + } + + .simple-list li { + padding: 8px 0; + border-bottom: 1px dashed #999; + } + + .simple-list li:last-child { + border-bottom: none; + } + + .simple-list .label { + color: #555; + display: inline-block; + min-width: 200px; + } + + .simple-list .value { + font-weight: bold; + color: #2a2a2a; + } + + .calendar-grid { + display: grid; + grid-template-columns: repeat(6, 1fr); + gap: 5px; + margin: 20px 0; + font-family: monospace; + } + + .calendar-month { + padding: 10px; + background-color: #ddd9c8; + text-align: center; + border: 1px solid #999; + } + + .month-label { + font-size: 0.8em; + color: #555; + } + + .month-value { + font-size: 1.2em; + font-weight: bold; + color: #2a2a2a; + } + + .fun-fact { + background-color: #ddd9c8; + padding: 15px 20px; + margin: 20px 0; + border-left: 4px solid #d87a16; + } + + .two-col { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 20px; + margin: 20px 0; + } + + .box { + background-color: #ddd9c8; + padding: 20px; + border: 2px dashed #333; + } + + section { + margin: 40px 0; + } + </style> +</head> +<body> + <div class="container"> + <nav> + <ul> + <li><a href="/">~/</a></li> + <li>|</li> + <li><a href="writings">~/writings</a></li> + <li>|</li> + <li><a href="projects">~/projects</a></li> + <li>|</li> + <li><a href="notes" class="active">~/notes</a></li> + <li>|</li> + <li><a href="contact">~/contact</a></li> + <li style="float: right">Made in <span class="flag-emoji">🇬🇧</span></li> + </ul> + </nav> + + <main> + <h1>Notes Statistics</h1> + <p>Aggregated stats are calculated using a Python script on my Obsidian markdown notes, which updates this html file every week using a cronjob.</p> + <hr> + <div class="big-stat"> + <div class="big-stat-value">{{TOTAL_NOTES}}</div> + <div class="big-stat-label">notes across {{TOTAL_VAULTS}} vaults</div> + </div> + + <div class="inline-stats"> + <p>> That's <span class="stat-highlight">{{TOTAL_WORDS}}</span> total words, averaging <span 
class="stat-highlight">{{AVG_WORDS}}</span> words per note.</p> + <p>> <span class="stat-highlight">{{TOTAL_LINES}}</span> lines of markdown taking up <span class="stat-highlight">{{DISK_USAGE}}</span> on disk.</p> + <p>> Last edited <span class="stat-highlight">{{DAYS_SINCE_LAST_EDIT}}</span> days ago.</p> + </div> + + <section> + <h2>Task Completion</h2> + <div class="task-progress"> + [{{TASK_PROGRESS_BAR}}] {{TASK_COMPLETION}}% ({{TASKS_COMPLETED}}/{{TOTAL_TASKS}}) + </div> + <p style="color: #555;">{{TASKS_UNCHECKED}} tasks still unchecked</p> + </section> + + <section> + <h2>What's Inside</h2> + <div class="two-col"> + <div class="box"> + <h3 style="margin-top: 0;">Links & Media</h3> + <ul class="simple-list"> + <li><span class="label">Internal Links</span> <span class="value">{{INTERNAL_LINKS}}</span></li> + <li><span class="label">External URLs</span> <span class="value">{{EXTERNAL_URLS}}</span></li> + <li><span class="label">Images</span> <span class="value">{{IMAGES}}</span></li> + </ul> + </div> + <div class="box"> + <h3 style="margin-top: 0;">Code & Math</h3> + <ul class="simple-list"> + <li><span class="label">Code Blocks</span> <span class="value">{{CODE_BLOCKS}}</span></li> + <li><span class="label">Math Expressions</span> <span class="value">{{MATH_EXPR}}</span></li> + <li><span class="label">Tables</span> <span class="value">{{TABLES}}</span></li> + <li><span class="label">Blockquotes</span> <span class="value">{{BLOCKQUOTES}}</span></li> + </ul> + </div> + </div> + + <div class="fun-fact"> + <b>Headers:</b> {{H1_COUNT}} H1s, {{H2_COUNT}} H2s, {{H3_COUNT}} H3s, {{H4_COUNT}} H4+ | + <b>Lists:</b> {{LISTS}} items | + <b>Horizontal Rules:</b> {{HR_COUNT}} + </div> + </section> + + <section> + <h2>Activity Timeline</h2> + <p style="color: #555;">Last 6 months of note modifications</p> + <div class="calendar-grid"> + {{MONTHLY_ACTIVITY}} + </div> + </section> + + <section> + <h2>When I Write</h2> + <p style="color: #555;">Most active day: 
<b>{{MOST_ACTIVE_DAY}}</b> ({{MOST_ACTIVE_DAY_COUNT}} notes modified)</p> + <div style="margin: 20px 0; font-family: monospace; white-space: pre-line;"> + {{DAY_OF_WEEK_BARS}} + </div> + </section> + + <section> + <h2>Note Sizes</h2> + <div style="margin: 20px 0;"> + <p>> <b>{{LONGEST_BRACKET}}</b> words: longest note category ({{LONGEST_COUNT}} notes)</p> + <p>> <b>{{SHORTEST_BRACKET}}</b> words: shortest note category ({{SHORTEST_COUNT}} notes)</p> + <p>> Most notes are in the <b>{{MOST_COMMON_BRACKET}}</b> range ({{MOST_COMMON_COUNT}} notes)</p> + </div> + <ul class="simple-list"> + {{LENGTH_DISTRIBUTION}} + </ul> + </section> + </main> + + <footer> + <p style="border-bottom: 1px dashed #999"><a href="rss.xml"><img src="static/rss.png" alt="RSS" width="16" height="16"></a></p> + <p>Last Updated: <b>{{LAST_UPDATED}}</b></p> + <p>File size: <b>{{FILE_SIZE}}</b></p> + <p style="border-bottom: 1px dashed #999"><b>Licensed under <a href="https://www.gnu.org/licenses/gpl-3.0.en.html">GPL</a></b></p> + <p>Donate <b style="color: #d87a16">XMR</b> to the author:</p> + <p>45gcPYy1NdNfjXLXua77uEUvbx49bPR9ZZ4NNUcXEqwWGEogEauDpcU3HhvWEb1voz8eVchUV5ZspDWG66ViVSnH3GHphn6</p> + </footer> + </div> +</body> +</html> |
