summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--notes-stats.py262
-rw-r--r--notes-template.html250
2 files changed, 512 insertions, 0 deletions
diff --git a/notes-stats.py b/notes-stats.py
new file mode 100644
index 0000000..275c311
--- /dev/null
+++ b/notes-stats.py
@@ -0,0 +1,262 @@
+#!/usr/bin/env python3
+import subprocess
+import os
+from datetime import datetime, timedelta
+from collections import defaultdict
+
+NOTES_DIR = "/var/www/nextcloud/data/user/files/Notes"
+TEMPLATE_PATH = "/usr/local/bin/notes-template.html"
+OUTPUT_PATH = "/var/www/website/notes.html"
+
+def run_cmd(cmd):
+ result = subprocess.run(cmd, shell=True, capture_output=True, text=True, cwd=NOTES_DIR)
+ return result.stdout.strip()
+
+def count_pattern(pattern):
+ cmd = f"grep -roh '{pattern}' --include='*.md' . | wc -l"
+ return int(run_cmd(cmd) or 0)
+
+def get_file_list():
+ cmd = "find . -type f -name '*.md'"
+ files = run_cmd(cmd).split('\n')
+ return [f for f in files if f]
+
+def calculate_basic_stats():
+ stats = {}
+
+ stats['total_notes'] = int(run_cmd("find . -type f -name '*.md' | wc -l"))
+ stats['total_words'] = int(run_cmd("find . -type f -name '*.md' -exec wc -w {} + | tail -1 | awk '{print $1}'"))
+ stats['total_lines'] = int(run_cmd("find . -type f -name '*.md' -exec wc -l {} + | tail -1 | awk '{print $1}'"))
+ du_output = run_cmd("du -sh .")
+ stats['disk_usage'] = du_output.split()[0]
+ stats['avg_words'] = stats['total_words'] // stats['total_notes']
+ stats['avg_lines'] = stats['total_lines'] // stats['total_notes']
+ stats['total_vaults'] = int(run_cmd("find . -maxdepth 1 -mindepth 1 -type d ! -name '.*' | wc -l"))
+ return stats
+
+def calculate_content_stats():
+ stats = {}
+ stats['internal_links'] = count_pattern('\\[\\[[^]]*\\]\\]')
+ stats['external_urls'] = count_pattern('https\\?://[^[:space:]]\\+')
+ stats['images'] = count_pattern('!\\[\\[[^]]*\\]\\]') + count_pattern('!\\[[^]]*\\]([^)]*)')
+ stats['code_blocks'] = count_pattern('```') // 2
+ stats['math_expr'] = count_pattern('\\$\\$[^$]*\\$\\$') + count_pattern('\\$[^$]*\\$')
+ return stats
+
+def calculate_markdown_stats():
+ stats = {}
+ stats['h1'] = count_pattern('^# ')
+ stats['h2'] = count_pattern('^## ')
+ stats['h3'] = count_pattern('^### ')
+ stats['h4'] = count_pattern('^#### ')
+ stats['lists'] = count_pattern('^[[:space:]]*[-*] ') + count_pattern('^[[:space:]]*[0-9]\\+\\. ')
+ stats['blockquotes'] = count_pattern('^> ')
+ stats['tables'] = count_pattern('^|.*|$') // 3 # Estimate: header, separator, data
+ stats['hr'] = count_pattern('^---$') + count_pattern('^\\*\\*\\*$')
+ return stats
+
+def calculate_task_stats():
+ stats = {}
+ total = count_pattern('\\- \\[[ x]\\]')
+ stats['total_tasks'] = total
+ completed = count_pattern('\\- \\[x\\]')
+ stats['tasks_completed'] = completed
+ stats['tasks_unchecked'] = total - completed
+ if total > 0:
+ stats['task_completion'] = (completed * 100) // total
+ else:
+ stats['task_completion'] = 0
+
+ stats['task_completion_angle'] = (stats['task_completion'] * 360) // 100
+
+ return stats
+
+def calculate_temporal_stats():
+ stats = {}
+ cmd = "find . -type f -name '*.md' -printf '%T@\\n' | sort -n | tail -1"
+ last_edit_ts = run_cmd(cmd)
+ if last_edit_ts:
+ last_edit = datetime.fromtimestamp(float(last_edit_ts))
+ stats['days_since_last_edit'] = (datetime.now() - last_edit).days
+ else:
+ stats['days_since_last_edit'] = 0
+
+ monthly_counts = []
+ now = datetime.now()
+
+ for i in range(5, -1, -1):
+ target_month = now - timedelta(days=30*i)
+ month_start = target_month.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
+ if month_start.month == 12:
+ month_end = month_start.replace(year=month_start.year+1, month=1)
+ else:
+ month_end = month_start.replace(month=month_start.month+1)
+
+ start_ts = int(month_start.timestamp())
+ end_ts = int(month_end.timestamp())
+
+ cmd = f"find . -type f -name '*.md' -newermt '@{start_ts}' ! -newermt '@{end_ts}' | wc -l"
+ count = int(run_cmd(cmd) or 0)
+
+ monthly_counts.append({'month': month_start.strftime('%b %Y'), 'count': count})
+
+ stats['monthly_activity'] = monthly_counts
+
+ dow_counts = defaultdict(int)
+ files = get_file_list()
+
+ for file in files:
+ if not file:
+ continue
+ cmd = f"stat -c %Y '{file}'"
+ ts = run_cmd(cmd)
+ if ts:
+ dt = datetime.fromtimestamp(int(ts))
+ dow = dt.strftime('%a')
+ dow_counts[dow] += 1
+
+ days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
+ stats['day_of_week'] = [{'day': day, 'count': dow_counts.get(day, 0)} for day in days]
+
+ return stats
+
+def calculate_length_distribution():
+ files = get_file_list()
+ buckets = [0, 0, 0, 0, 0, 0] # 0-100, 100-500, 500-1k, 1k-2k, 2k-5k, 5k+
+
+ for file in files:
+ if not file:
+ continue
+ cmd = f"wc -w '{file}' | awk '{{print $1}}'"
+ words = int(run_cmd(cmd) or 0)
+
+ if words < 100:
+ buckets[0] += 1
+ elif words < 500:
+ buckets[1] += 1
+ elif words < 1000:
+ buckets[2] += 1
+ elif words < 2000:
+ buckets[3] += 1
+ elif words < 5000:
+ buckets[4] += 1
+ else:
+ buckets[5] += 1
+
+ return buckets
+
+def generate_html():
+ with open(TEMPLATE_PATH, 'r') as f:
+ html = f.read()
+
+ basic = calculate_basic_stats()
+ if basic is None:
+ print("No markdown files found")
+ return
+
+ content = calculate_content_stats()
+ markdown = calculate_markdown_stats()
+ tasks = calculate_task_stats()
+ temporal = calculate_temporal_stats()
+ length_dist = calculate_length_distribution()
+
+ # Basic stats
+ html = html.replace('{{TOTAL_NOTES}}', str(basic['total_notes']))
+ html = html.replace('{{TOTAL_WORDS}}', f"{basic['total_words']:,}")
+ html = html.replace('{{TOTAL_LINES}}', f"{basic['total_lines']:,}")
+ html = html.replace('{{DISK_USAGE}}', basic['disk_usage'])
+ html = html.replace('{{AVG_WORDS}}', str(basic['avg_words']))
+ html = html.replace('{{AVG_LINES}}', str(basic['avg_lines']))
+ html = html.replace('{{TOTAL_VAULTS}}', str(basic['total_vaults']))
+
+ # Task stats with ASCII progress bar
+ html = html.replace('{{TOTAL_TASKS}}', str(tasks['total_tasks']))
+ html = html.replace('{{TASKS_COMPLETED}}', str(tasks['tasks_completed']))
+ html = html.replace('{{TASKS_UNCHECKED}}', str(tasks['tasks_unchecked']))
+ html = html.replace('{{TASK_COMPLETION}}', str(tasks['task_completion']))
+
+ # ASCII progress bar (20 chars)
+ filled = (tasks['task_completion'] * 20) // 100
+ progress_bar = '=' * filled + '-' * (20 - filled)
+ html = html.replace('{{TASK_PROGRESS_BAR}}', progress_bar)
+
+ # Content stats
+ html = html.replace('{{INTERNAL_LINKS}}', str(content['internal_links']))
+ html = html.replace('{{EXTERNAL_URLS}}', str(content['external_urls']))
+ html = html.replace('{{IMAGES}}', str(content['images']))
+ html = html.replace('{{CODE_BLOCKS}}', str(content['code_blocks']))
+ html = html.replace('{{MATH_EXPR}}', str(content['math_expr']))
+
+ # Markdown stats
+ html = html.replace('{{H1_COUNT}}', str(markdown['h1']))
+ html = html.replace('{{H2_COUNT}}', str(markdown['h2']))
+ html = html.replace('{{H3_COUNT}}', str(markdown['h3']))
+ html = html.replace('{{H4_COUNT}}', str(markdown['h4']))
+ html = html.replace('{{LISTS}}', str(markdown['lists']))
+ html = html.replace('{{BLOCKQUOTES}}', str(markdown['blockquotes']))
+ html = html.replace('{{TABLES}}', str(markdown['tables']))
+ html = html.replace('{{HR_COUNT}}', str(markdown['hr']))
+
+ # Temporal stats
+ html = html.replace('{{DAYS_SINCE_LAST_EDIT}}', str(temporal['days_since_last_edit']))
+
+ # Monthly activity calendar grid
+ monthly_html = ""
+ for month_data in temporal['monthly_activity']:
+ monthly_html += f''' <div class="calendar-month">
+ <div class="month-label">{month_data['month']}</div>
+ <div class="month-value">{month_data['count']}</div>
+ </div>\n'''
+ html = html.replace('{{MONTHLY_ACTIVITY}}', monthly_html)
+
+ # Day of week ASCII bars
+ dow_max = max([d['count'] for d in temporal['day_of_week']]) or 1
+ most_active_day = max(temporal['day_of_week'], key=lambda x: x['count'])
+
+ dow_bars = ""
+ for dow_data in temporal['day_of_week']:
+ bar_len = (dow_data['count'] * 30) // dow_max if dow_max > 0 else 0
+ bar = '█' * bar_len
+ dow_bars += f"{dow_data['day']}: {bar} {dow_data['count']}\n"
+
+ html = html.replace('{{DAY_OF_WEEK_BARS}}', dow_bars.strip())
+ html = html.replace('{{MOST_ACTIVE_DAY}}', most_active_day['day'])
+ html = html.replace('{{MOST_ACTIVE_DAY_COUNT}}', str(most_active_day['count']))
+
+ # Length distribution with metrics
+ ranges = ['0-100', '100-500', '500-1k', '1k-2k', '2k-5k', '5k+']
+ length_html = ""
+ for i, count in enumerate(length_dist):
+ length_html += f' <li><span class="label">{ranges[i]} words</span> <span class="value">{count} notes</span></li>\n'
+ html = html.replace('{{LENGTH_DISTRIBUTION}}', length_html)
+
+ max_idx = length_dist.index(max(length_dist))
+ html = html.replace('{{MOST_COMMON_BRACKET}}', ranges[max_idx])
+ html = html.replace('{{MOST_COMMON_COUNT}}', str(length_dist[max_idx]))
+
+ # Find longest and shortest with actual notes
+ longest_idx = next((i for i in range(5, -1, -1) if length_dist[i] > 0), 0)
+ shortest_idx = next((i for i in range(6) if length_dist[i] > 0), 0)
+ html = html.replace('{{LONGEST_BRACKET}}', ranges[longest_idx])
+ html = html.replace('{{LONGEST_COUNT}}', str(length_dist[longest_idx]))
+ html = html.replace('{{SHORTEST_BRACKET}}', ranges[shortest_idx])
+ html = html.replace('{{SHORTEST_COUNT}}', str(length_dist[shortest_idx]))
+
+ # Footer metadata
+ html = html.replace('{{LAST_UPDATED}}', datetime.now().strftime('%d/%m/%Y'))
+
+ with open(OUTPUT_PATH, 'w') as f:
+ f.write(html)
+
+ file_size = run_cmd(f"ls -lh '{OUTPUT_PATH}' | awk '{{print $5}}'")
+
+ with open(OUTPUT_PATH, 'r') as f:
+ html = f.read()
+ html = html.replace('{{FILE_SIZE}}', file_size)
+ with open(OUTPUT_PATH, 'w') as f:
+ f.write(html)
+
+ print(f"Generated {OUTPUT_PATH}")
+
+if __name__ == "__main__":
+ generate_html()
diff --git a/notes-template.html b/notes-template.html
new file mode 100644
index 0000000..e390c5e
--- /dev/null
+++ b/notes-template.html
@@ -0,0 +1,250 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="UTF-8">
+ <title>notes</title>
+ <link rel="icon" type="image/x-icon" href="favicon.ico">
+ <link rel="stylesheet" href="base.css">
+ <style>
+ .big-stat {
+ text-align: center;
+ margin: 30px 0;
+ padding: 20px;
+ background-color: #ddd9c8;
+ border: 3px double #333;
+ }
+
+ .big-stat-value {
+ font-size: 4em;
+ font-weight: bold;
+ color: #2a2a2a;
+ line-height: 1;
+ }
+
+ .big-stat-label {
+ font-size: 1.2em;
+ color: #555;
+ margin-top: 10px;
+ }
+
+ .inline-stats {
+ margin: 20px 0;
+ line-height: 2;
+ }
+
+ .inline-stats p {
+ margin: 5px 0;
+ }
+
+ .stat-highlight {
+ background-color: #ddd9c8;
+ padding: 2px 8px;
+ font-weight: bold;
+ color: #2a2a2a;
+ }
+
+ .ascii-bar {
+ font-family: monospace;
+ margin: 8px 0;
+ white-space: pre;
+ }
+
+ .task-progress {
+ margin: 20px 0;
+ font-family: monospace;
+ font-size: 1.1em;
+ }
+
+ .progress-bar-ascii {
+ display: inline-block;
+ background-color: #ddd9c8;
+ padding: 5px 10px;
+ border: 2px solid #333;
+ }
+
+ .simple-list {
+ list-style: none;
+ padding: 0;
+ margin: 15px 0;
+ }
+
+ .simple-list li {
+ padding: 8px 0;
+ border-bottom: 1px dashed #999;
+ }
+
+ .simple-list li:last-child {
+ border-bottom: none;
+ }
+
+ .simple-list .label {
+ color: #555;
+ display: inline-block;
+ min-width: 200px;
+ }
+
+ .simple-list .value {
+ font-weight: bold;
+ color: #2a2a2a;
+ }
+
+ .calendar-grid {
+ display: grid;
+ grid-template-columns: repeat(6, 1fr);
+ gap: 5px;
+ margin: 20px 0;
+ font-family: monospace;
+ }
+
+ .calendar-month {
+ padding: 10px;
+ background-color: #ddd9c8;
+ text-align: center;
+ border: 1px solid #999;
+ }
+
+ .month-label {
+ font-size: 0.8em;
+ color: #555;
+ }
+
+ .month-value {
+ font-size: 1.2em;
+ font-weight: bold;
+ color: #2a2a2a;
+ }
+
+ .fun-fact {
+ background-color: #ddd9c8;
+ padding: 15px 20px;
+ margin: 20px 0;
+ border-left: 4px solid #d87a16;
+ }
+
+ .two-col {
+ display: grid;
+ grid-template-columns: 1fr 1fr;
+ gap: 20px;
+ margin: 20px 0;
+ }
+
+ .box {
+ background-color: #ddd9c8;
+ padding: 20px;
+ border: 2px dashed #333;
+ }
+
+ section {
+ margin: 40px 0;
+ }
+ </style>
+</head>
+<body>
+ <div class="container">
+ <nav>
+ <ul>
+ <li><a href="/">~/</a></li>
+ <li>|</li>
+ <li><a href="writings">~/writings</a></li>
+ <li>|</li>
+ <li><a href="projects">~/projects</a></li>
+ <li>|</li>
+ <li><a href="notes" class="active">~/notes</a></li>
+ <li>|</li>
+ <li><a href="contact">~/contact</a></li>
+ <li style="float: right">Made in <span class="flag-emoji">🇬🇧</span></li>
+ </ul>
+ </nav>
+
+ <main>
+ <h1>Notes Statistics</h1>
+ <p>Aggregated stats are calculated using a Python script on my Obsidian markdown notes, which updates this html file every week using a cronjob.</p>
+ <hr>
+ <div class="big-stat">
+ <div class="big-stat-value">{{TOTAL_NOTES}}</div>
+ <div class="big-stat-label">notes across {{TOTAL_VAULTS}} vaults</div>
+ </div>
+
+ <div class="inline-stats">
+ <p>&gt; That's <span class="stat-highlight">{{TOTAL_WORDS}}</span> total words, averaging <span class="stat-highlight">{{AVG_WORDS}}</span> words per note.</p>
+ <p>&gt; <span class="stat-highlight">{{TOTAL_LINES}}</span> lines of markdown taking up <span class="stat-highlight">{{DISK_USAGE}}</span> on disk.</p>
+ <p>&gt; Last edited <span class="stat-highlight">{{DAYS_SINCE_LAST_EDIT}}</span> days ago.</p>
+ </div>
+
+ <section>
+ <h2>Task Completion</h2>
+ <div class="task-progress">
+ [{{TASK_PROGRESS_BAR}}] {{TASK_COMPLETION}}% ({{TASKS_COMPLETED}}/{{TOTAL_TASKS}})
+ </div>
+ <p style="color: #555;">{{TASKS_UNCHECKED}} tasks still unchecked</p>
+ </section>
+
+ <section>
+ <h2>What's Inside</h2>
+ <div class="two-col">
+ <div class="box">
+ <h3 style="margin-top: 0;">Links & Media</h3>
+ <ul class="simple-list">
+ <li><span class="label">Internal Links</span> <span class="value">{{INTERNAL_LINKS}}</span></li>
+ <li><span class="label">External URLs</span> <span class="value">{{EXTERNAL_URLS}}</span></li>
+ <li><span class="label">Images</span> <span class="value">{{IMAGES}}</span></li>
+ </ul>
+ </div>
+ <div class="box">
+ <h3 style="margin-top: 0;">Code & Math</h3>
+ <ul class="simple-list">
+ <li><span class="label">Code Blocks</span> <span class="value">{{CODE_BLOCKS}}</span></li>
+ <li><span class="label">Math Expressions</span> <span class="value">{{MATH_EXPR}}</span></li>
+ <li><span class="label">Tables</span> <span class="value">{{TABLES}}</span></li>
+ <li><span class="label">Blockquotes</span> <span class="value">{{BLOCKQUOTES}}</span></li>
+ </ul>
+ </div>
+ </div>
+
+ <div class="fun-fact">
+ <b>Headers:</b> {{H1_COUNT}} H1s, {{H2_COUNT}} H2s, {{H3_COUNT}} H3s, {{H4_COUNT}} H4+ |
+ <b>Lists:</b> {{LISTS}} items |
+ <b>Horizontal Rules:</b> {{HR_COUNT}}
+ </div>
+ </section>
+
+ <section>
+ <h2>Activity Timeline</h2>
+ <p style="color: #555;">Last 6 months of note modifications</p>
+ <div class="calendar-grid">
+ {{MONTHLY_ACTIVITY}}
+ </div>
+ </section>
+
+ <section>
+ <h2>When I Write</h2>
+ <p style="color: #555;">Most active day: <b>{{MOST_ACTIVE_DAY}}</b> ({{MOST_ACTIVE_DAY_COUNT}} notes modified)</p>
+ <div style="margin: 20px 0; font-family: monospace; white-space: pre-line;">
+ {{DAY_OF_WEEK_BARS}}
+ </div>
+ </section>
+
+ <section>
+ <h2>Note Sizes</h2>
+ <div style="margin: 20px 0;">
+ <p>&gt; <b>{{LONGEST_BRACKET}}</b> words: longest note category ({{LONGEST_COUNT}} notes)</p>
+ <p>&gt; <b>{{SHORTEST_BRACKET}}</b> words: shortest note category ({{SHORTEST_COUNT}} notes)</p>
+ <p>&gt; Most notes are in the <b>{{MOST_COMMON_BRACKET}}</b> range ({{MOST_COMMON_COUNT}} notes)</p>
+ </div>
+ <ul class="simple-list">
+ {{LENGTH_DISTRIBUTION}}
+ </ul>
+ </section>
+ </main>
+
+ <footer>
+ <p style="border-bottom: 1px dashed #999"><a href="rss.xml"><img src="static/rss.png" alt="RSS" width="16" height="16"></a></p>
+ <p>Last Updated: <b>{{LAST_UPDATED}}</b></p>
+ <p>File size: <b>{{FILE_SIZE}}</b></p>
+ <p style="border-bottom: 1px dashed #999"><b>Licensed under <a href="https://www.gnu.org/licenses/gpl-3.0.en.html">GPL</a></b></p>
+ <p>Donate <b style="color: #d87a16">XMR</b> to the author:</p>
+ <p>45gcPYy1NdNfjXLXua77uEUvbx49bPR9ZZ4NNUcXEqwWGEogEauDpcU3HhvWEb1voz8eVchUV5ZspDWG66ViVSnH3GHphn6</p>
+ </footer>
+ </div>
+</body>
+</html>