syntaxai/tdd.md · commit b91543a

Reports: live data from tdd.md repo via deploy-time git-history snapshot

The /reports/live page now renders real commit data from this repo
instead of demo fixtures. Because syntaxai/tdd.md is a private repo,
the public unauthenticated GitHub commits API returns 404 — so we
sidestep that by snapshotting the local git log into JSON at deploy
time and bundling it with the container.

  scripts/p620/snapshot-git-history.ts
    Bun script: git log -> content/git-history/<owner>__<name>.json
    in the GithubCommit shape that c14_github + c32_real_reports
    already consume.

  scripts/p620/deploy-tdd-md.sh
    Runs the snapshot before rsync so each deploy refreshes the
    bundle. No GitHub token, no rate-limit, no extra secrets.

  src/c14_github.ts (fetchRepoCommits)
    Tries the bundle first; falls back to the public commits API
    only when no bundle is present. Same 5-minute in-memory TTL.

  src/c32_real_reports.ts
    Aggregator: detects agent attribution from Co-Authored-By
    footers, builds AgentReport[] with phase-coverage as the score
    proxy. Commits without a recognised footer are counted as
    "unknown", left out of the per-agent reports, and surfaced
    only as a count in the scope label.

  src/c21_app.ts + src/c51_render_reports.ts
    /reports/live, /reports/live/agents/:slug, /reports/live/tests
    routes wired against the same body builders that render demo,
    via a parameterised ReportsContext / TestsOverviewContext.

  .gitignore
    content/git-history/ is regenerated per deploy, never tracked.

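For the dogfood case the entire history fits in 200 commits with
plenty of headroom; if we ever need more, bumping MAX in the
snapshot script is a one-liner (the /reports/live routes read only
the newest 100 via LIVE_FETCH_COUNT in src/c21_app.ts, so raise
that too).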

Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
author
syntaxai <[email protected]>
date
2026-05-08 16:25:09 +01:00
parent
a6c69dc
commit
b91543ab63a658f3d1e8dc7f9b51aa0945f488b3

7 files changed · +527 −54

modified .gitignore +1 −0
@@ -5,3 +5,4 @@ node_modules/
55 .env.local
66 .bun-cache/
77 .claude/
8+content/git-history/
modified scripts/p620/deploy-tdd-md.sh +6 −0
@@ -38,6 +38,12 @@ ssh "$SSH_HOST" 'command -v podman >/dev/null && command -v systemctl >/dev/null
3838
3939 need_restart=0
4040
41+echo "→ snapshot git history → content/git-history/"
42+# Bundles local git log into JSON so the container can render /reports/live
43+# for the (private) syntaxai/tdd.md repo without a GitHub token.
44+( cd "$REPO_ROOT" && bun scripts/p620/snapshot-git-history.ts ) \
45+ || { echo "✗ snapshot-git-history mislukt"; exit 1; }
46+
4147 echo "→ source rsync naar $SSH_HOST:~/$REMOTE_SRC_DIR"
4248 ssh "$SSH_HOST" "mkdir -p ~/$REMOTE_SRC_DIR"
4349 # --delete zodat verwijderde files ook weggaan op remote.
added scripts/p620/snapshot-git-history.ts +56 −0
@@ -0,0 +1,56 @@
1+#!/usr/bin/env bun
2+// Dump local git history into the same shape that the live-reports
3+// pipeline expects from GitHub's commits API. Runs at deploy-time so
4+// the container can render /reports/live for a private repo without
5+// holding a GitHub token. Each deploy refreshes the bundle.
6+//
7+// Output: content/git-history/<owner>__<name>.json
8+// Schema: { owner, name, fetchedAt, commits: GithubCommit[] } (GitHistoryBundle in src/c14_github.ts) — commits newest first.
9+
10+import { spawnSync } from "node:child_process";
11+import { mkdirSync, writeFileSync } from "node:fs";
12+import { dirname, resolve } from "node:path";
13+
14+const REPO_ROOT = resolve(import.meta.dir, "..", "..");
15+const OWNER = "syntaxai";
16+const NAME = "tdd.md";
17+const MAX = 200;
18+
19+// Use ASCII record separators (\x1e between commits, \x1f between
20+// fields) so commit-message newlines pass through unmangled.
21+const FMT = ["%H", "%aI", "%an", "%ae", "%B"].join("\x1f") + "\x1e";
22+
23+const res = spawnSync(
24+ "git",
25+ ["log", `--max-count=${MAX}`, `--pretty=format:${FMT}`],
26+ { cwd: REPO_ROOT, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 },
27+);
28+if (res.status !== 0) {
29+ console.error("git log failed:", res.stderr);
30+ process.exit(1);
31+}
32+
33+const records = res.stdout.split("\x1e").map((s) => s.trim()).filter(Boolean);
34+const commits = records.map((rec) => {
35+ const [sha, date, name, email, ...rest] = rec.split("\x1f");
36+ const message = (rest.join("\x1f") ?? "").replace(/\n+$/, "");
37+ return {
38+ sha: sha ?? "",
39+ commit: {
40+ author: {
41+ name: name ?? "",
42+ email: email ?? "",
43+ date: date ?? "",
44+ },
45+ message,
46+ },
47+ author: null,
48+ };
49+});
50+
51+const outDir = resolve(REPO_ROOT, "content", "git-history");
52+const outPath = resolve(outDir, `${OWNER}__${NAME}.json`);
53+mkdirSync(dirname(outPath), { recursive: true });
54+writeFileSync(outPath, JSON.stringify({ owner: OWNER, name: NAME, fetchedAt: Date.now(), commits }, null, 2));
55+
56+console.log(`✓ wrote ${commits.length} commits → ${outPath}`);
modified src/c14_github.ts +86 −0
@@ -120,3 +120,89 @@ export const fetchProjectConfig = async (
120120 }
121121 return parseProjectConfig(parsed);
122122 };
123+
124+// ---------------------------------------------------------------------
125+// Public commits API. Used to feed the live reports view from real
126+// data. Public-repo only; unauthenticated calls are rate-limited to
127+// 60/hour, so we cache aggressively. Single in-memory cache per
128+// (owner, repo) with a 5-minute TTL — enough for casual page-loads,
129+// not so long that pushed commits stay invisible.
130+// ---------------------------------------------------------------------
131+
132+export interface GithubCommit {
133+ sha: string;
134+ commit: {
135+ author: { name: string; email: string; date: string };
136+ message: string;
137+ };
138+ author: { login: string } | null;
139+}
140+
141+const COMMITS_TTL_MS = 5 * 60 * 1000;
142+const commitsCache = new Map<string, { fetchedAt: number; commits: GithubCommit[] }>();
143+
144+// Deploy-time snapshot: scripts/p620/snapshot-git-history.ts dumps the
145+// local git log into content/git-history/<owner>__<name>.json so the
146+// container can serve /reports/live for a private repo without a
147+// GitHub token. Bundle is preferred when present; we fall back to the
148+// public API for any repo we don't bundle.
149+const bundlePath = (repoOwner: string, repoName: string): string =>
150+ `./content/git-history/${repoOwner}__${repoName}.json`;
151+
152+interface GitHistoryBundle {
153+ owner: string;
154+ name: string;
155+ fetchedAt: number;
156+ commits: GithubCommit[];
157+}
158+
159+const loadBundle = async (
160+ repoOwner: string,
161+ repoName: string,
162+): Promise<GithubCommit[] | null> => {
163+ try {
164+ const file = Bun.file(bundlePath(repoOwner, repoName));
165+ if (!(await file.exists())) return null;
166+ const data = (await file.json()) as GitHistoryBundle;
167+ return Array.isArray(data.commits) ? data.commits : null;
168+ } catch {
169+ return null;
170+ }
171+};
172+
173+export const fetchRepoCommits = async (
174+ repoOwner: string,
175+ repoName: string,
176+ perPage = 100,
177+): Promise<GithubCommit[]> => {
178+ const key = `${repoOwner}/${repoName}#${perPage}`;
179+ const cached = commitsCache.get(key);
180+ if (cached && Date.now() - cached.fetchedAt < COMMITS_TTL_MS) {
181+ return cached.commits;
182+ }
183+
184+ const bundle = await loadBundle(repoOwner, repoName);
185+ if (bundle) {
186+ const sliced = bundle.slice(0, perPage);
187+ commitsCache.set(key, { fetchedAt: Date.now(), commits: sliced });
188+ return sliced;
189+ }
190+
191+ const url = `https://api.github.com/repos/${encodeURIComponent(repoOwner)}/${encodeURIComponent(repoName)}/commits?per_page=${perPage}`;
192+ const res = await fetch(url, {
193+ headers: {
194+ Accept: "application/vnd.github+json",
195+ "User-Agent": "tdd.md",
196+ },
197+ });
198+ if (!res.ok) {
199+ // Honour the cache on transient failure rather than blanking the page —
200+ // GitHub's 60/hour anonymous rate limit is the most likely cause and
201+ // the cached data is still strictly better than no data.
202+ if (cached) return cached.commits;
203+ throw new Error(`GitHub commits API failed for ${repoOwner}/${repoName}: HTTP ${res.status}`);
204+ }
205+ const commits = (await res.json()) as GithubCommit[];
206+ commitsCache.set(key, { fetchedAt: Date.now(), commits });
207+ return commits;
208+};
modified src/c21_app.ts +115 −4
@@ -27,7 +27,15 @@ import { fetchProjectConfig } from "./c14_github.ts";
2727 import { listGames, loadGame } from "./c31_games.ts";
2828 import { ALL_POSTS } from "./c31_blog.ts";
2929 import { ALL_GUIDES } from "./c31_guides.ts";
30-import { DEMO_REPORTS } from "./c31_reports_demo.ts";
30+import {
31+ DEMO_REPORTS,
32+ DEMO_PERIOD,
33+ DEMO_ORG,
34+ DEMO_REPOS,
35+ DEMO_SNAPSHOTS,
36+ DEMO_STABILITY,
37+} from "./c31_reports_demo.ts";
38+import { buildLiveReports } from "./c32_real_reports.ts";
3139 import { parseRepoIdentifier } from "./c31_project_config.ts";
3240 import { judge } from "./c32_judge.ts";
3341 import {
@@ -49,6 +57,54 @@ import { startGithubOauth, handleGithubCallback } from "./c21_handlers_auth.ts";
4957 const HOME_MD = "./content/home.md";
5058 const GAME_DIR = "./content/games";
5159
60+// ---------------------------------------------------------------------
61+// Reports-context builders. The c51 builders take a ReportsContext —
62+// these tiny helpers assemble it for the synthetic /reports/demo and
63+// the live /reports/live (real data fetched from syntaxai/tdd.md).
64+// ---------------------------------------------------------------------
65+
66+const LIVE_REPO_OWNER = "syntaxai";
67+const LIVE_REPO_NAME = "tdd.md";
68+const LIVE_FETCH_COUNT = 100;
69+
70+const DEMO_BANNER_HTML = `<div class="report-mockup-banner">demo data — design preview with synthetic numbers. Want the real readout? <a href="/reports/live">/reports/live</a> renders the same shape from live tdd.md commits. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a></div>`;
71+
72+const LIVE_BANNER_HTML = `<div class="report-mockup-banner">live data — sourced from <a href="https://github.com/${LIVE_REPO_OWNER}/${LIVE_REPO_NAME}">${LIVE_REPO_OWNER}/${LIVE_REPO_NAME}</a> via the public commits API (5-min cache). Agent attribution comes from <code>Co-Authored-By:</code> footers; commits without one are excluded. Phase coverage measures % of commits tagged <code>red:/green:/refactor:</code>.</div>`;
73+
74+const demoContext = () => ({
75+ reports: DEMO_REPORTS,
76+ period: DEMO_PERIOD,
77+ scopeLabel: `${DEMO_REPOS} repos · ${DEMO_ORG}`,
78+ bannerHtml: DEMO_BANNER_HTML,
79+ narrative: {
80+ changedHeading: "wat veranderde dit kwartaal",
81+ changedBody:
82+ "Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de `api-gateway` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.",
83+ doingHeading: "wat we doen",
84+ doingBody:
85+ "- **Cursor in `api-gateway`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel \"never delete a test in a refactor commit\" gepind ([details →](/reports/demo/agents/cursor)).\n- **Claude Code uitrollen**: het CLAUDE.md-template dat in `billing-service` werkte naar de andere drie repos kopiëren.\n- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.",
86+ },
87+ footerLinks:
88+ "[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)",
89+});
90+
91+const liveContext = async () => {
92+ const live = await buildLiveReports(LIVE_REPO_OWNER, LIVE_REPO_NAME, LIVE_FETCH_COUNT);
93+ const period = live.earliest && live.latest
94+ ? `${live.earliest.slice(0, 10)} → ${live.latest.slice(0, 10)}`
95+ : "no commits fetched";
96+ const drillLinks = live.reports
97+ .map((r) => `[${r.name}](/reports/live/agents/${r.slug})`)
98+ .join(" · ");
99+ return {
100+ reports: live.reports,
101+ period,
102+ scopeLabel: `${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} · ${live.totalCommits} commits sampled${live.unknownCount > 0 ? ` (${live.unknownCount} unattributed, excluded)` : ""}`,
103+ bannerHtml: LIVE_BANNER_HTML,
104+ footerLinks: `${drillLinks ? drillLinks + " · " : ""}[tests overzicht](/reports/live/tests) · [demo preview](/reports/demo) · [back to /reports](/reports)`,
105+ };
106+};
107+
52108 const HOME_DESCRIPTION =
53109 "Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline.";
54110
@@ -417,10 +473,11 @@ ${rows}
417473 },
418474
419475 "/reports/demo": async () => {
476+ const ctx = demoContext();
420477 const html = await renderPage({
421478 title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md",
422479 description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.",
423- bodyMarkdown: execSummaryMd(),
480+ bodyMarkdown: execSummaryMd(ctx),
424481 ogPath: "https://tdd.md/reports/demo",
425482 noindex: true,
426483 });
@@ -431,7 +488,12 @@ ${rows}
431488 const html = await renderPage({
432489 title: "Tests overzicht (demo) — tdd.md",
433490 description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.",
434- bodyMarkdown: testsOverviewMd(),
491+ bodyMarkdown: testsOverviewMd({
492+ period: DEMO_PERIOD,
493+ bannerHtml: DEMO_BANNER_HTML,
494+ snapshots: DEMO_SNAPSHOTS,
495+ stability: DEMO_STABILITY,
496+ }),
435497 ogPath: "https://tdd.md/reports/demo/tests",
436498 noindex: true,
437499 });
@@ -440,7 +502,8 @@ ${rows}
440502
441503 "/reports/demo/agents/:slug": async (req) => {
442504 const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"];
443- const md = agentDrilldownMd(slug);
505+ const ctx = demoContext();
506+ const md = agentDrilldownMd(slug, ctx);
444507 if (!md) {
445508 const html = await renderNotFound(`/reports/demo/agents/${slug}`);
446509 return htmlResponse(html, 404);
@@ -456,6 +519,54 @@ ${rows}
456519 return htmlResponse(html);
457520 },
458521
522+ "/reports/live": async () => {
523+ const ctx = await liveContext();
524+ const html = await renderPage({
525+ title: "TDD-discipline rapport · live — tdd.md",
526+ description: `Live discipline rapport gebouwd uit de echte commit-historie van syntaxai/tdd.md (laatste ${LIVE_FETCH_COUNT} commits, 5-min cache).`,
527+ bodyMarkdown: execSummaryMd(ctx),
528+ ogPath: "https://tdd.md/reports/live",
529+ noindex: true,
530+ });
531+ return htmlResponse(html);
532+ },
533+
534+ "/reports/live/tests": async () => {
535+ const html = await renderPage({
536+ title: "Tests overzicht · live (placeholder) — tdd.md",
537+ description: "Placeholder voor de live test-overview — wacht op de sandbox-runner sliver.",
538+ bodyMarkdown: testsOverviewMd({
539+ period: "live",
540+ bannerHtml: LIVE_BANNER_HTML,
541+ snapshots: [],
542+ stability: [],
543+ unavailableNote: "De per-repo test-snapshot en stabiliteitstabel hebben de sandbox-runner sliver nodig (block 1 vervolg). Tot dat klaar is, alleen de exec-summary + drill-down draaien op echte data; de testpagina staat in de [demo](/reports/demo/tests).",
544+ }),
545+ ogPath: "https://tdd.md/reports/live/tests",
546+ noindex: true,
547+ });
548+ return htmlResponse(html);
549+ },
550+
551+ "/reports/live/agents/:slug": async (req) => {
552+ const ctx = await liveContext();
553+ const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"];
554+ const md = agentDrilldownMd(slug, ctx);
555+ if (!md) {
556+ const html = await renderNotFound(`/reports/live/agents/${slug}`);
557+ return htmlResponse(html, 404);
558+ }
559+ const entry = ctx.reports.find((r) => r.slug === slug)!;
560+ const html = await renderPage({
561+ title: `${entry.name} drill-down · live — tdd.md`,
562+ description: `Live drill-down voor ${entry.name} op syntaxai/tdd.md — trend, failure-mode breakdown, recent commits.`,
563+ bodyMarkdown: md,
564+ ogPath: `https://tdd.md/reports/live/agents/${slug}`,
565+ noindex: true,
566+ });
567+ return htmlResponse(html);
568+ },
569+
459570 "/guides": async () => {
460571 const rows = ALL_GUIDES
461572 .map((g) => `| [${g.title}](/guides/${g.slug}) | ${g.description} |`)
added src/c32_real_reports.ts +170 −0
@@ -0,0 +1,170 @@
1+// c32 — logic: aggregate real GitHub commit history into the same
2+// AgentReport / RecentFlagged shape that c51_render_reports renders.
3+// The aggregation is synchronous in-memory work (commits in, report objects
4+// out); the only I/O is the c14_github.fetchRepoCommits call in buildLiveReports.
5+//
6+// Attribution: Co-Authored-By footers are the agent-attribution channel
7+// the existing tdd.md commit history already uses. Anything without a
8+// recognised footer is bucketed as "unknown" and reported separately —
9+// it's still useful for volume context.
10+
11+import { parseCommit } from "./c31_commits.ts";
12+import { fetchRepoCommits, type GithubCommit } from "./c14_github.ts";
13+import type {
14+ AgentReport,
15+ FailureSlice,
16+ RecentFlagged,
17+} from "./c31_reports_demo.ts";
18+
19+type LiveAgentSlug = AgentReport["slug"] | "unknown";
20+
21+const detectAgent = (msg: string): LiveAgentSlug => {
22+ if (/Co-Authored-By:.*Claude/i.test(msg)) return "claude-code";
23+ if (/Co-Authored-By:.*Cursor/i.test(msg)) return "cursor";
24+ if (/Co-Authored-By:.*Aider/i.test(msg)) return "aider";
25+ return "unknown";
26+};
27+
28+const AGENT_NAMES: Record<AgentReport["slug"], string> = {
29+ "claude-code": "Claude Code",
30+ cursor: "Cursor",
31+ aider: "Aider",
32+};
33+
34+// 30-day daily commit-count series, oldest → newest. When there are no
35+// commits in a day, that day's value is 0 — the sparkline still renders
36+// but flat-lines, which honestly reflects the data.
37+const buildTrend = (commits: GithubCommit[], days = 30): number[] => {
38+ const out = new Array<number>(days).fill(0);
39+ const today = new Date();
40+ today.setUTCHours(0, 0, 0, 0);
41+ for (const c of commits) {
42+ const d = new Date(c.commit.author.date);
43+ d.setUTCHours(0, 0, 0, 0);
44+ const ageDays = Math.floor((today.getTime() - d.getTime()) / (24 * 60 * 60 * 1000));
45+ if (ageDays < 0 || ageDays >= days) continue;
46+ const idx = days - 1 - ageDays;
47+ const cur = out[idx] ?? 0;
48+ out[idx] = cur + 1;
49+ }
50+ return out;
51+};
52+
53+const buildAgentReport = (
54+ slug: AgentReport["slug"],
55+ agentCommits: GithubCommit[],
56+ repoSlug: string,
57+): AgentReport => {
58+ const tagged = agentCommits.filter((c) => {
59+ const phase = parseCommit(c.commit.message).phase;
60+ return phase === "red" || phase === "green" || phase === "refactor";
61+ });
62+ const phaseCoveragePct = agentCommits.length === 0
63+ ? 0
64+ : Math.round((tagged.length / agentCommits.length) * 100);
65+
66+ // Score is a proxy: phase-coverage is the only structural signal we
67+ // can compute without running the test suite. When coverage is 0 the
68+ // agent isn't attempting TDD, so the score is honestly low.
69+ const score = phaseCoveragePct;
70+
71+ // Failure mix collapses to two slices for live data — phase-tagged vs
72+ // not. Fine-grained failure modes (red-did-not-fail, test-deleted, etc)
73+ // need the runner sliver before they're computable.
74+ const failureMix: FailureSlice[] = [
75+ { label: "phase-tagged", pct: phaseCoveragePct, tone: "green" },
76+ { label: "no phase tag", pct: 100 - phaseCoveragePct, tone: "muted" },
77+ ];
78+
79+ const recent: RecentFlagged[] = agentCommits
80+ .slice(0, 5)
81+ .map((c) => {
82+ const parsed = parseCommit(c.commit.message);
83+ const phase = parsed.phase === "red" || parsed.phase === "green" || parsed.phase === "refactor"
84+ ? parsed.phase
85+ : "green";
86+ const failure = parsed.phase === "untagged" || parsed.phase === "init"
87+ ? "no phase tag"
88+ : `${parsed.phase} (live judge not yet wired)`;
89+ return {
90+ date: c.commit.author.date.slice(0, 10),
91+ repo: repoSlug,
92+ sha: c.sha.slice(0, 7),
93+ phase,
94+ failure,
95+ pts: 0,
96+ };
97+ });
98+
99+ const topIssueLabel = phaseCoveragePct === 100 ? "no current issues" : "no phase tag";
100+ const topIssuePct = 100 - phaseCoveragePct;
101+
102+ return {
103+ slug,
104+ name: AGENT_NAMES[slug],
105+ score,
106+ delta: 0,
107+ commits: agentCommits.length,
108+ phaseCoveragePct,
109+ streak: 0,
110+ streakBroken: false,
111+ topIssueLabel,
112+ topIssuePct,
113+ failureMix,
114+ trend: buildTrend(agentCommits),
115+ recent,
116+ };
117+};
118+
119+export interface LiveReports {
120+ reports: AgentReport[];
121+ unknownCount: number;
122+ totalCommits: number;
123+ earliest: string | null;
124+ latest: string | null;
125+ fetchedAt: number;
126+}
127+
128+export const buildLiveReports = async (
129+ repoOwner: string,
130+ repoName: string,
131+ perPage = 100,
132+): Promise<LiveReports> => {
133+ const commits = await fetchRepoCommits(repoOwner, repoName, perPage);
134+ const repoSlug = `${repoOwner}/${repoName}`;
135+ const byAgent = new Map<AgentReport["slug"], GithubCommit[]>();
136+ let unknownCount = 0;
137+
138+ for (const c of commits) {
139+ const a = detectAgent(c.commit.message);
140+ if (a === "unknown") {
141+ unknownCount++;
142+ continue;
143+ }
144+ const arr = byAgent.get(a) ?? [];
145+ arr.push(c);
146+ byAgent.set(a, arr);
147+ }
148+
149+ const order: AgentReport["slug"][] = ["claude-code", "cursor", "aider"];
150+ const reports = order
151+ .map((slug) => {
152+ const list = byAgent.get(slug);
153+ if (!list || list.length === 0) return null;
154+ return buildAgentReport(slug, list, repoSlug);
155+ })
156+ .filter((r): r is AgentReport => r !== null);
157+
158+ const dates = commits.map((c) => c.commit.author.date).sort();
159+ const earliest = dates[0] ?? null;
160+ const latest = dates[dates.length - 1] ?? null;
161+
162+ return {
163+ reports,
164+ unknownCount,
165+ totalCommits: commits.length,
166+ earliest,
167+ latest,
168+ fetchedAt: Date.now(),
169+ };
170+};
modified src/c51_render_reports.ts +93 −50
@@ -1,14 +1,11 @@
11 // c51 (reports) — body builders for /reports, /reports/demo,
2-// /reports/demo/agents/:slug, /reports/demo/tests. All synthetic data
3-// comes from c31_reports_demo; chrome helpers come from c51_render_layout.
2+// /reports/live, /reports/demo/agents/:slug, /reports/demo/tests. The
3+// builders take the dataset as an explicit ReportsContext so the same
4+// markdown templates serve both the synthetic demo (DEMO_* from
5+// c31_reports_demo) and the live tdd.md aggregation (c32_real_reports).
46
57 import {
6- DEMO_PERIOD,
7- DEMO_ORG,
8- DEMO_REPOS,
98 DEMO_REPORTS,
10- DEMO_SNAPSHOTS,
11- DEMO_STABILITY,
129 type AgentReport,
1310 type FailureSlice,
1411 type TestSnapshot,
@@ -16,6 +13,33 @@ import {
1613 } from "./c31_reports_demo.ts";
1714 import { escape } from "./c51_render_layout.ts";
1815
16+export interface ReportsContext {
17+ reports: AgentReport[];
18+ period: string;
19+ scopeLabel: string;
20+ bannerHtml: string;
21+ // Optional narrative — present for the curated demo, omitted for live
22+ // where the data has to speak for itself.
23+ narrative?: {
24+ changedHeading: string;
25+ changedBody: string;
26+ doingHeading: string;
27+ doingBody: string;
28+ };
29+ // Trailing footer line (markdown links). Required — demo and live each supply their own.
30+ footerLinks: string;
31+}
32+
33+export interface TestsOverviewContext {
34+ period: string;
35+ bannerHtml: string;
36+ snapshots: TestSnapshot[];
37+ stability: TestStability[];
38+ // When the runner sliver isn't wired (live mode, today), pass a
39+ // placeholder note instead of the snapshot+stability sections.
40+ unavailableNote?: string;
41+}
42+
1943 const trendArrow = (delta: number): { glyph: string; cls: string } =>
2044 delta > 0 ? { glyph: "↑", cls: "up" } : delta < 0 ? { glyph: "↓", cls: "down" } : { glyph: "→", cls: "flat" };
2145
@@ -70,8 +94,6 @@ const streakBox = (a: AgentReport): string => {
7094 return `<span class="report-streak ${cls}"><span class="report-streak-num">${a.streak}</span> ${label}</span>`;
7195 };
7296
73-const mockBanner = `<div class="report-mockup-banner">demo data — real reporting wires up when the project-tracking pipeline ships. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a> · <a href="/reports">about reporting</a></div>`;
74-
7597 const snapshotBlock = (s: TestSnapshot): string => {
7698 const failuresHtml = s.failures.length === 0
7799 ? `<li class="test-list-pass">all ${s.passing} tests groen</li>`
@@ -113,13 +135,16 @@ export const reportsLandingMd = (): string => `# reports
113135
114136 > Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.
115137
116-This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data:
138+Two views of the same shape:
139+
140+- **[/reports/live](/reports/live)** — built from real commit data on \`syntaxai/tdd.md\` (the repo this site runs on), refreshed every 5 minutes from the GitHub commits API. Agent attribution comes from \`Co-Authored-By:\` footers. Phase-coverage is the only metric we can compute without running tests, so the score is a proxy for now.
141+- **[/reports/demo](/reports/demo)** — the polished design preview with synthetic data for three agents and four repos. Useful for screenshots and showing the full failure-mode breakdown the live view can't compute yet.
117142
118-- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents
119-- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits
120-- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam
143+Drill-downs:
144+- [live drill-down per agent](/reports/live/agents/claude-code) · [tests overzicht (live: placeholder)](/reports/live/tests)
145+- [demo drill-down per agent](/reports/demo/agents/cursor) · [tests overzicht (demo)](/reports/demo/tests)
121146
122-Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers.
147+Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging on tracked branches lands in a follow-up sliver; live reporting from the GitHub API already works for the dogfood case (the tdd.md repo itself).
123148
124149 ## what gets measured
125150
@@ -145,55 +170,63 @@ For team-leads:
145170 [← back to tdd.md](/) · [the blog](/blog) · [the katas](/games)
146171 `;
147172
148-export const execSummaryMd = (): string => {
149- const totalCommits = DEMO_REPORTS.reduce((s, a) => s + a.commits, 0);
150- const tiles = DEMO_REPORTS.map(tile).join("\n");
151- return `# tdd-discipline rapport · q1 2026
173+export const execSummaryMd = (ctx: ReportsContext): string => {
174+ const totalCommits = ctx.reports.reduce((s, a) => s + a.commits, 0);
175+ const tiles = ctx.reports.length === 0
176+ ? `<div class="report-tile-empty">No agent-attributed commits in this dataset.</div>`
177+ : ctx.reports.map(tile).join("\n");
178+ const narrativeBlock = ctx.narrative
179+ ? `## ${ctx.narrative.changedHeading}
152180
153-${mockBanner}
181+${ctx.narrative.changedBody}
154182
155-> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}.
183+## ${ctx.narrative.doingHeading}
156184
157-<div class="report-tiles">
158-${tiles}
159-</div>
185+${ctx.narrative.doingBody}
160186
161-## wat veranderde dit kwartaal
187+`
188+ : "";
189+ return `# tdd-discipline rapport · ${ctx.period}
162190
163-Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.
191+${ctx.bannerHtml}
164192
165-## wat we doen
193+> **Periode** ${ctx.period} · **Scope** ${escape(ctx.scopeLabel)} · ${totalCommits.toLocaleString()} AI-toegeschreven commits.
166194
167-- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)).
168-- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren.
169-- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.
195+<div class="report-tiles">
196+${tiles}
197+</div>
170198
171-## wat dit getal *niet* meet
199+${narrativeBlock}## wat dit getal *niet* meet
172200
173201 Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy.
174202
175203 ---
176204
177-[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)
205+${ctx.footerLinks}
178206 `;
179207 };
180208
181-export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => {
182- const a = DEMO_REPORTS.find((r) => r.slug === slug);
209+export const agentDrilldownMd = (
210+ slug: AgentReport["slug"],
211+ ctx: ReportsContext,
212+): string | null => {
213+ const a = ctx.reports.find((r) => r.slug === slug);
183214 if (!a) return null;
184215 const arr = trendArrow(a.delta);
185216 const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`;
186- const recentRows = a.recent
187- .map(
188- (r) =>
189- `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`,
190- )
191- .join("\n");
217+ const recentRows = a.recent.length === 0
218+ ? `| _no recent attributed activity_ | | | | | |`
219+ : a.recent
220+ .map(
221+ (r) =>
222+ `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`,
223+ )
224+ .join("\n");
192225 return `# ${a.name} · drill-down
193226
194-${mockBanner}
227+${ctx.bannerHtml}
195228
196-> Discipline-score **${a.score} / 100** <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**.
229+> Discipline-score **${a.score} / 100** <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${ctx.period}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**.
197230
198231 ## trend (30 dagen)
199232
@@ -222,21 +255,31 @@ ${recentRows}
222255
223256 ---
224257
225-[← exec summary](/reports/demo) · [back to /reports](/reports)
258+${ctx.footerLinks}
226259 `;
227260 };
228261
229-export const testsOverviewMd = (): string => {
230- const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0);
231- const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0);
232- const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0);
233- const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n");
234- const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n");
262+export const testsOverviewMd = (ctx: TestsOverviewContext): string => {
263+ if (ctx.unavailableNote) {
264+ return `# tests overzicht
265+
266+${ctx.bannerHtml}
267+
268+> ${ctx.unavailableNote}
269+
270+[← exec summary](/reports) · [back to /reports](/reports)
271+`;
272+ }
273+ const total = ctx.snapshots.reduce((s, r) => s + r.total, 0);
274+ const passing = ctx.snapshots.reduce((s, r) => s + r.passing, 0);
275+ const failing = ctx.snapshots.reduce((s, r) => s + r.failing, 0);
276+ const snapshots = ctx.snapshots.map(snapshotBlock).join("\n");
277+ const stabRows = ctx.stability.map(stabilityRow).join("\n");
235278 return `# tests overzicht
236279
237-${mockBanner}
280+${ctx.bannerHtml}
238281
239-> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen".
282+> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${ctx.period}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen".
240283
241284 ## huidige stand · per repo
242285