syntaxai/tdd.md · commit b91543a

Reports: live data from tdd.md repo via deploy-time git-history snapshot

The /reports/live page now renders real commit data from this repo
instead of demo fixtures. Because syntaxai/tdd.md is a private repo,
the public unauthenticated GitHub commits API returns 404 — so we
sidestep that by snapshotting the local git log into JSON at deploy
time and bundling it with the container.

  scripts/p620/snapshot-git-history.ts
    Bun script: git log -> content/git-history/<owner>__<name>.json
    in the GithubCommit shape that c14_github + c32_real_reports
    already consume.

  scripts/p620/deploy-tdd-md.sh
    Runs the snapshot before rsync so each deploy refreshes the
    bundle. No GitHub token, no rate-limit, no extra secrets.

  src/c14_github.ts (fetchRepoCommits)
    Serves from the 5-minute in-memory cache when it is still fresh;
    otherwise tries the bundle first and falls back to the public
    commits API only when no readable bundle is present.

  src/c32_real_reports.ts
    Aggregator: detects agent attribution from Co-Authored-By
    footers, builds AgentReport[] with phase-coverage as the score
    proxy. Commits without a recognised footer are counted as
    "unknown", left out of the per-agent reports, and surfaced only
    as an unattributed total in the scope label.

  src/c21_app.ts + src/c51_render_reports.ts
    /reports/live, /reports/live/agents/:slug, /reports/live/tests
    routes wired against the same body builders that render demo,
    via a parameterised ReportsContext / TestsOverviewContext.

  .gitignore
    content/git-history/ is regenerated per deploy, never tracked.

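For the dogfood case the entire history fits in 200 commits with
plenty of headroom; if we ever need more, bumping MAX in the
snapshot script is a one-liner. Note that /reports/live only reads
the newest LIVE_FETCH_COUNT (100) commits from the bundle, so that
constant in src/c21_app.ts has to be raised along with it.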

Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>

author: syntaxai <[email protected]>
date: 2026-05-08 16:25:09 +01:00
parent: a6c69dc
commit: b91543ab63a658f3d1e8dc7f9b51aa0945f488b3

7 files changed · +527 −54

modified .gitignore +1 −0

@@ -5,3 +5,4 @@ node_modules/
5	5	.env.local
6	6	.bun-cache/
7	7	.claude/
	8	+content/git-history/

modified scripts/p620/deploy-tdd-md.sh +6 −0

@@ -38,6 +38,12 @@ ssh "$SSH_HOST" 'command -v podman >/dev/null && command -v systemctl >/dev/null
38	38
39	39	need_restart=0
40	40
	41	+echo "→ snapshot git history → content/git-history/"
	42	+# Bundles local git log into JSON so the container can render /reports/live
	43	+# for the (private) syntaxai/tdd.md repo without a GitHub token.
	44	+( cd "$REPO_ROOT" && bun scripts/p620/snapshot-git-history.ts ) \
	45	+ \|\| { echo "✗ snapshot-git-history mislukt"; exit 1; }
	46	+
41	47	echo "→ source rsync naar $SSH_HOST:~/$REMOTE_SRC_DIR"
42	48	ssh "$SSH_HOST" "mkdir -p ~/$REMOTE_SRC_DIR"
43	49	# --delete zodat verwijderde files ook weggaan op remote.

added scripts/p620/snapshot-git-history.ts +56 −0

@@ -0,0 +1,56 @@
	1	+#!/usr/bin/env bun
	2	+// Dump local git history into the same shape that the live-reports
	3	+// pipeline expects from GitHub's commits API. Runs at deploy-time so
	4	+// the container can render /reports/live for a private repo without
	5	+// holding a GitHub token. Each deploy refreshes the bundle.
	6	+//
	7	+// Output: content/git-history/<owner>__<name>.json
	8	+// Schema: GithubCommit[] (see src/c14_github.ts) — newest first.
	9	+
	10	+import { spawnSync } from "node:child_process";
	11	+import { mkdirSync, writeFileSync } from "node:fs";
	12	+import { dirname, resolve } from "node:path";
	13	+
	14	+const REPO_ROOT = resolve(import.meta.dir, "..", "..");
	15	+const OWNER = "syntaxai";
	16	+const NAME = "tdd.md";
	17	+const MAX = 200;
	18	+
	19	+// Use ASCII record separators (\x1e between commits, \x1f between
	20	+// fields) so commit-message newlines pass through unmangled.
	21	+const FMT = ["%H", "%aI", "%an", "%ae", "%B"].join("\x1f") + "\x1e";
	22	+
	23	+const res = spawnSync(
	24	+ "git",
	25	+ ["log", `--max-count=${MAX}`, `--pretty=format:${FMT}`],
	26	+ { cwd: REPO_ROOT, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 },
	27	+);
	28	+if (res.status !== 0) {
	29	+ console.error("git log failed:", res.stderr);
	30	+ process.exit(1);
	31	+}
	32	+
	33	+const records = res.stdout.split("\x1e").map((s) => s.trim()).filter(Boolean);
	34	+const commits = records.map((rec) => {
	35	+ const [sha, date, name, email, ...rest] = rec.split("\x1f");
	36	+ const message = (rest.join("\x1f") ?? "").replace(/\n+$/, "");
	37	+ return {
	38	+ sha: sha ?? "",
	39	+ commit: {
	40	+ author: {
	41	+ name: name ?? "",
	42	+ email: email ?? "",
	43	+ date: date ?? "",
	44	+ },
	45	+ message,
	46	+ },
	47	+ author: null,
	48	+ };
	49	+});
	50	+
	51	+const outDir = resolve(REPO_ROOT, "content", "git-history");
	52	+const outPath = resolve(outDir, `${OWNER}__${NAME}.json`);
	53	+mkdirSync(dirname(outPath), { recursive: true });
	54	+writeFileSync(outPath, JSON.stringify({ owner: OWNER, name: NAME, fetchedAt: Date.now(), commits }, null, 2));
	55	+
	56	+console.log(`✓ wrote ${commits.length} commits → ${outPath}`);

modified src/c14_github.ts +86 −0

@@ -120,3 +120,89 @@ export const fetchProjectConfig = async (
120	120	}
121	121	return parseProjectConfig(parsed);
122	122	};
	123	+
	124	+// ---------------------------------------------------------------------
	125	+// Public commits API. Used to feed the live reports view from real
	126	+// data. Public-repo only; unauthenticated calls are rate-limited to
	127	+// 60/hour, so we cache aggressively. Single in-memory cache per
	128	+// (owner, repo) with a 5-minute TTL — enough for casual page-loads,
	129	+// not so long that pushed commits stay invisible.
	130	+// ---------------------------------------------------------------------
	131	+
	132	+export interface GithubCommit {
	133	+ sha: string;
	134	+ commit: {
	135	+ author: { name: string; email: string; date: string };
	136	+ message: string;
	137	+ };
	138	+ author: { login: string } \| null;
	139	+}
	140	+
	141	+const COMMITS_TTL_MS = 5 * 60 * 1000;
	142	+const commitsCache = new Map<string, { fetchedAt: number; commits: GithubCommit[] }>();
	143	+
	144	+// Deploy-time snapshot: scripts/p620/snapshot-git-history.ts dumps the
	145	+// local git log into content/git-history/<owner>__<name>.json so the
	146	+// container can serve /reports/live for a private repo without a
	147	+// GitHub token. Bundle is preferred when present; we fall back to the
	148	+// public API for any repo we don't bundle.
	149	+const bundlePath = (repoOwner: string, repoName: string): string =>
	150	+ `./content/git-history/${repoOwner}__${repoName}.json`;
	151	+
	152	+interface GitHistoryBundle {
	153	+ owner: string;
	154	+ name: string;
	155	+ fetchedAt: number;
	156	+ commits: GithubCommit[];
	157	+}
	158	+
	159	+const loadBundle = async (
	160	+ repoOwner: string,
	161	+ repoName: string,
	162	+): Promise<GithubCommit[] \| null> => {
	163	+ try {
	164	+ const file = Bun.file(bundlePath(repoOwner, repoName));
	165	+ if (!(await file.exists())) return null;
	166	+ const data = (await file.json()) as GitHistoryBundle;
	167	+ return Array.isArray(data.commits) ? data.commits : null;
	168	+ } catch {
	169	+ return null;
	170	+ }
	171	+};
	172	+
	173	+export const fetchRepoCommits = async (
	174	+ repoOwner: string,
	175	+ repoName: string,
	176	+ perPage = 100,
	177	+): Promise<GithubCommit[]> => {
	178	+ const key = `${repoOwner}/${repoName}#${perPage}`;
	179	+ const cached = commitsCache.get(key);
	180	+ if (cached && Date.now() - cached.fetchedAt < COMMITS_TTL_MS) {
	181	+ return cached.commits;
	182	+ }
	183	+
	184	+ const bundle = await loadBundle(repoOwner, repoName);
	185	+ if (bundle) {
	186	+ const sliced = bundle.slice(0, perPage);
	187	+ commitsCache.set(key, { fetchedAt: Date.now(), commits: sliced });
	188	+ return sliced;
	189	+ }
	190	+
	191	+ const url = `https://api.github.com/repos/${encodeURIComponent(repoOwner)}/${encodeURIComponent(repoName)}/commits?per_page=${perPage}`;
	192	+ const res = await fetch(url, {
	193	+ headers: {
	194	+ Accept: "application/vnd.github+json",
	195	+ "User-Agent": "tdd.md",
	196	+ },
	197	+ });
	198	+ if (!res.ok) {
	199	+ // Honour the cache on transient failure rather than blanking the page —
	200	+ // GitHub's 60/hour anonymous rate limit is the most likely cause and
	201	+ // the cached data is still strictly better than no data.
	202	+ if (cached) return cached.commits;
	203	+ throw new Error(`GitHub commits API failed for ${repoOwner}/${repoName}: HTTP ${res.status}`);
	204	+ }
	205	+ const commits = (await res.json()) as GithubCommit[];
	206	+ commitsCache.set(key, { fetchedAt: Date.now(), commits });
	207	+ return commits;
	208	+};

modified src/c21_app.ts +115 −4

@@ -27,7 +27,15 @@ import { fetchProjectConfig } from "./c14_github.ts";
27	27	import { listGames, loadGame } from "./c31_games.ts";
28	28	import { ALL_POSTS } from "./c31_blog.ts";
29	29	import { ALL_GUIDES } from "./c31_guides.ts";
30		-import { DEMO_REPORTS } from "./c31_reports_demo.ts";
	30	+import {
	31	+ DEMO_REPORTS,
	32	+ DEMO_PERIOD,
	33	+ DEMO_ORG,
	34	+ DEMO_REPOS,
	35	+ DEMO_SNAPSHOTS,
	36	+ DEMO_STABILITY,
	37	+} from "./c31_reports_demo.ts";
	38	+import { buildLiveReports } from "./c32_real_reports.ts";
31	39	import { parseRepoIdentifier } from "./c31_project_config.ts";
32	40	import { judge } from "./c32_judge.ts";
33	41	import {
@@ -49,6 +57,54 @@ import { startGithubOauth, handleGithubCallback } from "./c21_handlers_auth.ts";
49	57	const HOME_MD = "./content/home.md";
50	58	const GAME_DIR = "./content/games";
51	59
	60	+// ---------------------------------------------------------------------
	61	+// Reports-context builders. The c51 builders take a ReportsContext —
	62	+// these tiny helpers assemble it for the synthetic /reports/demo and
	63	+// the live /reports/live (real data fetched from syntaxai/tdd.md).
	64	+// ---------------------------------------------------------------------
	65	+
	66	+const LIVE_REPO_OWNER = "syntaxai";
	67	+const LIVE_REPO_NAME = "tdd.md";
	68	+const LIVE_FETCH_COUNT = 100;
	69	+
	70	+const DEMO_BANNER_HTML = `<div class="report-mockup-banner">demo data — design preview with synthetic numbers. Want the real readout? <a href="/reports/live">/reports/live</a> renders the same shape from live tdd.md commits. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a></div>`;
	71	+
	72	+const LIVE_BANNER_HTML = `<div class="report-mockup-banner">live data — sourced from <a href="https://github.com/${LIVE_REPO_OWNER}/${LIVE_REPO_NAME}">${LIVE_REPO_OWNER}/${LIVE_REPO_NAME}</a> via the public commits API (5-min cache). Agent attribution comes from <code>Co-Authored-By:</code> footers; commits without one are excluded. Phase coverage measures % of commits tagged <code>red:/green:/refactor:</code>.</div>`;
	73	+
	74	+const demoContext = () => ({
	75	+ reports: DEMO_REPORTS,
	76	+ period: DEMO_PERIOD,
	77	+ scopeLabel: `${DEMO_REPOS} repos · ${DEMO_ORG}`,
	78	+ bannerHtml: DEMO_BANNER_HTML,
	79	+ narrative: {
	80	+ changedHeading: "wat veranderde dit kwartaal",
	81	+ changedBody:
	82	+ "Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de `api-gateway` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.",
	83	+ doingHeading: "wat we doen",
	84	+ doingBody:
	85	+ "- Cursor in `api-gateway`: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel \"never delete a test in a refactor commit\" gepind ([details →](/reports/demo/agents/cursor)).\n- Claude Code uitrollen: het CLAUDE.md-template dat in `billing-service` werkte naar de andere drie repos kopiëren.\n- Volgende meting: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.",
	86	+ },
	87	+ footerLinks:
	88	+ "[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)",
	89	+});
	90	+
	91	+const liveContext = async () => {
	92	+ const live = await buildLiveReports(LIVE_REPO_OWNER, LIVE_REPO_NAME, LIVE_FETCH_COUNT);
	93	+ const period = live.earliest && live.latest
	94	+ ? `${live.earliest.slice(0, 10)} → ${live.latest.slice(0, 10)}`
	95	+ : "no commits fetched";
	96	+ const drillLinks = live.reports
	97	+ .map((r) => `[${r.name}](/reports/live/agents/${r.slug})`)
	98	+ .join(" · ");
	99	+ return {
	100	+ reports: live.reports,
	101	+ period,
	102	+ scopeLabel: `${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} · ${live.totalCommits} commits sampled${live.unknownCount > 0 ? ` (${live.unknownCount} unattributed, excluded)` : ""}`,
	103	+ bannerHtml: LIVE_BANNER_HTML,
	104	+ footerLinks: `${drillLinks ? drillLinks + " · " : ""}[tests overzicht](/reports/live/tests) · [demo preview](/reports/demo) · [back to /reports](/reports)`,
	105	+ };
	106	+};
	107	+
52	108	const HOME_DESCRIPTION =
53	109	"Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline.";
54	110
@@ -417,10 +473,11 @@ ${rows}
417	473	},
418	474
419	475	"/reports/demo": async () => {
	476	+ const ctx = demoContext();
420	477	const html = await renderPage({
421	478	title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md",
422	479	description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.",
423		- bodyMarkdown: execSummaryMd(),
	480	+ bodyMarkdown: execSummaryMd(ctx),
424	481	ogPath: "https://tdd.md/reports/demo",
425	482	noindex: true,
426	483	});
@@ -431,7 +488,12 @@ ${rows}
431	488	const html = await renderPage({
432	489	title: "Tests overzicht (demo) — tdd.md",
433	490	description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.",
434		- bodyMarkdown: testsOverviewMd(),
	491	+ bodyMarkdown: testsOverviewMd({
	492	+ period: DEMO_PERIOD,
	493	+ bannerHtml: DEMO_BANNER_HTML,
	494	+ snapshots: DEMO_SNAPSHOTS,
	495	+ stability: DEMO_STABILITY,
	496	+ }),
435	497	ogPath: "https://tdd.md/reports/demo/tests",
436	498	noindex: true,
437	499	});
@@ -440,7 +502,8 @@ ${rows}
440	502
441	503	"/reports/demo/agents/:slug": async (req) => {
442	504	const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"];
443		- const md = agentDrilldownMd(slug);
	505	+ const ctx = demoContext();
	506	+ const md = agentDrilldownMd(slug, ctx);
444	507	if (!md) {
445	508	const html = await renderNotFound(`/reports/demo/agents/${slug}`);
446	509	return htmlResponse(html, 404);
@@ -456,6 +519,54 @@ ${rows}
456	519	return htmlResponse(html);
457	520	},
458	521
	522	+ "/reports/live": async () => {
	523	+ const ctx = await liveContext();
	524	+ const html = await renderPage({
	525	+ title: "TDD-discipline rapport · live — tdd.md",
	526	+ description: `Live discipline rapport gebouwd uit de echte commit-historie van syntaxai/tdd.md (laatste ${LIVE_FETCH_COUNT} commits, 5-min cache).`,
	527	+ bodyMarkdown: execSummaryMd(ctx),
	528	+ ogPath: "https://tdd.md/reports/live",
	529	+ noindex: true,
	530	+ });
	531	+ return htmlResponse(html);
	532	+ },
	533	+
	534	+ "/reports/live/tests": async () => {
	535	+ const html = await renderPage({
	536	+ title: "Tests overzicht · live (placeholder) — tdd.md",
	537	+ description: "Placeholder voor de live test-overview — wacht op de sandbox-runner sliver.",
	538	+ bodyMarkdown: testsOverviewMd({
	539	+ period: "live",
	540	+ bannerHtml: LIVE_BANNER_HTML,
	541	+ snapshots: [],
	542	+ stability: [],
	543	+ unavailableNote: "De per-repo test-snapshot en stabiliteitstabel hebben de sandbox-runner sliver nodig (block 1 vervolg). Tot dat klaar is, alleen de exec-summary + drill-down draaien op echte data; de testpagina staat in de [demo](/reports/demo/tests).",
	544	+ }),
	545	+ ogPath: "https://tdd.md/reports/live/tests",
	546	+ noindex: true,
	547	+ });
	548	+ return htmlResponse(html);
	549	+ },
	550	+
	551	+ "/reports/live/agents/:slug": async (req) => {
	552	+ const ctx = await liveContext();
	553	+ const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"];
	554	+ const md = agentDrilldownMd(slug, ctx);
	555	+ if (!md) {
	556	+ const html = await renderNotFound(`/reports/live/agents/${slug}`);
	557	+ return htmlResponse(html, 404);
	558	+ }
	559	+ const entry = ctx.reports.find((r) => r.slug === slug)!;
	560	+ const html = await renderPage({
	561	+ title: `${entry.name} drill-down · live — tdd.md`,
	562	+ description: `Live drill-down voor ${entry.name} op syntaxai/tdd.md — trend, failure-mode breakdown, recent commits.`,
	563	+ bodyMarkdown: md,
	564	+ ogPath: `https://tdd.md/reports/live/agents/${slug}`,
	565	+ noindex: true,
	566	+ });
	567	+ return htmlResponse(html);
	568	+ },
	569	+
459	570	"/guides": async () => {
460	571	const rows = ALL_GUIDES
461	572	.map((g) => `\| [${g.title}](/guides/${g.slug}) \| ${g.description} \|`)

added src/c32_real_reports.ts +170 −0

@@ -0,0 +1,170 @@
	1	+// c32 — logic: aggregate real GitHub commit history into the same
	2	+// AgentReport / RecentFlagged shape that c51_render_reports renders.
	3	+// Pure (given fetched commits in, produces report objects out); the
	4	+// I/O happens in c14_github.fetchRepoCommits which we call here.
	5	+//
	6	+// Attribution: Co-Authored-By footers are the agent-attribution channel
	7	+// the existing tdd.md commit history already uses. Anything without a
	8	+// recognised footer is bucketed as "unknown" and reported separately —
	9	+// it's still useful for volume context.
	10	+
	11	+import { parseCommit } from "./c31_commits.ts";
	12	+import { fetchRepoCommits, type GithubCommit } from "./c14_github.ts";
	13	+import type {
	14	+ AgentReport,
	15	+ FailureSlice,
	16	+ RecentFlagged,
	17	+} from "./c31_reports_demo.ts";
	18	+
	19	+type LiveAgentSlug = AgentReport["slug"] \| "unknown";
	20	+
	21	+const detectAgent = (msg: string): LiveAgentSlug => {
	22	+ if (/Co-Authored-By:.*Claude/i.test(msg)) return "claude-code";
	23	+ if (/Co-Authored-By:.*Cursor/i.test(msg)) return "cursor";
	24	+ if (/Co-Authored-By:.*Aider/i.test(msg)) return "aider";
	25	+ return "unknown";
	26	+};
	27	+
	28	+const AGENT_NAMES: Record<AgentReport["slug"], string> = {
	29	+ "claude-code": "Claude Code",
	30	+ cursor: "Cursor",
	31	+ aider: "Aider",
	32	+};
	33	+
	34	+// 30-day daily commit-count series, oldest → newest. When there are no
	35	+// commits in a day, that day's value is 0 — the sparkline still renders
	36	+// but flat-lines, which honestly reflects the data.
	37	+const buildTrend = (commits: GithubCommit[], days = 30): number[] => {
	38	+ const out = new Array<number>(days).fill(0);
	39	+ const today = new Date();
	40	+ today.setUTCHours(0, 0, 0, 0);
	41	+ for (const c of commits) {
	42	+ const d = new Date(c.commit.author.date);
	43	+ d.setUTCHours(0, 0, 0, 0);
	44	+ const ageDays = Math.floor((today.getTime() - d.getTime()) / (24 * 60 * 60 * 1000));
	45	+ if (ageDays < 0 \|\| ageDays >= days) continue;
	46	+ const idx = days - 1 - ageDays;
	47	+ const cur = out[idx] ?? 0;
	48	+ out[idx] = cur + 1;
	49	+ }
	50	+ return out;
	51	+};
	52	+
	53	+const buildAgentReport = (
	54	+ slug: AgentReport["slug"],
	55	+ agentCommits: GithubCommit[],
	56	+ repoSlug: string,
	57	+): AgentReport => {
	58	+ const tagged = agentCommits.filter((c) => {
	59	+ const phase = parseCommit(c.commit.message).phase;
	60	+ return phase === "red" \|\| phase === "green" \|\| phase === "refactor";
	61	+ });
	62	+ const phaseCoveragePct = agentCommits.length === 0
	63	+ ? 0
	64	+ : Math.round((tagged.length / agentCommits.length) * 100);
	65	+
	66	+ // Score is a proxy: phase-coverage is the only structural signal we
	67	+ // can compute without running the test suite. When coverage is 0 the
	68	+ // agent isn't attempting TDD, so the score is honestly low.
	69	+ const score = phaseCoveragePct;
	70	+
	71	+ // Failure mix collapses to two slices for live data — phase-tagged vs
	72	+ // not. Fine-grained failure modes (red-did-not-fail, test-deleted, etc)
	73	+ // need the runner sliver before they're computable.
	74	+ const failureMix: FailureSlice[] = [
	75	+ { label: "phase-tagged", pct: phaseCoveragePct, tone: "green" },
	76	+ { label: "no phase tag", pct: 100 - phaseCoveragePct, tone: "muted" },
	77	+ ];
	78	+
	79	+ const recent: RecentFlagged[] = agentCommits
	80	+ .slice(0, 5)
	81	+ .map((c) => {
	82	+ const parsed = parseCommit(c.commit.message);
	83	+ const phase = parsed.phase === "red" \|\| parsed.phase === "green" \|\| parsed.phase === "refactor"
	84	+ ? parsed.phase
	85	+ : "green";
	86	+ const failure = parsed.phase === "untagged" \|\| parsed.phase === "init"
	87	+ ? "no phase tag"
	88	+ : `${parsed.phase} (live judge not yet wired)`;
	89	+ return {
	90	+ date: c.commit.author.date.slice(0, 10),
	91	+ repo: repoSlug,
	92	+ sha: c.sha.slice(0, 7),
	93	+ phase,
	94	+ failure,
	95	+ pts: 0,
	96	+ };
	97	+ });
	98	+
	99	+ const topIssueLabel = phaseCoveragePct === 100 ? "no current issues" : "no phase tag";
	100	+ const topIssuePct = 100 - phaseCoveragePct;
	101	+
	102	+ return {
	103	+ slug,
	104	+ name: AGENT_NAMES[slug],
	105	+ score,
	106	+ delta: 0,
	107	+ commits: agentCommits.length,
	108	+ phaseCoveragePct,
	109	+ streak: 0,
	110	+ streakBroken: false,
	111	+ topIssueLabel,
	112	+ topIssuePct,
	113	+ failureMix,
	114	+ trend: buildTrend(agentCommits),
	115	+ recent,
	116	+ };
	117	+};
	118	+
	119	+export interface LiveReports {
	120	+ reports: AgentReport[];
	121	+ unknownCount: number;
	122	+ totalCommits: number;
	123	+ earliest: string \| null;
	124	+ latest: string \| null;
	125	+ fetchedAt: number;
	126	+}
	127	+
	128	+export const buildLiveReports = async (
	129	+ repoOwner: string,
	130	+ repoName: string,
	131	+ perPage = 100,
	132	+): Promise<LiveReports> => {
	133	+ const commits = await fetchRepoCommits(repoOwner, repoName, perPage);
	134	+ const repoSlug = `${repoOwner}/${repoName}`;
	135	+ const byAgent = new Map<AgentReport["slug"], GithubCommit[]>();
	136	+ let unknownCount = 0;
	137	+
	138	+ for (const c of commits) {
	139	+ const a = detectAgent(c.commit.message);
	140	+ if (a === "unknown") {
	141	+ unknownCount++;
	142	+ continue;
	143	+ }
	144	+ const arr = byAgent.get(a) ?? [];
	145	+ arr.push(c);
	146	+ byAgent.set(a, arr);
	147	+ }
	148	+
	149	+ const order: AgentReport["slug"][] = ["claude-code", "cursor", "aider"];
	150	+ const reports = order
	151	+ .map((slug) => {
	152	+ const list = byAgent.get(slug);
	153	+ if (!list \|\| list.length === 0) return null;
	154	+ return buildAgentReport(slug, list, repoSlug);
	155	+ })
	156	+ .filter((r): r is AgentReport => r !== null);
	157	+
	158	+ const dates = commits.map((c) => c.commit.author.date).sort();
	159	+ const earliest = dates[0] ?? null;
	160	+ const latest = dates[dates.length - 1] ?? null;
	161	+
	162	+ return {
	163	+ reports,
	164	+ unknownCount,
	165	+ totalCommits: commits.length,
	166	+ earliest,
	167	+ latest,
	168	+ fetchedAt: Date.now(),
	169	+ };
	170	+};

modified src/c51_render_reports.ts +93 −50

@@ -1,14 +1,11 @@
1	1	// c51 (reports) — body builders for /reports, /reports/demo,
2		-// /reports/demo/agents/:slug, /reports/demo/tests. All synthetic data
3		-// comes from c31_reports_demo; chrome helpers come from c51_render_layout.
	2	+// /reports/live, /reports/demo/agents/:slug, /reports/demo/tests. The
	3	+// builders take the dataset as an explicit ReportsContext so the same
	4	+// markdown templates serve both the synthetic demo (DEMO_* from
	5	+// c31_reports_demo) and the live tdd.md aggregation (c32_real_reports).
4	6
5	7	import {
6		- DEMO_PERIOD,
7		- DEMO_ORG,
8		- DEMO_REPOS,
9	8	DEMO_REPORTS,
10		- DEMO_SNAPSHOTS,
11		- DEMO_STABILITY,
12	9	type AgentReport,
13	10	type FailureSlice,
14	11	type TestSnapshot,
@@ -16,6 +13,33 @@ import {
16	13	} from "./c31_reports_demo.ts";
17	14	import { escape } from "./c51_render_layout.ts";
18	15
	16	+export interface ReportsContext {
	17	+ reports: AgentReport[];
	18	+ period: string;
	19	+ scopeLabel: string;
	20	+ bannerHtml: string;
	21	+ // Optional narrative — present for the curated demo, omitted for live
	22	+ // where the data has to speak for itself.
	23	+ narrative?: {
	24	+ changedHeading: string;
	25	+ changedBody: string;
	26	+ doingHeading: string;
	27	+ doingBody: string;
	28	+ };
	29	+ // Trailing footer line (links). Defaults reasonable for both demo + live.
	30	+ footerLinks: string;
	31	+}
	32	+
	33	+export interface TestsOverviewContext {
	34	+ period: string;
	35	+ bannerHtml: string;
	36	+ snapshots: TestSnapshot[];
	37	+ stability: TestStability[];
	38	+ // When the runner sliver isn't wired (live mode, today), pass a
	39	+ // placeholder note instead of the snapshot+stability sections.
	40	+ unavailableNote?: string;
	41	+}
	42	+
19	43	const trendArrow = (delta: number): { glyph: string; cls: string } =>
20	44	delta > 0 ? { glyph: "↑", cls: "up" } : delta < 0 ? { glyph: "↓", cls: "down" } : { glyph: "→", cls: "flat" };
21	45
@@ -70,8 +94,6 @@ const streakBox = (a: AgentReport): string => {
70	94	return `<span class="report-streak ${cls}"><span class="report-streak-num">${a.streak}</span> ${label}</span>`;
71	95	};
72	96
73		-const mockBanner = `<div class="report-mockup-banner">demo data — real reporting wires up when the project-tracking pipeline ships. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a> · <a href="/reports">about reporting</a></div>`;
74		-
75	97	const snapshotBlock = (s: TestSnapshot): string => {
76	98	const failuresHtml = s.failures.length === 0
77	99	? `<li class="test-list-pass">all ${s.passing} tests groen</li>`
@@ -113,13 +135,16 @@ export const reportsLandingMd = (): string => `# reports
113	135
114	136	> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.
115	137
116		-This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data:
	138	+Two views of the same shape:
	139	+
	140	+- [/reports/live](/reports/live) — built from real commit data on \`syntaxai/tdd.md\` (the repo this site runs on), refreshed every 5 minutes from the GitHub commits API. Agent attribution comes from \`Co-Authored-By:\` footers. Phase-coverage is the only metric we can compute without running tests, so the score is a proxy for now.
	141	+- [/reports/demo](/reports/demo) — the polished design preview with synthetic data for three agents and four repos. Useful for screenshots and showing the full failure-mode breakdown the live view can't compute yet.
117	142
118		-- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents
119		-- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits
120		-- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam
	143	+Drill-downs:
	144	+- [live drill-down per agent](/reports/live/agents/claude-code) · [tests overzicht (live: placeholder)](/reports/live/tests)
	145	+- [demo drill-down per agent](/reports/demo/agents/cursor) · [tests overzicht (demo)](/reports/demo/tests)
121	146
122		-Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers.
	147	+Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging on tracked branches lands in a follow-up sliver; live reporting from the GitHub API already works for the dogfood case (the tdd.md repo itself).
123	148
124	149	## what gets measured
125	150
@@ -145,55 +170,63 @@ For team-leads:
145	170	[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games)
146	171	`;
147	172
148		-export const execSummaryMd = (): string => {
149		- const totalCommits = DEMO_REPORTS.reduce((s, a) => s + a.commits, 0);
150		- const tiles = DEMO_REPORTS.map(tile).join("\n");
151		- return `# tdd-discipline rapport · q1 2026
	173	+export const execSummaryMd = (ctx: ReportsContext): string => {
	174	+ const totalCommits = ctx.reports.reduce((s, a) => s + a.commits, 0);
	175	+ const tiles = ctx.reports.length === 0
	176	+ ? `<div class="report-tile-empty">No agent-attributed commits in this dataset.</div>`
	177	+ : ctx.reports.map(tile).join("\n");
	178	+ const narrativeBlock = ctx.narrative
	179	+ ? `## ${ctx.narrative.changedHeading}
152	180
153		-${mockBanner}
	181	+${ctx.narrative.changedBody}
154	182
155		-> Periode ${DEMO_PERIOD} · Scope ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}.
	183	+## ${ctx.narrative.doingHeading}
156	184
157		-<div class="report-tiles">
158		-${tiles}
159		-</div>
	185	+${ctx.narrative.doingBody}
160	186
161		-## wat veranderde dit kwartaal
	187	+`
	188	+ : "";
	189	+ return `# tdd-discipline rapport · ${ctx.period}
162	190
163		-Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.
	191	+${ctx.bannerHtml}
164	192
165		-## wat we doen
	193	+> Periode ${ctx.period} · Scope ${escape(ctx.scopeLabel)} · ${totalCommits.toLocaleString()} AI-toegeschreven commits.
166	194
167		-- Cursor in \`api-gateway\`: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)).
168		-- Claude Code uitrollen: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren.
169		-- Volgende meting: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.
	195	+<div class="report-tiles">
	196	+${tiles}
	197	+</div>
170	198
171		-## wat dit getal niet meet
	199	+${narrativeBlock}## wat dit getal niet meet
172	200
173	201	Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus tautologische tests en zwak-geformuleerde asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy.
174	202
175	203	---
176	204
177		-[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)
	205	+${ctx.footerLinks}
178	206	`;
179	207	};
180	208
181		-export const agentDrilldownMd = (slug: AgentReport["slug"]): string \| null => {
182		- const a = DEMO_REPORTS.find((r) => r.slug === slug);
	209	+export const agentDrilldownMd = (
	210	+ slug: AgentReport["slug"],
	211	+ ctx: ReportsContext,
	212	+): string \| null => {
	213	+ const a = ctx.reports.find((r) => r.slug === slug);
183	214	if (!a) return null;
184	215	const arr = trendArrow(a.delta);
185	216	const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`;
186		- const recentRows = a.recent
187		- .map(
188		- (r) =>
189		- `\| ${r.date} \| \`${r.repo}\` \| \`${r.sha}\` \| ${r.phase} \| ${r.failure} \| ${r.pts} \|`,
190		- )
191		- .join("\n");
	217	+ const recentRows = a.recent.length === 0
	218	+ ? `\| _no recent attributed activity_ \| \| \| \| \| \|`
	219	+ : a.recent
	220	+ .map(
	221	+ (r) =>
	222	+ `\| ${r.date} \| \`${r.repo}\` \| \`${r.sha}\` \| ${r.phase} \| ${r.failure} \| ${r.pts} \|`,
	223	+ )
	224	+ .join("\n");
192	225	return `# ${a.name} · drill-down
193	226
194		-${mockBanner}
	227	+${ctx.bannerHtml}
195	228
196		-> Discipline-score ${a.score} / 100 <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage ${a.phaseCoveragePct}%.
	229	+> Discipline-score ${a.score} / 100 <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${ctx.period}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage ${a.phaseCoveragePct}%.
197	230
198	231	## trend (30 dagen)
199	232
@@ -222,21 +255,31 @@ ${recentRows}
222	255
223	256	---
224	257
225		-[← exec summary](/reports/demo) · [back to /reports](/reports)
	258	+${ctx.footerLinks}
226	259	`;
227	260	};
228	261
229		-export const testsOverviewMd = (): string => {
230		- const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0);
231		- const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0);
232		- const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0);
233		- const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n");
234		- const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n");
	262	+export const testsOverviewMd = (ctx: TestsOverviewContext): string => {
	263	+ if (ctx.unavailableNote) {
	264	+ return `# tests overzicht
	265	+
	266	+${ctx.bannerHtml}
	267	+
	268	+> ${ctx.unavailableNote}
	269	+
	270	+[← exec summary](/reports) · [back to /reports](/reports)
	271	+`;
	272	+ }
	273	+ const total = ctx.snapshots.reduce((s, r) => s + r.total, 0);
	274	+ const passing = ctx.snapshots.reduce((s, r) => s + r.passing, 0);
	275	+ const failing = ctx.snapshots.reduce((s, r) => s + r.failing, 0);
	276	+ const snapshots = ctx.snapshots.map(snapshotBlock).join("\n");
	277	+ const stabRows = ctx.stability.map(stabilityRow).join("\n");
235	278	return `# tests overzicht
236	279
237		-${mockBanner}
	280	+${ctx.bannerHtml}
238	281
239		-> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen".
	282	+> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${ctx.period}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen".
240	283
241	284	## huidige stand · per repo
242	285

raw .diff