Reports: live data from tdd.md repo via deploy-time git-history snapshot
The /reports/live page now renders real commit data from this repo
instead of demo fixtures. Because syntaxai/tdd.md is a private repo,
the public unauthenticated GitHub commits API returns 404 — so we
sidestep that by snapshotting the local git log into JSON at deploy
time and bundling it with the container.
scripts/p620/snapshot-git-history.ts
Bun script: git log -> content/git-history/<owner>__<name>.json
in the GithubCommit shape that c14_github + c32_real_reports
already consume.
scripts/p620/deploy-tdd-md.sh
Runs the snapshot before rsync so each deploy refreshes the
bundle. No GitHub token, no rate-limit, no extra secrets.
src/c14_github.ts (fetchRepoCommits)
Tries the bundle first; falls back to the public commits API
only when no bundle is present. Same 5-minute in-memory TTL.
src/c32_real_reports.ts
Aggregator: detects agent attribution from Co-Authored-By
footers, builds AgentReport[] with phase-coverage as the score
proxy. Commits without a recognised footer are counted as
"unknown", left out of the per-agent reports, and surfaced only
as a separate count in the scope label.
src/c21_app.ts + src/c51_render_reports.ts
/reports/live, /reports/live/agents/:slug, /reports/live/tests
routes wired against the same body builders that render demo,
via a parameterised ReportsContext / TestsOverviewContext.
.gitignore
content/git-history/ is regenerated per deploy, never tracked.
For the dogfood case the entire history fits in 200 commits with
plenty of headroom; if we ever need more, bumping MAX in the
snapshot script is a one-liner.
Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
7 files changed · +527 −54
.gitignore
+1
−0
| @@ -5,3 +5,4 @@ node_modules/ | ||
| 5 | 5 | .env.local |
| 6 | 6 | .bun-cache/ |
| 7 | 7 | .claude/ |
| 8 | +content/git-history/ | |
scripts/p620/deploy-tdd-md.sh
+6
−0
| @@ -38,6 +38,12 @@ ssh "$SSH_HOST" 'command -v podman >/dev/null && command -v systemctl >/dev/null | ||
| 38 | 38 | |
| 39 | 39 | need_restart=0 |
| 40 | 40 | |
| 41 | +echo "→ snapshot git history → content/git-history/" | |
| 42 | +# Bundles local git log into JSON so the container can render /reports/live | |
| 43 | +# for the (private) syntaxai/tdd.md repo without a GitHub token. | |
| 44 | +( cd "$REPO_ROOT" && bun scripts/p620/snapshot-git-history.ts ) \ | |
| 45 | + || { echo "✗ snapshot-git-history mislukt"; exit 1; } | |
| 46 | + | |
| 41 | 47 | echo "→ source rsync naar $SSH_HOST:~/$REMOTE_SRC_DIR" |
| 42 | 48 | ssh "$SSH_HOST" "mkdir -p ~/$REMOTE_SRC_DIR" |
| 43 | 49 | # --delete zodat verwijderde files ook weggaan op remote. |
scripts/p620/snapshot-git-history.ts
+56
−0
| @@ -0,0 +1,56 @@ | ||
| 1 | +#!/usr/bin/env bun | |
| 2 | +// Dump local git history into the same shape that the live-reports | |
| 3 | +// pipeline expects from GitHub's commits API. Runs at deploy-time so | |
| 4 | +// the container can render /reports/live for a private repo without | |
| 5 | +// holding a GitHub token. Each deploy refreshes the bundle. | |
| 6 | +// | |
| 7 | +// Output: content/git-history/<owner>__<name>.json | |
| 8 | +// Schema: { owner, name, fetchedAt, commits: GithubCommit[] } (see src/c14_github.ts) — commits newest first. | |
| 9 | + | |
| 10 | +import { spawnSync } from "node:child_process"; | |
| 11 | +import { mkdirSync, writeFileSync } from "node:fs"; | |
| 12 | +import { dirname, resolve } from "node:path"; | |
| 13 | + | |
// Repository root, resolved from this script's location (scripts/p620/ → ../..).
const REPO_ROOT = resolve(import.meta.dir, "..", "..");
const OWNER = "syntaxai";
const NAME = "tdd.md";
// Maximum number of commits (newest first) written to the bundle.
const MAX = 200;

// ASCII separators (\x1f between fields, \x1e after each commit) so
// commit-message newlines pass through unmangled.
const FIELD_SEP = "\x1f";
const RECORD_SEP = "\x1e";
const FMT = ["%H", "%aI", "%an", "%ae", "%B"].join(FIELD_SEP) + RECORD_SEP;

// git's %aI keeps the author's local UTC offset. Normalise to the UTC
// "…Z" form so consumers that slice or compare the string treat bundle
// dates the same way as dates from the GitHub commits API. Unparsable
// input is passed through untouched rather than dropped.
const toUtcIso = (raw: string): string => {
  const ms = Date.parse(raw);
  return Number.isNaN(ms) ? raw : new Date(ms).toISOString().replace(/\.\d{3}Z$/, "Z");
};

// Turn raw `git log` output (in FMT layout) into GithubCommit-shaped
// records. Records without a sha are skipped instead of being emitted
// with empty fields.
const parseGitLog = (stdout: string) =>
  stdout
    .split(RECORD_SEP)
    .map((rec) => rec.trim())
    .filter(Boolean)
    .flatMap((rec) => {
      const [sha = "", date = "", name = "", email = "", ...rest] = rec.split(FIELD_SEP);
      if (sha === "") return [];
      // %B may itself contain \x1f in theory; re-join so nothing is lost.
      const message = rest.join(FIELD_SEP).replace(/\n+$/, "");
      return [
        {
          sha,
          commit: {
            author: { name, email, date: toUtcIso(date) },
            message,
          },
          // The commits API carries a GitHub account here; git log has none.
          author: null,
        },
      ];
    });

const res = spawnSync(
  "git",
  ["log", `--max-count=${MAX}`, `--pretty=format:${FMT}`],
  { cwd: REPO_ROOT, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 },
);
// res.error is set when git could not be spawned at all (stderr is then
// empty); a non-zero status means git itself reported a failure.
if (res.error || res.status !== 0) {
  console.error("git log failed:", res.error?.message ?? res.stderr);
  process.exit(1);
}

const commits = parseGitLog(res.stdout);

const outDir = resolve(REPO_ROOT, "content", "git-history");
const outPath = resolve(outDir, `${OWNER}__${NAME}.json`);
mkdirSync(dirname(outPath), { recursive: true });
writeFileSync(
  outPath,
  JSON.stringify({ owner: OWNER, name: NAME, fetchedAt: Date.now(), commits }, null, 2),
);

console.log(`✓ wrote ${commits.length} commits → ${outPath}`);
src/c14_github.ts
+86
−0
| @@ -120,3 +120,89 @@ export const fetchProjectConfig = async ( | ||
| 120 | 120 | } |
| 121 | 121 | return parseProjectConfig(parsed); |
| 122 | 122 | }; |
| 123 | + | |
| 124 | +// --------------------------------------------------------------------- | |
| 125 | +// Public commits API. Used to feed the live reports view from real | |
| 126 | +// data. Public-repo only; unauthenticated calls are rate-limited to | |
| 127 | +// 60/hour, so we cache aggressively. Single in-memory cache per | |
| 128 | +// (owner, repo, perPage) with a 5-minute TTL — enough for casual page-loads, | |
| 129 | +// not so long that pushed commits stay invisible. | |
| 130 | +// --------------------------------------------------------------------- | |
| 131 | + | |
/**
 * The subset of a commit record that the live-reports pipeline reads.
 * Both the GitHub commits API response and the deploy-time snapshot
 * bundle are consumed in this shape.
 */
export interface GithubCommit {
  /** Full commit hash. */
  sha: string;
  commit: {
    author: {
      name: string;
      email: string;
      /** Author timestamp as a date string parseable by `Date`. */
      date: string;
    };
    /** Complete commit message, including any trailer lines. */
    message: string;
  };
  /** GitHub account of the author; the snapshot script writes `null` here. */
  author: { login: string } | null;
}
| 140 | + | |
// Commit lists are served from memory for five minutes before a refresh.
const COMMITS_TTL_MS = 1000 * 60 * 5;

// In-memory cache of commit lists; each entry records when it was stored.
const commitsCache: Map<string, { fetchedAt: number; commits: GithubCommit[] }> = new Map();

// Location of the deploy-time snapshot for a repository. The snapshot
// script (scripts/p620/snapshot-git-history.ts) writes the local git log
// to content/git-history/<owner>__<name>.json so the container can serve
// /reports/live for a private repo without a GitHub token. The bundle is
// preferred when present; repos without one fall back to the public API.
const bundlePath = (repoOwner: string, repoName: string): string => {
  const fileName = `${repoOwner}__${repoName}.json`;
  return `./content/git-history/${fileName}`;
};
| 151 | + | |
/** On-disk layout of a deploy-time git-history snapshot. */
interface GitHistoryBundle {
  owner: string;
  name: string;
  /** Epoch milliseconds at which the snapshot was written. */
  fetchedAt: number;
  commits: GithubCommit[];
}

/**
 * Read the snapshot bundle for a repository, if one was deployed.
 *
 * @returns the bundled commits, or `null` when no bundle exists or the
 *   file cannot be used. Never throws: a broken bundle is logged and
 *   treated as absent so the caller can fall back to the public API.
 */
const loadBundle = async (
  repoOwner: string,
  repoName: string,
): Promise<GithubCommit[] | null> => {
  const path = bundlePath(repoOwner, repoName);
  try {
    const file = Bun.file(path);
    if (!(await file.exists())) return null;

    // Parsed JSON is untrusted until narrowed: check the envelope before
    // handing `commits` to callers as GithubCommit[].
    const data: unknown = await file.json();
    const commits =
      typeof data === "object" && data !== null
        ? (data as Partial<GitHistoryBundle>).commits
        : undefined;
    if (!Array.isArray(commits)) {
      console.warn(`git-history bundle ${path} has no commits array; ignoring it`);
      return null;
    }
    return commits;
  } catch (err: unknown) {
    // Best-effort by design, but leave a trace: a silently ignored
    // corrupt bundle otherwise only shows up as a confusing API failure.
    console.warn(
      `git-history bundle ${path} could not be read; ignoring it:`,
      err instanceof Error ? err.message : err,
    );
    return null;
  }
};
| 172 | + | |
/**
 * Return the most recent commits for a repository, newest first.
 *
 * Resolution order: fresh in-memory cache → deploy-time snapshot bundle
 * → unauthenticated GitHub commits API. Results are cached per
 * (owner, repo, perPage) for COMMITS_TTL_MS.
 *
 * @param repoOwner GitHub owner / organisation.
 * @param repoName  Repository name.
 * @param perPage   Maximum number of commits to return (default 100).
 *   Non-finite values fall back to 100; values below 1 are raised to 1.
 * @throws Error when neither a bundle nor a cached copy is available and
 *   the API call fails or returns something other than a commit array.
 */
export const fetchRepoCommits = async (
  repoOwner: string,
  repoName: string,
  perPage = 100,
): Promise<GithubCommit[]> => {
  // A negative count would make slice(0, n) drop commits from the tail.
  const limit = Number.isFinite(perPage) ? Math.max(1, Math.floor(perPage)) : 100;

  const key = `${repoOwner}/${repoName}#${limit}`;
  const cached = commitsCache.get(key);
  if (cached && Date.now() - cached.fetchedAt < COMMITS_TTL_MS) {
    return cached.commits;
  }

  const bundle = await loadBundle(repoOwner, repoName);
  if (bundle) {
    const sliced = bundle.slice(0, limit);
    commitsCache.set(key, { fetchedAt: Date.now(), commits: sliced });
    return sliced;
  }

  const url = `https://api.github.com/repos/${encodeURIComponent(repoOwner)}/${encodeURIComponent(repoName)}/commits?per_page=${limit}`;
  let res: Response;
  try {
    res = await fetch(url, {
      headers: {
        Accept: "application/vnd.github+json",
        "User-Agent": "tdd.md",
      },
    });
  } catch (err: unknown) {
    // Network-level failure: same policy as an HTTP error below — stale
    // data beats a blank page.
    if (cached) return cached.commits;
    throw err;
  }
  if (!res.ok) {
    // Honour the cache on transient failure rather than blanking the page —
    // GitHub's 60/hour anonymous rate limit is the most likely cause and
    // the cached data is still strictly better than no data.
    if (cached) return cached.commits;
    throw new Error(`GitHub commits API failed for ${repoOwner}/${repoName}: HTTP ${res.status}`);
  }

  // Do not cache an unexpected payload (e.g. an error object) as commits.
  const body: unknown = await res.json();
  if (!Array.isArray(body)) {
    if (cached) return cached.commits;
    throw new Error(`GitHub commits API returned a non-array body for ${repoOwner}/${repoName}`);
  }
  const commits = body as GithubCommit[];
  commitsCache.set(key, { fetchedAt: Date.now(), commits });
  return commits;
};
src/c21_app.ts
+115
−4
| @@ -27,7 +27,15 @@ import { fetchProjectConfig } from "./c14_github.ts"; | ||
| 27 | 27 | import { listGames, loadGame } from "./c31_games.ts"; |
| 28 | 28 | import { ALL_POSTS } from "./c31_blog.ts"; |
| 29 | 29 | import { ALL_GUIDES } from "./c31_guides.ts"; |
| 30 | -import { DEMO_REPORTS } from "./c31_reports_demo.ts"; | |
| 30 | +import { | |
| 31 | + DEMO_REPORTS, | |
| 32 | + DEMO_PERIOD, | |
| 33 | + DEMO_ORG, | |
| 34 | + DEMO_REPOS, | |
| 35 | + DEMO_SNAPSHOTS, | |
| 36 | + DEMO_STABILITY, | |
| 37 | +} from "./c31_reports_demo.ts"; | |
| 38 | +import { buildLiveReports } from "./c32_real_reports.ts"; | |
| 31 | 39 | import { parseRepoIdentifier } from "./c31_project_config.ts"; |
| 32 | 40 | import { judge } from "./c32_judge.ts"; |
| 33 | 41 | import { |
| @@ -49,6 +57,54 @@ import { startGithubOauth, handleGithubCallback } from "./c21_handlers_auth.ts"; | ||
| 49 | 57 | const HOME_MD = "./content/home.md"; |
| 50 | 58 | const GAME_DIR = "./content/games"; |
| 51 | 59 | |
// ---------------------------------------------------------------------
// Reports-context builders. The c51 builders take a ReportsContext —
// these tiny helpers assemble it for the synthetic /reports/demo and
// the live /reports/live (real commit data for syntaxai/tdd.md).
// ---------------------------------------------------------------------

// Repository whose history feeds /reports/live, and how many of its
// newest commits are requested per render.
const LIVE_REPO_OWNER = "syntaxai";
const LIVE_REPO_NAME = "tdd.md";
const LIVE_FETCH_COUNT = 100;

const DEMO_BANNER_HTML = `<div class="report-mockup-banner">demo data — design preview with synthetic numbers. Want the real readout? <a href="/reports/live">/reports/live</a> renders the same shape from live tdd.md commits. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a></div>`;

// The live data is read from the deploy-time git-history snapshot first;
// the public commits API is only the fallback, so the banner says so.
const LIVE_BANNER_HTML = `<div class="report-mockup-banner">live data — sourced from <a href="https://github.com/${LIVE_REPO_OWNER}/${LIVE_REPO_NAME}">${LIVE_REPO_OWNER}/${LIVE_REPO_NAME}</a> via a deploy-time git-history snapshot (public commits API as fallback, 5-min cache). Agent attribution comes from <code>Co-Authored-By:</code> footers; commits without one are excluded. Phase coverage measures % of commits tagged <code>red:/green:/refactor:</code>.</div>`;
| 73 | + | |
// Assemble the report context for the synthetic /reports/demo pages:
// DEMO_* fixtures plus the curated (Dutch) quarterly narrative.
const demoContext = () => {
  const narrative = {
    changedHeading: "wat veranderde dit kwartaal",
    changedBody:
      "Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de `api-gateway` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.",
    doingHeading: "wat we doen",
    doingBody:
      "- **Cursor in `api-gateway`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel \"never delete a test in a refactor commit\" gepind ([details →](/reports/demo/agents/cursor)).\n- **Claude Code uitrollen**: het CLAUDE.md-template dat in `billing-service` werkte naar de andere drie repos kopiëren.\n- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.",
  };
  const footerLinks =
    "[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)";

  return {
    reports: DEMO_REPORTS,
    period: DEMO_PERIOD,
    scopeLabel: `${DEMO_REPOS} repos · ${DEMO_ORG}`,
    bannerHtml: DEMO_BANNER_HTML,
    narrative,
    footerLinks,
  };
};
| 90 | + | |
// Assemble the report context for /reports/live from the aggregated
// commit history. No narrative is attached; the banner, the scope label
// and the per-agent drill-down links are derived from the data.
const liveContext = async () => {
  const { reports, earliest, latest, totalCommits, unknownCount } =
    await buildLiveReports(LIVE_REPO_OWNER, LIVE_REPO_NAME, LIVE_FETCH_COUNT);

  // Period is "first day → last day" of the sampled commits.
  let period = "no commits fetched";
  if (earliest && latest) {
    period = `${earliest.slice(0, 10)} → ${latest.slice(0, 10)}`;
  }

  const links: string[] = [];
  for (const report of reports) {
    links.push(`[${report.name}](/reports/live/agents/${report.slug})`);
  }
  const drillLinks = links.join(" · ");

  const unattributedNote =
    unknownCount > 0 ? ` (${unknownCount} unattributed, excluded)` : "";

  return {
    reports,
    period,
    scopeLabel: `${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} · ${totalCommits} commits sampled${unattributedNote}`,
    bannerHtml: LIVE_BANNER_HTML,
    footerLinks: `${drillLinks ? drillLinks + " · " : ""}[tests overzicht](/reports/live/tests) · [demo preview](/reports/demo) · [back to /reports](/reports)`,
  };
};
| 107 | + | |
| 52 | 108 | const HOME_DESCRIPTION = |
| 53 | 109 | "Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline."; |
| 54 | 110 | |
| @@ -417,10 +473,11 @@ ${rows} | ||
| 417 | 473 | }, |
| 418 | 474 | |
| 419 | 475 | "/reports/demo": async () => { |
| 476 | + const ctx = demoContext(); | |
| 420 | 477 | const html = await renderPage({ |
| 421 | 478 | title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md", |
| 422 | 479 | description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.", |
| 423 | - bodyMarkdown: execSummaryMd(), | |
| 480 | + bodyMarkdown: execSummaryMd(ctx), | |
| 424 | 481 | ogPath: "https://tdd.md/reports/demo", |
| 425 | 482 | noindex: true, |
| 426 | 483 | }); |
| @@ -431,7 +488,12 @@ ${rows} | ||
| 431 | 488 | const html = await renderPage({ |
| 432 | 489 | title: "Tests overzicht (demo) — tdd.md", |
| 433 | 490 | description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.", |
| 434 | - bodyMarkdown: testsOverviewMd(), | |
| 491 | + bodyMarkdown: testsOverviewMd({ | |
| 492 | + period: DEMO_PERIOD, | |
| 493 | + bannerHtml: DEMO_BANNER_HTML, | |
| 494 | + snapshots: DEMO_SNAPSHOTS, | |
| 495 | + stability: DEMO_STABILITY, | |
| 496 | + }), | |
| 435 | 497 | ogPath: "https://tdd.md/reports/demo/tests", |
| 436 | 498 | noindex: true, |
| 437 | 499 | }); |
| @@ -440,7 +502,8 @@ ${rows} | ||
| 440 | 502 | |
| 441 | 503 | "/reports/demo/agents/:slug": async (req) => { |
| 442 | 504 | const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"]; |
| 443 | - const md = agentDrilldownMd(slug); | |
| 505 | + const ctx = demoContext(); | |
| 506 | + const md = agentDrilldownMd(slug, ctx); | |
| 444 | 507 | if (!md) { |
| 445 | 508 | const html = await renderNotFound(`/reports/demo/agents/${slug}`); |
| 446 | 509 | return htmlResponse(html, 404); |
| @@ -456,6 +519,54 @@ ${rows} | ||
| 456 | 519 | return htmlResponse(html); |
| 457 | 520 | }, |
| 458 | 521 | |
| 522 | + "/reports/live": async () => { | |
| 523 | + const ctx = await liveContext(); | |
| 524 | + const html = await renderPage({ | |
| 525 | + title: "TDD-discipline rapport · live — tdd.md", | |
| 526 | + description: `Live discipline rapport gebouwd uit de echte commit-historie van syntaxai/tdd.md (laatste ${LIVE_FETCH_COUNT} commits, 5-min cache).`, | |
| 527 | + bodyMarkdown: execSummaryMd(ctx), | |
| 528 | + ogPath: "https://tdd.md/reports/live", | |
| 529 | + noindex: true, | |
| 530 | + }); | |
| 531 | + return htmlResponse(html); | |
| 532 | + }, | |
| 533 | + | |
| 534 | + "/reports/live/tests": async () => { | |
| 535 | + const html = await renderPage({ | |
| 536 | + title: "Tests overzicht · live (placeholder) — tdd.md", | |
| 537 | + description: "Placeholder voor de live test-overview — wacht op de sandbox-runner sliver.", | |
| 538 | + bodyMarkdown: testsOverviewMd({ | |
| 539 | + period: "live", | |
| 540 | + bannerHtml: LIVE_BANNER_HTML, | |
| 541 | + snapshots: [], | |
| 542 | + stability: [], | |
| 543 | + unavailableNote: "De per-repo test-snapshot en stabiliteitstabel hebben de sandbox-runner sliver nodig (block 1 vervolg). Tot dat klaar is, alleen de exec-summary + drill-down draaien op echte data; de testpagina staat in de [demo](/reports/demo/tests).", | |
| 544 | + }), | |
| 545 | + ogPath: "https://tdd.md/reports/live/tests", | |
| 546 | + noindex: true, | |
| 547 | + }); | |
| 548 | + return htmlResponse(html); | |
| 549 | + }, | |
| 550 | + | |
| 551 | + "/reports/live/agents/:slug": async (req) => { | |
| 552 | + const ctx = await liveContext(); | |
| 553 | + const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"]; | |
| 554 | + const md = agentDrilldownMd(slug, ctx); | |
| 555 | + if (!md) { | |
| 556 | + const html = await renderNotFound(`/reports/live/agents/${slug}`); | |
| 557 | + return htmlResponse(html, 404); | |
| 558 | + } | |
| 559 | + const entry = ctx.reports.find((r) => r.slug === slug)!; | |
| 560 | + const html = await renderPage({ | |
| 561 | + title: `${entry.name} drill-down · live — tdd.md`, | |
| 562 | + description: `Live drill-down voor ${entry.name} op syntaxai/tdd.md — trend, failure-mode breakdown, recent commits.`, | |
| 563 | + bodyMarkdown: md, | |
| 564 | + ogPath: `https://tdd.md/reports/live/agents/${slug}`, | |
| 565 | + noindex: true, | |
| 566 | + }); | |
| 567 | + return htmlResponse(html); | |
| 568 | + }, | |
| 569 | + | |
| 459 | 570 | "/guides": async () => { |
| 460 | 571 | const rows = ALL_GUIDES |
| 461 | 572 | .map((g) => `| [${g.title}](/guides/${g.slug}) | ${g.description} |`) |
src/c32_real_reports.ts
+170
−0
| @@ -0,0 +1,170 @@ | ||
| 1 | +// c32 — logic: aggregate real GitHub commit history into the same | |
| 2 | +// AgentReport / RecentFlagged shape that c51_render_reports renders. | |
| 3 | +// The aggregation itself is pure (commits in, report objects out); the | |
| 4 | +// only I/O is the c14_github.fetchRepoCommits call in buildLiveReports. | |
| 5 | +// | |
| 6 | +// Attribution: Co-Authored-By footers are the agent-attribution channel | |
| 7 | +// the existing tdd.md commit history already uses. Anything without a | |
| 8 | +// recognised footer is bucketed as "unknown" and reported separately — | |
| 9 | +// it's still useful for volume context. | |
| 10 | + | |
| 11 | +import { parseCommit } from "./c31_commits.ts"; | |
| 12 | +import { fetchRepoCommits, type GithubCommit } from "./c14_github.ts"; | |
| 13 | +import type { | |
| 14 | + AgentReport, | |
| 15 | + FailureSlice, | |
| 16 | + RecentFlagged, | |
| 17 | +} from "./c31_reports_demo.ts"; | |
| 18 | + | |
type LiveAgentSlug = AgentReport["slug"] | "unknown";

// Footer patterns in priority order: the first one that matches wins.
// `.` does not cross newlines, so the agent name has to appear on the
// same line as the Co-Authored-By trailer.
const FOOTER_PATTERNS: ReadonlyArray<readonly [RegExp, AgentReport["slug"]]> = [
  [/Co-Authored-By:.*Claude/i, "claude-code"],
  [/Co-Authored-By:.*Cursor/i, "cursor"],
  [/Co-Authored-By:.*Aider/i, "aider"],
];

/** Map a commit message to the agent named in its Co-Authored-By footer. */
const detectAgent = (msg: string): LiveAgentSlug => {
  for (const [pattern, slug] of FOOTER_PATTERNS) {
    if (pattern.test(msg)) return slug;
  }
  return "unknown";
};

/** Display name for each agent slug. */
const AGENT_NAMES: Record<AgentReport["slug"], string> = {
  "claude-code": "Claude Code",
  "cursor": "Cursor",
  "aider": "Aider",
};
| 33 | + | |
/**
 * Daily commit-count series, oldest → newest, ending on the UTC day of
 * `now`. Days without commits stay 0, so the sparkline flat-lines where
 * there is no data.
 *
 * @param commits Commits to bucket by author date (UTC calendar day).
 * @param days    Length of the series (default 30). Non-finite or
 *   negative values yield an empty series.
 * @param now     Reference instant for "today"; defaults to the current time.
 * @returns An array of `days` counts; commits dated in the future, older
 *   than the window, or with an unparsable date are ignored.
 */
const buildTrend = (
  commits: GithubCommit[],
  days = 30,
  now: Date = new Date(),
): number[] => {
  const DAY_MS = 24 * 60 * 60 * 1000;
  const span = Number.isFinite(days) ? Math.max(0, Math.floor(days)) : 0;
  const out = new Array<number>(span).fill(0);
  const todayUtc = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());

  for (const c of commits) {
    const at = new Date(c.commit.author.date);
    const dayUtc = Date.UTC(at.getUTCFullYear(), at.getUTCMonth(), at.getUTCDate());
    // An unparsable date yields NaN, which would slip past the range
    // check below and write a stray "NaN" property onto the array.
    if (!Number.isFinite(dayUtc)) continue;

    const ageDays = Math.floor((todayUtc - dayUtc) / DAY_MS);
    if (ageDays < 0 || ageDays >= span) continue;

    const idx = span - 1 - ageDays;
    out[idx] = (out[idx] ?? 0) + 1;
  }
  return out;
};
| 52 | + | |
/**
 * Build the report for one agent from the commits attributed to it.
 *
 * Phase coverage — the share of commits whose parsed phase is red, green
 * or refactor — doubles as the score, because it is the only structural
 * signal available without running the test suite.
 *
 * @param slug         Agent the commits were attributed to.
 * @param agentCommits That agent's commits, in the order they were fetched.
 * @param repoSlug     "owner/name" label stamped on each recent-commit row.
 */
const buildAgentReport = (
  slug: AgentReport["slug"],
  agentCommits: GithubCommit[],
  repoSlug: string,
): AgentReport => {
  // Count the commits carrying one of the three TDD phase tags.
  let taggedCount = 0;
  for (const c of agentCommits) {
    const { phase } = parseCommit(c.commit.message);
    if (phase === "red" || phase === "green" || phase === "refactor") {
      taggedCount += 1;
    }
  }

  const commitCount = agentCommits.length;
  const phaseCoveragePct =
    commitCount === 0 ? 0 : Math.round((taggedCount / commitCount) * 100);
  const untaggedPct = 100 - phaseCoveragePct;

  // Live data only distinguishes tagged from untagged; finer failure
  // modes (red-did-not-fail, test-deleted, …) need the runner sliver.
  const failureMix: FailureSlice[] = [
    { label: "phase-tagged", pct: phaseCoveragePct, tone: "green" },
    { label: "no phase tag", pct: untaggedPct, tone: "muted" },
  ];

  // Row for the "recent" table. Commits without a TDD phase are shown
  // under "green", with the failure column explaining the missing tag.
  const toRecent = (c: GithubCommit): RecentFlagged => {
    const parsed = parseCommit(c.commit.message);
    return {
      date: c.commit.author.date.slice(0, 10),
      repo: repoSlug,
      sha: c.sha.slice(0, 7),
      phase:
        parsed.phase === "red" || parsed.phase === "green" || parsed.phase === "refactor"
          ? parsed.phase
          : "green",
      failure:
        parsed.phase === "untagged" || parsed.phase === "init"
          ? "no phase tag"
          : `${parsed.phase} (live judge not yet wired)`,
      pts: 0,
    };
  };
  const recent: RecentFlagged[] = agentCommits.slice(0, 5).map(toRecent);

  return {
    slug,
    name: AGENT_NAMES[slug],
    // Score is the coverage proxy: 0% coverage means no TDD attempt.
    score: phaseCoveragePct,
    delta: 0,
    commits: commitCount,
    phaseCoveragePct,
    streak: 0,
    streakBroken: false,
    topIssueLabel: phaseCoveragePct === 100 ? "no current issues" : "no phase tag",
    topIssuePct: untaggedPct,
    failureMix,
    trend: buildTrend(agentCommits),
    recent,
  };
};
| 118 | + | |
/** Aggregated view of one repository's recent commit history. */
export interface LiveReports {
  /** One report per agent that has at least one attributed commit. */
  reports: AgentReport[];
  /** Commits without a recognised agent footer (not part of `reports`). */
  unknownCount: number;
  /** All commits sampled, attributed or not. */
  totalCommits: number;
  /** Author date of the oldest sampled commit, or null when none parsed. */
  earliest: string | null;
  /** Author date of the newest sampled commit, or null when none parsed. */
  latest: string | null;
  /** Epoch milliseconds at which this aggregation was built. */
  fetchedAt: number;
}

/**
 * Fetch a repository's recent commits and roll them up per agent.
 *
 * @param repoOwner GitHub owner / organisation.
 * @param repoName  Repository name.
 * @param perPage   How many of the newest commits to sample (default 100).
 * @returns Per-agent reports in fixed order (claude-code, cursor, aider),
 *   the unattributed count and the date range of the sample.
 */
export const buildLiveReports = async (
  repoOwner: string,
  repoName: string,
  perPage = 100,
): Promise<LiveReports> => {
  const commits = await fetchRepoCommits(repoOwner, repoName, perPage);
  const repoSlug = `${repoOwner}/${repoName}`;
  const byAgent = new Map<AgentReport["slug"], GithubCommit[]>();
  let unknownCount = 0;

  // Track the date range by parsed instant rather than string order:
  // dates with different UTC offsets do not sort chronologically as text.
  let earliest: string | null = null;
  let latest: string | null = null;
  let earliestMs = Number.POSITIVE_INFINITY;
  let latestMs = Number.NEGATIVE_INFINITY;

  for (const c of commits) {
    const stamp = c.commit.author.date;
    const ms = Date.parse(stamp);
    if (!Number.isNaN(ms)) {
      if (ms < earliestMs) {
        earliestMs = ms;
        earliest = stamp;
      }
      if (ms > latestMs) {
        latestMs = ms;
        latest = stamp;
      }
    }

    const agent = detectAgent(c.commit.message);
    if (agent === "unknown") {
      unknownCount++;
      continue;
    }
    const bucket = byAgent.get(agent);
    if (bucket) {
      bucket.push(c);
    } else {
      byAgent.set(agent, [c]);
    }
  }

  // Agents with no attributed commits are omitted entirely.
  const order: AgentReport["slug"][] = ["claude-code", "cursor", "aider"];
  const reports = order.flatMap((slug) => {
    const list = byAgent.get(slug);
    return list && list.length > 0 ? [buildAgentReport(slug, list, repoSlug)] : [];
  });

  return {
    reports,
    unknownCount,
    totalCommits: commits.length,
    earliest,
    latest,
    fetchedAt: Date.now(),
  };
};
src/c51_render_reports.ts
+93
−50
| @@ -1,14 +1,11 @@ | ||
| 1 | 1 | // c51 (reports) — body builders for /reports, /reports/demo, |
| 2 | -// /reports/demo/agents/:slug, /reports/demo/tests. All synthetic data | |
| 3 | -// comes from c31_reports_demo; chrome helpers come from c51_render_layout. | |
| 2 | +// /reports/live, /reports/demo/agents/:slug, /reports/demo/tests. The | |
| 3 | +// builders take the dataset as an explicit ReportsContext so the same | |
| 4 | +// markdown templates serve both the synthetic demo (DEMO_* from | |
| 5 | +// c31_reports_demo) and the live tdd.md aggregation (c32_real_reports). | |
| 4 | 6 | |
| 5 | 7 | import { |
| 6 | - DEMO_PERIOD, | |
| 7 | - DEMO_ORG, | |
| 8 | - DEMO_REPOS, | |
| 9 | 8 | DEMO_REPORTS, |
| 10 | - DEMO_SNAPSHOTS, | |
| 11 | - DEMO_STABILITY, | |
| 12 | 9 | type AgentReport, |
| 13 | 10 | type FailureSlice, |
| 14 | 11 | type TestSnapshot, |
| @@ -16,6 +13,33 @@ import { | ||
| 16 | 13 | } from "./c31_reports_demo.ts"; |
| 17 | 14 | import { escape } from "./c51_render_layout.ts"; |
| 18 | 15 | |
/**
 * Dataset handed to the report body builders, so the same markdown
 * templates can render both the synthetic demo and the live aggregation.
 */
export interface ReportsContext {
  /** Per-agent reports to render. */
  reports: AgentReport[];
  /** Period label shown in the heading and the summary line. */
  period: string;
  /** Scope description for the summary line. */
  scopeLabel: string;
  /** Banner markup placed under the page heading. */
  bannerHtml: string;
  /**
   * Curated narrative sections. Present for the demo; omitted for live,
   * where the data has to speak for itself.
   */
  narrative?: {
    changedHeading: string;
    changedBody: string;
    doingHeading: string;
    doingBody: string;
  };
  /** Trailing footer line of links. */
  footerLinks: string;
}
| 32 | + | |
/** Dataset handed to the tests-overview body builder. */
export interface TestsOverviewContext {
  /** Period label for the page. */
  period: string;
  /** Banner markup for the page. */
  bannerHtml: string;
  /** Test snapshots to render. */
  snapshots: TestSnapshot[];
  /** Test stability rows to render. */
  stability: TestStability[];
  /**
   * Placeholder note used instead of the snapshot and stability sections
   * while the runner sliver is not wired (live mode, today).
   */
  unavailableNote?: string;
}
| 42 | + | |
| 19 | 43 | const trendArrow = (delta: number): { glyph: string; cls: string } => |
| 20 | 44 | delta > 0 ? { glyph: "↑", cls: "up" } : delta < 0 ? { glyph: "↓", cls: "down" } : { glyph: "→", cls: "flat" }; |
| 21 | 45 | |
| @@ -70,8 +94,6 @@ const streakBox = (a: AgentReport): string => { | ||
| 70 | 94 | return `<span class="report-streak ${cls}"><span class="report-streak-num">${a.streak}</span> ${label}</span>`; |
| 71 | 95 | }; |
| 72 | 96 | |
| 73 | -const mockBanner = `<div class="report-mockup-banner">demo data — real reporting wires up when the project-tracking pipeline ships. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a> · <a href="/reports">about reporting</a></div>`; | |
| 74 | - | |
| 75 | 97 | const snapshotBlock = (s: TestSnapshot): string => { |
| 76 | 98 | const failuresHtml = s.failures.length === 0 |
| 77 | 99 | ? `<li class="test-list-pass">all ${s.passing} tests groen</li>` |
| @@ -113,13 +135,16 @@ export const reportsLandingMd = (): string => `# reports | ||
| 113 | 135 | |
| 114 | 136 | > Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout. |
| 115 | 137 | |
| 116 | -This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data: | |
| 138 | +Two views of the same shape: | |
| 139 | + | |
| 140 | +- **[/reports/live](/reports/live)** — built from real commit data on \`syntaxai/tdd.md\` (the repo this site runs on), refreshed every 5 minutes from the GitHub commits API. Agent attribution comes from \`Co-Authored-By:\` footers. Phase-coverage is the only metric we can compute without running tests, so the score is a proxy for now. | |
| 141 | +- **[/reports/demo](/reports/demo)** — the polished design preview with synthetic data for three agents and four repos. Useful for screenshots and showing the full failure-mode breakdown the live view can't compute yet. | |
| 117 | 142 | |
| 118 | -- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents | |
| 119 | -- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits | |
| 120 | -- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam | |
| 143 | +Drill-downs: | |
| 144 | +- [live drill-down per agent](/reports/live/agents/claude-code) · [tests overzicht (live: placeholder)](/reports/live/tests) | |
| 145 | +- [demo drill-down per agent](/reports/demo/agents/cursor) · [tests overzicht (demo)](/reports/demo/tests) | |
| 121 | 146 | |
| 122 | -Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers. | |
| 147 | +Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging on tracked branches lands in a follow-up sliver; live reporting from the GitHub API already works for the dogfood case (the tdd.md repo itself). | |
| 123 | 148 | |
| 124 | 149 | ## what gets measured |
| 125 | 150 | |
| @@ -145,55 +170,63 @@ For team-leads: | ||
| 145 | 170 | [← back to tdd.md](/) · [the blog](/blog) · [the katas](/games) |
| 146 | 171 | `; |
| 147 | 172 | |
| 148 | -export const execSummaryMd = (): string => { | |
| 149 | - const totalCommits = DEMO_REPORTS.reduce((s, a) => s + a.commits, 0); | |
| 150 | - const tiles = DEMO_REPORTS.map(tile).join("\n"); | |
| 151 | - return `# tdd-discipline rapport · q1 2026 | |
| 173 | +export const execSummaryMd = (ctx: ReportsContext): string => { | |
| 174 | + const totalCommits = ctx.reports.reduce((s, a) => s + a.commits, 0); | |
| 175 | + const tiles = ctx.reports.length === 0 | |
| 176 | + ? `<div class="report-tile-empty">No agent-attributed commits in this dataset.</div>` | |
| 177 | + : ctx.reports.map(tile).join("\n"); | |
| 178 | + const narrativeBlock = ctx.narrative | |
| 179 | + ? `## ${ctx.narrative.changedHeading} | |
| 152 | 180 | |
| 153 | -${mockBanner} | |
| 181 | +${ctx.narrative.changedBody} | |
| 154 | 182 | |
| 155 | -> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}. | |
| 183 | +## ${ctx.narrative.doingHeading} | |
| 156 | 184 | |
| 157 | -<div class="report-tiles"> | |
| 158 | -${tiles} | |
| 159 | -</div> | |
| 185 | +${ctx.narrative.doingBody} | |
| 160 | 186 | |
| 161 | -## wat veranderde dit kwartaal | |
| 187 | +` | |
| 188 | + : ""; | |
| 189 | + return `# tdd-discipline rapport · ${ctx.period} | |
| 162 | 190 | |
| 163 | -Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf. | |
| 191 | +${ctx.bannerHtml} | |
| 164 | 192 | |
| 165 | -## wat we doen | |
| 193 | +> **Periode** ${ctx.period} · **Scope** ${escape(ctx.scopeLabel)} · ${totalCommits.toLocaleString()} AI-toegeschreven commits. | |
| 166 | 194 | |
| 167 | -- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)). | |
| 168 | -- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren. | |
| 169 | -- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt. | |
| 195 | +<div class="report-tiles"> | |
| 196 | +${tiles} | |
| 197 | +</div> | |
| 170 | 198 | |
| 171 | -## wat dit getal *niet* meet | |
| 199 | +${narrativeBlock}## wat dit getal *niet* meet | |
| 172 | 200 | |
| 173 | 201 | Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy. |
| 174 | 202 | |
| 175 | 203 | --- |
| 176 | 204 | |
| 177 | -[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports) | |
| 205 | +${ctx.footerLinks} | |
| 178 | 206 | `; |
| 179 | 207 | }; |
| 180 | 208 | |
| 181 | -export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => { | |
| 182 | - const a = DEMO_REPORTS.find((r) => r.slug === slug); | |
| 209 | +export const agentDrilldownMd = ( | |
| 210 | + slug: AgentReport["slug"], | |
| 211 | + ctx: ReportsContext, | |
| 212 | +): string | null => { | |
| 213 | + const a = ctx.reports.find((r) => r.slug === slug); | |
| 183 | 214 | if (!a) return null; |
| 184 | 215 | const arr = trendArrow(a.delta); |
| 185 | 216 | const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; |
| 186 | - const recentRows = a.recent | |
| 187 | - .map( | |
| 188 | - (r) => | |
| 189 | - `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, | |
| 190 | - ) | |
| 191 | - .join("\n"); | |
| 217 | + const recentRows = a.recent.length === 0 | |
| 218 | + ? `| _no recent attributed activity_ | | | | | |` | |
| 219 | + : a.recent | |
| 220 | + .map( | |
| 221 | + (r) => | |
| 222 | + `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, | |
| 223 | + ) | |
| 224 | + .join("\n"); | |
| 192 | 225 | return `# ${a.name} · drill-down |
| 193 | 226 | |
| 194 | -${mockBanner} | |
| 227 | +${ctx.bannerHtml} | |
| 195 | 228 | |
| 196 | -> Discipline-score **${a.score} / 100** <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. | |
| 229 | +> Discipline-score **${a.score} / 100** <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${ctx.period}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. | |
| 197 | 230 | |
| 198 | 231 | ## trend (30 dagen) |
| 199 | 232 | |
| @@ -222,21 +255,31 @@ ${recentRows} | ||
| 222 | 255 | |
| 223 | 256 | --- |
| 224 | 257 | |
| 225 | -[← exec summary](/reports/demo) · [back to /reports](/reports) | |
| 258 | +${ctx.footerLinks} | |
| 226 | 259 | `; |
| 227 | 260 | }; |
| 228 | 261 | |
| 229 | -export const testsOverviewMd = (): string => { | |
| 230 | - const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0); | |
| 231 | - const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0); | |
| 232 | - const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0); | |
| 233 | - const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n"); | |
| 234 | - const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n"); | |
| 262 | +export const testsOverviewMd = (ctx: TestsOverviewContext): string => { | |
| 263 | + if (ctx.unavailableNote) { | |
| 264 | + return `# tests overzicht | |
| 265 | + | |
| 266 | +${ctx.bannerHtml} | |
| 267 | + | |
| 268 | +> ${ctx.unavailableNote} | |
| 269 | + | |
| 270 | +[← exec summary](/reports) · [back to /reports](/reports) | |
| 271 | +`; | |
| 272 | + } | |
| 273 | + const total = ctx.snapshots.reduce((s, r) => s + r.total, 0); | |
| 274 | + const passing = ctx.snapshots.reduce((s, r) => s + r.passing, 0); | |
| 275 | + const failing = ctx.snapshots.reduce((s, r) => s + r.failing, 0); | |
| 276 | + const snapshots = ctx.snapshots.map(snapshotBlock).join("\n"); | |
| 277 | + const stabRows = ctx.stability.map(stabilityRow).join("\n"); | |
| 235 | 278 | return `# tests overzicht |
| 236 | 279 | |
| 237 | -${mockBanner} | |
| 280 | +${ctx.bannerHtml} | |
| 238 | 281 | |
| 239 | -> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". | |
| 282 | +> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${ctx.period}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". | |
| 240 | 283 | |
| 241 | 284 | ## huidige stand · per repo |
| 242 | 285 | |