| 1 | +// Mockup reporting layer for tdd.md. |
| 2 | +// |
| 3 | +// All data here is FAKE — wired up only so the management/exec view and |
| 4 | +// per-agent drill-down can be designed in the browser before the real |
| 5 | +// project-tracking pipeline (block 1) exists. |
| 6 | +// |
| 7 | +// Real reporting needs: |
| 8 | +// - GitHub App / webhook ingest of pushes on tracked branches |
| 9 | +// - per-commit judging without hidden tests (red-fails / green-passes / |
| 10 | +// no-test-deletion / no-regression) |
| 11 | +// - agent attribution (commit footer convention or wrapper-driven) |
| 12 | +// Once that exists, the same generators in this file accept real data. |
| 13 | + |
/** TDD phase recorded for a flagged commit. */
type FlaggedPhase = "red" | "green" | "refactor";

/**
 * One row of the "recent flagged" table on the per-agent drill-down page.
 * Every field is rendered as its own table column, in declaration order.
 */
interface RecentFlagged {
  /** Commit date; the demo rows use `YYYY-MM-DD` strings. */
  date: string;
  /** Repository name; rendered inside a markdown code span. */
  repo: string;
  /** Abbreviated commit hash; rendered inside a markdown code span. */
  sha: string;
  /** Phase the commit is attributed to. */
  phase: FlaggedPhase;
  /** Failure-mode label shown in the "failure" column. */
  failure: string;
  /** Value shown in the "pts" column (zero or negative in the demo rows). */
  pts: number;
}
| 22 | + |
/** Colour tone of a bar; emitted as an extra CSS class on the bar fill. */
type SliceTone = "red" | "green" | "muted" | "accent";

/**
 * One bar of the failure-mode breakdown chart.
 */
interface FailureSlice {
  /** Text shown to the left of the bar. */
  label: string;
  /** Percentage value; used both as the bar's CSS width and as its printed figure. */
  pct: number;
  /** Tone class applied to the bar fill. */
  tone: SliceTone;
}
| 28 | + |
/**
 * Everything the report pages need to render one agent: the summary tile
 * on the exec page and the full drill-down page.
 */
export interface AgentReport {
  // --- identity ---------------------------------------------------------
  /** URL segment of the drill-down page (`/reports/demo/agents/<slug>`). */
  slug: "claude-code" | "cursor" | "aider";
  /** Display name used in the tile link and the drill-down heading. */
  name: string;

  // --- headline numbers -------------------------------------------------
  /** Discipline score, displayed as "<score> / 100". */
  score: number;
  /** Score change; its sign selects the trend arrow and its CSS class. */
  delta: number;
  /** Number of commits analysed; printed with locale-aware grouping. */
  commits: number;
  /** Phase-coverage percentage shown in the drill-down summary line. */
  phaseCoveragePct: number;

  // --- streak -----------------------------------------------------------
  /** Number shown inside the streak badge. */
  streak: number;
  /** When true the badge is styled "broken" and labelled "recent break". */
  streakBroken: boolean;

  // --- failure analysis -------------------------------------------------
  /** Label of the most frequent issue. */
  topIssueLabel: string;
  /** Percentage printed next to the top-issue label. */
  topIssuePct: number;
  /** Slices of the failure-mode bar chart, rendered in array order. */
  failureMix: FailureSlice[];

  // --- time series and detail rows --------------------------------------
  /** Score series drawn as the sparkline (the demo data has 30 points). */
  trend: number[];
  /** Rows of the "recent flagged" table, rendered in array order. */
  recent: RecentFlagged[];
}
| 44 | + |
/** Reporting window printed on the exec summary and every drill-down page. */
export const DEMO_PERIOD = "2026-01-01 → 2026-03-31";

/** Organisation name printed in the exec summary scope line. */
export const DEMO_ORG = "acme-corp";

/** Number of repositories printed in the exec summary scope line. */
export const DEMO_REPOS = 4;
| 48 | + |
/**
 * Synthetic per-agent reports backing the demo pages. All values are fake.
 *
 * The rows are kept internally consistent so the mockup reads like real
 * output:
 * - each `failureMix` sums to 100;
 * - each `trend` has 30 points, its last point equals `score`, and
 *   last minus first equals `delta`;
 * - `topIssuePct` matches the largest non-"clean cycles" slice;
 * - every "broken refactor" row is a `refactor` commit, matching the
 *   definition on the reports landing page ("tests fail at a `refactor:`
 *   commit").
 */
export const DEMO_REPORTS: AgentReport[] = [
  {
    slug: "claude-code",
    name: "Claude Code",
    score: 78,
    delta: +6,
    commits: 612,
    phaseCoveragePct: 92,
    streak: 47,
    streakBroken: false,
    topIssueLabel: "red-did-not-fail",
    topIssuePct: 8,
    failureMix: [
      { label: "clean cycles", pct: 84, tone: "green" },
      { label: "red-did-not-fail", pct: 8, tone: "red" },
      { label: "broken refactor", pct: 4, tone: "red" },
      { label: "test-deleted", pct: 2, tone: "red" },
      { label: "no phase tag", pct: 2, tone: "muted" },
    ],
    trend: [72, 73, 71, 74, 72, 75, 73, 75, 77, 76, 75, 76, 78, 77, 79, 78, 77, 79, 80, 78, 79, 80, 79, 81, 80, 82, 81, 80, 79, 78],
    recent: [
      { date: "2026-03-29", repo: "api-gateway", sha: "f1c8b3a", phase: "red", failure: "red-did-not-fail", pts: -5 },
      { date: "2026-03-24", repo: "billing-service", sha: "9d2e1f4", phase: "refactor", failure: "broken refactor", pts: -5 },
      { date: "2026-03-18", repo: "data-pipeline", sha: "62a9cb7", phase: "green", failure: "no phase tag (parent)", pts: 0 },
    ],
  },
  {
    slug: "cursor",
    name: "Cursor",
    score: 54,
    delta: -15,
    commits: 489,
    phaseCoveragePct: 71,
    streak: 3,
    streakBroken: true,
    topIssueLabel: "test-deleted in refactor",
    topIssuePct: 14,
    failureMix: [
      { label: "clean cycles", pct: 64, tone: "green" },
      { label: "test-deleted", pct: 14, tone: "red" },
      { label: "red-did-not-fail", pct: 9, tone: "red" },
      { label: "broken refactor", pct: 7, tone: "red" },
      { label: "no phase tag", pct: 6, tone: "muted" },
    ],
    trend: [69, 70, 71, 72, 70, 71, 72, 73, 72, 71, 72, 70, 68, 65, 60, 55, 50, 52, 54, 53, 56, 54, 52, 55, 53, 54, 56, 55, 54, 54],
    recent: [
      { date: "2026-03-28", repo: "api-gateway", sha: "a1b2c3d", phase: "refactor", failure: "test-deleted", pts: -20 },
      // A "broken refactor" can only be raised on a refactor-phase commit.
      { date: "2026-03-26", repo: "api-gateway", sha: "4e5f6a7", phase: "refactor", failure: "broken refactor", pts: -5 },
      { date: "2026-03-23", repo: "billing-service", sha: "8b9c0d1", phase: "red", failure: "red-did-not-fail", pts: -5 },
      { date: "2026-03-21", repo: "api-gateway", sha: "2e3f4a5", phase: "refactor", failure: "test-deleted", pts: -20 },
      { date: "2026-03-19", repo: "data-pipeline", sha: "6b7c8d9", phase: "refactor", failure: "broken refactor", pts: -5 },
    ],
  },
  {
    slug: "aider",
    name: "Aider",
    score: 89,
    delta: +2,
    commits: 146,
    phaseCoveragePct: 96,
    streak: 89,
    streakBroken: false,
    topIssueLabel: "broken refactor",
    topIssuePct: 3,
    failureMix: [
      { label: "clean cycles", pct: 94, tone: "green" },
      { label: "broken refactor", pct: 3, tone: "red" },
      { label: "red-did-not-fail", pct: 2, tone: "red" },
      { label: "no phase tag", pct: 1, tone: "muted" },
    ],
    trend: [87, 88, 89, 88, 87, 89, 90, 89, 88, 89, 90, 88, 89, 90, 91, 89, 88, 89, 90, 89, 90, 91, 89, 88, 89, 90, 89, 90, 89, 89],
    recent: [
      { date: "2026-03-27", repo: "data-pipeline", sha: "3a4b5c6", phase: "refactor", failure: "broken refactor", pts: -5 },
      { date: "2026-03-15", repo: "billing-service", sha: "7d8e9f0", phase: "red", failure: "red-did-not-fail", pts: -5 },
    ],
  },
];
| 126 | + |
/** HTML entity for each character that `escape` rewrites. */
const HTML_ENTITIES: Record<string, string> = {
  "&": "&amp;",
  '"': "&quot;",
  "<": "&lt;",
  ">": "&gt;",
};

/**
 * Escape a string for interpolation into HTML text or a double-quoted
 * attribute value.
 *
 * Rewrites `&`, `"`, `<` and `>` to their entities in a single pass, so an
 * ampersand that is already part of an entity is escaped again
 * (`&amp;` becomes `&amp;amp;`). Single quotes are left untouched, so the
 * result must not be placed inside a single-quoted attribute.
 *
 * Note: within this module the name shadows the legacy global `escape`.
 *
 * @param s Raw text.
 * @returns The text with the four characters above replaced by entities.
 */
const escape = (s: string): string =>
  s.replace(/[&"<>]/g, (ch) => HTML_ENTITIES[ch] ?? ch);
| 129 | + |
/**
 * Pick the arrow glyph and CSS class that represent a score change.
 *
 * @param delta Signed score change.
 * @returns `↑`/`up` for a positive delta, `↓`/`down` for a negative one,
 *   and `→`/`flat` otherwise (zero, or any value that is neither above
 *   nor below zero).
 */
const trendArrow = (delta: number): { glyph: string; cls: string } => {
  if (delta > 0) {
    return { glyph: "↑", cls: "up" };
  }
  if (delta < 0) {
    return { glyph: "↓", cls: "down" };
  }
  return { glyph: "→", cls: "flat" };
};
| 132 | + |
/**
 * Render a numeric series as an inline SVG polyline.
 *
 * Points are spread evenly from x = 0 to x = `width`. Values are scaled
 * so the series minimum sits 6 units above the bottom edge and the maximum
 * 6 units below the top edge. The value range is floored at 1, so a flat
 * series is drawn along the bottom padding line. The stroke uses
 * `currentColor`, so the colour comes from the surrounding element.
 *
 * @param values Series to plot, in order.
 * @param height viewBox height (default 60).
 * @param width viewBox width (default 320).
 * @returns SVG markup, or an empty string when `values` is empty.
 */
const sparkline = (values: number[], height = 60, width = 320): string => {
  if (values.length === 0) {
    return "";
  }

  const pad = 6;
  const innerH = height - pad * 2;
  const lowest = Math.min(...values);
  const highest = Math.max(...values);
  // Floor the range at 1 so a flat series does not divide by zero.
  const span = Math.max(1, highest - lowest);
  // A single value still gets a finite step (width / 1).
  const dx = width / Math.max(1, values.length - 1);

  const coords: string[] = [];
  values.forEach((v, i) => {
    const x = (i * dx).toFixed(1);
    // SVG y grows downwards, so larger values get smaller y.
    const y = (pad + innerH - ((v - lowest) / span) * innerH).toFixed(1);
    coords.push(`${x},${y}`);
  });
  const points = coords.join(" ");

  return `<svg class="report-sparkline" viewBox="0 0 ${width} ${height}" preserveAspectRatio="none" aria-hidden="true">
 <polyline fill="none" stroke="currentColor" stroke-width="1.5" points="${points}" />
</svg>`;
};
| 152 | + |
/**
 * Render one agent's summary tile for the exec summary page.
 *
 * The tile links to the agent's drill-down page and shows the score, the
 * signed delta with its trend arrow, the commit count and the top issue.
 * Name, delta text and issue label are passed through `escape`; the
 * numeric fields are interpolated directly.
 *
 * @param a Report to render.
 * @returns HTML markup for the tile.
 */
const tile = (a: AgentReport): string => {
  const { glyph, cls } = trendArrow(a.delta);
  // Positive deltas get an explicit "+"; zero and negatives print as-is.
  const signedDelta = a.delta > 0 ? `+${a.delta}` : `${a.delta}`;

  const lines = [
    `<div class="report-tile">`,
    ` <p class="report-tile-name"><a href="/reports/demo/agents/${a.slug}">${escape(a.name)}</a></p>`,
    ` <p class="report-tile-score">${a.score}<span class="report-tile-score-suffix"> / 100</span></p>`,
    ` <p class="report-tile-trend ${cls}">${glyph} ${escape(signedDelta)}</p>`,
    ` <p class="report-tile-volume">${a.commits.toLocaleString()} commits</p>`,
    ` <div class="report-tile-issue">top issue: <strong>${escape(a.topIssueLabel)}</strong> (${a.topIssuePct}%)</div>`,
    `</div>`,
  ];
  return lines.join("\n");
};
| 164 | + |
/**
 * Render the failure-mode breakdown as a stack of horizontal bars.
 *
 * Each slice becomes one row: escaped label, a track whose fill is
 * `pct`% wide and carries the slice tone as a CSS class, and the
 * percentage as text. Rows keep the order of `mix`.
 *
 * @param mix Slices to draw.
 * @returns HTML markup wrapping all rows in a `report-bars` container.
 */
const bars = (mix: FailureSlice[]): string => {
  const rendered: string[] = [];
  for (const slice of mix) {
    rendered.push(`<div class="report-bar-row">
 <span class="report-bar-label">${escape(slice.label)}</span>
 <span class="report-bar-track"><span class="report-bar-fill ${slice.tone}" style="width: ${slice.pct}%"></span></span>
 <span class="report-bar-pct">${slice.pct}%</span>
</div>`);
  }
  const rows = rendered.join("\n");
  return `<div class="report-bars">${rows}</div>`;
};
| 177 | + |
/**
 * Render the streak badge for an agent.
 *
 * A broken streak is styled `broken` and labelled "recent break". An
 * unbroken streak is labelled "consecutive clean cycles" and gets the
 * `long` modifier once it reaches 30; shorter streaks get no modifier.
 *
 * @param a Report whose `streak` and `streakBroken` are read.
 * @returns HTML markup for the badge.
 */
const streakBox = (a: AgentReport): string => {
  let modifier = "";
  let caption = "consecutive clean cycles";
  if (a.streakBroken) {
    modifier = "broken";
    caption = "recent break";
  } else if (a.streak >= 30) {
    modifier = "long";
  }
  return `<span class="report-streak ${modifier}"><span class="report-streak-num">${a.streak}</span> ${caption}</span>`;
};
| 183 | + |
/**
 * Banner placed at the top of the demo pages to flag the data as
 * synthetic, with links to the background blog post and the reports
 * landing page.
 */
const mockBanner = [
  `<div class="report-mockup-banner">`,
  `demo data — real reporting wires up when the project-tracking pipeline ships. `,
  `<a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a>`,
  ` · `,
  `<a href="/reports">about reporting</a>`,
  `</div>`,
].join("");
| 185 | + |
/**
 * Markdown for the `/reports` landing page.
 *
 * The page is static: it introduces the reporting layer, links to the
 * two demo pages, lists the failure modes with their cost, and points
 * management and team-leads at the page meant for them.
 *
 * @returns The page markdown, ending with a newline.
 */
export const reportsLandingMd = (): string => {
  const intro = `# reports

> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.

This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data:

- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents
- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits`;

  // Pipes inside the last table cell are backslash-escaped so they do not
  // split the markdown table column.
  const measured = `## what gets measured

This layer measures **discipline**, not code-quality. Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch:

| failure mode | what triggers it | what it costs |
|---|---|---|
| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit |
| \`test-deleted\` | test count drops between commits | -20 / commit |
| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit |
| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % |

The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip.`;

  const reading = `## reading the data

For management:
- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page.

For team-leads:
- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts.`;

  const footer = `[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games)`;

  // Sections are separated by exactly one blank line.
  return [intro, measured, reading, footer].join("\n\n") + "\n";
};
| 218 | + |
/**
 * Markdown for the exec summary demo page (`/reports/demo`).
 *
 * The scope line and the tile grid are generated from `DEMO_REPORTS` and
 * the `DEMO_*` constants; the narrative sections below the tiles are
 * fixed text.
 *
 * @returns The page markdown, ending with a newline.
 */
export const execSummaryMd = (): string => {
  let commitTotal = 0;
  const tileMarkup: string[] = [];
  for (const report of DEMO_REPORTS) {
    commitTotal += report.commits;
    tileMarkup.push(tile(report));
  }

  // Generated part: heading, banner, scope line and the tile grid.
  const head = `# tdd-discipline rapport · q1 2026

${mockBanner}

> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${commitTotal.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}.

<div class="report-tiles">
${tileMarkup.join("\n")}
</div>`;

  // Fixed narrative: what changed, what is being done, what is not measured.
  const narrative = `## wat veranderde dit kwartaal

Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.

## wat we doen

- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)).
- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren.
- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.

## wat dit getal *niet* meet

Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy.

---

[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [back to /reports](/reports)
`;

  return `${head}\n\n${narrative}`;
};
| 251 | + |
/**
 * Markdown for one agent's drill-down demo page
 * (`/reports/demo/agents/<slug>`).
 *
 * The page shows the score line, the trend sparkline (wrapped in a
 * container whose class follows the delta direction), the streak badge,
 * the failure-mode bars, the recent flagged commits table and a coaching
 * link chosen by slug.
 *
 * @param slug Agent to render.
 * @returns The page markdown, or `null` when `DEMO_REPORTS` has no entry
 *   with that slug.
 */
export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => {
  const report = DEMO_REPORTS.find((candidate) => candidate.slug === slug);
  if (report === undefined) {
    return null;
  }

  const arrow = trendArrow(report.delta);
  // Positive deltas get an explicit "+"; zero and negatives print as-is.
  const signedDelta = report.delta > 0 ? `+${report.delta}` : `${report.delta}`;

  // Colour class for the sparkline wrapper: falling = red, rising = green.
  let trendTone = "muted";
  if (arrow.cls === "down") {
    trendTone = "red";
  } else if (arrow.cls === "up") {
    trendTone = "green";
  }

  const flaggedRows: string[] = [];
  for (const row of report.recent) {
    flaggedRows.push(
      `| ${row.date} | \`${row.repo}\` | \`${row.sha}\` | ${row.phase} | ${row.failure} | ${row.pts} |`,
    );
  }

  // Agent-specific blog post; any slug other than the first two gets the Aider post.
  let coaching: string;
  if (report.slug === "claude-code") {
    coaching = `[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent \`red-did-not-fail\`.`;
  } else if (report.slug === "cursor") {
    coaching = `[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, \`.cursor/rules\` to stop test-deletion.`;
  } else {
    coaching = `[Aider is the closest agent to TDD on rails — until \`--auto-test\`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.`;
  }

  return `# ${report.name} · drill-down

${mockBanner}

> Discipline-score **${report.score} / 100** <span class="report-tile-trend ${arrow.cls}">${arrow.glyph} ${signedDelta}</span> over ${DEMO_PERIOD}. ${report.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${report.phaseCoveragePct}%**.

## trend (30 dagen)

<div class="${trendTone}">
${sparkline(report.trend)}
</div>

${streakBox(report)}

## failure-mode breakdown

${bars(report.failureMix)}

Top issue dit kwartaal: **${escape(report.topIssueLabel)}** (${report.topIssuePct}% van commits).

## recent flagged

| date | repo | sha | phase | failure | pts |
|---|---|---|---|---|---|
${flaggedRows.join("\n")}

## coaching

- ${coaching}
- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough.

---

[← exec summary](/reports/demo) · [back to /reports](/reports)
`;
};