/** Organisation identifier of the demo data set. */
export const DEMO_ORG = "acme-corp";

/** Repository count quoted for the demo organisation. */
export const DEMO_REPOS = 4;
| 48 | 48 | |
/** One currently failing test, as listed inside a repo snapshot. */
interface TestFailure {
  /** Display name of the test; the demo rows use `<spec file> > <case title>`. */
  test: string;
  /** Date shown after "sinds"; the demo rows use `YYYY-MM-DD` strings. */
  since: string;
  /** When truthy the failure is labelled "intermittent" in the rendered list. */
  flaky?: boolean;
}
| 54 | + |
/** Current test status of a single repo branch. */
interface TestSnapshot {
  /** Repository name, rendered as the card heading. */
  repo: string;
  /** Branch the snapshot was taken on, rendered after an "@". */
  branch: string;
  /** Total number of tests in the snapshot. */
  total: number;
  /** Number of passing tests. */
  passing: number;
  /** Number of failing tests; `0` gives the card its "ok" styling. */
  failing: number;
  /** Individual failing tests; an empty list renders the "all green" line. */
  failures: TestFailure[];
}
| 63 | + |
/** Pass/fail/deletion history of one test, one row in the stability table. */
interface TestStability {
  /** Display name of the test. */
  test: string;
  /** Repository the test belongs to, shown beneath the test name. */
  repo: string;
  /** Count of pass events. */
  pass: number;
  /** Count of fail events. */
  fail: number;
  /** Count of deletion events. */
  deleted: number;
  /** Slug of the agent that last broke the test; rendered as a link to that agent. */
  lastBrokenBy: AgentReport["slug"];
  /** When truthy the row is highlighted and gets a ⚠ marker. */
  flagged?: boolean;
}
| 73 | + |
/**
 * Demo snapshot per repo: totals plus the tests that are currently failing.
 * In every row `total` equals `passing + failing`, and `failing` equals
 * `failures.length`.
 */
export const DEMO_SNAPSHOTS: TestSnapshot[] = [
  // api-gateway
  {
    repo: "api-gateway",
    branch: "main",
    total: 247,
    passing: 245,
    failing: 2,
    failures: [
      {
        test: "rate-limit.spec.ts > resets at midnight UTC",
        since: "2026-03-26",
      },
      {
        test: "webhook.spec.ts > retries on 5xx with backoff",
        since: "2026-03-28",
      },
    ],
  },
  // billing-service (fully green)
  {
    repo: "billing-service",
    branch: "main",
    total: 89,
    passing: 89,
    failing: 0,
    failures: [],
  },
  // data-pipeline
  {
    repo: "data-pipeline",
    branch: "main",
    total: 156,
    passing: 154,
    failing: 2,
    failures: [
      {
        test: "ingest.spec.ts > handles malformed CSV row",
        since: "2026-03-22",
      },
      {
        test: "ingest.spec.ts > deduplicates by hash",
        since: "2026-03-22",
      },
    ],
  },
  // frontend-web (both failures are marked flaky)
  {
    repo: "frontend-web",
    branch: "main",
    total: 312,
    passing: 310,
    failing: 2,
    failures: [
      {
        test: "checkout.spec.ts > handles network timeout",
        since: "2026-03-15",
        flaky: true,
      },
      {
        test: "login.spec.ts > redirects after auth",
        since: "2026-03-11",
        flaky: true,
      },
    ],
  },
];
| 117 | + |
/**
 * Demo rows for the test-stability table, listed in descending order of
 * `fail` count.
 */
export const DEMO_STABILITY: TestStability[] = [
  {
    test: "webhook.spec.ts > retries on 5xx with backoff",
    repo: "api-gateway",
    pass: 33,
    fail: 11,
    deleted: 0,
    lastBrokenBy: "cursor",
    flagged: true,
  },
  {
    test: "checkout.spec.ts > handles network timeout",
    repo: "frontend-web",
    pass: 51,
    fail: 8,
    deleted: 0,
    lastBrokenBy: "cursor",
    flagged: true,
  },
  {
    test: "rate-limit.spec.ts > resets at midnight UTC",
    repo: "api-gateway",
    pass: 42,
    fail: 6,
    deleted: 0,
    lastBrokenBy: "claude-code",
  },
  {
    test: "login.spec.ts > redirects after auth",
    repo: "frontend-web",
    pass: 44,
    fail: 5,
    deleted: 1,
    lastBrokenBy: "cursor",
    flagged: true,
  },
  {
    test: "ingest.spec.ts > handles malformed CSV row",
    repo: "data-pipeline",
    pass: 38,
    fail: 4,
    deleted: 0,
    lastBrokenBy: "aider",
  },
  {
    test: "auth.spec.ts > validates JWT signature",
    repo: "api-gateway",
    pass: 47,
    fail: 3,
    deleted: 0,
    lastBrokenBy: "claude-code",
  },
  {
    test: "ingest.spec.ts > deduplicates by hash",
    repo: "data-pipeline",
    pass: 30,
    fail: 3,
    deleted: 0,
    lastBrokenBy: "aider",
  },
  {
    test: "billing.spec.ts > applies tax bracket",
    repo: "billing-service",
    pass: 29,
    fail: 2,
    deleted: 0,
    lastBrokenBy: "claude-code",
  },
  {
    test: "webhook.spec.ts > signs payload with HMAC",
    repo: "api-gateway",
    pass: 35,
    fail: 2,
    deleted: 1,
    lastBrokenBy: "cursor",
    flagged: true,
  },
  {
    test: "billing.spec.ts > computes monthly total",
    repo: "billing-service",
    pass: 28,
    fail: 1,
    deleted: 1,
    lastBrokenBy: "cursor",
    flagged: true,
  },
  {
    test: "invoice.spec.ts > generates PDF receipt",
    repo: "billing-service",
    pass: 25,
    fail: 1,
    deleted: 0,
    lastBrokenBy: "claude-code",
  },
  {
    test: "pricing.spec.ts > rounds to nearest cent",
    repo: "billing-service",
    pass: 26,
    fail: 1,
    deleted: 0,
    lastBrokenBy: "aider",
  },
];
| 132 | + |
| 49 | 133 | export const DEMO_REPORTS: AgentReport[] = [ |
| 50 | 134 | { |
| 51 | 135 | slug: "claude-code", |
| 183 | 267 | |
/** HTML notice marking a report page as demo data, with two follow-up links. */
const mockBanner = [
  `<div class="report-mockup-banner">`,
  `demo data — real reporting wires up when the project-tracking pipeline ships. `,
  `<a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a>`,
  ` · `,
  `<a href="/reports">about reporting</a>`,
  `</div>`,
].join("");
| 185 | 269 | |
/**
 * Renders one repo snapshot as an HTML card: heading (repo @ branch), a stats
 * line, and a list that is either a single "all green" item or one item per
 * failing test followed by a collapsed "+ N passing tests" item.
 *
 * NOTE(review): `escape` is assumed to be this file's HTML-escaping helper,
 * not the legacy global of the same name — confirm against the imports.
 *
 * @param s Snapshot to render.
 * @returns HTML fragment without a trailing newline.
 */
const snapshotBlock = (s: TestSnapshot): string => {
  // Format the passing count once so the list and the stats line always agree
  // (the all-green line previously printed the raw number).
  const passingLabel = s.passing.toLocaleString();

  const listItems =
    s.failures.length === 0
      ? [`<li class="test-list-pass">all ${passingLabel} tests groen</li>`]
      : [
          ...s.failures.map((f) => {
            const flakyPrefix = f.flaky ? "intermittent · " : "";
            // `since` is data like every other field here, so it is escaped too.
            return `<li class="test-list-fail">${escape(f.test)} <span class="test-list-meta">${flakyPrefix}sinds ${escape(f.since)}</span></li>`;
          }),
          `<li class="test-list-collapsed">+ ${passingLabel} passing tests</li>`,
        ];

  const statusCls = s.failing === 0 ? "ok" : "bad";
  // The failing counter is omitted entirely when nothing fails.
  const failingStat =
    s.failing > 0
      ? ` · <span class="red">${s.failing.toLocaleString()} failing</span>`
      : "";

  return `<div class="test-snapshot ${statusCls}">
  <p class="test-snapshot-head"><strong>${escape(s.repo)}</strong> <span class="test-snapshot-branch">@ ${escape(s.branch)}</span></p>
  <p class="test-snapshot-stats">${s.total.toLocaleString()} tests · <span class="green">${passingLabel} passing</span>${failingStat}</p>
  <ul class="test-list">
${listItems.join("\n")}
  </ul>
</div>`;
};
| 289 | + |
/**
 * Renders an agent as a link to its drill-down page.
 *
 * The label is the matching `DEMO_REPORTS` entry's `name`, falling back to the
 * slug itself when no report has that slug.
 *
 * @param slug Agent slug; used as the last path segment of the link.
 * @returns An `<a class="agent-tag">` element.
 */
const agentTagHtml = (slug: AgentReport["slug"]): string => {
  const name = DEMO_REPORTS.find((r) => r.slug === slug)?.name ?? slug;
  // Encode the slug as a path segment so it cannot break out of the href;
  // the label was already escaped, the URL was not.
  const href = `/reports/demo/agents/${encodeURIComponent(slug)}`;
  return `<a class="agent-tag" href="${href}">${escape(name)}</a>`;
};
| 294 | + |
/**
 * Renders one stability entry as a `<tr>` with five cells: test name (repo
 * beneath it), pass count, fail count, deleted count, and the agent that last
 * broke the test.
 *
 * Flagged rows get an extra `flagged` class and a ⚠ marker after the agent
 * link. The fail cell turns red from 8 failures upward, the deleted cell as
 * soon as the count is above zero.
 *
 * @param s Stability entry to render.
 * @returns HTML table row without a trailing newline.
 */
const stabilityRow = (s: TestStability): string => {
  let rowClass = "test-stab-row";
  let warning = "";
  if (s.flagged) {
    rowClass = "test-stab-row flagged";
    warning = ` <span class="test-stab-warn" title="test-deletion of weakening dit kwartaal">⚠</span>`;
  }

  const failTone = s.fail >= 8 ? "red" : "";
  const deletedTone = s.deleted > 0 ? "red" : "";

  const cells = [
    `<td class="test-stab-name">${escape(s.test)}<div class="test-stab-repo">${escape(s.repo)}</div></td>`,
    `<td class="test-stab-num green">${s.pass}</td>`,
    `<td class="test-stab-num ${failTone}">${s.fail}</td>`,
    `<td class="test-stab-num ${deletedTone}">${s.deleted}</td>`,
    `<td class="test-stab-by">${agentTagHtml(s.lastBrokenBy)}${warning}</td>`,
  ];

  // Cells sit on their own indented lines between the row tags.
  return [`<tr class="${rowClass}">`, ...cells.map((cell) => `  ${cell}`), `</tr>`].join("\n");
};
| 306 | + |
/**
 * Builds the markdown (with embedded HTML) for the demo "tests overzicht"
 * page: banner, one card per repo snapshot, grand totals, the per-test
 * stability table, a reading guide, and footer links.
 *
 * The "Top N" count in the stability intro is taken from
 * `DEMO_STABILITY.length`, so it stays correct when rows are added or removed.
 *
 * NOTE(review): the stability heading still hard-codes "q1 2026" while the
 * intro uses `DEMO_PERIOD`; its format is not visible here, so it is left as is.
 *
 * @returns The complete page source as a string.
 */
export const testsOverviewMd = (): string => {
  // Sums one numeric field across all repo snapshots.
  const sumOf = (pick: (snap: TestSnapshot) => number): number =>
    DEMO_SNAPSHOTS.reduce((acc, snap) => acc + pick(snap), 0);

  const total = sumOf((snap) => snap.total);
  const passing = sumOf((snap) => snap.passing);
  const failing = sumOf((snap) => snap.failing);

  const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n");
  const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n");
  const stabCount = DEMO_STABILITY.length;
  // The failing total is only highlighted when there is something failing.
  const failingCls = failing > 0 ? "red" : "muted";

  return `# tests overzicht

${mockBanner}

> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen".

## huidige stand · per repo

<div class="test-snapshots">
${snapshots}
</div>

**Totaal**: ${total.toLocaleString()} tests · <span class="green">${passing.toLocaleString()} passing</span> · <span class="${failingCls}">${failing.toLocaleString()} failing</span>.

## test-stabiliteit · q1 2026

Top ${stabCount} meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken.

<table class="test-stability">
<thead>
  <tr>
    <th>test</th>
    <th class="num">pass</th>
    <th class="num">fail</th>
    <th class="num">del</th>
    <th>laatst gebroken door</th>
  </tr>
</thead>
<tbody>
${stabRows}
</tbody>
</table>

> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test.

## hoe lees je dit

- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm.
- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt.
- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon).
- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem.

---

[← exec summary](/reports/demo) · [back to /reports](/reports)
`;
};
| 360 | + |
| 186 | 361 | export const reportsLandingMd = (): string => `# reports |
| 187 | 362 | |
| 188 | 363 | > Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout. |
| 191 | 366 | |
| 192 | 367 | - [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents |
| 193 | 368 | - [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits |
| 369 | +- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam |
| 194 | 370 | |
| 195 | 371 | ## what gets measured |
| 196 | 372 | |
| 245 | 421 | |
| 246 | 422 | --- |
| 247 | 423 | |
| 248 | | -[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [back to /reports](/reports) |
| 424 | +[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports) |
| 249 | 425 | `; |
| 250 | 426 | }; |
| 251 | 427 | |