syntaxai/tdd.md · commit 44dbe97

Reports mockup: tests overview at /reports/demo/tests

Adds the per-test view with two sections: a live snapshot per repo (current
HEAD pass/fail, with the failing tests called out) and a Q1 stability table
(the 12 most-flapping tests, with pass/fail/deleted counts and the
last agent that broke each one). Cursor's test-deletion incidents are
surfaced as flagged rows so the link to agent discipline stays visible.

Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
author
syntaxai <[email protected]>
date
2026-05-08 10:39:26 +01:00
parent
6047760
commit
44dbe97dba2027df40a7ff5ed0d71b019fb6280c

3 files changed · +299 −1

modified public/style.css +110 −0
@@ -338,3 +338,113 @@ main.md strong { font-weight: 600; }
338338 .report-tiles { grid-template-columns: 1fr; }
339339 .report-bar-row { grid-template-columns: 130px 1fr 50px; }
340340 }
341+
/* tests overview */

/* One monospace stack shared by the snapshot cards, the test lists and the
   stability table, declared once instead of per rule. */
.test-snapshot-head,
.test-snapshot-stats,
.test-list,
main.md table.test-stability {
  font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace;
}

/* --- per-repo snapshot cards --- */

.test-snapshots {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  gap: 1rem;
  margin: 1rem 0 1.5rem;
}

.test-snapshot {
  border: 1px solid var(--border);
  border-radius: 6px;
  padding: 1rem 1.1rem;
  background: var(--code-bg);
}

/* Status stripe on the left edge of a card. */
.test-snapshot.bad {
  border-left: 3px solid var(--red);
}
.test-snapshot.ok {
  border-left: 3px solid var(--green);
}

.test-snapshot-head {
  font-size: 0.95rem;
  margin: 0 0 0.3rem;
}

.test-snapshot-branch {
  color: var(--muted);
  font-weight: 400;
  font-size: 0.85rem;
}

.test-snapshot-stats {
  font-size: 0.8rem;
  color: var(--muted);
  margin: 0 0 0.7rem;
}

/* --- test list inside a card --- */

.test-list {
  list-style: none;
  margin: 0;
  padding: 0;
  font-size: 0.82rem;
}

.test-list li {
  margin: 0.25rem 0;
  padding-left: 1.2rem;
  position: relative;
  line-height: 1.4;
}

/* The marker glyph sits in the left padding of each item; the glyph itself
   is chosen by the per-state rules below. */
.test-list li::before {
  position: absolute;
  left: 0;
  top: 0;
}

.test-list-fail {
  color: var(--fg);
}
.test-list-fail::before {
  content: "✗";
  color: var(--red);
}

.test-list-pass {
  color: var(--green);
}
.test-list-pass::before {
  content: "✓";
  color: var(--green);
}

.test-list-collapsed {
  color: var(--muted);
}
.test-list-collapsed::before {
  content: "+";
  color: var(--muted);
}

.test-list-meta {
  color: var(--muted);
  font-size: 0.78rem;
}

/* --- stability table --- */

main.md table.test-stability {
  width: 100%;
  border-collapse: collapse;
  font-size: 0.85rem;
  margin: 1rem 0 1.5rem;
}

main.md table.test-stability th,
main.md table.test-stability td {
  text-align: left;
  padding: 0.55rem 0.6rem;
  border-bottom: 1px solid var(--border);
  vertical-align: top;
}

/* Numeric columns: narrow and right-aligned. */
main.md table.test-stability th.num,
main.md table.test-stability td.test-stab-num {
  text-align: right;
  width: 50px;
}

.test-stab-name {
  color: var(--fg);
}

.test-stab-repo {
  color: var(--muted);
  font-size: 0.75rem;
  margin-top: 0.15rem;
}

.test-stab-num.red {
  color: var(--red);
  font-weight: 600;
}

/* The "green" modifier is rendered in the muted colour, not in green. */
.test-stab-num.green {
  color: var(--muted);
}

.test-stab-row.flagged {
  background: color-mix(in srgb, var(--red) 8%, transparent);
}

.test-stab-warn {
  color: var(--red);
  margin-left: 0.25rem;
  cursor: help;
}

/* --- agent pill link --- */

.agent-tag {
  font-size: 0.78rem;
  padding: 0.15rem 0.5rem;
  border: 1px solid var(--border);
  border-radius: 999px;
  color: var(--muted);
  text-decoration: none;
  white-space: nowrap;
}

.agent-tag:hover {
  color: var(--fg);
  border-color: var(--fg);
}

/* --- narrow screens --- */

@media (max-width: 600px) {
  .test-snapshots {
    grid-template-columns: 1fr;
  }
  main.md table.test-stability {
    font-size: 0.78rem;
  }
}
modified src/reports.ts +177 −1
@@ -46,6 +46,90 @@ export const DEMO_PERIOD = "2026-01-01 → 2026-03-31";
4646 export const DEMO_ORG = "acme-corp";
4747 export const DEMO_REPOS = 4;
4848
/** One failing test listed inside a repo snapshot. */
interface TestFailure {
  /** Display name of the test; the demo data uses "<spec file> > <case title>". */
  test: string;
  /** Date string rendered after "sinds" in the failure list; the demo data uses YYYY-MM-DD. */
  since: string;
  /** When true, the rendered entry is prefixed with "intermittent". */
  flaky?: boolean;
}
54+
/** Current test status of one repo on one branch. */
interface TestSnapshot {
  /** Repository name shown as the card title. */
  repo: string;
  /** Branch name shown after "@" in the card title. */
  branch: string;
  /** Number of tests in the suite. */
  total: number;
  /** Number of tests currently passing. */
  passing: number;
  /** Number of tests currently failing; 0 renders the card as "ok", anything else as "bad". */
  failing: number;
  /** The failing tests to list individually; an empty array renders the "all green" line. */
  failures: TestFailure[];
}
63+
/** Per-test counters for one row of the stability table. */
interface TestStability {
  /** Display name of the test. */
  test: string;
  /** Repository the test lives in; rendered under the test name. */
  repo: string;
  /** Value of the "pass" column. */
  pass: number;
  /** Value of the "fail" column; highlighted in red from 8 upwards. */
  fail: number;
  /** Value of the "del" column; highlighted in red when above 0. */
  deleted: number;
  /** Slug of the agent shown in the "laatst gebroken door" column. */
  lastBrokenBy: AgentReport["slug"];
  /** When true, the row gets the flagged background and a warning marker. */
  flagged?: boolean;
}
73+
/**
 * Demo data: current test status for each of the four demo repos.
 * In every entry `total` equals `passing + failing`, and `failures` lists
 * exactly `failing` tests.
 */
export const DEMO_SNAPSHOTS: TestSnapshot[] = [
  // api-gateway: two failures
  {
    repo: "api-gateway",
    branch: "main",
    total: 247,
    passing: 245,
    failing: 2,
    failures: [
      {
        test: "rate-limit.spec.ts > resets at midnight UTC",
        since: "2026-03-26",
      },
      {
        test: "webhook.spec.ts > retries on 5xx with backoff",
        since: "2026-03-28",
      },
    ],
  },
  // billing-service: all green
  {
    repo: "billing-service",
    branch: "main",
    total: 89,
    passing: 89,
    failing: 0,
    failures: [],
  },
  // data-pipeline: two failures, both in ingest.spec.ts
  {
    repo: "data-pipeline",
    branch: "main",
    total: 156,
    passing: 154,
    failing: 2,
    failures: [
      {
        test: "ingest.spec.ts > handles malformed CSV row",
        since: "2026-03-22",
      },
      {
        test: "ingest.spec.ts > deduplicates by hash",
        since: "2026-03-22",
      },
    ],
  },
  // frontend-web: two failures, both marked flaky
  {
    repo: "frontend-web",
    branch: "main",
    total: 312,
    passing: 310,
    failing: 2,
    failures: [
      {
        test: "checkout.spec.ts > handles network timeout",
        since: "2026-03-15",
        flaky: true,
      },
      {
        test: "login.spec.ts > redirects after auth",
        since: "2026-03-11",
        flaky: true,
      },
    ],
  },
];
117+
/**
 * Demo data for the stability table: twelve tests, listed from highest to
 * lowest `fail` count. Rows with `flagged: true` are rendered with a warning
 * marker.
 */
export const DEMO_STABILITY: TestStability[] = [
  {
    test: "webhook.spec.ts > retries on 5xx with backoff", repo: "api-gateway",
    pass: 33, fail: 11, deleted: 0, lastBrokenBy: "cursor", flagged: true,
  },
  {
    test: "checkout.spec.ts > handles network timeout", repo: "frontend-web",
    pass: 51, fail: 8, deleted: 0, lastBrokenBy: "cursor", flagged: true,
  },
  {
    test: "rate-limit.spec.ts > resets at midnight UTC", repo: "api-gateway",
    pass: 42, fail: 6, deleted: 0, lastBrokenBy: "claude-code",
  },
  {
    test: "login.spec.ts > redirects after auth", repo: "frontend-web",
    pass: 44, fail: 5, deleted: 1, lastBrokenBy: "cursor", flagged: true,
  },
  {
    test: "ingest.spec.ts > handles malformed CSV row", repo: "data-pipeline",
    pass: 38, fail: 4, deleted: 0, lastBrokenBy: "aider",
  },
  {
    test: "auth.spec.ts > validates JWT signature", repo: "api-gateway",
    pass: 47, fail: 3, deleted: 0, lastBrokenBy: "claude-code",
  },
  {
    test: "ingest.spec.ts > deduplicates by hash", repo: "data-pipeline",
    pass: 30, fail: 3, deleted: 0, lastBrokenBy: "aider",
  },
  {
    test: "billing.spec.ts > applies tax bracket", repo: "billing-service",
    pass: 29, fail: 2, deleted: 0, lastBrokenBy: "claude-code",
  },
  {
    test: "webhook.spec.ts > signs payload with HMAC", repo: "api-gateway",
    pass: 35, fail: 2, deleted: 1, lastBrokenBy: "cursor", flagged: true,
  },
  {
    test: "billing.spec.ts > computes monthly total", repo: "billing-service",
    pass: 28, fail: 1, deleted: 1, lastBrokenBy: "cursor", flagged: true,
  },
  {
    test: "invoice.spec.ts > generates PDF receipt", repo: "billing-service",
    pass: 25, fail: 1, deleted: 0, lastBrokenBy: "claude-code",
  },
  {
    test: "pricing.spec.ts > rounds to nearest cent", repo: "billing-service",
    pass: 26, fail: 1, deleted: 0, lastBrokenBy: "aider",
  },
];
132+
49133 export const DEMO_REPORTS: AgentReport[] = [
50134 {
51135 slug: "claude-code",
@@ -183,6 +267,97 @@ const streakBox = (a: AgentReport): string => {
183267
184268 const mockBanner = `<div class="report-mockup-banner">demo data — real reporting wires up when the project-tracking pipeline ships. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a> · <a href="/reports">about reporting</a></div>`;
185269
/**
 * Renders one repo's current test status as a `.test-snapshot` card.
 *
 * The card gets the `ok` class when `failing` is 0 and `bad` otherwise. When
 * `failures` is empty the list holds a single "all green" item; otherwise it
 * holds one item per failure followed by a collapsed line with the passing
 * count.
 *
 * @param s - Snapshot to render.
 * @returns An HTML fragment consisting of a single `<div>`.
 */
const snapshotBlock = (s: TestSnapshot): string => {
  // Format once so every place that shows the passing count agrees.
  const passingCount = s.passing.toLocaleString();

  const failureItem = (f: TestSnapshot["failures"][number]): string => {
    const flakyLabel = f.flaky ? "intermittent · " : "";
    // `since` is escaped like every other interpolated string.
    return `<li class="test-list-fail">${escape(f.test)} <span class="test-list-meta">${flakyLabel}sinds ${escape(f.since)}</span></li>`;
  };

  const listItems =
    s.failures.length === 0
      ? [`<li class="test-list-pass">all ${passingCount} tests groen</li>`]
      : [
          ...s.failures.map((f) => failureItem(f)),
          // No literal "+" here: the stylesheet's `.test-list-collapsed::before`
          // rule already supplies the marker, so a second one would render "+ +".
          `<li class="test-list-collapsed">${passingCount} passing tests</li>`,
        ];

  const statusCls = s.failing === 0 ? "ok" : "bad";
  const failingStat =
    s.failing > 0 ? ` · <span class="red">${s.failing.toLocaleString()} failing</span>` : "";

  return `<div class="test-snapshot ${statusCls}">
 <p class="test-snapshot-head"><strong>${escape(s.repo)}</strong> <span class="test-snapshot-branch">@ ${escape(s.branch)}</span></p>
 <p class="test-snapshot-stats">${s.total.toLocaleString()} tests · <span class="green">${passingCount} passing</span>${failingStat}</p>
 <ul class="test-list">
${listItems.join("\n")}
 </ul>
</div>`;
};
289+
/**
 * Builds the pill-shaped link to an agent's drill-down page.
 *
 * The label is the `name` of the matching entry in `DEMO_REPORTS`; when no
 * entry has this slug, the slug itself is used as the label.
 *
 * @param slug - Agent slug, used verbatim as the last path segment of the link.
 * @returns An `<a class="agent-tag">` element as an HTML string.
 */
const agentTagHtml = (slug: AgentReport["slug"]): string => {
  const report = DEMO_REPORTS.find((candidate) => candidate.slug === slug);
  const label = report?.name ?? slug;
  const href = `/reports/demo/agents/${slug}`;
  return `<a class="agent-tag" href="${href}">${escape(label)}</a>`;
};
294+
/**
 * Renders one row of the stability table.
 *
 * Flagged rows get the extra `flagged` class and a warning marker after the
 * agent tag. The fail count is marked red from 8 upwards, the deleted count
 * whenever it is above 0.
 *
 * @param s - Stability record to render.
 * @returns A `<tr>` element as an HTML string.
 */
const stabilityRow = (s: TestStability): string => {
  let rowClass = "test-stab-row";
  let warnMarker = "";
  if (s.flagged) {
    rowClass += " flagged";
    warnMarker = ` <span class="test-stab-warn" title="test-deletion of weakening dit kwartaal">⚠</span>`;
  }

  const failTone = s.fail >= 8 ? "red" : "";
  const deletedTone = s.deleted > 0 ? "red" : "";

  const cells = [
    `<td class="test-stab-name">${escape(s.test)}<div class="test-stab-repo">${escape(s.repo)}</div></td>`,
    `<td class="test-stab-num green">${s.pass}</td>`,
    `<td class="test-stab-num ${failTone}">${s.fail}</td>`,
    `<td class="test-stab-num ${deletedTone}">${s.deleted}</td>`,
    `<td class="test-stab-by">${agentTagHtml(s.lastBrokenBy)}${warnMarker}</td>`,
  ];

  // Each cell goes on its own line, indented by one space inside the row.
  return [`<tr class="${rowClass}">`, ...cells.map((cell) => ` ${cell}`), `</tr>`].join("\n");
};
306+
/**
 * Builds the markdown body of the demo tests overview page.
 *
 * The page contains the mockup banner, one snapshot card per entry in
 * `DEMO_SNAPSHOTS` with the summed totals underneath, the stability table
 * built from `DEMO_STABILITY`, and a short reading guide.
 *
 * @returns Markdown with embedded HTML for the cards and the table.
 */
export const testsOverviewMd = (): string => {
  // Sum all three counters in a single pass over the snapshots.
  const totals = DEMO_SNAPSHOTS.reduce(
    (acc, snap) => ({
      total: acc.total + snap.total,
      passing: acc.passing + snap.passing,
      failing: acc.failing + snap.failing,
    }),
    { total: 0, passing: 0, failing: 0 },
  );
  const failingCls = totals.failing > 0 ? "red" : "muted";

  const snapshots = DEMO_SNAPSHOTS.map((snap) => snapshotBlock(snap)).join("\n");
  const stabRows = DEMO_STABILITY.map((row) => stabilityRow(row)).join("\n");

  // Derived from the data so the prose cannot drift from the table length.
  const stabCount = DEMO_STABILITY.length;

  return `# tests overzicht

${mockBanner}

> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen".

## huidige stand · per repo

<div class="test-snapshots">
${snapshots}
</div>

**Totaal**: ${totals.total.toLocaleString()} tests · <span class="green">${totals.passing.toLocaleString()} passing</span> · <span class="${failingCls}">${totals.failing.toLocaleString()} failing</span>.

## test-stabiliteit · q1 2026

Top ${stabCount} meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken.

<table class="test-stability">
<thead>
 <tr>
 <th>test</th>
 <th class="num">pass</th>
 <th class="num">fail</th>
 <th class="num">del</th>
 <th>laatst gebroken door</th>
 </tr>
</thead>
<tbody>
${stabRows}
</tbody>
</table>

> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test.

## hoe lees je dit

- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm.
- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt.
- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon).
- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem.

---

[← exec summary](/reports/demo) · [back to /reports](/reports)
`;
};
360+
186361 export const reportsLandingMd = (): string => `# reports
187362
188363 > Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.
@@ -191,6 +366,7 @@ This is a design preview. The pipeline that ingests real repos isn't wired yet;
191366
192367 - [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents
193368 - [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits
369+- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam
194370
195371 ## what gets measured
196372
@@ -245,7 +421,7 @@ Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet v
245421
246422 ---
247423
248-[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [back to /reports](/reports)
424+[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)
249425 `;
250426 };
251427
modified src/server.ts +12 −0
@@ -9,6 +9,7 @@ import {
99 reportsLandingMd,
1010 execSummaryMd,
1111 agentDrilldownMd,
12+ testsOverviewMd,
1213 DEMO_REPORTS,
1314 } from "./reports";
1415
@@ -810,6 +811,17 @@ ${rows}
810811 return htmlResponse(html);
811812 },
812813
814+ "/reports/demo/tests": async () => {
815+ const html = await renderPage({
816+ title: "Tests overzicht (demo) — tdd.md",
817+ description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.",
818+ bodyMarkdown: testsOverviewMd(),
819+ ogPath: "https://tdd.md/reports/demo/tests",
820+ noindex: true,
821+ });
822+ return htmlResponse(html);
823+ },
824+
813825 "/reports/demo/agents/:slug": async (req) => {
814826 const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"];
815827 const md = agentDrilldownMd(slug);