44dbe97dba2027df40a7ff5ed0d71b019fb6280c diff --git a/public/style.css b/public/style.css index c5220ab9fee147104a0c0b58813d5367c7320baa..66ad590dc4b9433ad33db43ca8210b041ac60e5b 100644 --- a/public/style.css +++ b/public/style.css @@ -338,3 +338,113 @@ main.md strong { font-weight: 600; } .report-tiles { grid-template-columns: 1fr; } .report-bar-row { grid-template-columns: 130px 1fr 50px; } } + +/* tests overview */ + +.test-snapshots { + display: grid; + grid-template-columns: repeat(2, 1fr); + gap: 1rem; + margin: 1rem 0 1.5rem; +} + +.test-snapshot { + border: 1px solid var(--border); + border-radius: 6px; + padding: 1rem 1.1rem; + background: var(--code-bg); +} +.test-snapshot.bad { border-left: 3px solid var(--red); } +.test-snapshot.ok { border-left: 3px solid var(--green); } + +.test-snapshot-head { + font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace; + font-size: 0.95rem; + margin: 0 0 0.3rem; +} +.test-snapshot-branch { + color: var(--muted); + font-weight: 400; + font-size: 0.85rem; +} +.test-snapshot-stats { + font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace; + font-size: 0.8rem; + color: var(--muted); + margin: 0 0 0.7rem; +} + +.test-list { + list-style: none; + margin: 0; + padding: 0; + font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace; + font-size: 0.82rem; +} +.test-list li { margin: 0.25rem 0; padding-left: 1.2rem; position: relative; line-height: 1.4; } +.test-list li::before { + position: absolute; + left: 0; + top: 0; +} +.test-list-fail { color: var(--fg); } +.test-list-fail::before { content: "✗"; color: var(--red); } +.test-list-pass { color: var(--green); } +.test-list-pass::before { content: "✓"; color: var(--green); } +.test-list-collapsed { color: var(--muted); } +.test-list-collapsed::before { content: "+"; color: var(--muted); } + +.test-list-meta { + color: var(--muted); + font-size: 0.78rem; +} 
+ +main.md table.test-stability { + width: 100%; + border-collapse: collapse; + font-size: 0.85rem; + font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace; + margin: 1rem 0 1.5rem; +} +main.md table.test-stability th, +main.md table.test-stability td { + text-align: left; + padding: 0.55rem 0.6rem; + border-bottom: 1px solid var(--border); + vertical-align: top; +} +main.md table.test-stability th.num, +main.md table.test-stability td.test-stab-num { + text-align: right; + width: 50px; +} +.test-stab-name { color: var(--fg); } +.test-stab-repo { + color: var(--muted); + font-size: 0.75rem; + margin-top: 0.15rem; +} +.test-stab-num.red { color: var(--red); font-weight: 600; } +.test-stab-num.green { color: var(--muted); } +.test-stab-row.flagged { background: color-mix(in srgb, var(--red) 8%, transparent); } +.test-stab-warn { + color: var(--red); + margin-left: 0.25rem; + cursor: help; +} + +.agent-tag { + font-size: 0.78rem; + padding: 0.15rem 0.5rem; + border: 1px solid var(--border); + border-radius: 999px; + color: var(--muted); + text-decoration: none; + white-space: nowrap; +} +.agent-tag:hover { color: var(--fg); border-color: var(--fg); } + +@media (max-width: 600px) { + .test-snapshots { grid-template-columns: 1fr; } + main.md table.test-stability { font-size: 0.78rem; } +} diff --git a/src/reports.ts b/src/reports.ts index 4c11445c14b0bf3e154074e040adac53f82412b1..e5a003d491d306fd5f3e5e6bd332ae3b288a4871 100644 --- a/src/reports.ts +++ b/src/reports.ts @@ -46,6 +46,90 @@ export const DEMO_PERIOD = "2026-01-01 → 2026-03-31"; export const DEMO_ORG = "acme-corp"; export const DEMO_REPOS = 4; +/** A failing test listed in a TestSnapshot: `test` is the display name, `since` is the date string rendered after "sinds", and the optional `flaky` flag adds the "intermittent · " prefix in snapshotBlock. */ interface TestFailure { + test: string; + since: string; + flaky?: boolean; +} + +/** Test counts for one repo branch plus its failing tests; snapshotBlock picks the "ok" status class when `failing` is 0 and "bad" otherwise. */ interface TestSnapshot { + repo: string; + branch: string; + total: number; + passing: number; + failing: number; + failures: TestFailure[]; +} + +/** Per-test pass/fail/deleted counts for the stability table, plus the slug of the agent that last broke the test; when `flagged` is set, stabilityRow uses the "test-stab-row flagged" class and appends its `warn` fragment after the agent tag. */ interface TestStability { + test: string; + repo: string; + pass: number; + fail: number; + deleted: 
number; + lastBrokenBy: AgentReport["slug"]; + flagged?: boolean; +} + +/** Demo snapshot fixtures, one entry per repo; testsOverviewMd renders each entry with snapshotBlock and sums total/passing/failing across them. */ export const DEMO_SNAPSHOTS: TestSnapshot[] = [ + { + repo: "api-gateway", + branch: "main", + total: 247, + passing: 245, + failing: 2, + failures: [ + { test: "rate-limit.spec.ts > resets at midnight UTC", since: "2026-03-26" }, + { test: "webhook.spec.ts > retries on 5xx with backoff", since: "2026-03-28" }, + ], + }, + { + repo: "billing-service", + branch: "main", + total: 89, + passing: 89, + failing: 0, + failures: [], + }, + { + repo: "data-pipeline", + branch: "main", + total: 156, + passing: 154, + failing: 2, + failures: [ + { test: "ingest.spec.ts > handles malformed CSV row", since: "2026-03-22" }, + { test: "ingest.spec.ts > deduplicates by hash", since: "2026-03-22" }, + ], + }, + { + repo: "frontend-web", + branch: "main", + total: 312, + passing: 310, + failing: 2, + failures: [ + { test: "checkout.spec.ts > handles network timeout", since: "2026-03-15", flaky: true }, + { test: "login.spec.ts > redirects after auth", since: "2026-03-11", flaky: true }, + ], + }, +]; + +/** Demo stability fixtures that testsOverviewMd renders as table rows via stabilityRow; entries are listed in descending `fail` order. */ export const DEMO_STABILITY: TestStability[] = [ + { test: "webhook.spec.ts > retries on 5xx with backoff", repo: "api-gateway", pass: 33, fail: 11, deleted: 0, lastBrokenBy: "cursor", flagged: true }, + { test: "checkout.spec.ts > handles network timeout", repo: "frontend-web", pass: 51, fail: 8, deleted: 0, lastBrokenBy: "cursor", flagged: true }, + { test: "rate-limit.spec.ts > resets at midnight UTC", repo: "api-gateway", pass: 42, fail: 6, deleted: 0, lastBrokenBy: "claude-code" }, + { test: "login.spec.ts > redirects after auth", repo: "frontend-web", pass: 44, fail: 5, deleted: 1, lastBrokenBy: "cursor", flagged: true }, + { test: "ingest.spec.ts > handles malformed CSV row", repo: "data-pipeline", pass: 38, fail: 4, deleted: 0, lastBrokenBy: "aider" }, + { test: "auth.spec.ts > validates JWT signature", repo: "api-gateway", pass: 47, fail: 3, deleted: 0, lastBrokenBy: "claude-code" }, + { test: 
"ingest.spec.ts > deduplicates by hash", repo: "data-pipeline", pass: 30, fail: 3, deleted: 0, lastBrokenBy: "aider" }, + { test: "billing.spec.ts > applies tax bracket", repo: "billing-service", pass: 29, fail: 2, deleted: 0, lastBrokenBy: "claude-code" }, + { test: "webhook.spec.ts > signs payload with HMAC", repo: "api-gateway", pass: 35, fail: 2, deleted: 1, lastBrokenBy: "cursor", flagged: true }, + { test: "billing.spec.ts > computes monthly total", repo: "billing-service", pass: 28, fail: 1, deleted: 1, lastBrokenBy: "cursor", flagged: true }, + { test: "invoice.spec.ts > generates PDF receipt", repo: "billing-service", pass: 25, fail: 1, deleted: 0, lastBrokenBy: "claude-code" }, + { test: "pricing.spec.ts > rounds to nearest cent", repo: "billing-service", pass: 26, fail: 1, deleted: 0, lastBrokenBy: "aider" }, +]; + export const DEMO_REPORTS: AgentReport[] = [ { slug: "claude-code", @@ -183,6 +267,97 @@ const streakBox = (a: AgentReport): string => { const mockBanner = `
demo data — real reporting wires up when the project-tracking pipeline ships. why tdd.md needs this · about reporting
`; +const snapshotBlock = (s: TestSnapshot): string => { + const failuresHtml = s.failures.length === 0 + ? `
  • all ${s.passing} tests groen
  • ` + : s.failures + .map( + (f) => + `
  • ${escape(f.test)} ${f.flaky ? "intermittent · " : ""}sinds ${f.since}
  • `, + ) + .concat([`
  • + ${s.passing.toLocaleString()} passing tests
  • `]) + .join("\n"); + const statusCls = s.failing === 0 ? "ok" : "bad"; + return `
    +

    ${escape(s.repo)} @ ${escape(s.branch)}

    +

    ${s.total.toLocaleString()} tests · ${s.passing.toLocaleString()} passing${s.failing > 0 ? ` · ${s.failing.toLocaleString()} failing` : ""}

    + +
    `; +}; + +const agentTagHtml = (slug: AgentReport["slug"]): string => { + const name = DEMO_REPORTS.find((r) => r.slug === slug)?.name ?? slug; + return `${escape(name)}`; +}; + +const stabilityRow = (s: TestStability): string => { + const cls = s.flagged ? "test-stab-row flagged" : "test-stab-row"; + const warn = s.flagged ? ` ` : ""; + return ` + ${escape(s.test)}
    ${escape(s.repo)}
    + ${s.pass} + ${s.fail} + ${s.deleted} + ${agentTagHtml(s.lastBrokenBy)}${warn} +`; +}; + +export const testsOverviewMd = (): string => { + const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0); + const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0); + const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0); + const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n"); + const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n"); + return `# tests overzicht + +${mockBanner} + +> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". + +## huidige stand · per repo + +
    +${snapshots} +
    + +**Totaal**: ${total.toLocaleString()} tests · ${passing.toLocaleString()} passing · ${failing.toLocaleString()} failing. + +## test-stabiliteit · q1 2026 + +Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken. + + + + + + + + + + + + +${stabRows} + +
    testpassfaildellaatst gebroken door
    + +> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt een klik op een test-naam door naar de commit-historie van die specifieke test. + +## hoe lees je dit + +- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm. +- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt. +- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon). +- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem. + +--- + +[← exec summary](/reports/demo) · [back to /reports](/reports) +`; +}; + export const reportsLandingMd = (): string => `# reports > Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout. @@ -191,6 +366,7 @@ This is a design preview. The pipeline that ingests real repos isn't wired yet; - [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents - [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits +- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam ## what gets measured @@ -245,7 +421,7 @@ Discipline, niet code-kwaliteit. 
Hidden tests (zoals op de katas) bestaan niet v --- -[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [back to /reports](/reports) +[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports) `; }; diff --git a/src/server.ts b/src/server.ts index 01703187c69b01d6cc65a9de2b1777c4851d28ff..a7bb7f05950ab1967d486697b4cba920e2ca4f28 100644 --- a/src/server.ts +++ b/src/server.ts @@ -9,6 +9,7 @@ import { reportsLandingMd, execSummaryMd, agentDrilldownMd, + testsOverviewMd, DEMO_REPORTS, } from "./reports"; @@ -810,6 +811,17 @@ ${rows} return htmlResponse(html); }, + "/reports/demo/tests": async () => { + const html = await renderPage({ + title: "Tests overzicht (demo) — tdd.md", + description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.", + bodyMarkdown: testsOverviewMd(), + ogPath: "https://tdd.md/reports/demo/tests", + noindex: true, + }); + return htmlResponse(html); + }, + "/reports/demo/agents/:slug": async (req) => { const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"]; const md = agentDrilldownMd(slug);