f3e2bacd499ddd0a46a8999f16d03de4bec529e0 diff --git a/src/c21_app.ts b/src/c21_app.ts index 6c80556aa8e28d7942d91bcce4fd8ba0362a7b67..8f94a9a0e8e4ac01b2a5005f8cc87be4e185b6fd 100644 --- a/src/c21_app.ts +++ b/src/c21_app.ts @@ -78,15 +78,15 @@ const demoContext = () => ({ scopeLabel: `${DEMO_REPOS} repos · ${DEMO_ORG}`, bannerHtml: DEMO_BANNER_HTML, narrative: { - changedHeading: "wat veranderde dit kwartaal", + changedHeading: "what changed this quarter", changedBody: - "Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de `api-gateway` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.", - doingHeading: "wat we doen", + "Cursor's score dropped 15 points after agent-mode became default in March; test-deletion incidents climbed from 2% to 14% of refactor commits, concentrated in the `api-gateway` repo. Claude Code's score rose after a phase-tagged commit prefix was added to CLAUDE.md at the end of January. Aider stays steadily high — auto-commit-per-edit prevents most cross-phase cheating on its own.", + doingHeading: "what we're doing", doingBody: - "- **Cursor in `api-gateway`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel \"never delete a test in a refactor commit\" gepind ([details →](/reports/demo/agents/cursor)).\n- **Claude Code uitrollen**: het CLAUDE.md-template dat in `billing-service` werkte naar de andere drie repos kopiëren.\n- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.", + "- **Cursor in `api-gateway`**: agent-mode disabled for refactor prompts, CONVENTIONS rule \"never delete a test in a refactor commit\" pinned ([details →](/reports/demo/agents/cursor)).\n- **Roll out Claude Code**: copy the CLAUDE.md template that worked in `billing-service` to the other three repos.\n- **Next reading**: 2026-04-30, mid-Q2, to check whether the Cursor fix holds.", }, footerLinks: - "[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)", + "[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overview](/reports/demo/tests) · [back to /reports](/reports)", }); const liveContext = async () => { @@ -102,7 +102,7 @@ const liveContext = async () => { period, scopeLabel: `${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} · ${live.totalCommits} commits sampled${live.unknownCount > 0 ? ` (${live.unknownCount} unattributed, excluded)` : ""}`, bannerHtml: LIVE_BANNER_HTML, - footerLinks: `${drillLinks ? drillLinks + " · " : ""}[tests overzicht](/reports/live/tests) · [demo preview](/reports/demo) · [back to /reports](/reports)`, + footerLinks: `${drillLinks ? drillLinks + " · " : ""}[tests overview](/reports/live/tests) · [demo preview](/reports/demo) · [back to /reports](/reports)`, }; }; @@ -476,7 +476,7 @@ ${rows} "/reports/demo": async () => { const ctx = demoContext(); const html = await renderPage({ - title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md", + title: "TDD-discipline report · Q1 2026 (demo) — tdd.md", description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.", bodyMarkdown: execSummaryMd(ctx), ogPath: "https://tdd.md/reports/demo", @@ -487,7 +487,7 @@ ${rows} "/reports/demo/tests": async () => { const html = await renderPage({ - title: "Tests overzicht (demo) — tdd.md", + title: "Tests overview (demo) — tdd.md", description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.", bodyMarkdown: testsOverviewMd({ period: DEMO_PERIOD, @@ -523,8 +523,8 @@ ${rows} "/reports/live": async () => { const ctx = await liveContext(); const html = await renderPage({ - title: "TDD-discipline rapport · live — tdd.md", - description: `Live discipline rapport gebouwd uit de echte commit-historie van syntaxai/tdd.md (laatste ${LIVE_FETCH_COUNT} commits, 5-min cache).`, + title: "TDD-discipline report · live — tdd.md", + description: `Live discipline report built from the real commit history of syntaxai/tdd.md (last ${LIVE_FETCH_COUNT} commits, 5-min cache).`, bodyMarkdown: execSummaryMd(ctx), ogPath: "https://tdd.md/reports/live", noindex: true, @@ -536,14 +536,14 @@ ${rows} const data = await buildLiveTestData(LIVE_REPO_OWNER, LIVE_REPO_NAME); const ranOn = data.ranAt ? new Date(data.ranAt).toISOString().slice(0, 10) : null; const period = data.runsCount === 0 - ? "geen runs in bundle" - : `last run ${ranOn} · ${data.runsCount} run${data.runsCount === 1 ? "" : "s"} cumulatief`; + ? "no runs in bundle" + : `last run ${ranOn} · ${data.runsCount} run${data.runsCount === 1 ? "" : "s"} cumulative`; const unavailableNote = data.runsCount === 0 - ? "Nog geen test-runs gebundeld. De volgende deploy draait `bun test --reporter=junit` op de huidige HEAD en publiceert het resultaat hier. Stabiliteit (flaky %, deletion) bouwt zich op naarmate er meer runs in de bundle staan — de demo op [/reports/demo/tests](/reports/demo/tests) toont waar het naartoe groeit." + ? "No test runs bundled yet. The next deploy will run `bun test --reporter=junit` on the current HEAD and publish the result here. Stability (flaky %, deletion) builds up as more runs land in the bundle — the demo at [/reports/demo/tests](/reports/demo/tests) shows where this is heading." : undefined; const html = await renderPage({ - title: "Tests overzicht · live — tdd.md", - description: `Live test-snapshot van ${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} — ${data.runsCount} run${data.runsCount === 1 ? "" : "s"} gebundeld.`, + title: "Tests overview · live — tdd.md", + description: `Live test snapshot of ${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} — ${data.runsCount} run${data.runsCount === 1 ? "" : "s"} bundled.`, bodyMarkdown: testsOverviewMd({ period, bannerHtml: LIVE_BANNER_HTML, @@ -567,7 +567,7 @@ ${rows} const entry = ctx.reports.find((r) => r.slug === slug)!; const html = await renderPage({ title: `${entry.name} drill-down · live — tdd.md`, - description: `Live drill-down voor ${entry.name} op syntaxai/tdd.md — trend, failure-mode breakdown, recent commits.`, + description: `Live drill-down for ${entry.name} on syntaxai/tdd.md — trend, failure-mode breakdown, recent commits.`, bodyMarkdown: md, ogPath: `https://tdd.md/reports/live/agents/${slug}`, noindex: true, diff --git a/src/c51_render_reports.ts b/src/c51_render_reports.ts index 126d8b61b14d419aaff7f3484ffa1bba765ffb1d..7f1be09ab39a6fc45fe3dc4e69964b53f76b0f16 100644 --- a/src/c51_render_reports.ts +++ b/src/c51_render_reports.ts @@ -121,7 +121,7 @@ const agentTagHtml = (slug: AgentReport["slug"]): string => { const stabilityRow = (s: TestStability): string => { const cls = s.flagged ? "test-stab-row flagged" : "test-stab-row"; - const warn = s.flagged ? ` ` : ""; + const warn = s.flagged ? ` ` : ""; return ` ${escape(s.test)}
${escape(s.repo)}
${s.pass} @@ -141,8 +141,8 @@ Two views of the same shape: - **[/reports/demo](/reports/demo)** — the polished design preview with synthetic data for three agents and four repos. Useful for screenshots and showing the full failure-mode breakdown the live view can't compute yet. Drill-downs: -- [live drill-down per agent](/reports/live/agents/claude-code) · [tests overzicht (live: placeholder)](/reports/live/tests) -- [demo drill-down per agent](/reports/demo/agents/cursor) · [tests overzicht (demo)](/reports/demo/tests) +- [live drill-down per agent](/reports/live/agents/claude-code) · [tests overview (live)](/reports/live/tests) +- [demo drill-down per agent](/reports/demo/agents/cursor) · [tests overview (demo)](/reports/demo/tests) Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging on tracked branches lands in a follow-up sliver; live reporting from the GitHub API already works for the dogfood case (the tdd.md repo itself). @@ -186,19 +186,19 @@ ${ctx.narrative.doingBody} ` : ""; - return `# tdd-discipline rapport · ${ctx.period} + return `# tdd-discipline report · ${ctx.period} ${ctx.bannerHtml} -> **Periode** ${ctx.period} · **Scope** ${escape(ctx.scopeLabel)} · ${totalCommits.toLocaleString()} AI-toegeschreven commits. +> **Period** ${ctx.period} · **Scope** ${escape(ctx.scopeLabel)} · ${totalCommits.toLocaleString()} AI-attributed commits.
${tiles}
-${narrativeBlock}## wat dit getal *niet* meet +${narrativeBlock}## what this number does *not* measure -Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy. +Discipline, not code quality. Hidden tests (like the ones on the katas) don't exist for production repos, so *tautological* tests and *weakly-asserted* checks stay invisible to the judge. This number says: "the agent honours the TDD cycle". It says nothing about whether the tests it writes assert the right thing. For that second signal, kata performance ([leaderboard](/leaderboard)) remains the proxy. --- @@ -226,9 +226,9 @@ export const agentDrilldownMd = ( ${ctx.bannerHtml} -> Discipline-score **${a.score} / 100** ${arr.glyph} ${deltaStr} over ${ctx.period}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. +> Discipline score **${a.score} / 100** ${arr.glyph} ${deltaStr} over ${ctx.period}. ${a.commits.toLocaleString()} commits analysed, phase coverage **${a.phaseCoveragePct}%**. -## trend (30 dagen) +## trend (30 days)
${sparkline(a.trend)} @@ -240,7 +240,7 @@ ${streakBox(a)} ${bars(a.failureMix)} -Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits). +Top issue this quarter: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% of commits). ## recent flagged @@ -261,7 +261,7 @@ ${ctx.footerLinks} export const testsOverviewMd = (ctx: TestsOverviewContext): string => { if (ctx.unavailableNote) { - return `# tests overzicht + return `# tests overview ${ctx.bannerHtml} @@ -275,23 +275,23 @@ ${ctx.bannerHtml} const failing = ctx.snapshots.reduce((s, r) => s + r.failing, 0); const snapshots = ctx.snapshots.map(snapshotBlock).join("\n"); const stabRows = ctx.stability.map(stabilityRow).join("\n"); - return `# tests overzicht + return `# tests overview ${ctx.bannerHtml} -> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${ctx.period}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". +> Snapshot of the current test state per repo + stability of individual tests over ${ctx.period}. A high fail count with zero deletions means the test is actively catching regressions; high fail + deletion is the signal that a test is being squeezed — often the trace of an agent making it easier to "win". -## huidige stand · per repo +## current state · per repo
${snapshots}
-**Totaal**: ${total.toLocaleString()} tests · ${passing.toLocaleString()} passing · ${failing.toLocaleString()} failing. +**Total**: ${total.toLocaleString()} tests · ${passing.toLocaleString()} passing · ${failing.toLocaleString()} failing. -## test-stabiliteit · q1 2026 +## test stability · ${ctx.period} -Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken. +Top tests by failure activity this period, with pass/fail/deleted counts and the agent who last broke the test. @@ -300,7 +300,7 @@ Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events e - + @@ -308,14 +308,14 @@ ${stabRows}
pass fail dellaatst gebroken doorlast broken by
-> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test. +> ⚠ marks tests where a test-deletion or weakening event has been detected this period. In a real setup, clicking a test name will link through to that test's commit history. -## hoe lees je dit +## how to read this -- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm. -- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt. -- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon). -- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem. +- **Lots of pass, few fail, 0 del**: healthy. The test does what it should, nobody is sabotaging it. +- **Lots of fail, 0 del**: the test is actively catching regressions. Good news — discipline is working. +- **Fail and del > 0**: the test is under pressure. Coach the agent that broke it (click the tag icon). +- **Snapshot red + stability high**: a known, long-running broken test. Separate concern, not necessarily an agent problem. ---