604776052d9e96ab4b6876fa462c9715a4f30a76
diff --git a/public/style.css b/public/style.css
index 63119bd3212855595862f2c2ff7cc375f3029770..c5220ab9fee147104a0c0b58813d5367c7320baa 100644
--- a/public/style.css
+++ b/public/style.css
@@ -183,3 +183,158 @@ main.md strong { font-weight: 600; }
   background: var(--accent);
   color: var(--bg);
 }
+
+/* --- reports / dashboard ---------------------------------------------- */
+/* Dashed notice rendered under the page heading of the demo reports (markup: mockBanner in src/reports.ts). */
+.report-mockup-banner {
+  background: var(--code-bg);
+  border: 1px dashed var(--border);
+  padding: 0.7rem 1rem;
+  border-radius: 4px;
+  font-size: 0.85rem;
+  color: var(--muted);
+  margin: 0 0 2rem;
+  font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace;
+}
+.report-mockup-banner a {
+  color: var(--muted);
+  text-decoration: underline;
+  text-underline-offset: 2px;
+}
+.report-mockup-banner a:hover { color: var(--fg); }
+/* Three equal columns of agent tiles; the max-width: 600px media query collapses this to one column. */
+.report-tiles {
+  display: grid;
+  grid-template-columns: repeat(3, 1fr);
+  gap: 1rem;
+  margin: 1.5rem 0 2.5rem;
+}
+/* Card for a single agent; the markup is produced by tile() in src/reports.ts. */
+.report-tile {
+  border: 1px solid var(--border);
+  border-radius: 6px;
+  padding: 1.2rem 1.2rem 1rem;
+  background: var(--code-bg);
+}
+/* Agent name line: forced to lowercase; the link inherits the muted colour until hovered. */
+.report-tile-name {
+  font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace;
+  font-size: 0.8rem;
+  text-transform: lowercase;
+  letter-spacing: 0.04em;
+  color: var(--muted);
+  margin: 0 0 0.6rem;
+}
+.report-tile-name a {
+  color: inherit;
+  text-decoration: none;
+}
+.report-tile-name a:hover { color: var(--fg); }
+/* Large score figure; the suffix span carries the smaller, muted " / 100". */
+.report-tile-score {
+  font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace;
+  font-size: 2.2rem;
+  font-weight: 600;
+  letter-spacing: -0.02em;
+  margin: 0;
+  line-height: 1.1;
+}
+.report-tile-score-suffix {
+  font-size: 0.95rem;
+  color: var(--muted);
+  font-weight: 400;
+}
+/* Delta line under the score; the up / down / flat modifier is chosen by trendArrow() in src/reports.ts. */
+.report-tile-trend {
+  font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace;
+  font-size: 0.9rem;
+  margin: 0.4rem 0 0.6rem;
+}
+.report-tile-trend.up { color: var(--green); }
+.report-tile-trend.down { color: var(--red); }
+.report-tile-trend.flat { color: var(--muted); }
+/* Commit-count line of the tile ("N commits"). */
+.report-tile-volume {
+  font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace;
+  font-size: 0.78rem;
+  color: var(--muted);
+  margin: 0 0 0.8rem;
+}
+/* Top-issue footer, set off from the rest of the tile by a top border; the issue label sits in <strong>. */
+.report-tile-issue {
+  font-size: 0.82rem;
+  color: var(--muted);
+  border-top: 1px solid var(--border);
+  padding-top: 0.7rem;
+}
+.report-tile-issue strong {
+  color: var(--fg);
+  font-weight: 500;
+}
+/* Failure-mix bars (markup: bars() in src/reports.ts). Each row is a label | track | percentage grid. */
+.report-bars {
+  margin: 1rem 0 2rem;
+}
+.report-bar-row {
+  display: grid;
+  grid-template-columns: 180px 1fr 50px;
+  align-items: center;
+  gap: 0.8rem;
+  margin: 0.5rem 0;
+  font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace;
+  font-size: 0.85rem;
+}
+.report-bar-label { color: var(--muted); }
+.report-bar-track {
+  height: 10px;
+  background: var(--code-bg);
+  border: 1px solid var(--border);
+  border-radius: 2px;
+  overflow: hidden;
+}
+.report-bar-fill {
+  display: block;
+  height: 100%;
+  background: var(--accent); /* default fill; tone "accent" has no modifier rule and lands here */
+}
+.report-bar-fill.red { background: var(--red); }
+.report-bar-fill.green { background: var(--green); }
+.report-bar-fill.muted { background: var(--muted); }
+.report-bar-pct { text-align: right; color: var(--fg); }
+/* Streak badge (markup: streakBox() in src/reports.ts); .broken turns it red, .long turns it green. */
+.report-streak {
+  display: inline-block;
+  padding: 0.4rem 0.8rem;
+  border: 1px solid var(--border);
+  border-radius: 4px;
+  font-family: ui-monospace, "SF Mono", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace;
+  font-size: 0.85rem;
+  color: var(--muted);
+  margin: 0 0 1.5rem;
+}
+.report-streak-num {
+  font-weight: 600;
+  color: var(--fg);
+}
+.report-streak.broken {
+  color: var(--red);
+  border-color: var(--red);
+}
+.report-streak.broken .report-streak-num { color: var(--red); }
+.report-streak.long {
+  color: var(--green);
+  border-color: var(--green);
+}
+.report-streak.long .report-streak-num { color: var(--green); }
+/* Box for the inline SVG from sparkline(); the SVG sets preserveAspectRatio="none", so it stretches to fit. */
+.report-sparkline {
+  width: 100%;
+  height: 80px;
+  display: block;
+  margin: 0.5rem 0 1.2rem;
+}
+/* Narrow screens: stack the tiles and shrink the bar label column from 180px to 130px. */
+@media (max-width: 600px) {
+  .report-tiles { grid-template-columns: 1fr; }
+  .report-bar-row { grid-template-columns: 130px 1fr 50px; }
+}
diff --git a/src/reports.ts b/src/reports.ts
new file mode 100644
index 0000000000000000000000000000000000000000..4c11445c14b0bf3e154074e040adac53f82412b1
--- /dev/null
+++ b/src/reports.ts
@@ -0,0 +1,298 @@
+// Mockup reporting layer for tdd.md.
+//
+// All data here is FAKE — wired up only so the management/exec view and
+// per-agent drill-down can be designed in the browser before the real
+// project-tracking pipeline (block 1) exists.
+//
+// Real reporting needs:
+//   - GitHub App / webhook ingest of pushes on tracked branches
+//   - per-commit judging without hidden tests (red-fails / green-passes /
+//     no-test-deletion / no-regression)
+//   - agent attribution (commit footer convention or wrapper-driven)
+// Once that exists, these generators can be fed real data in place of DEMO_REPORTS (today they read it directly).
+/** One row of the "recent flagged" table on the per-agent drill-down page. */
+interface RecentFlagged {
+  date: string; // rendered verbatim; the demo rows use YYYY-MM-DD
+  repo: string; // repository name, shown as inline code
+  sha: string; // commit hash, shown as inline code (7 characters in the demo rows)
+  phase: "red" | "green" | "refactor"; // value of the "phase" column
+  failure: string; // failure-mode label, e.g. "test-deleted"
+  pts: number; // value of the "pts" column (0 or negative in the demo rows)
+}
+/** One bar of the failure-mode breakdown rendered by bars(). */
+interface FailureSlice {
+  label: string; // text in the left-hand label column
+  pct: number; // used both as the bar width (CSS %) and as the printed percentage
+  tone: "red" | "green" | "muted" | "accent"; // appended as a class on .report-bar-fill
+}
+/** Everything the summary tile and the drill-down page read for one agent. */
+export interface AgentReport {
+  slug: "claude-code" | "cursor" | "aider"; // URL segment under /reports/demo/agents/
+  name: string; // display name
+  score: number; // rendered as "<score> / 100"
+  delta: number; // its sign selects the trend arrow and colour class; printed with an explicit "+" when positive
+  commits: number; // commit count shown on the tile and the drill-down
+  phaseCoveragePct: number; // printed as "phase-coverage" on the drill-down
+  streak: number; // number shown in the streak badge
+  streakBroken: boolean; // switches the badge to its "broken" class and the "recent break" caption
+  topIssueLabel: string; // printed after "top issue:" on the tile and on the drill-down
+  topIssuePct: number; // percentage printed next to topIssueLabel
+  failureMix: FailureSlice[]; // slices for the failure-mode bars
+  trend: number[]; // series plotted by sparkline() (30 values per agent in the demo data)
+  recent: RecentFlagged[]; // rows of the "recent flagged" table
+}
+// Labels interpolated into the headline sentences of the demo pages.
+export const DEMO_PERIOD = "2026-01-01 → 2026-03-31"; // shown on the exec summary and on every drill-down
+export const DEMO_ORG = "acme-corp"; // organisation name in the exec summary
+export const DEMO_REPOS = 4; // repo count printed in the exec summary's scope line
+/** Synthetic fixtures, one entry per agent; the file header states that all data in this module is fake. */
+export const DEMO_REPORTS: AgentReport[] = [
+  {
+    slug: "claude-code",
+    name: "Claude Code",
+    score: 78,
+    delta: +6,
+    commits: 612,
+    phaseCoveragePct: 92,
+    streak: 47,
+    streakBroken: false,
+    topIssueLabel: "red-did-not-fail",
+    topIssuePct: 8,
+    failureMix: [
+      { label: "clean cycles", pct: 84, tone: "green" },
+      { label: "red-did-not-fail", pct: 8, tone: "red" },
+      { label: "broken refactor", pct: 4, tone: "red" },
+      { label: "test-deleted", pct: 2, tone: "red" },
+      { label: "no phase tag", pct: 2, tone: "muted" },
+    ],
+    trend: [72, 73, 71, 74, 72, 75, 73, 75, 77, 76, 75, 76, 78, 77, 79, 78, 77, 79, 80, 78, 79, 80, 79, 81, 80, 82, 81, 80, 79, 78],
+    recent: [
+      { date: "2026-03-29", repo: "api-gateway", sha: "f1c8b3a", phase: "red", failure: "red-did-not-fail", pts: -5 },
+      { date: "2026-03-24", repo: "billing-service", sha: "9d2e1f4", phase: "refactor", failure: "broken refactor", pts: -5 },
+      { date: "2026-03-18", repo: "data-pipeline", sha: "62a9cb7", phase: "green", failure: "no phase tag (parent)", pts: 0 },
+    ],
+  },
+  {
+    slug: "cursor",
+    name: "Cursor",
+    score: 54,
+    delta: -15,
+    commits: 489,
+    phaseCoveragePct: 71,
+    streak: 3,
+    streakBroken: true,
+    topIssueLabel: "test-deleted in refactor",
+    topIssuePct: 14,
+    failureMix: [
+      { label: "clean cycles", pct: 64, tone: "green" },
+      { label: "test-deleted", pct: 14, tone: "red" },
+      { label: "red-did-not-fail", pct: 9, tone: "red" },
+      { label: "broken refactor", pct: 7, tone: "red" },
+      { label: "no phase tag", pct: 6, tone: "muted" },
+    ],
+    trend: [69, 70, 71, 72, 70, 71, 72, 73, 72, 71, 72, 70, 68, 65, 60, 55, 50, 52, 54, 53, 56, 54, 52, 55, 53, 54, 56, 55, 54, 54],
+    recent: [
+      { date: "2026-03-28", repo: "api-gateway", sha: "a1b2c3d", phase: "refactor", failure: "test-deleted", pts: -20 },
+      { date: "2026-03-26", repo: "api-gateway", sha: "4e5f6a7", phase: "green", failure: "broken refactor", pts: -5 }, // NOTE(review): "broken refactor" on a green-phase commit — intentional?
+      { date: "2026-03-23", repo: "billing-service", sha: "8b9c0d1", phase: "red", failure: "red-did-not-fail", pts: -5 },
+      { date: "2026-03-21", repo: "api-gateway", sha: "2e3f4a5", phase: "refactor", failure: "test-deleted", pts: -20 },
+      { date: "2026-03-19", repo: "data-pipeline", sha: "6b7c8d9", phase: "refactor", failure: "broken refactor", pts: -5 },
+    ],
+  },
+  {
+    slug: "aider",
+    name: "Aider",
+    score: 89,
+    delta: +2,
+    commits: 146,
+    phaseCoveragePct: 96,
+    streak: 89,
+    streakBroken: false,
+    topIssueLabel: "broken refactor",
+    topIssuePct: 3,
+    failureMix: [
+      { label: "clean cycles", pct: 94, tone: "green" },
+      { label: "broken refactor", pct: 3, tone: "red" },
+      { label: "red-did-not-fail", pct: 2, tone: "red" },
+      { label: "no phase tag", pct: 1, tone: "muted" },
+    ],
+    trend: [87, 88, 89, 88, 87, 89, 90, 89, 88, 89, 90, 88, 89, 90, 91, 89, 88, 89, 90, 89, 90, 91, 89, 88, 89, 90, 89, 90, 89, 89],
+    recent: [
+      { date: "2026-03-27", repo: "data-pipeline", sha: "3a4b5c6", phase: "refactor", failure: "broken refactor", pts: -5 },
+      { date: "2026-03-15", repo: "billing-service", sha: "7d8e9f0", phase: "red", failure: "red-did-not-fail", pts: -5 },
+    ],
+  },
+];
+/** Escapes &, ", < and > so a value can be placed in HTML text or a double-quoted attribute. */
+const escape = (s: string): string =>
+  s.replace(/[&"<>]/g, (ch) => (ch === "&" ? "&amp;" : ch === '"' ? "&quot;" : ch === "<" ? "&lt;" : "&gt;"));
+/** Maps a score delta to its arrow glyph and CSS modifier class; anything that is neither < 0 nor > 0 is "flat". */
+const trendArrow = (delta: number): { glyph: string; cls: string } =>
+  delta < 0 ? { glyph: "↓", cls: "down" } : delta > 0 ? { glyph: "↑", cls: "up" } : { glyph: "→", cls: "flat" };
+/**
+ * Renders a series as an inline SVG polyline, scaled vertically to the series' own min..max.
+ * Returns "" for an empty series; a single value is drawn as a flat line (a one-point polyline paints nothing).
+ */
+const sparkline = (values: number[], height = 60, width = 320): string => {
+  if (values.length === 0) return "";
+  // Duplicate a lone sample so the polyline gets two vertices and stays visible.
+  const series = values.length === 1 ? [...values, ...values] : values;
+  const pad = 6; // vertical inset applied at the top and bottom of the viewBox
+  const innerH = height - pad * 2;
+  const min = Math.min(...series);
+  const range = Math.max(1, Math.max(...series) - min); // clamped to >= 1, so a flat series never divides by zero
+  const stepX = width / (series.length - 1); // series has at least two points here
+  const points = series
+    .map((v, i) => `${(i * stepX).toFixed(1)},${(pad + innerH - ((v - min) / range) * innerH).toFixed(1)}`)
+    .join(" ");
+  return `<svg class="report-sparkline" viewBox="0 0 ${width} ${height}" preserveAspectRatio="none" aria-hidden="true">
+  <polyline fill="none" stroke="currentColor" stroke-width="1.5" points="${points}" />
+</svg>`;
+};
+/** Renders one agent's summary tile (name link, score, trend, commit volume, top issue) as raw HTML. */
+const tile = (a: AgentReport): string => {
+  const { glyph, cls } = trendArrow(a.delta);
+  const signed = `${a.delta > 0 ? "+" : ""}${a.delta}`; // gains get an explicit "+"; losses already carry "-"
+  return `<div class="report-tile">
+  <p class="report-tile-name"><a href="/reports/demo/agents/${a.slug}">${escape(a.name)}</a></p>
+  <p class="report-tile-score">${a.score}<span class="report-tile-score-suffix"> / 100</span></p>
+  <p class="report-tile-trend ${cls}">${glyph} ${escape(signed)}</p>
+  <p class="report-tile-volume">${a.commits.toLocaleString()} commits</p>
+  <div class="report-tile-issue">top issue: <strong>${escape(a.topIssueLabel)}</strong> (${a.topIssuePct}%)</div>
+</div>`;
+};
+/**
+ * Renders the failure-mix slices as one labelled horizontal bar per slice, in input order.
+ * Only the label is escaped; tone and pct are interpolated as-is into the class list and the inline width.
+ */
+const bars = (mix: FailureSlice[]): string => {
+  const row = (slice: FailureSlice): string => `<div class="report-bar-row">
+  <span class="report-bar-label">${escape(slice.label)}</span>
+  <span class="report-bar-track"><span class="report-bar-fill ${slice.tone}" style="width: ${slice.pct}%"></span></span>
+  <span class="report-bar-pct">${slice.pct}%</span>
+</div>`;
+  // Rows are newline-joined inside a single wrapper div.
+  return `<div class="report-bars">${mix.map(row).join("\n")}</div>`;
+};
+/** Renders the streak badge: the streak number followed by a caption, with an optional modifier class. */
+const streakBox = (a: AgentReport): string => {
+  // A broken streak wins over the length check, so "long" only applies to unbroken streaks of 30 or more.
+  const [modifier, caption] = a.streakBroken ? ["broken", "recent break"] : [a.streak >= 30 ? "long" : "", "consecutive clean cycles"];
+  return `<span class="report-streak ${modifier}"><span class="report-streak-num">${a.streak}</span> ${caption}</span>`;
+};
+/** Raw-HTML notice embedded right under the heading of both demo pages to flag the data as synthetic. */
+const mockBanner = `<div class="report-mockup-banner">demo data — real reporting wires up when the project-tracking pipeline ships. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a> · <a href="/reports">about reporting</a></div>`;
+/** Static markdown for the /reports landing page; takes no input and interpolates nothing. */
+export const reportsLandingMd = (): string => `# reports
+
+> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.
+
+This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data:
+
+- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents
+- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits
+
+## what gets measured
+
+This layer measures **discipline**, not code-quality. Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch:
+
+| failure mode | what triggers it | what it costs |
+|---|---|---|
+| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit |
+| \`test-deleted\` | test count drops between commits | -20 / commit |
+| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit |
+| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % |
+
+The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip.
+
+## reading the data
+
+For management:
+- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page.
+
+For team-leads:
+- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts.
+
+[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games)
+`;
+/** Markdown (with embedded HTML tiles) for the /reports/demo exec summary; the narrative text is hard-coded copy. */
+export const execSummaryMd = (): string => {
+  let commitTotal = 0;
+  for (const report of DEMO_REPORTS) commitTotal += report.commits;
+  return `# tdd-discipline rapport · q1 2026
+
+${mockBanner}
+
+> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${commitTotal.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}.
+
+<div class="report-tiles">
+${DEMO_REPORTS.map((report) => tile(report)).join("\n")}
+</div>
+
+## wat veranderde dit kwartaal
+
+Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.
+
+## wat we doen
+
+- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)).
+- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren.
+- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.
+
+## wat dit getal *niet* meet
+
+Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy.
+
+---
+
+[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [back to /reports](/reports)
+`;
+};
+const COACHING_LINKS: Record<AgentReport["slug"], string> = { // slug → first coaching bullet; Record keeps it exhaustive over the slug union
+  "claude-code": "[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent `red-did-not-fail`.",
+  cursor: "[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, `.cursor/rules` to stop test-deletion.",
+  aider: "[Aider is the closest agent to TDD on rails — until `--auto-test`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.",
+};
+/** Markdown for one agent's drill-down page, or null when no demo report has that slug. Free-text fields are HTML-escaped. */
+export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => {
+  const a = DEMO_REPORTS.find((r) => r.slug === slug);
+  if (!a) return null;
+  const { glyph, cls } = trendArrow(a.delta); // NOTE(review): the sparkline wrapper below uses bare red/green/muted classes — confirm style.css defines them
+  const signed = `${a.delta > 0 ? "+" : ""}${a.delta}`; // gains get an explicit "+"; losses already carry "-"
+  const recentRows = a.recent.map((r) => `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${escape(r.failure)} | ${r.pts} |`).join("\n");
+  return `# ${escape(a.name)} · drill-down
+
+${mockBanner}
+
+> Discipline-score **${a.score} / 100** <span class="report-tile-trend ${cls}">${glyph} ${signed}</span> over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**.
+
+## trend (30 dagen)
+
+<div class="${cls === "down" ? "red" : cls === "up" ? "green" : "muted"}">
+${sparkline(a.trend)}
+</div>
+
+${streakBox(a)}
+
+## failure-mode breakdown
+
+${bars(a.failureMix)}
+
+Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits).
+
+## recent flagged
+
+| date | repo | sha | phase | failure | pts |
+|---|---|---|---|---|---|
+${recentRows}
+
+## coaching
+
+- ${COACHING_LINKS[a.slug]}
+- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough.
+
+---
+
+[← exec summary](/reports/demo) · [back to /reports](/reports)
+`;
+};
diff --git a/src/server.ts b/src/server.ts
index b419e499297c446a7f195c236c877618adb3d930..01703187c69b01d6cc65a9de2b1777c4851d28ff 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -5,6 +5,12 @@ import { parseCommit, computeProgress, type Phase } from "./commits";
 import { loadGame, listGames } from "./games";
 import { judge } from "./judge";
 import { latestRun, allLatestRuns } from "./db";
+import {
+  reportsLandingMd,
+  execSummaryMd,
+  agentDrilldownMd,
+  DEMO_REPORTS,
+} from "./reports";
 
 const HOME_MD = "./content/home.md";
 const GAME_DIR = "./content/games";
@@ -782,6 +788,46 @@ ${rows}
       return htmlResponse(html);
     },
 
+    // Landing page of the reporting preview: static markdown from reportsLandingMd().
+    // noindex is set on all three /reports routes, presumably because they only front demo data — confirm.
+    "/reports": async () =>
+      htmlResponse(await renderPage({
+        title: "Reports — tdd.md",
+        description: "Per-agent TDD-discipline reporting over real project repos: trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.",
+        bodyMarkdown: reportsLandingMd(),
+        ogPath: "https://tdd.md/reports",
+        noindex: true,
+      })),
+
+    // Exec-summary mockup: the page body comes from execSummaryMd().
+    // Rendered and wrapped with htmlResponse without an explicit status code.
+    "/reports/demo": async () =>
+      htmlResponse(await renderPage({
+        title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md",
+        description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.",
+        bodyMarkdown: execSummaryMd(),
+        ogPath: "https://tdd.md/reports/demo",
+        noindex: true,
+      })),
+
+    "/reports/demo/agents/:slug": async (req) => {
+      // The path segment is untrusted: match it as a plain string instead of asserting it is a known slug.
+      const requested = req.params.slug;
+      const entry = DEMO_REPORTS.find((r) => r.slug === requested);
+      const md = entry ? agentDrilldownMd(entry.slug) : null;
+      if (!entry || !md) {
+        const html = await renderNotFound(`/reports/demo/agents/${requested}`);
+        return htmlResponse(html, 404);
+      }
+      const html = await renderPage({
+        title: `${entry.name} drill-down (demo) — tdd.md`,
+        description: `Per-agent drill-down mockup for ${entry.name}: trend, failure-mode breakdown, recent flagged commits with coaching links.`,
+        bodyMarkdown: md,
+        ogPath: `https://tdd.md/reports/demo/agents/${requested}`,
+        noindex: true,
+      });
+      return htmlResponse(html);
+    },
+
     "/guides": async () => {
       const rows = ALL_GUIDES
         .map((g) => `| [${g.title}](/guides/${g.slug}) | ${g.description} |`)