b91543ab63a658f3d1e8dc7f9b51aa0945f488b3 diff --git a/.gitignore b/.gitignore index a10315de6708e7ea4368a7aa128fe9e9b08de60c..21ebc0e33dccd09efd9ac049039865612c70a43a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ node_modules/ .env.local .bun-cache/ .claude/ +content/git-history/ diff --git a/scripts/p620/deploy-tdd-md.sh b/scripts/p620/deploy-tdd-md.sh index d63803a387c4f332e2726b789a068ee9cd04c80f..b975ee38c9cc1b3b94cda92b57c205bade89812f 100755 --- a/scripts/p620/deploy-tdd-md.sh +++ b/scripts/p620/deploy-tdd-md.sh @@ -38,6 +38,12 @@ ssh "$SSH_HOST" 'command -v podman >/dev/null && command -v systemctl >/dev/null need_restart=0 +echo "→ snapshot git history → content/git-history/" +# Bundles local git log into JSON so the container can render /reports/live +# for the (private) syntaxai/tdd.md repo without a GitHub token. +( cd "$REPO_ROOT" && bun scripts/p620/snapshot-git-history.ts ) \ + || { echo "✗ snapshot-git-history mislukt"; exit 1; } + echo "→ source rsync naar $SSH_HOST:~/$REMOTE_SRC_DIR" ssh "$SSH_HOST" "mkdir -p ~/$REMOTE_SRC_DIR" # --delete zodat verwijderde files ook weggaan op remote. diff --git a/scripts/p620/snapshot-git-history.ts b/scripts/p620/snapshot-git-history.ts new file mode 100644 index 0000000000000000000000000000000000000000..5779e58aea22913ded215351452e5ad156e2952c --- /dev/null +++ b/scripts/p620/snapshot-git-history.ts @@ -0,0 +1,56 @@ +#!/usr/bin/env bun +// Dump local git history into the same shape that the live-reports +// pipeline expects from GitHub's commits API. Runs at deploy-time so +// the container can render /reports/live for a private repo without +// holding a GitHub token. Each deploy refreshes the bundle. +// +// Output: content/git-history/__.json +// Schema: GithubCommit[] (see src/c14_github.ts) — newest first. + +import { spawnSync } from "node:child_process"; +import { mkdirSync, writeFileSync } from "node:fs"; +import { dirname, resolve } from "node:path"; + +const REPO_ROOT = resolve(import.meta.dir, "..", ".."); +const OWNER = "syntaxai"; +const NAME = "tdd.md"; +const MAX = 200; + +// Use ASCII record separators (\x1e between commits, \x1f between +// fields) so commit-message newlines pass through unmangled. +const FMT = ["%H", "%aI", "%an", "%ae", "%B"].join("\x1f") + "\x1e"; + +const res = spawnSync( + "git", + ["log", `--max-count=${MAX}`, `--pretty=format:${FMT}`], + { cwd: REPO_ROOT, encoding: "utf8", maxBuffer: 64 * 1024 * 1024 }, +); +if (res.status !== 0) { + console.error("git log failed:", res.stderr); + process.exit(1); +} + +const records = res.stdout.split("\x1e").map((s) => s.trim()).filter(Boolean); +const commits = records.map((rec) => { + const [sha, date, name, email, ...rest] = rec.split("\x1f"); + const message = (rest.join("\x1f") ?? "").replace(/\n+$/, ""); + return { + sha: sha ?? "", + commit: { + author: { + name: name ?? "", + email: email ?? "", + date: date ?? "", + }, + message, + }, + author: null, + }; +}); + +const outDir = resolve(REPO_ROOT, "content", "git-history"); +const outPath = resolve(outDir, `${OWNER}__${NAME}.json`); +mkdirSync(dirname(outPath), { recursive: true }); +writeFileSync(outPath, JSON.stringify({ owner: OWNER, name: NAME, fetchedAt: Date.now(), commits }, null, 2)); + +console.log(`✓ wrote ${commits.length} commits → ${outPath}`); diff --git a/src/c14_github.ts b/src/c14_github.ts index 330cbe82ae9abbbc78727397d08253021f039939..abb2101955f9dfb0b26bf696f9fe275b31ad1ac5 100644 --- a/src/c14_github.ts +++ b/src/c14_github.ts @@ -120,3 +120,89 @@ export const fetchProjectConfig = async ( } return parseProjectConfig(parsed); }; + +// --------------------------------------------------------------------- +// Public commits API. Used to feed the live reports view from real +// data. Public-repo only; unauthenticated calls are rate-limited to +// 60/hour, so we cache aggressively. Single in-memory cache per +// (owner, repo) with a 5-minute TTL — enough for casual page-loads, +// not so long that pushed commits stay invisible. +// --------------------------------------------------------------------- + +export interface GithubCommit { + sha: string; + commit: { + author: { name: string; email: string; date: string }; + message: string; + }; + author: { login: string } | null; +} + +const COMMITS_TTL_MS = 5 * 60 * 1000; +const commitsCache = new Map(); + +// Deploy-time snapshot: scripts/p620/snapshot-git-history.ts dumps the +// local git log into content/git-history/__.json so the +// container can serve /reports/live for a private repo without a +// GitHub token. Bundle is preferred when present; we fall back to the +// public API for any repo we don't bundle. +const bundlePath = (repoOwner: string, repoName: string): string => + `./content/git-history/${repoOwner}__${repoName}.json`; + +interface GitHistoryBundle { + owner: string; + name: string; + fetchedAt: number; + commits: GithubCommit[]; +} + +const loadBundle = async ( + repoOwner: string, + repoName: string, +): Promise => { + try { + const file = Bun.file(bundlePath(repoOwner, repoName)); + if (!(await file.exists())) return null; + const data = (await file.json()) as GitHistoryBundle; + return Array.isArray(data.commits) ? data.commits : null; + } catch { + return null; + } +}; + +export const fetchRepoCommits = async ( + repoOwner: string, + repoName: string, + perPage = 100, +): Promise => { + const key = `${repoOwner}/${repoName}#${perPage}`; + const cached = commitsCache.get(key); + if (cached && Date.now() - cached.fetchedAt < COMMITS_TTL_MS) { + return cached.commits; + } + + const bundle = await loadBundle(repoOwner, repoName); + if (bundle) { + const sliced = bundle.slice(0, perPage); + commitsCache.set(key, { fetchedAt: Date.now(), commits: sliced }); + return sliced; + } + + const url = `https://api.github.com/repos/${encodeURIComponent(repoOwner)}/${encodeURIComponent(repoName)}/commits?per_page=${perPage}`; + const res = await fetch(url, { + headers: { + Accept: "application/vnd.github+json", + "User-Agent": "tdd.md", + }, + }); + if (!res.ok) { + // Honour the cache on transient failure rather than blanking the page — + // GitHub's 60/hour anonymous rate limit is the most likely cause and + // the cached data is still strictly better than no data. + if (cached) return cached.commits; + throw new Error(`GitHub commits API failed for ${repoOwner}/${repoName}: HTTP ${res.status}`); + } + const commits = (await res.json()) as GithubCommit[]; + commitsCache.set(key, { fetchedAt: Date.now(), commits }); + return commits; +}; diff --git a/src/c21_app.ts b/src/c21_app.ts index 2d6909b00d9bf8f5c31d70610a251c3698e53eb5..e3de9a367db15cfd606d4f7f2ff96c4fb687dae6 100644 --- a/src/c21_app.ts +++ b/src/c21_app.ts @@ -27,7 +27,15 @@ import { fetchProjectConfig } from "./c14_github.ts"; import { listGames, loadGame } from "./c31_games.ts"; import { ALL_POSTS } from "./c31_blog.ts"; import { ALL_GUIDES } from "./c31_guides.ts"; -import { DEMO_REPORTS } from "./c31_reports_demo.ts"; +import { + DEMO_REPORTS, + DEMO_PERIOD, + DEMO_ORG, + DEMO_REPOS, + DEMO_SNAPSHOTS, + DEMO_STABILITY, +} from "./c31_reports_demo.ts"; +import { buildLiveReports } from "./c32_real_reports.ts"; import { parseRepoIdentifier } from "./c31_project_config.ts"; import { judge } from "./c32_judge.ts"; import { @@ -49,6 +57,54 @@ import { startGithubOauth, handleGithubCallback } from "./c21_handlers_auth.ts"; const HOME_MD = "./content/home.md"; const GAME_DIR = "./content/games"; +// --------------------------------------------------------------------- +// Reports-context builders. The c51 builders take a ReportsContext — +// these tiny helpers assemble it for the synthetic /reports/demo and +// the live /reports/live (real data fetched from syntaxai/tdd.md). +// --------------------------------------------------------------------- + +const LIVE_REPO_OWNER = "syntaxai"; +const LIVE_REPO_NAME = "tdd.md"; +const LIVE_FETCH_COUNT = 100; + +const DEMO_BANNER_HTML = `
demo data — design preview with synthetic numbers. Want the real readout? /reports/live renders the same shape from live tdd.md commits. why tdd.md needs this
`; + +const LIVE_BANNER_HTML = `
live data — sourced from ${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} via the public commits API (5-min cache). Agent attribution comes from Co-Authored-By: footers; commits without one are excluded. Phase coverage measures % of commits tagged red:/green:/refactor:.
`; + +const demoContext = () => ({ + reports: DEMO_REPORTS, + period: DEMO_PERIOD, + scopeLabel: `${DEMO_REPOS} repos · ${DEMO_ORG}`, + bannerHtml: DEMO_BANNER_HTML, + narrative: { + changedHeading: "wat veranderde dit kwartaal", + changedBody: + "Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de `api-gateway` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.", + doingHeading: "wat we doen", + doingBody: + "- **Cursor in `api-gateway`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel \"never delete a test in a refactor commit\" gepind ([details →](/reports/demo/agents/cursor)).\n- **Claude Code uitrollen**: het CLAUDE.md-template dat in `billing-service` werkte naar de andere drie repos kopiëren.\n- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.", + }, + footerLinks: + "[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)", +}); + +const liveContext = async () => { + const live = await buildLiveReports(LIVE_REPO_OWNER, LIVE_REPO_NAME, LIVE_FETCH_COUNT); + const period = live.earliest && live.latest + ? `${live.earliest.slice(0, 10)} → ${live.latest.slice(0, 10)}` + : "no commits fetched"; + const drillLinks = live.reports + .map((r) => `[${r.name}](/reports/live/agents/${r.slug})`) + .join(" · "); + return { + reports: live.reports, + period, + scopeLabel: `${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} · ${live.totalCommits} commits sampled${live.unknownCount > 0 ? ` (${live.unknownCount} unattributed, excluded)` : ""}`, + bannerHtml: LIVE_BANNER_HTML, + footerLinks: `${drillLinks ? drillLinks + " · " : ""}[tests overzicht](/reports/live/tests) · [demo preview](/reports/demo) · [back to /reports](/reports)`, + }; +}; + const HOME_DESCRIPTION = "Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline."; @@ -417,10 +473,11 @@ ${rows} }, "/reports/demo": async () => { + const ctx = demoContext(); const html = await renderPage({ title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md", description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.", - bodyMarkdown: execSummaryMd(), + bodyMarkdown: execSummaryMd(ctx), ogPath: "https://tdd.md/reports/demo", noindex: true, }); @@ -431,7 +488,12 @@ ${rows} const html = await renderPage({ title: "Tests overzicht (demo) — tdd.md", description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.", - bodyMarkdown: testsOverviewMd(), + bodyMarkdown: testsOverviewMd({ + period: DEMO_PERIOD, + bannerHtml: DEMO_BANNER_HTML, + snapshots: DEMO_SNAPSHOTS, + stability: DEMO_STABILITY, + }), ogPath: "https://tdd.md/reports/demo/tests", noindex: true, }); @@ -440,7 +502,8 @@ ${rows} "/reports/demo/agents/:slug": async (req) => { const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"]; - const md = agentDrilldownMd(slug); + const ctx = demoContext(); + const md = agentDrilldownMd(slug, ctx); if (!md) { const html = await renderNotFound(`/reports/demo/agents/${slug}`); return htmlResponse(html, 404); @@ -456,6 +519,54 @@ ${rows} return htmlResponse(html); }, + "/reports/live": async () => { + const ctx = await liveContext(); + const html = await renderPage({ + title: "TDD-discipline rapport · live — tdd.md", + description: `Live discipline rapport gebouwd uit de echte commit-historie van syntaxai/tdd.md (laatste ${LIVE_FETCH_COUNT} commits, 5-min cache).`, + bodyMarkdown: execSummaryMd(ctx), + ogPath: "https://tdd.md/reports/live", + noindex: true, + }); + return htmlResponse(html); + }, + + "/reports/live/tests": async () => { + const html = await renderPage({ + title: "Tests overzicht · live (placeholder) — tdd.md", + description: "Placeholder voor de live test-overview — wacht op de sandbox-runner sliver.", + bodyMarkdown: testsOverviewMd({ + period: "live", + bannerHtml: LIVE_BANNER_HTML, + snapshots: [], + stability: [], + unavailableNote: "De per-repo test-snapshot en stabiliteitstabel hebben de sandbox-runner sliver nodig (block 1 vervolg). Tot dat klaar is, alleen de exec-summary + drill-down draaien op echte data; de testpagina staat in de [demo](/reports/demo/tests).", + }), + ogPath: "https://tdd.md/reports/live/tests", + noindex: true, + }); + return htmlResponse(html); + }, + + "/reports/live/agents/:slug": async (req) => { + const ctx = await liveContext(); + const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"]; + const md = agentDrilldownMd(slug, ctx); + if (!md) { + const html = await renderNotFound(`/reports/live/agents/${slug}`); + return htmlResponse(html, 404); + } + const entry = ctx.reports.find((r) => r.slug === slug)!; + const html = await renderPage({ + title: `${entry.name} drill-down · live — tdd.md`, + description: `Live drill-down voor ${entry.name} op syntaxai/tdd.md — trend, failure-mode breakdown, recent commits.`, + bodyMarkdown: md, + ogPath: `https://tdd.md/reports/live/agents/${slug}`, + noindex: true, + }); + return htmlResponse(html); + }, + "/guides": async () => { const rows = ALL_GUIDES .map((g) => `| [${g.title}](/guides/${g.slug}) | ${g.description} |`) diff --git a/src/c32_real_reports.ts b/src/c32_real_reports.ts new file mode 100644 index 0000000000000000000000000000000000000000..c93c4c3f4262082d4390f594a039a4432c68038b --- /dev/null +++ b/src/c32_real_reports.ts @@ -0,0 +1,170 @@ +// c32 — logic: aggregate real GitHub commit history into the same +// AgentReport / RecentFlagged shape that c51_render_reports renders. +// Pure (given fetched commits in, produces report objects out); the +// I/O happens in c14_github.fetchRepoCommits which we call here. +// +// Attribution: Co-Authored-By footers are the agent-attribution channel +// the existing tdd.md commit history already uses. Anything without a +// recognised footer is bucketed as "unknown" and reported separately — +// it's still useful for volume context. + +import { parseCommit } from "./c31_commits.ts"; +import { fetchRepoCommits, type GithubCommit } from "./c14_github.ts"; +import type { + AgentReport, + FailureSlice, + RecentFlagged, +} from "./c31_reports_demo.ts"; + +type LiveAgentSlug = AgentReport["slug"] | "unknown"; + +const detectAgent = (msg: string): LiveAgentSlug => { + if (/Co-Authored-By:.*Claude/i.test(msg)) return "claude-code"; + if (/Co-Authored-By:.*Cursor/i.test(msg)) return "cursor"; + if (/Co-Authored-By:.*Aider/i.test(msg)) return "aider"; + return "unknown"; +}; + +const AGENT_NAMES: Record = { + "claude-code": "Claude Code", + cursor: "Cursor", + aider: "Aider", +}; + +// 30-day daily commit-count series, oldest → newest. When there are no +// commits in a day, that day's value is 0 — the sparkline still renders +// but flat-lines, which honestly reflects the data. +const buildTrend = (commits: GithubCommit[], days = 30): number[] => { + const out = new Array(days).fill(0); + const today = new Date(); + today.setUTCHours(0, 0, 0, 0); + for (const c of commits) { + const d = new Date(c.commit.author.date); + d.setUTCHours(0, 0, 0, 0); + const ageDays = Math.floor((today.getTime() - d.getTime()) / (24 * 60 * 60 * 1000)); + if (ageDays < 0 || ageDays >= days) continue; + const idx = days - 1 - ageDays; + const cur = out[idx] ?? 0; + out[idx] = cur + 1; + } + return out; +}; + +const buildAgentReport = ( + slug: AgentReport["slug"], + agentCommits: GithubCommit[], + repoSlug: string, +): AgentReport => { + const tagged = agentCommits.filter((c) => { + const phase = parseCommit(c.commit.message).phase; + return phase === "red" || phase === "green" || phase === "refactor"; + }); + const phaseCoveragePct = agentCommits.length === 0 + ? 0 + : Math.round((tagged.length / agentCommits.length) * 100); + + // Score is a proxy: phase-coverage is the only structural signal we + // can compute without running the test suite. When coverage is 0 the + // agent isn't attempting TDD, so the score is honestly low. + const score = phaseCoveragePct; + + // Failure mix collapses to two slices for live data — phase-tagged vs + // not. Fine-grained failure modes (red-did-not-fail, test-deleted, etc) + // need the runner sliver before they're computable. + const failureMix: FailureSlice[] = [ + { label: "phase-tagged", pct: phaseCoveragePct, tone: "green" }, + { label: "no phase tag", pct: 100 - phaseCoveragePct, tone: "muted" }, + ]; + + const recent: RecentFlagged[] = agentCommits + .slice(0, 5) + .map((c) => { + const parsed = parseCommit(c.commit.message); + const phase = parsed.phase === "red" || parsed.phase === "green" || parsed.phase === "refactor" + ? parsed.phase + : "green"; + const failure = parsed.phase === "untagged" || parsed.phase === "init" + ? "no phase tag" + : `${parsed.phase} (live judge not yet wired)`; + return { + date: c.commit.author.date.slice(0, 10), + repo: repoSlug, + sha: c.sha.slice(0, 7), + phase, + failure, + pts: 0, + }; + }); + + const topIssueLabel = phaseCoveragePct === 100 ? "no current issues" : "no phase tag"; + const topIssuePct = 100 - phaseCoveragePct; + + return { + slug, + name: AGENT_NAMES[slug], + score, + delta: 0, + commits: agentCommits.length, + phaseCoveragePct, + streak: 0, + streakBroken: false, + topIssueLabel, + topIssuePct, + failureMix, + trend: buildTrend(agentCommits), + recent, + }; +}; + +export interface LiveReports { + reports: AgentReport[]; + unknownCount: number; + totalCommits: number; + earliest: string | null; + latest: string | null; + fetchedAt: number; +} + +export const buildLiveReports = async ( + repoOwner: string, + repoName: string, + perPage = 100, +): Promise => { + const commits = await fetchRepoCommits(repoOwner, repoName, perPage); + const repoSlug = `${repoOwner}/${repoName}`; + const byAgent = new Map(); + let unknownCount = 0; + + for (const c of commits) { + const a = detectAgent(c.commit.message); + if (a === "unknown") { + unknownCount++; + continue; + } + const arr = byAgent.get(a) ?? []; + arr.push(c); + byAgent.set(a, arr); + } + + const order: AgentReport["slug"][] = ["claude-code", "cursor", "aider"]; + const reports = order + .map((slug) => { + const list = byAgent.get(slug); + if (!list || list.length === 0) return null; + return buildAgentReport(slug, list, repoSlug); + }) + .filter((r): r is AgentReport => r !== null); + + const dates = commits.map((c) => c.commit.author.date).sort(); + const earliest = dates[0] ?? null; + const latest = dates[dates.length - 1] ?? null; + + return { + reports, + unknownCount, + totalCommits: commits.length, + earliest, + latest, + fetchedAt: Date.now(), + }; +}; diff --git a/src/c51_render_reports.ts b/src/c51_render_reports.ts index b34901f6d307c8544790e3aa228d6b2275e03988..126d8b61b14d419aaff7f3484ffa1bba765ffb1d 100644 --- a/src/c51_render_reports.ts +++ b/src/c51_render_reports.ts @@ -1,14 +1,11 @@ // c51 (reports) — body builders for /reports, /reports/demo, -// /reports/demo/agents/:slug, /reports/demo/tests. All synthetic data -// comes from c31_reports_demo; chrome helpers come from c51_render_layout. +// /reports/live, /reports/demo/agents/:slug, /reports/demo/tests. The +// builders take the dataset as an explicit ReportsContext so the same +// markdown templates serve both the synthetic demo (DEMO_* from +// c31_reports_demo) and the live tdd.md aggregation (c32_real_reports). import { - DEMO_PERIOD, - DEMO_ORG, - DEMO_REPOS, DEMO_REPORTS, - DEMO_SNAPSHOTS, - DEMO_STABILITY, type AgentReport, type FailureSlice, type TestSnapshot, @@ -16,6 +13,33 @@ import { } from "./c31_reports_demo.ts"; import { escape } from "./c51_render_layout.ts"; +export interface ReportsContext { + reports: AgentReport[]; + period: string; + scopeLabel: string; + bannerHtml: string; + // Optional narrative — present for the curated demo, omitted for live + // where the data has to speak for itself. + narrative?: { + changedHeading: string; + changedBody: string; + doingHeading: string; + doingBody: string; + }; + // Trailing footer line (links). Defaults reasonable for both demo + live. + footerLinks: string; +} + +export interface TestsOverviewContext { + period: string; + bannerHtml: string; + snapshots: TestSnapshot[]; + stability: TestStability[]; + // When the runner sliver isn't wired (live mode, today), pass a + // placeholder note instead of the snapshot+stability sections. + unavailableNote?: string; +} + const trendArrow = (delta: number): { glyph: string; cls: string } => delta > 0 ? { glyph: "↑", cls: "up" } : delta < 0 ? { glyph: "↓", cls: "down" } : { glyph: "→", cls: "flat" }; @@ -70,8 +94,6 @@ const streakBox = (a: AgentReport): string => { return `${a.streak} ${label}`; }; -const mockBanner = `
demo data — real reporting wires up when the project-tracking pipeline ships. why tdd.md needs this · about reporting
`; - const snapshotBlock = (s: TestSnapshot): string => { const failuresHtml = s.failures.length === 0 ? `
  • all ${s.passing} tests groen
  • ` @@ -113,13 +135,16 @@ export const reportsLandingMd = (): string => `# reports > Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout. -This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data: +Two views of the same shape: + +- **[/reports/live](/reports/live)** — built from real commit data on \`syntaxai/tdd.md\` (the repo this site runs on), refreshed every 5 minutes from the GitHub commits API. Agent attribution comes from \`Co-Authored-By:\` footers. Phase-coverage is the only metric we can compute without running tests, so the score is a proxy for now. +- **[/reports/demo](/reports/demo)** — the polished design preview with synthetic data for three agents and four repos. Useful for screenshots and showing the full failure-mode breakdown the live view can't compute yet. -- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents -- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits -- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam +Drill-downs: +- [live drill-down per agent](/reports/live/agents/claude-code) · [tests overzicht (live: placeholder)](/reports/live/tests) +- [demo drill-down per agent](/reports/demo/agents/cursor) · [tests overzicht (demo)](/reports/demo/tests) -Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers. +Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging on tracked branches lands in a follow-up sliver; live reporting from the GitHub API already works for the dogfood case (the tdd.md repo itself). ## what gets measured @@ -145,55 +170,63 @@ For team-leads: [← back to tdd.md](/) · [the blog](/blog) · [the katas](/games) `; -export const execSummaryMd = (): string => { - const totalCommits = DEMO_REPORTS.reduce((s, a) => s + a.commits, 0); - const tiles = DEMO_REPORTS.map(tile).join("\n"); - return `# tdd-discipline rapport · q1 2026 +export const execSummaryMd = (ctx: ReportsContext): string => { + const totalCommits = ctx.reports.reduce((s, a) => s + a.commits, 0); + const tiles = ctx.reports.length === 0 + ? `
    No agent-attributed commits in this dataset.
    ` + : ctx.reports.map(tile).join("\n"); + const narrativeBlock = ctx.narrative + ? `## ${ctx.narrative.changedHeading} -${mockBanner} +${ctx.narrative.changedBody} -> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}. +## ${ctx.narrative.doingHeading} -
    -${tiles} -
    +${ctx.narrative.doingBody} -## wat veranderde dit kwartaal +` + : ""; + return `# tdd-discipline rapport · ${ctx.period} -Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf. +${ctx.bannerHtml} -## wat we doen +> **Periode** ${ctx.period} · **Scope** ${escape(ctx.scopeLabel)} · ${totalCommits.toLocaleString()} AI-toegeschreven commits. -- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)). -- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren. -- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt. +
    +${tiles} +
    -## wat dit getal *niet* meet +${narrativeBlock}## wat dit getal *niet* meet Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy. --- -[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports) +${ctx.footerLinks} `; }; -export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => { - const a = DEMO_REPORTS.find((r) => r.slug === slug); +export const agentDrilldownMd = ( + slug: AgentReport["slug"], + ctx: ReportsContext, +): string | null => { + const a = ctx.reports.find((r) => r.slug === slug); if (!a) return null; const arr = trendArrow(a.delta); const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; - const recentRows = a.recent - .map( - (r) => - `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, - ) - .join("\n"); + const recentRows = a.recent.length === 0 + ? `| _no recent attributed activity_ | | | | | |` + : a.recent + .map( + (r) => + `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, + ) + .join("\n"); return `# ${a.name} · drill-down -${mockBanner} +${ctx.bannerHtml} -> Discipline-score **${a.score} / 100** ${arr.glyph} ${deltaStr} over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. +> Discipline-score **${a.score} / 100** ${arr.glyph} ${deltaStr} over ${ctx.period}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. ## trend (30 dagen) @@ -222,21 +255,31 @@ ${recentRows} --- -[← exec summary](/reports/demo) · [back to /reports](/reports) +${ctx.footerLinks} `; }; -export const testsOverviewMd = (): string => { - const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0); - const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0); - const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0); - const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n"); - const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n"); +export const testsOverviewMd = (ctx: TestsOverviewContext): string => { + if (ctx.unavailableNote) { + return `# tests overzicht + +${ctx.bannerHtml} + +> ${ctx.unavailableNote} + +[← exec summary](/reports) · [back to /reports](/reports) +`; + } + const total = ctx.snapshots.reduce((s, r) => s + r.total, 0); + const passing = ctx.snapshots.reduce((s, r) => s + r.passing, 0); + const failing = ctx.snapshots.reduce((s, r) => s + r.failing, 0); + const snapshots = ctx.snapshots.map(snapshotBlock).join("\n"); + const stabRows = ctx.stability.map(stabilityRow).join("\n"); return `# tests overzicht -${mockBanner} +${ctx.bannerHtml} -> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". +> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${ctx.period}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". ## huidige stand · per repo