Reports: live test snapshot via deploy-time bun-test bundle
Replaces the placeholder on /reports/live/tests with a real per-test
view sourced from `bun test --reporter=junit`, run at deploy time and
appended to a per-repo bundle.
scripts/p620/snapshot-tests.ts
Runs `bun test --reporter=junit`, parses the XML, appends the
result to content/git-history/<owner>__<name>__tests.json. Each
deploy adds at most one run (skipped when HEAD is already in the
bundle). Capped at 50 runs so the file stays bounded.
src/c14_github.ts
loadTestBundle: reads the bundle from disk; same shape and home
as the git-history snapshot.
src/c32_real_tests.ts
Aggregates the bundle into TestSnapshot[] (latest run, with
"since" computed from the oldest run that flagged a test as
failing) and TestStability[] (pass/fail counts per (file, name)
across all runs, with a "deleted" flag when a test that previously
appeared is missing from the latest run). lastBrokenBy is mapped
via Co-Authored-By footers from the commits bundle.
src/c21_app.ts
/reports/live/tests now calls buildLiveTestData. When the bundle
is empty it shows an honest unavailable-note pointing at the demo;
once at least one run is in the bundle, real data renders.
scripts/p620/deploy-tdd-md.sh
Runs snapshot-tests after snapshot-git-history, before rsync.
V1 is HEAD-only per deploy: stability/flakiness data accumulates run
by run as deploys happen. No git-worktree gymnastics, no per-commit
bun-install. Bumping to historical-commit testing is a future sliver.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
5 files changed · +336 −7
scripts/p620/deploy-tdd-md.sh
+6
−0
| @@ -44,6 +44,12 @@ echo "→ snapshot git history → content/git-history/" | ||
| 44 | 44 | ( cd "$REPO_ROOT" && bun scripts/p620/snapshot-git-history.ts ) \ |
| 45 | 45 | || { echo "✗ snapshot-git-history mislukt"; exit 1; } |
| 46 | 46 | |
| 47 | +echo "→ snapshot tests (bun test --reporter=junit) → content/git-history/" | |
| 48 | +# Runs the test suite at HEAD and appends the result to the per-repo | |
| 49 | +# tests bundle. Stability data accumulates run-by-run across deploys. | |
| 50 | +( cd "$REPO_ROOT" && bun scripts/p620/snapshot-tests.ts ) \ | |
| 51 | + || { echo "✗ snapshot-tests mislukt"; exit 1; } | |
| 52 | + | |
| 47 | 53 | echo "→ source rsync naar $SSH_HOST:~/$REMOTE_SRC_DIR" |
| 48 | 54 | ssh "$SSH_HOST" "mkdir -p ~/$REMOTE_SRC_DIR" |
| 49 | 55 | # --delete zodat verwijderde files ook weggaan op remote. |
scripts/p620/snapshot-tests.ts
+129
−0
| @@ -0,0 +1,129 @@ | ||
| 1 | +#!/usr/bin/env bun | |
| 2 | +// Run `bun test` on the current HEAD and append the result to a | |
| 3 | +// per-repo bundle alongside the git-history snapshot. The container | |
| 4 | +// reads this bundle at runtime to render /reports/live/tests for the | |
| 5 | +// (private) syntaxai/tdd.md repo without needing a runtime sandbox. | |
| 6 | +// | |
| 7 | +// Strategy: HEAD-only per deploy. The bundle accumulates one run per | |
| 8 | +// deploy (capped at 50), so stability data builds organically over | |
| 9 | +// time. No git-worktree gymnastics, no per-commit bun-install. | |
| 10 | +// | |
| 11 | +// Output: content/git-history/<owner>__<name>__tests.json | |
| 12 | +// Schema: { owner, name, runs: TestRunRecord[] } — newest first. | |
| 13 | + | |
| 14 | +import { spawnSync } from "node:child_process"; | |
| 15 | +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; | |
| 16 | +import { resolve } from "node:path"; | |
| 17 | + | |
| 18 | +const REPO_ROOT = resolve(import.meta.dir, "..", ".."); | |
| 19 | +const OWNER = "syntaxai"; | |
| 20 | +const NAME = "tdd.md"; | |
| 21 | +const MAX_RUNS = 50; | |
| 22 | +const JUNIT_OUT = "/tmp/tdd-md-test-junit.xml"; | |
| 23 | + | |
/**
 * Runs `cmd` with `args` from the repository root and returns its stdout,
 * trimmed.
 *
 * Returns "" when the process could not be spawned, was killed by a signal,
 * or exited non-zero, so callers can treat an empty string as "the command
 * failed" instead of consuming partial output from a failed command.
 *
 * @param cmd  Executable to run.
 * @param args Arguments passed to the executable (no shell interpolation).
 * @returns Trimmed stdout on success, "" on any failure.
 */
const sh = (cmd: string, args: string[]): string => {
  const r = spawnSync(cmd, args, { cwd: REPO_ROOT, encoding: "utf8" });
  // `status` is null when the child was terminated by a signal.
  if (r.error !== undefined || r.status !== 0) return "";
  return (r.stdout ?? "").trim();
};
| 28 | + | |
// Identify what is being tested: the full commit SHA and the symbolic
// branch name of the current checkout. Without a SHA the run cannot be
// keyed in the bundle, so that case aborts the script.
const [head, branch] = [
  sh("git", ["rev-parse", "HEAD"]),
  sh("git", ["rev-parse", "--abbrev-ref", "HEAD"]),
];
if (head.length === 0) {
  console.error("could not resolve HEAD");
  process.exit(1);
}
| 35 | + | |
// Run the suite and collect the JUnit report. A non-zero exit from
// `bun test` is expected when tests fail and is not an error here; only
// the report matters.
//
// JUNIT_OUT is a fixed path that survives between deploys, so it is
// truncated first. Otherwise a crash before bun writes the report would
// leave the previous deploy's XML in place and it would be recorded
// against the current HEAD.
writeFileSync(JUNIT_OUT, "");
const testRun = spawnSync("bun", ["test", "--reporter=junit", `--reporter-outfile=${JUNIT_OUT}`], {
  cwd: REPO_ROOT,
  stdio: "inherit",
});
if (testRun.error !== undefined) {
  console.error(`✗ could not start bun test: ${testRun.error.message}`);
  process.exit(1);
}
// An absent or still-empty file both mean bun never wrote a report.
const xml = existsSync(JUNIT_OUT) ? readFileSync(JUNIT_OUT, "utf8") : "";
if (xml.trim() === "") {
  console.error(`✗ junit output missing at ${JUNIT_OUT} — bun test crashed before writing`);
  process.exit(1);
}
| 47 | + | |
/** Outcome of a single `<testcase>` element from the JUnit report. */
interface TestRecord {
  /** Test name, taken from the testcase's `name` attribute. */
  name: string;
  /** Value of the testcase's `file` attribute ("" when absent). */
  file: string;
  /** "fail" when the testcase contains a `<failure>` or `<error>` element, otherwise "pass". */
  status: "pass" | "fail";
  /** The testcase's `time` attribute multiplied by 1000 and rounded. */
  durationMs: number;
}
| 54 | + | |
// The five entities predefined by XML, keyed by name (without `&` and `;`).
const XML_NAMED_ENTITIES: Readonly<Record<string, string>> = {
  apos: "'",
  quot: '"',
  lt: "<",
  gt: ">",
  amp: "&",
};

/**
 * Decodes XML character data: the five predefined entities (`&apos;`,
 * `&quot;`, `&lt;`, `&gt;`, `&amp;`) plus decimal (`&#65;`) and hexadecimal
 * (`&#x41;`) character references.
 *
 * Decoding happens in a single pass, so already-decoded output is never
 * re-scanned: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 * Unknown entities and out-of-range code points are left untouched.
 *
 * @param s Raw attribute or text content from the XML document.
 * @returns The decoded string.
 */
const decodeXmlEntity = (s: string): string =>
  s.replace(
    /&(?:#x([0-9a-fA-F]+)|#([0-9]+)|(apos|quot|lt|gt|amp));/g,
    (whole: string, hex?: string, dec?: string, named?: string): string => {
      if (named !== undefined) return XML_NAMED_ENTITIES[named] ?? whole;
      const code = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec ?? "", 10);
      // String.fromCodePoint throws a RangeError above U+10FFFF.
      return Number.isInteger(code) && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    },
  );
| 57 | + | |
/**
 * Extracts one TestRecord per executed `<testcase>` from a JUnit XML report.
 *
 * This is a regex scan, not a full XML parser: it expects double-quoted
 * attributes and testcases that are either self-closing or closed by
 * `</testcase>`.
 *
 * - Attribute lookups are anchored on a preceding whitespace (or the start
 *   of the attribute list) so `name=` cannot match inside `classname=`.
 * - `name` and `file` are both entity-decoded.
 * - A testcase containing `<skipped>` is omitted: it did not run, and the
 *   record type can only express "pass" or "fail".
 * - A missing or unparsable `time` counts as 0 ms instead of NaN.
 *
 * @param xml Full contents of the JUnit report.
 * @returns Records in document order.
 */
const parseJunit = (xml: string): TestRecord[] => {
  const testcaseRe = /<testcase\s+([^>]*?)(\/>|>([\s\S]*?)<\/testcase>)/g;
  const nameRe = /(?:^|\s)name="([^"]*)"/;
  const fileRe = /(?:^|\s)file="([^"]*)"/;
  const timeRe = /(?:^|\s)time="([^"]*)"/;

  const out: TestRecord[] = [];
  for (const m of xml.matchAll(testcaseRe)) {
    const attrs = m[1] ?? "";
    const inner = m[3] ?? "";
    if (/<skipped\b/.test(inner)) continue;

    // NOTE(review): `time` is treated as seconds, per the JUnit convention.
    const seconds = Number.parseFloat(timeRe.exec(attrs)?.[1] ?? "0");
    const failed = /<failure\b/.test(inner) || /<error\b/.test(inner);
    out.push({
      name: decodeXmlEntity(nameRe.exec(attrs)?.[1] ?? ""),
      file: decodeXmlEntity(fileRe.exec(attrs)?.[1] ?? ""),
      status: failed ? "fail" : "pass",
      durationMs: Number.isFinite(seconds) ? Math.round(seconds * 1000) : 0,
    });
  }
  return out;
};
| 78 | + | |
// Parse the report and tally the run in a single pass: how many records
// passed, and the sum of all per-test durations. Everything that did not
// pass counts as failing.
const tests = parseJunit(xml);
let passing = 0;
let totalDurationMs = 0;
for (const record of tests) {
  if (record.status === "pass") passing += 1;
  totalDurationMs += record.durationMs;
}
const failing = tests.length - passing;
| 83 | + | |
/** One test run as stored in the bundle. */
interface TestRunRecord {
  /** Full commit SHA from `git rev-parse HEAD`; used to skip commits already in the bundle. */
  sha: string;
  /** Output of `git rev-parse --abbrev-ref HEAD` at the time of the run. */
  branch: string;
  /** `Date.now()` at the moment the run was added to the bundle. */
  ranAt: number;
  /** Number of parsed test records. */
  total: number;
  /** Records with status "pass". */
  passing: number;
  /** `total - passing`. */
  failing: number;
  /** Sum of the per-test `durationMs` values. */
  durationMs: number;
  /** Every parsed test record of this run. */
  tests: TestRecord[];
}
| 94 | + | |
/** On-disk shape of content/git-history/<owner>__<name>__tests.json. */
interface TestBundle {
  /** Repository owner the bundle belongs to. */
  owner: string;
  /** Repository name the bundle belongs to. */
  name: string;
  /** Recorded runs, newest first (new runs are unshifted onto the front). */
  runs: TestRunRecord[];
}
| 100 | + | |
const bundlePath = resolve(REPO_ROOT, "content", "git-history", `${OWNER}__${NAME}__tests.json`);

// Minimal structural check for a stored run: the fields this script and
// the report code dereference unconditionally (`sha`, `tests`).
const isRunRecord = (value: unknown): value is TestRunRecord => {
  if (typeof value !== "object" || value === null) return false;
  const candidate = value as { sha?: unknown; tests?: unknown };
  return typeof candidate.sha === "string" && Array.isArray(candidate.tests);
};

// Load the existing bundle, if any. A corrupt or unreadable bundle must
// not block the deploy, so we start fresh — but say so, because that
// discards the accumulated run history.
let bundle: TestBundle = { owner: OWNER, name: NAME, runs: [] };
if (existsSync(bundlePath)) {
  try {
    const parsed: unknown = JSON.parse(readFileSync(bundlePath, "utf8"));
    const storedRuns =
      typeof parsed === "object" && parsed !== null ? (parsed as { runs?: unknown }).runs : undefined;
    // Malformed entries are dropped rather than crashing the lookups below.
    if (Array.isArray(storedRuns)) bundle = { ...bundle, runs: storedRuns.filter(isRunRecord) };
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`⚠ could not read ${bundlePath} (${reason}) — starting a fresh bundle`);
  }
}

// At most one run per commit: a redeploy of the same HEAD adds nothing.
if (bundle.runs.some((r) => r.sha === head)) {
  console.log(`✓ tests for ${head.slice(0, 7)} already in bundle (${bundle.runs.length} runs total) — nothing to add`);
} else {
  // Newest first, then trim the oldest runs beyond the cap.
  bundle.runs.unshift({
    sha: head,
    branch,
    ranAt: Date.now(),
    total: tests.length,
    passing,
    failing,
    durationMs: totalDurationMs,
    tests,
  });
  bundle.runs = bundle.runs.slice(0, MAX_RUNS);
  mkdirSync(resolve(REPO_ROOT, "content", "git-history"), { recursive: true });
  writeFileSync(bundlePath, JSON.stringify(bundle, null, 2));
  console.log(`✓ tests at ${head.slice(0, 7)} (${branch}): ${passing}/${tests.length} pass, ${failing} fail → bundle (${bundle.runs.length} runs total)`);
}
src/c14_github.ts
+46
−0
| @@ -206,3 +206,49 @@ export const fetchRepoCommits = async ( | ||
| 206 | 206 | commitsCache.set(key, { fetchedAt: Date.now(), commits }); |
| 207 | 207 | return commits; |
| 208 | 208 | }; |
| 209 | + | |
| 210 | +// --------------------------------------------------------------------- | |
| 211 | +// Test-results bundle. Companion to the git-history bundle above — | |
| 212 | +// scripts/p620/snapshot-tests.ts runs `bun test --reporter=junit` at | |
| 213 | +// each deploy and appends the result to this JSON file. Lets the | |
| 214 | +// container render /reports/live/tests against real data without | |
| 215 | +// running tests at runtime. | |
| 216 | +// --------------------------------------------------------------------- | |
| 217 | + | |
/** One test's outcome inside a bundled run. */
export interface TestRecord {
  /** Test name. */
  name: string;
  /** Test file the record belongs to. */
  file: string;
  /** Outcome of the test in this run. */
  status: "pass" | "fail";
  /** Duration of the test in milliseconds. */
  durationMs: number;
}
| 224 | + | |
/** One bundled test run, as appended by scripts/p620/snapshot-tests.ts. */
export interface TestRunRecord {
  /** Commit SHA the suite ran against. */
  sha: string;
  /** Branch name recorded with the run. */
  branch: string;
  /** Timestamp of the run in epoch milliseconds. */
  ranAt: number;
  /** Number of test records in the run. */
  total: number;
  /** Number of passing tests. */
  passing: number;
  /** Number of failing tests. */
  failing: number;
  /** Duration recorded for the whole run, in milliseconds. */
  durationMs: number;
  /** Per-test results of this run. */
  tests: TestRecord[];
}
| 235 | + | |
/** Parsed contents of a `<owner>__<name>__tests.json` bundle file. */
export interface TestBundle {
  /** Repository owner. */
  owner: string;
  /** Repository name. */
  name: string;
  /** Recorded runs; consumers treat index 0 as the latest run. */
  runs: TestRunRecord[];
}
| 241 | + | |
/**
 * Loads the test bundle for a repository from
 * `./content/git-history/<owner>__<name>__tests.json` (resolved against the
 * process's working directory).
 *
 * @param repoOwner Owner part of the bundle file name.
 * @param repoName  Name part of the bundle file name.
 * @returns The parsed bundle, or `null` when the file is absent, cannot be
 *          read or parsed, or does not carry a `runs` array. Never rejects.
 */
export const loadTestBundle = async (
  repoOwner: string,
  repoName: string,
): Promise<TestBundle | null> => {
  const bundlePath = `./content/git-history/${repoOwner}__${repoName}__tests.json`;
  try {
    const handle = Bun.file(bundlePath);
    const present = await handle.exists();
    if (!present) return null;

    const parsed = (await handle.json()) as TestBundle;
    if (!Array.isArray(parsed.runs)) return null;
    return parsed;
  } catch {
    // Read or parse failure (including JSON that is not an object):
    // treat the bundle as unavailable.
    return null;
  }
};
src/c21_app.ts
+15
−7
| @@ -36,6 +36,7 @@ import { | ||
| 36 | 36 | DEMO_STABILITY, |
| 37 | 37 | } from "./c31_reports_demo.ts"; |
| 38 | 38 | import { buildLiveReports } from "./c32_real_reports.ts"; |
| 39 | +import { buildLiveTestData } from "./c32_real_tests.ts"; | |
| 39 | 40 | import { parseRepoIdentifier } from "./c31_project_config.ts"; |
| 40 | 41 | import { judge } from "./c32_judge.ts"; |
| 41 | 42 | import { |
| @@ -532,18 +533,25 @@ ${rows} | ||
| 532 | 533 | }, |
| 533 | 534 | |
| 534 | 535 | "/reports/live/tests": async () => { |
| 536 | + const data = await buildLiveTestData(LIVE_REPO_OWNER, LIVE_REPO_NAME); | |
| 537 | + const ranOn = data.ranAt ? new Date(data.ranAt).toISOString().slice(0, 10) : null; | |
| 538 | + const period = data.runsCount === 0 | |
| 539 | + ? "geen runs in bundle" | |
| 540 | + : `last run ${ranOn} · ${data.runsCount} run${data.runsCount === 1 ? "" : "s"} cumulatief`; | |
| 541 | + const unavailableNote = data.runsCount === 0 | |
| 542 | + ? "Nog geen test-runs gebundeld. De volgende deploy draait `bun test --reporter=junit` op de huidige HEAD en publiceert het resultaat hier. Stabiliteit (flaky %, deletion) bouwt zich op naarmate er meer runs in de bundle staan — de demo op [/reports/demo/tests](/reports/demo/tests) toont waar het naartoe groeit." | |
| 543 | + : undefined; | |
| 535 | 544 | const html = await renderPage({ |
| 536 | - title: "Tests overzicht · live (placeholder) — tdd.md", | |
| 537 | - description: "Placeholder voor de live test-overview — wacht op de sandbox-runner sliver.", | |
| 545 | + title: "Tests overzicht · live — tdd.md", | |
| 546 | + description: `Live test-snapshot van ${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} — ${data.runsCount} run${data.runsCount === 1 ? "" : "s"} gebundeld.`, | |
| 538 | 547 | bodyMarkdown: testsOverviewMd({ |
| 539 | - period: "live", | |
| 548 | + period, | |
| 540 | 549 | bannerHtml: LIVE_BANNER_HTML, |
| 541 | - snapshots: [], | |
| 542 | - stability: [], | |
| 543 | - unavailableNote: "De per-repo test-snapshot en stabiliteitstabel hebben de sandbox-runner sliver nodig (block 1 vervolg). Tot dat klaar is, alleen de exec-summary + drill-down draaien op echte data; de testpagina staat in de [demo](/reports/demo/tests).", | |
| 550 | + snapshots: data.snapshots, | |
| 551 | + stability: data.stability, | |
| 552 | + unavailableNote, | |
| 544 | 553 | }), |
| 545 | 554 | ogPath: "https://tdd.md/reports/live/tests", |
| 546 | - noindex: true, | |
| 547 | 555 | }); |
| 548 | 556 | return htmlResponse(html); |
| 549 | 557 | }, |
src/c32_real_tests.ts
+140
−0
| @@ -0,0 +1,140 @@ | ||
| 1 | +// c32 — logic: aggregate the per-deploy test bundle into the same | |
| 2 | +// TestSnapshot[] / TestStability[] shape that the demo page renders. | |
| 3 | +// HEAD-only snapshots; stability accumulates as more deploys add runs. | |
| 4 | +// | |
| 5 | +// Pure given the bundle + commits in (no I/O of its own beyond delegating | |
| 6 | +// to c14_github's bundle loader and commits fetcher). | |
| 7 | + | |
| 8 | +import { fetchRepoCommits, loadTestBundle } from "./c14_github.ts"; | |
| 9 | +import type { | |
| 10 | + AgentReport, | |
| 11 | + TestFailure, | |
| 12 | + TestSnapshot, | |
| 13 | + TestStability, | |
| 14 | +} from "./c31_reports_demo.ts"; | |
| 15 | + | |
// Co-author footer patterns in priority order: the first pattern that
// matches anywhere in the commit message decides the agent.
const AGENT_FOOTER_RULES: ReadonlyArray<readonly [RegExp, AgentReport["slug"]]> = [
  [/Co-Authored-By:.*Claude/i, "claude-code"],
  [/Co-Authored-By:.*Cursor/i, "cursor"],
  [/Co-Authored-By:.*Aider/i, "aider"],
];

/**
 * Maps a commit message to an agent slug based on its `Co-Authored-By:`
 * footer. Matching is case-insensitive and confined to a single line
 * (`.` does not cross newlines).
 *
 * @param msg Full commit message.
 * @returns The agent slug, or `null` when no known agent footer is present.
 */
const detectAgent = (msg: string): AgentReport["slug"] | null => {
  for (const [pattern, slug] of AGENT_FOOTER_RULES) {
    if (pattern.test(msg)) return slug;
  }
  return null;
};
| 22 | + | |
/**
 * Builds the display label for a test: `<file basename> > <test name>`.
 *
 * The basename is the last non-empty path segment, splitting on both `/`
 * and `\`, so a trailing separator or a Windows-style path still yields a
 * file name. When `file` has no usable segment (e.g. it is ""), the label
 * is just the test name instead of starting with a dangling " > ".
 *
 * @param file Path of the test file as recorded in the bundle; may be "".
 * @param name Test name.
 * @returns The label shown in the snapshot and stability tables.
 */
const shortenTestLabel = (file: string, name: string): string => {
  const base = file
    .split(/[\\/]/)
    .filter((segment) => segment !== "")
    .pop();
  return base === undefined ? name : `${base} > ${name}`;
};
| 27 | + | |
/** Everything the live tests page needs, derived from the test bundle. */
export interface LiveTestData {
  /** Snapshot of the latest run; empty when the bundle has no runs. */
  snapshots: TestSnapshot[];
  /** Per-test pass/fail aggregation across all bundled runs. */
  stability: TestStability[];
  /** Number of runs in the bundle (0 when the bundle is missing or empty). */
  runsCount: number;
  /** `ranAt` of the latest run, or `null` without runs. */
  ranAt: number | null;
  /** SHA of the latest run, or `null` without runs. */
  headSha: string | null;
}
| 35 | + | |
/**
 * Aggregates the on-disk test bundle into the data for the live tests page.
 *
 * - `snapshots` holds one entry for the latest run (`runs[0]`). Each failing
 *   test carries a `since` date: the earliest bundled run in which that
 *   test failed.
 * - `stability` counts passes and failures per (file, name) over every run,
 *   marks tests missing from the latest run as deleted, and keeps the 30
 *   entries that sort first by fail count, deleted flag, then pass count.
 *
 * Commit metadata is only used to attribute `lastBrokenBy`. If fetching it
 * fails, the page still renders from the bundle and attribution falls back
 * to the default agent.
 *
 * @param repoOwner Repository owner.
 * @param repoName  Repository name.
 * @returns Aggregated data; all-empty with `runsCount: 0` when there is no
 *          bundle or it has no runs.
 */
export const buildLiveTestData = async (
  repoOwner: string,
  repoName: string,
): Promise<LiveTestData> => {
  const bundle = await loadTestBundle(repoOwner, repoName);
  const latest = bundle?.runs[0];
  if (!bundle || !latest) {
    return { snapshots: [], stability: [], runsCount: 0, ranAt: null, headSha: null };
  }
  const repoSlug = `${repoOwner}/${repoName}`;
  const keyOf = (file: string, name: string): string => `${file}|${name}`;

  // One pass over every run collects, per (file, name): pass/fail counts,
  // the earliest failing run (for "since") and the most recent failing
  // run (for "lastBrokenBy").
  interface Stat {
    name: string;
    file: string;
    pass: number;
    fail: number;
    firstFailAt: number | null;
    lastBrokenSha: string | null;
    lastBrokenAt: number;
  }
  const stats = new Map<string, Stat>();
  for (const run of bundle.runs) {
    for (const t of run.tests) {
      const key = keyOf(t.file, t.name);
      let s = stats.get(key);
      if (!s) {
        s = {
          name: t.name,
          file: t.file,
          pass: 0,
          fail: 0,
          firstFailAt: null,
          lastBrokenSha: null,
          lastBrokenAt: 0,
        };
        stats.set(key, s);
      }
      if (t.status === "pass") {
        s.pass++;
        continue;
      }
      s.fail++;
      if (s.firstFailAt === null || run.ranAt < s.firstFailAt) s.firstFailAt = run.ranAt;
      if (run.ranAt > s.lastBrokenAt) {
        s.lastBrokenSha = run.sha;
        s.lastBrokenAt = run.ranAt;
      }
    }
  }

  const failures: TestFailure[] = latest.tests
    .filter((t) => t.status === "fail")
    .map((t) => {
      const sinceTs = stats.get(keyOf(t.file, t.name))?.firstFailAt ?? latest.ranAt;
      return {
        test: shortenTestLabel(t.file, t.name),
        since: new Date(sinceTs).toISOString().slice(0, 10),
      };
    });

  const snapshot: TestSnapshot = {
    repo: repoSlug,
    branch: latest.branch,
    total: latest.total,
    passing: latest.passing,
    failing: latest.failing,
    failures,
  };

  // SHA → agent, from the Co-Authored-By footers of recent commits.
  // NOTE(review): assumes fetchRepoCommits can reject (e.g. on a network or
  // API failure) — confirm against its implementation. Attribution is
  // optional, so a failure here must not take the whole page down.
  const shaToAgent = new Map<string, AgentReport["slug"] | null>();
  try {
    const commits = await fetchRepoCommits(repoOwner, repoName, 100);
    for (const c of commits) shaToAgent.set(c.sha, detectAgent(c.commit.message));
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`buildLiveTestData: commits unavailable for ${repoSlug} (${reason}); using fallback agent`);
  }

  const latestKeys = new Set(latest.tests.map((t) => keyOf(t.file, t.name)));

  // lastBrokenBy needs an agent slug. If a SHA can't be mapped to an agent
  // (e.g. the commit is outside the 100-commit window), fall back to the
  // agent of the latest run — a defensible default for the dogfood case
  // where one agent produces the history.
  const fallbackAgent: AgentReport["slug"] = shaToAgent.get(latest.sha) ?? "claude-code";

  const stability: TestStability[] = Array.from(stats.values())
    .map<TestStability>((s) => {
      const mapped = s.lastBrokenSha ? shaToAgent.get(s.lastBrokenSha) : null;
      const deleted = latestKeys.has(keyOf(s.file, s.name)) ? 0 : 1;
      // Flag tests that failed and were then deleted, or that fail often
      // relative to how often they pass (at least twice, and ≥ pass/5).
      const flagged = s.fail > 0 && (deleted > 0 || s.fail >= Math.max(2, s.pass / 5));
      return {
        test: shortenTestLabel(s.file, s.name),
        repo: repoSlug,
        pass: s.pass,
        fail: s.fail,
        deleted,
        lastBrokenBy: mapped ?? fallbackAgent,
        flagged,
      };
    })
    .sort((a, b) => b.fail - a.fail || b.deleted - a.deleted || b.pass - a.pass)
    .slice(0, 30);

  return {
    snapshots: [snapshot],
    stability,
    runsCount: bundle.runs.length,
    ranAt: latest.ranAt,
    headSha: latest.sha,
  };
};