syntaxai/tdd.md · commit bbab5ed

Reports: live test snapshot via deploy-time bun-test bundle

Replaces the placeholder on /reports/live/tests with a real per-test
view sourced from `bun test --reporter=junit`, run at deploy time and
appended to a per-repo bundle.

  scripts/p620/snapshot-tests.ts
    Runs `bun test --reporter=junit`, parses the XML, appends the
    result to content/git-history/<owner>__<name>__tests.json. Each
    deploy adds at most one run (skipped when HEAD is already in the
    bundle). Capped at 50 runs so the file stays bounded.

  src/c14_github.ts
    loadTestBundle: reads the bundle from disk; same shape and home
    as the git-history snapshot.

  src/c32_real_tests.ts
    Aggregates the bundle into TestSnapshot[] (latest run, with
    "since" computed from the oldest run that flagged a test as
    failing) and TestStability[] (pass/fail counts per (file, name)
    across all runs, with a "deleted" flag when a test that previously
    appeared is missing from the latest run). lastBrokenBy is mapped
    via Co-Authored-By footers from the commits bundle.

  src/c21_app.ts
    /reports/live/tests now calls buildLiveTestData. When the bundle
    is empty it shows an honest unavailable-note pointing at the demo;
    once at least one run is in the bundle, real data renders.

  scripts/p620/deploy-tdd-md.sh
    Runs snapshot-tests after snapshot-git-history, before rsync.

V1 is HEAD-only per deploy: stability/flakiness data accumulates run
by run as deploys happen. No git-worktree gymnastics, no per-commit
bun-install. Bumping to historical-commit testing is a future sliver.

Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
author
syntaxai <[email protected]>
date
2026-05-09 07:34:48 +01:00
parent
b91543a
commit
bbab5ed76d0cb3c9d1bec8b1db7981d8b92a0905

5 files changed · +336 −7

modified scripts/p620/deploy-tdd-md.sh +6 −0
@@ -44,6 +44,12 @@ echo "→ snapshot git history → content/git-history/"
4444 ( cd "$REPO_ROOT" && bun scripts/p620/snapshot-git-history.ts ) \
4545 || { echo "✗ snapshot-git-history mislukt"; exit 1; }
4646
47+echo "→ snapshot tests (bun test --reporter=junit) → content/git-history/"
48+# Runs the test suite at HEAD and appends the result to the per-repo
49+# tests bundle. Stability data accumulates run-by-run across deploys.
50+( cd "$REPO_ROOT" && bun scripts/p620/snapshot-tests.ts ) \
51+ || { echo "✗ snapshot-tests mislukt"; exit 1; }
52+
4753 echo "→ source rsync naar $SSH_HOST:~/$REMOTE_SRC_DIR"
4854 ssh "$SSH_HOST" "mkdir -p ~/$REMOTE_SRC_DIR"
4955 # --delete zodat verwijderde files ook weggaan op remote.
added scripts/p620/snapshot-tests.ts +129 −0
@@ -0,0 +1,129 @@
1+#!/usr/bin/env bun
2+// Run `bun test` on the current HEAD and append the result to a
3+// per-repo bundle alongside the git-history snapshot. The container
4+// reads this bundle at runtime to render /reports/live/tests for the
5+// (private) syntaxai/tdd.md repo without needing a runtime sandbox.
6+//
7+// Strategy: HEAD-only per deploy. The bundle accumulates one run per
8+// deploy (capped at 50), so stability data builds organically over
9+// time. No git-worktree gymnastics, no per-commit bun-install.
10+//
11+// Output: content/git-history/<owner>__<name>__tests.json
12+// Schema: { owner, name, runs: TestRunRecord[] } — newest first.
13+
14+import { spawnSync } from "node:child_process";
15+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
16+import { resolve } from "node:path";
17+
// Deploy-time configuration for the test snapshot.

// Repository root: two directories above this script's own directory.
const REPO_ROOT = resolve(import.meta.dir, "../..");

// Repository identity; both parts go into the bundle and its file name.
const OWNER = "syntaxai";
const NAME = "tdd.md";

// Upper bound on the number of runs kept in the bundle.
const MAX_RUNS = 50;

// Path handed to `bun test` as the JUnit report output file.
const JUNIT_OUT = "/tmp/tdd-md-test-junit.xml";
23+
/**
 * Runs `cmd` with `args` in the repository root and returns its trimmed
 * stdout.
 *
 * @param cmd  Executable to run.
 * @param args Arguments passed to the executable (no shell involved).
 * @returns Trimmed stdout, or "" when the process could not be spawned,
 *          was killed by a signal, or exited with a non-zero status.
 */
const sh = (cmd: string, args: string[]): string => {
  const r = spawnSync(cmd, args, { cwd: REPO_ROOT, encoding: "utf8" });
  // A failing command can still write to stdout (for example
  // `git rev-parse HEAD` echoes "HEAD" when the revision cannot be
  // resolved), so the exit status decides whether the output is usable,
  // not the mere presence of output.
  if (r.error !== undefined || r.status !== 0) return "";
  return (r.stdout ?? "").trim();
};
28+
// Resolve the commit under test. Every run in the bundle is keyed by its
// SHA, so without one there is nothing meaningful to record.
const head = sh("git", ["rev-parse", "HEAD"]);
const branch = sh("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
if (head === "") {
  console.error("could not resolve HEAD");
  process.exit(1);
}

// JUNIT_OUT is a fixed path that survives between deploys. Truncate it
// before running so that, if bun crashes before writing its report, the
// previous deploy's results are not silently recorded against this HEAD.
writeFileSync(JUNIT_OUT, "");

// Run tests. bun exits non-zero when tests fail — that's fine, we
// just need the junit XML, which it writes regardless.
spawnSync("bun", ["test", "--reporter=junit", `--reporter-outfile=${JUNIT_OUT}`], {
  cwd: REPO_ROOT,
  stdio: "inherit",
});

// An absent or still-empty file means bun never produced a report.
const xml = existsSync(JUNIT_OUT) ? readFileSync(JUNIT_OUT, "utf8") : "";
if (xml.trim() === "") {
  console.error(`✗ junit output missing at ${JUNIT_OUT} — bun test crashed before writing`);
  process.exit(1);
}
47+
/** One `<testcase>` of the JUnit report, reduced to the fields the bundle keeps. */
interface TestRecord {
  /** The testcase's `name` attribute with XML entities decoded. */
  name: string;
  /** The testcase's `file` attribute ("" when the attribute is absent). */
  file: string;
  /** "fail" when the testcase contains a `<failure>` or `<error>` child, else "pass". */
  status: "pass" | "fail";
  /** The `time` attribute (seconds) converted to whole milliseconds. */
  durationMs: number;
}
54+
/**
 * Decodes XML references in an attribute value.
 *
 * Handles the five predefined entities (`&apos;`, `&quot;`, `&lt;`, `&gt;`,
 * `&amp;`) as well as decimal (`&#39;`) and hexadecimal (`&#x27;`) character
 * references. The string is processed in a single pass, so text produced by
 * one replacement is never decoded a second time ("&amp;lt;" becomes "&lt;",
 * not "<"). Unknown entities and out-of-range code points are left as-is.
 *
 * @param s Raw attribute text as it appears in the XML.
 * @returns The decoded text.
 */
const decodeXmlEntity = (s: string): string =>
  s.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g, (whole: string, ref: string): string => {
    if (ref.startsWith("#")) {
      const isHex = ref.charAt(1) === "x";
      const code = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws above U+10FFFF; keep such input verbatim.
      return Number.isNaN(code) || code > 0x10ffff ? whole : String.fromCodePoint(code);
    }
    switch (ref) {
      case "apos":
        return "'";
      case "quot":
        return '"';
      case "lt":
        return "<";
      case "gt":
        return ">";
      case "amp":
        return "&";
      default:
        return whole;
    }
  });
57+
/**
 * Extracts one TestRecord per `<testcase>` element from a JUnit XML report.
 *
 * This is a regex scan, not a full XML parse: it recognises self-closing
 * testcases and testcases with a body, and reads the `name`, `file` and
 * `time` attributes. A testcase is a failure when its body contains a
 * `<failure>` or `<error>` element; everything else is recorded as "pass".
 *
 * NOTE(review): a testcase whose body only holds `<skipped>` is therefore
 * also recorded as "pass" — confirm that is the intended accounting.
 *
 * @param xml Contents of the JUnit report.
 * @returns Records in document order; empty when no testcase is found.
 */
const parseJunit = (xml: string): TestRecord[] => {
  const testcaseRe = /<testcase\s+([^>]*?)(\/>|>([\s\S]*?)<\/testcase>)/g;
  // Anchor on the start of the attribute name so that `name` cannot match
  // inside `classname` (and likewise for any other suffix collision).
  const nameRe = /(?:^|\s)name="([^"]*)"/;
  const fileRe = /(?:^|\s)file="([^"]*)"/;
  const timeRe = /(?:^|\s)time="([^"]*)"/;

  const out: TestRecord[] = [];
  for (const m of xml.matchAll(testcaseRe)) {
    const attrs = m[1] ?? "";
    const inner = m[3] ?? "";
    const seconds = Number.parseFloat(timeRe.exec(attrs)?.[1] ?? "0");
    const failed = /<failure\b/.test(inner) || /<error\b/.test(inner);
    out.push({
      name: decodeXmlEntity(nameRe.exec(attrs)?.[1] ?? ""),
      // Paths are attribute text too, so they get the same entity decoding.
      file: decodeXmlEntity(fileRe.exec(attrs)?.[1] ?? ""),
      status: failed ? "fail" : "pass",
      // A malformed `time` would otherwise become NaN, which JSON stores as null.
      durationMs: Number.isFinite(seconds) ? Math.round(seconds * 1000) : 0,
    });
  }
  return out;
};
78+
// Parse the report, then derive the run totals in one pass over the records.
const tests = parseJunit(xml);
const totals = { passing: 0, totalDurationMs: 0 };
for (const t of tests) {
  if (t.status === "pass") totals.passing += 1;
  totals.totalDurationMs += t.durationMs;
}
const { passing, totalDurationMs } = totals;
// Every record is either "pass" or "fail", so the remainder failed.
const failing = tests.length - passing;
83+
/** One recorded `bun test` run, as stored in the bundle's `runs` array. */
interface TestRunRecord {
  /** Full SHA of HEAD at the time of the run. */
  sha: string;
  /** Output of `git rev-parse --abbrev-ref HEAD` at the time of the run. */
  branch: string;
  /** `Date.now()` at the moment the run was appended (epoch milliseconds). */
  ranAt: number;
  /** Number of parsed test records. */
  total: number;
  /** Number of records with status "pass". */
  passing: number;
  /** `total` minus `passing`. */
  failing: number;
  /** Sum of the per-test `durationMs` values. */
  durationMs: number;
  /** The individual test records of this run. */
  tests: TestRecord[];
}
94+
/** On-disk shape of content/git-history/<owner>__<name>__tests.json. */
interface TestBundle {
  /** Repository owner. */
  owner: string;
  /** Repository name. */
  name: string;
  /** Recorded runs, newest first (new runs are inserted at the front). */
  runs: TestRunRecord[];
}
100+
// Load the existing bundle (if any), prepend this run, trim to MAX_RUNS and
// write the result back. At most one run is recorded per HEAD.
const bundleDir = resolve(REPO_ROOT, "content", "git-history");
const bundlePath = resolve(bundleDir, `${OWNER}__${NAME}__tests.json`);
let bundle: TestBundle = { owner: OWNER, name: NAME, runs: [] };
if (existsSync(bundlePath)) {
  try {
    const parsed: unknown = JSON.parse(readFileSync(bundlePath, "utf8"));
    if (typeof parsed === "object" && parsed !== null && "runs" in parsed && Array.isArray(parsed.runs)) {
      bundle = parsed as TestBundle;
    } else {
      // Starting over drops the accumulated history, so say so out loud.
      console.warn(`! ${bundlePath} has no runs array — starting a fresh bundle`);
    }
  } catch (err: unknown) {
    // Corrupt or unreadable bundle — start fresh, deploy isn't blocked.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`! could not read ${bundlePath} (${reason}) — starting a fresh bundle`);
  }
}

if (bundle.runs.some((r) => r.sha === head)) {
  console.log(`✓ tests for ${head.slice(0, 7)} already in bundle (${bundle.runs.length} runs total) — nothing to add`);
} else if (tests.length === 0) {
  // A report without any testcase would be stored as a run in which every
  // previously seen test is missing. Skip it; the deploy itself continues.
  console.warn(`! no testcases found in ${JUNIT_OUT} for ${head.slice(0, 7)} — run not recorded`);
} else {
  bundle.runs.unshift({
    sha: head,
    branch,
    ranAt: Date.now(),
    total: tests.length,
    passing,
    failing,
    durationMs: totalDurationMs,
    tests,
  });
  // Newest runs sit at the front, so slicing drops the oldest ones.
  bundle.runs = bundle.runs.slice(0, MAX_RUNS);
  mkdirSync(bundleDir, { recursive: true });
  writeFileSync(bundlePath, JSON.stringify(bundle, null, 2));
  console.log(`✓ tests at ${head.slice(0, 7)} (${branch}): ${passing}/${tests.length} pass, ${failing} fail → bundle (${bundle.runs.length} runs total)`);
}
modified src/c14_github.ts +46 −0
@@ -206,3 +206,49 @@ export const fetchRepoCommits = async (
206206 commitsCache.set(key, { fetchedAt: Date.now(), commits });
207207 return commits;
208208 };
209+
210+// ---------------------------------------------------------------------
211+// Test-results bundle. Companion to the git-history bundle above —
212+// scripts/p620/snapshot-tests.ts runs `bun test --reporter=junit` at
213+// each deploy and appends the result to this JSON file. Lets the
214+// container render /reports/live/tests against real data without
215+// running tests at runtime.
216+// ---------------------------------------------------------------------
217+
/** A single test result inside a recorded run of the test-results bundle. */
export interface TestRecord {
  /** Test name. */
  name: string;
  /** Test file the result belongs to. */
  file: string;
  /** Outcome of the test in that run. */
  status: "pass" | "fail";
  /** Duration in milliseconds. */
  durationMs: number;
}
224+
/** One deploy-time test run as stored in the test-results bundle. */
export interface TestRunRecord {
  /** Commit SHA the run was executed against. */
  sha: string;
  /** Branch name recorded for the run. */
  branch: string;
  /** Timestamp of the run in epoch milliseconds. */
  ranAt: number;
  /** Total number of tests in the run. */
  total: number;
  /** Number of passing tests. */
  passing: number;
  /** Number of failing tests. */
  failing: number;
  /** Summed test duration in milliseconds. */
  durationMs: number;
  /** Per-test results of the run. */
  tests: TestRecord[];
}
235+
/** Root object of a `<owner>__<name>__tests.json` bundle file. */
export interface TestBundle {
  /** Repository owner the bundle belongs to. */
  owner: string;
  /** Repository name the bundle belongs to. */
  name: string;
  /** Recorded runs; the snapshot script stores them newest first. */
  runs: TestRunRecord[];
}
241+
/**
 * Reads the test-results bundle for a repository from
 * `./content/git-history/<owner>__<name>__tests.json`.
 *
 * @param repoOwner Repository owner used in the file name.
 * @param repoName  Repository name used in the file name.
 * @returns The parsed bundle, or null when the file does not exist, cannot
 *          be read or parsed, or does not carry a `runs` array.
 */
export const loadTestBundle = async (
  repoOwner: string,
  repoName: string,
): Promise<TestBundle | null> => {
  const bundlePath = `./content/git-history/${repoOwner}__${repoName}__tests.json`;
  try {
    const handle = Bun.file(bundlePath);
    const present = await handle.exists();
    if (!present) return null;

    const parsed = (await handle.json()) as TestBundle;
    // Only the presence of the runs array is verified here.
    if (!Array.isArray(parsed.runs)) return null;
    return parsed;
  } catch {
    // Any read or parse failure is reported as "no bundle".
    return null;
  }
};
modified src/c21_app.ts +15 −7
@@ -36,6 +36,7 @@ import {
3636 DEMO_STABILITY,
3737 } from "./c31_reports_demo.ts";
3838 import { buildLiveReports } from "./c32_real_reports.ts";
39+import { buildLiveTestData } from "./c32_real_tests.ts";
3940 import { parseRepoIdentifier } from "./c31_project_config.ts";
4041 import { judge } from "./c32_judge.ts";
4142 import {
@@ -532,18 +533,25 @@ ${rows}
532533 },
533534
534535 "/reports/live/tests": async () => {
536+ const data = await buildLiveTestData(LIVE_REPO_OWNER, LIVE_REPO_NAME);
537+ const ranOn = data.ranAt ? new Date(data.ranAt).toISOString().slice(0, 10) : null;
538+ const period = data.runsCount === 0
539+ ? "geen runs in bundle"
540+ : `last run ${ranOn} · ${data.runsCount} run${data.runsCount === 1 ? "" : "s"} cumulatief`;
541+ const unavailableNote = data.runsCount === 0
542+ ? "Nog geen test-runs gebundeld. De volgende deploy draait `bun test --reporter=junit` op de huidige HEAD en publiceert het resultaat hier. Stabiliteit (flaky %, deletion) bouwt zich op naarmate er meer runs in de bundle staan — de demo op [/reports/demo/tests](/reports/demo/tests) toont waar het naartoe groeit."
543+ : undefined;
535544 const html = await renderPage({
536- title: "Tests overzicht · live (placeholder) — tdd.md",
537- description: "Placeholder voor de live test-overview — wacht op de sandbox-runner sliver.",
545+ title: "Tests overzicht · live — tdd.md",
546+ description: `Live test-snapshot van ${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} — ${data.runsCount} run${data.runsCount === 1 ? "" : "s"} gebundeld.`,
538547 bodyMarkdown: testsOverviewMd({
539- period: "live",
548+ period,
540549 bannerHtml: LIVE_BANNER_HTML,
541- snapshots: [],
542- stability: [],
543- unavailableNote: "De per-repo test-snapshot en stabiliteitstabel hebben de sandbox-runner sliver nodig (block 1 vervolg). Tot dat klaar is, alleen de exec-summary + drill-down draaien op echte data; de testpagina staat in de [demo](/reports/demo/tests).",
550+ snapshots: data.snapshots,
551+ stability: data.stability,
552+ unavailableNote,
544553 }),
545554 ogPath: "https://tdd.md/reports/live/tests",
546- noindex: true,
547555 });
548556 return htmlResponse(html);
549557 },
added src/c32_real_tests.ts +140 −0
@@ -0,0 +1,140 @@
1+// c32 — logic: aggregate the per-deploy test bundle into the same
2+// TestSnapshot[] / TestStability[] shape that the demo page renders.
3+// HEAD-only snapshots; stability accumulates as more deploys add runs.
4+//
5+// Pure given the bundle + commits in (no I/O of its own beyond delegating
6+// to c14_github's bundle loader and commits fetcher).
7+
8+import { fetchRepoCommits, loadTestBundle } from "./c14_github.ts";
9+import type {
10+ AgentReport,
11+ TestFailure,
12+ TestSnapshot,
13+ TestStability,
14+} from "./c31_reports_demo.ts";
15+
/**
 * Maps a commit message to an agent slug based on its Co-Authored-By line.
 *
 * The rules are tried in order and the first match wins; matching is
 * case-insensitive and confined to a single line of the message.
 *
 * @param msg Full commit message.
 * @returns The matching agent slug, or null when no rule matches.
 */
const detectAgent = (msg: string): AgentReport["slug"] | null => {
  const rules: ReadonlyArray<readonly [RegExp, AgentReport["slug"]]> = [
    [/Co-Authored-By:.*Claude/i, "claude-code"],
    [/Co-Authored-By:.*Cursor/i, "cursor"],
    [/Co-Authored-By:.*Aider/i, "aider"],
  ];
  for (const [pattern, slug] of rules) {
    if (pattern.test(msg)) return slug;
  }
  return null;
};
22+
/**
 * Builds the display label for a test: the file's base name (everything
 * after the last "/"), then " > ", then the test name.
 *
 * @param file Test file path as stored in the bundle.
 * @param name Test name.
 * @returns Label of the form "<basename> > <name>".
 */
const shortenTestLabel = (file: string, name: string): string => {
  const lastSlash = file.lastIndexOf("/");
  const base = lastSlash === -1 ? file : file.slice(lastSlash + 1);
  return [base, name].join(" > ");
};
27+
/** Result of buildLiveTestData: everything the live tests page renders. */
export interface LiveTestData {
  /** Snapshot of the latest run (one entry), or empty when there are no runs. */
  snapshots: TestSnapshot[];
  /** Per-test pass/fail statistics across all runs, at most 30 rows. */
  stability: TestStability[];
  /** Number of runs in the bundle; 0 when the bundle is missing or empty. */
  runsCount: number;
  /** `ranAt` of the latest run, or null when there are no runs. */
  ranAt: number | null;
  /** SHA of the latest run, or null when there are no runs. */
  headSha: string | null;
}
35+
/**
 * Aggregates the deploy-time test bundle of a repository into the data the
 * live tests page renders.
 *
 * - `snapshots` holds one entry for the latest run (`runs[0]`); each failing
 *   test carries a "since" date taken from the oldest run, by `ranAt`, in
 *   which that same (file, name) was failing.
 * - `stability` counts passes and failures per (file, name) over every run,
 *   marks tests missing from the latest run as deleted, and keeps the top 30
 *   rows ordered by failures, then deleted, then passes.
 *
 * @param repoOwner Repository owner.
 * @param repoName  Repository name.
 * @returns Aggregated data; all-empty with `runsCount: 0` when the bundle
 *          is missing or contains no runs.
 */
export const buildLiveTestData = async (
  repoOwner: string,
  repoName: string,
): Promise<LiveTestData> => {
  const bundle = await loadTestBundle(repoOwner, repoName);
  const latest = bundle?.runs[0];
  if (!bundle || !latest) {
    return { snapshots: [], stability: [], runsCount: 0, ranAt: null, headSha: null };
  }
  const repoSlug = `${repoOwner}/${repoName}`;
  const keyOf = (t: { file: string; name: string }): string => `${t.file}|${t.name}`;

  // For "since" we want the oldest run that has this test as failing.
  const oldestFirst = [...bundle.runs].sort((a, b) => a.ranAt - b.ranAt);

  const failures: TestFailure[] = latest.tests
    .filter((t) => t.status === "fail")
    .map((t) => {
      const firstFail = oldestFirst.find((r) =>
        r.tests.some((x) => x.name === t.name && x.file === t.file && x.status === "fail"),
      );
      const sinceTs = firstFail?.ranAt ?? latest.ranAt;
      return { test: shortenTestLabel(t.file, t.name), since: new Date(sinceTs).toISOString().slice(0, 10) };
    });

  const snapshot: TestSnapshot = {
    repo: repoSlug,
    branch: latest.branch,
    total: latest.total,
    passing: latest.passing,
    failing: latest.failing,
    failures,
  };

  // Agent attribution is secondary to the test data itself, which comes from
  // the local bundle. If the commit lookup fails, keep rendering and let the
  // fallback agent below cover every row.
  // NOTE(review): assumes fetchRepoCommits rejects on failure — confirm.
  const shaToAgent = new Map<string, AgentReport["slug"] | null>();
  try {
    const commits = await fetchRepoCommits(repoOwner, repoName, 100);
    for (const c of commits) shaToAgent.set(c.sha, detectAgent(c.commit.message));
  } catch (err: unknown) {
    console.warn(`buildLiveTestData: commit lookup for ${repoSlug} failed, using fallback agent`, err);
  }

  // Stability: count pass/fail per (file, name) across every run, and
  // remember the most recent run (by ranAt) in which the test failed.
  interface Stat {
    name: string;
    file: string;
    pass: number;
    fail: number;
    lastBrokenSha: string | null;
    lastBrokenAt: number;
  }
  const stats = new Map<string, Stat>();
  for (const run of bundle.runs) {
    for (const t of run.tests) {
      const key = keyOf(t);
      let s = stats.get(key);
      if (!s) {
        s = { name: t.name, file: t.file, pass: 0, fail: 0, lastBrokenSha: null, lastBrokenAt: 0 };
        stats.set(key, s);
      }
      if (t.status === "pass") {
        s.pass++;
      } else {
        s.fail++;
        if (run.ranAt > s.lastBrokenAt) {
          s.lastBrokenSha = run.sha;
          s.lastBrokenAt = run.ranAt;
        }
      }
    }
  }

  // Tests seen in an earlier run but absent from the latest count as deleted.
  const latestKeys = new Set(latest.tests.map(keyOf));

  // lastBrokenBy needs an agent slug; if we can't map a SHA to an agent
  // (e.g. the commit isn't in the 100-commit window we fetch), fall
  // back to the agent of the latest run, which is a defensible default
  // for the dogfood case (one agent producing the history).
  const fallbackAgent: AgentReport["slug"] = shaToAgent.get(latest.sha) ?? "claude-code";

  const stability: TestStability[] = Array.from(stats.values())
    .map<TestStability>((s) => {
      const mapped = s.lastBrokenSha ? shaToAgent.get(s.lastBrokenSha) : null;
      const deleted = latestKeys.has(keyOf(s)) ? 0 : 1;
      // Flag a test that has failed and is either gone from the latest run
      // or has failed at least max(2, pass / 5) times.
      const flagged = s.fail > 0 && (deleted > 0 || s.fail >= Math.max(2, s.pass / 5));
      return {
        test: shortenTestLabel(s.file, s.name),
        repo: repoSlug,
        pass: s.pass,
        fail: s.fail,
        deleted,
        lastBrokenBy: mapped ?? fallbackAgent,
        flagged,
      };
    })
    .sort((a, b) => b.fail - a.fail || b.deleted - a.deleted || b.pass - a.pass)
    .slice(0, 30);

  return {
    snapshots: [snapshot],
    stability,
    runsCount: bundle.runs.length,
    ranAt: latest.ranAt,
    headSha: latest.sha,
  };
};