bbab5ed76d0cb3c9d1bec8b1db7981d8b92a0905 diff --git a/scripts/p620/deploy-tdd-md.sh b/scripts/p620/deploy-tdd-md.sh index b975ee38c9cc1b3b94cda92b57c205bade89812f..f973b4f1a045e09af3a0375e23c97bf67522a0c7 100755 --- a/scripts/p620/deploy-tdd-md.sh +++ b/scripts/p620/deploy-tdd-md.sh @@ -44,6 +44,12 @@ echo "→ snapshot git history → content/git-history/" ( cd "$REPO_ROOT" && bun scripts/p620/snapshot-git-history.ts ) \ || { echo "✗ snapshot-git-history mislukt"; exit 1; } +echo "→ snapshot tests (bun test --reporter=junit) → content/git-history/" +# Runs the test suite at HEAD and appends the result to the per-repo +# tests bundle. Stability data accumulates run-by-run across deploys. +( cd "$REPO_ROOT" && bun scripts/p620/snapshot-tests.ts ) \ + || { echo "✗ snapshot-tests mislukt"; exit 1; } + echo "→ source rsync naar $SSH_HOST:~/$REMOTE_SRC_DIR" ssh "$SSH_HOST" "mkdir -p ~/$REMOTE_SRC_DIR" # --delete zodat verwijderde files ook weggaan op remote. diff --git a/scripts/p620/snapshot-tests.ts b/scripts/p620/snapshot-tests.ts new file mode 100644 index 0000000000000000000000000000000000000000..099933d5f6ab31beaaf9545cd4ec2ac9cb5e6727 --- /dev/null +++ b/scripts/p620/snapshot-tests.ts @@ -0,0 +1,129 @@ +#!/usr/bin/env bun +// Run `bun test` on the current HEAD and append the result to a +// per-repo bundle alongside the git-history snapshot. The container +// reads this bundle at runtime to render /reports/live/tests for the +// (private) syntaxai/tdd.md repo without needing a runtime sandbox. +// +// Strategy: HEAD-only per deploy. The bundle accumulates one run per +// deploy (capped at 50), so stability data builds organically over +// time. No git-worktree gymnastics, no per-commit bun-install. +// +// Output: content/git-history/<owner>__<name>__tests.json +// Schema: { owner, name, runs: TestRunRecord[] } — newest first. 
+ +import { spawnSync } from "node:child_process"; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { resolve } from "node:path"; + +const REPO_ROOT = resolve(import.meta.dir, "..", ".."); +const OWNER = "syntaxai"; +const NAME = "tdd.md"; +const MAX_RUNS = 50; +const JUNIT_OUT = "/tmp/tdd-md-test-junit.xml"; + +const sh = (cmd: string, args: string[]): string => { + const r = spawnSync(cmd, args, { cwd: REPO_ROOT, encoding: "utf8" }); + return (r.stdout ?? "").trim(); +}; + +const head = sh("git", ["rev-parse", "HEAD"]); +const branch = sh("git", ["rev-parse", "--abbrev-ref", "HEAD"]); +if (head === "") { + console.error("could not resolve HEAD"); + process.exit(1); +} + +// Run tests. bun exits non-zero when tests fail — that's fine, we +// just need the junit XML, which it writes regardless. +spawnSync("bun", ["test", "--reporter=junit", `--reporter-outfile=${JUNIT_OUT}`], { + cwd: REPO_ROOT, + stdio: "inherit", +}); +if (!existsSync(JUNIT_OUT)) { + console.error(`✗ junit output missing at ${JUNIT_OUT} — bun test crashed before writing`); + process.exit(1); +} +const xml = readFileSync(JUNIT_OUT, "utf8"); + +interface TestRecord { + name: string; + file: string; + status: "pass" | "fail"; + durationMs: number; +} + +const decodeXmlEntity = (s: string): string => + s.replace(/'/g, "'").replace(/"/g, '"').replace(/</g, "<").replace(/>/g, ">").replace(/&/g, "&"); + +const parseJunit = (xml: string): TestRecord[] => { + const out: TestRecord[] = []; + const re = /]*?)(\/>|>([\s\S]*?)<\/testcase>)/g; + let m: RegExpExecArray | null; + while ((m = re.exec(xml)) !== null) { + const attrs = m[1] ?? ""; + const inner = m[3] ?? ""; + const nameRaw = /name="([^"]*)"/.exec(attrs)?.[1] ?? ""; + const file = /file="([^"]*)"/.exec(attrs)?.[1] ?? ""; + const time = parseFloat(/time="([^"]*)"/.exec(attrs)?.[1] ?? 
"0"); + const failed = / t.status === "pass").length; +const failing = tests.length - passing; +const totalDurationMs = tests.reduce((s, t) => s + t.durationMs, 0); + +interface TestRunRecord { + sha: string; + branch: string; + ranAt: number; + total: number; + passing: number; + failing: number; + durationMs: number; + tests: TestRecord[]; +} + +interface TestBundle { + owner: string; + name: string; + runs: TestRunRecord[]; +} + +const bundlePath = resolve(REPO_ROOT, "content", "git-history", `${OWNER}__${NAME}__tests.json`); +let bundle: TestBundle = { owner: OWNER, name: NAME, runs: [] }; +if (existsSync(bundlePath)) { + try { + const parsed = JSON.parse(readFileSync(bundlePath, "utf8")) as TestBundle; + if (parsed && Array.isArray(parsed.runs)) bundle = parsed; + } catch { + // Corrupt or unreadable bundle — start fresh, deploy isn't blocked. + } +} + +if (bundle.runs.some((r) => r.sha === head)) { + console.log(`✓ tests for ${head.slice(0, 7)} already in bundle (${bundle.runs.length} runs total) — nothing to add`); +} else { + bundle.runs.unshift({ + sha: head, + branch, + ranAt: Date.now(), + total: tests.length, + passing, + failing, + durationMs: totalDurationMs, + tests, + }); + bundle.runs = bundle.runs.slice(0, MAX_RUNS); + mkdirSync(resolve(REPO_ROOT, "content", "git-history"), { recursive: true }); + writeFileSync(bundlePath, JSON.stringify(bundle, null, 2)); + console.log(`✓ tests at ${head.slice(0, 7)} (${branch}): ${passing}/${tests.length} pass, ${failing} fail → bundle (${bundle.runs.length} runs total)`); +} diff --git a/src/c14_github.ts b/src/c14_github.ts index abb2101955f9dfb0b26bf696f9fe275b31ad1ac5..9ebc43f3683fba8a7009c27a277974f0fda86552 100644 --- a/src/c14_github.ts +++ b/src/c14_github.ts @@ -206,3 +206,49 @@ export const fetchRepoCommits = async ( commitsCache.set(key, { fetchedAt: Date.now(), commits }); return commits; }; + +// --------------------------------------------------------------------- +// Test-results bundle. 
Companion to the git-history bundle above — +// scripts/p620/snapshot-tests.ts runs `bun test --reporter=junit` at +// each deploy and appends the result to this JSON file. Lets the +// container render /reports/live/tests against real data without +// running tests at runtime. +// --------------------------------------------------------------------- + +export interface TestRecord { + name: string; + file: string; + status: "pass" | "fail"; + durationMs: number; +} + +export interface TestRunRecord { + sha: string; + branch: string; + ranAt: number; + total: number; + passing: number; + failing: number; + durationMs: number; + tests: TestRecord[]; +} + +export interface TestBundle { + owner: string; + name: string; + runs: TestRunRecord[]; +} + +export const loadTestBundle = async ( + repoOwner: string, + repoName: string, +): Promise => { + try { + const file = Bun.file(`./content/git-history/${repoOwner}__${repoName}__tests.json`); + if (!(await file.exists())) return null; + const data = (await file.json()) as TestBundle; + return Array.isArray(data.runs) ? data : null; + } catch { + return null; + } +}; diff --git a/src/c21_app.ts b/src/c21_app.ts index e3de9a367db15cfd606d4f7f2ff96c4fb687dae6..6c80556aa8e28d7942d91bcce4fd8ba0362a7b67 100644 --- a/src/c21_app.ts +++ b/src/c21_app.ts @@ -36,6 +36,7 @@ import { DEMO_STABILITY, } from "./c31_reports_demo.ts"; import { buildLiveReports } from "./c32_real_reports.ts"; +import { buildLiveTestData } from "./c32_real_tests.ts"; import { parseRepoIdentifier } from "./c31_project_config.ts"; import { judge } from "./c32_judge.ts"; import { @@ -532,18 +533,25 @@ ${rows} }, "/reports/live/tests": async () => { + const data = await buildLiveTestData(LIVE_REPO_OWNER, LIVE_REPO_NAME); + const ranOn = data.ranAt ? new Date(data.ranAt).toISOString().slice(0, 10) : null; + const period = data.runsCount === 0 + ? "geen runs in bundle" + : `last run ${ranOn} · ${data.runsCount} run${data.runsCount === 1 ? 
"" : "s"} cumulatief`; + const unavailableNote = data.runsCount === 0 + ? "Nog geen test-runs gebundeld. De volgende deploy draait `bun test --reporter=junit` op de huidige HEAD en publiceert het resultaat hier. Stabiliteit (flaky %, deletion) bouwt zich op naarmate er meer runs in de bundle staan — de demo op [/reports/demo/tests](/reports/demo/tests) toont waar het naartoe groeit." + : undefined; const html = await renderPage({ - title: "Tests overzicht · live (placeholder) — tdd.md", - description: "Placeholder voor de live test-overview — wacht op de sandbox-runner sliver.", + title: "Tests overzicht · live — tdd.md", + description: `Live test-snapshot van ${LIVE_REPO_OWNER}/${LIVE_REPO_NAME} — ${data.runsCount} run${data.runsCount === 1 ? "" : "s"} gebundeld.`, bodyMarkdown: testsOverviewMd({ - period: "live", + period, bannerHtml: LIVE_BANNER_HTML, - snapshots: [], - stability: [], - unavailableNote: "De per-repo test-snapshot en stabiliteitstabel hebben de sandbox-runner sliver nodig (block 1 vervolg). Tot dat klaar is, alleen de exec-summary + drill-down draaien op echte data; de testpagina staat in de [demo](/reports/demo/tests).", + snapshots: data.snapshots, + stability: data.stability, + unavailableNote, }), ogPath: "https://tdd.md/reports/live/tests", - noindex: true, }); return htmlResponse(html); }, diff --git a/src/c32_real_tests.ts b/src/c32_real_tests.ts new file mode 100644 index 0000000000000000000000000000000000000000..e123a9035ac476aa8006094116bc335d824e9cff --- /dev/null +++ b/src/c32_real_tests.ts @@ -0,0 +1,140 @@ +// c32 — logic: aggregate the per-deploy test bundle into the same +// TestSnapshot[] / TestStability[] shape that the demo page renders. +// HEAD-only snapshots; stability accumulates as more deploys add runs. +// +// Pure given the bundle + commits in (no I/O of its own beyond delegating +// to c14_github's bundle loader and commits fetcher). 
import { fetchRepoCommits, loadTestBundle } from "./c14_github.ts";
import type {
  AgentReport,
  TestFailure,
  TestSnapshot,
  TestStability,
} from "./c31_reports_demo.ts";

type AgentSlug = AgentReport["slug"];

/** Co-author trailer patterns, checked in order; the first match wins. */
const AGENT_TRAILERS: ReadonlyArray<readonly [RegExp, AgentSlug]> = [
  [/Co-Authored-By:.*Claude/i, "claude-code"],
  [/Co-Authored-By:.*Cursor/i, "cursor"],
  [/Co-Authored-By:.*Aider/i, "aider"],
];

/** Maximum number of rows returned in the stability table. */
const MAX_STABILITY_ROWS = 30;

/**
 * Maps a commit message to an agent slug by looking for a
 * `Co-Authored-By:` trailer that mentions a known agent.
 *
 * @param msg Full commit message.
 * @returns The matching slug, or `null` when no known trailer is present.
 */
const detectAgent = (msg: string): AgentSlug | null => {
  for (const [pattern, slug] of AGENT_TRAILERS) {
    if (pattern.test(msg)) return slug;
  }
  return null;
};

/**
 * Builds the display label for a test: the file's basename, then the
 * test name, separated by " > ".
 */
const shortenTestLabel = (file: string, name: string): string => {
  const base = file.split("/").pop() ?? file;
  return `${base} > ${name}`;
};

/**
 * Identity of a test across runs. NUL is used as the separator so that a
 * `|` (or any other printable character) inside a path or a test name
 * cannot make two different tests share a key.
 */
const testKey = (file: string, name: string): string => `${file}\u0000${name}`;

/** Result of {@link buildLiveTestData}. */
export interface LiveTestData {
  /** Zero or one snapshot: the most recent run in the bundle. */
  snapshots: TestSnapshot[];
  /** Per-test pass/fail tallies across all bundled runs (top rows only). */
  stability: TestStability[];
  /** Number of runs in the bundle; 0 when there is no usable bundle. */
  runsCount: number;
  /** `ranAt` of the most recent run, or `null` when there is none. */
  ranAt: number | null;
  /** Commit SHA of the most recent run, or `null` when there is none. */
  headSha: string | null;
}

/** Per-test tallies accumulated over every run in the bundle. */
interface TestStat {
  name: string;
  file: string;
  pass: number;
  fail: number;
  /** Smallest `ranAt` among failing runs; `Infinity` until one is seen. */
  firstBrokenAt: number;
  /** SHA of the failing run with the largest `ranAt`, if any. */
  lastBrokenSha: string | null;
  lastBrokenAt: number;
}

/** Fresh empty result (new arrays each call, so callers may mutate them). */
const emptyLiveTestData = (): LiveTestData => ({
  snapshots: [],
  stability: [],
  runsCount: 0,
  ranAt: null,
  headSha: null,
});

/**
 * Aggregates the per-deploy test bundle for a repo into one snapshot of
 * the most recent run plus a stability table spanning all bundled runs.
 *
 * @param repoOwner Repository owner, used to locate the bundle and commits.
 * @param repoName Repository name.
 * @returns Empty data when the bundle is missing or has no runs; otherwise
 *   the latest snapshot and up to 30 stability rows.
 */
export const buildLiveTestData = async (
  repoOwner: string,
  repoName: string,
): Promise<LiveTestData> => {
  const bundle = await loadTestBundle(repoOwner, repoName);
  // runs[0] is the most recent run; it is undefined for an empty bundle.
  const latest = bundle?.runs[0];
  if (!bundle || !latest) return emptyLiveTestData();

  const repoSlug = `${repoOwner}/${repoName}`;

  // Single pass over every run: tally pass/fail per test and remember the
  // earliest and latest failing run. Entries are created in bundle order,
  // which keeps the (stable) sort below deterministic.
  const stats = new Map<string, TestStat>();
  for (const run of bundle.runs) {
    for (const t of run.tests) {
      const key = testKey(t.file, t.name);
      let s = stats.get(key);
      if (!s) {
        s = {
          name: t.name,
          file: t.file,
          pass: 0,
          fail: 0,
          firstBrokenAt: Number.POSITIVE_INFINITY,
          lastBrokenSha: null,
          lastBrokenAt: 0,
        };
        stats.set(key, s);
      }
      if (t.status === "pass") {
        s.pass++;
        continue;
      }
      s.fail++;
      if (run.ranAt < s.firstBrokenAt) s.firstBrokenAt = run.ranAt;
      if (run.ranAt > s.lastBrokenAt) {
        s.lastBrokenSha = run.sha;
        s.lastBrokenAt = run.ranAt;
      }
    }
  }

  // "since" is the date of the oldest bundled run in which the test failed.
  // NOTE(review): that is the first-ever failure in the bundle, even if the
  // test passed in between — confirm this is intended rather than the start
  // of the current failing streak.
  const failures: TestFailure[] = latest.tests
    .filter((t) => t.status === "fail")
    .map((t) => {
      const firstBrokenAt = stats.get(testKey(t.file, t.name))?.firstBrokenAt;
      const sinceTs =
        firstBrokenAt !== undefined && Number.isFinite(firstBrokenAt)
          ? firstBrokenAt
          : latest.ranAt;
      return {
        test: shortenTestLabel(t.file, t.name),
        since: new Date(sinceTs).toISOString().slice(0, 10),
      };
    });

  const snapshot: TestSnapshot = {
    repo: repoSlug,
    branch: latest.branch,
    total: latest.total,
    passing: latest.passing,
    failing: latest.failing,
    failures,
  };

  // Commit messages are only needed to attribute the last breakage to an
  // agent. A rejection from fetchRepoCommits is not caught here and
  // propagates to the caller.
  const commits = await fetchRepoCommits(repoOwner, repoName, 100);
  const shaToAgent = new Map<string, AgentSlug | null>();
  for (const c of commits) shaToAgent.set(c.sha, detectAgent(c.commit.message));

  const latestKeys = new Set(latest.tests.map((t) => testKey(t.file, t.name)));

  // lastBrokenBy needs an agent slug; if a SHA cannot be mapped to an agent
  // (e.g. the commit is outside the 100-commit window fetched above), fall
  // back to the agent of the latest run, and to "claude-code" if that is
  // unknown too.
  const fallbackAgent: AgentSlug = shaToAgent.get(latest.sha) ?? "claude-code";

  const stability: TestStability[] = Array.from(stats.values())
    .map((s) => {
      const mapped = s.lastBrokenSha ? shaToAgent.get(s.lastBrokenSha) : null;
      // A test seen in some run but absent from the latest one counts as deleted.
      const deleted = latestKeys.has(testKey(s.file, s.name)) ? 0 : 1;
      const flagged = s.fail > 0 && (deleted > 0 || s.fail >= Math.max(2, s.pass / 5));
      return {
        test: shortenTestLabel(s.file, s.name),
        repo: repoSlug,
        pass: s.pass,
        fail: s.fail,
        deleted,
        lastBrokenBy: mapped ?? fallbackAgent,
        flagged,
      };
    })
    // Most failures first, then deleted tests, then most passes.
    .sort((a, b) => b.fail - a.fail || b.deleted - a.deleted || b.pass - a.pass)
    .slice(0, MAX_STABILITY_ROWS);

  return {
    snapshots: [snapshot],
    stability,
    runsCount: bundle.runs.length,
    ranAt: latest.ranAt,
    headSha: latest.sha,
  };
};