#!/usr/bin/env bun
// Run `bun test` on the current HEAD (and optionally the last N
// historical commits) and append the results to a per-repo bundle
// alongside the git-history snapshot. The container reads this bundle
// at runtime to render /reports/live/tests for the (private)
// syntaxai/tdd.md repo without needing a runtime sandbox.
//
// HEAD mode (default): one new run per deploy, fast, no worktree.
// History mode (SAMA_HISTORY_DEPTH=N): also runs the last N commits
// that aren't already in the bundle, via git worktree per SHA.
// Slower (~5-10s/commit) but builds real stability data instead of
// waiting for it to accumulate organically.
//
// Output: content/git-history/{owner}__{name}__tests.json
// Schema: { owner, name, runs: TestRunRecord[] } — newest first.

import { spawnSync } from "node:child_process";
import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { resolve } from "node:path";
import { stripStringsAndComments } from "../../src/b32_sama_verify.ts";

// Repository root, two directories above this script.
const REPO_ROOT = resolve(import.meta.dir, "..", "..");
// Identity of the repo the bundle describes; also used in the bundle file name.
const OWNER = "syntaxai";
const NAME = "tdd.md";
// Upper bound on the number of runs kept in the bundle.
const MAX_RUNS = 50;
// Where `bun test` writes its junit report for the HEAD run.
const JUNIT_OUT = "/tmp/tdd-md-test-junit.xml";
// Number of historical commits to backfill; 0 (the default) disables history mode.
const HISTORY_DEPTH = parseInt(process.env.SAMA_HISTORY_DEPTH ?? "0", 10);

/**
 * Run a command synchronously from the repository root and return its
 * trimmed stdout. Yields "" when the process produced no stdout (for
 * example because it could not be spawned); the exit status is not checked.
 */
function sh(cmd: string, args: string[]): string {
  const result = spawnSync(cmd, args, { cwd: REPO_ROOT, encoding: "utf8" });
  const stdout = result.stdout ?? "";
  return stdout.trim();
}

const head = sh("git", ["rev-parse", "HEAD"]);
const branch = sh("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
if (head === "") {
  console.error("could not resolve HEAD");
  process.exit(1);
}

// Run tests. bun exits non-zero when tests fail — that's fine, we
// just need the junit XML, which it writes regardless.
// Remove any junit report left behind by an earlier invocation so that a
// crash of `bun test` cannot be mistaken for a fresh result below.
rmSync(JUNIT_OUT, { force: true });
spawnSync("bun", ["test", "--reporter=junit", `--reporter-outfile=${JUNIT_OUT}`], {
  cwd: REPO_ROOT,
  stdio: "inherit",
});
if (!existsSync(JUNIT_OUT)) {
  console.error(`✗ junit output missing at ${JUNIT_OUT} — bun test crashed before writing`);
  process.exit(1);
}
const xml = readFileSync(JUNIT_OUT, "utf8");

/** One test case as extracted from the junit report. */
interface TestRecord {
  name: string;
  file: string;
  status: "pass" | "fail";
  durationMs: number;
}

/**
 * Decode the five predefined XML entities. `&amp;` is handled last so that
 * an escaped ampersand (e.g. `&amp;lt;`) is not decoded twice.
 */
const decodeXmlEntity = (s: string): string =>
  s
    .replace(/&apos;/g, "'")
    .replace(/&quot;/g, '"')
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&amp;/g, "&");

/**
 * Extract every `testcase` element from a junit XML document.
 *
 * Regex-based: group 1 captures the attribute text, group 3 the element
 * body (empty for self-closing elements). A test counts as failed when
 * its body contains a `failure` or `error` child element.
 *
 * NOTE(review): the element regex, the failure check and the record
 * construction were reconstructed from a damaged copy of this file —
 * confirm the duration rounding and failure pattern against the original.
 */
const parseJunit = (xml: string): TestRecord[] => {
  const out: TestRecord[] = [];
  const re = /<testcase\b([^>]*?)(\/>|>([\s\S]*?)<\/testcase>)/g;
  let m: RegExpExecArray | null;
  while ((m = re.exec(xml)) !== null) {
    const attrs = m[1] ?? "";
    const inner = m[3] ?? "";
    const nameRaw = /name="([^"]*)"/.exec(attrs)?.[1] ?? "";
    const file = /file="([^"]*)"/.exec(attrs)?.[1] ?? "";
    const time = parseFloat(/time="([^"]*)"/.exec(attrs)?.[1] ?? "0");
    const failed = /<(?:failure|error)\b/.test(inner);
    out.push({
      name: decodeXmlEntity(nameRaw),
      file: decodeXmlEntity(file),
      status: failed ? "fail" : "pass",
      // junit reports seconds; a malformed attribute yields 0 rather than NaN.
      durationMs: Number.isFinite(time) ? Math.round(time * 1000) : 0,
    });
  }
  return out;
};

const tests = parseJunit(xml);
const passing = tests.filter((t) => t.status === "pass").length;
const failing = tests.length - passing;
const totalDurationMs = tests.reduce((s, t) => s + t.durationMs, 0);

/** A test whose body contains no `expect(` call, with the reason it was flagged. */
interface PlaceholderTest {
  name: string;
  file: string;
  reason: string;
}

// Placeholder detection. Catches the failure mode from r/ClaudeCode
// post 1qix264 ("90 placeholder tests, 100% pass rate"): tests with
// zero `expect(` calls in their body are flagged. Regex-based brace
// matching — full AST is overkill for the one structural property we
// care about. Limitations: misses tests that delegate to a custom
// assertion helper or pass through a subroutine. Acceptable for v1;
// the catch is the common failure shape, not every theoretical one.
/**
 * Scan one test file's source for `test(...)` / `it(...)` calls whose
 * arrow-function body contains no `expect(` call.
 *
 * @param testFile path recorded on each finding (not read from disk here)
 * @param content  full source text of that file
 * @returns one entry per flagged test, in source order
 */
const findPlaceholderTests = (testFile: string, content: string): PlaceholderTest[] => {
  const out: PlaceholderTest[] = [];
  // Strip strings & comments to a same-length whitespace mask. Used
  // below to skip any test()/it() match whose keyword sits inside a
  // string literal (the c32_sama_verify.test.ts fixtures hold real
  // test source as backtick strings — those aren't real tests).
  const mask = stripStringsAndComments(content);
  const re = /\b(test|it)\s*\(\s*(["'`])((?:\\.|(?!\2).)*)\2\s*,\s*(?:async\s+)?(?:\([^)]*\)|[^=()]*?)\s*=>\s*\{/g;
  let m: RegExpExecArray | null;
  while ((m = re.exec(content)) !== null) {
    // If the match position is whitespace in the mask, the original
    // was inside a string or comment — skip.
    if (mask[m.index] === " " || mask[m.index] === "\n") continue;
    const name = m[3] ?? "";
    // The regex ends on the opening brace of the test body.
    const startBrace = re.lastIndex - 1;
    let depth = 1;
    let i = startBrace + 1;
    let inString: string | null = null;
    // Walk forward to the matching closing brace, ignoring braces that
    // appear inside string literals and comments.
    while (i < content.length && depth > 0) {
      const c = content[i];
      if (inString !== null) {
        if (c === "\\") {
          // Skip the escaped character so an escaped quote doesn't end the string.
          i += 2;
          continue;
        }
        if (c === inString) inString = null;
      } else {
        if (c === '"' || c === "'" || c === "`") inString = c;
        else if (c === "/" && content[i + 1] === "/") {
          // line comment
          while (i < content.length && content[i] !== "\n") i++;
          continue;
        } else if (c === "/" && content[i + 1] === "*") {
          // block comment
          i += 2;
          while (i < content.length - 1 && !(content[i] === "*" && content[i + 1] === "/")) i++;
          i += 2;
          continue;
        } else if (c === "{") depth++;
        else if (c === "}") depth--;
      }
      i++;
    }
    // `i` now sits just past the closing brace; the body excludes both braces.
    const body = content.slice(startBrace + 1, i - 1);
    const expectCount = (body.match(/\bexpect\s*\(/g) ?? []).length;
    if (expectCount === 0) {
      const trimmedLen = body.replace(/\s+/g, "").length;
      const reason =
        trimmedLen === 0
          ? "empty test body"
          : trimmedLen < 20 && /^\s*\/\//.test(body.trim())
            ? "comment-only stub"
            : "no expect() calls in test body";
      out.push({ name, file: testFile, reason });
    }
  }
  return out;
};

/**
 * Run placeholder detection over repo-relative test file paths, silently
 * skipping paths that do not exist under the repository root.
 */
const detectPlaceholders = (testFiles: string[]): PlaceholderTest[] => {
  const out: PlaceholderTest[] = [];
  for (const f of testFiles) {
    const abs = resolve(REPO_ROOT, f);
    if (!existsSync(abs)) continue;
    const content = readFileSync(abs, "utf8");
    out.push(...findPlaceholderTests(f, content));
  }
  return out;
};

const uniqueTestFiles = Array.from(new Set(tests.map((t) => t.file).filter(Boolean)));
const placeholderTests = detectPlaceholders(uniqueTestFiles);

/** One recorded execution of the test suite at a given commit. */
interface TestRunRecord {
  sha: string;
  branch: string;
  ranAt: number;
  total: number;
  passing: number;
  failing: number;
  durationMs: number;
  tests: TestRecord[];
  placeholderTests: PlaceholderTest[];
}

/** On-disk bundle: repo identity plus its recorded runs, newest first. */
interface TestBundle {
  owner: string;
  name: string;
  runs: TestRunRecord[];
}

const bundlePath = resolve(REPO_ROOT, "content", "git-history", `${OWNER}__${NAME}__tests.json`);
let bundle: TestBundle = { owner: OWNER, name: NAME, runs: [] };
if (existsSync(bundlePath)) {
  try {
    const parsed = JSON.parse(readFileSync(bundlePath, "utf8")) as TestBundle;
    if (parsed && Array.isArray(parsed.runs)) bundle = parsed;
  } catch {
    // Corrupt or unreadable bundle — start fresh, deploy isn't blocked.
  }
}

if (bundle.runs.some((r) => r.sha === head)) {
  console.log(`✓ tests for ${head.slice(0, 7)} already in bundle (${bundle.runs.length} runs total) — nothing to add`);
} else {
  // Newest first: the HEAD run goes to the front, then the list is capped.
  bundle.runs.unshift({
    sha: head,
    branch,
    ranAt: Date.now(),
    total: tests.length,
    passing,
    failing,
    durationMs: totalDurationMs,
    tests,
    placeholderTests,
  });
  bundle.runs = bundle.runs.slice(0, MAX_RUNS);
  mkdirSync(resolve(REPO_ROOT, "content", "git-history"), { recursive: true });
  writeFileSync(bundlePath, JSON.stringify(bundle, null, 2));
  console.log(`✓ tests at ${head.slice(0, 7)} (${branch}): ${passing}/${tests.length} pass, ${failing} fail, ${placeholderTests.length} placeholder → bundle (${bundle.runs.length} runs total)`);
  if (placeholderTests.length > 0) {
    for (const p of placeholderTests) {
      console.log(` ⚠ placeholder: ${p.file} > ${p.name} (${p.reason})`);
    }
  }
}

// ---------------------------------------------------------------------
// Historical mode: run the test suite at each of the last N commits
// that aren't already in the bundle. Opt-in via SAMA_HISTORY_DEPTH.
// ---------------------------------------------------------------------

/**
 * Check out `sha` into a temporary git worktree, run the suite there and
 * append the result to the in-memory bundle.
 *
 * @returns true when a run was appended; false when the worktree could
 *          not be created, no junit report appeared, or it held no tests.
 *          The worktree and junit file are removed in every case.
 */
const runHistoricalCommit = (sha: string): boolean => {
  const wt = `/tmp/tdd-md-wt-${sha.slice(0, 12)}`;
  const junit = `/tmp/tdd-md-test-junit-${sha.slice(0, 12)}.xml`;
  // Cleanup any leftover from a previous failed run. The junit file is
  // included so a stale report can't stand in for this run's output.
  spawnSync("git", ["worktree", "remove", "--force", wt], { cwd: REPO_ROOT });
  rmSync(wt, { recursive: true, force: true });
  rmSync(junit, { force: true });
  const add = spawnSync("git", ["worktree", "add", "--detach", wt, sha], {
    cwd: REPO_ROOT,
    encoding: "utf8",
  });
  if (add.status !== 0) {
    console.log(` ✗ worktree add failed for ${sha.slice(0, 7)}: ${add.stderr.trim()}`);
    return false;
  }
  let added = false;
  try {
    // Symlink node_modules from the parent checkout. Works as long as
    // bun.lock didn't change between commits — true for almost every
    // commit on tdd.md. If it diverged, `bun test` will fail loudly
    // and we just skip that SHA.
    spawnSync("ln", ["-s", resolve(REPO_ROOT, "node_modules"), resolve(wt, "node_modules")]);
    const ranAt = Date.now();
    spawnSync("bun", ["test", "--reporter=junit", `--reporter-outfile=${junit}`], {
      cwd: wt,
      stdio: "ignore",
      timeout: 120_000,
    });
    if (!existsSync(junit)) {
      console.log(` ✗ no junit output for ${sha.slice(0, 7)} — skipping`);
      return false;
    }
    const histXml = readFileSync(junit, "utf8");
    const histTests = parseJunit(histXml);
    if (histTests.length === 0) {
      console.log(` ⚠ ${sha.slice(0, 7)} produced 0 tests — likely deps mismatch, skipping`);
      return false;
    }
    // Placeholder detection must read the test sources as they were at
    // this commit, i.e. from the worktree rather than the main checkout.
    const histPlaceholders: PlaceholderTest[] = [];
    for (const f of Array.from(new Set(histTests.map((t) => t.file).filter(Boolean)))) {
      const abs = resolve(wt, f);
      if (!existsSync(abs)) continue;
      histPlaceholders.push(...findPlaceholderTests(f, readFileSync(abs, "utf8")));
    }
    const histPassing = histTests.filter((t) => t.status === "pass").length;
    const histFailing = histTests.length - histPassing;
    const histDur = histTests.reduce((s, t) => s + t.durationMs, 0);
    // `%D` lists ref decorations; only a "HEAD -> branch" entry names a branch here.
    const branchAtSha =
      sh("git", ["log", "-1", "--format=%D", sha])
        .split(",")
        .map((s) => s.trim())
        .find((s) => s.startsWith("HEAD ->"))
        ?.replace("HEAD -> ", "") ?? "(detached)";
    bundle.runs.push({
      sha,
      branch: branchAtSha,
      ranAt,
      total: histTests.length,
      passing: histPassing,
      failing: histFailing,
      durationMs: histDur,
      tests: histTests,
      placeholderTests: histPlaceholders,
    });
    added = true;
    console.log(` ✓ ${sha.slice(0, 7)}: ${histPassing}/${histTests.length} pass, ${histFailing} fail, ${histPlaceholders.length} placeholder`);
  } finally {
    spawnSync("git", ["worktree", "remove", "--force", wt], { cwd: REPO_ROOT });
    rmSync(wt, { recursive: true, force: true });
    rmSync(junit, { force: true });
  }
  return added;
};

if (HISTORY_DEPTH > 0) {
  console.log(`→ historical mode: walking last ${HISTORY_DEPTH} commits`);
  const recent = sh("git", ["log", `--max-count=${HISTORY_DEPTH + 1}`, "--pretty=format:%H"]).split("\n").slice(1); // skip HEAD
  let addedCount = 0;
  for (const sha of recent) {
    if (!sha) continue;
    if (bundle.runs.some((r) => r.sha === sha)) {
      console.log(` • ${sha.slice(0, 7)} already in bundle, skipping`);
      continue;
    }
    if (runHistoricalCommit(sha)) addedCount++;
  }
  if (addedCount > 0) {
    // Re-sort newest-first by commit author date before re-writing. The
    // new historical entries carry a ranAt of the moment they ran, which
    // says nothing about where the commit sits in history.
    // Author dates are resolved once per SHA (not once per comparison);
    // a SHA git can no longer resolve falls back to its ranAt so the
    // comparator never sees NaN.
    const authorTimeBySha = new Map<string, number>();
    for (const run of bundle.runs) {
      const authored = Date.parse(sh("git", ["log", "-1", "--format=%aI", run.sha]));
      authorTimeBySha.set(run.sha, Number.isNaN(authored) ? run.ranAt : authored);
    }
    bundle.runs.sort(
      (a, b) => (authorTimeBySha.get(b.sha) ?? b.ranAt) - (authorTimeBySha.get(a.sha) ?? a.ranAt),
    );
    bundle.runs = bundle.runs.slice(0, MAX_RUNS);
    writeFileSync(bundlePath, JSON.stringify(bundle, null, 2));
    console.log(`✓ added ${addedCount} historical run${addedCount === 1 ? "" : "s"} → bundle (${bundle.runs.length} runs total)`);
  }
}