syntaxai/tdd.md · main · scripts / p620 / snapshot-tests.ts
#!/usr/bin/env bun
// Run `bun test` on the current HEAD (and optionally the last N
// historical commits) and append the results to a per-repo bundle
// alongside the git-history snapshot. The container reads this bundle
// at runtime to render /reports/live/tests for the (private)
// syntaxai/tdd.md repo without needing a runtime sandbox.
//
// HEAD mode (default): one new run per deploy, fast, no worktree.
// History mode (SAMA_HISTORY_DEPTH=N): also runs the last N commits
// that aren't already in the bundle, each in its own git worktree
// that reuses the parent checkout's node_modules via a symlink.
// Slower (~5-10s/commit) but builds real stability data instead of
// waiting for it to accumulate organically.
//
// Output: content/git-history/<owner>__<name>__tests.json
// Schema: { owner, name, runs: TestRunRecord[] } — newest first.
import { spawnSync } from "node:child_process";
import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { resolve } from "node:path";
import { stripStringsAndComments } from "../../src/b32_sama_verify.ts";
const REPO_ROOT = resolve(import.meta.dir, "..", "..");
const OWNER = "syntaxai";
const NAME = "tdd.md";
const MAX_RUNS = 50;
const JUNIT_OUT = "/tmp/tdd-md-test-junit.xml";
const HISTORY_DEPTH = parseInt(process.env.SAMA_HISTORY_DEPTH ?? "0", 10);
const sh = (cmd: string, args: string[]): string => {
const r = spawnSync(cmd, args, { cwd: REPO_ROOT, encoding: "utf8" });
return (r.stdout ?? "").trim();
};
const head = sh("git", ["rev-parse", "HEAD"]);
const branch = sh("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
if (head === "") {
console.error("could not resolve HEAD");
process.exit(1);
}
// Run tests. bun exits non-zero when tests fail — that's fine, we
// just need the junit XML, which it writes regardless.
spawnSync("bun", ["test", "--reporter=junit", `--reporter-outfile=${JUNIT_OUT}`], {
cwd: REPO_ROOT,
stdio: "inherit",
});
if (!existsSync(JUNIT_OUT)) {
console.error(`✗ junit output missing at ${JUNIT_OUT} — bun test crashed before writing`);
process.exit(1);
}
const xml = readFileSync(JUNIT_OUT, "utf8");
interface TestRecord {
name: string;
file: string;
status: "pass" | "fail";
durationMs: number;
}
const decodeXmlEntity = (s: string): string =>
s.replace(/'/g, "'").replace(/"/g, '"').replace(/</g, "<").replace(/>/g, ">").replace(/&/g, "&");
const parseJunit = (xml: string): TestRecord[] => {
const out: TestRecord[] = [];
const re = /<testcase\s+([^>]*?)(\/>|>([\s\S]*?)<\/testcase>)/g;
let m: RegExpExecArray | null;
while ((m = re.exec(xml)) !== null) {
const attrs = m[1] ?? "";
const inner = m[3] ?? "";
const nameRaw = /name="([^"]*)"/.exec(attrs)?.[1] ?? "";
const file = /file="([^"]*)"/.exec(attrs)?.[1] ?? "";
const time = parseFloat(/time="([^"]*)"/.exec(attrs)?.[1] ?? "0");
const failed = /<failure\b/.test(inner) || /<error\b/.test(inner);
out.push({
name: decodeXmlEntity(nameRaw),
file,
status: failed ? "fail" : "pass",
durationMs: Math.round(time * 1000),
});
}
return out;
};
const tests = parseJunit(xml);
const passing = tests.filter((t) => t.status === "pass").length;
const failing = tests.length - passing;
const totalDurationMs = tests.reduce((s, t) => s + t.durationMs, 0);
interface PlaceholderTest {
name: string;
file: string;
reason: string;
}
// Placeholder detection. Catches the failure mode from r/ClaudeCode
// post 1qix264 ("90 placeholder tests, 100% pass rate"): tests with
// zero `expect(` calls in their body are flagged. Regex-based brace
// matching — full AST is overkill for the one structural property we
// care about. Limitations: misses tests that delegate to a custom
// assertion helper or pass through a subroutine. Acceptable for v1;
// the catch is the common failure shape, not every theoretical one.
const findPlaceholderTests = (testFile: string, content: string): PlaceholderTest[] => {
const out: PlaceholderTest[] = [];
// Strip strings & comments to a same-length whitespace mask. Used
// below to skip any test()/it() match whose keyword sits inside a
// string literal (the c32_sama_verify.test.ts fixtures hold real
// test source as backtick strings — those aren't real tests).
const mask = stripStringsAndComments(content);
const re = /\b(test|it)\s*\(\s*(["'`])((?:\\.|(?!\2).)*)\2\s*,\s*(?:async\s+)?(?:\([^)]*\)|[^=()]*?)\s*=>\s*\{/g;
let m: RegExpExecArray | null;
while ((m = re.exec(content)) !== null) {
// If the match position is whitespace in the mask, the original
// was inside a string or comment — skip.
if (mask[m.index] === " " || mask[m.index] === "\n") continue;
const name = m[3] ?? "";
const startBrace = re.lastIndex - 1;
let depth = 1;
let i = startBrace + 1;
let inString: string | null = null;
while (i < content.length && depth > 0) {
const c = content[i];
if (inString !== null) {
if (c === "\\") { i += 2; continue; }
if (c === inString) inString = null;
} else {
if (c === '"' || c === "'" || c === "`") inString = c;
else if (c === "/" && content[i + 1] === "/") {
// line comment
while (i < content.length && content[i] !== "\n") i++;
continue;
}
else if (c === "/" && content[i + 1] === "*") {
// block comment
i += 2;
while (i < content.length - 1 && !(content[i] === "*" && content[i + 1] === "/")) i++;
i += 2;
continue;
}
else if (c === "{") depth++;
else if (c === "}") depth--;
}
i++;
}
const body = content.slice(startBrace + 1, i - 1);
const expectCount = (body.match(/\bexpect\s*\(/g) ?? []).length;
if (expectCount === 0) {
const trimmedLen = body.replace(/\s+/g, "").length;
const reason = trimmedLen === 0
? "empty test body"
: trimmedLen < 20 && /^\s*\/\//.test(body.trim())
? "comment-only stub"
: "no expect() calls in test body";
out.push({ name, file: testFile, reason });
}
}
return out;
};
const detectPlaceholders = (testFiles: string[]): PlaceholderTest[] => {
const out: PlaceholderTest[] = [];
for (const f of testFiles) {
const abs = resolve(REPO_ROOT, f);
if (!existsSync(abs)) continue;
const content = readFileSync(abs, "utf8");
out.push(...findPlaceholderTests(f, content));
}
return out;
};
const uniqueTestFiles = Array.from(new Set(tests.map((t) => t.file).filter(Boolean)));
const placeholderTests = detectPlaceholders(uniqueTestFiles);
interface TestRunRecord {
sha: string;
branch: string;
ranAt: number;
total: number;
passing: number;
failing: number;
durationMs: number;
tests: TestRecord[];
placeholderTests: PlaceholderTest[];
}
interface TestBundle {
owner: string;
name: string;
runs: TestRunRecord[];
}
const bundlePath = resolve(REPO_ROOT, "content", "git-history", `${OWNER}__${NAME}__tests.json`);
let bundle: TestBundle = { owner: OWNER, name: NAME, runs: [] };
if (existsSync(bundlePath)) {
try {
const parsed = JSON.parse(readFileSync(bundlePath, "utf8")) as TestBundle;
if (parsed && Array.isArray(parsed.runs)) bundle = parsed;
} catch {
// Corrupt or unreadable bundle — start fresh, deploy isn't blocked.
}
}
if (bundle.runs.some((r) => r.sha === head)) {
console.log(`✓ tests for ${head.slice(0, 7)} already in bundle (${bundle.runs.length} runs total) — nothing to add`);
} else {
bundle.runs.unshift({
sha: head,
branch,
ranAt: Date.now(),
total: tests.length,
passing,
failing,
durationMs: totalDurationMs,
tests,
placeholderTests,
});
bundle.runs = bundle.runs.slice(0, MAX_RUNS);
mkdirSync(resolve(REPO_ROOT, "content", "git-history"), { recursive: true });
writeFileSync(bundlePath, JSON.stringify(bundle, null, 2));
console.log(`✓ tests at ${head.slice(0, 7)} (${branch}): ${passing}/${tests.length} pass, ${failing} fail, ${placeholderTests.length} placeholder → bundle (${bundle.runs.length} runs total)`);
if (placeholderTests.length > 0) {
for (const p of placeholderTests) {
console.log(` ⚠ placeholder: ${p.file} > ${p.name} (${p.reason})`);
}
}
}
// ---------------------------------------------------------------------
// Historical mode: run the test suite at each of the last N commits
// that aren't already in the bundle. Opt-in via SAMA_HISTORY_DEPTH.
// ---------------------------------------------------------------------
const runHistoricalCommit = (sha: string): boolean => {
const wt = `/tmp/tdd-md-wt-${sha.slice(0, 12)}`;
const junit = `/tmp/tdd-md-test-junit-${sha.slice(0, 12)}.xml`;
// Cleanup any leftover from a previous failed run.
spawnSync("git", ["worktree", "remove", "--force", wt], { cwd: REPO_ROOT });
rmSync(wt, { recursive: true, force: true });
const add = spawnSync("git", ["worktree", "add", "--detach", wt, sha], {
cwd: REPO_ROOT,
encoding: "utf8",
});
if (add.status !== 0) {
console.log(` ✗ worktree add failed for ${sha.slice(0, 7)}: ${add.stderr.trim()}`);
return false;
}
let added = false;
try {
// Symlink node_modules from the parent checkout. Works as long as
// bun.lock didn't change between commits — true for almost every
// commit on tdd.md. If it diverged, `bun test` will fail loudly
// and we just skip that SHA.
spawnSync("ln", ["-s", resolve(REPO_ROOT, "node_modules"), resolve(wt, "node_modules")]);
const ranAt = Date.now();
spawnSync("bun", ["test", "--reporter=junit", `--reporter-outfile=${junit}`], {
cwd: wt,
stdio: "ignore",
timeout: 120_000,
});
if (!existsSync(junit)) {
console.log(` ✗ no junit output for ${sha.slice(0, 7)} — skipping`);
return false;
}
const histXml = readFileSync(junit, "utf8");
const histTests = parseJunit(histXml);
if (histTests.length === 0) {
console.log(` ⚠ ${sha.slice(0, 7)} produced 0 tests — likely deps mismatch, skipping`);
return false;
}
const histPlaceholders: PlaceholderTest[] = [];
for (const f of Array.from(new Set(histTests.map((t) => t.file).filter(Boolean)))) {
const abs = resolve(wt, f);
if (!existsSync(abs)) continue;
histPlaceholders.push(...findPlaceholderTests(f, readFileSync(abs, "utf8")));
}
const histPassing = histTests.filter((t) => t.status === "pass").length;
const histFailing = histTests.length - histPassing;
const histDur = histTests.reduce((s, t) => s + t.durationMs, 0);
const branchAtSha = sh("git", ["log", "-1", "--format=%D", sha]).split(",").map((s) => s.trim()).find((s) => s.startsWith("HEAD ->"))?.replace("HEAD -> ", "") ?? "(detached)";
bundle.runs.push({
sha,
branch: branchAtSha,
ranAt,
total: histTests.length,
passing: histPassing,
failing: histFailing,
durationMs: histDur,
tests: histTests,
placeholderTests: histPlaceholders,
});
added = true;
console.log(` ✓ ${sha.slice(0, 7)}: ${histPassing}/${histTests.length} pass, ${histFailing} fail, ${histPlaceholders.length} placeholder`);
} finally {
spawnSync("git", ["worktree", "remove", "--force", wt], { cwd: REPO_ROOT });
rmSync(wt, { recursive: true, force: true });
rmSync(junit, { force: true });
}
return added;
};
if (HISTORY_DEPTH > 0) {
console.log(`→ historical mode: walking last ${HISTORY_DEPTH} commits`);
const recent = sh("git", ["log", `--max-count=${HISTORY_DEPTH + 1}`, "--pretty=format:%H"]).split("\n").slice(1); // skip HEAD
let addedCount = 0;
for (const sha of recent) {
if (!sha) continue;
if (bundle.runs.some((r) => r.sha === sha)) {
console.log(` • ${sha.slice(0, 7)} already in bundle, skipping`);
continue;
}
if (runHistoricalCommit(sha)) addedCount++;
}
if (addedCount > 0) {
// Re-sort newest-first by ranAt before re-writing. The new
// historical entries used Date.now() at the moment they ran, but
// for chronology we want them positioned by commit author date.
const tsByMessage = (s: string) => Date.parse(sh("git", ["log", "-1", "--format=%aI", s]));
bundle.runs.sort((a, b) => tsByMessage(b.sha) - tsByMessage(a.sha));
bundle.runs = bundle.runs.slice(0, MAX_RUNS);
writeFileSync(bundlePath, JSON.stringify(bundle, null, 2));
console.log(`✓ added ${addedCount} historical run${addedCount === 1 ? "" : "s"} → bundle (${bundle.runs.length} runs total)`);
}
}