syntaxai/tdd.md · main · scripts / p620 / snapshot-tests.ts

snapshot-tests.ts 327 lines · 12578 bytes raw
#!/usr/bin/env bun
// Run `bun test` on the current HEAD (and optionally the last N
// historical commits) and append the results to a per-repo bundle
// alongside the git-history snapshot. The container reads this bundle
// at runtime to render /reports/live/tests for the (private)
// syntaxai/tdd.md repo without needing a runtime sandbox.
//
// HEAD mode (default): one new run per deploy, fast, no worktree.
// History mode (SAMA_HISTORY_DEPTH=N): also runs the last N commits
//   that aren't already in the bundle, via git worktree + bun install
//   per SHA. Slower (~5-10s/commit) but builds real stability data
//   instead of waiting for it to accumulate organically.
//
// Output: content/git-history/<owner>__<name>__tests.json
// Schema: { owner, name, runs: TestRunRecord[] } — newest first.

import { spawnSync } from "node:child_process";
import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { resolve } from "node:path";
import { stripStringsAndComments } from "../../src/b32_sama_verify.ts";

// Repository root, resolved two directories above the directory that
// holds this script. Every git / bun command below runs from here.
const REPO_ROOT = resolve(import.meta.dir, "..", "..");
// Repository identity. Both values are embedded in the bundle file name
// and stored as the bundle's `owner` / `name` fields.
const OWNER = "syntaxai";
const NAME = "tdd.md";
// Maximum number of runs kept in the bundle; the list is truncated to
// this length each time it is written.
const MAX_RUNS = 50;
// Path handed to `bun test --reporter-outfile` for the HEAD run.
const JUNIT_OUT = "/tmp/tdd-md-test-junit.xml";
// Number of historical commits to backfill, read from the
// SAMA_HISTORY_DEPTH environment variable (default "0"). History mode
// only runs when the parsed value is greater than zero.
const HISTORY_DEPTH = parseInt(process.env.SAMA_HISTORY_DEPTH ?? "0", 10);

// Run `cmd` with `args` from the repository root and hand back whatever
// it printed on stdout, trimmed. Exit status and stderr are not
// inspected; when no stdout was captured the result is "".
const sh = (cmd: string, args: string[]): string => {
  const { stdout } = spawnSync(cmd, args, { encoding: "utf8", cwd: REPO_ROOT });
  const captured = stdout ?? "";
  return captured.trim();
};

// Resolve the commit under test first: without a SHA there is nothing
// to key the run on, so bail out before doing any other work.
const head = sh("git", ["rev-parse", "HEAD"]);
if (head === "") {
  console.error("could not resolve HEAD");
  process.exit(1);
}
const branch = sh("git", ["rev-parse", "--abbrev-ref", "HEAD"]);

// JUNIT_OUT is a fixed path that survives between invocations. Remove
// any report left behind by an earlier run so that, if `bun test` dies
// before writing a new one, the existence check below fails instead of
// silently recording the previous run's results under the current SHA.
rmSync(JUNIT_OUT, { force: true });

// Run tests. bun exits non-zero when tests fail — that's fine, we
// just need the junit XML, which it writes regardless.
spawnSync("bun", ["test", "--reporter=junit", `--reporter-outfile=${JUNIT_OUT}`], {
  cwd: REPO_ROOT,
  stdio: "inherit",
});
if (!existsSync(JUNIT_OUT)) {
  console.error(`✗ junit output missing at ${JUNIT_OUT} — bun test crashed before writing`);
  process.exit(1);
}
const xml = readFileSync(JUNIT_OUT, "utf8");

// One test case as stored in the bundle.
interface TestRecord {
  // Value of the junit `name` attribute, XML entities decoded.
  name: string;
  // Value of the junit `file` attribute, XML entities decoded.
  file: string;
  // "fail" when the test case contains a <failure> or <error> element,
  // "pass" otherwise.
  status: "pass" | "fail";
  // junit `time` attribute (seconds) converted to whole milliseconds.
  durationMs: number;
}

// Decode the five predefined XML entities. `&amp;` is replaced last so
// an escaped ampersand such as "&amp;lt;" decodes to "&lt;" and is not
// decoded a second time.
const decodeXmlEntity = (s: string): string =>
  s
    .replace(/&apos;/g, "'")
    .replace(/&quot;/g, '"')
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&amp;/g, "&");

// Extract one TestRecord per <testcase> element of a junit XML report.
// Both self-closing and open/close forms are recognised. Regex-based on
// purpose: only three attributes and the presence of a failure/error
// child are needed.
//
// NOTE(review): a test case whose only child is <skipped> is reported
// as "pass" because the status type has no skip variant — confirm how
// skipped/todo tests should appear to consumers of the bundle.
const parseJunit = (xml: string): TestRecord[] => {
  const testcaseRe = /<testcase\s+([^>]*?)(\/>|>([\s\S]*?)<\/testcase>)/g;
  // Attribute lookups are anchored on start-of-list or whitespace so that
  // `name=` cannot match the tail of another attribute (`classname=`).
  const nameRe = /(?:^|\s)name="([^"]*)"/;
  const fileRe = /(?:^|\s)file="([^"]*)"/;
  const timeRe = /(?:^|\s)time="([^"]*)"/;

  const out: TestRecord[] = [];
  let m: RegExpExecArray | null;
  while ((m = testcaseRe.exec(xml)) !== null) {
    const attrs = m[1] ?? "";
    // Undefined for the self-closing form, which has no children.
    const inner = m[3] ?? "";
    const seconds = parseFloat(timeRe.exec(attrs)?.[1] ?? "0");
    const failed = /<failure\b/.test(inner) || /<error\b/.test(inner);
    out.push({
      name: decodeXmlEntity(nameRe.exec(attrs)?.[1] ?? ""),
      // Decoded like the name so the path can be resolved on disk.
      file: decodeXmlEntity(fileRe.exec(attrs)?.[1] ?? ""),
      status: failed ? "fail" : "pass",
      // A non-numeric `time` would otherwise become NaN (null in JSON).
      durationMs: Number.isFinite(seconds) ? Math.round(seconds * 1000) : 0,
    });
  }
  return out;
};

// Parse the HEAD report and derive the summary figures in one pass:
// how many tests passed and the sum of all per-test durations.
const tests = parseJunit(xml);
const { passing, totalDurationMs } = tests.reduce(
  (acc, t) => ({
    passing: acc.passing + (t.status === "pass" ? 1 : 0),
    totalDurationMs: acc.totalDurationMs + t.durationMs,
  }),
  { passing: 0, totalDurationMs: 0 },
);
// Everything that did not pass counts as failing.
const failing = tests.length - passing;

// A test flagged by findPlaceholderTests because its body contains no
// `expect(` call.
interface PlaceholderTest {
  // Test title exactly as written in the test()/it() call; escape
  // sequences are not interpreted.
  name: string;
  // Test file path as passed to findPlaceholderTests.
  file: string;
  // Why it was flagged: "empty test body", "comment-only stub" or
  // "no expect() calls in test body".
  reason: string;
}

// Placeholder detection. Catches the failure mode from r/ClaudeCode
// post 1qix264 ("90 placeholder tests, 100% pass rate"): tests with
// zero `expect(` calls in their body are flagged. Regex-based brace
// matching — full AST is overkill for the one structural property we
// care about. Limitations: misses tests that delegate to a custom
// assertion helper or pass through a subroutine. Acceptable for v1;
// the catch is the common failure shape, not every theoretical one.
//
// An `expect(` that only appears inside a comment or string literal
// (e.g. `// TODO: expect(x).toBe(1)`) does not count as an assertion.
//
// testFile — path recorded on every finding; the file is not read here.
// content  — full source text of that test file.
// Returns one PlaceholderTest per flagged test()/it() call, in source
// order.
const findPlaceholderTests = (testFile: string, content: string): PlaceholderTest[] => {
  const out: PlaceholderTest[] = [];
  // Strip strings & comments to a same-length whitespace mask. Used
  // below to skip any test()/it() match whose keyword sits inside a
  // string literal (the c32_sama_verify.test.ts fixtures hold real
  // test source as backtick strings — those aren't real tests), and to
  // ignore `expect(` occurrences that are not real code.
  const mask = stripStringsAndComments(content);
  // True when the mask is blank at `pos`. Only called for positions
  // where `content` holds an identifier character, so a blank there
  // means the original sat inside a string or comment.
  const isMasked = (pos: number): boolean => mask[pos] === " " || mask[pos] === "\n";
  const re = /\b(test|it)\s*\(\s*(["'`])((?:\\.|(?!\2).)*)\2\s*,\s*(?:async\s+)?(?:\([^)]*\)|[^=()]*?)\s*=>\s*\{/g;
  const expectRe = /\bexpect\s*\(/g;
  let m: RegExpExecArray | null;
  while ((m = re.exec(content)) !== null) {
    if (isMasked(m.index)) continue;
    const name = m[3] ?? "";
    // The match ends on the callback's opening brace.
    const startBrace = re.lastIndex - 1;
    // Walk forward to the matching closing brace, ignoring braces that
    // sit inside string literals or comments.
    let depth = 1;
    let i = startBrace + 1;
    let inString: string | null = null;
    while (i < content.length && depth > 0) {
      const c = content[i];
      if (inString !== null) {
        // Skip the escaped character so an escaped quote does not end
        // the string.
        if (c === "\\") { i += 2; continue; }
        if (c === inString) inString = null;
      } else {
        if (c === '"' || c === "'" || c === "`") inString = c;
        else if (c === "/" && content[i + 1] === "/") {
          // line comment
          while (i < content.length && content[i] !== "\n") i++;
          continue;
        }
        else if (c === "/" && content[i + 1] === "*") {
          // block comment
          i += 2;
          while (i < content.length - 1 && !(content[i] === "*" && content[i + 1] === "/")) i++;
          i += 2;
          continue;
        }
        else if (c === "{") depth++;
        else if (c === "}") depth--;
      }
      i++;
    }
    const bodyStart = startBrace + 1;
    // When the brace closed, `i` is one past it. If the file ended first
    // (unbalanced braces) take everything that is left.
    const bodyEnd = depth === 0 ? i - 1 : content.length;
    const body = content.slice(bodyStart, bodyEnd);
    // Count only `expect(` calls that the mask reports as real code.
    let expectCount = 0;
    expectRe.lastIndex = 0;
    let hit: RegExpExecArray | null;
    while ((hit = expectRe.exec(body)) !== null) {
      if (!isMasked(bodyStart + hit.index)) expectCount++;
    }
    if (expectCount === 0) {
      const trimmedLen = body.replace(/\s+/g, "").length;
      const reason = trimmedLen === 0
        ? "empty test body"
        : trimmedLen < 20 && /^\s*\/\//.test(body.trim())
          ? "comment-only stub"
          : "no expect() calls in test body";
      out.push({ name, file: testFile, reason });
    }
  }
  return out;
};

// Scan every listed test file (paths relative to the repository root)
// for placeholder tests and return all findings in input order. Paths
// that do not exist on disk are skipped.
const detectPlaceholders = (testFiles: string[]): PlaceholderTest[] =>
  testFiles.flatMap((relPath) => {
    const absPath = resolve(REPO_ROOT, relPath);
    return existsSync(absPath)
      ? findPlaceholderTests(relPath, readFileSync(absPath, "utf8"))
      : [];
  });

// Distinct, non-empty file paths named by the report, in first-seen
// order, then the placeholder scan over exactly those files.
const uniqueTestFiles = [...new Set(tests.flatMap((t) => (t.file ? [t.file] : [])))];
const placeholderTests = detectPlaceholders(uniqueTestFiles);

// One execution of the test suite against a single commit.
interface TestRunRecord {
  // Full SHA of the commit the suite ran against.
  sha: string;
  // Branch label. HEAD runs store `git rev-parse --abbrev-ref HEAD`;
  // historical runs store the name after "HEAD ->" in the commit's ref
  // decoration, or "(detached)" when there is none.
  branch: string;
  // Date.now() (epoch milliseconds) captured by this script for the run.
  ranAt: number;
  // Number of test cases found in the junit report.
  total: number;
  // Test cases with status "pass".
  passing: number;
  // total - passing.
  failing: number;
  // Sum of the per-test durationMs values.
  durationMs: number;
  // Every test case from the report.
  tests: TestRecord[];
  // Tests flagged as having no assertions.
  placeholderTests: PlaceholderTest[];
}

// On-disk shape of content/git-history/<owner>__<name>__tests.json.
interface TestBundle {
  // Repository owner (OWNER when the bundle is created by this script).
  owner: string;
  // Repository name (NAME when the bundle is created by this script).
  name: string;
  // Recorded runs, capped at MAX_RUNS entries when written.
  runs: TestRunRecord[];
}

// Load the existing bundle (if any), prepend the HEAD run unless that
// SHA is already recorded, and write the bundle back.
const bundlePath = resolve(REPO_ROOT, "content", "git-history", `${OWNER}__${NAME}__tests.json`);
let bundle: TestBundle = { owner: OWNER, name: NAME, runs: [] };
if (existsSync(bundlePath)) {
  try {
    const parsed: unknown = JSON.parse(readFileSync(bundlePath, "utf8"));
    if (typeof parsed === "object" && parsed !== null && Array.isArray((parsed as { runs?: unknown }).runs)) {
      bundle = parsed as TestBundle;
    } else {
      // Valid JSON but not a bundle. Starting over discards the recorded
      // history, so say so instead of doing it silently.
      console.warn(`⚠ ${bundlePath} has no runs array — starting a fresh bundle`);
    }
  } catch (e: unknown) {
    // Corrupt or unreadable bundle — start fresh, deploy isn't blocked,
    // but leave a trace because the previous history is being dropped.
    const why = e instanceof Error ? e.message : String(e);
    console.warn(`⚠ could not read ${bundlePath} (${why}) — starting a fresh bundle`);
  }
}

if (bundle.runs.some((r) => r.sha === head)) {
  console.log(`✓ tests for ${head.slice(0, 7)} already in bundle (${bundle.runs.length} runs total) — nothing to add`);
} else {
  // Newest first: the HEAD run goes to the front, then the list is
  // capped at MAX_RUNS.
  bundle.runs.unshift({
    sha: head,
    branch,
    ranAt: Date.now(),
    total: tests.length,
    passing,
    failing,
    durationMs: totalDurationMs,
    tests,
    placeholderTests,
  });
  bundle.runs = bundle.runs.slice(0, MAX_RUNS);
  mkdirSync(resolve(REPO_ROOT, "content", "git-history"), { recursive: true });
  writeFileSync(bundlePath, JSON.stringify(bundle, null, 2));
  console.log(`✓ tests at ${head.slice(0, 7)} (${branch}): ${passing}/${tests.length} pass, ${failing} fail, ${placeholderTests.length} placeholder → bundle (${bundle.runs.length} runs total)`);
  for (const p of placeholderTests) {
    console.log(`    ⚠ placeholder: ${p.file} > ${p.name} (${p.reason})`);
  }
}

// ---------------------------------------------------------------------
// Historical mode: run the test suite at each of the last N commits
// that aren't already in the bundle. Opt-in via SAMA_HISTORY_DEPTH.
// ---------------------------------------------------------------------

// Run the test suite at commit `sha` inside a throw-away git worktree
// and append the result to `bundle.runs`.
//
// Returns true when a run was appended. Returns false when the worktree
// could not be created, no junit report was produced, or the report
// contained no test cases. The worktree and the junit file are removed
// again in every case once the worktree has been created.
const runHistoricalCommit = (sha: string): boolean => {
  const wt = `/tmp/tdd-md-wt-${sha.slice(0, 12)}`;
  const junit = `/tmp/tdd-md-test-junit-${sha.slice(0, 12)}.xml`;
  // Cleanup any leftover from a previous failed run. The junit file is
  // included: a stale report from an interrupted run must not be
  // mistaken for the output of this one.
  spawnSync("git", ["worktree", "remove", "--force", wt], { cwd: REPO_ROOT });
  rmSync(wt, { recursive: true, force: true });
  rmSync(junit, { force: true });

  const add = spawnSync("git", ["worktree", "add", "--detach", wt, sha], {
    cwd: REPO_ROOT,
    encoding: "utf8",
  });
  if (add.status !== 0) {
    // stderr is null when git itself could not be spawned.
    console.log(`  ✗ worktree add failed for ${sha.slice(0, 7)}: ${(add.stderr ?? "").trim()}`);
    return false;
  }

  let added = false;
  try {
    // Symlink node_modules from the parent checkout. Works as long as
    // bun.lock didn't change between commits — true for almost every
    // commit on tdd.md. If it diverged, `bun test` will fail loudly
    // and we just skip that SHA.
    spawnSync("ln", ["-s", resolve(REPO_ROOT, "node_modules"), resolve(wt, "node_modules")]);

    // Timestamp taken just before the suite starts.
    const ranAt = Date.now();
    spawnSync("bun", ["test", "--reporter=junit", `--reporter-outfile=${junit}`], {
      cwd: wt,
      stdio: "ignore",
      timeout: 120_000,
    });
    if (!existsSync(junit)) {
      console.log(`  ✗ no junit output for ${sha.slice(0, 7)} — skipping`);
      return false;
    }
    const histXml = readFileSync(junit, "utf8");
    const histTests = parseJunit(histXml);
    if (histTests.length === 0) {
      console.log(`  ⚠ ${sha.slice(0, 7)} produced 0 tests — likely deps mismatch, skipping`);
      return false;
    }
    // Placeholder scan against the files as they exist in the worktree,
    // i.e. at this commit, not in the current checkout.
    const histPlaceholders: PlaceholderTest[] = [];
    for (const f of Array.from(new Set(histTests.map((t) => t.file).filter(Boolean)))) {
      const abs = resolve(wt, f);
      if (!existsSync(abs)) continue;
      histPlaceholders.push(...findPlaceholderTests(f, readFileSync(abs, "utf8")));
    }
    const histPassing = histTests.filter((t) => t.status === "pass").length;
    const histFailing = histTests.length - histPassing;
    const histDur = histTests.reduce((s, t) => s + t.durationMs, 0);
    // NOTE(review): the ref decoration only carries "HEAD -> <branch>"
    // for the commit HEAD currently points at, so for the older commits
    // this is called with the value is presumably always "(detached)" —
    // confirm that is the intended label.
    const branchAtSha = sh("git", ["log", "-1", "--format=%D", sha]).split(",").map((s) => s.trim()).find((s) => s.startsWith("HEAD ->"))?.replace("HEAD -> ", "") ?? "(detached)";

    // Appended at the end; the caller is responsible for ordering.
    bundle.runs.push({
      sha,
      branch: branchAtSha,
      ranAt,
      total: histTests.length,
      passing: histPassing,
      failing: histFailing,
      durationMs: histDur,
      tests: histTests,
      placeholderTests: histPlaceholders,
    });
    added = true;
    console.log(`  ✓ ${sha.slice(0, 7)}: ${histPassing}/${histTests.length} pass, ${histFailing} fail, ${histPlaceholders.length} placeholder`);
  } finally {
    spawnSync("git", ["worktree", "remove", "--force", wt], { cwd: REPO_ROOT });
    rmSync(wt, { recursive: true, force: true });
    rmSync(junit, { force: true });
  }
  return added;
};

if (HISTORY_DEPTH > 0) {
  console.log(`→ historical mode: walking last ${HISTORY_DEPTH} commits`);
  // Ask for one extra commit and drop the first line, which is HEAD
  // itself (already handled above).
  const recent = sh("git", ["log", `--max-count=${HISTORY_DEPTH + 1}`, "--pretty=format:%H"]).split("\n").slice(1); // skip HEAD
  let addedCount = 0;
  for (const sha of recent) {
    if (!sha) continue;
    if (bundle.runs.some((r) => r.sha === sha)) {
      console.log(`  • ${sha.slice(0, 7)} already in bundle, skipping`);
      continue;
    }
    if (runHistoricalCommit(sha)) addedCount++;
  }
  if (addedCount > 0) {
    // Historical entries were appended with ranAt set to the moment they
    // were executed, which says nothing about where they belong. Re-sort
    // the whole bundle newest-first by commit author date instead.
    //
    // Each date is looked up once up front: doing the lookup inside the
    // comparator would spawn git twice per comparison. A SHA git cannot
    // resolve yields an empty string and therefore NaN, which would make
    // the comparator inconsistent — those runs fall back to ranAt.
    const sortKey = new Map<string, number>();
    for (const run of bundle.runs) {
      const authoredAt = Date.parse(sh("git", ["log", "-1", "--format=%aI", run.sha]));
      sortKey.set(run.sha, Number.isNaN(authoredAt) ? run.ranAt : authoredAt);
    }
    bundle.runs.sort((a, b) => (sortKey.get(b.sha) ?? 0) - (sortKey.get(a.sha) ?? 0));
    bundle.runs = bundle.runs.slice(0, MAX_RUNS);
    writeFileSync(bundlePath, JSON.stringify(bundle, null, 2));
    console.log(`✓ added ${addedCount} historical run${addedCount === 1 ? "" : "s"} → bundle (${bundle.runs.length} runs total)`);
  }
}