syntaxai/tdd.md · commit fc54109

SAMA Modeled: sibling tests for c32_session/judge/real_reports/real_tests

Closes #11. Greens the 4 pre-existing Modeled violations the verifier
has flagged for weeks. /sama/verify?repo=syntaxai/tdd.md now lights up
4/4 pillars on the dogfood demo.

The c32 layer's intent is pure logic, so most files already had
unit-testable surfaces; the three that didn't (judge, real_reports,
real_tests) had pure helpers buried behind `const` that I lifted to
`export const`. No behaviour changes — just visibility, so the sibling
test can reach them.

- c32_session.test.ts (24 tests) — parseCookies / timingSafeEqual /
  hmacSha256Hex / sessionCookieHeader / randomHex / signSession +
  verifySession round-trip (incl. forged-sig and expired-cookie
  rejection paths). beforeAll/afterAll around SESSION_SECRET because
  Bun's describe-body runs sync at registration time.
- c32_judge.test.ts (9 tests) — applyMode (strict/pragmatic/learning
  penalty math, positive-delta passthrough); explainRefactor (the two
  branches return distinct, instruction-bearing strings). The full
  judge() orchestrator is git-clone + test-execution, so here it gets
  only an export-shape smoke check; its behaviour stays covered by the
  kata-runner e2e harness, not unit tests.
- c32_real_reports.test.ts (12 tests) — detectAgent (Claude/Cursor/
  Aider/unknown attribution, case-insensitive); buildTrend (30-day
  daily sparkline boundaries, same-day stacking, out-of-window drop).
- c32_real_tests.test.ts (10 tests) — detectAgent (same shape as
  real_reports but returns null instead of "unknown" — documented in
  the test); shortenTestLabel (basename + name format, edge cases).

Suite: 138 → 193. Verifier: M ✗ 4 → M ✓ pass across 55 files.

Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
author
syntaxai <[email protected]>
date
2026-05-22 13:02:29 +01:00
parent
5eee479
commit
fc541094c929eca718ea2823ebe32cd9f3947ef3

7 files changed · +415 −6

added src/c32_judge.test.ts +69 −0
@@ -0,0 +1,69 @@
1+// Sibling test for c32_judge.ts. The orchestrator itself (judge()) does
2+// git clone + test execution and isn't unit-testable without a real
3+// agent repo; the pure helpers underneath it (applyMode, explainRefactor)
4+// are the structural surface that matters for scoring decisions. Cover
5+// the mode-aware penalty math + the operator-facing explanations here.
6+
7+import { describe, test, expect } from "bun:test";
8+import { applyMode, explainRefactor, judge } from "./c32_judge.ts";
9+
10+describe("c32_judge — applyMode (mode-aware penalty math)", () => {
11+ test("positive deltas pass through unchanged in every mode", () => {
12+ expect(applyMode(10, "strict")).toBe(10);
13+ expect(applyMode(10, "pragmatic")).toBe(10);
14+ expect(applyMode(10, "learning")).toBe(10);
15+ });
16+
17+ test("strict mode keeps the full negative penalty", () => {
18+ expect(applyMode(-20, "strict")).toBe(-20);
19+ expect(applyMode(-5, "strict")).toBe(-5);
20+ });
21+
22+ test("pragmatic mode halves negative deltas (Math.ceil — never below half)", () => {
23+ expect(applyMode(-20, "pragmatic")).toBe(-10);
24+ expect(applyMode(-10, "pragmatic")).toBe(-5);
25+ // -5 / 2 = -2.5 → Math.ceil(-2.5) = -2: a fractional half rounds up,
26+ // i.e. toward zero, which is the documented "softer score" behaviour.
27+ expect(applyMode(-5, "pragmatic")).toBe(-2);
28+ });
29+
30+ test("learning mode zeroes out every negative delta", () => {
31+ expect(applyMode(-20, "learning")).toBe(0);
32+ expect(applyMode(-5, "learning")).toBe(0);
33+ expect(applyMode(-1, "learning")).toBe(0);
34+ });
35+
36+ test("zero delta is neutral in every mode", () => {
37+ expect(applyMode(0, "strict")).toBe(0);
38+ expect(applyMode(0, "pragmatic")).toBe(0);
39+ expect(applyMode(0, "learning")).toBe(0);
40+ });
41+});
42+
43+describe("c32_judge — explainRefactor", () => {
44+ test("passed=true returns the canonical-refactor explanation", () => {
45+ const s = explainRefactor(true);
46+ expect(s).toContain("stayed green");
47+ expect(s).toMatch(/canonical/i);
48+ });
49+
50+ test("passed=false returns guidance to revert or open a new red→green", () => {
51+ const s = explainRefactor(false);
52+ expect(s).toContain("broke");
53+ expect(s).toMatch(/revert|red→green/);
54+ });
55+
56+ test("the two branches return different strings", () => {
57+ expect(explainRefactor(true)).not.toBe(explainRefactor(false));
58+ });
59+});
60+
61+describe("c32_judge — orchestrator entry point", () => {
62+ test("judge is exported as an async function (Promise-returning)", () => {
63+ expect(typeof judge).toBe("function");
64+ // The orchestrator does git clone + test execution; covering it
65+ // end-to-end needs a real agent repo. A runtime smoke check that the
66+ // export's shape didn't drift is the documented minimum for this layer.
67+ expect(judge.length).toBe(2);
68+ });
69+});
modified src/c32_judge.ts +2 −2
@@ -38,7 +38,7 @@ const readConfig = async (cwd: string): Promise<TddConfig> => {
3838
3939 // Penalty halving for pragmatic, zeroing for learning. Positive deltas
4040 // are unchanged across modes — earned credit is earned credit.
41-const applyMode = (delta: number, mode: Mode): number => {
41+export const applyMode = (delta: number, mode: Mode): number => {
4242 if (delta >= 0) return delta;
4343 if (mode === "learning") return 0;
4444 if (mode === "pragmatic") return Math.ceil(delta / 2);
@@ -82,7 +82,7 @@ const explainStep = (params: {
8282 }
8383 };
8484
85-const explainRefactor = (passed: boolean): string =>
85+export const explainRefactor = (passed: boolean): string =>
8686 passed
8787 ? "Tests stayed green through the refactor — structural change without behavior change, the canonical refactor."
8888 : "Refactor commit broke at least one test. Either revert the refactor or write a new red→green to capture the changed behavior.";
added src/c32_real_reports.test.ts +101 −0
@@ -0,0 +1,101 @@
1+// Sibling test for c32_real_reports.ts. buildLiveReports itself fans out
2+// to fetchRepoCommits (network) so its end-to-end shape is covered by
3+// the live /reports/live route. The pure helpers underneath — agent
4+// attribution from commit messages, and the 30-day daily sparkline —
5+// are unit-testable here.
6+
7+import { describe, test, expect } from "bun:test";
8+import {
9+ detectAgent,
10+ buildTrend,
11+ buildLiveReports,
12+} from "./c32_real_reports.ts";
13+import type { GithubCommit } from "./c14_github.ts";
14+
15+const mkCommit = (date: string, message = ""): GithubCommit => ({
16+ sha: "0".repeat(40),
17+ commit: {
18+ message,
19+ author: { name: "test", email: "[email protected]", date },
20+ committer: { name: "test", email: "[email protected]", date },
21+ },
22+ author: null,
23+ committer: null,
24+} as unknown as GithubCommit);
25+
26+describe("c32_real_reports — detectAgent", () => {
27+ test("recognises a Claude Code commit via Co-Authored-By: Claude", () => {
28+ expect(detectAgent("Add feature\n\nCo-Authored-By: Claude <noreply>")).toBe("claude-code");
29+ });
30+
31+ test("recognises a Cursor commit", () => {
32+ expect(detectAgent("Fix bug\n\nCo-Authored-By: Cursor <[email protected]>")).toBe("cursor");
33+ });
34+
35+ test("recognises an Aider commit", () => {
36+ expect(detectAgent("Refactor x\n\nCo-Authored-By: aider")).toBe("aider");
37+ });
38+
39+ test("returns unknown when no recognised footer is present", () => {
40+ expect(detectAgent("Just a commit")).toBe("unknown");
41+ expect(detectAgent("")).toBe("unknown");
42+ });
43+
44+ test("the regex is case-insensitive on the agent token", () => {
45+ expect(detectAgent("Co-Authored-By: CLAUDE")).toBe("claude-code");
46+ expect(detectAgent("co-authored-by: CURSOR")).toBe("cursor");
47+ });
48+});
49+
50+describe("c32_real_reports — buildTrend (30-day daily sparkline)", () => {
51+ // Use today (UTC) as the anchor — the function compares against UTC
52+ // midnight, so we need ISO strings that fall on the right days.
53+ const today = new Date();
54+ today.setUTCHours(0, 0, 0, 0);
55+ const iso = (daysAgo: number): string => {
56+ const d = new Date(today.getTime() - daysAgo * 24 * 60 * 60 * 1000);
57+ return d.toISOString();
58+ };
59+
60+ test("returns an array of `days` length", () => {
61+ expect(buildTrend([], 30)).toHaveLength(30);
62+ expect(buildTrend([], 7)).toHaveLength(7);
63+ });
64+
65+ test("empty input flat-lines at zero", () => {
66+ const trend = buildTrend([], 7);
67+ expect(trend.every((n) => n === 0)).toBe(true);
68+ });
69+
70+ test("a single commit today increments the last bucket", () => {
71+ const trend = buildTrend([mkCommit(iso(0))], 7);
72+ expect(trend[trend.length - 1]).toBe(1);
73+ expect(trend.slice(0, -1).every((n) => n === 0)).toBe(true);
74+ });
75+
76+ test("multiple commits on the same day stack in the same bucket", () => {
77+ const trend = buildTrend([mkCommit(iso(0)), mkCommit(iso(0)), mkCommit(iso(0))], 7);
78+ expect(trend[trend.length - 1]).toBe(3);
79+ });
80+
81+ test("commits older than the window are dropped", () => {
82+ const trend = buildTrend([mkCommit(iso(99))], 7);
83+ expect(trend.every((n) => n === 0)).toBe(true);
84+ });
85+
86+ test("a commit `daysAgo` lands at index `days - 1 - daysAgo`", () => {
87+ const trend = buildTrend([mkCommit(iso(2))], 7);
88+ // index 6 = today, 5 = yesterday, 4 = 2 days ago
89+ expect(trend[4]).toBe(1);
90+ });
91+});
92+
93+describe("c32_real_reports — orchestrator entry point", () => {
94+ test("buildLiveReports is exported as an async function", () => {
95+ expect(typeof buildLiveReports).toBe("function");
96+ // End-to-end coverage lives on /reports/live; this is the structural
97+ // smoke that the export shape didn't drift. `.length` counts only
98+ // non-default params (owner, repo) — perPage carries a default.
99+ expect(buildLiveReports.length).toBe(2);
100+ });
101+});
modified src/c32_real_reports.ts +2 −2
@@ -18,7 +18,7 @@ import type {
1818
1919 type LiveAgentSlug = AgentReport["slug"] | "unknown";
2020
21-const detectAgent = (msg: string): LiveAgentSlug => {
21+export const detectAgent = (msg: string): LiveAgentSlug => {
2222 if (/Co-Authored-By:.*Claude/i.test(msg)) return "claude-code";
2323 if (/Co-Authored-By:.*Cursor/i.test(msg)) return "cursor";
2424 if (/Co-Authored-By:.*Aider/i.test(msg)) return "aider";
@@ -34,7 +34,7 @@ const AGENT_NAMES: Record<AgentReport["slug"], string> = {
3434 // 30-day daily commit-count series, oldest → newest. When there are no
3535 // commits in a day, that day's value is 0 — the sparkline still renders
3636 // but flat-lines, which honestly reflects the data.
37-const buildTrend = (commits: GithubCommit[], days = 30): number[] => {
37+export const buildTrend = (commits: GithubCommit[], days = 30): number[] => {
3838 const out = new Array<number>(days).fill(0);
3939 const today = new Date();
4040 today.setUTCHours(0, 0, 0, 0);
added src/c32_real_tests.test.ts +66 −0
@@ -0,0 +1,66 @@
1+// Sibling test for c32_real_tests.ts. buildLiveTestData fans out to
2+// loadTestBundle + fetchRepoCommits (both network/disk) so the
3+// end-to-end is covered by the live /reports/live/tests route. The
4+// pure helpers — agent attribution and the file/name label shortener —
5+// are unit-testable here.
6+
7+import { describe, test, expect } from "bun:test";
8+import {
9+ detectAgent,
10+ shortenTestLabel,
11+ buildLiveTestData,
12+} from "./c32_real_tests.ts";
13+
14+describe("c32_real_tests — detectAgent", () => {
15+ test("recognises Claude Code via Co-Authored-By: Claude", () => {
16+ expect(detectAgent("Add feature\n\nCo-Authored-By: Claude <noreply>")).toBe("claude-code");
17+ });
18+
19+ test("recognises Cursor", () => {
20+ expect(detectAgent("Fix bug\n\nCo-Authored-By: Cursor <[email protected]>")).toBe("cursor");
21+ });
22+
23+ test("recognises Aider", () => {
24+ expect(detectAgent("Refactor x\n\nCo-Authored-By: aider")).toBe("aider");
25+ });
26+
27+ test("returns null when no recognised footer is present (distinct from c32_real_reports which returns 'unknown')", () => {
28+ // The two real_* files made different choices here: real_reports
29+ // buckets unknown into its own slug; real_tests returns null so
30+ // the caller can filter or fall back. Document the difference.
31+ expect(detectAgent("Just a commit")).toBeNull();
32+ expect(detectAgent("")).toBeNull();
33+ });
34+
35+ test("the regex is case-insensitive on the agent token", () => {
36+ expect(detectAgent("Co-Authored-By: CLAUDE")).toBe("claude-code");
37+ expect(detectAgent("co-authored-by: aider")).toBe("aider");
38+ });
39+});
40+
41+describe("c32_real_tests — shortenTestLabel", () => {
42+ test("keeps only the basename of the file path + the test name", () => {
43+ expect(shortenTestLabel("src/foo/bar/baz.test.ts", "handles X")).toBe("baz.test.ts > handles X");
44+ });
45+
46+ test("handles a bare filename (no path) without splitting weirdly", () => {
47+ expect(shortenTestLabel("baz.test.ts", "handles X")).toBe("baz.test.ts > handles X");
48+ });
49+
50+ test("handles an empty file string (falls back to the empty basename)", () => {
51+ // .split('/').pop() on '' yields ''. Documented behaviour: the
52+ // helper never throws; the caller decides whether to filter empties.
53+ expect(shortenTestLabel("", "name")).toBe(" > name");
54+ });
55+
56+ test("preserves spaces and special chars in the test name", () => {
57+ expect(shortenTestLabel("a.ts", "rejects `bad input`")).toBe("a.ts > rejects `bad input`");
58+ });
59+});
60+
61+describe("c32_real_tests — orchestrator entry point", () => {
62+ test("buildLiveTestData is exported as an async function", () => {
63+ expect(typeof buildLiveTestData).toBe("function");
64+ expect(buildLiveTestData.length).toBe(2);
65+ });
66+});
modified src/c32_real_tests.ts +2 −2
@@ -13,14 +13,14 @@ import type {
1313 TestStability,
1414 } from "./c31_reports_demo.ts";
1515
16-const detectAgent = (msg: string): AgentReport["slug"] | null => {
16+export const detectAgent = (msg: string): AgentReport["slug"] | null => {
1717 if (/Co-Authored-By:.*Claude/i.test(msg)) return "claude-code";
1818 if (/Co-Authored-By:.*Cursor/i.test(msg)) return "cursor";
1919 if (/Co-Authored-By:.*Aider/i.test(msg)) return "aider";
2020 return null;
2121 };
2222
23-const shortenTestLabel = (file: string, name: string): string => {
23+export const shortenTestLabel = (file: string, name: string): string => {
2424 const base = file.split("/").pop() ?? file;
2525 return `${base} > ${name}`;
2626 };
added src/c32_session.test.ts +173 −0
@@ -0,0 +1,173 @@
1+import { describe, test, expect, beforeAll, afterAll } from "bun:test";
2+import {
3+ parseCookies,
4+ timingSafeEqual,
5+ hmacSha256Hex,
6+ sessionCookieHeader,
7+ randomHex,
8+ signSession,
9+ verifySession,
10+ SESSION_TTL_SEC,
11+} from "./c32_session.ts";
12+
13+describe("c32_session — parseCookies", () => {
14+ test("empty / null header returns an empty object", () => {
15+ expect(parseCookies(null)).toEqual({});
16+ expect(parseCookies("")).toEqual({});
17+ });
18+
19+ test("parses a single name=value pair", () => {
20+ expect(parseCookies("tdd_session=abc")).toEqual({ tdd_session: "abc" });
21+ });
22+
23+ test("parses multiple pairs separated by `;`", () => {
24+ const out = parseCookies("a=1; b=2; c=3");
25+ expect(out).toEqual({ a: "1", b: "2", c: "3" });
26+ });
27+
28+ test("strips surrounding whitespace from name and value", () => {
29+ expect(parseCookies(" k = v ")).toEqual({ k: "v" });
30+ });
31+
32+ test("url-decodes values", () => {
33+ expect(parseCookies("path=%2Ffoo%2Fbar")).toEqual({ path: "/foo/bar" });
34+ });
35+
36+ test("ignores entries that have no `=` separator", () => {
37+ expect(parseCookies("malformed; ok=yes")).toEqual({ ok: "yes" });
38+ });
39+});
40+
41+describe("c32_session — timingSafeEqual", () => {
42+ test("returns true for identical strings", () => {
43+ expect(timingSafeEqual("hello", "hello")).toBe(true);
44+ });
45+
46+ test("returns false for different strings of the same length", () => {
47+ expect(timingSafeEqual("hello", "world")).toBe(false);
48+ });
49+
50+ test("returns false when lengths differ — early exit", () => {
51+ expect(timingSafeEqual("a", "ab")).toBe(false);
52+ });
53+
54+ test("returns true for two empty strings", () => {
55+ expect(timingSafeEqual("", "")).toBe(true);
56+ });
57+});
58+
59+describe("c32_session — hmacSha256Hex", () => {
60+ test("is deterministic for a fixed (secret, body) pair", async () => {
61+ const a = await hmacSha256Hex("s3cret", "payload");
62+ const b = await hmacSha256Hex("s3cret", "payload");
63+ expect(a).toBe(b);
64+ });
65+
66+ test("returns a 64-char lowercase hex string (SHA-256 hex length)", async () => {
67+ const sig = await hmacSha256Hex("k", "v");
68+ expect(sig).toMatch(/^[0-9a-f]{64}$/);
69+ });
70+
71+ test("a different secret produces a different signature for the same body", async () => {
72+ const a = await hmacSha256Hex("secret-a", "payload");
73+ const b = await hmacSha256Hex("secret-b", "payload");
74+ expect(a).not.toBe(b);
75+ });
76+
77+ test("a different body produces a different signature for the same secret", async () => {
78+ const a = await hmacSha256Hex("k", "body-a");
79+ const b = await hmacSha256Hex("k", "body-b");
80+ expect(a).not.toBe(b);
81+ });
82+});
83+
84+describe("c32_session — sessionCookieHeader", () => {
85+ test("formats the canonical attributes", () => {
86+ const h = sessionCookieHeader("token-x", 3600);
87+ expect(h).toContain("tdd_session=token-x");
88+ expect(h).toContain("Path=/");
89+ expect(h).toContain("HttpOnly");
90+ expect(h).toContain("Secure");
91+ expect(h).toContain("SameSite=Lax");
92+ expect(h).toContain("Max-Age=3600");
93+ });
94+
95+ test("zero max-age (logout) still emits Max-Age=0", () => {
96+ expect(sessionCookieHeader("", 0)).toContain("Max-Age=0");
97+ });
98+});
99+
100+describe("c32_session — randomHex", () => {
101+ test("returns a hex string of 2 × bytes characters", () => {
102+ expect(randomHex(8)).toMatch(/^[0-9a-f]{16}$/);
103+ expect(randomHex(16)).toMatch(/^[0-9a-f]{32}$/);
104+ });
105+
106+ test("successive calls produce distinct values", () => {
107+ expect(randomHex(16)).not.toBe(randomHex(16));
108+ });
109+});
110+
111+describe("c32_session — signSession / verifySession round-trip", () => {
112+ // The signer reads SESSION_SECRET (or WEBHOOK_SECRET) from the env.
113+ // Set a fixed value before the tests run so both sides hash with the
114+ // same key. beforeAll/afterAll, not bare describe-body, because the
115+ // body runs at registration time while tests run async — restoration
116+ // there would happen *before* any test executes.
117+ let original: string | undefined;
118+ beforeAll(() => {
119+ original = process.env.SESSION_SECRET;
120+ process.env.SESSION_SECRET = "test-secret-do-not-use-in-prod";
121+ });
122+ afterAll(() => {
123+ if (original === undefined) {
124+ delete process.env.SESSION_SECRET;
125+ } else {
126+ process.env.SESSION_SECRET = original;
127+ }
128+ });
129+
130+ test("signSession produces a 3-part cookie of `name.exp.sig`", async () => {
131+ const cookie = await signSession("alice");
132+ const parts = cookie.split(".");
133+ expect(parts.length).toBe(3);
134+ expect(parts[0]).toBe("alice");
135+ expect(Number(parts[1])).toBeGreaterThan(Math.floor(Date.now() / 1000));
136+ });
137+
138+ test("verifySession round-trips a freshly signed cookie back to the username", async () => {
139+ const cookie = await signSession("bob");
140+ const username = await verifySession(cookie);
141+ expect(username).toBe("bob");
142+ });
143+
144+ test("verifySession rejects a cookie with a forged signature", async () => {
145+ const cookie = await signSession("eve");
146+ const tampered = cookie.replace(/.$/, "0");
147+ const result = await verifySession(tampered);
148+ expect(result).toBeNull();
149+ });
150+
151+ test("verifySession rejects a cookie that's not three parts", async () => {
152+ expect(await verifySession("just-one-part")).toBeNull();
153+ expect(await verifySession("two.parts")).toBeNull();
154+ });
155+
156+ test("verifySession rejects a cookie whose expiry is in the past", async () => {
157+ // Hand-roll a cookie with an `exp` that's already passed; sign with
158+ // the same secret so the HMAC matches but the time-window check
159+ // fails.
160+ const username = "carol";
161+ const exp = Math.floor(Date.now() / 1000) - 60;
162+ const sig = await hmacSha256Hex(process.env.SESSION_SECRET!, `${username}.${exp}`);
163+ const cookie = `${username}.${exp}.${sig}`;
164+ expect(await verifySession(cookie)).toBeNull();
165+ });
166+
167+});
168+
169+describe("c32_session — exports", () => {
170+ test("SESSION_TTL_SEC is a positive integer (30 days)", () => {
171+ expect(SESSION_TTL_SEC).toBe(30 * 24 * 60 * 60);
172+ });
173+});