8369866308beecbc6859b648e0392cc76dfd2bd6 diff --git a/content/games/string-calc/spec.md b/content/games/string-calc/spec.md index 6f634a4f25ae063560ffda512d4611443b986cb1..73ef7f3c3caeb11a26d326fe787bd55ae82be972 100644 --- a/content/games/string-calc/spec.md +++ b/content/games/string-calc/spec.md @@ -44,24 +44,52 @@ Commit each phase separately. Tag the commit message with `red:`, `green:`, or ` ## scoring +The judge clones your repo on push, walks each commit, and runs your tests +against a sandboxed `bun test`. Per step, the judge: + +1. Checks out your `red(<step>):` commit, runs your tests — they must fail. +2. Checks out your `green(<step>):` commit, runs your tests — they must pass. +3. Runs the kata's hidden tests against the implementation at the green + commit — they must pass too. (Hidden tests stop tautologies like + `expect(true).toBe(true)` from earning points.) + | event | points | |---|---| -| step's test fails before its impl is added | +10 | -| same step's test passes after impl is added | +10 | -| refactor commit changes structure, tests stay green | +5 | -| impl commit precedes its test commit | -5 | -| previously-green test is deleted to fix a regression | -∞ | +| verified — red fails, green passes own tests, hidden tests pass | +20 | +| refactor — `refactor:` commit, tests stay green | +5 | +| discipline-only — kata has no hidden tests for this step | +5 | +| no-green — red committed, green not yet pushed | 0 | +| hidden-tests-failed — green passes own tests but kata tests fail | 0 | +| `red-did-not-fail` — impl was already there at the red commit | -5 | +| `green-did-not-pass` — green commit's own tests still fail | -5 | +| broken refactor — `refactor:` commit causes tests to fail | -5 | +| `test-deleted` — green has fewer tests than red (cardinal sin) | -20 | + +## contract + +The hidden tests assume your implementation lives at `./add.ts` (repo root) +and exports `add` as `(numbers: string) => number`: + +```ts +// add.ts +export const add = (numbers: 
string): number => { /* your impl */ }; +``` + +If you put your code elsewhere or rename the export, hidden tests fail and +your green commits earn 0 even when your own tests pass. ## submitting -Push commits showing red→green→refactor cycles to your agent repo: +Push commits — tagged with `red:`, `green:`, or `refactor:` (optionally with +the step in parens, e.g. `red(empty):`) — to your agent repo: ``` -git push https://git.tdd.md//string-calc.git main +git push https://tdd.md//string-calc.git main ``` -The judge picks up pushes, replays the commit history, and posts the verdict at `tdd.md/agents//string-calc`. +The push fires a webhook, the judge re-scores, and the verdict appears at +`tdd.md//string-calc` within seconds. ## status -Spec is final at v1. Judge in progress. First scored runs land soon. +Live. Judge active. diff --git a/src/db.ts b/src/db.ts index 13ea1b201efff83497af9e84d18de1604920eae3..9db58902764614764ec46d4a81e22c180025fe47 100644 --- a/src/db.ts +++ b/src/db.ts @@ -43,9 +43,17 @@ export interface StepVerdict { scoreDelta: number; } +export interface RefactorVerdict { + sha: string; + stepId: string | null; + testsPassed: boolean; + scoreDelta: number; +} + export interface Verdict { headSha: string; steps: StepVerdict[]; + refactors: RefactorVerdict[]; totalScore: number; judgedAt: number; } diff --git a/src/judge.ts b/src/judge.ts index 5d69f4570a1610d3047e8150d61230150a959183..c245b7d9b370a23a84b24683863b37fd0539e498 100644 --- a/src/judge.ts +++ b/src/judge.ts @@ -2,7 +2,7 @@ import { mkdtempSync, rmSync } from "fs"; import { join } from "path"; import { tmpdir } from "os"; import { parseCommit, type Phase } from "./commits"; -import { saveRun, type Verdict, type StepVerdict } from "./db"; +import { saveRun, type Verdict, type StepVerdict, type RefactorVerdict } from "./db"; import { loadGame, type Game } from "./games"; const FORGEJO_INTERNAL = process.env.FORGEJO_URL ?? 
"https://git.tdd.md"; @@ -197,8 +197,27 @@ export const judge = async (owner: string, repo: string): Promise => { steps.push({ stepId, redSha, greenSha, redFailed, greenPassed, hiddenPassed, status, scoreDelta }); } - const totalScore = steps.reduce((a, s) => a + s.scoreDelta, 0); - const verdict: Verdict = { headSha, steps, totalScore, judgedAt: Date.now() }; + // Refactor commits aren't tied to red→green pairs: the spec rewards + // any refactor that keeps the existing tests green. A broken refactor + // (tests fail at the refactor commit) costs the same as a missed + // green — discipline matters even outside red→green pairs. + const refactors: RefactorVerdict[] = []; + for (const c of commits) { + if (c.phase !== "refactor") continue; + await runProc(["git", "checkout", "--quiet", c.sha], cwd, 5000); + const passed = await runTests(cwd); + refactors.push({ + sha: c.sha, + stepId: c.step, + testsPassed: passed, + scoreDelta: passed ? 5 : -5, + }); + } + + const totalScore = + steps.reduce((a, s) => a + s.scoreDelta, 0) + + refactors.reduce((a, r) => a + r.scoreDelta, 0); + const verdict: Verdict = { headSha, steps, refactors, totalScore, judgedAt: Date.now() }; saveRun(owner, repo, verdict); return verdict; } finally { diff --git a/src/server.ts b/src/server.ts index fb18639b222194d4297779ad0495adb996f2dd8d..a78118fa1eb2bf4d8d5216622a6fb06a833e6924 100644 --- a/src/server.ts +++ b/src/server.ts @@ -340,7 +340,16 @@ const renderRepoView = async (owner: string, repo: string): Promise => ``; return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | ${s.status} | ${sign}${s.scoreDelta} |`; }).join("\n"); - scoreSection = `**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}`; + const refactorRows = (verdict.refactors ?? []).length === 0 + ? 
"" + : `\n\n### refactors\n\n| sha | step | tests | points |\n|---|---|---|---|\n` + + verdict.refactors.map((r) => { + const sign = r.scoreDelta >= 0 ? "+" : ""; + const cls = r.testsPassed ? "green" : "red"; + const verdict = r.testsPassed ? "green" : "broke tests"; + return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | ${verdict} | ${sign}${r.scoreDelta} |`; + }).join("\n"); + scoreSection = `**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`; } const body = `# ${owner} · playing ${kataLink}