SAMA refactor: cXX_*.ts file-prefix convention across src/
Adopts the Spalder Application Module Architecture (matching the Rust monorepo + ~/Documents/snowplaza-info) so that files sort by layer, with alphabetical order = dependency direction:

- c11_server.ts — entry: env + Bun.serve(createApp())
- c13_database.ts — SQLite (runs + projects)
- c14_forgejo.ts — Forgejo HTTP + proxy
- c14_github.ts — GitHub OAuth + raw .tdd-md.json fetch
- c21_app.ts — routes + appFetch fallback + appError
- c31_blog.ts — ALL_POSTS registry
- c31_commits.ts — parseCommit + computeProgress
- c31_games.ts — kata loader
- c31_guides.ts — ALL_GUIDES registry
- c31_project_config.ts — .tdd-md.json schema + parser
- c31_reports_demo.ts — synthetic /reports/demo dataset
- c32_judge.ts — kata-judging logic
- c32_session.ts — HMAC session + cookie helpers
- c51_render.ts — page chrome + every body builder

Lower layers never import from higher (verifiable with `grep -rE 'from "\./c[5-9]' src/c1*.ts src/c2*.ts src/c3*.ts` → empty).

Imports now carry the explicit `.ts` extension. The Bun.serve route literal stays inline inside createApp(port) so path-parameter inference flows through to handler types.

No behavioural change — every public route still returns its previous status code (verified: 26 routes 200/302/404 as expected).

Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
32 files changed · +3418 −3330
Containerfile
+1
−1
| @@ -25,4 +25,4 @@ EXPOSE 3000 | ||
| 25 | 25 | # external probes (cloudflared upstream + the deploy script's /healthz |
| 26 | 26 | # poll) already cover liveness. |
| 27 | 27 | |
| 28 | -CMD ["bun", "src/server.ts"] | |
| 28 | +CMD ["bun", "src/c11_server.ts"] | |
content/games/fizzbuzz/spec.ts
+1
−1
| @@ -1,4 +1,4 @@ | ||
| 1 | -import type { Game } from "../../../src/games"; | |
| 1 | +import type { Game } from "../../../src/c31_games.ts"; | |
| 2 | 2 | |
| 3 | 3 | export const spec: Game = { |
| 4 | 4 | id: "fizzbuzz", |
content/games/string-calc/spec.ts
+1
−1
| @@ -1,4 +1,4 @@ | ||
| 1 | -import type { Game } from "../../../src/games"; | |
| 1 | +import type { Game } from "../../../src/c31_games.ts"; | |
| 2 | 2 | |
| 3 | 3 | export const spec: Game = { |
| 4 | 4 | id: "string-calc", |
package.json
+3
−3
| @@ -2,10 +2,10 @@ | ||
| 2 | 2 | "name": "tdd.md", |
| 3 | 3 | "private": true, |
| 4 | 4 | "type": "module", |
| 5 | - "module": "src/server.ts", | |
| 5 | + "module": "src/c11_server.ts", | |
| 6 | 6 | "scripts": { |
| 7 | - "dev": "bun --hot src/server.ts", | |
| 8 | - "start": "bun src/server.ts" | |
| 7 | + "dev": "bun --hot src/c11_server.ts", | |
| 8 | + "start": "bun src/c11_server.ts" | |
| 9 | 9 | }, |
| 10 | 10 | "dependencies": { |
| 11 | 11 | "marked": "^14.1.4" |
src/c11_server.ts
+10
−0
| @@ -0,0 +1,10 @@ | ||
| 1 | +// c11 — server entry: env + Bun.serve startup. No route logic, no SQL, | |
| 2 | +// no HTML. The route table, fallback fetch, and error handler live in | |
| 3 | +// c21_app.ts; this file just reads PORT and asks createApp() to bind. | |
| 4 | + | |
| 5 | +import { createApp } from "./c21_app.ts"; | |
| 6 | + | |
| 7 | +const port = Number(process.env.PORT ?? 3000); | |
| 8 | +const server = createApp(port); | |
| 9 | + | |
| 10 | +console.log(`tdd.md → ${server.url}`); | |
src/c13_database.ts
+214
−0
| @@ -0,0 +1,214 @@ | ||
| 1 | +import { Database } from "bun:sqlite"; | |
| 2 | +import type { ProjectConfig, TestRunner } from "./c31_project_config.ts"; | |
| 3 | + | |
| 4 | +const DB_PATH = process.env.TDD_DB_PATH ?? ":memory:"; | |
| 5 | + | |
| 6 | +let db: Database | null = null; | |
| 7 | + | |
| 8 | +const getDb = (): Database => { | |
| 9 | + if (db) return db; | |
| 10 | + db = new Database(DB_PATH, { create: true }); | |
| 11 | + db.exec(` | |
| 12 | + CREATE TABLE IF NOT EXISTS runs ( | |
| 13 | + id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 14 | + owner TEXT NOT NULL, | |
| 15 | + repo TEXT NOT NULL, | |
| 16 | + head_sha TEXT NOT NULL, | |
| 17 | + judged_at INTEGER NOT NULL, | |
| 18 | + verdict_json TEXT NOT NULL | |
| 19 | + ); | |
| 20 | + CREATE INDEX IF NOT EXISTS idx_runs_owner_repo | |
| 21 | + ON runs(owner, repo, judged_at DESC); | |
| 22 | + | |
| 23 | + CREATE TABLE IF NOT EXISTS projects ( | |
| 24 | + id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 25 | + registered_by TEXT NOT NULL, | |
| 26 | + repo_owner TEXT NOT NULL, | |
| 27 | + repo_name TEXT NOT NULL, | |
| 28 | + test_runner TEXT NOT NULL DEFAULT 'none', | |
| 29 | + tracked_branches TEXT NOT NULL, | |
| 30 | + display_name TEXT, | |
| 31 | + team TEXT, | |
| 32 | + registered_at INTEGER NOT NULL, | |
| 33 | + status TEXT NOT NULL DEFAULT 'active', | |
| 34 | + UNIQUE(repo_owner, repo_name) | |
| 35 | + ); | |
| 36 | + CREATE INDEX IF NOT EXISTS idx_projects_registered_by | |
| 37 | + ON projects(registered_by); | |
| 38 | + `); | |
| 39 | + return db; | |
| 40 | +}; | |
| 41 | + | |
| 42 | +export type Mode = "strict" | "pragmatic" | "learning"; | |
| 43 | + | |
| 44 | +export interface StepVerdict { | |
| 45 | + stepId: string; | |
| 46 | + redSha: string | null; | |
| 47 | + greenSha: string | null; | |
| 48 | + redFailed: boolean | null; | |
| 49 | + greenPassed: boolean | null; | |
| 50 | + // Whether the kata's authoritative hidden tests pass against the agent's | |
| 51 | + // implementation at the green commit. null when no hidden tests exist | |
| 52 | + // for the step (unknown kata, or step not registered with the spec). | |
| 53 | + hiddenPassed: boolean | null; | |
| 54 | + status: | |
| 55 | + | "verified" | |
| 56 | + | "discipline-only" | |
| 57 | + | "no-green" | |
| 58 | + | "red-did-not-fail" | |
| 59 | + | "green-did-not-pass" | |
| 60 | + | "hidden-tests-failed" | |
| 61 | + | "test-deleted" | |
| 62 | + // Trace-only mode: tests not executed, only commit discipline checked. | |
| 63 | + // Used when test_runner: "none" — language-agnostic, useful as a | |
| 64 | + // CI gate on real projects where Bun can't run the test suite. | |
| 65 | + | "trace-verified" | |
| 66 | + | "trace-tests-shrunk"; | |
| 67 | + scoreDelta: number; | |
| 68 | + // Coach-style explanation of the verdict — what happened, why the score | |
| 69 | + // is what it is, and (when relevant) how to improve next time. | |
| 70 | + explanation: string; | |
| 71 | +} | |
| 72 | + | |
| 73 | +export interface RefactorVerdict { | |
| 74 | + sha: string; | |
| 75 | + stepId: string | null; | |
| 76 | + testsPassed: boolean; | |
| 77 | + scoreDelta: number; | |
| 78 | + explanation: string; | |
| 79 | +} | |
| 80 | + | |
| 81 | +export interface Verdict { | |
| 82 | + headSha: string; | |
| 83 | + mode: Mode; | |
| 84 | + steps: StepVerdict[]; | |
| 85 | + refactors: RefactorVerdict[]; | |
| 86 | + totalScore: number; | |
| 87 | + judgedAt: number; | |
| 88 | +} | |
| 89 | + | |
| 90 | +export const saveRun = (owner: string, repo: string, verdict: Verdict): void => { | |
| 91 | + getDb().run( | |
| 92 | + `INSERT INTO runs (owner, repo, head_sha, judged_at, verdict_json) VALUES (?, ?, ?, ?, ?)`, | |
| 93 | + [owner, repo, verdict.headSha, verdict.judgedAt, JSON.stringify(verdict)], | |
| 94 | + ); | |
| 95 | +}; | |
| 96 | + | |
| 97 | +export const latestRun = (owner: string, repo: string): Verdict | null => { | |
| 98 | + const row = getDb() | |
| 99 | + .query<{ verdict_json: string }, [string, string]>( | |
| 100 | + `SELECT verdict_json FROM runs WHERE owner = ? AND repo = ? ORDER BY judged_at DESC LIMIT 1`, | |
| 101 | + ) | |
| 102 | + .get(owner, repo); | |
| 103 | + if (!row) return null; | |
| 104 | + return JSON.parse(row.verdict_json) as Verdict; | |
| 105 | +}; | |
| 106 | + | |
| 107 | +export interface ProjectRow { | |
| 108 | + id: number; | |
| 109 | + registeredBy: string; | |
| 110 | + repoOwner: string; | |
| 111 | + repoName: string; | |
| 112 | + testRunner: TestRunner; | |
| 113 | + trackedBranches: string[]; | |
| 114 | + displayName: string | null; | |
| 115 | + team: string | null; | |
| 116 | + registeredAt: number; | |
| 117 | + status: "active" | "paused"; | |
| 118 | +} | |
| 119 | + | |
| 120 | +interface ProjectDbRow { | |
| 121 | + id: number; | |
| 122 | + registered_by: string; | |
| 123 | + repo_owner: string; | |
| 124 | + repo_name: string; | |
| 125 | + test_runner: string; | |
| 126 | + tracked_branches: string; | |
| 127 | + display_name: string | null; | |
| 128 | + team: string | null; | |
| 129 | + registered_at: number; | |
| 130 | + status: string; | |
| 131 | +} | |
| 132 | + | |
| 133 | +const rowToProject = (r: ProjectDbRow): ProjectRow => ({ | |
| 134 | + id: r.id, | |
| 135 | + registeredBy: r.registered_by, | |
| 136 | + repoOwner: r.repo_owner, | |
| 137 | + repoName: r.repo_name, | |
| 138 | + testRunner: (r.test_runner === "bun" ? "bun" : "none") as TestRunner, | |
| 139 | + trackedBranches: JSON.parse(r.tracked_branches) as string[], | |
| 140 | + displayName: r.display_name, | |
| 141 | + team: r.team, | |
| 142 | + registeredAt: r.registered_at, | |
| 143 | + status: r.status === "paused" ? "paused" : "active", | |
| 144 | +}); | |
| 145 | + | |
| 146 | +// Inserts or updates a project. Re-registering the same repo refreshes | |
| 147 | +// its config (test_runner, tracked_branches, display_name, team) without | |
| 148 | +// duplicating the row. Returns the stored project. | |
| 149 | +export const upsertProject = ( | |
| 150 | + registeredBy: string, | |
| 151 | + repoOwner: string, | |
| 152 | + repoName: string, | |
| 153 | + config: ProjectConfig, | |
| 154 | +): ProjectRow => { | |
| 155 | + const now = Date.now(); | |
| 156 | + const branches = JSON.stringify(config.tracked_branches); | |
| 157 | + const display = config.display_name ?? null; | |
| 158 | + const team = config.team ?? null; | |
| 159 | + getDb().run( | |
| 160 | + `INSERT INTO projects (registered_by, repo_owner, repo_name, test_runner, tracked_branches, display_name, team, registered_at, status) | |
| 161 | + VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active') | |
| 162 | + ON CONFLICT(repo_owner, repo_name) DO UPDATE SET | |
| 163 | + test_runner = excluded.test_runner, | |
| 164 | + tracked_branches = excluded.tracked_branches, | |
| 165 | + display_name = excluded.display_name, | |
| 166 | + team = excluded.team, | |
| 167 | + status = 'active'`, | |
| 168 | + [registeredBy, repoOwner, repoName, config.test_runner, branches, display, team, now], | |
| 169 | + ); | |
| 170 | + const row = getDb() | |
| 171 | + .query<ProjectDbRow, [string, string]>( | |
| 172 | + `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`, | |
| 173 | + ) | |
| 174 | + .get(repoOwner, repoName); | |
| 175 | + if (!row) throw new Error("project upsert returned no row"); | |
| 176 | + return rowToProject(row); | |
| 177 | +}; | |
| 178 | + | |
| 179 | +export const getProject = (repoOwner: string, repoName: string): ProjectRow | null => { | |
| 180 | + const row = getDb() | |
| 181 | + .query<ProjectDbRow, [string, string]>( | |
| 182 | + `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`, | |
| 183 | + ) | |
| 184 | + .get(repoOwner, repoName); | |
| 185 | + return row ? rowToProject(row) : null; | |
| 186 | +}; | |
| 187 | + | |
| 188 | +export const listActiveProjects = (): ProjectRow[] => { | |
| 189 | + const rows = getDb() | |
| 190 | + .query<ProjectDbRow, []>( | |
| 191 | + `SELECT * FROM projects WHERE status = 'active' ORDER BY registered_at DESC`, | |
| 192 | + ) | |
| 193 | + .all(); | |
| 194 | + return rows.map(rowToProject); | |
| 195 | +}; | |
| 196 | + | |
| 197 | +// Latest verdict per (owner, repo) across all agents — drives the | |
| 198 | +// leaderboard and the /agents index. | |
| 199 | +export const allLatestRuns = (): { owner: string; repo: string; verdict: Verdict }[] => { | |
| 200 | + const rows = getDb() | |
| 201 | + .query<{ owner: string; repo: string; verdict_json: string }, []>( | |
| 202 | + `SELECT owner, repo, verdict_json FROM runs r1 | |
| 203 | + WHERE judged_at = ( | |
| 204 | + SELECT MAX(judged_at) FROM runs r2 | |
| 205 | + WHERE r2.owner = r1.owner AND r2.repo = r1.repo | |
| 206 | + )`, | |
| 207 | + ) | |
| 208 | + .all(); | |
| 209 | + return rows.map((r) => ({ | |
| 210 | + owner: r.owner, | |
| 211 | + repo: r.repo, | |
| 212 | + verdict: JSON.parse(r.verdict_json) as Verdict, | |
| 213 | + })); | |
| 214 | +}; | |
src/c14_forgejo.ts
+345
−0
| @@ -0,0 +1,345 @@ | ||
| 1 | +// c14 — secondary I/O: HTTP client to the local Forgejo instance. Owns | |
| 2 | +// every URL reachable at git.tdd.md (admin API, user repos, raw git | |
| 3 | +// protocol, webhook setup) plus the proxy that forwards git-protocol | |
| 4 | +// requests through tdd.md to keep the public hostname uniform. | |
| 5 | + | |
| 6 | +// Internal URL — Bun container talks to Forgejo via host.containers.internal | |
| 7 | +// (rootless podman's standard hostname for the host network). Falls back to | |
| 8 | +// the public URL for local dev. | |
| 9 | +export const FORGEJO_URL = process.env.FORGEJO_URL ?? "https://git.tdd.md"; | |
| 10 | +const ADMIN_TOKEN = process.env.FORGEJO_ADMIN_TOKEN ?? ""; | |
| 11 | + | |
| 12 | +const adminAuth = (): HeadersInit => ({ | |
| 13 | + Authorization: `token ${ADMIN_TOKEN}`, | |
| 14 | +}); | |
| 15 | + | |
| 16 | +const userAuth = (username: string, password: string): HeadersInit => ({ | |
| 17 | + Authorization: `Basic ${btoa(`${username}:${password}`)}`, | |
| 18 | +}); | |
| 19 | + | |
| 20 | +export const isConfigured = (): boolean => ADMIN_TOKEN !== ""; | |
| 21 | + | |
| 22 | +export const userExists = async (username: string): Promise<boolean> => { | |
| 23 | + const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}`, { | |
| 24 | + headers: adminAuth(), | |
| 25 | + }); | |
| 26 | + return res.status === 200; | |
| 27 | +}; | |
| 28 | + | |
| 29 | +export const createUser = async (params: { | |
| 30 | + username: string; | |
| 31 | + email: string; | |
| 32 | + password: string; | |
| 33 | + fullName?: string; | |
| 34 | +}): Promise<void> => { | |
| 35 | + const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users`, { | |
| 36 | + method: "POST", | |
| 37 | + headers: { ...adminAuth(), "Content-Type": "application/json" }, | |
| 38 | + body: JSON.stringify({ | |
| 39 | + username: params.username, | |
| 40 | + email: params.email, | |
| 41 | + password: params.password, | |
| 42 | + full_name: params.fullName ?? params.username, | |
| 43 | + must_change_password: false, | |
| 44 | + send_notify: false, | |
| 45 | + }), | |
| 46 | + }); | |
| 47 | + if (!res.ok) { | |
| 48 | + const text = await res.text(); | |
| 49 | + throw new Error(`forgejo createUser ${res.status}: ${text}`); | |
| 50 | + } | |
| 51 | +}; | |
| 52 | + | |
| 53 | +export const setUserPassword = async (username: string, password: string): Promise<void> => { | |
| 54 | + const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(username)}`, { | |
| 55 | + method: "PATCH", | |
| 56 | + headers: { ...adminAuth(), "Content-Type": "application/json" }, | |
| 57 | + body: JSON.stringify({ | |
| 58 | + password, | |
| 59 | + must_change_password: false, | |
| 60 | + source_id: 0, | |
| 61 | + login_name: username, | |
| 62 | + }), | |
| 63 | + }); | |
| 64 | + if (!res.ok) { | |
| 65 | + const text = await res.text(); | |
| 66 | + throw new Error(`forgejo setUserPassword ${res.status}: ${text}`); | |
| 67 | + } | |
| 68 | +}; | |
| 69 | + | |
| 70 | +export const repoExists = async (owner: string, repo: string): Promise<boolean> => { | |
| 71 | + const res = await fetch(`${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`, { | |
| 72 | + headers: adminAuth(), | |
| 73 | + }); | |
| 74 | + return res.status === 200; | |
| 75 | +}; | |
| 76 | + | |
| 77 | +// Creates a per-repo webhook that fires on push events. The webhook | |
| 78 | +// posts to /api/forgejo/webhook on tdd.md, signed with WEBHOOK_SECRET so | |
| 79 | +// our endpoint can verify it. Idempotent — checks for an existing hook | |
| 80 | +// with the same URL before creating. | |
| 81 | +export const ensureRepoWebhook = async (params: { | |
| 82 | + owner: string; | |
| 83 | + repo: string; | |
| 84 | + webhookUrl: string; | |
| 85 | + secret: string; | |
| 86 | +}): Promise<void> => { | |
| 87 | + const base = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(params.owner)}/${encodeURIComponent(params.repo)}/hooks`; | |
| 88 | + const listRes = await fetch(base, { headers: adminAuth() }); | |
| 89 | + if (listRes.ok) { | |
| 90 | + const hooks = (await listRes.json()) as { id: number; config: { url?: string } }[]; | |
| 91 | + const exists = hooks.some((h) => h.config?.url === params.webhookUrl); | |
| 92 | + if (exists) return; | |
| 93 | + } | |
| 94 | + const res = await fetch(base, { | |
| 95 | + method: "POST", | |
| 96 | + headers: { ...adminAuth(), "Content-Type": "application/json" }, | |
| 97 | + body: JSON.stringify({ | |
| 98 | + type: "forgejo", | |
| 99 | + active: true, | |
| 100 | + events: ["push"], | |
| 101 | + config: { | |
| 102 | + url: params.webhookUrl, | |
| 103 | + content_type: "json", | |
| 104 | + secret: params.secret, | |
| 105 | + }, | |
| 106 | + }), | |
| 107 | + }); | |
| 108 | + if (!res.ok) { | |
| 109 | + const text = await res.text(); | |
| 110 | + throw new Error(`forgejo ensureRepoWebhook ${res.status}: ${text}`); | |
| 111 | + } | |
| 112 | +}; | |
| 113 | + | |
| 114 | +export const createRepoForUser = async (params: { | |
| 115 | + username: string; | |
| 116 | + name: string; | |
| 117 | + description?: string; | |
| 118 | +}): Promise<void> => { | |
| 119 | + const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(params.username)}/repos`, { | |
| 120 | + method: "POST", | |
| 121 | + headers: { ...adminAuth(), "Content-Type": "application/json" }, | |
| 122 | + body: JSON.stringify({ | |
| 123 | + name: params.name, | |
| 124 | + description: params.description ?? "", | |
| 125 | + // Private by default — the source is the agent's, not ours to | |
| 126 | + // publish. Verdicts still render on tdd.md via admin-mediated | |
| 127 | + // API calls; clones require the agent's push token. | |
| 128 | + private: true, | |
| 129 | + // No auto_init: the agent's first push becomes the genuine initial | |
| 130 | + // commit. An admin-authored "Initial commit" would muddle the phase | |
| 131 | + // log and break attribution on the agent's repo page. | |
| 132 | + auto_init: false, | |
| 133 | + default_branch: "main", | |
| 134 | + }), | |
| 135 | + }); | |
| 136 | + if (!res.ok) { | |
| 137 | + const text = await res.text(); | |
| 138 | + throw new Error(`forgejo createRepo ${res.status}: ${text}`); | |
| 139 | + } | |
| 140 | +}; | |
| 141 | + | |
| 142 | +interface TokenInfo { | |
| 143 | + id: number; | |
| 144 | + name: string; | |
| 145 | +} | |
| 146 | + | |
| 147 | +const listTokens = async (username: string, password: string): Promise<TokenInfo[]> => { | |
| 148 | + const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens`, { | |
| 149 | + headers: userAuth(username, password), | |
| 150 | + }); | |
| 151 | + if (!res.ok) return []; | |
| 152 | + return (await res.json()) as TokenInfo[]; | |
| 153 | +}; | |
| 154 | + | |
| 155 | +const deleteToken = async (username: string, password: string, tokenId: number): Promise<void> => { | |
| 156 | + await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens/${tokenId}`, { | |
| 157 | + method: "DELETE", | |
| 158 | + headers: userAuth(username, password), | |
| 159 | + }); | |
| 160 | +}; | |
| 161 | + | |
| 162 | +export const createPushToken = async (params: { | |
| 163 | + username: string; | |
| 164 | + password: string; | |
| 165 | + name: string; | |
| 166 | +}): Promise<string> => { | |
| 167 | + // Revoke any existing tokens with the same name so re-registration always | |
| 168 | + // returns a fresh one and the previous one is invalidated. | |
| 169 | + const existing = await listTokens(params.username, params.password); | |
| 170 | + for (const t of existing) { | |
| 171 | + if (t.name === params.name) { | |
| 172 | + await deleteToken(params.username, params.password, t.id); | |
| 173 | + } | |
| 174 | + } | |
| 175 | + | |
| 176 | + const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(params.username)}/tokens`, { | |
| 177 | + method: "POST", | |
| 178 | + headers: { ...userAuth(params.username, params.password), "Content-Type": "application/json" }, | |
| 179 | + body: JSON.stringify({ | |
| 180 | + name: params.name, | |
| 181 | + // write:repository for the push; read:user so the agent can | |
| 182 | + // verify their own identity against tdd.md's self-service | |
| 183 | + // endpoints (e.g. POST /api/agents/:name/visibility). | |
| 184 | + scopes: ["write:repository", "read:user"], | |
| 185 | + }), | |
| 186 | + }); | |
| 187 | + if (!res.ok) { | |
| 188 | + const text = await res.text(); | |
| 189 | + throw new Error(`forgejo createPushToken ${res.status}: ${text}`); | |
| 190 | + } | |
| 191 | + const data = (await res.json()) as { sha1: string }; | |
| 192 | + return data.sha1; | |
| 193 | +}; | |
| 194 | + | |
| 195 | +const randomPassword = (): string => | |
| 196 | + Array.from(crypto.getRandomValues(new Uint8Array(32))) | |
| 197 | + .map((b) => b.toString(16).padStart(2, "0")) | |
| 198 | + .join(""); | |
| 199 | + | |
| 200 | +export interface AgentRegistration { | |
| 201 | + username: string; | |
| 202 | + pushToken: string; | |
| 203 | + repoCloneUrl: string; | |
| 204 | + isNew: boolean; | |
| 205 | +} | |
| 206 | + | |
| 207 | +// Idempotent: if the user exists, reset their password and rotate the push | |
| 208 | +// token. Always also ensures the kata repo exists. | |
| 209 | +export const registerAgent = async (params: { | |
| 210 | + username: string; | |
| 211 | + email: string; | |
| 212 | + fullName?: string; | |
| 213 | + kata?: string; | |
| 214 | +}): Promise<AgentRegistration> => { | |
| 215 | + const password = randomPassword(); | |
| 216 | + const isNew = !(await userExists(params.username)); | |
| 217 | + | |
| 218 | + if (isNew) { | |
| 219 | + await createUser({ | |
| 220 | + username: params.username, | |
| 221 | + email: params.email, | |
| 222 | + password, | |
| 223 | + fullName: params.fullName, | |
| 224 | + }); | |
| 225 | + } else { | |
| 226 | + await setUserPassword(params.username, password); | |
| 227 | + } | |
| 228 | + | |
| 229 | + const pushToken = await createPushToken({ | |
| 230 | + username: params.username, | |
| 231 | + password, | |
| 232 | + name: "tdd-md-push", | |
| 233 | + }); | |
| 234 | + | |
| 235 | + const kata = params.kata ?? "string-calc"; | |
| 236 | + if (!(await repoExists(params.username, kata))) { | |
| 237 | + await createRepoForUser({ | |
| 238 | + username: params.username, | |
| 239 | + name: kata, | |
| 240 | + description: `${params.username}'s submission for the ${kata} kata`, | |
| 241 | + }); | |
| 242 | + } | |
| 243 | + | |
| 244 | + const baseUrl = process.env.BASE_URL ?? "https://tdd.md"; | |
| 245 | + const webhookSecret = process.env.WEBHOOK_SECRET; | |
| 246 | + if (webhookSecret) { | |
| 247 | + try { | |
| 248 | + await ensureRepoWebhook({ | |
| 249 | + owner: params.username, | |
| 250 | + repo: kata, | |
| 251 | + webhookUrl: `${baseUrl}/api/forgejo/webhook`, | |
| 252 | + secret: webhookSecret, | |
| 253 | + }); | |
| 254 | + } catch (err) { | |
| 255 | + // Webhook is convenience; registration must still succeed without it. | |
| 256 | + console.error(`webhook setup failed for ${params.username}/${kata}:`, err); | |
| 257 | + } | |
| 258 | + } | |
| 259 | + | |
| 260 | + return { | |
| 261 | + username: params.username, | |
| 262 | + pushToken, | |
| 263 | + repoCloneUrl: `${baseUrl}/${params.username}/${kata}.git`, | |
| 264 | + isNew, | |
| 265 | + }; | |
| 266 | +}; | |
| 267 | + | |
| 268 | +// --------------------------------------------------------------------- | |
| 269 | +// Read-side helpers used by c21 handlers + c51 rendering. | |
| 270 | +// --------------------------------------------------------------------- | |
| 271 | + | |
| 272 | +export interface ForgejoUserSummary { | |
| 273 | + id: number; | |
| 274 | + login: string; | |
| 275 | + is_admin?: boolean; | |
| 276 | + // Forgejo visibility levels: "public" | "limited" | "private". | |
| 277 | + // Anything other than "public" is hidden from anonymous tdd.md visitors. | |
| 278 | + visibility?: string; | |
| 279 | +} | |
| 280 | + | |
| 281 | +// Admin-token-authenticated headers for API calls. Agent repos are | |
| 282 | +// private by default; rendering the verdict page must still work. We | |
| 283 | +// proxy the data through the admin identity, never exposing the source | |
| 284 | +// or push protocol publicly. | |
| 285 | +export const adminApiHeaders = (): HeadersInit => { | |
| 286 | + const token = process.env.FORGEJO_ADMIN_TOKEN; | |
| 287 | + return token ? { Authorization: `token ${token}` } : {}; | |
| 288 | +}; | |
| 289 | + | |
| 290 | +// Single-user visibility lookup for /:owner/:repo and /agents/:name. | |
| 291 | +// Returns the raw Forgejo string (or null if the user doesn't exist). | |
| 292 | +export const getUserVisibility = async (name: string): Promise<string | null> => { | |
| 293 | + const r = await fetch( | |
| 294 | + `${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}`, | |
| 295 | + { headers: adminApiHeaders() }, | |
| 296 | + ); | |
| 297 | + if (!r.ok) return null; | |
| 298 | + const u = (await r.json()) as ForgejoUserSummary; | |
| 299 | + return u.visibility ?? "public"; | |
| 300 | +}; | |
| 301 | + | |
| 302 | +const HOP_BY_HOP = [ | |
| 303 | + "host", | |
| 304 | + "connection", | |
| 305 | + "keep-alive", | |
| 306 | + "transfer-encoding", | |
| 307 | + "upgrade", | |
| 308 | + "proxy-authorization", | |
| 309 | + "proxy-connection", | |
| 310 | + "te", | |
| 311 | + "trailer", | |
| 312 | +]; | |
| 313 | + | |
| 314 | +// Forward git protocol + Forgejo API/asset requests to Forgejo via the host | |
| 315 | +// network. Lets us serve everything under tdd.md (GitHub-style) without | |
| 316 | +// exposing git.tdd.md externally. | |
| 317 | +export const proxyToForgejo = async (req: Request, pathAndQuery: string): Promise<Response> => { | |
| 318 | + const upstream = `${FORGEJO_URL}${pathAndQuery}`; | |
| 319 | + const headers = new Headers(req.headers); | |
| 320 | + for (const h of HOP_BY_HOP) headers.delete(h); | |
| 321 | + headers.set("X-Forwarded-Host", "tdd.md"); | |
| 322 | + headers.set("X-Forwarded-Proto", "https"); | |
| 323 | + headers.set("X-Forwarded-For", req.headers.get("cf-connecting-ip") ?? "0.0.0.0"); | |
| 324 | + | |
| 325 | + let body: ArrayBuffer | undefined; | |
| 326 | + if (req.method !== "GET" && req.method !== "HEAD") { | |
| 327 | + body = await req.arrayBuffer(); | |
| 328 | + } | |
| 329 | + | |
| 330 | + const upstreamRes = await fetch(upstream, { | |
| 331 | + method: req.method, | |
| 332 | + headers, | |
| 333 | + body, | |
| 334 | + redirect: "manual", | |
| 335 | + }); | |
| 336 | + | |
| 337 | + const responseHeaders = new Headers(upstreamRes.headers); | |
| 338 | + for (const h of HOP_BY_HOP) responseHeaders.delete(h); | |
| 339 | + | |
| 340 | + return new Response(upstreamRes.body, { | |
| 341 | + status: upstreamRes.status, | |
| 342 | + statusText: upstreamRes.statusText, | |
| 343 | + headers: responseHeaders, | |
| 344 | + }); | |
| 345 | +}; | |
src/c14_github.ts
+122
−0
| @@ -0,0 +1,122 @@ | ||
| 1 | +// c14 — secondary I/O: HTTP clients to GitHub. Two concerns under one roof: | |
| 2 | +// 1. OAuth flow for sign-in (used by /auth/github/start + callback). | |
| 3 | +// 2. Raw-content fetch of `.tdd-md.json` from a public repo's default | |
| 4 | +// branch, for project onboarding. | |
| 5 | +// Both talk to GitHub; both are pure HTTP, no in-process state. | |
| 6 | + | |
| 7 | +import { | |
| 8 | + PROJECT_CONFIG_PATH, | |
| 9 | + parseProjectConfig, | |
| 10 | + type ProjectConfig, | |
| 11 | +} from "./c31_project_config.ts"; | |
| 12 | + | |
| 13 | +const CLIENT_ID = process.env.GITHUB_CLIENT_ID ?? ""; | |
| 14 | +const CLIENT_SECRET = process.env.GITHUB_CLIENT_SECRET ?? ""; | |
| 15 | + | |
| 16 | +export interface GithubUser { | |
| 17 | + login: string; | |
| 18 | + id: number; | |
| 19 | + email: string | null; | |
| 20 | + avatar_url: string; | |
| 21 | + name: string | null; | |
| 22 | +} | |
| 23 | + | |
| 24 | +export interface GithubEmail { | |
| 25 | + email: string; | |
| 26 | + primary: boolean; | |
| 27 | + verified: boolean; | |
| 28 | + visibility: string | null; | |
| 29 | +} | |
| 30 | + | |
| 31 | +export const isConfigured = (): boolean => CLIENT_ID !== "" && CLIENT_SECRET !== ""; | |
| 32 | + | |
| 33 | +export const authorizeUrl = (state: string, redirectUri: string): string => { | |
| 34 | + const params = new URLSearchParams({ | |
| 35 | + client_id: CLIENT_ID, | |
| 36 | + redirect_uri: redirectUri, | |
| 37 | + scope: "read:user user:email", | |
| 38 | + state, | |
| 39 | + allow_signup: "true", | |
| 40 | + }); | |
| 41 | + return `https://github.com/login/oauth/authorize?${params}`; | |
| 42 | +}; | |
| 43 | + | |
| 44 | +export const exchangeCode = async (code: string, redirectUri: string): Promise<string> => { | |
| 45 | + const res = await fetch("https://github.com/login/oauth/access_token", { | |
| 46 | + method: "POST", | |
| 47 | + headers: { | |
| 48 | + Accept: "application/json", | |
| 49 | + "Content-Type": "application/json", | |
| 50 | + }, | |
| 51 | + body: JSON.stringify({ | |
| 52 | + client_id: CLIENT_ID, | |
| 53 | + client_secret: CLIENT_SECRET, | |
| 54 | + code, | |
| 55 | + redirect_uri: redirectUri, | |
| 56 | + }), | |
| 57 | + }); | |
| 58 | + if (!res.ok) { | |
| 59 | + throw new Error(`github token exchange failed: ${res.status}`); | |
| 60 | + } | |
| 61 | + const data = (await res.json()) as { access_token?: string; error?: string; error_description?: string }; | |
| 62 | + if (!data.access_token) { | |
| 63 | + throw new Error(`github token exchange returned no token: ${data.error_description ?? data.error ?? "unknown"}`); | |
| 64 | + } | |
| 65 | + return data.access_token; | |
| 66 | +}; | |
| 67 | + | |
| 68 | +export const fetchUser = async (accessToken: string): Promise<GithubUser> => { | |
| 69 | + const res = await fetch("https://api.github.com/user", { | |
| 70 | + headers: { | |
| 71 | + Authorization: `token ${accessToken}`, | |
| 72 | + Accept: "application/vnd.github+json", | |
| 73 | + "User-Agent": "tdd.md", | |
| 74 | + }, | |
| 75 | + }); | |
| 76 | + if (!res.ok) throw new Error(`github user fetch failed: ${res.status}`); | |
| 77 | + return (await res.json()) as GithubUser; | |
| 78 | +}; | |
| 79 | + | |
| 80 | +export const fetchPrimaryEmail = async (accessToken: string): Promise<string | null> => { | |
| 81 | + const res = await fetch("https://api.github.com/user/emails", { | |
| 82 | + headers: { | |
| 83 | + Authorization: `token ${accessToken}`, | |
| 84 | + Accept: "application/vnd.github+json", | |
| 85 | + "User-Agent": "tdd.md", | |
| 86 | + }, | |
| 87 | + }); | |
| 88 | + if (!res.ok) return null; | |
| 89 | + const emails = (await res.json()) as GithubEmail[]; | |
| 90 | + const verified = emails.filter((e) => e.verified); | |
| 91 | + return verified.find((e) => e.primary)?.email ?? verified[0]?.email ?? null; | |
| 92 | +}; | |
| 93 | + | |
// Downloads the project config file (PROJECT_CONFIG_PATH) for a GitHub
// repo from raw.githubusercontent.com at the HEAD ref and hands the
// decoded JSON to parseProjectConfig.
//
// The request is unauthenticated, so only public repos can be read; a
// 404 therefore means either "file missing" or "repo is private".
//
// Throws an Error with a user-presentable message on 404, on any other
// non-ok status, and when the body is not valid JSON. Whatever
// parseProjectConfig throws propagates unchanged.
export const fetchProjectConfig = async (
  repoOwner: string,
  repoName: string,
): Promise<ProjectConfig> => {
  const ownerSegment = encodeURIComponent(repoOwner);
  const repoSegment = encodeURIComponent(repoName);
  const rawUrl = `https://raw.githubusercontent.com/${ownerSegment}/${repoSegment}/HEAD/${PROJECT_CONFIG_PATH}`;
  const response = await fetch(rawUrl, {
    headers: { Accept: "application/json", "User-Agent": "tdd.md" },
  });

  const repoLabel = `${repoOwner}/${repoName}`;
  if (response.status === 404) {
    throw new Error(
      `${PROJECT_CONFIG_PATH} not found in ${repoLabel} on the default branch (or the repo is private; private repos aren't supported yet).`,
    );
  }
  if (!response.ok) {
    throw new Error(
      `Couldn't fetch ${PROJECT_CONFIG_PATH} from ${repoLabel}: HTTP ${response.status}`,
    );
  }

  let decoded: unknown;
  try {
    decoded = await response.json();
  } catch {
    throw new Error(`${PROJECT_CONFIG_PATH} in ${repoLabel} isn't valid JSON`);
  }
  return parseProjectConfig(decoded);
};
src/c21_app.ts
+1176
−0
| @@ -0,0 +1,1176 @@ | ||
| 1 | +// c21 — handlers: the route table + fallback fetch. Composes the lower | |
| 2 | +// layers (c13 db, c14 secondary I/O, c31 models, c32 logic, c51 render) | |
| 3 | +// into the HTTP surface served by Bun.serve in c11_server. | |
| 4 | + | |
| 5 | +import { | |
| 6 | + renderPage, | |
| 7 | + renderNotFound, | |
| 8 | + htmlResponse, | |
| 9 | + errorPage, | |
| 10 | + phaseSpan, | |
| 11 | + relativeTime, | |
| 12 | + reportsLandingMd, | |
| 13 | + execSummaryMd, | |
| 14 | + agentDrilldownMd, | |
| 15 | + testsOverviewMd, | |
| 16 | + projectsLandingMd, | |
| 17 | + projectRegisterMd, | |
| 18 | + projectDetailMd, | |
| 19 | +} from "./c51_render.ts"; | |
| 20 | +import * as github from "./c14_github.ts"; | |
| 21 | +import * as forgejo from "./c14_forgejo.ts"; | |
| 22 | +import { | |
| 23 | + FORGEJO_URL, | |
| 24 | + adminApiHeaders, | |
| 25 | + getUserVisibility, | |
| 26 | + proxyToForgejo, | |
| 27 | + type ForgejoUserSummary, | |
| 28 | +} from "./c14_forgejo.ts"; | |
| 29 | +import { parseCommit, computeProgress } from "./c31_commits.ts"; | |
| 30 | +import { loadGame, listGames } from "./c31_games.ts"; | |
| 31 | +import { ALL_POSTS } from "./c31_blog.ts"; | |
| 32 | +import { ALL_GUIDES } from "./c31_guides.ts"; | |
| 33 | +import { DEMO_REPORTS } from "./c31_reports_demo.ts"; | |
| 34 | +import { parseRepoIdentifier } from "./c31_project_config.ts"; | |
| 35 | +import { fetchProjectConfig } from "./c14_github.ts"; | |
| 36 | +import { judge } from "./c32_judge.ts"; | |
| 37 | +import { | |
| 38 | + SESSION_TTL_SEC, | |
| 39 | + getViewer, | |
| 40 | + randomHex, | |
| 41 | + parseCookies, | |
| 42 | + signSession, | |
| 43 | + sessionCookieHeader, | |
| 44 | + timingSafeEqual, | |
| 45 | + hmacSha256Hex, | |
| 46 | +} from "./c32_session.ts"; | |
| 47 | +import { | |
| 48 | + latestRun, | |
| 49 | + allLatestRuns, | |
| 50 | + listActiveProjects, | |
| 51 | + getProject, | |
| 52 | + upsertProject, | |
| 53 | +} from "./c13_database.ts"; | |
| 54 | + | |
// Locations of the hand-written markdown content.
const HOME_MD = "./content/home.md";
const GAME_DIR = "./content/games";

// Site origin (overridable through the BASE_URL environment variable)
// and the GitHub callback URL derived from it.
const BASE_URL = process.env.BASE_URL ?? "https://tdd.md";
const CALLBACK_URL = `${BASE_URL}/auth/github/callback`;

// One sentence reused as the page description and the JSON-LD description.
const HOME_DESCRIPTION =
  "Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline.";

// The landing page is rendered once, at module load, from home.md.
const homeBody = await Bun.file(HOME_MD).text();
const homeJsonLd = {
  "@context": "https://schema.org",
  "@type": "WebSite",
  name: "tdd.md",
  url: "https://tdd.md",
  description: HOME_DESCRIPTION,
};
const HOME_HTML = await renderPage({
  title: "tdd.md — TDD for agentic coding",
  description: HOME_DESCRIPTION,
  bodyMarkdown: homeBody,
  active: "home",
  jsonLd: homeJsonLd,
});
| 78 | + | |
// Kata registry, loaded once at module load.
const ALL_GAMES = await listGames();

// Markdown for the kata listing: a placeholder line when nothing is
// registered, otherwise one table row per kata.
const gamesTableMd = (): string => {
  if (ALL_GAMES.length === 0) return "_No katas registered yet._";
  const rows = ALL_GAMES.map(
    (g) => `| [${g.id}](/games/${g.id}) | ${g.description} | ${g.steps.length} |`,
  );
  return `| kata | description | steps |\n|---|---|---|\n${rows.join("\n")}`;
};

const gamesIndexBody = `# games

${gamesTableMd()}

> Ready to play? [Register your agent →](/agents/register)
> Using a specific agent? See the [agent-specific guides](/guides) — Claude Code, Cursor, Aider.
`;

// The /games index is static for the lifetime of the process, so it is
// rendered once here.
const GAMES_INDEX_HTML = await renderPage({
  title: "TDD katas — tdd.md",
  description:
    "Browse the TDD katas. Pick a challenge, push red→green→refactor commits, and earn a public verdict graded against hidden tests.",
  bodyMarkdown: gamesIndexBody,
  ogPath: "https://tdd.md/games",
  active: "games",
});
| 102 | + | |
// Shape of an acceptable kata id: one path segment of letters, digits,
// dots, underscores or hyphens, starting with a letter or digit. This
// is the same shape appFetch accepts for a repo name.
const KATA_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;

// Renders the spec page for one kata from `${GAME_DIR}/<kata>/spec.md`.
//
// Returns null when `kata` is not a well-formed id or when no spec.md
// exists for it. Otherwise returns a text/html Response.
const renderKata = async (kata: string): Promise<Response | null> => {
  // `kata` is interpolated into a filesystem path below. Rejecting ids
  // that contain path separators or start with a dot keeps the lookup
  // inside GAME_DIR.
  if (!KATA_ID_PATTERN.test(kata)) return null;

  const file = Bun.file(`${GAME_DIR}/${kata}/spec.md`);
  if (!(await file.exists())) return null;
  const md = await file.text();
  // Pull the kata's own description from spec.ts when available — it's
  // the canonical short copy (rendered on /games + sitemap previews).
  let description: string | undefined;
  try {
    const game = await loadGame(kata);
    description = game.description;
  } catch {
    // unknown kata; use the site default
  }
  const html = await renderPage({
    title: `${kata} TDD kata — tdd.md`,
    description,
    bodyMarkdown: md,
    ogPath: `https://tdd.md/games/${kata}`,
    active: "games",
  });
  return new Response(html, { headers: { "Content-Type": "text/html; charset=utf-8" } });
};
| 125 | + | |
// Builds the /agents page: one row per public, non-admin Forgejo user
// with the number of judged repos and the summed score of their latest
// runs.
//
// The user list is fetched only when FORGEJO_ADMIN_TOKEN is set; without
// it, or when the request is not ok, the page shows the empty state.
const renderAgentsIndex = async (): Promise<Response> => {
  const loadUsers = async (): Promise<ForgejoUserSummary[]> => {
    if (!process.env.FORGEJO_ADMIN_TOKEN) return [];
    const listing = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, {
      headers: adminApiHeaders(),
    });
    return listing.ok ? ((await listing.json()) as ForgejoUserSummary[]) : [];
  };

  // Drop the admin (id 1), any other admin, and anyone whose visibility
  // isn't "public" — private and limited agents stay off the index.
  const isListed = (u: ForgejoUserSummary): boolean =>
    u.id !== 1 && !u.is_admin && (u.visibility ?? "public") === "public";
  const agents = (await loadUsers()).filter(isListed);

  // Sum score and count runs per owner, using the latest run per repo.
  const totalsByOwner = new Map<string, { score: number; runs: number }>();
  for (const run of allLatestRuns()) {
    const soFar = totalsByOwner.get(run.owner) ?? { score: 0, runs: 0 };
    totalsByOwner.set(run.owner, {
      score: soFar.score + run.verdict.totalScore,
      runs: soFar.runs + 1,
    });
  }

  const hasAgents = agents.length !== 0;

  let body: string;
  if (hasAgents) {
    const tableRows = agents.map((agent) => {
      const totals = totalsByOwner.get(agent.login) ?? { score: 0, runs: 0 };
      const plus = totals.score >= 0 ? "+" : "";
      return `| [${agent.login}](/agents/${agent.login}) | ${totals.runs} | ${plus}${totals.score} |`;
    });
    body = `# agents

| agent | attempts | total score |
|---|---|---|
${tableRows.join("\n")}

[ Register your agent → ](/agents/register)
`;
  } else {
    body = `# agents

> No agents registered yet. Be the first.

[ Register your agent → ](/agents/register)
`;
  }

  let description: string;
  if (hasAgents) {
    const noun = agents.length === 1 ? "agent" : "agents";
    description = `${agents.length} AI ${noun} doing test-driven development on tdd.md, scored on red→green discipline against hidden tests for agentic coding.`;
  } else {
    description =
      "AI agents doing test-driven development on tdd.md — registration is open, sign in with GitHub to play.";
  }

  const html = await renderPage({
    title: "AI agents on tdd.md",
    description,
    bodyMarkdown: body,
    ogPath: "https://tdd.md/agents",
    active: "agents",
  });
  return htmlResponse(html);
};
| 191 | + | |
// Builds the /leaderboard page: the latest run of every repo, ordered by
// total score (highest first), restricted to owners whose Forgejo
// visibility is "public".
//
// The user list is fetched once and turned into a Set so runs can be
// filtered without N+1 lookups. When the list cannot be loaded at all
// (no FORGEJO_ADMIN_TOKEN, or the request is not ok) no filtering is
// applied — that fallback is unchanged from the previous behaviour.
const renderLeaderboard = async (): Promise<Response> => {
  const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
  const publicOwners = new Set<string>();
  // Distinguishes "list loaded, nobody is public" from "list unavailable".
  // Testing `publicOwners.size === 0` conflated the two and exposed every
  // run as soon as all registered users were private or limited.
  let visibilityKnown = false;
  if (adminToken) {
    const r = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, {
      headers: adminApiHeaders(),
    });
    if (r.ok) {
      const users = (await r.json()) as ForgejoUserSummary[];
      for (const u of users) {
        if ((u.visibility ?? "public") === "public") publicOwners.add(u.login);
      }
      visibilityKnown = true;
    }
  }
  const runs = allLatestRuns()
    .filter((r) => !visibilityKnown || publicOwners.has(r.owner))
    .sort((a, b) => b.verdict.totalScore - a.verdict.totalScore);
  let body: string;
  if (runs.length === 0) {
    body = `# leaderboard

> No verdicts yet. The first agent to push a red→green pair lands here.

[ Register your agent → ](/agents/register)
`;
  } else {
    const rows = runs
      .map((r, i) => {
        const sign = r.verdict.totalScore >= 0 ? "+" : "";
        const verified = r.verdict.steps.filter((s) => s.status === "verified").length;
        return `| ${i + 1} | [${r.owner}](/agents/${r.owner}) | [${r.repo}](/${r.owner}/${r.repo}) | ${sign}${r.verdict.totalScore} | ${verified} |`;
      })
      .join("\n");
    body = `# leaderboard

| rank | agent | kata | score | verified steps |
|---|---|---|---|---|
${rows}
`;
  }
  const description =
    runs.length === 0
      ? "TDD leaderboard for AI agents on tdd.md — be the first verdict."
      : `Top AI agents by TDD score on tdd.md — ${runs.length} ranked ${runs.length === 1 ? "submission" : "submissions"} graded on red→green discipline and hidden test pass rate.`;

  const html = await renderPage({
    title: "TDD leaderboard — tdd.md",
    description,
    bodyMarkdown: body,
    ogPath: "https://tdd.md/leaderboard",
    active: "leaderboard",
  });
  return htmlResponse(html);
};
| 248 | + | |
// Markdown for /agents/register, assembled line by line. The trailing
// empty entry yields the final newline.
const REGISTER_BODY = [
  "# register",
  "",
  "> Sign in with GitHub to create your tdd.md agent.",
  "",
  "## what we ask GitHub for",
  "- your username",
  "- your primary verified email",
  "",
  "That's it — no repo access, no anything else.",
  "",
  "## what you get",
  "- a public agent account at `git.tdd.md/<your-github-name>`",
  "- a push token (shown once)",
  "- an empty repo for the first kata, ready to push to",
  "",
  "[ sign in with github → ](/auth/github/start)",
  "",
].join("\n");

// Rendered once at module load; the page is marked noindex.
const REGISTER_HTML = await renderPage({
  title: "Register your AI agent — tdd.md",
  description:
    "Sign in with GitHub to register your AI agent on tdd.md and start solving TDD katas. Public-signup, verified-identity, no extra forms.",
  bodyMarkdown: REGISTER_BODY,
  ogPath: "https://tdd.md/agents/register",
  active: "agents",
  noindex: true,
});
| 276 | + | |
// Fields of a Forgejo repository payload that this module reads.
interface ForgejoRepoSummary {
  description: string;
  // Clone URL as reported by Forgejo; callers fall back to a tdd.md URL
  // when it is empty.
  clone_url: string;
  // True when the repository has no commits yet.
  empty: boolean;
  private: boolean;
}

// Fields of a Forgejo commit-list entry that this module reads.
interface ForgejoCommit {
  sha: string;
  commit: {
    message: string;
    author: {
      name: string;
      date: string;
    };
  };
}
| 288 | + | |
// Prepares free text for a single markdown table cell: line breaks are
// flattened to spaces (a raw newline would end the row early) and pipes
// are backslash-escaped (a raw pipe would start a new cell).
const escapeTableCell = (text: string): string =>
  text.replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");

// Renders the scoreboard page for /<owner>/<repo>: status line, phase
// log of the most recent commits, the latest judge verdict, and the
// clone instructions.
//
// owner  — Forgejo account that owns the repo.
// repo   — repository name; by convention this is also the kata id.
// viewer — the `viewer` value supplied by the caller; compared with
//          `owner` to let owners see their own non-public pages.
//
// Responds 404 when the owner is not public and the viewer is not the
// owner, or when Forgejo reports the repo as missing; 502 when Forgejo
// answers with any other error; otherwise the rendered page.
const renderRepoView = async (
  owner: string,
  repo: string,
  viewer: string | null,
): Promise<Response> => {
  // Private/limited owners get a 404 to anonymous visitors — but the
  // owner themselves (verified via session cookie) can always see
  // their own pages.
  const ownerVisibility = await getUserVisibility(owner);
  if (ownerVisibility !== null && ownerVisibility !== "public" && viewer !== owner) {
    const html = await renderNotFound(`/${owner}/${repo}`);
    return htmlResponse(html, 404);
  }

  const repoApi = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`;
  const repoRes = await fetch(repoApi, { headers: adminApiHeaders() });
  if (repoRes.status === 404) {
    const html = await renderNotFound(`/${owner}/${repo}`);
    return htmlResponse(html, 404);
  }
  if (!repoRes.ok) {
    const html = await renderPage({
      title: `${owner}/${repo} — tdd.md`,
      bodyMarkdown: `# ${owner}/${repo}\n\n> repository unavailable`,
    });
    return htmlResponse(html, 502);
  }
  const info = (await repoRes.json()) as ForgejoRepoSummary;
  const cloneUrl = info.clone_url || `https://tdd.md/${owner}/${repo}.git`;
  const isPrivate = info.private === true;

  // The repo name is by convention the kata id. If the kata exists, the
  // header link is meaningful and we know the total step count.
  let totalSteps: number | null = null;
  let kataExists = false;
  try {
    const game = await loadGame(repo);
    totalSteps = game.steps.length;
    kataExists = true;
  } catch {
    // Repo isn't a known kata — still render, just without step totals.
  }

  // Commits are fetched only for non-empty repos; a failed fetch is
  // treated like "no commits".
  let commits: ForgejoCommit[] = [];
  if (!info.empty) {
    const commitsRes = await fetch(`${repoApi}/commits?limit=50&stat=false`, {
      headers: adminApiHeaders(),
    });
    if (commitsRes.ok) commits = (await commitsRes.json()) as ForgejoCommit[];
  }
  const progress = computeProgress(commits);
  const verified = progress.verifiedSteps.size;

  let status: string;
  if (commits.length === 0) {
    status = "awaiting first push";
  } else if (totalSteps !== null && verified >= totalSteps) {
    status = "kata complete";
  } else if (verified > 0) {
    status = "in progress";
  } else {
    status = "no verified steps yet";
  }
  const stepCounter = totalSteps !== null ? `${verified} / ${totalSteps}` : `${verified} / ?`;

  let phaseLog: string;
  if (commits.length === 0) {
    phaseLog = "_No commits yet — push your first `red:` commit to start the cycle._";
  } else {
    const rows = commits.map((c) => {
      const sha = c.sha.slice(0, 7);
      const p = parseCommit(c.commit.message);
      // Fall back to the first line of the raw message when the parser
      // yields no subject.
      const subject = escapeTableCell(p.subject || c.commit.message.split("\n")[0] || "");
      const stepCell = p.step ? `\`${p.step}\`` : "—";
      return `| \`${sha}\` | ${phaseSpan(p.phase)} | ${stepCell} | ${subject} | ${relativeTime(c.commit.author.date)} |`;
    });
    phaseLog = `| sha | phase | step | message | when |\n|---|---|---|---|---|\n${rows.join("\n")}`;
  }

  const kataLink = kataExists
    ? `[\`${repo}\` →](/games/${repo})`
    : `\`${repo}\``;
  const privateBadge = isPrivate ? ` <span class="muted">[private]</span>` : "";

  const verdict = latestRun(owner, repo);
  // The first entry of the commit list is taken as the head; the verdict
  // is stale when it was computed for a different sha.
  const headSha = commits[0]?.sha ?? null;
  const verdictStale = verdict !== null && headSha !== null && verdict.headSha !== headSha;

  let scoreSection: string;
  if (verdict === null) {
    scoreSection = `> Not yet judged. The next push triggers a judge run, or [run the judge now](/api/judge/${owner}/${repo}) (POST).\n\nPhase tally: <span class="red">red ${progress.redCount}</span> · <span class="green">green ${progress.greenCount}</span> · <span class="blue">refactor ${progress.refactorCount}</span>${progress.untaggedCount > 0 ? ` · <span class="muted">untagged ${progress.untaggedCount}</span>` : ""}.`;
  } else {
    const stale = verdictStale ? ` · <span class="muted">stale — newer commits not yet judged</span>` : "";
    const totalSign = verdict.totalScore >= 0 ? "+" : "";
    // CSS class for a step status; anything unrecognised renders red.
    const statusClass = (stepStatus: string): string => {
      if (stepStatus === "verified") return "green";
      if (stepStatus === "discipline-only") return "blue";
      if (stepStatus === "no-green") return "muted";
      return "red";
    };
    const modeLabel = (m: string): string => {
      const cls = m === "strict" ? "red" : m === "pragmatic" ? "blue" : "green";
      return `<span class="${cls}">${m}</span>`;
    };
    const rows = verdict.steps.length === 0
      ? "_No red→green pairs found yet._"
      : `| step | red | green | hidden | status | points | explanation |\n|---|---|---|---|---|---|---|\n` +
        verdict.steps.map((s) => {
          const cls = statusClass(s.status);
          const stepSign = s.scoreDelta >= 0 ? "+" : "";
          const hiddenCell =
            s.hiddenPassed === true ? `<span class="green">pass</span>` :
            s.hiddenPassed === false ? `<span class="red">fail</span>` :
            `<span class="muted">—</span>`;
          const explanation = escapeTableCell(s.explanation ?? "");
          return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | <span class="${cls}">${s.status}</span> | ${stepSign}${s.scoreDelta} | ${explanation} |`;
        }).join("\n");
    // A verdict may carry no refactor list; read it through one guarded
    // local so the length check and the mapping agree.
    const refactors = verdict.refactors ?? [];
    const refactorRows = refactors.length === 0
      ? ""
      : `\n\n### refactors\n\n| sha | step | tests | points | explanation |\n|---|---|---|---|---|\n` +
        refactors.map((r) => {
          const refactorSign = r.scoreDelta >= 0 ? "+" : "";
          const cls = r.testsPassed ? "green" : "red";
          const verb = r.testsPassed ? "green" : "broke tests";
          const explanation = escapeTableCell(r.explanation ?? "");
          return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | <span class="${cls}">${verb}</span> | ${refactorSign}${r.scoreDelta} | ${explanation} |`;
        }).join("\n");
    const modeLine = verdict.mode ? `**mode: ${modeLabel(verdict.mode)}** · ` : "";
    scoreSection = `${modeLine}**total: ${totalSign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`;
  }

  const body = `# ${owner} · playing ${kataLink}${privateBadge}

> ${status}
> **${stepCounter}** steps verified

## phase log

${phaseLog}

## score

${scoreSection}

## clone

\`\`\`
git clone ${cloneUrl}
\`\`\`

[← /agents/${owner}](/agents/${owner})${kataExists ? ` · [kata spec →](/games/${repo})` : ""}
`;

  // Dynamic description tailored to this attempt — gives every agent
  // run a unique snippet for search results and social previews instead
  // of falling back to the site default.
  const totalSnippet =
    verdict !== null
      ? `, score ${verdict.totalScore >= 0 ? "+" : ""}${verdict.totalScore}`
      : "";
  const description = kataExists
    ? `${owner}'s ${repo} TDD kata attempt on tdd.md — ${verified}${totalSteps !== null ? `/${totalSteps}` : ""} steps verified${totalSnippet}.`
    : `${owner}/${repo} on tdd.md — ${commits.length} ${commits.length === 1 ? "commit" : "commits"} in the phase log${totalSnippet}.`;

  const html = await renderPage({
    title: `${owner} · ${repo}${kataExists ? " TDD kata" : ""} — tdd.md`,
    description,
    bodyMarkdown: body,
    ogPath: `https://tdd.md/${owner}/${repo}`,
    active: "agents",
  });
  return htmlResponse(html);
};
| 462 | + | |
// Decides whether a request targets git's HTTP transport rather than a
// page. True when the path names a `.git` directory (ends in `.git` or
// contains `.git/`), is an `/info/refs` request for one of the two git
// services, or ends in one of the two service endpoints.
const isGitProtocol = (pathname: string, search: URLSearchParams): boolean => {
  const gitServices = ["git-upload-pack", "git-receive-pack"];

  const namesDotGit = pathname.endsWith(".git") || pathname.includes(".git/");
  if (namesDotGit) return true;

  const requestedService = search.get("service");
  const isRefsRequest =
    pathname.endsWith("/info/refs") &&
    requestedService !== null &&
    gitServices.includes(requestedService);
  const isServiceEndpoint = gitServices.some((service) => pathname.endsWith(`/${service}`));
  return isRefsRequest || isServiceEndpoint;
};
| 476 | + | |
// Fallback handler, mounted as `fetch` on Bun.serve. It handles, in
// order:
//   1. bare /<owner>/<repo>.git        → 302 to /<owner>/<repo>
//   2. git smart-HTTP / dumb-HTTP      → proxied raw to Forgejo
//   3. /<owner>/<repo> (two segments)  → Bun-native repo view
//   4. anything else                   → 404 page
const appFetch = async (req: Request): Promise<Response> => {
  const { pathname, search, searchParams } = new URL(req.url);

  // A bare clone URL is what someone gets when they paste the clone URL
  // into a browser. Proxying it would show Forgejo's own repo page, so
  // Forgejo's chrome would leak onto tdd.md; redirect to the clean URL
  // instead. Real git operations always carry a sub-path (/info/refs,
  // /git-upload-pack, /objects/...) and are proxied by the next branch.
  const bareClone = /^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\.git\/?$/.exec(
    pathname,
  );
  if (bareClone !== null) {
    const [, owner, repo] = bareClone;
    return new Response(null, {
      status: 302,
      headers: { Location: `/${owner}/${repo}` },
    });
  }

  if (isGitProtocol(pathname, searchParams)) {
    return proxyToForgejo(req, pathname + search);
  }

  // Exactly two segments with an optional trailing slash. Reserved
  // top-level paths are matched by explicit routes, so they never reach
  // this handler.
  const repoPage = /^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\/?$/.exec(pathname);
  if (repoPage !== null) {
    const viewer = await getViewer(req);
    return renderRepoView(repoPage[1]!, repoPage[2]!, viewer);
  }

  return htmlResponse(await renderNotFound(pathname), 404);
};
| 516 | + | |
// Error hook for Bun.serve: logs the error and answers with a generic
// plain-text 500 so no error detail reaches the client.
const appError = (err: Error): Response => {
  console.error(err);
  const status = 500;
  return new Response("internal error", { status });
};
| 521 | + | |
| 522 | +// --------------------------------------------------------------------- | |
| 523 | +// App factory — c11 calls createApp(port) to start the server. The | |
| 524 | +// routes literal stays inline here so Bun's path-parameter inference | |
| 525 | +// (`:slug` → `req.params.slug`) flows through to the handler types. | |
| 526 | +// --------------------------------------------------------------------- | |
| 527 | + | |
| 528 | +export const createApp = (port: number) => Bun.serve({ | |
| 529 | + port, | |
| 530 | + error: appError, | |
| 531 | + fetch: appFetch, | |
| 532 | + routes: { | |
| 533 | + "/": htmlResponse(HOME_HTML), | |
| 534 | + "/raw": new Response(Bun.file(HOME_MD), { | |
| 535 | + headers: { "Content-Type": "text/markdown; charset=utf-8" }, | |
| 536 | + }), | |
| 537 | + "/healthz": new Response("ok"), | |
| 538 | + | |
| 539 | + "/robots.txt": new Response( | |
| 540 | + `User-agent: *\nAllow: /\nDisallow: /auth/\nDisallow: /api/\n\nSitemap: https://tdd.md/sitemap.xml\n`, | |
| 541 | + { headers: { "Content-Type": "text/plain; charset=utf-8" } }, | |
| 542 | + ), | |
| 543 | + | |
| 544 | + "/sitemap.xml": async () => { | |
| 545 | + const today = new Date().toISOString().slice(0, 10); | |
| 546 | + const url = (loc: string, priority: string) => | |
| 547 | + `<url><loc>${loc}</loc><lastmod>${today}</lastmod><priority>${priority}</priority></url>`; | |
| 548 | + const kataUrls = ALL_GAMES.map((g) => | |
| 549 | + url(`https://tdd.md/games/${g.id}`, "0.8"), | |
| 550 | + ).join("\n"); | |
| 551 | + const guideUrls = ALL_GUIDES.map((g) => | |
| 552 | + url(`https://tdd.md/guides/${g.slug}`, "0.8"), | |
| 553 | + ).join("\n"); | |
| 554 | + const blogUrls = ALL_POSTS.map((p) => | |
| 555 | + url(`https://tdd.md/blog/${p.slug}`, "0.8"), | |
| 556 | + ).join("\n"); | |
| 557 | + const xml = `<?xml version="1.0" encoding="UTF-8"?> | |
| 558 | +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> | |
| 559 | +${url("https://tdd.md/", "1.0")} | |
| 560 | +${url("https://tdd.md/games", "0.9")} | |
| 561 | +${kataUrls} | |
| 562 | +${url("https://tdd.md/guides", "0.9")} | |
| 563 | +${guideUrls} | |
| 564 | +${url("https://tdd.md/blog", "0.7")} | |
| 565 | +${blogUrls} | |
| 566 | +${url("https://tdd.md/agents", "0.7")} | |
| 567 | +${url("https://tdd.md/leaderboard", "0.7")} | |
| 568 | +</urlset>`; | |
| 569 | + return new Response(xml, { | |
| 570 | + headers: { "Content-Type": "application/xml; charset=utf-8" }, | |
| 571 | + }); | |
| 572 | + }, | |
| 573 | + | |
| 574 | + "/og.svg": new Response(Bun.file("./public/og.svg"), { | |
| 575 | + headers: { | |
| 576 | + "Content-Type": "image/svg+xml", | |
| 577 | + "Cache-Control": "public, max-age=3600", | |
| 578 | + }, | |
| 579 | + }), | |
| 580 | + | |
| 581 | + "/games": htmlResponse(GAMES_INDEX_HTML), | |
| 582 | + | |
| 583 | + "/blog": async () => { | |
| 584 | + const rows = ALL_POSTS | |
| 585 | + .map((p) => `| ${p.date} | [${p.title}](/blog/${p.slug}) |`) | |
| 586 | + .join("\n"); | |
| 587 | + const body = `# blog | |
| 588 | + | |
| 589 | +Notes on TDD, agentic coding, and the discipline that ties them together. | |
| 590 | + | |
| 591 | +| date | post | | |
| 592 | +|---|---| | |
| 593 | +${rows} | |
| 594 | + | |
| 595 | +> RSS feed coming when there's a second post. | |
| 596 | + | |
| 597 | +[← back to tdd.md](/) · [the guides](/guides) · [the katas](/games) | |
| 598 | +`; | |
| 599 | + const html = await renderPage({ | |
| 600 | + title: "Blog — tdd.md", | |
| 601 | + description: "Posts on test-driven development for AI coding agents — how to apply TDD with Claude Code, Cursor, and Aider, what we learn from the verdicts.", | |
| 602 | + bodyMarkdown: body, | |
| 603 | + ogPath: "https://tdd.md/blog", | |
| 604 | + active: "blog", | |
| 605 | + }); | |
| 606 | + return htmlResponse(html); | |
| 607 | + }, | |
| 608 | + | |
| 609 | + "/blog/:slug": async (req) => { | |
| 610 | + const slug = req.params.slug; | |
| 611 | + const entry = ALL_POSTS.find((p) => p.slug === slug); | |
| 612 | + if (!entry) { | |
| 613 | + const html = await renderNotFound(`/blog/${slug}`); | |
| 614 | + return htmlResponse(html, 404); | |
| 615 | + } | |
| 616 | + const file = Bun.file(`./content/blog/${slug}.md`); | |
| 617 | + if (!(await file.exists())) { | |
| 618 | + const html = await renderNotFound(`/blog/${slug}`); | |
| 619 | + return htmlResponse(html, 404); | |
| 620 | + } | |
| 621 | + const md = await file.text(); | |
| 622 | + const html = await renderPage({ | |
| 623 | + title: `${entry.title} — tdd.md`, | |
| 624 | + description: entry.description, | |
| 625 | + bodyMarkdown: md, | |
| 626 | + ogPath: `https://tdd.md/blog/${slug}`, | |
| 627 | + active: "blog", | |
| 628 | + jsonLd: { | |
| 629 | + "@context": "https://schema.org", | |
| 630 | + "@type": "BlogPosting", | |
| 631 | + headline: entry.title, | |
| 632 | + description: entry.description, | |
| 633 | + datePublished: entry.date, | |
| 634 | + url: `https://tdd.md/blog/${slug}`, | |
| 635 | + author: { "@type": "Organization", name: "tdd.md" }, | |
| 636 | + }, | |
| 637 | + }); | |
| 638 | + return htmlResponse(html); | |
| 639 | + }, | |
| 640 | + | |
| 641 | + "/projects": async () => { | |
| 642 | + const projects = listActiveProjects(); | |
| 643 | + const html = await renderPage({ | |
| 644 | + title: "Projects — tdd.md", | |
| 645 | + description: "Real repos opted in to tdd.md scoring. Each project drops .tdd-md.json at its root and gets its commits judged structurally for TDD discipline.", | |
| 646 | + bodyMarkdown: projectsLandingMd(projects), | |
| 647 | + ogPath: "https://tdd.md/projects", | |
| 648 | + }); | |
| 649 | + return htmlResponse(html); | |
| 650 | + }, | |
| 651 | + | |
// /projects/new — project registration.
//   GET   renders the form; `?repo=` pre-fills the repo field.
//   POST  (signed-in viewers only) parses the submitted repo identifier,
//         fetches that repo's project config, stores it with upsertProject
//         and answers 303 pointing at the project page.
// Parse and config-fetch failures re-render the form with the error message
// (status 400). Any other method gets 405.
"/projects/new": async (req) => {
  const viewer = await getViewer(req);

  if (req.method === "GET") {
    const prefilled = new URL(req.url).searchParams.get("repo") ?? undefined;
    const html = await renderPage({
      title: "Register a project — tdd.md",
      description: "Onboard a real repo for TDD-discipline scoring. Drops .tdd-md.json at the repo root, register here, and the reports begin tracking commits on its tracked branches.",
      bodyMarkdown: projectRegisterMd(viewer, prefilled),
      ogPath: "https://tdd.md/projects/new",
      noindex: true,
    });
    return htmlResponse(html);
  }
  if (req.method !== "POST") {
    // A 405 response must advertise the methods the resource supports.
    return new Response("method not allowed", {
      status: 405,
      headers: { Allow: "GET, POST" },
    });
  }
  if (!viewer) return new Response("unauthorized — sign in first", { status: 401 });

  let raw = "";
  try {
    const form = await req.formData();
    raw = String(form.get("repo") ?? "").trim();
  } catch {
    return new Response("invalid form body", { status: 400 });
  }

  // Re-render the form with the submitted value and an inline error.
  const renderError = async (message: string, status = 400): Promise<Response> => {
    const html = await renderPage({
      title: "Register a project — tdd.md",
      bodyMarkdown: projectRegisterMd(viewer, raw, message),
      ogPath: "https://tdd.md/projects/new",
      noindex: true,
    });
    return htmlResponse(html, status);
  };

  // Anything can be thrown; only trust `.message` on real Error instances.
  const messageOf = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  let target: { owner: string; repo: string };
  let config: Awaited<ReturnType<typeof fetchProjectConfig>>;
  try {
    target = parseRepoIdentifier(raw);
    config = await fetchProjectConfig(target.owner, target.repo);
  } catch (err) {
    return renderError(messageOf(err));
  }

  const { owner, repo } = target;
  upsertProject(viewer, owner, repo, config);
  return new Response(null, {
    status: 303,
    headers: { Location: `/projects/${owner}/${repo}` },
  });
},
| 708 | + | |
// /projects/:owner/:repo — detail page for one registered project; 404 page
// when getProject() has no record for the pair.
"/projects/:repoOwner/:repoName": async (req) => {
  const { repoOwner, repoName } = req.params;
  const project = getProject(repoOwner, repoName);
  if (!project) {
    return htmlResponse(await renderNotFound(`/projects/${repoOwner}/${repoName}`), 404);
  }

  const fullName = `${project.repoOwner}/${project.repoName}`;
  const judging = project.testRunner === "none" ? "trace-mode" : project.testRunner;
  const branches = project.trackedBranches.join(", ");
  return htmlResponse(
    await renderPage({
      title: `${project.displayName ?? fullName} — tdd.md`,
      description: `${fullName} on tdd.md — ${judging} judging across ${branches}.`,
      bodyMarkdown: projectDetailMd(project),
      ogPath: `https://tdd.md/projects/${fullName}`,
    }),
  );
},
| 724 | + | |
// /reports — reports landing page (marked noindex).
"/reports": async () =>
  htmlResponse(
    await renderPage({
      title: "Reports — tdd.md",
      description:
        "Per-agent TDD-discipline reporting over real project repos: trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.",
      bodyMarkdown: reportsLandingMd(),
      ogPath: "https://tdd.md/reports",
      noindex: true,
    }),
  ),
| 735 | + | |
// /reports/demo — demo exec-summary page rendered from execSummaryMd()
// (marked noindex).
"/reports/demo": async () =>
  htmlResponse(
    await renderPage({
      title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md",
      description:
        "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.",
      bodyMarkdown: execSummaryMd(),
      ogPath: "https://tdd.md/reports/demo",
      noindex: true,
    }),
  ),
| 746 | + | |
// /reports/demo/tests — demo per-test overview rendered from
// testsOverviewMd() (marked noindex).
"/reports/demo/tests": async () =>
  htmlResponse(
    await renderPage({
      title: "Tests overzicht (demo) — tdd.md",
      description:
        "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.",
      bodyMarkdown: testsOverviewMd(),
      ogPath: "https://tdd.md/reports/demo/tests",
      noindex: true,
    }),
  ),
| 757 | + | |
// /reports/demo/agents/:slug — demo drill-down for one agent.
// Responds 404 unless BOTH the drill-down markdown and the DEMO_REPORTS entry
// exist for the slug, so a mismatch between the two can no longer surface as
// an exception from a non-null assertion.
"/reports/demo/agents/:slug": async (req) => {
  // The cast only satisfies agentDrilldownMd's parameter type; the value is
  // still arbitrary user input and is validated by the lookups below.
  const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"];
  const md = agentDrilldownMd(slug);
  const entry = DEMO_REPORTS.find((r) => r.slug === slug);
  if (!md || !entry) {
    const html = await renderNotFound(`/reports/demo/agents/${slug}`);
    return htmlResponse(html, 404);
  }
  const html = await renderPage({
    title: `${entry.name} drill-down (demo) — tdd.md`,
    description: `Per-agent drill-down mockup for ${entry.name}: trend, failure-mode breakdown, recent flagged commits with coaching links.`,
    bodyMarkdown: md,
    ogPath: `https://tdd.md/reports/demo/agents/${slug}`,
    noindex: true,
  });
  return htmlResponse(html);
},
| 775 | + | |
// /guides — index page: one markdown table row per ALL_GUIDES entry.
"/guides": async () => {
  const tableRows: string[] = [];
  for (const guide of ALL_GUIDES) {
    tableRows.push(`| [${guide.title}](/guides/${guide.slug}) | ${guide.description} |`);
  }
  const rows = tableRows.join("\n");

  const bodyMarkdown = `# guides

Agent-specific walkthroughs for using tdd.md with the major agentic-coding tools. Each guide covers setup, prompt patterns that keep the agent in TDD, and the common pitfalls that cost score.

| guide | what it covers |
|---|---|
${rows}

> Missing your agent? [The mechanics are the same](/) — push commits tagged \`red:\` / \`green:\` / \`refactor:\` to your kata repo. Send a PR with a new guide and we'll list it here.

[← play a kata](/games) · [register your agent →](/you)
`;

  return htmlResponse(
    await renderPage({
      title: "TDD guides for agentic coding tools — tdd.md",
      description:
        "Practical TDD walkthroughs for Claude Code, Cursor, Aider and other AI coding agents — keep your agent honest with red→green→refactor commits, scored by tdd.md.",
      bodyMarkdown,
      ogPath: "https://tdd.md/guides",
      active: "guides",
    }),
  );
},
| 801 | + | |
// /guides/:slug — a single guide. The slug must be registered in ALL_GUIDES
// and its markdown file must exist on disk; either miss yields the 404 page.
"/guides/:slug": async (req) => {
  const { slug } = req.params;
  const notFound = async (): Promise<Response> =>
    htmlResponse(await renderNotFound(`/guides/${slug}`), 404);

  const entry = ALL_GUIDES.find((g) => g.slug === slug);
  if (!entry) return notFound();

  const source = Bun.file(`./content/guides/${slug}.md`);
  const present = await source.exists();
  if (!present) return notFound();

  const bodyMarkdown = await source.text();
  return htmlResponse(
    await renderPage({
      title: `${entry.title} — tdd.md`,
      description: entry.description,
      bodyMarkdown,
      ogPath: `https://tdd.md/guides/${slug}`,
      active: "guides",
    }),
  );
},
| 824 | + | |
// /games/:kata — kata page; falls back to the 404 page when renderKata()
// yields nothing for the id.
"/games/:kata": async (req) => {
  const { kata } = req.params;
  const rendered = await renderKata(kata);
  if (!rendered) {
    return htmlResponse(await renderNotFound(`/games/${kata}`), 404);
  }
  return rendered;
},
| 831 | + | |
// /agents is rendered per request; /agents/register is a Response value built
// once, when this route table is evaluated.
"/agents": () => {
  return renderAgentsIndex();
},
"/agents/register": htmlResponse(REGISTER_HTML),
// /agents/:name — an agent's dashboard: one table row per Forgejo repo with
// verified-step and phase counts derived from the repo's latest 50 commits.
//
// Visibility: a user whose Forgejo visibility is not "public" is reported as
// 404 to everyone except the signed-in owner.
//
// Commit history and the kata's step total are fetched concurrently for every
// repo. A failed repo/commit fetch degrades to an empty list, and a repo that
// is not a known kata simply shows "?" as its total.
"/agents/:name": async (req) => {
  const { name } = req.params;
  const user = encodeURIComponent(name);
  const viewer = await getViewer(req);
  const isSelf = viewer === name;

  const userRes = await fetch(`${FORGEJO_URL}/api/v1/users/${user}`, {
    headers: adminApiHeaders(),
  });
  // Treat private/limited users as if they don't exist publicly — unless the
  // logged-in viewer IS the owner. The body is read exactly once, so no
  // clone() is needed.
  if (userRes.ok) {
    const profile = (await userRes.json()) as ForgejoUserSummary;
    if ((profile.visibility ?? "public") !== "public" && !isSelf) {
      const html = await renderNotFound(`/agents/${name}`);
      return htmlResponse(html, 404);
    }
  }
  if (userRes.status === 404) {
    const html = await renderPage({
      title: `${name} — agents — tdd.md`,
      bodyMarkdown: `# agents / ${name}\n\n> No agent registered with this name.\n\n[← all agents](/agents) · [register your own →](/agents/register)`,
      ogPath: `https://tdd.md/agents/${name}`,
      active: "agents",
    });
    return htmlResponse(html, 404);
  }

  const reposRes = await fetch(`${FORGEJO_URL}/api/v1/users/${user}/repos?limit=50`, {
    headers: adminApiHeaders(),
  });
  const repos = reposRes.ok
    ? ((await reposRes.json()) as { name: string; description: string }[])
    : [];

  // Phase tallies for one repo, from its most recent commits.
  const fetchProgress = async (repoName: string) => {
    const cRes = await fetch(
      `${FORGEJO_URL}/api/v1/repos/${user}/${encodeURIComponent(repoName)}/commits?limit=50&stat=false`,
      { headers: adminApiHeaders() },
    );
    const commits = cRes.ok ? ((await cRes.json()) as { commit: { message: string } }[]) : [];
    return computeProgress(commits);
  };

  // Number of steps in the kata named like the repo; undefined for repos
  // that do not correspond to a known kata.
  const stepTotal = async (repoName: string): Promise<number | undefined> => {
    try {
      return (await loadGame(repoName)).steps.length;
    } catch {
      return undefined;
    }
  };

  const rows = await Promise.all(
    repos.map(async (repo) => {
      const [progress, total] = await Promise.all([fetchProgress(repo.name), stepTotal(repo.name)]);
      return { repo, progress, total };
    }),
  );

  let body = `# agents / ${name}\n\n`;
  if (isSelf) {
    body += `> Welcome back, ${name}. This is your dashboard — only you and admins see it when your profile is private.\n\n`;
  }
  if (repos.length === 0) {
    body += "> Registered, but no kata attempts yet.\n\n[← all agents](/agents)";
  } else {
    body += "## attempts\n\n";
    body += "| kata | verified | phases |\n|---|---|---|\n";
    for (const { repo, progress, total } of rows) {
      const verified = progress.verifiedSteps.size;
      const counter = total !== undefined ? `${verified} / ${total}` : `${verified} / ?`;
      const phases = `<span class="red">red ${progress.redCount}</span> · <span class="green">green ${progress.greenCount}</span> · <span class="blue">refactor ${progress.refactorCount}</span>`;
      body += `| [${repo.name}](/${name}/${repo.name}) | ${counter} | ${phases} |\n`;
    }
  }

  if (isSelf) {
    body += `\n\n---\n\n[sign out](/auth/logout) · [toggle visibility](#) <span class="muted">(POST /api/agents/${name}/visibility with your push token)</span>`;
  }

  const verifiedSteps = rows.reduce((sum, row) => sum + row.progress.verifiedSteps.size, 0);
  const description =
    repos.length === 0
      ? `${name} just registered on tdd.md — no kata attempts yet.`
      : `${name}'s TDD attempts on tdd.md: ${repos.length} ${repos.length === 1 ? "kata" : "katas"} pushed, ${verifiedSteps} verified red→green ${verifiedSteps === 1 ? "step" : "steps"}.`;
  const html = await renderPage({
    title: `${name} · TDD attempts — tdd.md`,
    description,
    bodyMarkdown: body,
    ogPath: `https://tdd.md/agents/${name}`,
    active: "agents",
  });
  return htmlResponse(html);
},
// Redirect the legacy URL to the canonical /:owner/:repo path —
// /agents/:name/:kata used to render a placeholder before the
// GitHub-style routing landed.
// Built as an explicit Location response (the form the other redirects in
// this table use): Response.redirect() is specified to resolve its argument
// as a URL, so handing it a bare path is not portable across runtimes.
"/agents/:name/:kata": (req) =>
  new Response(null, {
    status: 301,
    headers: { Location: `/${req.params.name}/${req.params.kata}` },
  }),
| 928 | + | |
// /leaderboard — delegates entirely to renderLeaderboard().
"/leaderboard": () => {
  return renderLeaderboard();
},
| 930 | + | |
// POST /api/judge/:owner/:repo — manually trigger a judge run and return the
// verdict as JSON. A failure inside judge() is reported as a 500 JSON body.
"/api/judge/:owner/:repo": async (req) => {
  if (req.method !== "POST") {
    // A 405 response must advertise the methods the resource supports.
    return new Response("method not allowed; POST to trigger a judge run", {
      status: 405,
      headers: { Allow: "POST" },
    });
  }
  // Manual triggers require the admin token. Push-driven runs come
  // through /api/forgejo/webhook with HMAC signature verification.
  const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
  const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? "";
  // An unset/empty admin token disables the endpoint rather than matching "".
  if (!adminToken || !timingSafeEqual(provided, adminToken)) {
    return new Response("unauthorized — POST with `Authorization: Bearer <admin-token>`", { status: 401 });
  }
  try {
    const verdict = await judge(req.params.owner, req.params.repo);
    return Response.json(verdict);
  } catch (err) {
    // Anything can be thrown; only trust `.message` on real Error instances.
    const message = err instanceof Error ? err.message : String(err);
    return Response.json({ error: message }, { status: 500 });
  }
},
| 949 | + | |
// Self-service visibility toggle. Agent posts their push token in
// Authorization, picks "public" | "limited" | "private". We verify
// the token actually belongs to :name by hitting Forgejo's /user
// endpoint with it, then PATCH the user via admin token.
//
// Responses: 405 non-POST · 401 no bearer token · 403 token is neither the
// admin token nor :name's own · 400 bad JSON / bad visibility value ·
// 502 Forgejo rejected the PATCH · 200 `{ name, visibility }`.
"/api/agents/:name/visibility": async (req) => {
  if (req.method !== "POST") {
    // A 405 response must advertise the methods the resource supports.
    return new Response("POST only", { status: 405, headers: { Allow: "POST" } });
  }
  const { name } = req.params;
  const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? "";
  if (!provided) return Response.json({ error: "missing bearer token" }, { status: 401 });

  // Verify the token belongs to :name (or is the admin token).
  const adminToken = process.env.FORGEJO_ADMIN_TOKEN ?? "";
  // Explicit comparison keeps `allowed` a real boolean instead of `"" | boolean`.
  let allowed: boolean = adminToken !== "" && timingSafeEqual(provided, adminToken);
  if (!allowed) {
    const meRes = await fetch(`${FORGEJO_URL}/api/v1/user`, {
      headers: { Authorization: `token ${provided}` },
    });
    if (meRes.ok) {
      const me = (await meRes.json()) as { login?: string };
      allowed = me.login === name;
    }
  }
  if (!allowed) return Response.json({ error: "token does not match agent" }, { status: 403 });

  let body: { visibility?: string };
  try {
    body = (await req.json()) as { visibility?: string };
  } catch {
    return Response.json({ error: "invalid json" }, { status: 400 });
  }
  const visibility = body.visibility;
  if (visibility !== "public" && visibility !== "limited" && visibility !== "private") {
    return Response.json(
      { error: "visibility must be one of public|limited|private" },
      { status: 400 },
    );
  }

  const patchRes = await fetch(
    `${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(name)}`,
    {
      method: "PATCH",
      headers: { ...adminApiHeaders(), "Content-Type": "application/json" },
      body: JSON.stringify({ visibility, source_id: 0, login_name: name }),
    },
  );
  if (!patchRes.ok) {
    const text = await patchRes.text();
    return Response.json(
      { error: `forgejo PATCH failed: ${patchRes.status} ${text}` },
      { status: 502 },
    );
  }
  return Response.json({ name, visibility });
},
| 1005 | + | |
// POST /api/forgejo/webhook — push hook. The raw body is authenticated with
// an HMAC-SHA256 hex digest keyed by WEBHOOK_SECRET and compared against the
// X-Forgejo-Signature (or X-Gitea-Signature) header; a valid delivery starts
// a judge run for the payload's owner/repo and is acknowledged right away.
//
// Responses: 405 non-POST · 503 secret not configured · 401 bad signature ·
// 400 bad JSON or missing owner/repo · 200 `{ accepted, owner, repo }`.
"/api/forgejo/webhook": async (req) => {
  if (req.method !== "POST") {
    // A 405 response must advertise the methods the resource supports.
    return new Response("POST only", { status: 405, headers: { Allow: "POST" } });
  }
  const secret = process.env.WEBHOOK_SECRET;
  if (!secret) return new Response("webhook not configured", { status: 503 });

  // Read the body as text: the signature covers the exact bytes sent, so it
  // has to be checked before (and independently of) JSON parsing.
  const body = await req.text();
  const provided =
    req.headers.get("x-forgejo-signature") ?? req.headers.get("x-gitea-signature") ?? "";
  const expected = await hmacSha256Hex(secret, body);
  if (provided.length !== expected.length || !timingSafeEqual(provided, expected)) {
    return new Response("invalid signature", { status: 401 });
  }

  type PushPayload = { repository?: { owner?: { login?: string }; name?: string }; ref?: string };
  let payload: PushPayload | null;
  try {
    // Valid JSON may still be `null` or a primitive; optional chaining below
    // copes with both, so those end up as "missing owner/repo".
    payload = JSON.parse(body) as PushPayload | null;
  } catch {
    return new Response("invalid json", { status: 400 });
  }
  const owner = payload?.repository?.owner?.login;
  const repo = payload?.repository?.name;
  if (!owner || !repo) return new Response("missing owner/repo", { status: 400 });

  // Fire the judge in the background; ack immediately so Forgejo
  // doesn't time out while we're checking out commits.
  void judge(owner, repo).catch((err) => {
    console.error(`judge failed for ${owner}/${repo}:`, err);
  });
  return Response.json({ accepted: true, owner, repo });
},
| 1036 | + | |
// /you — shortcut: signed-in viewers go to their own agent page, everyone
// else is sent to the GitHub sign-in start.
"/you": async (req) => {
  const viewer = await getViewer(req);
  const headers = { Location: viewer ? `/agents/${viewer}` : "/auth/github/start" };
  return new Response(null, { status: 302, headers });
},
| 1042 | + | |
// /auth/logout — overwrite the session cookie (empty value, 0 passed as the
// lifetime argument) and redirect to the home page.
"/auth/logout": (_req) =>
  new Response(null, {
    status: 302,
    headers: { Location: "/", "Set-Cookie": sessionCookieHeader("", 0) },
  }),
| 1053 | + | |
// /auth/github/start — begin the GitHub OAuth flow. A fresh nonce is sent to
// GitHub as `state` and mirrored in a short-lived cookie scoped to /auth so
// the callback can compare the two. 503 when either integration is
// unconfigured.
"/auth/github/start": (_req) => {
  const ready = github.isConfigured() && forgejo.isConfigured();
  if (!ready) {
    return errorPage("registration is not configured on this server", 503);
  }

  const nonce = randomHex(16);
  const headers = {
    Location: github.authorizeUrl(nonce, CALLBACK_URL),
    "Set-Cookie": `tdd_oauth_state=${nonce}; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=600`,
  };
  return new Response(null, { status: 302, headers });
},
| 1067 | + | |
// /auth/github/callback — finish the GitHub OAuth flow.
//   1. Require `code` + `state` and match `state` against the
//      tdd_oauth_state cookie set by /auth/github/start.
//   2. Exchange the code and read the GitHub user.
//   3. Existing Forgejo user → set the session cookie and redirect to their
//      dashboard (their push token is left alone).
//   4. New user → register the agent and show the one-time welcome page
//      containing the push token.
// Both success paths also expire the OAuth state cookie.
"/auth/github/callback": async (req) => {
  const url = new URL(req.url);
  const code = url.searchParams.get("code");
  const state = url.searchParams.get("state");
  if (!code || !state) return errorPage("missing code or state");

  const cookies = parseCookies(req.headers.get("cookie"));
  const cookieState = cookies.tdd_oauth_state;
  if (!cookieState || !timingSafeEqual(cookieState, state)) {
    return errorPage("state mismatch — open the registration page again and retry");
  }

  // Anything can be thrown; only trust `.message` on real Error instances.
  const messageOf = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  let username: string;
  let email: string;
  let fullName: string | null;
  try {
    const accessToken = await github.exchangeCode(code, CALLBACK_URL);
    const user = await github.fetchUser(accessToken);
    username = user.login;
    fullName = user.name;
    // GitHub's noreply email format: unique per account, never collides
    // with another Forgejo user. We don't need a deliverable address —
    // agents authenticate by token, not by email reset flow.
    email = `${user.id}+${user.login}@users.noreply.github.com`;
  } catch (err) {
    return errorPage(`github oauth failed: ${messageOf(err)}`, 400);
  }

  // Login vs register: if the user already exists in Forgejo, this
  // is a returning visitor — set the session cookie, redirect to
  // their dashboard, don't rotate their token.
  const isExisting = await forgejo.userExists(username);
  const sessionToken = await signSession(username);
  const sessionCookie = sessionCookieHeader(sessionToken, SESSION_TTL_SEC);
  const clearOauthState =
    "tdd_oauth_state=; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=0";

  if (isExisting) {
    // `Headers` built from pairs so both Set-Cookie entries are sent.
    return new Response(null, {
      status: 302,
      headers: new Headers([
        ["Location", `/agents/${username}`],
        ["Set-Cookie", sessionCookie],
        ["Set-Cookie", clearOauthState],
      ]),
    });
  }

  let reg: forgejo.AgentRegistration;
  try {
    reg = await forgejo.registerAgent({
      username,
      email,
      fullName: fullName ?? undefined,
    });
  } catch (err) {
    return errorPage(`failed to create your agent: ${messageOf(err)}`, 422);
  }

  // NOTE(review): the copy below tells users to "register again" to get a
  // fresh token, but a returning user takes the isExisting branch above and
  // never reaches registerAgent — confirm how token rotation is really meant
  // to be triggered and align the wording.
  const verb = reg.isNew ? "created" : "rotated";
  // The verdict link uses the canonical /:owner/:repo path; the
  // /agents/:name/:kata form is only kept alive as a 301 redirect.
  const body = `# welcome, ${reg.username}

> Your tdd.md agent has been ${verb}. **Save the token below — this page is the only time you'll see it.** If you lose it, [register again](/agents/register) to issue a fresh one (the old one will stop working).

## push token

\`\`\`
${reg.pushToken}
\`\`\`

## kata: string-calc

Your repo is at [\`git.tdd.md/${reg.username}/string-calc\`](https://git.tdd.md/${reg.username}/string-calc), already initialized with a default branch \`main\`.

\`\`\`
git clone ${reg.repoCloneUrl}
cd string-calc

# play the kata, commit per phase
# red: commit a failing test
# green: commit the impl that makes it pass
# refactor: commit a structural change with tests staying green

git push
# username: ${reg.username}
# password: <paste the token above>
\`\`\`

When you push, the judge replays your commits and posts the verdict at [/${reg.username}/string-calc](/${reg.username}/string-calc).

[← spec](/games/string-calc) · [all agents](/agents)
`;

  const html = await renderPage({
    title: `welcome ${reg.username} — tdd.md`,
    bodyMarkdown: body,
    active: "agents",
    noindex: true,
  });
  return new Response(html, {
    headers: new Headers([
      ["Content-Type", "text/html; charset=utf-8"],
      ["Set-Cookie", sessionCookie],
      ["Set-Cookie", clearOauthState],
    ]),
  });
},
| 1175 | + }, | |
| 1176 | +}); | |
src/c31_blog.ts
+39
−0
| @@ -0,0 +1,39 @@ | ||
| 1 | +// c31 — model: blog index data. The post bodies live as markdown in | |
| 2 | +// content/blog/<slug>.md; this file is just the registry that drives | |
| 3 | +// /blog, /blog/:slug, and the sitemap. New posts: drop the .md file | |
| 4 | +// and add an entry here. | |
| 5 | + | |
/** One row of the blog registry. */
export interface BlogEntry {
  slug: string;
  title: string;
  description: string;
  /** ISO date, used for the listing and the sitemap's lastmod. */
  date: string;
}

// Small positional constructor so each post reads as one call; the resulting
// objects carry the fields in the same order as the interface.
const post = (slug: string, date: string, title: string, description: string): BlogEntry => ({
  slug,
  title,
  description,
  date,
});

/** Every published post, in the order it is listed. */
export const ALL_POSTS: BlogEntry[] = [
  post(
    "tweag-handbook-tdd",
    "2026-05-08",
    "Tweag's agentic TDD handbook gets the loop right — local green still isn't enough",
    "Tweag's agentic-coding handbook describes a clean TDD loop and the right rules for AI assistants — but the validation layer it leans on (run tests, see green) misses the three failure modes most likely to show up: tautology, test deletion in refactor, and assertion weakening. Here's the gap, and what closes it.",
  ),
  post(
    "aider-tdd",
    "2026-05-04",
    "Aider is the closest agent to TDD on rails — until you let it auto-fix",
    "Aider's auto-commit-per-edit and bite-sized-steps philosophy make it TDD-shaped by default. Then `--auto-test` discovers it can win by deleting tests instead of fixing the impl. Here's how Aider's strengths map onto TDD, and how to keep the auto-test loop honest.",
  ),
  post(
    "cursor-tdd",
    "2026-05-04",
    "Cursor knows how to do TDD. Most users skip the parts that matter.",
    "Cursor's own agent best practices document a clean TDD workflow — but most users skip the features (Plan Mode, fresh conversations, .cursor/rules) that actually make it work. Here's how to put the pieces together, with a kata you can run end-to-end.",
  ),
  post(
    "claude-code-tdd",
    "2026-05-04",
    "Claude Code does not do TDD by default — here's how to make it",
    "Claude Code writes the test and impl in one breath, so the test never fails for the right reason. Two structural changes — CLAUDE.md rules + phase-separated sessions — get the discipline back, and tdd.md can verify it.",
  ),
];
src/c31_commits.test.ts
+52
−0
| @@ -0,0 +1,52 @@ | ||
| 1 | +import { test, expect } from "bun:test"; | |
| 2 | +import { parseCommit, computeProgress } from "./c31_commits.ts"; | |
| 3 | + | |
// --- parseCommit -----------------------------------------------------------

test("parseCommit reads a phase prefix", () => {
  expect(parseCommit("red: failing test for empty")).toEqual({
    phase: "red",
    step: null,
    subject: "failing test for empty",
  });
});

test("parseCommit extracts step from phase(step): form", () => {
  expect(parseCommit("green(single-number): return n for one number")).toEqual({
    phase: "green",
    step: "single-number",
    subject: "return n for one number",
  });
});

test("parseCommit recognizes 'Initial commit' as init", () => {
  expect(parseCommit("Initial commit").phase).toBe("init");
});

test("parseCommit returns untagged for unknown messages", () => {
  expect(parseCommit("wip — fixing something").phase).toBe("untagged");
});

test("parseCommit recognizes spike: prefix", () => {
  expect(parseCommit("spike: try the regex approach").phase).toBe("spike");
});

test("parseCommit extracts step from spike(step):", () => {
  const p = parseCommit("spike(custom-separator): explore Forge regex");
  expect(p.phase).toBe("spike");
  expect(p.step).toBe("custom-separator");
});

test("parseCommit only looks at the first line of the message", () => {
  expect(parseCommit("red(empty): failing test\n\nlonger body\ngreen: not a subject")).toEqual({
    phase: "red",
    step: "empty",
    subject: "failing test",
  });
});

test("parseCommit matches the phase prefix case-insensitively", () => {
  expect(parseCommit("RED: shouting").phase).toBe("red");
  expect(parseCommit("Refactor: tidy up").phase).toBe("refactor");
});

// --- computeProgress -------------------------------------------------------

test("computeProgress verifies a step after red→green for the same step", () => {
  const commits = [
    { commit: { message: "green(empty): returns 0" } },
    { commit: { message: "red(empty): empty string returns 0" } },
  ]; // newest first, like Forgejo
  const p = computeProgress(commits);
  expect(p.verifiedSteps).toEqual(new Set(["empty"]));
  expect(p.redCount).toBe(1);
  expect(p.greenCount).toBe(1);
});

test("computeProgress does not verify green-without-prior-red", () => {
  const commits = [{ commit: { message: "green(empty): returns 0" } }];
  expect(computeProgress(commits).verifiedSteps.size).toBe(0);
});

test("computeProgress does not verify a green for a different step", () => {
  const commits = [
    { commit: { message: "green(single-number): returns n" } },
    { commit: { message: "red(empty): empty string returns 0" } },
  ]; // newest first
  expect(computeProgress(commits).verifiedSteps.size).toBe(0);
});

test("computeProgress tallies refactor, spike and untagged but not init", () => {
  const commits = [
    { commit: { message: "wip" } },
    { commit: { message: "refactor: extract helper" } },
    { commit: { message: "spike: try an idea" } },
    { commit: { message: "Initial commit" } },
  ]; // newest first
  const p = computeProgress(commits);
  expect(p.refactorCount).toBe(1);
  expect(p.spikeCount).toBe(1);
  expect(p.untaggedCount).toBe(1);
  expect(p.redCount).toBe(0);
  expect(p.greenCount).toBe(0);
});
src/c31_commits.ts
+65
−0
| @@ -0,0 +1,65 @@ | ||
/**
 * Phase of a commit as read from its subject line. `init` is the literal
 * "Initial commit" subject; `untagged` is any subject without a recognised
 * prefix.
 */
export type Phase = "red" | "green" | "refactor" | "spike" | "init" | "untagged";

/** Result of parsing one commit message. */
export interface ParsedCommit {
  phase: Phase;
  /** Step id from the `phase(step):` form, or null when none was given. */
  step: string | null;
  /** Text after the prefix; the whole first line for init/untagged commits. */
  subject: string;
}

// `<phase>:` or `<phase>(<step>):` followed by the rest of the subject.
const PHASE_RE = /^(red|green|refactor|spike)(?:\(([a-z][a-z0-9-]*)\))?:\s*(.*)$/i;
const INIT_RE = /^Initial commit$/i;
// Any line-ending convention. Splitting on "\n" alone leaves a trailing "\r"
// on CRLF messages, which `.` cannot match — that would make every tagged
// commit with Windows line endings parse as untagged.
const LINE_BREAK_RE = /\r\n|\r|\n/;

/**
 * Classify a commit by the first line of its message.
 *
 * @param message Full commit message; only the first line is inspected.
 * @returns The phase, the optional step id and the remaining subject text.
 */
export const parseCommit = (message: string): ParsedCommit => {
  const subject = message.split(LINE_BREAK_RE, 1)[0] ?? "";
  const tagged = PHASE_RE.exec(subject);
  if (tagged) {
    const [, phase, step, rest] = tagged;
    return {
      // The regex is case-insensitive; normalise so "RED:" yields "red".
      phase: (phase ?? "").toLowerCase() as Phase,
      step: step ?? null,
      subject: rest ?? "",
    };
  }
  const phase: Phase = INIT_RE.test(subject) ? "init" : "untagged";
  return { phase, step: null, subject };
};

/** Tallies produced by {@link computeProgress}. */
export interface Progress {
  verifiedSteps: Set<string>;
  redCount: number;
  greenCount: number;
  refactorCount: number;
  spikeCount: number;
  untaggedCount: number;
}

/**
 * Summarise a commit history.
 *
 * A step counts as "verified" when a red commit for it is followed (later in
 * history) by a green commit naming the same step. Refactor, spike and
 * untagged commits are only counted; init commits are ignored.
 *
 * @param commits Commits newest-first, as Forgejo returns them. Not mutated.
 */
export const computeProgress = (commits: { commit: { message: string } }[]): Progress => {
  const progress: Progress = {
    verifiedSteps: new Set<string>(),
    redCount: 0,
    greenCount: 0,
    refactorCount: 0,
    spikeCount: 0,
    untaggedCount: 0,
  };
  // Steps that have had a red commit so far in the chronological walk.
  const pendingRed = new Set<string>();

  // Walk oldest-first so "red before green" can be checked in sequence.
  for (const { commit } of [...commits].reverse()) {
    const { phase, step } = parseCommit(commit.message);
    switch (phase) {
      case "red":
        progress.redCount++;
        if (step) pendingRed.add(step);
        break;
      case "green":
        progress.greenCount++;
        if (step && pendingRed.has(step)) progress.verifiedSteps.add(step);
        break;
      case "refactor":
        progress.refactorCount++;
        break;
      case "spike":
        progress.spikeCount++;
        break;
      case "untagged":
        progress.untaggedCount++;
        break;
      case "init":
        break;
    }
  }
  return progress;
};
src/c31_games.test.ts
+26
−0
| @@ -0,0 +1,26 @@ | ||
| 1 | +import { test, expect } from "bun:test"; | |
| 2 | +import { loadGame } from "./c31_games.ts"; | |
| 3 | + | |
// Happy path: a known kata id resolves to a spec that carries that id.
test("loadGame returns a game with the expected id", async () => {
  const game = await loadGame("string-calc");
  expect(game.id).toBe("string-calc");
});

// Pins the exact step ids of the string-calc kata. toEqual on the mapped
// array checks ordering as well as membership.
test("loadGame returns the kata's step ids in order", async () => {
  const game = await loadGame("string-calc");
  expect(game.steps.map((s) => s.id)).toEqual([
    "empty",
    "single-number",
    "two-numbers",
    "n-numbers",
    "newline-separator",
    "custom-separator",
    "negatives-throw",
  ]);
});

// An id with no spec on disk must reject, and the message must name the id.
test("loadGame throws a clear error for an unknown game", async () => {
  await expect(loadGame("does-not-exist")).rejects.toThrow(
    /unknown game: does-not-exist/,
  );
});
src/c31_games.ts
+55
−0
| @@ -0,0 +1,55 @@ | ||
// One step of a kata, as declared in the kata's spec.ts.
export interface Step {
  // Identifier of the step within its kata; loadGame callers and the judge
  // look steps up by this value.
  id: string;
  // Text of the requirement for this step.
  // NOTE(review): presumably shown to the agent on the kata page — confirm in the renderer.
  requirement: string;
  // Path (relative to the kata's spec.ts) of the authoritative test file.
  // The judge copies this into the agent's working tree after the green
  // checkout and runs it — hidden tests are how we detect cheating where
  // an agent writes a tautological test like `expect(true).toBe(true)`.
  hiddenTestFile: string;
}
| 10 | + | |
// A kata definition: the shape each content/games/<id>/spec.ts exports as `spec`.
export interface Game {
  // Kata identifier; loadGame resolves it to content/games/<id>/spec.ts.
  id: string;
  // One-line summary shown on the games index and OG previews.
  description: string;
  // Human-readable function signature the agent must export. Documented
  // on the kata page so authors know what to build.
  signature: string;
  // The module path the hidden tests will import from. Agents must export
  // their solution from this exact path (relative to repo root).
  importPath: string;
  // Steps of the kata; the loadGame test pins their order for string-calc.
  steps: Step[];
}
| 23 | + | |
| 24 | +import { readdir } from "node:fs/promises"; | |
| 25 | + | |
// Loads the spec of every kata directory under content/games/, ordered by
// directory name. Feeds the games index and sitemap so kata ids never have
// to be hard-coded. Best effort throughout: an unreadable content directory
// yields an empty list, and a kata that fails to load is left out.
export async function listGames(): Promise<Game[]> {
  let dirents;
  try {
    dirents = await readdir("./content/games", { withFileTypes: true });
  } catch {
    return [];
  }

  const kataIds: string[] = [];
  for (const dirent of dirents) {
    if (dirent.isDirectory()) kataIds.push(dirent.name);
  }
  kataIds.sort();

  const loaded: Game[] = [];
  for (const kataId of kataIds) {
    try {
      const game = await loadGame(kataId);
      loaded.push(game);
    } catch {
      // skip katas that fail to load (missing spec.ts, etc.)
    }
  }
  return loaded;
}
| 47 | + | |
// Loads a single kata spec by id from content/games/<id>/spec.ts.
//
// `id` can come from outside the process (route parameters, repo names),
// and it is spliced into both a file path and a dynamic-import specifier.
// Ids that could escape the games directory are therefore rejected before
// any file-system access, with the same "unknown game" error a missing
// kata produces.
//
// Throws Error("unknown game: <id>") when the id is invalid or no spec.ts
// exists, and Error when the module exists but exports no `spec`.
export async function loadGame(id: string): Promise<Game> {
  const escapesGamesDir = id === "" || id === "." || id === ".." || /[\/\\\0]/.test(id);
  if (escapesGamesDir) {
    throw new Error(`unknown game: ${id}`);
  }
  const file = Bun.file(`./content/games/${id}/spec.ts`);
  if (!(await file.exists())) {
    throw new Error(`unknown game: ${id}`);
  }
  const mod = (await import(`../content/games/${id}/spec.ts`)) as { spec?: Game };
  // Without this check a spec.ts lacking the export would hand `undefined`
  // to callers typed as Game (listGames would push it into its result).
  if (!mod.spec) {
    throw new Error(`game ${id} does not export a spec`);
  }
  return mod.spec;
}
src/c31_guides.ts
+26
−0
| @@ -0,0 +1,26 @@ | ||
| 1 | +// c31 — model: agent-specific TDD-walkthrough registry. Drives | |
| 2 | +// /guides + /guides/:slug. Markdown bodies live in content/guides/<slug>.md. | |
| 3 | + | |
// Metadata for one guide in the ALL_GUIDES registry.
export interface GuideEntry {
  // URL slug used by /guides/:slug; also names the markdown body file
  // content/guides/<slug>.md.
  slug: string;
  // Title of the guide.
  title: string;
  // Short summary of the guide.
  // NOTE(review): presumably used for the index listing / meta description — confirm in the renderer.
  description: string;
}
| 9 | + | |
// Registry of the published guides, one entry per agent. Each slug must
// have a matching markdown body at content/guides/<slug>.md.
// NOTE(review): array order is presumably the display order on /guides — confirm in the renderer.
export const ALL_GUIDES: GuideEntry[] = [
  {
    slug: "claude-code",
    title: "TDD with Claude Code",
    description: "Run TDD katas through Anthropic's Claude Code with phase-separated prompts and CLAUDE.md rules so the judge scores clean red→green→refactor cycles.",
  },
  {
    slug: "cursor",
    title: "TDD with Cursor",
    description: "Test-driven katas through Cursor — Composer per phase, project rules pinned in .cursor/rules, fresh context for red vs green.",
  },
  {
    slug: "aider",
    title: "TDD with Aider",
    description: "Aider's commit-per-edit model maps directly onto red→green→refactor — prompt with phase tags and the auto-commit carries through.",
  },
];
src/c31_project_config.ts
+102
−0
| @@ -0,0 +1,102 @@ | ||
| 1 | +// c31 — model: types + parser for `.tdd-md.json`, the per-repo opt-in | |
| 2 | +// config used by the project-tracking pipeline. Pure data, no I/O. | |
| 3 | +// Fetching the file lives in c14_github; persistence lives in c13_database; | |
| 4 | +// page rendering lives in c51_render. | |
| 5 | + | |
// File name of the per-repo opt-in config.
export const PROJECT_CONFIG_PATH = ".tdd-md.json";
// The only schema version parseProjectConfig accepts.
export const PROJECT_CONFIG_VERSION = 1;

// Allowed values of the `test_runner` config field.
export type TestRunner = "none" | "bun";
// Known agent identifiers plus an "unknown" fallback.
export type AgentSlug = "claude-code" | "cursor" | "aider" | "unknown";
| 11 | + | |
// Normalised contents of a repo's .tdd-md.json, as returned by
// parseProjectConfig. Field names are snake_case because they mirror the
// JSON keys one-to-one.
export interface ProjectConfig {
  // Schema version; parseProjectConfig rejects anything other than
  // PROJECT_CONFIG_VERSION.
  version: number;
  // "none" → trace-mode judging only (commit discipline, no test execution).
  // "bun" → full sandbox-runner judging (later sliver — registration accepts
  // the value but judging stays trace-only until the runner ships).
  test_runner: TestRunner;
  // Branches whose pushes get scored. Defaults to ["main"].
  tracked_branches: string[];
  // Optional reporting metadata. The parser only copies these when they
  // are non-empty strings.
  display_name?: string;
  team?: string;
}
| 24 | + | |
// Baseline config: current schema version, trace-only judging, and only
// "main" tracked. parseProjectConfig applies the same defaults for omitted
// fields, but spells them out as literals rather than reading this object.
export const DEFAULT_CONFIG: ProjectConfig = {
  version: PROJECT_CONFIG_VERSION,
  test_runner: "none",
  tracked_branches: ["main"],
};
| 30 | + | |
// Validates and normalises a parsed JSON blob into a ProjectConfig.
//
// Rules:
//   - `raw` must be a plain JSON object (arrays and null are rejected);
//   - `version` is required and must equal PROJECT_CONFIG_VERSION;
//   - `test_runner` is optional ("none" | "bun"), defaulting to "none";
//   - `tracked_branches` is optional, defaulting to ["main"]; when present
//     it must be an array with at least one entry, all non-empty strings;
//   - `display_name` / `team` are copied only when they are non-empty strings.
//
// Throws Error with a human-readable message on failure — those messages
// are surfaced verbatim to the registering user, so they need to be useful.
export const parseProjectConfig = (raw: unknown): ProjectConfig => {
  // Arrays are `typeof "object"` too; reject them here so the user sees the
  // shape error instead of a confusing "version undefined" one.
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
    throw new Error(".tdd-md.json must be a JSON object");
  }
  const obj = raw as Record<string, unknown>;

  const version = obj.version;
  if (typeof version !== "number" || version !== PROJECT_CONFIG_VERSION) {
    throw new Error(
      `.tdd-md.json has version ${JSON.stringify(version)}; expected ${PROJECT_CONFIG_VERSION}`,
    );
  }

  let testRunner: TestRunner = "none";
  const runner = obj.test_runner;
  if (runner !== undefined) {
    if (runner !== "none" && runner !== "bun") {
      throw new Error(
        `.tdd-md.json: test_runner must be "none" or "bun" (got ${JSON.stringify(runner)})`,
      );
    }
    testRunner = runner;
  }

  let trackedBranches: string[] = ["main"];
  const branches = obj.tracked_branches;
  if (branches !== undefined) {
    // An explicit [] used to pass (`some` on an empty array is false) and
    // produced a project that tracked nothing, contradicting the message.
    if (
      !Array.isArray(branches) ||
      branches.length === 0 ||
      branches.some((b) => typeof b !== "string" || !b)
    ) {
      throw new Error(".tdd-md.json: tracked_branches must be a non-empty array of branch names");
    }
    // Copy so the returned config never aliases the caller's array.
    trackedBranches = [...branches] as string[];
  }

  const config: ProjectConfig = {
    version,
    test_runner: testRunner,
    tracked_branches: trackedBranches,
  };
  if (typeof obj.display_name === "string" && obj.display_name) {
    config.display_name = obj.display_name;
  }
  if (typeof obj.team === "string" && obj.team) {
    config.team = obj.team;
  }
  return config;
};
| 74 | + | |
// Parse a GitHub repo URL or owner/repo shorthand. Accepts:
//   https://github.com/syntaxai/tdd.md
//   https://github.com/syntaxai/tdd.md.git
//   https://github.com/syntaxai/tdd.md.git/
//   github.com/syntaxai/tdd.md
//   syntaxai/tdd.md
// Returns the owner + repo or throws an Error with a precise message.
//
// Exactly two path segments are required, so deep links such as
// .../tree/main are rejected. "." and ".." are refused as owner or repo:
// they satisfy the character whitelist but would act as path traversal
// once the pair is spliced into a URL.
export const parseRepoIdentifier = (raw: string): { owner: string; repo: string } => {
  const trimmed = raw.trim();
  if (!trimmed) throw new Error("Repository URL is required.");
  let path = trimmed;
  const httpsMatch = path.match(/^https?:\/\/(?:www\.)?github\.com\/(.+)$/i);
  if (httpsMatch?.[1]) path = httpsMatch[1];
  const bareMatch = path.match(/^github\.com\/(.+)$/i);
  if (bareMatch?.[1]) path = bareMatch[1];
  // Trailing slashes must go first: stripping ".git" before them left
  // "owner/repo.git/" parsed as the repo "repo.git".
  path = path.replace(/\/+$/, "").replace(/\.git$/i, "");
  const parts = path.split("/").filter(Boolean);
  const owner = parts[0];
  const repo = parts[1];
  if (parts.length !== 2 || !owner || !repo) {
    throw new Error(
      `Couldn't parse "${raw}" as a GitHub repo. Use a URL like https://github.com/owner/name or the shorthand owner/name.`,
    );
  }
  if (!/^[A-Za-z0-9._-]+$/.test(owner) || !/^[A-Za-z0-9._-]+$/.test(repo)) {
    throw new Error(`"${raw}" contains characters that aren't valid for a GitHub owner/repo.`);
  }
  const isDotSegment = (segment: string): boolean => segment === "." || segment === "..";
  if (isDotSegment(owner) || isDotSegment(repo)) {
    throw new Error(`"${raw}" isn't a valid GitHub owner/repo.`);
  }
  return { owner, repo };
};
src/c31_reports_demo.ts
+201
−0
| @@ -0,0 +1,201 @@ | ||
| 1 | +// c31 — model: synthetic dataset for the reporting mockups. Pure data, | |
| 2 | +// no I/O, no rendering. The c51_render builders consume these to produce | |
| 3 | +// the demo views at /reports/demo/*. When the real ingest pipeline ships | |
| 4 | +// the same shape gets populated from c13_database queries instead. | |
| 5 | + | |
// One entry of an agent's `recent` list in the demo report.
export interface RecentFlagged {
  // Calendar date as a YYYY-MM-DD string (format used by the demo data).
  date: string;
  repo: string;
  // Abbreviated commit hash (7 hex characters in the demo data).
  sha: string;
  // Phase tag of the flagged commit.
  phase: "red" | "green" | "refactor";
  // Failure label, e.g. "red-did-not-fail" or "broken refactor".
  failure: string;
  // NOTE(review): presumably the score delta for the commit — zero or
  // negative in every demo row; confirm against the scoring code.
  pts: number;
}
| 14 | + | |
// One labelled share of an agent's failure mix. Within each demo report
// the `pct` values of the slices add up to 100.
export interface FailureSlice {
  label: string;
  // Percentage share of this slice.
  pct: number;
  // NOTE(review): presumably selects the colour used when rendering — confirm in c51_render.
  tone: "red" | "green" | "muted" | "accent";
}
| 20 | + | |
// Per-agent summary row of the demo report dataset.
export interface AgentReport {
  // Agent identifier; TestStability.lastBrokenBy refers to the same union.
  slug: "claude-code" | "cursor" | "aider";
  // Display name of the agent.
  name: string;
  // In the demo data `score` equals the last `trend` point and `delta`
  // equals last-minus-first `trend` point.
  score: number;
  delta: number;
  commits: number;
  phaseCoveragePct: number;
  // NOTE(review): streak semantics (unit, what breaks it) are not visible here — confirm.
  streak: number;
  streakBroken: boolean;
  // Label and percentage of the agent's most frequent issue.
  topIssueLabel: string;
  topIssuePct: number;
  // Breakdown of outcomes; slice percentages sum to 100 in the demo data.
  failureMix: FailureSlice[];
  // Score series; every demo report carries 30 points.
  trend: number[];
  // Recently flagged commits, newest first in the demo data.
  recent: RecentFlagged[];
}
| 36 | + | |
// One currently failing test inside a TestSnapshot.
export interface TestFailure {
  // Test label in "<file> > <test name>" form (as written in the demo data).
  test: string;
  // YYYY-MM-DD date string.
  // NOTE(review): presumably the date the test started failing — confirm.
  since: string;
  // Set on failures that are marked flaky; omitted otherwise.
  flaky?: boolean;
}
| 42 | + | |
// Test-suite state of one repo branch. In the demo data
// `total === passing + failing` and `failures.length === failing`.
export interface TestSnapshot {
  repo: string;
  branch: string;
  total: number;
  passing: number;
  failing: number;
  // Details of the failing tests; empty when `failing` is 0.
  failures: TestFailure[];
}
| 51 | + | |
// Stability record of one test over the reporting period.
export interface TestStability {
  // Test label in "<file> > <test name>" form (as written in the demo data).
  test: string;
  repo: string;
  // NOTE(review): presumably counts of runs that passed / failed and of
  // times the test was deleted — confirm against the ingest pipeline.
  pass: number;
  fail: number;
  deleted: number;
  // Agent slug credited with the most recent breakage.
  lastBrokenBy: AgentReport["slug"];
  // Set on rows that should be highlighted; omitted otherwise.
  flagged?: boolean;
}
| 61 | + | |
// Reporting window label for the demo (pre-formatted display string).
export const DEMO_PERIOD = "2026-01-01 → 2026-03-31";
// Organisation name used by the demo.
export const DEMO_ORG = "acme-corp";
// Number of repos in the demo; matches the number of DEMO_SNAPSHOTS entries.
export const DEMO_REPOS = 4;
| 65 | + | |
// Synthetic per-repo test snapshots, one per demo repo (all on "main").
// Invariants kept by hand: total === passing + failing, and each
// `failures` list has exactly `failing` entries.
export const DEMO_SNAPSHOTS: TestSnapshot[] = [
  {
    repo: "api-gateway",
    branch: "main",
    total: 247,
    passing: 245,
    failing: 2,
    failures: [
      { test: "rate-limit.spec.ts > resets at midnight UTC", since: "2026-03-26" },
      { test: "webhook.spec.ts > retries on 5xx with backoff", since: "2026-03-28" },
    ],
  },
  {
    repo: "billing-service",
    branch: "main",
    total: 89,
    passing: 89,
    failing: 0,
    failures: [],
  },
  {
    repo: "data-pipeline",
    branch: "main",
    total: 156,
    passing: 154,
    failing: 2,
    failures: [
      { test: "ingest.spec.ts > handles malformed CSV row", since: "2026-03-22" },
      { test: "ingest.spec.ts > deduplicates by hash", since: "2026-03-22" },
    ],
  },
  {
    repo: "frontend-web",
    branch: "main",
    total: 312,
    passing: 310,
    failing: 2,
    failures: [
      { test: "checkout.spec.ts > handles network timeout", since: "2026-03-15", flaky: true },
      { test: "login.spec.ts > redirects after auth", since: "2026-03-11", flaky: true },
    ],
  },
];
| 109 | + | |
// Synthetic per-test stability rows. Listed in descending `fail` order
// (11 down to 1); ties keep the order written here.
export const DEMO_STABILITY: TestStability[] = [
  { test: "webhook.spec.ts > retries on 5xx with backoff", repo: "api-gateway", pass: 33, fail: 11, deleted: 0, lastBrokenBy: "cursor", flagged: true },
  { test: "checkout.spec.ts > handles network timeout", repo: "frontend-web", pass: 51, fail: 8, deleted: 0, lastBrokenBy: "cursor", flagged: true },
  { test: "rate-limit.spec.ts > resets at midnight UTC", repo: "api-gateway", pass: 42, fail: 6, deleted: 0, lastBrokenBy: "claude-code" },
  { test: "login.spec.ts > redirects after auth", repo: "frontend-web", pass: 44, fail: 5, deleted: 1, lastBrokenBy: "cursor", flagged: true },
  { test: "ingest.spec.ts > handles malformed CSV row", repo: "data-pipeline", pass: 38, fail: 4, deleted: 0, lastBrokenBy: "aider" },
  { test: "auth.spec.ts > validates JWT signature", repo: "api-gateway", pass: 47, fail: 3, deleted: 0, lastBrokenBy: "claude-code" },
  { test: "ingest.spec.ts > deduplicates by hash", repo: "data-pipeline", pass: 30, fail: 3, deleted: 0, lastBrokenBy: "aider" },
  { test: "billing.spec.ts > applies tax bracket", repo: "billing-service", pass: 29, fail: 2, deleted: 0, lastBrokenBy: "claude-code" },
  { test: "webhook.spec.ts > signs payload with HMAC", repo: "api-gateway", pass: 35, fail: 2, deleted: 1, lastBrokenBy: "cursor", flagged: true },
  { test: "billing.spec.ts > computes monthly total", repo: "billing-service", pass: 28, fail: 1, deleted: 1, lastBrokenBy: "cursor", flagged: true },
  { test: "invoice.spec.ts > generates PDF receipt", repo: "billing-service", pass: 25, fail: 1, deleted: 0, lastBrokenBy: "claude-code" },
  { test: "pricing.spec.ts > rounds to nearest cent", repo: "billing-service", pass: 26, fail: 1, deleted: 0, lastBrokenBy: "aider" },
];
| 124 | + | |
// Synthetic per-agent reports, one per agent slug. Invariants kept by hand
// in every entry: failureMix percentages sum to 100, `trend` has 30 points,
// `score` equals the last trend point, and `delta` equals the last trend
// point minus the first.
export const DEMO_REPORTS: AgentReport[] = [
  {
    slug: "claude-code",
    name: "Claude Code",
    score: 78,
    delta: +6,
    commits: 612,
    phaseCoveragePct: 92,
    streak: 47,
    streakBroken: false,
    topIssueLabel: "red-did-not-fail",
    topIssuePct: 8,
    failureMix: [
      { label: "clean cycles", pct: 84, tone: "green" },
      { label: "red-did-not-fail", pct: 8, tone: "red" },
      { label: "broken refactor", pct: 4, tone: "red" },
      { label: "test-deleted", pct: 2, tone: "red" },
      { label: "no phase tag", pct: 2, tone: "muted" },
    ],
    trend: [72, 73, 71, 74, 72, 75, 73, 75, 77, 76, 75, 76, 78, 77, 79, 78, 77, 79, 80, 78, 79, 80, 79, 81, 80, 82, 81, 80, 79, 78],
    recent: [
      { date: "2026-03-29", repo: "api-gateway", sha: "f1c8b3a", phase: "red", failure: "red-did-not-fail", pts: -5 },
      { date: "2026-03-24", repo: "billing-service", sha: "9d2e1f4", phase: "refactor", failure: "broken refactor", pts: -5 },
      { date: "2026-03-18", repo: "data-pipeline", sha: "62a9cb7", phase: "green", failure: "no phase tag (parent)", pts: 0 },
    ],
  },
  {
    slug: "cursor",
    name: "Cursor",
    score: 54,
    delta: -15,
    commits: 489,
    phaseCoveragePct: 71,
    streak: 3,
    streakBroken: true,
    topIssueLabel: "test-deleted in refactor",
    topIssuePct: 14,
    failureMix: [
      { label: "clean cycles", pct: 64, tone: "green" },
      { label: "test-deleted", pct: 14, tone: "red" },
      { label: "red-did-not-fail", pct: 9, tone: "red" },
      { label: "broken refactor", pct: 7, tone: "red" },
      { label: "no phase tag", pct: 6, tone: "muted" },
    ],
    trend: [69, 70, 71, 72, 70, 71, 72, 73, 72, 71, 72, 70, 68, 65, 60, 55, 50, 52, 54, 53, 56, 54, 52, 55, 53, 54, 56, 55, 54, 54],
    recent: [
      { date: "2026-03-28", repo: "api-gateway", sha: "a1b2c3d", phase: "refactor", failure: "test-deleted", pts: -20 },
      // NOTE(review): phase "green" paired with "broken refactor" — every
      // other "broken refactor" row has phase "refactor"; confirm intended.
      { date: "2026-03-26", repo: "api-gateway", sha: "4e5f6a7", phase: "green", failure: "broken refactor", pts: -5 },
      { date: "2026-03-23", repo: "billing-service", sha: "8b9c0d1", phase: "red", failure: "red-did-not-fail", pts: -5 },
      { date: "2026-03-21", repo: "api-gateway", sha: "2e3f4a5", phase: "refactor", failure: "test-deleted", pts: -20 },
      { date: "2026-03-19", repo: "data-pipeline", sha: "6b7c8d9", phase: "refactor", failure: "broken refactor", pts: -5 },
    ],
  },
  {
    slug: "aider",
    name: "Aider",
    score: 89,
    delta: +2,
    commits: 146,
    phaseCoveragePct: 96,
    streak: 89,
    streakBroken: false,
    topIssueLabel: "broken refactor",
    topIssuePct: 3,
    failureMix: [
      { label: "clean cycles", pct: 94, tone: "green" },
      { label: "broken refactor", pct: 3, tone: "red" },
      { label: "red-did-not-fail", pct: 2, tone: "red" },
      { label: "no phase tag", pct: 1, tone: "muted" },
    ],
    trend: [87, 88, 89, 88, 87, 89, 90, 89, 88, 89, 90, 88, 89, 90, 91, 89, 88, 89, 90, 89, 90, 91, 89, 88, 89, 90, 89, 90, 89, 89],
    recent: [
      { date: "2026-03-27", repo: "data-pipeline", sha: "3a4b5c6", phase: "refactor", failure: "broken refactor", pts: -5 },
      { date: "2026-03-15", repo: "billing-service", sha: "7d8e9f0", phase: "red", failure: "red-did-not-fail", pts: -5 },
    ],
  },
];
src/c32_judge.ts
+370
−0
| @@ -0,0 +1,370 @@ | ||
| 1 | +import { mkdtempSync, rmSync } from "fs"; | |
| 2 | +import { join } from "path"; | |
| 3 | +import { tmpdir } from "os"; | |
| 4 | +import { parseCommit, type Phase } from "./c31_commits.ts"; | |
| 5 | +import { saveRun, type Verdict, type StepVerdict, type RefactorVerdict, type Mode } from "./c13_database.ts"; | |
| 6 | +import { loadGame, type Game } from "./c31_games.ts"; | |
| 7 | + | |
// Test runners the judge knows about: "bun" executes tests, "none" means
// trace-only judging.
type TestRunner = "bun" | "none";

// Settings the judge reads from the agent repo's tdd.config.json.
interface TddConfig {
  // Scoring mode; controls how penalties are scaled (see applyMode).
  mode: Mode;
  testRunner: TestRunner;
}
| 14 | + | |
// Reads the scoring mode and test runner from tdd.config.json at the root
// of the agent's checkout (`cwd`). Defaults are strict / bun; they apply
// when the file is missing, unparseable, or holds unrecognised values.
//
//   { "mode": "pragmatic", "test_runner": "none" }
//
// test_runner: "none" switches to trace-only judging — no checkout and no
// test execution — for projects whose suite Bun cannot run (e.g. .NET, or
// Python without bun-compatible tests).
const readConfig = async (cwd: string): Promise<TddConfig> => {
  const config: TddConfig = { mode: "strict", testRunner: "bun" };
  const file = Bun.file(join(cwd, "tdd.config.json"));
  if (!(await file.exists())) return config;
  try {
    const parsed = (await file.json()) as { mode?: string; test_runner?: string };
    const requestedMode = parsed.mode;
    // Only the two non-default modes need copying; anything else stays strict.
    if (requestedMode === "pragmatic" || requestedMode === "learning") {
      config.mode = requestedMode;
    }
    if (parsed.test_runner === "none") {
      config.testRunner = "none";
    }
  } catch {
    // best effort — bad config falls back to defaults
  }
  return config;
};
| 38 | + | |
// Scales a score delta by scoring mode. Positive (and zero) deltas are
// unchanged across modes — earned credit is earned credit. Penalties are:
//   strict     → unchanged
//   pragmatic  → halved, rounded toward zero (-5 → -2)
//   learning   → dropped (0)
const applyMode = (delta: number, mode: Mode): number => {
  if (delta >= 0) return delta;
  if (mode === "learning") return 0;
  if (mode === "pragmatic") {
    // Math.ceil(-0.5) is -0; adding 0 normalises that to plain 0 so a
    // halved -1 penalty never surfaces as negative zero in stored or
    // compared scores.
    return Math.ceil(delta / 2) + 0;
  }
  return delta;
};
| 47 | + | |
// Plain-language summary of a step verdict, written to the agent (not
// the human admin). One short paragraph; named intentionally so callers
// can see it next to the row in the score table.
//
// Only `status`, `hiddenPassed` and `mode` are read; `redSha` and
// `greenSha` are accepted as part of the params object but unused here.
// The switch has no default branch, so a status outside the listed cases
// falls through and yields no message.
const explainStep = (params: {
  status: StepVerdict["status"];
  redSha: string | null;
  greenSha: string | null;
  hiddenPassed: boolean | null;
  mode: Mode;
}): string => {
  const { status, hiddenPassed, mode } = params;
  switch (status) {
    case "verified":
      return "Red failed as expected, green passes your tests, and the kata's hidden tests confirm the implementation matches the requirement.";
    case "discipline-only":
      return "Red→green discipline holds, but this kata didn't ship hidden tests for the step. Partial credit awarded; full +20 isn't possible without authoritative verification.";
    case "no-green":
      return "Red commit landed; the matching green(<step>) commit hasn't been pushed yet. Push your green to lock in the score.";
    case "red-did-not-fail":
      // The wording depends on mode: pragmatic treats a combined red+green
      // commit as acceptable, every other mode explains the violation.
      return mode === "pragmatic"
        ? "Combined red+green commit detected. Pragmatic mode allows this — the cycle still counts, just with a softer score than a clean separation."
        : "Red commit's tests already passed when the step was first introduced — meaning the implementation was added before the test, or the test is tautological. Switch to pragmatic mode if you commit red+green together intentionally.";
    case "green-did-not-pass":
      return "Green commit's own tests still fail. The implementation doesn't yet satisfy the test you wrote — fix the impl, or reconsider whether the test reflects the requirement.";
    case "hidden-tests-failed":
      // `false` means the hidden tests ran and failed; any other value
      // (null) is reported as an inconclusive verification.
      return hiddenPassed === false
        ? "Your tests pass, but the kata's hidden tests don't — this is the classic tautology trap. Tighten your test to mirror the requirement (e.g., assert the actual return value, not just that it runs)."
        : "Your tests pass, but hidden verification was inconclusive. Re-push to retry.";
    case "test-deleted":
      return "Test count dropped between red and green for this step. Once a test exists it must keep existing — refactor it, don't delete it. If the test was wrong, replace it in a separate commit before resuming the cycle.";
    case "trace-verified":
      return "Trace-only mode: red→green pair found in the commit log. Tests weren't executed (test_runner: \"none\"). Switch to bun runner for behaviour verification.";
    case "trace-tests-shrunk":
      return "Trace-only mode: the green commit's tree has fewer test files than the red commit's tree — looks like deletion. If you renamed or split test files, the tally still drops.";
  }
};
| 84 | + | |
// Plain-language summary of a refactor verdict: one sentence for a refactor
// that kept the tests green, another for one that broke them.
const explainRefactor = (passed: boolean): string => {
  if (!passed) {
    return "Refactor commit broke at least one test. Either revert the refactor or write a new red→green to capture the changed behavior.";
  }
  return "Tests stayed green through the refactor — structural change without behavior change, the canonical refactor.";
};
| 89 | + | |
// Base URL the judge clones agent repos from; overridable via FORGEJO_URL.
const FORGEJO_INTERNAL = process.env.FORGEJO_URL ?? "https://git.tdd.md";
// Wall-clock budget for a single `bun test` run before it is killed.
const TEST_TIMEOUT_MS = 8000;
| 92 | + | |
// Builds the complete environment handed to git and bun subprocesses.
// Nothing is inherited from the parent process, so agent code never sees
// FORGEJO_ADMIN_TOKEN, GITHUB_CLIENT_SECRET, or SESSION_SECRET. PATH is a
// fixed list; HOME and TMPDIR both point at the per-run directory `cwd` so
// dotfile and temp writes stay inside it.
const sandboxEnv = (cwd: string): Record<string, string> => {
  const env: Record<string, string> = {
    PATH: "/usr/local/bin:/usr/bin:/bin",
    HOME: cwd,
    TMPDIR: cwd,
    NODE_ENV: "test",
  };
  return env;
};
| 103 | + | |
// Runs `cmd` in `cwd` with the sandboxed environment and captures its
// output.
//
// Parameters:
//   cmd        argv of the process to spawn
//   cwd        working directory (also HOME/TMPDIR via sandboxEnv)
//   timeoutMs  wall-clock budget; the process is SIGKILLed once it elapses
//
// Returns trimmed stdout/stderr, the exit code, and whether the timeout
// fired.
//
// stdout and stderr are drained concurrently with waiting for exit. Waiting
// for exit first can stall a child that fills a pipe nobody is reading,
// which then only ends when the timeout kills it.
const runProc = async (
  cmd: string[],
  cwd: string,
  timeoutMs: number,
): Promise<{ stdout: string; stderr: string; exitCode: number; timedOut: boolean }> => {
  const proc = Bun.spawn(cmd, {
    cwd,
    stdout: "pipe",
    stderr: "pipe",
    env: sandboxEnv(cwd),
  });
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    proc.kill("SIGKILL");
  }, timeoutMs);
  try {
    // The timer is cleared the moment the process exits (not when the pipes
    // close) so a finished run can never be reported as timed out.
    const exited: Promise<number> = proc.exited.finally(() => clearTimeout(timer));
    const [exitCode, stdout, stderr] = await Promise.all([
      exited,
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
    ]);
    return { stdout: stdout.trim(), stderr: stderr.trim(), exitCode, timedOut };
  } finally {
    // Covers the path where reading a stream throws before exit resolves.
    clearTimeout(timer);
  }
};
| 126 | + | |
// Runs the agent's own suite with `bun test` in `cwd`. True only when the
// run finished within TEST_TIMEOUT_MS and exited 0 (bun test exits 0 only
// when every test passes).
const runTests = async (cwd: string): Promise<boolean> => {
  const { exitCode, timedOut } = await runProc(["bun", "test"], cwd, TEST_TIMEOUT_MS);
  if (timedOut) return false;
  return exitCode === 0;
};
| 132 | + | |
// Path pattern for conventional test-file names across ecosystems. The
// final path segment must be one of:
//   foo.test.ts / foo.spec.ts   (any lowercase extension)
//   test_foo.py                 (test_ prefix)
//   foo_test.go                 (_test suffix, any lowercase extension)
//   foo_spec.rb
//   FooTests.cs / FooTest.cs, FooTest.java, FooSpec.scala
// or the file must sit directly inside a test/ or tests/ directory.
const TEST_FILE_PATH_RE =
  /(?:^|\/)(?:[^/]*\.(?:test|spec)\.[a-z]+|[Tt]ests?\/[^/]+|test_[^/]+|[^/]+_test\.[a-z]+|[^/]+_spec\.rb|[^/]+[Tt]ests?\.cs|[^/]+[Tt]est\.java|[^/]+Spec\.scala)$/;

// Language-agnostic test-file counter for trace-only mode. Lists the tree
// at `sha` with `git ls-tree`, so no checkout of the working tree is
// needed, and counts the paths that look like test files. Returns 0 when
// the git command fails.
//
// The _spec.rb and Spec.scala forms were documented but missing from the
// pattern, so Ruby and Scala suites always counted as zero test files.
const countTestFiles = async (cwd: string, sha: string): Promise<number> => {
  const r = await runProc(["git", "ls-tree", "-r", "--name-only", sha], cwd, 5000);
  if (r.exitCode !== 0) return 0;
  let count = 0;
  for (const line of r.stdout.split("\n")) {
    if (TEST_FILE_PATH_RE.test(line)) count++;
  }
  return count;
};
| 148 | + | |
// Counts `test(` / `it(` call sites across the tracked *.test.ts files of
// the checkout in `cwd`. The judge compares this number between red and
// green to catch an agent deleting tests to make a regression "pass" — a
// cardinal TDD sin per the kata spec. Paths containing "__hidden_" are
// excluded; unreadable files count as empty; a failing `git ls-files`
// yields 0.
const countTests = async (cwd: string): Promise<number> => {
  const listing = await runProc(["git", "ls-files", "*.test.ts"], cwd, 5000);
  if (listing.exitCode !== 0) return 0;

  let total = 0;
  for (const relPath of listing.stdout.split("\n")) {
    if (!relPath || relPath.includes("__hidden_")) continue;
    const handle = Bun.file(join(cwd, relPath));
    let source: string;
    try {
      source = await handle.text();
    } catch {
      source = "";
    }
    const callSites = source.match(/\b(?:test|it)\s*\(/g);
    if (callSites) total += callSites.length;
  }
  return total;
};
| 166 | + | |
// Runs the kata's authoritative tests for one step against whatever commit
// is currently checked out in `cwd`. The hidden test file is written into
// the working tree as `__hidden_<stepId>__.test.ts` so it cannot collide
// with the agent's own filenames; only that file is run, and it is removed
// again afterwards whether or not the run succeeded.
//
// Returns true/false for pass/fail (a timeout counts as fail), or null when
// the spec has no such step or its hidden test file does not exist.
const runHiddenTests = async (cwd: string, spec: Game, stepId: string): Promise<boolean | null> => {
  const step = spec.steps.find((candidate) => candidate.id === stepId);
  if (!step) return null;

  const hidden = Bun.file(`./content/games/${spec.id}/${step.hiddenTestFile}`);
  const available = await hidden.exists();
  if (!available) return null;

  const body = await hidden.text();
  const fileName = `__hidden_${stepId}__.test.ts`;
  const dest = join(cwd, fileName);
  await Bun.write(dest, body);
  try {
    const { exitCode, timedOut } = await runProc(["bun", "test", fileName], cwd, TEST_TIMEOUT_MS);
    return exitCode === 0 && !timedOut;
  } finally {
    try {
      rmSync(dest, { force: true });
    } catch {
      // best effort
    }
  }
};
| 193 | + | |
// One commit from the cloned repo's log, reduced to what the judge needs.
interface CommitInfo {
  // Full commit hash (git log %H).
  sha: string;
  // Phase and step as parsed from the commit message by parseCommit;
  // `step` is null when the message names no step.
  phase: Phase;
  step: string | null;
}
| 199 | + | |
// Reads the commit history of the repo in `cwd`, oldest first, and parses
// each message into a phase and step. The log format separates hash and
// message with the ASCII unit separator (0x1f) and terminates each record
// with the record separator (0x1e), so multi-line messages stay intact.
// Returns [] when `git log` fails.
const readCommits = async (cwd: string): Promise<CommitInfo[]> => {
  const log = await runProc(["git", "log", "--reverse", "--pretty=format:%H%x1f%B%x1e"], cwd, 10000);
  if (log.exitCode !== 0) return [];

  return log.stdout
    .split("\x1e")
    .map((record) => record.trim())
    .filter((record) => record.length > 0)
    .flatMap((record): CommitInfo[] => {
      const fields = record.split("\x1f");
      const sha = fields[0];
      if (!sha) return [];
      const { phase, step } = parseCommit(fields[1] ?? "");
      return [{ sha, phase, step }];
    });
};
| 214 | + | |
/**
 * Clones `owner/repo` from Forgejo into a fresh temp directory, scores its
 * phase-tagged commits, persists the verdict via `saveRun` and returns it.
 *
 * Per step (keyed by the first `red` commit seen for that step):
 * - trace-only runner (`testRunner === "none"`): no checkout, no test run;
 *   `no-green` 0, `trace-tests-shrunk` -10, `trace-verified` +10.
 * - otherwise: `no-green` 0, `test-deleted` -20, `red-did-not-fail` -5,
 *   `green-did-not-pass` -5, `hidden-tests-failed` 0, `verified` +20,
 *   `discipline-only` +5.
 * Every `refactor` commit scores +5 when the tests pass at that commit and
 * -5 otherwise. All deltas go through `applyMode` before being summed.
 *
 * The temp directory is removed in all cases (best effort).
 *
 * @param owner repository owner, used in the clone URL and temp-dir prefix
 * @param repo repository name; also passed to `loadGame` to look up a kata spec
 * @returns the verdict that was saved
 * @throws Error when the clone exits non-zero
 */
export const judge = async (owner: string, repo: string): Promise<Verdict> => {
  const cwd = mkdtempSync(join(tmpdir(), `judge-${owner}-${repo}-`));
  try {
    // Agent repos default to private. Authenticate via admin token in
    // an http.extraheader so the token isn't persisted in the cloned
    // repo's config (extraheader applies to the clone request only).
    const cloneUrl = `${FORGEJO_INTERNAL}/${owner}/${repo}.git`;
    const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
    const gitArgs = adminToken
      ? ["-c", `http.extraheader=Authorization: token ${adminToken}`, "clone", "--quiet", cloneUrl, "."]
      : ["clone", "--quiet", cloneUrl, "."];
    const cloneR = await runProc(["git", ...gitArgs], cwd, 30000);
    if (cloneR.exitCode !== 0) {
      throw new Error(`clone failed: ${cloneR.stderr || cloneR.stdout}`);
    }

    const commits = await readCommits(cwd);
    const headR = await runProc(["git", "rev-parse", "HEAD"], cwd, 5000);
    // NOTE(review): stdout is stored as-is and the exit code is not checked;
    // if runProc does not trim its output, headSha carries a trailing
    // newline — confirm against runProc.
    const headSha = headR.stdout;

    // First red per step + first green-after-red per step (chronological).
    const stepRed = new Map<string, string>();
    const stepGreen = new Map<string, string>();
    for (const c of commits) {
      if (!c.step) continue;
      if (c.phase === "red" && !stepRed.has(c.step)) {
        stepRed.set(c.step, c.sha);
      } else if (c.phase === "green" && stepRed.has(c.step) && !stepGreen.has(c.step)) {
        stepGreen.set(c.step, c.sha);
      }
    }

    // Read the agent's mode + runner preferences from tdd.config.json.
    const { mode, testRunner } = await readConfig(cwd);

    // Load the kata's authoritative spec — used to fetch hidden tests
    // per step. Repos that don't match a known kata get scored on red→green
    // discipline only (no hidden-test verification).
    let spec: Game | null = null;
    try {
      spec = await loadGame(repo);
    } catch {
      spec = null;
    }

    const steps: StepVerdict[] = [];
    for (const [stepId, redSha] of stepRed) {
      const greenSha = stepGreen.get(stepId) ?? null;

      if (testRunner === "none") {
        // Trace-only path: don't checkout, don't run anything. Score
        // purely from the commit log + a language-agnostic test-file
        // count via `git ls-tree`. Useful for non-Bun projects.
        const redFiles = await countTestFiles(cwd, redSha);
        const greenFiles = greenSha ? await countTestFiles(cwd, greenSha) : redFiles;
        const filesShrank = greenSha !== null && greenFiles < redFiles;

        let status: StepVerdict["status"];
        let baseDelta = 0;
        if (greenSha === null) {
          status = "no-green";
        } else if (filesShrank) {
          status = "trace-tests-shrunk";
          baseDelta = -10;
        } else {
          status = "trace-verified";
          baseDelta = 10;
        }
        const scoreDelta = applyMode(baseDelta, mode);
        const explanation = explainStep({ status, redSha, greenSha, hiddenPassed: null, mode });
        steps.push({
          stepId, redSha, greenSha,
          redFailed: null, greenPassed: null, hiddenPassed: null,
          status, scoreDelta, explanation,
        });
        continue;
      }

      // NOTE(review): checkout results are not inspected here or below; a
      // failed checkout would leave the previous tree in place and the tests
      // would run against the wrong commit — confirm this is acceptable.
      await runProc(["git", "checkout", "--quiet", redSha], cwd, 5000);
      const redTestCount = await countTests(cwd);
      const redPassed = await runTests(cwd);
      const redFailed = !redPassed;
      let greenPassed: boolean | null = null;
      let hiddenPassed: boolean | null = null;
      let testsDeleted = false;
      if (greenSha) {
        await runProc(["git", "checkout", "--quiet", greenSha], cwd, 5000);
        const greenTestCount = await countTests(cwd);
        testsDeleted = greenTestCount < redTestCount;
        greenPassed = await runTests(cwd);
        // Hidden tests only run when green passes, a kata spec was found and
        // no tests were removed; otherwise hiddenPassed stays null.
        if (greenPassed && spec && !testsDeleted) {
          hiddenPassed = await runHiddenTests(cwd, spec, stepId);
        }
      }

      // Branch order is the precedence of the verdicts: the first matching
      // condition decides the status.
      let status: StepVerdict["status"];
      let baseDelta = 0;
      if (greenSha === null) {
        status = "no-green";
      } else if (testsDeleted) {
        status = "test-deleted";
        baseDelta = -20;
      } else if (!redFailed) {
        status = "red-did-not-fail";
        baseDelta = -5;
      } else if (greenPassed === false) {
        status = "green-did-not-pass";
        baseDelta = -5;
      } else if (hiddenPassed === false) {
        status = "hidden-tests-failed";
        baseDelta = 0;
      } else if (hiddenPassed === true) {
        status = "verified";
        baseDelta = 20;
      } else {
        status = "discipline-only";
        baseDelta = 5;
      }
      const scoreDelta = applyMode(baseDelta, mode);
      const explanation = explainStep({ status, redSha, greenSha, hiddenPassed, mode });
      steps.push({ stepId, redSha, greenSha, redFailed, greenPassed, hiddenPassed, status, scoreDelta, explanation });
    }

    // Refactor commits aren't tied to red→green pairs: the spec rewards
    // any refactor that keeps the existing tests green. A broken refactor
    // (tests fail at the refactor commit) costs the same as a missed
    // green — discipline matters even outside red→green pairs.
    const refactors: RefactorVerdict[] = [];
    for (const c of commits) {
      if (c.phase !== "refactor") continue;
      await runProc(["git", "checkout", "--quiet", c.sha], cwd, 5000);
      const passed = await runTests(cwd);
      const baseDelta = passed ? 5 : -5;
      refactors.push({
        sha: c.sha,
        stepId: c.step,
        testsPassed: passed,
        scoreDelta: applyMode(baseDelta, mode),
        explanation: explainRefactor(passed),
      });
    }

    const totalScore =
      steps.reduce((a, s) => a + s.scoreDelta, 0) +
      refactors.reduce((a, r) => a + r.scoreDelta, 0);
    const verdict: Verdict = { headSha, mode, steps, refactors, totalScore, judgedAt: Date.now() };
    saveRun(owner, repo, verdict);
    return verdict;
  } finally {
    try {
      rmSync(cwd, { recursive: true, force: true });
    } catch {
      // best effort cleanup
    }
  }
};
src/c32_session.ts
+81
−0
| @@ -0,0 +1,81 @@ | ||
| 1 | +// c32 — logic: session signing/verification + cookie helpers. Pure | |
| 2 | +// HMAC over the session payload, no I/O. Handlers (c21) pull a viewer | |
| 3 | +// off the request via getViewer(), and the OAuth callback issues a | |
| 4 | +// session cookie via sessionCookieHeader + signSession. | |
| 5 | + | |
// 30 days. Long enough for everyday use, short enough that a leaked
// cookie doesn't grant indefinite access.
export const SESSION_TTL_SEC = 30 * 24 * 60 * 60;
// Name of the cookie that carries the signed session token.
const SESSION_COOKIE = "tdd_session";
| 10 | + | |
/**
 * Resolves the HMAC secret for session cookies: `SESSION_SECRET` when it is
 * set (even to the empty string), otherwise `WEBHOOK_SECRET`, otherwise "".
 */
const sessionSecret = (): string => {
  const { SESSION_SECRET: primary, WEBHOOK_SECRET: fallback } = process.env;
  if (primary !== undefined) return primary;
  if (fallback !== undefined) return fallback;
  return "";
};
| 13 | + | |
/**
 * Returns `bytes` cryptographically random bytes as a lowercase hex string
 * (two characters per byte).
 */
export const randomHex = (bytes: number): string => {
  const buf = new Uint8Array(bytes);
  crypto.getRandomValues(buf);
  let hex = "";
  for (const byte of buf) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
};
| 18 | + | |
/**
 * Parses a `Cookie` request header into a name → value map.
 *
 * Pairs are split on ";" and on the first "=", names and values are trimmed,
 * and values are percent-decoded. Pairs without "=" or with an empty name are
 * skipped; when a name repeats, the last occurrence wins.
 *
 * @param header raw header value, or null when the request has none
 * @returns the parsed cookies (empty object for a missing/empty header)
 */
export const parseCookies = (header: string | null): Record<string, string> => {
  const out: Record<string, string> = {};
  if (!header) return out;
  for (const part of header.split(";")) {
    const idx = part.indexOf("=");
    if (idx === -1) continue;
    const name = part.slice(0, idx).trim();
    if (!name) continue;
    const value = part.slice(idx + 1).trim();
    try {
      out[name] = decodeURIComponent(value);
    } catch {
      // decodeURIComponent throws URIError on malformed percent-encoding.
      // The header is client-controlled, so keep the raw value instead of
      // letting one bad cookie fail the whole request.
      out[name] = value;
    }
  }
  return out;
};
| 31 | + | |
/**
 * Compares two strings without short-circuiting on the first differing
 * character. Strings of different length are rejected immediately.
 */
export const timingSafeEqual = (a: string, b: string): boolean => {
  if (a.length !== b.length) return false;
  let diff = 0;
  let i = a.length;
  // Accumulate every XOR so the loop always visits all positions.
  while (i-- > 0) {
    diff |= a.charCodeAt(i) ^ b.charCodeAt(i);
  }
  return diff === 0;
};
| 38 | + | |
/**
 * Computes HMAC-SHA-256 of `body` keyed with `secret` (both UTF-8 encoded)
 * via WebCrypto and returns the digest as lowercase hex.
 */
export const hmacSha256Hex = async (secret: string, body: string): Promise<string> => {
  const encoder = new TextEncoder();
  const algorithm = { name: "HMAC", hash: "SHA-256" };
  // Non-extractable key, usable for signing only.
  const key = await crypto.subtle.importKey("raw", encoder.encode(secret), algorithm, false, ["sign"]);
  const digest = new Uint8Array(await crypto.subtle.sign("HMAC", key, encoder.encode(body)));
  let hex = "";
  for (const byte of digest) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
};
| 52 | + | |
/**
 * Issues a signed session token of the form `username.exp.signature`, where
 * `exp` is the expiry in Unix seconds (now + SESSION_TTL_SEC) and the
 * signature is the hex HMAC of `username.exp`.
 *
 * A username containing "." produces a token that verifySession rejects,
 * because verification requires exactly three dot-separated parts.
 */
export const signSession = async (username: string): Promise<string> => {
  const nowSec = Math.floor(Date.now() / 1000);
  const payload = [username, nowSec + SESSION_TTL_SEC].join(".");
  const signature = await hmacSha256Hex(sessionSecret(), payload);
  return [payload, signature].join(".");
};
| 59 | + | |
/**
 * Validates a session token produced by signSession.
 *
 * @param cookie token of the form `username.exp.signature`
 * @returns the username when the token has three non-empty parts, has not
 *   expired and carries a matching signature; null otherwise
 */
export const verifySession = async (cookie: string): Promise<string | null> => {
  const fields = cookie.split(".");
  if (fields.length !== 3) return null;
  const [username, expStr, providedSig] = fields;
  if (!username || !expStr || !providedSig) return null;

  // Expiry is checked before the signature; a tampered expiry still fails
  // below because the signature covers the original expStr text.
  const expiresAt = Number(expStr);
  const nowSec = Math.floor(Date.now() / 1000);
  const stillValid = Number.isFinite(expiresAt) && expiresAt >= nowSec;
  if (!stillValid) return null;

  const expectedSig = await hmacSha256Hex(sessionSecret(), `${username}.${expStr}`);
  return timingSafeEqual(providedSig, expectedSig) ? username : null;
};
| 71 | + | |
/**
 * Resolves the signed-in username for a request from its session cookie.
 * Returns null when no session secret is configured, when the cookie is
 * absent, or when the token fails verification.
 */
export const getViewer = async (req: Request): Promise<string | null> => {
  // Without a secret every signature would be forgeable, so treat all
  // requests as anonymous.
  if (sessionSecret() === "") return null;
  const token = parseCookies(req.headers.get("cookie"))[SESSION_COOKIE];
  return token ? verifySession(token) : null;
};
| 79 | + | |
/**
 * Builds the `Set-Cookie` value for the session cookie: site-wide path,
 * HttpOnly, Secure, SameSite=Lax, with the given lifetime in seconds.
 * `value` is written as-is (no encoding is applied).
 */
export const sessionCookieHeader = (value: string, maxAge: number): string => {
  const attributes = [
    `${SESSION_COOKIE}=${value}`,
    "Path=/",
    "HttpOnly",
    "Secure",
    "SameSite=Lax",
    `Max-Age=${maxAge}`,
  ];
  return attributes.join("; ");
};
src/c51_render.ts
+528
−0
| @@ -0,0 +1,528 @@ | ||
| 1 | +// c51 — UI: HTML rendering. Page chrome (renderPage / renderNotFound) | |
| 2 | +// plus all per-page body builders. Imports types from c13/c31; never | |
| 3 | +// from c11 or c21 (lower-numbered layers can be imported, higher ones | |
| 4 | +// cannot). | |
| 5 | + | |
| 6 | +import { marked } from "marked"; | |
| 7 | +import type { ProjectRow } from "./c13_database.ts"; | |
| 8 | +import { PROJECT_CONFIG_PATH } from "./c31_project_config.ts"; | |
| 9 | +import type { Phase } from "./c31_commits.ts"; | |
| 10 | +import { | |
| 11 | + DEMO_PERIOD, | |
| 12 | + DEMO_ORG, | |
| 13 | + DEMO_REPOS, | |
| 14 | + DEMO_REPORTS, | |
| 15 | + DEMO_SNAPSHOTS, | |
| 16 | + DEMO_STABILITY, | |
| 17 | + type AgentReport, | |
| 18 | + type FailureSlice, | |
| 19 | + type TestSnapshot, | |
| 20 | + type TestStability, | |
| 21 | +} from "./c31_reports_demo.ts"; | |
| 22 | + | |
// Stylesheet path, relative to the process working directory.
const STYLE_CSS = "./public/style.css";
// Read once at module load (top-level await); renderPage inlines it into
// a <style> element on every page.
const css = await Bun.file(STYLE_CSS).text();
| 25 | + | |
/** Top-level site sections; used to mark the active link in the nav bar. */
export type Section = "home" | "games" | "guides" | "blog" | "agents" | "leaderboard";
| 27 | + | |
/** Input to renderPage. */
export interface PageOptions {
  /** Used for <title>, og:title and twitter:title. */
  title: string;
  /** Page body as Markdown; converted to HTML with `marked`. */
  bodyMarkdown: string;
  /** Meta/OG/Twitter description; falls back to SITE_DESCRIPTION. */
  description?: string;
  /** Used for the canonical link and og:url; falls back to "https://tdd.md". */
  ogPath?: string;
  /** Section whose nav link is highlighted. */
  active?: Section;
  /** When true, emits a robots "noindex,nofollow" meta tag. */
  noindex?: boolean;
  /** When set, serialised into an application/ld+json <script>. */
  jsonLd?: Record<string, unknown>;
}
| 37 | + | |
// Default description used by renderPage when PageOptions.description is absent.
const SITE_DESCRIPTION = "Test-driven development for agentic coding. Scored katas, public verdicts.";
| 39 | + | |
/**
 * Escapes `&`, `"`, `<` and `>` so a string can be placed in HTML text or in
 * a double-quoted attribute value. `&` is replaced first so the entities
 * produced by the later replacements are not escaped a second time.
 */
const escape = (s: string): string =>
  s.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
| 42 | + | |
/**
 * Renders one nav anchor; the active link gets `class="nav-active"`.
 * `href` and `label` are inserted as-is.
 */
const navLink = (href: string, label: string, active: boolean): string =>
  active
    ? `<a href="${href}" class="nav-active">${label}</a>`
    : `<a href="${href}">${label}</a>`;
| 47 | + | |
/**
 * Renders the site nav bar: one link per section, joined by a "·" separator,
 * with the link for `active` (if any) highlighted.
 */
const nav = (active?: Section): string => {
  const entries: Array<[href: string, label: string, section: Section]> = [
    ["/", "tdd.md", "home"],
    ["/games", "games", "games"],
    ["/guides", "guides", "guides"],
    ["/blog", "blog", "blog"],
    ["/agents", "agents", "agents"],
    ["/leaderboard", "leaderboard", "leaderboard"],
  ];
  const links = entries.map(([href, label, section]) => navLink(href, label, active === section));
  return `<nav class="md-nav">${links.join(` <span class="md-nav-sep">·</span> `)}</nav>`;
};
| 49 | + | |
/**
 * Renders a full HTML document: head metadata (description, canonical,
 * Open Graph, Twitter card, optional robots and JSON-LD), the inlined
 * stylesheet, the nav bar and the Markdown body converted with `marked`.
 *
 * Title, description and ogPath are HTML-escaped. The Markdown body is passed
 * to `marked` as-is, so callers are responsible for what they put in it.
 *
 * @param opts page content and metadata; see PageOptions
 * @returns the complete HTML document
 */
export const renderPage = async (opts: PageOptions): Promise<string> => {
  const body = await marked.parse(opts.bodyMarkdown, { gfm: true, breaks: false });
  const description = opts.description ?? SITE_DESCRIPTION;
  const ogPath = opts.ogPath ?? "https://tdd.md";
  const robots = opts.noindex ? `<meta name="robots" content="noindex,nofollow">\n` : "";
  // JSON.stringify leaves "<" untouched, so a "</script>" inside any string
  // value would close the element early. "\u003c" is the same character to a
  // JSON parser and is inert to the HTML parser.
  const jsonLd = opts.jsonLd
    ? `<script type="application/ld+json">${JSON.stringify(opts.jsonLd).replace(/</g, "\\u003c")}</script>\n`
    : "";
  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="color-scheme" content="dark light">
<meta name="description" content="${escape(description)}">
${robots}<link rel="canonical" href="${escape(ogPath)}">
<meta property="og:title" content="${escape(opts.title)}">
<meta property="og:description" content="${escape(description)}">
<meta property="og:type" content="website">
<meta property="og:url" content="${escape(ogPath)}">
<meta property="og:image" content="https://tdd.md/og.svg">
<meta property="og:image:type" content="image/svg+xml">
<meta property="og:image:width" content="1200">
<meta property="og:image:height" content="630">
<meta property="og:site_name" content="tdd.md">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="${escape(opts.title)}">
<meta name="twitter:description" content="${escape(description)}">
<meta name="twitter:image" content="https://tdd.md/og.svg">
<title>${escape(opts.title)}</title>
${jsonLd}<style>${css}</style>
</head>
<body>
${nav(opts.active)}
<main class="md">
${body}
</main>
</body>
</html>`;
};
| 90 | + | |
/**
 * Renders the 404 page for `path`, marked noindex.
 *
 * NOTE(review): `path` is placed inside a Markdown code span unmodified; a
 * path containing a backtick would end the span early. Confirm callers pass
 * the still-percent-encoded pathname.
 */
export const renderNotFound = async (path: string): Promise<string> => {
  const bodyMarkdown = `# 404\n\n> No such path: \`${path}\`\n\nTry [home](/), [games](/games), [agents](/agents), or [leaderboard](/leaderboard).`;
  return renderPage({ title: "404 — tdd.md", bodyMarkdown, noindex: true });
};
| 97 | + | |
| 98 | +// --------------------------------------------------------------------- | |
| 99 | +// Small response/formatting helpers used by c21 handlers. | |
| 100 | +// --------------------------------------------------------------------- | |
| 101 | + | |
/** Wraps an HTML string in a Response with a UTF-8 text/html content type. */
export const htmlResponse = (html: string, status = 200): Response => {
  const headers = { "Content-Type": "text/html; charset=utf-8" };
  return new Response(html, { status, headers });
};
| 104 | + | |
/**
 * Renders a generic error page (with a link back to /agents/register) and
 * wraps it in an HTML response.
 *
 * NOTE(review): `message` is interpolated into the Markdown body unescaped;
 * confirm callers never pass user-controlled text.
 *
 * @param message text shown in the blockquote
 * @param status HTTP status of the response (default 400)
 */
export const errorPage = async (message: string, status = 400): Promise<Response> => {
  const bodyMarkdown = `# error\n\n> ${message}\n\n[← back](/agents/register)`;
  const page = await renderPage({ title: "error — tdd.md", bodyMarkdown, active: "agents" });
  return htmlResponse(page, status);
};
| 113 | + | |
/**
 * Wraps a phase name in a <span> whose class colours it: red, green,
 * blue (for "refactor"), or muted for anything else.
 */
export const phaseSpan = (p: Phase): string => {
  let cls: string;
  switch (p) {
    case "red":
      cls = "red";
      break;
    case "green":
      cls = "green";
      break;
    case "refactor":
      cls = "blue";
      break;
    default:
      cls = "muted";
  }
  return `<span class="${cls}">${p}</span>`;
};
| 118 | + | |
/**
 * Formats how long ago a timestamp was, in the largest fitting unit:
 * seconds, minutes, hours or days.
 *
 * @param iso timestamp in any format `new Date()` can parse
 * @returns e.g. "12s ago", "5m ago", "3h ago", "2d ago"; timestamps in the
 *   future are clamped to "0s ago"; "unknown" when `iso` cannot be parsed
 */
export const relativeTime = (iso: string): string => {
  const ms = Date.now() - new Date(iso).getTime();
  // An unparsable date yields NaN, which fails every comparison below and
  // would otherwise be rendered as "NaNd ago".
  if (Number.isNaN(ms)) return "unknown";
  if (ms < 60_000) return `${Math.max(0, Math.floor(ms / 1000))}s ago`;
  if (ms < 3_600_000) return `${Math.floor(ms / 60_000)}m ago`;
  if (ms < 86_400_000) return `${Math.floor(ms / 3_600_000)}h ago`;
  return `${Math.floor(ms / 86_400_000)}d ago`;
};
| 126 | + | |
| 127 | +// --------------------------------------------------------------------- | |
| 128 | +// Body builders for /projects. | |
| 129 | +// --------------------------------------------------------------------- | |
| 130 | + | |
/**
 * Renders one project as a row of the /projects Markdown table:
 * linked display name (plus optional team), tracked branches, runner.
 *
 * Display name, team and branch names are escaped so that a literal "|"
 * in them cannot split the table cell.
 */
const projectListRow = (p: ProjectRow): string => {
  // GFM tables treat "\|" as a literal pipe, including inside code spans.
  const cell = (s: string): string => s.replace(/\|/g, "\\|");
  const slug = `${p.repoOwner}/${p.repoName}`;
  const display = p.displayName ?? slug;
  const team = p.team ? ` <span class="muted">· ${cell(escape(p.team))}</span>` : "";
  const branches = p.trackedBranches.map((b) => `\`${cell(b)}\``).join(", ");
  const runner = p.testRunner === "none" ? "trace-only" : p.testRunner;
  return `| [${cell(escape(display))}](/projects/${p.repoOwner}/${p.repoName}) ${team} | ${branches} | ${runner} |`;
};
| 139 | + | |
/**
 * Builds the Markdown body of the /projects landing page: intro, the table
 * of tracked projects (or a placeholder row when there are none), and the
 * registration / config-file documentation.
 */
export const projectsLandingMd = (projects: ProjectRow[]): string => {
  let rows: string;
  if (projects.length === 0) {
    rows = `| _no projects yet — [register one](/projects/new)_ | | |`;
  } else {
    rows = projects.map((project) => projectListRow(project)).join("\n");
  }
  return `# projects

> Real repos that opted in to tdd.md scoring. Each project drops \`${PROJECT_CONFIG_PATH}\` at its root, registers here, and from then on its commits on tracked branches get judged structurally — red-fails, green-passes, no test-deletion, no regression. The aggregated scores feed [the reports](/reports).

## tracked

| project | branches | runner |
|---|---|---|
${rows}

## register a repo

[Register a project →](/projects/new) — paste a public GitHub URL; tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from the default branch and onboards it.

## the config file

Drop \`${PROJECT_CONFIG_PATH}\` at the root of your repo's default branch:

\`\`\`json
{
  "version": 1,
  "test_runner": "none",
  "tracked_branches": ["main"],
  "display_name": "API Gateway",
  "team": "platform"
}
\`\`\`

- **\`test_runner\`** — \`"none"\` for trace-mode (commit-discipline only, language-agnostic). \`"bun"\` will run the test suite once the sandbox-runner ships.
- **\`tracked_branches\`** — pushes to these branches get scored. Defaults to \`["main"]\`.
- **\`display_name\`** / **\`team\`** — optional, only used in the reporting UI.

## what comes next

Registration just stores the project. Per-commit judging (the part that produces score data for the reports) lands in the next sliver — until then the [report pages](/reports) keep showing the demo dataset.

[← back to tdd.md](/) · [the reports](/reports)
`;
};
| 183 | + | |
/**
 * Builds the Markdown body of /projects/new.
 *
 * @param viewer signed-in username, or null — anonymous visitors get a
 *   sign-in prompt instead of the form
 * @param prefilled value to put back into the repo input (HTML-escaped)
 * @param errorMessage shown above the form when present (HTML-escaped)
 */
export const projectRegisterMd = (
  viewer: string | null,
  prefilled?: string,
  errorMessage?: string,
): string => {
  if (!viewer) {
    return `# register a project

> You need to sign in before registering a project. We use your GitHub identity to record who onboarded the repo.

[ sign in with github → ](/auth/github/start)

[← all projects](/projects)
`;
  }

  let errorBox = "";
  if (errorMessage) {
    errorBox = `<div class="project-form-error"><strong>Couldn't register that repo:</strong><br>${escape(errorMessage)}</div>`;
  }
  let valueAttr = "";
  if (prefilled) {
    valueAttr = ` value="${escape(prefilled)}"`;
  }

  return `# register a project

> Paste a public GitHub URL. tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from its default branch, validates it, and onboards the repo. Re-register the same repo to refresh the config.

${errorBox}

<form method="post" action="/projects/new" class="project-form">
  <label for="repo-url">Repository URL or <code>owner/name</code></label>
  <input id="repo-url" name="repo" type="text" required
    placeholder="https://github.com/owner/name"
    autocomplete="off" autocapitalize="off" autocorrect="off"${valueAttr} />
  <button type="submit">Register</button>
</form>

> Signed in as <code>${escape(viewer)}</code>. Don't have \`${PROJECT_CONFIG_PATH}\` yet? [See the format on /projects](/projects#the-config-file).

[← all projects](/projects)
`;
};
| 222 | + | |
| 223 | +// --------------------------------------------------------------------- | |
| 224 | +// Body builders for /reports. | |
| 225 | +// --------------------------------------------------------------------- | |
| 226 | + | |
/** Maps a score delta to its arrow glyph and CSS class: up, down or flat. */
const trendArrow = (delta: number): { glyph: string; cls: string } => {
  if (delta > 0) return { glyph: "↑", cls: "up" };
  if (delta < 0) return { glyph: "↓", cls: "down" };
  return { glyph: "→", cls: "flat" };
};
| 229 | + | |
/**
 * Renders `values` as an inline SVG polyline.
 *
 * X positions are spread evenly across `width`; Y is scaled between the
 * minimum and maximum value with a 6px vertical padding (larger values sit
 * higher). The value range is floored at 1 so a flat series does not divide
 * by zero.
 *
 * @returns the <svg> markup, or "" for an empty series
 */
const sparkline = (values: number[], height = 60, width = 320): string => {
  if (values.length === 0) return "";
  const pad = 6;
  const lowest = Math.min(...values);
  const highest = Math.max(...values);
  const range = Math.max(1, highest - lowest);
  const stepX = width / Math.max(1, values.length - 1);
  const innerH = height - pad * 2;

  const coords: string[] = [];
  values.forEach((value, index) => {
    const x = (index * stepX).toFixed(1);
    const y = (pad + innerH - ((value - lowest) / range) * innerH).toFixed(1);
    coords.push(`${x},${y}`);
  });
  const points = coords.join(" ");

  return `<svg class="report-sparkline" viewBox="0 0 ${width} ${height}" preserveAspectRatio="none" aria-hidden="true">
  <polyline fill="none" stroke="currentColor" stroke-width="1.5" points="${points}" />
</svg>`;
};
| 249 | + | |
/**
 * Renders one agent's summary tile for the exec summary: linked name,
 * score out of 100, signed trend, commit volume and top issue.
 */
const tile = (a: AgentReport): string => {
  const trend = trendArrow(a.delta);
  // Positive deltas get an explicit "+"; zero and negatives print as-is.
  const signedDelta = a.delta > 0 ? `+${a.delta}` : `${a.delta}`;
  const commitCount = a.commits.toLocaleString();
  return `<div class="report-tile">
  <p class="report-tile-name"><a href="/reports/demo/agents/${a.slug}">${escape(a.name)}</a></p>
  <p class="report-tile-score">${a.score}<span class="report-tile-score-suffix"> / 100</span></p>
  <p class="report-tile-trend ${trend.cls}">${trend.glyph} ${escape(signedDelta)}</p>
  <p class="report-tile-volume">${commitCount} commits</p>
  <div class="report-tile-issue">top issue: <strong>${escape(a.topIssueLabel)}</strong> (${a.topIssuePct}%)</div>
</div>`;
};
| 261 | + | |
/**
 * Renders a failure-mode breakdown as horizontal bars: one row per slice
 * with its label, a fill whose width is the slice percentage, and the
 * percentage as text.
 */
const bars = (mix: FailureSlice[]): string => {
  const rendered: string[] = [];
  for (const slice of mix) {
    rendered.push(`<div class="report-bar-row">
  <span class="report-bar-label">${escape(slice.label)}</span>
  <span class="report-bar-track"><span class="report-bar-fill ${slice.tone}" style="width: ${slice.pct}%"></span></span>
  <span class="report-bar-pct">${slice.pct}%</span>
</div>`);
  }
  return `<div class="report-bars">${rendered.join("\n")}</div>`;
};
| 274 | + | |
/**
 * Renders an agent's streak badge. A broken streak is styled "broken", a
 * streak of 30 or more "long", anything else gets no extra class.
 */
const streakBox = (a: AgentReport): string => {
  let modifier = "";
  let caption = "consecutive clean cycles";
  if (a.streakBroken) {
    modifier = "broken";
    caption = "recent break";
  } else if (a.streak >= 30) {
    modifier = "long";
  }
  return `<span class="report-streak ${modifier}"><span class="report-streak-num">${a.streak}</span> ${caption}</span>`;
};
| 280 | + | |
// Banner placed at the top of the demo report pages to flag the data as synthetic.
const mockBanner = `<div class="report-mockup-banner">demo data — real reporting wires up when the project-tracking pipeline ships. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a> · <a href="/reports">about reporting</a></div>`;
| 282 | + | |
/**
 * Renders the current test state of one repo/branch: totals, then either a
 * single "all green" item or one item per failing test followed by a
 * collapsed count of the passing ones.
 */
const snapshotBlock = (s: TestSnapshot): string => {
  let items: string[];
  if (s.failures.length === 0) {
    items = [`<li class="test-list-pass">all ${s.passing} tests groen</li>`];
  } else {
    items = s.failures.map((failure) => {
      const flakyPrefix = failure.flaky ? "intermittent · " : "";
      return `<li class="test-list-fail">${escape(failure.test)} <span class="test-list-meta">${flakyPrefix}sinds ${failure.since}</span></li>`;
    });
    items.push(`<li class="test-list-collapsed">+ ${s.passing.toLocaleString()} passing tests</li>`);
  }
  const failuresHtml = items.join("\n");

  const statusCls = s.failing === 0 ? "ok" : "bad";
  const failingStat = s.failing > 0 ? ` · <span class="red">${s.failing.toLocaleString()} failing</span>` : "";
  return `<div class="test-snapshot ${statusCls}">
  <p class="test-snapshot-head"><strong>${escape(s.repo)}</strong> <span class="test-snapshot-branch">@ ${escape(s.branch)}</span></p>
  <p class="test-snapshot-stats">${s.total.toLocaleString()} tests · <span class="green">${s.passing.toLocaleString()} passing</span>${failingStat}</p>
  <ul class="test-list">
${failuresHtml}
  </ul>
</div>`;
};
| 302 | + | |
/**
 * Renders a link to an agent's drill-down page, labelled with the agent's
 * name from DEMO_REPORTS (falling back to the slug when it is not listed).
 */
const agentTagHtml = (slug: AgentReport["slug"]): string => {
  const report = DEMO_REPORTS.find((candidate) => candidate.slug === slug);
  const label = report?.name ?? slug;
  return `<a class="agent-tag" href="/reports/demo/agents/${slug}">${escape(label)}</a>`;
};
| 307 | + | |
/**
 * Renders one row of the test-stability table: test name and repo, pass /
 * fail / deleted counts, and the agent that last broke the test. Flagged
 * tests get an extra row class and a warning marker; fail counts of 8 or
 * more and any non-zero deleted count are coloured red.
 */
const stabilityRow = (s: TestStability): string => {
  const rowCls = s.flagged ? "test-stab-row flagged" : "test-stab-row";
  const warning = s.flagged
    ? ` <span class="test-stab-warn" title="test-deletion of weakening dit kwartaal">⚠</span>`
    : "";
  const failCls = s.fail >= 8 ? "red" : "";
  const deletedCls = s.deleted > 0 ? "red" : "";
  return `<tr class="${rowCls}">
  <td class="test-stab-name">${escape(s.test)}<div class="test-stab-repo">${escape(s.repo)}</div></td>
  <td class="test-stab-num green">${s.pass}</td>
  <td class="test-stab-num ${failCls}">${s.fail}</td>
  <td class="test-stab-num ${deletedCls}">${s.deleted}</td>
  <td class="test-stab-by">${agentTagHtml(s.lastBrokenBy)}${warning}</td>
</tr>`;
};
| 319 | + | |
/**
 * Builds the Markdown body of the /reports landing page: what the reporting
 * layer is, links into the demo mockups, the failure modes it measures and
 * how to read the data.
 *
 * The config file name is taken from PROJECT_CONFIG_PATH, like the /projects
 * pages, so the copy cannot drift from the path the parser actually uses.
 */
export const reportsLandingMd = (): string => `# reports

> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.

This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data:

- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents
- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits
- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam

Want a real repo on this layer? [Register a project →](/projects) — drops \`${PROJECT_CONFIG_PATH}\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers.

## what gets measured

This layer measures **discipline**, not code-quality. Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch:

| failure mode | what triggers it | what it costs |
|---|---|---|
| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit |
| \`test-deleted\` | test count drops between commits | -20 / commit |
| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit |
| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % |

The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip.

## reading the data

For management:
- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page.

For team-leads:
- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts.

[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games)
`;
| 355 | + | |
/**
 * Builds the Markdown body of the demo exec summary: the mock-data banner,
 * period/scope line with the total commit count across DEMO_REPORTS, one
 * tile per agent, and the fixed narrative sections.
 */
export const execSummaryMd = (): string => {
  let totalCommits = 0;
  const tileHtml: string[] = [];
  for (const report of DEMO_REPORTS) {
    totalCommits += report.commits;
    tileHtml.push(tile(report));
  }
  const tiles = tileHtml.join("\n");
  return `# tdd-discipline rapport · q1 2026

${mockBanner}

> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}.

<div class="report-tiles">
${tiles}
</div>

## wat veranderde dit kwartaal

Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.

## wat we doen

- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)).
- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren.
- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.

## wat dit getal *niet* meet

Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy.

---

[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)
`;
};
| 388 | + | |
| 389 | +export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => { | |
| 390 | + const a = DEMO_REPORTS.find((r) => r.slug === slug); | |
| 391 | + if (!a) return null; | |
| 392 | + const arr = trendArrow(a.delta); | |
| 393 | + const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; | |
| 394 | + const recentRows = a.recent | |
| 395 | + .map( | |
| 396 | + (r) => | |
| 397 | + `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, | |
| 398 | + ) | |
| 399 | + .join("\n"); | |
| 400 | + return `# ${a.name} · drill-down | |
| 401 | + | |
| 402 | +${mockBanner} | |
| 403 | + | |
| 404 | +> Discipline-score **${a.score} / 100** <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. | |
| 405 | + | |
| 406 | +## trend (30 dagen) | |
| 407 | + | |
| 408 | +<div class="${arr.cls === "down" ? "red" : arr.cls === "up" ? "green" : "muted"}"> | |
| 409 | +${sparkline(a.trend)} | |
| 410 | +</div> | |
| 411 | + | |
| 412 | +${streakBox(a)} | |
| 413 | + | |
| 414 | +## failure-mode breakdown | |
| 415 | + | |
| 416 | +${bars(a.failureMix)} | |
| 417 | + | |
| 418 | +Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits). | |
| 419 | + | |
| 420 | +## recent flagged | |
| 421 | + | |
| 422 | +| date | repo | sha | phase | failure | pts | | |
| 423 | +|---|---|---|---|---|---| | |
| 424 | +${recentRows} | |
| 425 | + | |
| 426 | +## coaching | |
| 427 | + | |
| 428 | +- ${a.slug === "claude-code" ? `[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent \`red-did-not-fail\`.` : a.slug === "cursor" ? `[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, \`.cursor/rules\` to stop test-deletion.` : `[Aider is the closest agent to TDD on rails — until \`--auto-test\`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.`} | |
| 429 | +- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough. | |
| 430 | + | |
| 431 | +--- | |
| 432 | + | |
| 433 | +[← exec summary](/reports/demo) · [back to /reports](/reports) | |
| 434 | +`; | |
| 435 | +}; | |
| 436 | + | |
| 437 | +export const testsOverviewMd = (): string => { | |
| 438 | + const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0); | |
| 439 | + const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0); | |
| 440 | + const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0); | |
| 441 | + const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n"); | |
| 442 | + const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n"); | |
| 443 | + return `# tests overzicht | |
| 444 | + | |
| 445 | +${mockBanner} | |
| 446 | + | |
| 447 | +> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". | |
| 448 | + | |
| 449 | +## huidige stand · per repo | |
| 450 | + | |
| 451 | +<div class="test-snapshots"> | |
| 452 | +${snapshots} | |
| 453 | +</div> | |
| 454 | + | |
| 455 | +**Totaal**: ${total.toLocaleString()} tests · <span class="green">${passing.toLocaleString()} passing</span> · <span class="${failing > 0 ? "red" : "muted"}">${failing.toLocaleString()} failing</span>. | |
| 456 | + | |
| 457 | +## test-stabiliteit · q1 2026 | |
| 458 | + | |
| 459 | +Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken. | |
| 460 | + | |
| 461 | +<table class="test-stability"> | |
| 462 | +<thead> | |
| 463 | + <tr> | |
| 464 | + <th>test</th> | |
| 465 | + <th class="num">pass</th> | |
| 466 | + <th class="num">fail</th> | |
| 467 | + <th class="num">del</th> | |
| 468 | + <th>laatst gebroken door</th> | |
| 469 | + </tr> | |
| 470 | +</thead> | |
| 471 | +<tbody> | |
| 472 | +${stabRows} | |
| 473 | +</tbody> | |
| 474 | +</table> | |
| 475 | + | |
| 476 | +> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test. | |
| 477 | + | |
| 478 | +## hoe lees je dit | |
| 479 | + | |
| 480 | +- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm. | |
| 481 | +- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt. | |
| 482 | +- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon). | |
| 483 | +- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem. | |
| 484 | + | |
| 485 | +--- | |
| 486 | + | |
| 487 | +[← exec summary](/reports/demo) · [back to /reports](/reports) | |
| 488 | +`; | |
| 489 | +}; | |
| 490 | + | |
| 491 | +// --------------------------------------------------------------------- | |
| 492 | +// Body builder for /projects/:owner/:repo. | |
| 493 | +// --------------------------------------------------------------------- | |
| 494 | + | |
| 495 | +export const projectDetailMd = (p: ProjectRow): string => { | |
| 496 | + const display = p.displayName ?? `${p.repoOwner}/${p.repoName}`; | |
| 497 | + const registeredAt = new Date(p.registeredAt).toISOString().slice(0, 10); | |
| 498 | + const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); | |
| 499 | + const runnerNote = p.testRunner === "none" | |
| 500 | + ? "Trace-mode — judging looks at commit phase tags, test-count drift, and refactor stability. No test execution." | |
| 501 | + : "Bun runner — test suite executes in a sandbox at every tracked-branch commit. (Sandbox-runner ships in the next sliver; meanwhile this falls back to trace-mode.)"; | |
| 502 | + return `# ${escape(display)} | |
| 503 | + | |
| 504 | +> [${escape(p.repoOwner)}/${escape(p.repoName)}](https://github.com/${p.repoOwner}/${p.repoName}) · registered by [${escape(p.registeredBy)}](/agents/${p.registeredBy}) on ${registeredAt}. | |
| 505 | + | |
| 506 | +## config | |
| 507 | + | |
| 508 | +| key | value | | |
| 509 | +|---|---| | |
| 510 | +| test_runner | \`${p.testRunner}\` | | |
| 511 | +| tracked_branches | ${branches} | | |
| 512 | +| display_name | ${p.displayName ? `\`${escape(p.displayName)}\`` : "_(none)_"} | | |
| 513 | +| team | ${p.team ? `\`${escape(p.team)}\`` : "_(none)_"} | | |
| 514 | +| status | \`${p.status}\` | | |
| 515 | + | |
| 516 | +${runnerNote} | |
| 517 | + | |
| 518 | +## scored commits | |
| 519 | + | |
| 520 | +> _No commits judged yet._ The webhook ingest + judging pipeline lands in the next sliver — once it does, scored commits for tracked branches will appear here grouped by agent. | |
| 521 | + | |
| 522 | +## refresh | |
| 523 | + | |
| 524 | +Push an updated \`${PROJECT_CONFIG_PATH}\` to your default branch and [re-register](/projects/new?repo=${encodeURIComponent(`${p.repoOwner}/${p.repoName}`)}) to pick up the new config. | |
| 525 | + | |
| 526 | +[← all projects](/projects) | |
| 527 | +`; | |
| 528 | +}; | |
src/commits.test.ts
+0
−52
| @@ -1,52 +0,0 @@ | ||
| 1 | -import { test, expect } from "bun:test"; | |
| 2 | -import { parseCommit, computeProgress } from "./commits"; | |
| 3 | - | |
| 4 | -test("parseCommit reads a phase prefix", () => { | |
| 5 | - expect(parseCommit("red: failing test for empty")).toEqual({ | |
| 6 | - phase: "red", | |
| 7 | - step: null, | |
| 8 | - subject: "failing test for empty", | |
| 9 | - }); | |
| 10 | -}); | |
| 11 | - | |
| 12 | -test("parseCommit extracts step from phase(step): form", () => { | |
| 13 | - expect(parseCommit("green(single-number): return n for one number")).toEqual({ | |
| 14 | - phase: "green", | |
| 15 | - step: "single-number", | |
| 16 | - subject: "return n for one number", | |
| 17 | - }); | |
| 18 | -}); | |
| 19 | - | |
| 20 | -test("parseCommit recognizes 'Initial commit' as init", () => { | |
| 21 | - expect(parseCommit("Initial commit").phase).toBe("init"); | |
| 22 | -}); | |
| 23 | - | |
| 24 | -test("parseCommit returns untagged for unknown messages", () => { | |
| 25 | - expect(parseCommit("wip — fixing something").phase).toBe("untagged"); | |
| 26 | -}); | |
| 27 | - | |
| 28 | -test("parseCommit recognizes spike: prefix", () => { | |
| 29 | - expect(parseCommit("spike: try the regex approach").phase).toBe("spike"); | |
| 30 | -}); | |
| 31 | - | |
| 32 | -test("parseCommit extracts step from spike(step):", () => { | |
| 33 | - const p = parseCommit("spike(custom-separator): explore Forge regex"); | |
| 34 | - expect(p.phase).toBe("spike"); | |
| 35 | - expect(p.step).toBe("custom-separator"); | |
| 36 | -}); | |
| 37 | - | |
| 38 | -test("computeProgress verifies a step after red→green for the same step", () => { | |
| 39 | - const commits = [ | |
| 40 | - { commit: { message: "green(empty): returns 0" } }, | |
| 41 | - { commit: { message: "red(empty): empty string returns 0" } }, | |
| 42 | - ]; // newest first, like Forgejo | |
| 43 | - const p = computeProgress(commits); | |
| 44 | - expect(p.verifiedSteps).toEqual(new Set(["empty"])); | |
| 45 | - expect(p.redCount).toBe(1); | |
| 46 | - expect(p.greenCount).toBe(1); | |
| 47 | -}); | |
| 48 | - | |
| 49 | -test("computeProgress does not verify green-without-prior-red", () => { | |
| 50 | - const commits = [{ commit: { message: "green(empty): returns 0" } }]; | |
| 51 | - expect(computeProgress(commits).verifiedSteps.size).toBe(0); | |
| 52 | -}); | |
src/commits.ts
+0
−65
| @@ -1,65 +0,0 @@ | ||
| 1 | -export type Phase = "red" | "green" | "refactor" | "spike" | "init" | "untagged"; | |
| 2 | - | |
| 3 | -export interface ParsedCommit { | |
| 4 | - phase: Phase; | |
| 5 | - step: string | null; | |
| 6 | - subject: string; | |
| 7 | -} | |
| 8 | - | |
| 9 | -const PHASE_RE = /^(red|green|refactor|spike)(?:\(([a-z][a-z0-9-]*)\))?:\s*(.*)$/i; | |
| 10 | - | |
| 11 | -export const parseCommit = (message: string): ParsedCommit => { | |
| 12 | - const subject = message.split("\n")[0] ?? ""; | |
| 13 | - const m = subject.match(PHASE_RE); | |
| 14 | - if (m) { | |
| 15 | - return { | |
| 16 | - phase: m[1]!.toLowerCase() as Phase, | |
| 17 | - step: m[2] ?? null, | |
| 18 | - subject: m[3] ?? "", | |
| 19 | - }; | |
| 20 | - } | |
| 21 | - if (/^Initial commit$/i.test(subject)) { | |
| 22 | - return { phase: "init", step: null, subject }; | |
| 23 | - } | |
| 24 | - return { phase: "untagged", step: null, subject }; | |
| 25 | -}; | |
| 26 | - | |
| 27 | -export interface Progress { | |
| 28 | - verifiedSteps: Set<string>; | |
| 29 | - redCount: number; | |
| 30 | - greenCount: number; | |
| 31 | - refactorCount: number; | |
| 32 | - spikeCount: number; | |
| 33 | - untaggedCount: number; | |
| 34 | -} | |
| 35 | - | |
| 36 | -// A step counts as "verified" when its red commit is followed by a green | |
| 37 | -// for the same step. Refactor and untagged commits are tallied separately | |
| 38 | -// for the score breakdown but don't move verification. | |
| 39 | -export const computeProgress = (commits: { commit: { message: string } }[]): Progress => { | |
| 40 | - const pendingRed = new Set<string>(); | |
| 41 | - const verifiedSteps = new Set<string>(); | |
| 42 | - let redCount = 0; | |
| 43 | - let greenCount = 0; | |
| 44 | - let refactorCount = 0; | |
| 45 | - let spikeCount = 0; | |
| 46 | - let untaggedCount = 0; | |
| 47 | - // Forgejo returns commits newest-first; walk oldest-first to get sequence. | |
| 48 | - for (const c of [...commits].reverse()) { | |
| 49 | - const p = parseCommit(c.commit.message); | |
| 50 | - if (p.phase === "red") { | |
| 51 | - redCount++; | |
| 52 | - if (p.step) pendingRed.add(p.step); | |
| 53 | - } else if (p.phase === "green") { | |
| 54 | - greenCount++; | |
| 55 | - if (p.step && pendingRed.has(p.step)) verifiedSteps.add(p.step); | |
| 56 | - } else if (p.phase === "refactor") { | |
| 57 | - refactorCount++; | |
| 58 | - } else if (p.phase === "spike") { | |
| 59 | - spikeCount++; | |
| 60 | - } else if (p.phase === "untagged") { | |
| 61 | - untaggedCount++; | |
| 62 | - } | |
| 63 | - } | |
| 64 | - return { verifiedSteps, redCount, greenCount, refactorCount, spikeCount, untaggedCount }; | |
| 65 | -}; | |
src/db.ts
+0
−214
| @@ -1,214 +0,0 @@ | ||
| 1 | -import { Database } from "bun:sqlite"; | |
| 2 | -import type { ProjectConfig, TestRunner } from "./projects"; | |
| 3 | - | |
| 4 | -const DB_PATH = process.env.TDD_DB_PATH ?? ":memory:"; | |
| 5 | - | |
| 6 | -let db: Database | null = null; | |
| 7 | - | |
| 8 | -const getDb = (): Database => { | |
| 9 | - if (db) return db; | |
| 10 | - db = new Database(DB_PATH, { create: true }); | |
| 11 | - db.exec(` | |
| 12 | - CREATE TABLE IF NOT EXISTS runs ( | |
| 13 | - id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 14 | - owner TEXT NOT NULL, | |
| 15 | - repo TEXT NOT NULL, | |
| 16 | - head_sha TEXT NOT NULL, | |
| 17 | - judged_at INTEGER NOT NULL, | |
| 18 | - verdict_json TEXT NOT NULL | |
| 19 | - ); | |
| 20 | - CREATE INDEX IF NOT EXISTS idx_runs_owner_repo | |
| 21 | - ON runs(owner, repo, judged_at DESC); | |
| 22 | - | |
| 23 | - CREATE TABLE IF NOT EXISTS projects ( | |
| 24 | - id INTEGER PRIMARY KEY AUTOINCREMENT, | |
| 25 | - registered_by TEXT NOT NULL, | |
| 26 | - repo_owner TEXT NOT NULL, | |
| 27 | - repo_name TEXT NOT NULL, | |
| 28 | - test_runner TEXT NOT NULL DEFAULT 'none', | |
| 29 | - tracked_branches TEXT NOT NULL, | |
| 30 | - display_name TEXT, | |
| 31 | - team TEXT, | |
| 32 | - registered_at INTEGER NOT NULL, | |
| 33 | - status TEXT NOT NULL DEFAULT 'active', | |
| 34 | - UNIQUE(repo_owner, repo_name) | |
| 35 | - ); | |
| 36 | - CREATE INDEX IF NOT EXISTS idx_projects_registered_by | |
| 37 | - ON projects(registered_by); | |
| 38 | - `); | |
| 39 | - return db; | |
| 40 | -}; | |
| 41 | - | |
| 42 | -export type Mode = "strict" | "pragmatic" | "learning"; | |
| 43 | - | |
| 44 | -export interface StepVerdict { | |
| 45 | - stepId: string; | |
| 46 | - redSha: string | null; | |
| 47 | - greenSha: string | null; | |
| 48 | - redFailed: boolean | null; | |
| 49 | - greenPassed: boolean | null; | |
| 50 | - // Whether the kata's authoritative hidden tests pass against the agent's | |
| 51 | - // implementation at the green commit. null when no hidden tests exist | |
| 52 | - // for the step (unknown kata, or step not registered with the spec). | |
| 53 | - hiddenPassed: boolean | null; | |
| 54 | - status: | |
| 55 | - | "verified" | |
| 56 | - | "discipline-only" | |
| 57 | - | "no-green" | |
| 58 | - | "red-did-not-fail" | |
| 59 | - | "green-did-not-pass" | |
| 60 | - | "hidden-tests-failed" | |
| 61 | - | "test-deleted" | |
| 62 | - // Trace-only mode: tests not executed, only commit discipline checked. | |
| 63 | - // Used when test_runner: "none" — language-agnostic, useful as a | |
| 64 | - // CI gate on real projects where Bun can't run the test suite. | |
| 65 | - | "trace-verified" | |
| 66 | - | "trace-tests-shrunk"; | |
| 67 | - scoreDelta: number; | |
| 68 | - // Coach-style explanation of the verdict — what happened, why the score | |
| 69 | - // is what it is, and (when relevant) how to improve next time. | |
| 70 | - explanation: string; | |
| 71 | -} | |
| 72 | - | |
| 73 | -export interface RefactorVerdict { | |
| 74 | - sha: string; | |
| 75 | - stepId: string | null; | |
| 76 | - testsPassed: boolean; | |
| 77 | - scoreDelta: number; | |
| 78 | - explanation: string; | |
| 79 | -} | |
| 80 | - | |
| 81 | -export interface Verdict { | |
| 82 | - headSha: string; | |
| 83 | - mode: Mode; | |
| 84 | - steps: StepVerdict[]; | |
| 85 | - refactors: RefactorVerdict[]; | |
| 86 | - totalScore: number; | |
| 87 | - judgedAt: number; | |
| 88 | -} | |
| 89 | - | |
| 90 | -export const saveRun = (owner: string, repo: string, verdict: Verdict): void => { | |
| 91 | - getDb().run( | |
| 92 | - `INSERT INTO runs (owner, repo, head_sha, judged_at, verdict_json) VALUES (?, ?, ?, ?, ?)`, | |
| 93 | - [owner, repo, verdict.headSha, verdict.judgedAt, JSON.stringify(verdict)], | |
| 94 | - ); | |
| 95 | -}; | |
| 96 | - | |
| 97 | -export const latestRun = (owner: string, repo: string): Verdict | null => { | |
| 98 | - const row = getDb() | |
| 99 | - .query<{ verdict_json: string }, [string, string]>( | |
| 100 | - `SELECT verdict_json FROM runs WHERE owner = ? AND repo = ? ORDER BY judged_at DESC LIMIT 1`, | |
| 101 | - ) | |
| 102 | - .get(owner, repo); | |
| 103 | - if (!row) return null; | |
| 104 | - return JSON.parse(row.verdict_json) as Verdict; | |
| 105 | -}; | |
| 106 | - | |
| 107 | -export interface ProjectRow { | |
| 108 | - id: number; | |
| 109 | - registeredBy: string; | |
| 110 | - repoOwner: string; | |
| 111 | - repoName: string; | |
| 112 | - testRunner: TestRunner; | |
| 113 | - trackedBranches: string[]; | |
| 114 | - displayName: string | null; | |
| 115 | - team: string | null; | |
| 116 | - registeredAt: number; | |
| 117 | - status: "active" | "paused"; | |
| 118 | -} | |
| 119 | - | |
| 120 | -interface ProjectDbRow { | |
| 121 | - id: number; | |
| 122 | - registered_by: string; | |
| 123 | - repo_owner: string; | |
| 124 | - repo_name: string; | |
| 125 | - test_runner: string; | |
| 126 | - tracked_branches: string; | |
| 127 | - display_name: string | null; | |
| 128 | - team: string | null; | |
| 129 | - registered_at: number; | |
| 130 | - status: string; | |
| 131 | -} | |
| 132 | - | |
| 133 | -const rowToProject = (r: ProjectDbRow): ProjectRow => ({ | |
| 134 | - id: r.id, | |
| 135 | - registeredBy: r.registered_by, | |
| 136 | - repoOwner: r.repo_owner, | |
| 137 | - repoName: r.repo_name, | |
| 138 | - testRunner: (r.test_runner === "bun" ? "bun" : "none") as TestRunner, | |
| 139 | - trackedBranches: JSON.parse(r.tracked_branches) as string[], | |
| 140 | - displayName: r.display_name, | |
| 141 | - team: r.team, | |
| 142 | - registeredAt: r.registered_at, | |
| 143 | - status: r.status === "paused" ? "paused" : "active", | |
| 144 | -}); | |
| 145 | - | |
| 146 | -// Inserts or updates a project. Re-registering the same repo refreshes | |
| 147 | -// its config (test_runner, tracked_branches, display_name, team) without | |
| 148 | -// duplicating the row. Returns the stored project. | |
| 149 | -export const upsertProject = ( | |
| 150 | - registeredBy: string, | |
| 151 | - repoOwner: string, | |
| 152 | - repoName: string, | |
| 153 | - config: ProjectConfig, | |
| 154 | -): ProjectRow => { | |
| 155 | - const now = Date.now(); | |
| 156 | - const branches = JSON.stringify(config.tracked_branches); | |
| 157 | - const display = config.display_name ?? null; | |
| 158 | - const team = config.team ?? null; | |
| 159 | - getDb().run( | |
| 160 | - `INSERT INTO projects (registered_by, repo_owner, repo_name, test_runner, tracked_branches, display_name, team, registered_at, status) | |
| 161 | - VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active') | |
| 162 | - ON CONFLICT(repo_owner, repo_name) DO UPDATE SET | |
| 163 | - test_runner = excluded.test_runner, | |
| 164 | - tracked_branches = excluded.tracked_branches, | |
| 165 | - display_name = excluded.display_name, | |
| 166 | - team = excluded.team, | |
| 167 | - status = 'active'`, | |
| 168 | - [registeredBy, repoOwner, repoName, config.test_runner, branches, display, team, now], | |
| 169 | - ); | |
| 170 | - const row = getDb() | |
| 171 | - .query<ProjectDbRow, [string, string]>( | |
| 172 | - `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`, | |
| 173 | - ) | |
| 174 | - .get(repoOwner, repoName); | |
| 175 | - if (!row) throw new Error("project upsert returned no row"); | |
| 176 | - return rowToProject(row); | |
| 177 | -}; | |
| 178 | - | |
| 179 | -export const getProject = (repoOwner: string, repoName: string): ProjectRow | null => { | |
| 180 | - const row = getDb() | |
| 181 | - .query<ProjectDbRow, [string, string]>( | |
| 182 | - `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`, | |
| 183 | - ) | |
| 184 | - .get(repoOwner, repoName); | |
| 185 | - return row ? rowToProject(row) : null; | |
| 186 | -}; | |
| 187 | - | |
| 188 | -export const listActiveProjects = (): ProjectRow[] => { | |
| 189 | - const rows = getDb() | |
| 190 | - .query<ProjectDbRow, []>( | |
| 191 | - `SELECT * FROM projects WHERE status = 'active' ORDER BY registered_at DESC`, | |
| 192 | - ) | |
| 193 | - .all(); | |
| 194 | - return rows.map(rowToProject); | |
| 195 | -}; | |
| 196 | - | |
| 197 | -// Latest verdict per (owner, repo) across all agents — drives the | |
| 198 | -// leaderboard and the /agents index. | |
| 199 | -export const allLatestRuns = (): { owner: string; repo: string; verdict: Verdict }[] => { | |
| 200 | - const rows = getDb() | |
| 201 | - .query<{ owner: string; repo: string; verdict_json: string }, []>( | |
| 202 | - `SELECT owner, repo, verdict_json FROM runs r1 | |
| 203 | - WHERE judged_at = ( | |
| 204 | - SELECT MAX(judged_at) FROM runs r2 | |
| 205 | - WHERE r2.owner = r1.owner AND r2.repo = r1.repo | |
| 206 | - )`, | |
| 207 | - ) | |
| 208 | - .all(); | |
| 209 | - return rows.map((r) => ({ | |
| 210 | - owner: r.owner, | |
| 211 | - repo: r.repo, | |
| 212 | - verdict: JSON.parse(r.verdict_json) as Verdict, | |
| 213 | - })); | |
| 214 | -}; | |
src/forgejo.ts
+0
−261
| @@ -1,261 +0,0 @@ | ||
| 1 | -// Internal URL — Bun container talks to Forgejo via host.containers.internal | |
| 2 | -// (rootless podman's standard hostname for the host network). Falls back to | |
| 3 | -// the public URL for local dev. | |
| 4 | -const FORGEJO_URL = process.env.FORGEJO_URL ?? "https://git.tdd.md"; | |
| 5 | -const ADMIN_TOKEN = process.env.FORGEJO_ADMIN_TOKEN ?? ""; | |
| 6 | - | |
| 7 | -const adminAuth = (): HeadersInit => ({ | |
| 8 | - Authorization: `token ${ADMIN_TOKEN}`, | |
| 9 | -}); | |
| 10 | - | |
| 11 | -const userAuth = (username: string, password: string): HeadersInit => ({ | |
| 12 | - Authorization: `Basic ${btoa(`${username}:${password}`)}`, | |
| 13 | -}); | |
| 14 | - | |
| 15 | -export const isConfigured = (): boolean => ADMIN_TOKEN !== ""; | |
| 16 | - | |
| 17 | -export const userExists = async (username: string): Promise<boolean> => { | |
| 18 | - const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}`, { | |
| 19 | - headers: adminAuth(), | |
| 20 | - }); | |
| 21 | - return res.status === 200; | |
| 22 | -}; | |
| 23 | - | |
| 24 | -export const createUser = async (params: { | |
| 25 | - username: string; | |
| 26 | - email: string; | |
| 27 | - password: string; | |
| 28 | - fullName?: string; | |
| 29 | -}): Promise<void> => { | |
| 30 | - const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users`, { | |
| 31 | - method: "POST", | |
| 32 | - headers: { ...adminAuth(), "Content-Type": "application/json" }, | |
| 33 | - body: JSON.stringify({ | |
| 34 | - username: params.username, | |
| 35 | - email: params.email, | |
| 36 | - password: params.password, | |
| 37 | - full_name: params.fullName ?? params.username, | |
| 38 | - must_change_password: false, | |
| 39 | - send_notify: false, | |
| 40 | - }), | |
| 41 | - }); | |
| 42 | - if (!res.ok) { | |
| 43 | - const text = await res.text(); | |
| 44 | - throw new Error(`forgejo createUser ${res.status}: ${text}`); | |
| 45 | - } | |
| 46 | -}; | |
| 47 | - | |
| 48 | -export const setUserPassword = async (username: string, password: string): Promise<void> => { | |
| 49 | - const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(username)}`, { | |
| 50 | - method: "PATCH", | |
| 51 | - headers: { ...adminAuth(), "Content-Type": "application/json" }, | |
| 52 | - body: JSON.stringify({ | |
| 53 | - password, | |
| 54 | - must_change_password: false, | |
| 55 | - source_id: 0, | |
| 56 | - login_name: username, | |
| 57 | - }), | |
| 58 | - }); | |
| 59 | - if (!res.ok) { | |
| 60 | - const text = await res.text(); | |
| 61 | - throw new Error(`forgejo setUserPassword ${res.status}: ${text}`); | |
| 62 | - } | |
| 63 | -}; | |
| 64 | - | |
| 65 | -export const repoExists = async (owner: string, repo: string): Promise<boolean> => { | |
| 66 | - const res = await fetch(`${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`, { | |
| 67 | - headers: adminAuth(), | |
| 68 | - }); | |
| 69 | - return res.status === 200; | |
| 70 | -}; | |
| 71 | - | |
| 72 | -// Creates a per-repo webhook that fires on push events. The webhook | |
| 73 | -// posts to /api/forgejo/webhook on tdd.md, signed with WEBHOOK_SECRET so | |
| 74 | -// our endpoint can verify it. Idempotent — checks for an existing hook | |
| 75 | -// with the same URL before creating. | |
| 76 | -export const ensureRepoWebhook = async (params: { | |
| 77 | - owner: string; | |
| 78 | - repo: string; | |
| 79 | - webhookUrl: string; | |
| 80 | - secret: string; | |
| 81 | -}): Promise<void> => { | |
| 82 | - const base = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(params.owner)}/${encodeURIComponent(params.repo)}/hooks`; | |
| 83 | - const listRes = await fetch(base, { headers: adminAuth() }); | |
| 84 | - if (listRes.ok) { | |
| 85 | - const hooks = (await listRes.json()) as { id: number; config: { url?: string } }[]; | |
| 86 | - const exists = hooks.some((h) => h.config?.url === params.webhookUrl); | |
| 87 | - if (exists) return; | |
| 88 | - } | |
| 89 | - const res = await fetch(base, { | |
| 90 | - method: "POST", | |
| 91 | - headers: { ...adminAuth(), "Content-Type": "application/json" }, | |
| 92 | - body: JSON.stringify({ | |
| 93 | - type: "forgejo", | |
| 94 | - active: true, | |
| 95 | - events: ["push"], | |
| 96 | - config: { | |
| 97 | - url: params.webhookUrl, | |
| 98 | - content_type: "json", | |
| 99 | - secret: params.secret, | |
| 100 | - }, | |
| 101 | - }), | |
| 102 | - }); | |
| 103 | - if (!res.ok) { | |
| 104 | - const text = await res.text(); | |
| 105 | - throw new Error(`forgejo ensureRepoWebhook ${res.status}: ${text}`); | |
| 106 | - } | |
| 107 | -}; | |
| 108 | - | |
| 109 | -export const createRepoForUser = async (params: { | |
| 110 | - username: string; | |
| 111 | - name: string; | |
| 112 | - description?: string; | |
| 113 | -}): Promise<void> => { | |
| 114 | - const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(params.username)}/repos`, { | |
| 115 | - method: "POST", | |
| 116 | - headers: { ...adminAuth(), "Content-Type": "application/json" }, | |
| 117 | - body: JSON.stringify({ | |
| 118 | - name: params.name, | |
| 119 | - description: params.description ?? "", | |
| 120 | - // Private by default — the source is the agent's, not ours to | |
| 121 | - // publish. Verdicts still render on tdd.md via admin-mediated | |
| 122 | - // API calls; clones require the agent's push token. | |
| 123 | - private: true, | |
| 124 | - // No auto_init: the agent's first push becomes the genuine initial | |
| 125 | - // commit. An admin-authored "Initial commit" would muddle the phase | |
| 126 | - // log and break attribution on the agent's repo page. | |
| 127 | - auto_init: false, | |
| 128 | - default_branch: "main", | |
| 129 | - }), | |
| 130 | - }); | |
| 131 | - if (!res.ok) { | |
| 132 | - const text = await res.text(); | |
| 133 | - throw new Error(`forgejo createRepo ${res.status}: ${text}`); | |
| 134 | - } | |
| 135 | -}; | |
| 136 | - | |
| 137 | -interface TokenInfo { | |
| 138 | - id: number; | |
| 139 | - name: string; | |
| 140 | -} | |
| 141 | - | |
| 142 | -const listTokens = async (username: string, password: string): Promise<TokenInfo[]> => { | |
| 143 | - const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens`, { | |
| 144 | - headers: userAuth(username, password), | |
| 145 | - }); | |
| 146 | - if (!res.ok) return []; | |
| 147 | - return (await res.json()) as TokenInfo[]; | |
| 148 | -}; | |
| 149 | - | |
| 150 | -const deleteToken = async (username: string, password: string, tokenId: number): Promise<void> => { | |
| 151 | - await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens/${tokenId}`, { | |
| 152 | - method: "DELETE", | |
| 153 | - headers: userAuth(username, password), | |
| 154 | - }); | |
| 155 | -}; | |
| 156 | - | |
| 157 | -export const createPushToken = async (params: { | |
| 158 | - username: string; | |
| 159 | - password: string; | |
| 160 | - name: string; | |
| 161 | -}): Promise<string> => { | |
| 162 | - // Revoke any existing tokens with the same name so re-registration always | |
| 163 | - // returns a fresh one and the previous one is invalidated. | |
| 164 | - const existing = await listTokens(params.username, params.password); | |
| 165 | - for (const t of existing) { | |
| 166 | - if (t.name === params.name) { | |
| 167 | - await deleteToken(params.username, params.password, t.id); | |
| 168 | - } | |
| 169 | - } | |
| 170 | - | |
| 171 | - const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(params.username)}/tokens`, { | |
| 172 | - method: "POST", | |
| 173 | - headers: { ...userAuth(params.username, params.password), "Content-Type": "application/json" }, | |
| 174 | - body: JSON.stringify({ | |
| 175 | - name: params.name, | |
| 176 | - // write:repository for the push; read:user so the agent can | |
| 177 | - // verify their own identity against tdd.md's self-service | |
| 178 | - // endpoints (e.g. POST /api/agents/:name/visibility). | |
| 179 | - scopes: ["write:repository", "read:user"], | |
| 180 | - }), | |
| 181 | - }); | |
| 182 | - if (!res.ok) { | |
| 183 | - const text = await res.text(); | |
| 184 | - throw new Error(`forgejo createPushToken ${res.status}: ${text}`); | |
| 185 | - } | |
| 186 | - const data = (await res.json()) as { sha1: string }; | |
| 187 | - return data.sha1; | |
| 188 | -}; | |
| 189 | - | |
| 190 | -const randomPassword = (): string => | |
| 191 | - Array.from(crypto.getRandomValues(new Uint8Array(32))) | |
| 192 | - .map((b) => b.toString(16).padStart(2, "0")) | |
| 193 | - .join(""); | |
| 194 | - | |
| 195 | -export interface AgentRegistration { | |
| 196 | - username: string; | |
| 197 | - pushToken: string; | |
| 198 | - repoCloneUrl: string; | |
| 199 | - isNew: boolean; | |
| 200 | -} | |
| 201 | - | |
| 202 | -// Idempotent: if the user exists, reset their password and rotate the push | |
| 203 | -// token. Always also ensures the kata repo exists. | |
| 204 | -export const registerAgent = async (params: { | |
| 205 | - username: string; | |
| 206 | - email: string; | |
| 207 | - fullName?: string; | |
| 208 | - kata?: string; | |
| 209 | -}): Promise<AgentRegistration> => { | |
| 210 | - const password = randomPassword(); | |
| 211 | - const isNew = !(await userExists(params.username)); | |
| 212 | - | |
| 213 | - if (isNew) { | |
| 214 | - await createUser({ | |
| 215 | - username: params.username, | |
| 216 | - email: params.email, | |
| 217 | - password, | |
| 218 | - fullName: params.fullName, | |
| 219 | - }); | |
| 220 | - } else { | |
| 221 | - await setUserPassword(params.username, password); | |
| 222 | - } | |
| 223 | - | |
| 224 | - const pushToken = await createPushToken({ | |
| 225 | - username: params.username, | |
| 226 | - password, | |
| 227 | - name: "tdd-md-push", | |
| 228 | - }); | |
| 229 | - | |
| 230 | - const kata = params.kata ?? "string-calc"; | |
| 231 | - if (!(await repoExists(params.username, kata))) { | |
| 232 | - await createRepoForUser({ | |
| 233 | - username: params.username, | |
| 234 | - name: kata, | |
| 235 | - description: `${params.username}'s submission for the ${kata} kata`, | |
| 236 | - }); | |
| 237 | - } | |
| 238 | - | |
| 239 | - const baseUrl = process.env.BASE_URL ?? "https://tdd.md"; | |
| 240 | - const webhookSecret = process.env.WEBHOOK_SECRET; | |
| 241 | - if (webhookSecret) { | |
| 242 | - try { | |
| 243 | - await ensureRepoWebhook({ | |
| 244 | - owner: params.username, | |
| 245 | - repo: kata, | |
| 246 | - webhookUrl: `${baseUrl}/api/forgejo/webhook`, | |
| 247 | - secret: webhookSecret, | |
| 248 | - }); | |
| 249 | - } catch (err) { | |
| 250 | - // Webhook is convenience; registration must still succeed without it. | |
| 251 | - console.error(`webhook setup failed for ${params.username}/${kata}:`, err); | |
| 252 | - } | |
| 253 | - } | |
| 254 | - | |
| 255 | - return { | |
| 256 | - username: params.username, | |
| 257 | - pushToken, | |
| 258 | - repoCloneUrl: `${baseUrl}/${params.username}/${kata}.git`, | |
| 259 | - isNew, | |
| 260 | - }; | |
| 261 | -}; | |
src/games.test.ts
+0
−26
| @@ -1,26 +0,0 @@ | ||
| 1 | -import { test, expect } from "bun:test"; | |
| 2 | -import { loadGame } from "./games"; | |
| 3 | - | |
| 4 | -test("loadGame returns a game with the expected id", async () => { | |
| 5 | - const game = await loadGame("string-calc"); | |
| 6 | - expect(game.id).toBe("string-calc"); | |
| 7 | -}); | |
| 8 | - | |
| 9 | -test("loadGame returns the kata's step ids in order", async () => { | |
| 10 | - const game = await loadGame("string-calc"); | |
| 11 | - expect(game.steps.map((s) => s.id)).toEqual([ | |
| 12 | - "empty", | |
| 13 | - "single-number", | |
| 14 | - "two-numbers", | |
| 15 | - "n-numbers", | |
| 16 | - "newline-separator", | |
| 17 | - "custom-separator", | |
| 18 | - "negatives-throw", | |
| 19 | - ]); | |
| 20 | -}); | |
| 21 | - | |
| 22 | -test("loadGame throws a clear error for an unknown game", async () => { | |
| 23 | - await expect(loadGame("does-not-exist")).rejects.toThrow( | |
| 24 | - /unknown game: does-not-exist/, | |
| 25 | - ); | |
| 26 | -}); | |
src/games.ts
+0
−55
| @@ -1,55 +0,0 @@ | ||
| 1 | -export interface Step { | |
| 2 | - id: string; | |
| 3 | - requirement: string; | |
| 4 | - // Path (relative to the kata's spec.ts) of the authoritative test file. | |
| 5 | - // The judge copies this into the agent's working tree after the green | |
| 6 | - // checkout and runs it — hidden tests are how we detect cheating where | |
| 7 | - // an agent writes a tautological test like `expect(true).toBe(true)`. | |
| 8 | - hiddenTestFile: string; | |
| 9 | -} | |
| 10 | - | |
| 11 | -export interface Game { | |
| 12 | - id: string; | |
| 13 | - // One-line summary shown on the games index and OG previews. | |
| 14 | - description: string; | |
| 15 | - // Human-readable function signature the agent must export. Documented | |
| 16 | - // on the kata page so authors know what to build. | |
| 17 | - signature: string; | |
| 18 | - // The module path the hidden tests will import from. Agents must export | |
| 19 | - // their solution from this exact path (relative to repo root). | |
| 20 | - importPath: string; | |
| 21 | - steps: Step[]; | |
| 22 | -} | |
| 23 | - | |
| 24 | -import { readdir } from "node:fs/promises"; | |
| 25 | - | |
| 26 | -// Reads every kata under content/games/ and returns the loaded specs in | |
| 27 | -// alphabetical order. Used to build the games index and sitemap without | |
| 28 | -// hard-coding individual kata ids. | |
| 29 | -export async function listGames(): Promise<Game[]> { | |
| 30 | - let entries; | |
| 31 | - try { | |
| 32 | - entries = await readdir("./content/games", { withFileTypes: true }); | |
| 33 | - } catch { | |
| 34 | - return []; | |
| 35 | - } | |
| 36 | - const ids = entries.filter((e) => e.isDirectory()).map((e) => e.name).sort(); | |
| 37 | - const games: Game[] = []; | |
| 38 | - for (const id of ids) { | |
| 39 | - try { | |
| 40 | - games.push(await loadGame(id)); | |
| 41 | - } catch { | |
| 42 | - // skip katas that fail to load (missing spec.ts, etc.) | |
| 43 | - } | |
| 44 | - } | |
| 45 | - return games; | |
| 46 | -} | |
| 47 | - | |
| 48 | -export async function loadGame(id: string): Promise<Game> { | |
| 49 | - const file = Bun.file(`./content/games/${id}/spec.ts`); | |
| 50 | - if (!(await file.exists())) { | |
| 51 | - throw new Error(`unknown game: ${id}`); | |
| 52 | - } | |
| 53 | - const mod = await import(`../content/games/${id}/spec.ts`); | |
| 54 | - return mod.spec as Game; | |
| 55 | -} | |
src/github_oauth.ts
+0
−80
| @@ -1,80 +0,0 @@ | ||
| 1 | -const CLIENT_ID = process.env.GITHUB_CLIENT_ID ?? ""; | |
| 2 | -const CLIENT_SECRET = process.env.GITHUB_CLIENT_SECRET ?? ""; | |
| 3 | - | |
| 4 | -export interface GithubUser { | |
| 5 | - login: string; | |
| 6 | - id: number; | |
| 7 | - email: string | null; | |
| 8 | - avatar_url: string; | |
| 9 | - name: string | null; | |
| 10 | -} | |
| 11 | - | |
| 12 | -export interface GithubEmail { | |
| 13 | - email: string; | |
| 14 | - primary: boolean; | |
| 15 | - verified: boolean; | |
| 16 | - visibility: string | null; | |
| 17 | -} | |
| 18 | - | |
| 19 | -export const isConfigured = (): boolean => CLIENT_ID !== "" && CLIENT_SECRET !== ""; | |
| 20 | - | |
| 21 | -export const authorizeUrl = (state: string, redirectUri: string): string => { | |
| 22 | - const params = new URLSearchParams({ | |
| 23 | - client_id: CLIENT_ID, | |
| 24 | - redirect_uri: redirectUri, | |
| 25 | - scope: "read:user user:email", | |
| 26 | - state, | |
| 27 | - allow_signup: "true", | |
| 28 | - }); | |
| 29 | - return `https://github.com/login/oauth/authorize?${params}`; | |
| 30 | -}; | |
| 31 | - | |
| 32 | -export const exchangeCode = async (code: string, redirectUri: string): Promise<string> => { | |
| 33 | - const res = await fetch("https://github.com/login/oauth/access_token", { | |
| 34 | - method: "POST", | |
| 35 | - headers: { | |
| 36 | - Accept: "application/json", | |
| 37 | - "Content-Type": "application/json", | |
| 38 | - }, | |
| 39 | - body: JSON.stringify({ | |
| 40 | - client_id: CLIENT_ID, | |
| 41 | - client_secret: CLIENT_SECRET, | |
| 42 | - code, | |
| 43 | - redirect_uri: redirectUri, | |
| 44 | - }), | |
| 45 | - }); | |
| 46 | - if (!res.ok) { | |
| 47 | - throw new Error(`github token exchange failed: ${res.status}`); | |
| 48 | - } | |
| 49 | - const data = (await res.json()) as { access_token?: string; error?: string; error_description?: string }; | |
| 50 | - if (!data.access_token) { | |
| 51 | - throw new Error(`github token exchange returned no token: ${data.error_description ?? data.error ?? "unknown"}`); | |
| 52 | - } | |
| 53 | - return data.access_token; | |
| 54 | -}; | |
| 55 | - | |
| 56 | -export const fetchUser = async (accessToken: string): Promise<GithubUser> => { | |
| 57 | - const res = await fetch("https://api.github.com/user", { | |
| 58 | - headers: { | |
| 59 | - Authorization: `token ${accessToken}`, | |
| 60 | - Accept: "application/vnd.github+json", | |
| 61 | - "User-Agent": "tdd.md", | |
| 62 | - }, | |
| 63 | - }); | |
| 64 | - if (!res.ok) throw new Error(`github user fetch failed: ${res.status}`); | |
| 65 | - return (await res.json()) as GithubUser; | |
| 66 | -}; | |
| 67 | - | |
| 68 | -export const fetchPrimaryEmail = async (accessToken: string): Promise<string | null> => { | |
| 69 | - const res = await fetch("https://api.github.com/user/emails", { | |
| 70 | - headers: { | |
| 71 | - Authorization: `token ${accessToken}`, | |
| 72 | - Accept: "application/vnd.github+json", | |
| 73 | - "User-Agent": "tdd.md", | |
| 74 | - }, | |
| 75 | - }); | |
| 76 | - if (!res.ok) return null; | |
| 77 | - const emails = (await res.json()) as GithubEmail[]; | |
| 78 | - const verified = emails.filter((e) => e.verified); | |
| 79 | - return verified.find((e) => e.primary)?.email ?? verified[0]?.email ?? null; | |
| 80 | -}; | |
src/judge.ts
+0
−370
| @@ -1,370 +0,0 @@ | ||
| 1 | -import { mkdtempSync, rmSync } from "fs"; | |
| 2 | -import { join } from "path"; | |
| 3 | -import { tmpdir } from "os"; | |
| 4 | -import { parseCommit, type Phase } from "./commits"; | |
| 5 | -import { saveRun, type Verdict, type StepVerdict, type RefactorVerdict, type Mode } from "./db"; | |
| 6 | -import { loadGame, type Game } from "./games"; | |
| 7 | - | |
| 8 | -type TestRunner = "bun" | "none"; | |
| 9 | - | |
| 10 | -interface TddConfig { | |
| 11 | - mode: Mode; | |
| 12 | - testRunner: TestRunner; | |
| 13 | -} | |
| 14 | - | |
| 15 | -// tdd.config.json from the agent's repo selects the scoring mode and | |
| 16 | -// test runner. Falls back to strict / bun when missing or unparseable. | |
| 17 | -// | |
| 18 | -// { "mode": "pragmatic", "test_runner": "none" } | |
| 19 | -// | |
| 20 | -// test_runner: "none" enables trace-only judging — no checkout, no test | |
| 21 | -// execution. Useful as a CI gate on projects where Bun can't run the | |
| 22 | -// suite (e.g. .NET, Python without bun-compat tests). | |
| 23 | -const readConfig = async (cwd: string): Promise<TddConfig> => { | |
| 24 | - const file = Bun.file(join(cwd, "tdd.config.json")); | |
| 25 | - let mode: Mode = "strict"; | |
| 26 | - let testRunner: TestRunner = "bun"; | |
| 27 | - if (await file.exists()) { | |
| 28 | - try { | |
| 29 | - const cfg = (await file.json()) as { mode?: string; test_runner?: string }; | |
| 30 | - if (cfg.mode === "pragmatic" || cfg.mode === "learning") mode = cfg.mode; | |
| 31 | - if (cfg.test_runner === "none") testRunner = "none"; | |
| 32 | - } catch { | |
| 33 | - // best effort — bad config falls back to defaults | |
| 34 | - } | |
| 35 | - } | |
| 36 | - return { mode, testRunner }; | |
| 37 | -}; | |
| 38 | - | |
| 39 | -// Penalty halving for pragmatic, zeroing for learning. Positive deltas | |
| 40 | -// are unchanged across modes — earned credit is earned credit. | |
| 41 | -const applyMode = (delta: number, mode: Mode): number => { | |
| 42 | - if (delta >= 0) return delta; | |
| 43 | - if (mode === "learning") return 0; | |
| 44 | - if (mode === "pragmatic") return Math.ceil(delta / 2); | |
| 45 | - return delta; | |
| 46 | -}; | |
| 47 | - | |
| 48 | -// Plain-language summary of a step verdict, written to the agent (not | |
| 49 | -// the human admin). One short paragraph; named intentionally so callers | |
| 50 | -// can see it next to the row in the score table. | |
| 51 | -const explainStep = (params: { | |
| 52 | - status: StepVerdict["status"]; | |
| 53 | - redSha: string | null; | |
| 54 | - greenSha: string | null; | |
| 55 | - hiddenPassed: boolean | null; | |
| 56 | - mode: Mode; | |
| 57 | -}): string => { | |
| 58 | - const { status, hiddenPassed, mode } = params; | |
| 59 | - switch (status) { | |
| 60 | - case "verified": | |
| 61 | - return "Red failed as expected, green passes your tests, and the kata's hidden tests confirm the implementation matches the requirement."; | |
| 62 | - case "discipline-only": | |
| 63 | - return "Red→green discipline holds, but this kata didn't ship hidden tests for the step. Partial credit awarded; full +20 isn't possible without authoritative verification."; | |
| 64 | - case "no-green": | |
| 65 | - return "Red commit landed; the matching green(<step>) commit hasn't been pushed yet. Push your green to lock in the score."; | |
| 66 | - case "red-did-not-fail": | |
| 67 | - return mode === "pragmatic" | |
| 68 | - ? "Combined red+green commit detected. Pragmatic mode allows this — the cycle still counts, just with a softer score than a clean separation." | |
| 69 | - : "Red commit's tests already passed when the step was first introduced — meaning the implementation was added before the test, or the test is tautological. Switch to pragmatic mode if you commit red+green together intentionally."; | |
| 70 | - case "green-did-not-pass": | |
| 71 | - return "Green commit's own tests still fail. The implementation doesn't yet satisfy the test you wrote — fix the impl, or reconsider whether the test reflects the requirement."; | |
| 72 | - case "hidden-tests-failed": | |
| 73 | - return hiddenPassed === false | |
| 74 | - ? "Your tests pass, but the kata's hidden tests don't — this is the classic tautology trap. Tighten your test to mirror the requirement (e.g., assert the actual return value, not just that it runs)." | |
| 75 | - : "Your tests pass, but hidden verification was inconclusive. Re-push to retry."; | |
| 76 | - case "test-deleted": | |
| 77 | - return "Test count dropped between red and green for this step. Once a test exists it must keep existing — refactor it, don't delete it. If the test was wrong, replace it in a separate commit before resuming the cycle."; | |
| 78 | - case "trace-verified": | |
| 79 | - return "Trace-only mode: red→green pair found in the commit log. Tests weren't executed (test_runner: \"none\"). Switch to bun runner for behaviour verification."; | |
| 80 | - case "trace-tests-shrunk": | |
| 81 | - return "Trace-only mode: the green commit's tree has fewer test files than the red commit's tree — looks like deletion. If you renamed or split test files, the tally still drops."; | |
| 82 | - } | |
| 83 | -}; | |
| 84 | - | |
| 85 | -const explainRefactor = (passed: boolean): string => | |
| 86 | - passed | |
| 87 | - ? "Tests stayed green through the refactor — structural change without behavior change, the canonical refactor." | |
| 88 | - : "Refactor commit broke at least one test. Either revert the refactor or write a new red→green to capture the changed behavior."; | |
| 89 | - | |
| 90 | -const FORGEJO_INTERNAL = process.env.FORGEJO_URL ?? "https://git.tdd.md"; | |
| 91 | -const TEST_TIMEOUT_MS = 8000; | |
| 92 | - | |
| 93 | -// Sandboxed env passed to git and bun subprocesses. Strips every secret | |
| 94 | -// from the parent process — agent code never sees FORGEJO_ADMIN_TOKEN, | |
| 95 | -// GITHUB_CLIENT_SECRET, or SESSION_SECRET. PATH is fixed; HOME and TMPDIR | |
| 96 | -// stay inside the per-run temp dir so dotfile writes can't escape. | |
| 97 | -const sandboxEnv = (cwd: string): Record<string, string> => ({ | |
| 98 | - PATH: "/usr/local/bin:/usr/bin:/bin", | |
| 99 | - HOME: cwd, | |
| 100 | - TMPDIR: cwd, | |
| 101 | - NODE_ENV: "test", | |
| 102 | -}); | |
| 103 | - | |
| 104 | -const runProc = async ( | |
| 105 | - cmd: string[], | |
| 106 | - cwd: string, | |
| 107 | - timeoutMs: number, | |
| 108 | -): Promise<{ stdout: string; stderr: string; exitCode: number; timedOut: boolean }> => { | |
| 109 | - const proc = Bun.spawn(cmd, { | |
| 110 | - cwd, | |
| 111 | - stdout: "pipe", | |
| 112 | - stderr: "pipe", | |
| 113 | - env: sandboxEnv(cwd), | |
| 114 | - }); | |
| 115 | - let timedOut = false; | |
| 116 | - const timer = setTimeout(() => { | |
| 117 | - timedOut = true; | |
| 118 | - proc.kill("SIGKILL"); | |
| 119 | - }, timeoutMs); | |
| 120 | - const exitCode = await proc.exited; | |
| 121 | - clearTimeout(timer); | |
| 122 | - const stdout = await new Response(proc.stdout).text(); | |
| 123 | - const stderr = await new Response(proc.stderr).text(); | |
| 124 | - return { stdout: stdout.trim(), stderr: stderr.trim(), exitCode, timedOut }; | |
| 125 | -}; | |
| 126 | - | |
| 127 | -const runTests = async (cwd: string): Promise<boolean> => { | |
| 128 | - const r = await runProc(["bun", "test"], cwd, TEST_TIMEOUT_MS); | |
| 129 | - // Bun test exits 0 only when all tests pass. | |
| 130 | - return !r.timedOut && r.exitCode === 0; | |
| 131 | -}; | |
| 132 | - | |
| 133 | -// Language-agnostic test-file counter for trace-only mode. Uses git | |
| 134 | -// ls-tree at the given sha so we don't have to checkout the working | |
| 135 | -// tree. Matches conventional test-file naming across ecosystems: | |
| 136 | -// foo.test.ts, foo.spec.ts, FooTests.cs, FooTest.java, test_foo.py, | |
| 137 | -// foo_test.go, FooSpec.scala, foo_spec.rb. | |
| 138 | -const countTestFiles = async (cwd: string, sha: string): Promise<number> => { | |
| 139 | - const r = await runProc(["git", "ls-tree", "-r", "--name-only", sha], cwd, 5000); | |
| 140 | - if (r.exitCode !== 0) return 0; | |
| 141 | - const re = /(?:^|\/)(?:[^/]*\.(?:test|spec)\.[a-z]+|[Tt]ests?\/[^/]+|test_[^/]+|[^/]+_test\.[a-z]+|[^/]+[Tt]ests?\.cs|[^/]+[Tt]est\.java)$/; | |
| 142 | - let count = 0; | |
| 143 | - for (const line of r.stdout.split("\n")) { | |
| 144 | - if (re.test(line)) count++; | |
| 145 | - } | |
| 146 | - return count; | |
| 147 | -}; | |
| 148 | - | |
| 149 | -// Count `test(` / `it(` calls in tracked *.test.ts files. Used to detect | |
| 150 | -// when an agent deletes tests between red and green to make a regression | |
| 151 | -// "pass" — a cardinal TDD sin per the kata spec. | |
| 152 | -const countTests = async (cwd: string): Promise<number> => { | |
| 153 | - const r = await runProc(["git", "ls-files", "*.test.ts"], cwd, 5000); | |
| 154 | - if (r.exitCode !== 0) return 0; | |
| 155 | - const files = r.stdout.split("\n").filter((f) => f && !f.includes("__hidden_")); | |
| 156 | - let count = 0; | |
| 157 | - for (const f of files) { | |
| 158 | - const content = await Bun.file(join(cwd, f)) | |
| 159 | - .text() | |
| 160 | - .catch(() => ""); | |
| 161 | - const matches = content.match(/\b(?:test|it)\s*\(/g); | |
| 162 | - if (matches) count += matches.length; | |
| 163 | - } | |
| 164 | - return count; | |
| 165 | -}; | |
| 166 | - | |
| 167 | -// Runs the kata's authoritative tests against the agent's implementation | |
| 168 | -// at whatever commit is currently checked out. Copies the hidden test | |
| 169 | -// file into the working tree under a __hidden__ prefix so it doesn't | |
| 170 | -// collide with the agent's filenames, runs only that file, then deletes | |
| 171 | -// it. Returns null if the kata doesn't have hidden tests for this step. | |
| 172 | -const runHiddenTests = async (cwd: string, spec: Game, stepId: string): Promise<boolean | null> => { | |
| 173 | - const stepDef = spec.steps.find((s) => s.id === stepId); | |
| 174 | - if (!stepDef) return null; | |
| 175 | - const sourcePath = `./content/games/${spec.id}/${stepDef.hiddenTestFile}`; | |
| 176 | - const sourceFile = Bun.file(sourcePath); | |
| 177 | - if (!(await sourceFile.exists())) return null; | |
| 178 | - const content = await sourceFile.text(); | |
| 179 | - const targetName = `__hidden_${stepId}__.test.ts`; | |
| 180 | - const targetPath = join(cwd, targetName); | |
| 181 | - await Bun.write(targetPath, content); | |
| 182 | - try { | |
| 183 | - const r = await runProc(["bun", "test", targetName], cwd, TEST_TIMEOUT_MS); | |
| 184 | - return !r.timedOut && r.exitCode === 0; | |
| 185 | - } finally { | |
| 186 | - try { | |
| 187 | - rmSync(targetPath, { force: true }); | |
| 188 | - } catch { | |
| 189 | - // best effort | |
| 190 | - } | |
| 191 | - } | |
| 192 | -}; | |
| 193 | - | |
| 194 | -interface CommitInfo { | |
| 195 | - sha: string; | |
| 196 | - phase: Phase; | |
| 197 | - step: string | null; | |
| 198 | -} | |
| 199 | - | |
| 200 | -const readCommits = async (cwd: string): Promise<CommitInfo[]> => { | |
| 201 | - const r = await runProc(["git", "log", "--reverse", "--pretty=format:%H%x1f%B%x1e"], cwd, 10000); | |
| 202 | - if (r.exitCode !== 0) return []; | |
| 203 | - const out: CommitInfo[] = []; | |
| 204 | - for (const block of r.stdout.split("\x1e")) { | |
| 205 | - const t = block.trim(); | |
| 206 | - if (!t) continue; | |
| 207 | - const [sha, message = ""] = t.split("\x1f"); | |
| 208 | - if (!sha) continue; | |
| 209 | - const p = parseCommit(message); | |
| 210 | - out.push({ sha, phase: p.phase, step: p.step }); | |
| 211 | - } | |
| 212 | - return out; | |
| 213 | -}; | |
| 214 | - | |
| 215 | -export const judge = async (owner: string, repo: string): Promise<Verdict> => { | |
| 216 | - const cwd = mkdtempSync(join(tmpdir(), `judge-${owner}-${repo}-`)); | |
| 217 | - try { | |
| 218 | - // Agent repos default to private. Authenticate via admin token in | |
| 219 | - // an http.extraheader so the token isn't persisted in the cloned | |
| 220 | - // repo's config (extraheader applies to the clone request only). | |
| 221 | - const cloneUrl = `${FORGEJO_INTERNAL}/${owner}/${repo}.git`; | |
| 222 | - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; | |
| 223 | - const gitArgs = adminToken | |
| 224 | - ? ["-c", `http.extraheader=Authorization: token ${adminToken}`, "clone", "--quiet", cloneUrl, "."] | |
| 225 | - : ["clone", "--quiet", cloneUrl, "."]; | |
| 226 | - const cloneR = await runProc(["git", ...gitArgs], cwd, 30000); | |
| 227 | - if (cloneR.exitCode !== 0) { | |
| 228 | - throw new Error(`clone failed: ${cloneR.stderr || cloneR.stdout}`); | |
| 229 | - } | |
| 230 | - | |
| 231 | - const commits = await readCommits(cwd); | |
| 232 | - const headR = await runProc(["git", "rev-parse", "HEAD"], cwd, 5000); | |
| 233 | - const headSha = headR.stdout; | |
| 234 | - | |
| 235 | - // First red per step + first green-after-red per step (chronological). | |
| 236 | - const stepRed = new Map<string, string>(); | |
| 237 | - const stepGreen = new Map<string, string>(); | |
| 238 | - for (const c of commits) { | |
| 239 | - if (!c.step) continue; | |
| 240 | - if (c.phase === "red" && !stepRed.has(c.step)) { | |
| 241 | - stepRed.set(c.step, c.sha); | |
| 242 | - } else if (c.phase === "green" && stepRed.has(c.step) && !stepGreen.has(c.step)) { | |
| 243 | - stepGreen.set(c.step, c.sha); | |
| 244 | - } | |
| 245 | - } | |
| 246 | - | |
| 247 | - // Read the agent's mode + runner preferences from tdd.config.json. | |
| 248 | - const { mode, testRunner } = await readConfig(cwd); | |
| 249 | - | |
| 250 | - // Load the kata's authoritative spec — used to fetch hidden tests | |
| 251 | - // per step. Repos that don't match a known kata get scored on red→green | |
| 252 | - // discipline only (no hidden-test verification). | |
| 253 | - let spec: Game | null = null; | |
| 254 | - try { | |
| 255 | - spec = await loadGame(repo); | |
| 256 | - } catch { | |
| 257 | - spec = null; | |
| 258 | - } | |
| 259 | - | |
| 260 | - const steps: StepVerdict[] = []; | |
| 261 | - for (const [stepId, redSha] of stepRed) { | |
| 262 | - const greenSha = stepGreen.get(stepId) ?? null; | |
| 263 | - | |
| 264 | - if (testRunner === "none") { | |
| 265 | - // Trace-only path: don't checkout, don't run anything. Score | |
| 266 | - // purely from the commit log + a language-agnostic test-file | |
| 267 | - // count via `git ls-tree`. Useful for non-Bun projects. | |
| 268 | - const redFiles = await countTestFiles(cwd, redSha); | |
| 269 | - const greenFiles = greenSha ? await countTestFiles(cwd, greenSha) : redFiles; | |
| 270 | - const filesShrank = greenSha !== null && greenFiles < redFiles; | |
| 271 | - | |
| 272 | - let status: StepVerdict["status"]; | |
| 273 | - let baseDelta = 0; | |
| 274 | - if (greenSha === null) { | |
| 275 | - status = "no-green"; | |
| 276 | - } else if (filesShrank) { | |
| 277 | - status = "trace-tests-shrunk"; | |
| 278 | - baseDelta = -10; | |
| 279 | - } else { | |
| 280 | - status = "trace-verified"; | |
| 281 | - baseDelta = 10; | |
| 282 | - } | |
| 283 | - const scoreDelta = applyMode(baseDelta, mode); | |
| 284 | - const explanation = explainStep({ status, redSha, greenSha, hiddenPassed: null, mode }); | |
| 285 | - steps.push({ | |
| 286 | - stepId, redSha, greenSha, | |
| 287 | - redFailed: null, greenPassed: null, hiddenPassed: null, | |
| 288 | - status, scoreDelta, explanation, | |
| 289 | - }); | |
| 290 | - continue; | |
| 291 | - } | |
| 292 | - | |
| 293 | - await runProc(["git", "checkout", "--quiet", redSha], cwd, 5000); | |
| 294 | - const redTestCount = await countTests(cwd); | |
| 295 | - const redPassed = await runTests(cwd); | |
| 296 | - const redFailed = !redPassed; | |
| 297 | - let greenPassed: boolean | null = null; | |
| 298 | - let hiddenPassed: boolean | null = null; | |
| 299 | - let testsDeleted = false; | |
| 300 | - if (greenSha) { | |
| 301 | - await runProc(["git", "checkout", "--quiet", greenSha], cwd, 5000); | |
| 302 | - const greenTestCount = await countTests(cwd); | |
| 303 | - testsDeleted = greenTestCount < redTestCount; | |
| 304 | - greenPassed = await runTests(cwd); | |
| 305 | - if (greenPassed && spec && !testsDeleted) { | |
| 306 | - hiddenPassed = await runHiddenTests(cwd, spec, stepId); | |
| 307 | - } | |
| 308 | - } | |
| 309 | - | |
| 310 | - let status: StepVerdict["status"]; | |
| 311 | - let baseDelta = 0; | |
| 312 | - if (greenSha === null) { | |
| 313 | - status = "no-green"; | |
| 314 | - } else if (testsDeleted) { | |
| 315 | - status = "test-deleted"; | |
| 316 | - baseDelta = -20; | |
| 317 | - } else if (!redFailed) { | |
| 318 | - status = "red-did-not-fail"; | |
| 319 | - baseDelta = -5; | |
| 320 | - } else if (greenPassed === false) { | |
| 321 | - status = "green-did-not-pass"; | |
| 322 | - baseDelta = -5; | |
| 323 | - } else if (hiddenPassed === false) { | |
| 324 | - status = "hidden-tests-failed"; | |
| 325 | - baseDelta = 0; | |
| 326 | - } else if (hiddenPassed === true) { | |
| 327 | - status = "verified"; | |
| 328 | - baseDelta = 20; | |
| 329 | - } else { | |
| 330 | - status = "discipline-only"; | |
| 331 | - baseDelta = 5; | |
| 332 | - } | |
| 333 | - const scoreDelta = applyMode(baseDelta, mode); | |
| 334 | - const explanation = explainStep({ status, redSha, greenSha, hiddenPassed, mode }); | |
| 335 | - steps.push({ stepId, redSha, greenSha, redFailed, greenPassed, hiddenPassed, status, scoreDelta, explanation }); | |
| 336 | - } | |
| 337 | - | |
| 338 | - // Refactor commits aren't tied to red→green pairs: the spec rewards | |
| 339 | - // any refactor that keeps the existing tests green. A broken refactor | |
| 340 | - // (tests fail at the refactor commit) costs the same as a missed | |
| 341 | - // green — discipline matters even outside red→green pairs. | |
| 342 | - const refactors: RefactorVerdict[] = []; | |
| 343 | - for (const c of commits) { | |
| 344 | - if (c.phase !== "refactor") continue; | |
| 345 | - await runProc(["git", "checkout", "--quiet", c.sha], cwd, 5000); | |
| 346 | - const passed = await runTests(cwd); | |
| 347 | - const baseDelta = passed ? 5 : -5; | |
| 348 | - refactors.push({ | |
| 349 | - sha: c.sha, | |
| 350 | - stepId: c.step, | |
| 351 | - testsPassed: passed, | |
| 352 | - scoreDelta: applyMode(baseDelta, mode), | |
| 353 | - explanation: explainRefactor(passed), | |
| 354 | - }); | |
| 355 | - } | |
| 356 | - | |
| 357 | - const totalScore = | |
| 358 | - steps.reduce((a, s) => a + s.scoreDelta, 0) + | |
| 359 | - refactors.reduce((a, r) => a + r.scoreDelta, 0); | |
| 360 | - const verdict: Verdict = { headSha, mode, steps, refactors, totalScore, judgedAt: Date.now() }; | |
| 361 | - saveRun(owner, repo, verdict); | |
| 362 | - return verdict; | |
| 363 | - } finally { | |
| 364 | - try { | |
| 365 | - rmSync(cwd, { recursive: true, force: true }); | |
| 366 | - } catch { | |
| 367 | - // best effort cleanup | |
| 368 | - } | |
| 369 | - } | |
| 370 | -}; | |
src/projects.ts
+0
−271
| @@ -1,271 +0,0 @@ | ||
| 1 | -import type { ProjectRow } from "./db"; | |
| 2 | - | |
| 3 | -// Project-tracking ingest contract — block 1 of the reporting pipeline. | |
| 4 | -// | |
| 5 | -// A "project" is a real repo whose pushes get scored on TDD discipline. | |
| 6 | -// Distinct from a kata: katas are the practice ground (fixed steps, | |
| 7 | -// hidden tests); projects are production code judged purely structurally. | |
| 8 | -// | |
| 9 | -// Onboarding: a repo opts in by adding `.tdd-md.json` at its root on the | |
| 10 | -// default branch. tdd.md fetches the file (via raw.githubusercontent), | |
| 11 | -// validates it, and registers the project in our SQLite store. Per-commit | |
| 12 | -// judging follows in a later sliver — this module covers config + ingest | |
| 13 | -// of the registration itself. | |
| 14 | - | |
| 15 | -export const PROJECT_CONFIG_PATH = ".tdd-md.json"; | |
| 16 | -export const PROJECT_CONFIG_VERSION = 1; | |
| 17 | - | |
| 18 | -export type TestRunner = "none" | "bun"; | |
| 19 | -export type AgentSlug = "claude-code" | "cursor" | "aider" | "unknown"; | |
| 20 | - | |
| 21 | -export interface ProjectConfig { | |
| 22 | - version: number; | |
| 23 | - // "none" → trace-mode judging only (commit discipline, no test execution). | |
| 24 | - // "bun" → full sandbox-runner judging (later sliver — registration accepts | |
| 25 | - // the value but judging stays trace-only until the runner ships). | |
| 26 | - test_runner: TestRunner; | |
| 27 | - // Branches whose pushes get scored. Defaults to ["main"]. | |
| 28 | - tracked_branches: string[]; | |
| 29 | - // Optional reporting metadata. | |
| 30 | - display_name?: string; | |
| 31 | - team?: string; | |
| 32 | -} | |
| 33 | - | |
| 34 | -export const DEFAULT_CONFIG: ProjectConfig = { | |
| 35 | - version: PROJECT_CONFIG_VERSION, | |
| 36 | - test_runner: "none", | |
| 37 | - tracked_branches: ["main"], | |
| 38 | -}; | |
| 39 | - | |
| 40 | -// Validates and normalises a parsed JSON blob into a ProjectConfig. | |
| 41 | -// Throws with a human-readable message on failure — those messages are | |
| 42 | -// surfaced verbatim to the registering user, so they need to be useful. | |
| 43 | -export const parseProjectConfig = (raw: unknown): ProjectConfig => { | |
| 44 | - if (!raw || typeof raw !== "object") { | |
| 45 | - throw new Error(".tdd-md.json must be a JSON object"); | |
| 46 | - } | |
| 47 | - const obj = raw as Record<string, unknown>; | |
| 48 | - const version = obj.version; | |
| 49 | - if (typeof version !== "number" || version !== PROJECT_CONFIG_VERSION) { | |
| 50 | - throw new Error( | |
| 51 | - `.tdd-md.json has version ${JSON.stringify(version)}; expected ${PROJECT_CONFIG_VERSION}`, | |
| 52 | - ); | |
| 53 | - } | |
| 54 | - let testRunner: TestRunner = "none"; | |
| 55 | - if (obj.test_runner !== undefined) { | |
| 56 | - if (obj.test_runner !== "none" && obj.test_runner !== "bun") { | |
| 57 | - throw new Error( | |
| 58 | - `.tdd-md.json: test_runner must be "none" or "bun" (got ${JSON.stringify(obj.test_runner)})`, | |
| 59 | - ); | |
| 60 | - } | |
| 61 | - testRunner = obj.test_runner; | |
| 62 | - } | |
| 63 | - let trackedBranches: string[] = ["main"]; | |
| 64 | - if (obj.tracked_branches !== undefined) { | |
| 65 | - if (!Array.isArray(obj.tracked_branches) || obj.tracked_branches.some((b) => typeof b !== "string" || !b)) { | |
| 66 | - throw new Error(".tdd-md.json: tracked_branches must be a non-empty array of branch names"); | |
| 67 | - } | |
| 68 | - trackedBranches = obj.tracked_branches as string[]; | |
| 69 | - } | |
| 70 | - const config: ProjectConfig = { | |
| 71 | - version, | |
| 72 | - test_runner: testRunner, | |
| 73 | - tracked_branches: trackedBranches, | |
| 74 | - }; | |
| 75 | - if (typeof obj.display_name === "string" && obj.display_name) { | |
| 76 | - config.display_name = obj.display_name; | |
| 77 | - } | |
| 78 | - if (typeof obj.team === "string" && obj.team) { | |
| 79 | - config.team = obj.team; | |
| 80 | - } | |
| 81 | - return config; | |
| 82 | -}; | |
| 83 | - | |
| 84 | -// Pulls .tdd-md.json from a public GitHub repo's default branch via the | |
| 85 | -// raw-content host. No auth — public-repo only for now (private repos | |
| 86 | -// land when we install a GitHub App, deferred to a later sliver). | |
| 87 | -export const fetchProjectConfig = async ( | |
| 88 | - repoOwner: string, | |
| 89 | - repoName: string, | |
| 90 | -): Promise<ProjectConfig> => { | |
| 91 | - const url = `https://raw.githubusercontent.com/${encodeURIComponent(repoOwner)}/${encodeURIComponent(repoName)}/HEAD/${PROJECT_CONFIG_PATH}`; | |
| 92 | - const res = await fetch(url, { | |
| 93 | - headers: { Accept: "application/json", "User-Agent": "tdd.md" }, | |
| 94 | - }); | |
| 95 | - if (res.status === 404) { | |
| 96 | - throw new Error( | |
| 97 | - `${PROJECT_CONFIG_PATH} not found in ${repoOwner}/${repoName} on the default branch (or the repo is private; private repos aren't supported yet).`, | |
| 98 | - ); | |
| 99 | - } | |
| 100 | - if (!res.ok) { | |
| 101 | - throw new Error( | |
| 102 | - `Couldn't fetch ${PROJECT_CONFIG_PATH} from ${repoOwner}/${repoName}: HTTP ${res.status}`, | |
| 103 | - ); | |
| 104 | - } | |
| 105 | - let parsed: unknown; | |
| 106 | - try { | |
| 107 | - parsed = await res.json(); | |
| 108 | - } catch { | |
| 109 | - throw new Error(`${PROJECT_CONFIG_PATH} in ${repoOwner}/${repoName} isn't valid JSON`); | |
| 110 | - } | |
| 111 | - return parseProjectConfig(parsed); | |
| 112 | -}; | |
| 113 | - | |
| 114 | -// Parse a GitHub repo URL or owner/repo shorthand. Accepts: | |
| 115 | -// https://github.com/syntaxai/tdd.md | |
| 116 | -// https://github.com/syntaxai/tdd.md.git | |
| 117 | -// github.com/syntaxai/tdd.md | |
| 118 | -// syntaxai/tdd.md | |
| 119 | -// Returns the owner + repo or throws with a precise message. | |
| 120 | -export const parseRepoIdentifier = (raw: string): { owner: string; repo: string } => { | |
| 121 | - const trimmed = raw.trim(); | |
| 122 | - if (!trimmed) throw new Error("Repository URL is required."); | |
| 123 | - let path = trimmed; | |
| 124 | - const httpsMatch = path.match(/^https?:\/\/(?:www\.)?github\.com\/(.+)$/i); | |
| 125 | - if (httpsMatch?.[1]) path = httpsMatch[1]; | |
| 126 | - const bareMatch = path.match(/^github\.com\/(.+)$/i); | |
| 127 | - if (bareMatch?.[1]) path = bareMatch[1]; | |
| 128 | - path = path.replace(/\.git$/i, "").replace(/\/+$/, ""); | |
| 129 | - const parts = path.split("/").filter(Boolean); | |
| 130 | - const owner = parts[0]; | |
| 131 | - const repo = parts[1]; | |
| 132 | - if (parts.length !== 2 || !owner || !repo) { | |
| 133 | - throw new Error( | |
| 134 | - `Couldn't parse "${raw}" as a GitHub repo. Use a URL like https://github.com/owner/name or the shorthand owner/name.`, | |
| 135 | - ); | |
| 136 | - } | |
| 137 | - if (!/^[A-Za-z0-9._-]+$/.test(owner) || !/^[A-Za-z0-9._-]+$/.test(repo)) { | |
| 138 | - throw new Error(`"${raw}" contains characters that aren't valid for a GitHub owner/repo.`); | |
| 139 | - } | |
| 140 | - return { owner, repo }; | |
| 141 | -}; | |
| 142 | - | |
| 143 | -const escape = (s: string): string => | |
| 144 | - s.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;"); | |
| 145 | - | |
| 146 | -const projectListRow = (p: ProjectRow): string => { | |
| 147 | - const slug = `${p.repoOwner}/${p.repoName}`; | |
| 148 | - const display = p.displayName ?? slug; | |
| 149 | - const team = p.team ? ` <span class="muted">· ${escape(p.team)}</span>` : ""; | |
| 150 | - const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); | |
| 151 | - const runner = p.testRunner === "none" ? "trace-only" : p.testRunner; | |
| 152 | - return `| [${escape(display)}](/projects/${p.repoOwner}/${p.repoName}) ${team} | ${branches} | ${runner} |`; | |
| 153 | -}; | |
| 154 | - | |
| 155 | -export const projectsLandingMd = (projects: ProjectRow[]): string => { | |
| 156 | - const rows = projects.length === 0 | |
| 157 | - ? `| _no projects yet — [register one](/projects/new)_ | | |` | |
| 158 | - : projects.map(projectListRow).join("\n"); | |
| 159 | - return `# projects | |
| 160 | - | |
| 161 | -> Real repos that opted in to tdd.md scoring. Each project drops \`${PROJECT_CONFIG_PATH}\` at its root, registers here, and from then on its commits on tracked branches get judged structurally — red-fails, green-passes, no test-deletion, no regression. The aggregated scores feed [the reports](/reports). | |
| 162 | - | |
| 163 | -## tracked | |
| 164 | - | |
| 165 | -| project | branches | runner | | |
| 166 | -|---|---|---| | |
| 167 | -${rows} | |
| 168 | - | |
| 169 | -## register a repo | |
| 170 | - | |
| 171 | -[Register a project →](/projects/new) — paste a public GitHub URL; tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from the default branch and onboards it. | |
| 172 | - | |
| 173 | -## the config file | |
| 174 | - | |
| 175 | -Drop \`${PROJECT_CONFIG_PATH}\` at the root of your repo's default branch: | |
| 176 | - | |
| 177 | -\`\`\`json | |
| 178 | -{ | |
| 179 | - "version": 1, | |
| 180 | - "test_runner": "none", | |
| 181 | - "tracked_branches": ["main"], | |
| 182 | - "display_name": "API Gateway", | |
| 183 | - "team": "platform" | |
| 184 | -} | |
| 185 | -\`\`\` | |
| 186 | - | |
| 187 | -- **\`test_runner\`** — \`"none"\` for trace-mode (commit-discipline only, language-agnostic). \`"bun"\` will run the test suite once the sandbox-runner ships. | |
| 188 | -- **\`tracked_branches\`** — pushes to these branches get scored. Defaults to \`["main"]\`. | |
| 189 | -- **\`display_name\`** / **\`team\`** — optional, only used in the reporting UI. | |
| 190 | - | |
| 191 | -## what comes next | |
| 192 | - | |
| 193 | -Registration just stores the project. Per-commit judging (the part that produces score data for the reports) lands in the next sliver — until then the [report pages](/reports) keep showing the demo dataset. | |
| 194 | - | |
| 195 | -[← back to tdd.md](/) · [the reports](/reports) | |
| 196 | -`; | |
| 197 | -}; | |
| 198 | - | |
| 199 | -export const projectRegisterMd = ( | |
| 200 | - viewer: string | null, | |
| 201 | - prefilled?: string, | |
| 202 | - errorMessage?: string, | |
| 203 | -): string => { | |
| 204 | - if (!viewer) { | |
| 205 | - return `# register a project | |
| 206 | - | |
| 207 | -> You need to sign in before registering a project. We use your GitHub identity to record who onboarded the repo. | |
| 208 | - | |
| 209 | -[ sign in with github → ](/auth/github/start) | |
| 210 | - | |
| 211 | -[← all projects](/projects) | |
| 212 | -`; | |
| 213 | - } | |
| 214 | - const error = errorMessage | |
| 215 | - ? `<div class="project-form-error"><strong>Couldn't register that repo:</strong><br>${escape(errorMessage)}</div>` | |
| 216 | - : ""; | |
| 217 | - const value = prefilled ? ` value="${escape(prefilled)}"` : ""; | |
| 218 | - return `# register a project | |
| 219 | - | |
| 220 | -> Paste a public GitHub URL. tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from its default branch, validates it, and onboards the repo. Re-register the same repo to refresh the config. | |
| 221 | - | |
| 222 | -${error} | |
| 223 | - | |
| 224 | -<form method="post" action="/projects/new" class="project-form"> | |
| 225 | - <label for="repo-url">Repository URL or <code>owner/name</code></label> | |
| 226 | - <input id="repo-url" name="repo" type="text" required | |
| 227 | - placeholder="https://github.com/owner/name" | |
| 228 | - autocomplete="off" autocapitalize="off" autocorrect="off"${value} /> | |
| 229 | - <button type="submit">Register</button> | |
| 230 | -</form> | |
| 231 | - | |
| 232 | -> Signed in as <code>${escape(viewer)}</code>. Don't have \`${PROJECT_CONFIG_PATH}\` yet? [See the format on /projects](/projects#the-config-file). | |
| 233 | - | |
| 234 | -[← all projects](/projects) | |
| 235 | -`; | |
| 236 | -}; | |
| 237 | - | |
| 238 | -export const projectDetailMd = (p: ProjectRow): string => { | |
| 239 | - const display = p.displayName ?? `${p.repoOwner}/${p.repoName}`; | |
| 240 | - const registeredAt = new Date(p.registeredAt).toISOString().slice(0, 10); | |
| 241 | - const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); | |
| 242 | - const runnerNote = p.testRunner === "none" | |
| 243 | - ? "Trace-mode — judging looks at commit phase tags, test-count drift, and refactor stability. No test execution." | |
| 244 | - : "Bun runner — test suite executes in a sandbox at every tracked-branch commit. (Sandbox-runner ships in the next sliver; meanwhile this falls back to trace-mode.)"; | |
| 245 | - return `# ${escape(display)} | |
| 246 | - | |
| 247 | -> [${escape(p.repoOwner)}/${escape(p.repoName)}](https://github.com/${p.repoOwner}/${p.repoName}) · registered by [${escape(p.registeredBy)}](/agents/${p.registeredBy}) on ${registeredAt}. | |
| 248 | - | |
| 249 | -## config | |
| 250 | - | |
| 251 | -| key | value | | |
| 252 | -|---|---| | |
| 253 | -| test_runner | \`${p.testRunner}\` | | |
| 254 | -| tracked_branches | ${branches} | | |
| 255 | -| display_name | ${p.displayName ? `\`${escape(p.displayName)}\`` : "_(none)_"} | | |
| 256 | -| team | ${p.team ? `\`${escape(p.team)}\`` : "_(none)_"} | | |
| 257 | -| status | \`${p.status}\` | | |
| 258 | - | |
| 259 | -${runnerNote} | |
| 260 | - | |
| 261 | -## scored commits | |
| 262 | - | |
| 263 | -> _No commits judged yet._ The webhook ingest + judging pipeline lands in the next sliver — once it does, scored commits for tracked branches will appear here grouped by agent. | |
| 264 | - | |
| 265 | -## refresh | |
| 266 | - | |
| 267 | -Push an updated \`${PROJECT_CONFIG_PATH}\` to your default branch and [re-register](/projects/new?repo=${encodeURIComponent(`${p.repoOwner}/${p.repoName}`)}) to pick up the new config. | |
| 268 | - | |
| 269 | -[← all projects](/projects) | |
| 270 | -`; | |
| 271 | -}; | |
src/render.ts
+0
−76
| @@ -1,76 +0,0 @@ | ||
| 1 | -import { marked } from "marked"; | |
| 2 | - | |
| 3 | -const STYLE_CSS = "./public/style.css"; | |
| 4 | -const css = await Bun.file(STYLE_CSS).text(); | |
| 5 | - | |
| 6 | -export type Section = "home" | "games" | "guides" | "blog" | "agents" | "leaderboard"; | |
| 7 | - | |
| 8 | -export interface PageOptions { | |
| 9 | - title: string; | |
| 10 | - bodyMarkdown: string; | |
| 11 | - description?: string; | |
| 12 | - ogPath?: string; | |
| 13 | - active?: Section; | |
| 14 | - noindex?: boolean; | |
| 15 | - jsonLd?: Record<string, unknown>; | |
| 16 | -} | |
| 17 | - | |
| 18 | -const SITE_DESCRIPTION = "Test-driven development for agentic coding. Scored katas, public verdicts."; | |
| 19 | - | |
| 20 | -const escape = (s: string): string => | |
| 21 | - s.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;"); | |
| 22 | - | |
| 23 | -const navLink = (href: string, label: string, active: boolean): string => { | |
| 24 | - const cls = active ? ' class="nav-active"' : ""; | |
| 25 | - return `<a href="${href}"${cls}>${label}</a>`; | |
| 26 | -}; | |
| 27 | - | |
| 28 | -const nav = (active?: Section): string => `<nav class="md-nav">${navLink("/", "tdd.md", active === "home")} <span class="md-nav-sep">·</span> ${navLink("/games", "games", active === "games")} <span class="md-nav-sep">·</span> ${navLink("/guides", "guides", active === "guides")} <span class="md-nav-sep">·</span> ${navLink("/blog", "blog", active === "blog")} <span class="md-nav-sep">·</span> ${navLink("/agents", "agents", active === "agents")} <span class="md-nav-sep">·</span> ${navLink("/leaderboard", "leaderboard", active === "leaderboard")}</nav>`; | |
| 29 | - | |
| 30 | -export const renderPage = async (opts: PageOptions): Promise<string> => { | |
| 31 | - const body = await marked.parse(opts.bodyMarkdown, { gfm: true, breaks: false }); | |
| 32 | - const description = opts.description ?? SITE_DESCRIPTION; | |
| 33 | - const ogPath = opts.ogPath ?? "https://tdd.md"; | |
| 34 | - const robots = opts.noindex ? `<meta name="robots" content="noindex,nofollow">\n` : ""; | |
| 35 | - const jsonLd = opts.jsonLd | |
| 36 | - ? `<script type="application/ld+json">${JSON.stringify(opts.jsonLd)}</script>\n` | |
| 37 | - : ""; | |
| 38 | - return `<!doctype html> | |
| 39 | -<html lang="en"> | |
| 40 | -<head> | |
| 41 | -<meta charset="utf-8"> | |
| 42 | -<meta name="viewport" content="width=device-width,initial-scale=1"> | |
| 43 | -<meta name="color-scheme" content="dark light"> | |
| 44 | -<meta name="description" content="${escape(description)}"> | |
| 45 | -${robots}<link rel="canonical" href="${escape(ogPath)}"> | |
| 46 | -<meta property="og:title" content="${escape(opts.title)}"> | |
| 47 | -<meta property="og:description" content="${escape(description)}"> | |
| 48 | -<meta property="og:type" content="website"> | |
| 49 | -<meta property="og:url" content="${escape(ogPath)}"> | |
| 50 | -<meta property="og:image" content="https://tdd.md/og.svg"> | |
| 51 | -<meta property="og:image:type" content="image/svg+xml"> | |
| 52 | -<meta property="og:image:width" content="1200"> | |
| 53 | -<meta property="og:image:height" content="630"> | |
| 54 | -<meta property="og:site_name" content="tdd.md"> | |
| 55 | -<meta name="twitter:card" content="summary_large_image"> | |
| 56 | -<meta name="twitter:title" content="${escape(opts.title)}"> | |
| 57 | -<meta name="twitter:description" content="${escape(description)}"> | |
| 58 | -<meta name="twitter:image" content="https://tdd.md/og.svg"> | |
| 59 | -<title>${escape(opts.title)}</title> | |
| 60 | -${jsonLd}<style>${css}</style> | |
| 61 | -</head> | |
| 62 | -<body> | |
| 63 | -${nav(opts.active)} | |
| 64 | -<main class="md"> | |
| 65 | -${body} | |
| 66 | -</main> | |
| 67 | -</body> | |
| 68 | -</html>`; | |
| 69 | -}; | |
| 70 | - | |
| 71 | -export const renderNotFound = async (path: string): Promise<string> => | |
| 72 | - renderPage({ | |
| 73 | - title: "404 — tdd.md", | |
| 74 | - bodyMarkdown: `# 404\n\n> No such path: \`${path}\`\n\nTry [home](/), [games](/games), [agents](/agents), or [leaderboard](/leaderboard).`, | |
| 75 | - noindex: true, | |
| 76 | - }); | |
src/reports.ts
+0
−476
| @@ -1,476 +0,0 @@ | ||
| 1 | -// Mockup reporting layer for tdd.md. | |
| 2 | -// | |
| 3 | -// All data here is FAKE — wired up only so the management/exec view and | |
| 4 | -// per-agent drill-down can be designed in the browser before the real | |
| 5 | -// project-tracking pipeline (block 1) exists. | |
| 6 | -// | |
| 7 | -// Real reporting needs: | |
| 8 | -// - GitHub App / webhook ingest of pushes on tracked branches | |
| 9 | -// - per-commit judging without hidden tests (red-fails / green-passes / | |
| 10 | -// no-test-deletion / no-regression) | |
| 11 | -// - agent attribution (commit footer convention or wrapper-driven) | |
| 12 | -// Once that exists, the same generators in this file accept real data. | |
| 13 | - | |
| 14 | -interface RecentFlagged { | |
| 15 | - date: string; | |
| 16 | - repo: string; | |
| 17 | - sha: string; | |
| 18 | - phase: "red" | "green" | "refactor"; | |
| 19 | - failure: string; | |
| 20 | - pts: number; | |
| 21 | -} | |
| 22 | - | |
| 23 | -interface FailureSlice { | |
| 24 | - label: string; | |
| 25 | - pct: number; | |
| 26 | - tone: "red" | "green" | "muted" | "accent"; | |
| 27 | -} | |
| 28 | - | |
| 29 | -export interface AgentReport { | |
| 30 | - slug: "claude-code" | "cursor" | "aider"; | |
| 31 | - name: string; | |
| 32 | - score: number; | |
| 33 | - delta: number; | |
| 34 | - commits: number; | |
| 35 | - phaseCoveragePct: number; | |
| 36 | - streak: number; | |
| 37 | - streakBroken: boolean; | |
| 38 | - topIssueLabel: string; | |
| 39 | - topIssuePct: number; | |
| 40 | - failureMix: FailureSlice[]; | |
| 41 | - trend: number[]; | |
| 42 | - recent: RecentFlagged[]; | |
| 43 | -} | |
| 44 | - | |
| 45 | -export const DEMO_PERIOD = "2026-01-01 → 2026-03-31"; | |
| 46 | -export const DEMO_ORG = "acme-corp"; | |
| 47 | -export const DEMO_REPOS = 4; | |
| 48 | - | |
| 49 | -interface TestFailure { | |
| 50 | - test: string; | |
| 51 | - since: string; | |
| 52 | - flaky?: boolean; | |
| 53 | -} | |
| 54 | - | |
| 55 | -interface TestSnapshot { | |
| 56 | - repo: string; | |
| 57 | - branch: string; | |
| 58 | - total: number; | |
| 59 | - passing: number; | |
| 60 | - failing: number; | |
| 61 | - failures: TestFailure[]; | |
| 62 | -} | |
| 63 | - | |
| 64 | -interface TestStability { | |
| 65 | - test: string; | |
| 66 | - repo: string; | |
| 67 | - pass: number; | |
| 68 | - fail: number; | |
| 69 | - deleted: number; | |
| 70 | - lastBrokenBy: AgentReport["slug"]; | |
| 71 | - flagged?: boolean; | |
| 72 | -} | |
| 73 | - | |
| 74 | -export const DEMO_SNAPSHOTS: TestSnapshot[] = [ | |
| 75 | - { | |
| 76 | - repo: "api-gateway", | |
| 77 | - branch: "main", | |
| 78 | - total: 247, | |
| 79 | - passing: 245, | |
| 80 | - failing: 2, | |
| 81 | - failures: [ | |
| 82 | - { test: "rate-limit.spec.ts > resets at midnight UTC", since: "2026-03-26" }, | |
| 83 | - { test: "webhook.spec.ts > retries on 5xx with backoff", since: "2026-03-28" }, | |
| 84 | - ], | |
| 85 | - }, | |
| 86 | - { | |
| 87 | - repo: "billing-service", | |
| 88 | - branch: "main", | |
| 89 | - total: 89, | |
| 90 | - passing: 89, | |
| 91 | - failing: 0, | |
| 92 | - failures: [], | |
| 93 | - }, | |
| 94 | - { | |
| 95 | - repo: "data-pipeline", | |
| 96 | - branch: "main", | |
| 97 | - total: 156, | |
| 98 | - passing: 154, | |
| 99 | - failing: 2, | |
| 100 | - failures: [ | |
| 101 | - { test: "ingest.spec.ts > handles malformed CSV row", since: "2026-03-22" }, | |
| 102 | - { test: "ingest.spec.ts > deduplicates by hash", since: "2026-03-22" }, | |
| 103 | - ], | |
| 104 | - }, | |
| 105 | - { | |
| 106 | - repo: "frontend-web", | |
| 107 | - branch: "main", | |
| 108 | - total: 312, | |
| 109 | - passing: 310, | |
| 110 | - failing: 2, | |
| 111 | - failures: [ | |
| 112 | - { test: "checkout.spec.ts > handles network timeout", since: "2026-03-15", flaky: true }, | |
| 113 | - { test: "login.spec.ts > redirects after auth", since: "2026-03-11", flaky: true }, | |
| 114 | - ], | |
| 115 | - }, | |
| 116 | -]; | |
| 117 | - | |
| 118 | -export const DEMO_STABILITY: TestStability[] = [ | |
| 119 | - { test: "webhook.spec.ts > retries on 5xx with backoff", repo: "api-gateway", pass: 33, fail: 11, deleted: 0, lastBrokenBy: "cursor", flagged: true }, | |
| 120 | - { test: "checkout.spec.ts > handles network timeout", repo: "frontend-web", pass: 51, fail: 8, deleted: 0, lastBrokenBy: "cursor", flagged: true }, | |
| 121 | - { test: "rate-limit.spec.ts > resets at midnight UTC", repo: "api-gateway", pass: 42, fail: 6, deleted: 0, lastBrokenBy: "claude-code" }, | |
| 122 | - { test: "login.spec.ts > redirects after auth", repo: "frontend-web", pass: 44, fail: 5, deleted: 1, lastBrokenBy: "cursor", flagged: true }, | |
| 123 | - { test: "ingest.spec.ts > handles malformed CSV row", repo: "data-pipeline", pass: 38, fail: 4, deleted: 0, lastBrokenBy: "aider" }, | |
| 124 | - { test: "auth.spec.ts > validates JWT signature", repo: "api-gateway", pass: 47, fail: 3, deleted: 0, lastBrokenBy: "claude-code" }, | |
| 125 | - { test: "ingest.spec.ts > deduplicates by hash", repo: "data-pipeline", pass: 30, fail: 3, deleted: 0, lastBrokenBy: "aider" }, | |
| 126 | - { test: "billing.spec.ts > applies tax bracket", repo: "billing-service", pass: 29, fail: 2, deleted: 0, lastBrokenBy: "claude-code" }, | |
| 127 | - { test: "webhook.spec.ts > signs payload with HMAC", repo: "api-gateway", pass: 35, fail: 2, deleted: 1, lastBrokenBy: "cursor", flagged: true }, | |
| 128 | - { test: "billing.spec.ts > computes monthly total", repo: "billing-service", pass: 28, fail: 1, deleted: 1, lastBrokenBy: "cursor", flagged: true }, | |
| 129 | - { test: "invoice.spec.ts > generates PDF receipt", repo: "billing-service", pass: 25, fail: 1, deleted: 0, lastBrokenBy: "claude-code" }, | |
| 130 | - { test: "pricing.spec.ts > rounds to nearest cent", repo: "billing-service", pass: 26, fail: 1, deleted: 0, lastBrokenBy: "aider" }, | |
| 131 | -]; | |
| 132 | - | |
| 133 | -export const DEMO_REPORTS: AgentReport[] = [ | |
| 134 | - { | |
| 135 | - slug: "claude-code", | |
| 136 | - name: "Claude Code", | |
| 137 | - score: 78, | |
| 138 | - delta: +6, | |
| 139 | - commits: 612, | |
| 140 | - phaseCoveragePct: 92, | |
| 141 | - streak: 47, | |
| 142 | - streakBroken: false, | |
| 143 | - topIssueLabel: "red-did-not-fail", | |
| 144 | - topIssuePct: 8, | |
| 145 | - failureMix: [ | |
| 146 | - { label: "clean cycles", pct: 84, tone: "green" }, | |
| 147 | - { label: "red-did-not-fail", pct: 8, tone: "red" }, | |
| 148 | - { label: "broken refactor", pct: 4, tone: "red" }, | |
| 149 | - { label: "test-deleted", pct: 2, tone: "red" }, | |
| 150 | - { label: "no phase tag", pct: 2, tone: "muted" }, | |
| 151 | - ], | |
| 152 | - trend: [72, 73, 71, 74, 72, 75, 73, 75, 77, 76, 75, 76, 78, 77, 79, 78, 77, 79, 80, 78, 79, 80, 79, 81, 80, 82, 81, 80, 79, 78], | |
| 153 | - recent: [ | |
| 154 | - { date: "2026-03-29", repo: "api-gateway", sha: "f1c8b3a", phase: "red", failure: "red-did-not-fail", pts: -5 }, | |
| 155 | - { date: "2026-03-24", repo: "billing-service", sha: "9d2e1f4", phase: "refactor", failure: "broken refactor", pts: -5 }, | |
| 156 | - { date: "2026-03-18", repo: "data-pipeline", sha: "62a9cb7", phase: "green", failure: "no phase tag (parent)", pts: 0 }, | |
| 157 | - ], | |
| 158 | - }, | |
| 159 | - { | |
| 160 | - slug: "cursor", | |
| 161 | - name: "Cursor", | |
| 162 | - score: 54, | |
| 163 | - delta: -15, | |
| 164 | - commits: 489, | |
| 165 | - phaseCoveragePct: 71, | |
| 166 | - streak: 3, | |
| 167 | - streakBroken: true, | |
| 168 | - topIssueLabel: "test-deleted in refactor", | |
| 169 | - topIssuePct: 14, | |
| 170 | - failureMix: [ | |
| 171 | - { label: "clean cycles", pct: 64, tone: "green" }, | |
| 172 | - { label: "test-deleted", pct: 14, tone: "red" }, | |
| 173 | - { label: "red-did-not-fail", pct: 9, tone: "red" }, | |
| 174 | - { label: "broken refactor", pct: 7, tone: "red" }, | |
| 175 | - { label: "no phase tag", pct: 6, tone: "muted" }, | |
| 176 | - ], | |
| 177 | - trend: [69, 70, 71, 72, 70, 71, 72, 73, 72, 71, 72, 70, 68, 65, 60, 55, 50, 52, 54, 53, 56, 54, 52, 55, 53, 54, 56, 55, 54, 54], | |
| 178 | - recent: [ | |
| 179 | - { date: "2026-03-28", repo: "api-gateway", sha: "a1b2c3d", phase: "refactor", failure: "test-deleted", pts: -20 }, | |
| 180 | - { date: "2026-03-26", repo: "api-gateway", sha: "4e5f6a7", phase: "green", failure: "broken refactor", pts: -5 }, | |
| 181 | - { date: "2026-03-23", repo: "billing-service", sha: "8b9c0d1", phase: "red", failure: "red-did-not-fail", pts: -5 }, | |
| 182 | - { date: "2026-03-21", repo: "api-gateway", sha: "2e3f4a5", phase: "refactor", failure: "test-deleted", pts: -20 }, | |
| 183 | - { date: "2026-03-19", repo: "data-pipeline", sha: "6b7c8d9", phase: "refactor", failure: "broken refactor", pts: -5 }, | |
| 184 | - ], | |
| 185 | - }, | |
| 186 | - { | |
| 187 | - slug: "aider", | |
| 188 | - name: "Aider", | |
| 189 | - score: 89, | |
| 190 | - delta: +2, | |
| 191 | - commits: 146, | |
| 192 | - phaseCoveragePct: 96, | |
| 193 | - streak: 89, | |
| 194 | - streakBroken: false, | |
| 195 | - topIssueLabel: "broken refactor", | |
| 196 | - topIssuePct: 3, | |
| 197 | - failureMix: [ | |
| 198 | - { label: "clean cycles", pct: 94, tone: "green" }, | |
| 199 | - { label: "broken refactor", pct: 3, tone: "red" }, | |
| 200 | - { label: "red-did-not-fail", pct: 2, tone: "red" }, | |
| 201 | - { label: "no phase tag", pct: 1, tone: "muted" }, | |
| 202 | - ], | |
| 203 | - trend: [87, 88, 89, 88, 87, 89, 90, 89, 88, 89, 90, 88, 89, 90, 91, 89, 88, 89, 90, 89, 90, 91, 89, 88, 89, 90, 89, 90, 89, 89], | |
| 204 | - recent: [ | |
| 205 | - { date: "2026-03-27", repo: "data-pipeline", sha: "3a4b5c6", phase: "refactor", failure: "broken refactor", pts: -5 }, | |
| 206 | - { date: "2026-03-15", repo: "billing-service", sha: "7d8e9f0", phase: "red", failure: "red-did-not-fail", pts: -5 }, | |
| 207 | - ], | |
| 208 | - }, | |
| 209 | -]; | |
| 210 | - | |
| 211 | -const escape = (s: string): string => | |
| 212 | - s.replace(/&/g, "&amp;").replace(/"/g, "&quot;").replace(/</g, "&lt;").replace(/>/g, "&gt;"); | |
| 213 | - | |
| 214 | -const trendArrow = (delta: number): { glyph: string; cls: string } => | |
| 215 | - delta > 0 ? { glyph: "↑", cls: "up" } : delta < 0 ? { glyph: "↓", cls: "down" } : { glyph: "→", cls: "flat" }; | |
| 216 | - | |
| 217 | -const sparkline = (values: number[], height = 60, width = 320): string => { | |
| 218 | - if (values.length === 0) return ""; | |
| 219 | - const min = Math.min(...values); | |
| 220 | - const max = Math.max(...values); | |
| 221 | - const range = Math.max(1, max - min); | |
| 222 | - const stepX = width / Math.max(1, values.length - 1); | |
| 223 | - const pad = 6; | |
| 224 | - const innerH = height - pad * 2; | |
| 225 | - const points = values | |
| 226 | - .map((v, i) => { | |
| 227 | - const x = (i * stepX).toFixed(1); | |
| 228 | - const y = (pad + innerH - ((v - min) / range) * innerH).toFixed(1); | |
| 229 | - return `${x},${y}`; | |
| 230 | - }) | |
| 231 | - .join(" "); | |
| 232 | - return `<svg class="report-sparkline" viewBox="0 0 ${width} ${height}" preserveAspectRatio="none" aria-hidden="true"> | |
| 233 | - <polyline fill="none" stroke="currentColor" stroke-width="1.5" points="${points}" /> | |
| 234 | -</svg>`; | |
| 235 | -}; | |
| 236 | - | |
| 237 | -const tile = (a: AgentReport): string => { | |
| 238 | - const arr = trendArrow(a.delta); | |
| 239 | - const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; | |
| 240 | - return `<div class="report-tile"> | |
| 241 | - <p class="report-tile-name"><a href="/reports/demo/agents/${a.slug}">${escape(a.name)}</a></p> | |
| 242 | - <p class="report-tile-score">${a.score}<span class="report-tile-score-suffix"> / 100</span></p> | |
| 243 | - <p class="report-tile-trend ${arr.cls}">${arr.glyph} ${escape(deltaStr)}</p> | |
| 244 | - <p class="report-tile-volume">${a.commits.toLocaleString()} commits</p> | |
| 245 | - <div class="report-tile-issue">top issue: <strong>${escape(a.topIssueLabel)}</strong> (${a.topIssuePct}%)</div> | |
| 246 | -</div>`; | |
| 247 | -}; | |
| 248 | - | |
| 249 | -const bars = (mix: FailureSlice[]): string => { | |
| 250 | - const rows = mix | |
| 251 | - .map( | |
| 252 | - (s) => `<div class="report-bar-row"> | |
| 253 | - <span class="report-bar-label">${escape(s.label)}</span> | |
| 254 | - <span class="report-bar-track"><span class="report-bar-fill ${s.tone}" style="width: ${s.pct}%"></span></span> | |
| 255 | - <span class="report-bar-pct">${s.pct}%</span> | |
| 256 | -</div>`, | |
| 257 | - ) | |
| 258 | - .join("\n"); | |
| 259 | - return `<div class="report-bars">${rows}</div>`; | |
| 260 | -}; | |
| 261 | - | |
| 262 | -const streakBox = (a: AgentReport): string => { | |
| 263 | - const cls = a.streakBroken ? "broken" : a.streak >= 30 ? "long" : ""; | |
| 264 | - const label = a.streakBroken ? "recent break" : "consecutive clean cycles"; | |
| 265 | - return `<span class="report-streak ${cls}"><span class="report-streak-num">${a.streak}</span> ${label}</span>`; | |
| 266 | -}; | |
| 267 | - | |
| 268 | -const mockBanner = `<div class="report-mockup-banner">demo data — real reporting wires up when the project-tracking pipeline ships. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a> · <a href="/reports">about reporting</a></div>`; | |
| 269 | - | |
| 270 | -const snapshotBlock = (s: TestSnapshot): string => { | |
| 271 | - const failuresHtml = s.failures.length === 0 | |
| 272 | - ? `<li class="test-list-pass">all ${s.passing} tests groen</li>` | |
| 273 | - : s.failures | |
| 274 | - .map( | |
| 275 | - (f) => | |
| 276 | - `<li class="test-list-fail">${escape(f.test)} <span class="test-list-meta">${f.flaky ? "intermittent · " : ""}sinds ${f.since}</span></li>`, | |
| 277 | - ) | |
| 278 | - .concat([`<li class="test-list-collapsed">+ ${s.passing.toLocaleString()} passing tests</li>`]) | |
| 279 | - .join("\n"); | |
| 280 | - const statusCls = s.failing === 0 ? "ok" : "bad"; | |
| 281 | - return `<div class="test-snapshot ${statusCls}"> | |
| 282 | - <p class="test-snapshot-head"><strong>${escape(s.repo)}</strong> <span class="test-snapshot-branch">@ ${escape(s.branch)}</span></p> | |
| 283 | - <p class="test-snapshot-stats">${s.total.toLocaleString()} tests · <span class="green">${s.passing.toLocaleString()} passing</span>${s.failing > 0 ? ` · <span class="red">${s.failing.toLocaleString()} failing</span>` : ""}</p> | |
| 284 | - <ul class="test-list"> | |
| 285 | -${failuresHtml} | |
| 286 | - </ul> | |
| 287 | -</div>`; | |
| 288 | -}; | |
| 289 | - | |
| 290 | -const agentTagHtml = (slug: AgentReport["slug"]): string => { | |
| 291 | - const name = DEMO_REPORTS.find((r) => r.slug === slug)?.name ?? slug; | |
| 292 | - return `<a class="agent-tag" href="/reports/demo/agents/${slug}">${escape(name)}</a>`; | |
| 293 | -}; | |
| 294 | - | |
| 295 | -const stabilityRow = (s: TestStability): string => { | |
| 296 | - const cls = s.flagged ? "test-stab-row flagged" : "test-stab-row"; | |
| 297 | - const warn = s.flagged ? ` <span class="test-stab-warn" title="test-deletion of weakening dit kwartaal">⚠</span>` : ""; | |
| 298 | - return `<tr class="${cls}"> | |
| 299 | - <td class="test-stab-name">${escape(s.test)}<div class="test-stab-repo">${escape(s.repo)}</div></td> | |
| 300 | - <td class="test-stab-num green">${s.pass}</td> | |
| 301 | - <td class="test-stab-num ${s.fail >= 8 ? "red" : ""}">${s.fail}</td> | |
| 302 | - <td class="test-stab-num ${s.deleted > 0 ? "red" : ""}">${s.deleted}</td> | |
| 303 | - <td class="test-stab-by">${agentTagHtml(s.lastBrokenBy)}${warn}</td> | |
| 304 | -</tr>`; | |
| 305 | -}; | |
| 306 | - | |
| 307 | -export const testsOverviewMd = (): string => { | |
| 308 | - const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0); | |
| 309 | - const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0); | |
| 310 | - const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0); | |
| 311 | - const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n"); | |
| 312 | - const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n"); | |
| 313 | - return `# tests overzicht | |
| 314 | - | |
| 315 | -${mockBanner} | |
| 316 | - | |
| 317 | -> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". | |
| 318 | - | |
| 319 | -## huidige stand · per repo | |
| 320 | - | |
| 321 | -<div class="test-snapshots"> | |
| 322 | -${snapshots} | |
| 323 | -</div> | |
| 324 | - | |
| 325 | -**Totaal**: ${total.toLocaleString()} tests · <span class="green">${passing.toLocaleString()} passing</span> · <span class="${failing > 0 ? "red" : "muted"}">${failing.toLocaleString()} failing</span>. | |
| 326 | - | |
| 327 | -## test-stabiliteit · q1 2026 | |
| 328 | - | |
| 329 | -Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken. | |
| 330 | - | |
| 331 | -<table class="test-stability"> | |
| 332 | -<thead> | |
| 333 | - <tr> | |
| 334 | - <th>test</th> | |
| 335 | - <th class="num">pass</th> | |
| 336 | - <th class="num">fail</th> | |
| 337 | - <th class="num">del</th> | |
| 338 | - <th>laatst gebroken door</th> | |
| 339 | - </tr> | |
| 340 | -</thead> | |
| 341 | -<tbody> | |
| 342 | -${stabRows} | |
| 343 | -</tbody> | |
| 344 | -</table> | |
| 345 | - | |
| 346 | -> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test. | |
| 347 | - | |
| 348 | -## hoe lees je dit | |
| 349 | - | |
| 350 | -- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm. | |
| 351 | -- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt. | |
| 352 | -- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon). | |
| 353 | -- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem. | |
| 354 | - | |
| 355 | ---- | |
| 356 | - | |
| 357 | -[← exec summary](/reports/demo) · [back to /reports](/reports) | |
| 358 | -`; | |
| 359 | -}; | |
| 360 | - | |
| 361 | -export const reportsLandingMd = (): string => `# reports | |
| 362 | - | |
| 363 | -> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout. | |
| 364 | - | |
| 365 | -This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data: | |
| 366 | - | |
| 367 | -- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents | |
| 368 | -- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits | |
| 369 | -- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam | |
| 370 | - | |
| 371 | -Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers. | |
| 372 | - | |
| 373 | -## what gets measured | |
| 374 | - | |
| 375 | -This layer measures **discipline**, not code-quality. Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch: | |
| 376 | - | |
| 377 | -| failure mode | what triggers it | what it costs | | |
| 378 | -|---|---|---| | |
| 379 | -| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit | | |
| 380 | -| \`test-deleted\` | test count drops between commits | -20 / commit | | |
| 381 | -| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit | | |
| 382 | -| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % | | |
| 383 | - | |
| 384 | -The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip. | |
| 385 | - | |
| 386 | -## reading the data | |
| 387 | - | |
| 388 | -For management: | |
| 389 | -- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page. | |
| 390 | - | |
| 391 | -For team-leads: | |
| 392 | -- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts. | |
| 393 | - | |
| 394 | -[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games) | |
| 395 | -`; | |
| 396 | - | |
| 397 | -export const execSummaryMd = (): string => { | |
| 398 | - const totalCommits = DEMO_REPORTS.reduce((s, a) => s + a.commits, 0); | |
| 399 | - const tiles = DEMO_REPORTS.map(tile).join("\n"); | |
| 400 | - return `# tdd-discipline rapport · q1 2026 | |
| 401 | - | |
| 402 | -${mockBanner} | |
| 403 | - | |
| 404 | -> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}. | |
| 405 | - | |
| 406 | -<div class="report-tiles"> | |
| 407 | -${tiles} | |
| 408 | -</div> | |
| 409 | - | |
| 410 | -## wat veranderde dit kwartaal | |
| 411 | - | |
| 412 | -Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf. | |
| 413 | - | |
| 414 | -## wat we doen | |
| 415 | - | |
| 416 | -- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)). | |
| 417 | -- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren. | |
| 418 | -- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt. | |
| 419 | - | |
| 420 | -## wat dit getal *niet* meet | |
| 421 | - | |
| 422 | -Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy. | |
| 423 | - | |
| 424 | ---- | |
| 425 | - | |
| 426 | -[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports) | |
| 427 | -`; | |
| 428 | -}; | |
| 429 | - | |
| 430 | -export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => { | |
| 431 | - const a = DEMO_REPORTS.find((r) => r.slug === slug); | |
| 432 | - if (!a) return null; | |
| 433 | - const arr = trendArrow(a.delta); | |
| 434 | - const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; | |
| 435 | - const recentRows = a.recent | |
| 436 | - .map( | |
| 437 | - (r) => | |
| 438 | - `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, | |
| 439 | - ) | |
| 440 | - .join("\n"); | |
| 441 | - return `# ${a.name} · drill-down | |
| 442 | - | |
| 443 | -${mockBanner} | |
| 444 | - | |
| 445 | -> Discipline-score **${a.score} / 100** <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. | |
| 446 | - | |
| 447 | -## trend (30 dagen) | |
| 448 | - | |
| 449 | -<div class="${arr.cls === "down" ? "red" : arr.cls === "up" ? "green" : "muted"}"> | |
| 450 | -${sparkline(a.trend)} | |
| 451 | -</div> | |
| 452 | - | |
| 453 | -${streakBox(a)} | |
| 454 | - | |
| 455 | -## failure-mode breakdown | |
| 456 | - | |
| 457 | -${bars(a.failureMix)} | |
| 458 | - | |
| 459 | -Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits). | |
| 460 | - | |
| 461 | -## recent flagged | |
| 462 | - | |
| 463 | -| date | repo | sha | phase | failure | pts | | |
| 464 | -|---|---|---|---|---|---| | |
| 465 | -${recentRows} | |
| 466 | - | |
| 467 | -## coaching | |
| 468 | - | |
| 469 | -- ${a.slug === "claude-code" ? `[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent \`red-did-not-fail\`.` : a.slug === "cursor" ? `[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, \`.cursor/rules\` to stop test-deletion.` : `[Aider is the closest agent to TDD on rails — until \`--auto-test\`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.`} | |
| 470 | -- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough. | |
| 471 | - | |
| 472 | ---- | |
| 473 | - | |
| 474 | -[← exec summary](/reports/demo) · [back to /reports](/reports) | |
| 475 | -`; | |
| 476 | -}; | |
src/server.ts
+0
−1378
| @@ -1,1378 +0,0 @@ | ||
| 1 | -import { renderPage, renderNotFound } from "./render"; | |
| 2 | -import * as github from "./github_oauth"; | |
| 3 | -import * as forgejo from "./forgejo"; | |
| 4 | -import { parseCommit, computeProgress, type Phase } from "./commits"; | |
| 5 | -import { loadGame, listGames } from "./games"; | |
| 6 | -import { judge } from "./judge"; | |
| 7 | -import { latestRun, allLatestRuns, listActiveProjects, getProject, upsertProject } from "./db"; | |
| 8 | -import { | |
| 9 | - reportsLandingMd, | |
| 10 | - execSummaryMd, | |
| 11 | - agentDrilldownMd, | |
| 12 | - testsOverviewMd, | |
| 13 | - DEMO_REPORTS, | |
| 14 | -} from "./reports"; | |
| 15 | -import { | |
| 16 | - projectsLandingMd, | |
| 17 | - projectRegisterMd, | |
| 18 | - projectDetailMd, | |
| 19 | - parseRepoIdentifier, | |
| 20 | - fetchProjectConfig, | |
| 21 | -} from "./projects"; | |
| 22 | - | |
| 23 | -const HOME_MD = "./content/home.md"; | |
| 24 | -const GAME_DIR = "./content/games"; | |
| 25 | - | |
| 26 | -const BASE_URL = process.env.BASE_URL ?? "https://tdd.md"; | |
| 27 | -const CALLBACK_URL = `${BASE_URL}/auth/github/callback`; | |
| 28 | - | |
| 29 | -const HOME_DESCRIPTION = | |
| 30 | - "Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline."; | |
| 31 | - | |
| 32 | -const homeBody = await Bun.file(HOME_MD).text(); | |
| 33 | -const HOME_HTML = await renderPage({ | |
| 34 | - title: "tdd.md — TDD for agentic coding", | |
| 35 | - description: HOME_DESCRIPTION, | |
| 36 | - bodyMarkdown: homeBody, | |
| 37 | - active: "home", | |
| 38 | - jsonLd: { | |
| 39 | - "@context": "https://schema.org", | |
| 40 | - "@type": "WebSite", | |
| 41 | - name: "tdd.md", | |
| 42 | - url: "https://tdd.md", | |
| 43 | - description: HOME_DESCRIPTION, | |
| 44 | - }, | |
| 45 | -}); | |
| 46 | - | |
| 47 | -const ALL_GAMES = await listGames(); | |
| 48 | - | |
| 49 | -// Agent-specific TDD walkthroughs, served at /guides/<slug>. Each entry's | |
| 50 | -// markdown body lives at content/guides/<slug>.md. Adding a new agent | |
| 51 | -// guide is two lines below + drop the .md file. | |
| 52 | -interface GuideEntry { | |
| 53 | - slug: string; | |
| 54 | - title: string; | |
| 55 | - description: string; | |
| 56 | -} | |
| 57 | - | |
| 58 | -interface BlogEntry { | |
| 59 | - slug: string; | |
| 60 | - title: string; | |
| 61 | - description: string; | |
| 62 | - // ISO date for the listing + sitemap lastmod. | |
| 63 | - date: string; | |
| 64 | -} | |
| 65 | - | |
| 66 | -const ALL_POSTS: BlogEntry[] = [ | |
| 67 | - { | |
| 68 | - slug: "tweag-handbook-tdd", | |
| 69 | - title: "Tweag's agentic TDD handbook gets the loop right — local green still isn't enough", | |
| 70 | - description: "Tweag's agentic-coding handbook describes a clean TDD loop and the right rules for AI assistants — but the validation layer it leans on (run tests, see green) misses the three failure modes most likely to show up: tautology, test deletion in refactor, and assertion weakening. Here's the gap, and what closes it.", | |
| 71 | - date: "2026-05-08", | |
| 72 | - }, | |
| 73 | - { | |
| 74 | - slug: "aider-tdd", | |
| 75 | - title: "Aider is the closest agent to TDD on rails — until you let it auto-fix", | |
| 76 | - description: "Aider's auto-commit-per-edit and bite-sized-steps philosophy make it TDD-shaped by default. Then `--auto-test` discovers it can win by deleting tests instead of fixing the impl. Here's how Aider's strengths map onto TDD, and how to keep the auto-test loop honest.", | |
| 77 | - date: "2026-05-04", | |
| 78 | - }, | |
| 79 | - { | |
| 80 | - slug: "cursor-tdd", | |
| 81 | - title: "Cursor knows how to do TDD. Most users skip the parts that matter.", | |
| 82 | - description: "Cursor's own agent best practices document a clean TDD workflow — but most users skip the features (Plan Mode, fresh conversations, .cursor/rules) that actually make it work. Here's how to put the pieces together, with a kata you can run end-to-end.", | |
| 83 | - date: "2026-05-04", | |
| 84 | - }, | |
| 85 | - { | |
| 86 | - slug: "claude-code-tdd", | |
| 87 | - title: "Claude Code does not do TDD by default — here's how to make it", | |
| 88 | - description: "Claude Code writes the test and impl in one breath, so the test never fails for the right reason. Two structural changes — CLAUDE.md rules + phase-separated sessions — get the discipline back, and tdd.md can verify it.", | |
| 89 | - date: "2026-05-04", | |
| 90 | - }, | |
| 91 | -]; | |
| 92 | - | |
| 93 | -const ALL_GUIDES: GuideEntry[] = [ | |
| 94 | - { | |
| 95 | - slug: "claude-code", | |
| 96 | - title: "TDD with Claude Code", | |
| 97 | - description: "Run TDD katas through Anthropic's Claude Code with phase-separated prompts and CLAUDE.md rules so the judge scores clean red→green→refactor cycles.", | |
| 98 | - }, | |
| 99 | - { | |
| 100 | - slug: "cursor", | |
| 101 | - title: "TDD with Cursor", | |
| 102 | - description: "Test-driven katas through Cursor — Composer per phase, project rules pinned in .cursor/rules, fresh context for red vs green.", | |
| 103 | - }, | |
| 104 | - { | |
| 105 | - slug: "aider", | |
| 106 | - title: "TDD with Aider", | |
| 107 | - description: "Aider's commit-per-edit model maps directly onto red→green→refactor — prompt with phase tags and the auto-commit carries through.", | |
| 108 | - }, | |
| 109 | -]; | |
| 110 | - | |
| 111 | -const gamesIndexBody = `# games | |
| 112 | - | |
| 113 | -${ALL_GAMES.length === 0 | |
| 114 | - ? "_No katas registered yet._" | |
| 115 | - : `| kata | description | steps |\n|---|---|---|\n${ALL_GAMES.map( | |
| 116 | - (g) => `| [${g.id}](/games/${g.id}) | ${g.description} | ${g.steps.length} |`, | |
| 117 | - ).join("\n")}` | |
| 118 | -} | |
| 119 | - | |
| 120 | -> Ready to play? [Register your agent →](/agents/register) | |
| 121 | -> Using a specific agent? See the [agent-specific guides](/guides) — Claude Code, Cursor, Aider. | |
| 122 | -`; | |
| 123 | - | |
| 124 | -const GAMES_INDEX_HTML = await renderPage({ | |
| 125 | - title: "TDD katas — tdd.md", | |
| 126 | - description: | |
| 127 | - "Browse the TDD katas. Pick a challenge, push red→green→refactor commits, and earn a public verdict graded against hidden tests.", | |
| 128 | - bodyMarkdown: gamesIndexBody, | |
| 129 | - ogPath: "https://tdd.md/games", | |
| 130 | - active: "games", | |
| 131 | -}); | |
| 132 | - | |
| 133 | -const renderKata = async (kata: string): Promise<Response | null> => { | |
| 134 | - const file = Bun.file(`${GAME_DIR}/${kata}/spec.md`); | |
| 135 | - if (!(await file.exists())) return null; | |
| 136 | - const md = await file.text(); | |
| 137 | - // Pull the kata's own description from spec.ts when available — it's | |
| 138 | - // the canonical short copy (rendered on /games + sitemap previews). | |
| 139 | - let description: string | undefined; | |
| 140 | - try { | |
| 141 | - const game = await loadGame(kata); | |
| 142 | - description = game.description; | |
| 143 | - } catch { | |
| 144 | - // unknown kata; use the site default | |
| 145 | - } | |
| 146 | - const html = await renderPage({ | |
| 147 | - title: `${kata} TDD kata — tdd.md`, | |
| 148 | - description, | |
| 149 | - bodyMarkdown: md, | |
| 150 | - ogPath: `https://tdd.md/games/${kata}`, | |
| 151 | - active: "games", | |
| 152 | - }); | |
| 153 | - return new Response(html, { headers: { "Content-Type": "text/html; charset=utf-8" } }); | |
| 154 | -}; | |
| 155 | - | |
| 156 | -interface ForgejoUserSummary { | |
| 157 | - id: number; | |
| 158 | - login: string; | |
| 159 | - is_admin?: boolean; | |
| 160 | - // Forgejo visibility levels: "public" | "limited" | "private". | |
| 161 | - // Anything other than "public" is hidden from anonymous tdd.md visitors. | |
| 162 | - visibility?: string; | |
| 163 | -} | |
| 164 | - | |
| 165 | -// Single-user visibility lookup for /:owner/:repo and /agents/:name. | |
| 166 | -// Returns the raw Forgejo string (or null if the user doesn't exist). | |
| 167 | -const getUserVisibility = async (name: string): Promise<string | null> => { | |
| 168 | - const r = await fetch( | |
| 169 | - `${FORGEJO_INTERNAL}/api/v1/users/${encodeURIComponent(name)}`, | |
| 170 | - { headers: adminApiHeaders() }, | |
| 171 | - ); | |
| 172 | - if (!r.ok) return null; | |
| 173 | - const u = (await r.json()) as ForgejoUserSummary; | |
| 174 | - return u.visibility ?? "public"; | |
| 175 | -}; | |
| 176 | - | |
| 177 | -const renderAgentsIndex = async (): Promise<Response> => { | |
| 178 | - let users: ForgejoUserSummary[] = []; | |
| 179 | - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; | |
| 180 | - if (adminToken) { | |
| 181 | - const r = await fetch(`${FORGEJO_INTERNAL}/api/v1/admin/users?limit=200`, { | |
| 182 | - headers: adminApiHeaders(), | |
| 183 | - }); | |
| 184 | - if (r.ok) users = (await r.json()) as ForgejoUserSummary[]; | |
| 185 | - } | |
| 186 | - // Drop the admin (id 1) and anyone whose visibility isn't "public" — | |
| 187 | - // private and limited agents stay invisible on the public index. | |
| 188 | - const agents = users.filter( | |
| 189 | - (u) => u.id !== 1 && !u.is_admin && (u.visibility ?? "public") === "public", | |
| 190 | - ); | |
| 191 | - | |
| 192 | - // Per-agent score totals from the latest run per repo. | |
| 193 | - const allRuns = allLatestRuns(); | |
| 194 | - const totalsByOwner = new Map<string, { score: number; runs: number }>(); | |
| 195 | - for (const r of allRuns) { | |
| 196 | - const t = totalsByOwner.get(r.owner) ?? { score: 0, runs: 0 }; | |
| 197 | - t.score += r.verdict.totalScore; | |
| 198 | - t.runs += 1; | |
| 199 | - totalsByOwner.set(r.owner, t); | |
| 200 | - } | |
| 201 | - | |
| 202 | - let body: string; | |
| 203 | - if (agents.length === 0) { | |
| 204 | - body = `# agents | |
| 205 | - | |
| 206 | -> No agents registered yet. Be the first. | |
| 207 | - | |
| 208 | -[ Register your agent → ](/agents/register) | |
| 209 | -`; | |
| 210 | - } else { | |
| 211 | - const rows = agents | |
| 212 | - .map((u) => { | |
| 213 | - const t = totalsByOwner.get(u.login) ?? { score: 0, runs: 0 }; | |
| 214 | - const sign = t.score >= 0 ? "+" : ""; | |
| 215 | - return `| [${u.login}](/agents/${u.login}) | ${t.runs} | ${sign}${t.score} |`; | |
| 216 | - }) | |
| 217 | - .join("\n"); | |
| 218 | - body = `# agents | |
| 219 | - | |
| 220 | -| agent | attempts | total score | | |
| 221 | -|---|---|---| | |
| 222 | -${rows} | |
| 223 | - | |
| 224 | -[ Register your agent → ](/agents/register) | |
| 225 | -`; | |
| 226 | - } | |
| 227 | - | |
| 228 | - const description = | |
| 229 | - agents.length === 0 | |
| 230 | - ? "AI agents doing test-driven development on tdd.md — registration is open, sign in with GitHub to play." | |
| 231 | - : `${agents.length} AI ${agents.length === 1 ? "agent" : "agents"} doing test-driven development on tdd.md, scored on red→green discipline against hidden tests for agentic coding.`; | |
| 232 | - | |
| 233 | - const html = await renderPage({ | |
| 234 | - title: "AI agents on tdd.md", | |
| 235 | - description, | |
| 236 | - bodyMarkdown: body, | |
| 237 | - ogPath: "https://tdd.md/agents", | |
| 238 | - active: "agents", | |
| 239 | - }); | |
| 240 | - return htmlResponse(html); | |
| 241 | -}; | |
| 242 | - | |
| 243 | -const renderLeaderboard = async (): Promise<Response> => { | |
| 244 | - // Only show runs whose owner is public. Fetch the user list once | |
| 245 | - // and build a Set so we can filter without N+1 lookups. | |
| 246 | - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; | |
| 247 | - const publicOwners = new Set<string>(); | |
| 248 | - if (adminToken) { | |
| 249 | - const r = await fetch(`${FORGEJO_INTERNAL}/api/v1/admin/users?limit=200`, { | |
| 250 | - headers: adminApiHeaders(), | |
| 251 | - }); | |
| 252 | - if (r.ok) { | |
| 253 | - const users = (await r.json()) as ForgejoUserSummary[]; | |
| 254 | - for (const u of users) { | |
| 255 | - if ((u.visibility ?? "public") === "public") publicOwners.add(u.login); | |
| 256 | - } | |
| 257 | - } | |
| 258 | - } | |
| 259 | - const runs = allLatestRuns() | |
| 260 | - .filter((r) => publicOwners.size === 0 || publicOwners.has(r.owner)) | |
| 261 | - .sort((a, b) => b.verdict.totalScore - a.verdict.totalScore); | |
| 262 | - let body: string; | |
| 263 | - if (runs.length === 0) { | |
| 264 | - body = `# leaderboard | |
| 265 | - | |
| 266 | -> No verdicts yet. The first agent to push a red→green pair lands here. | |
| 267 | - | |
| 268 | -[ Register your agent → ](/agents/register) | |
| 269 | -`; | |
| 270 | - } else { | |
| 271 | - const rows = runs | |
| 272 | - .map((r, i) => { | |
| 273 | - const sign = r.verdict.totalScore >= 0 ? "+" : ""; | |
| 274 | - const verified = r.verdict.steps.filter((s) => s.status === "verified").length; | |
| 275 | - return `| ${i + 1} | [${r.owner}](/agents/${r.owner}) | [${r.repo}](/${r.owner}/${r.repo}) | ${sign}${r.verdict.totalScore} | ${verified} |`; | |
| 276 | - }) | |
| 277 | - .join("\n"); | |
| 278 | - body = `# leaderboard | |
| 279 | - | |
| 280 | -| rank | agent | kata | score | verified steps | | |
| 281 | -|---|---|---|---|---| | |
| 282 | -${rows} | |
| 283 | -`; | |
| 284 | - } | |
| 285 | - const description = | |
| 286 | - runs.length === 0 | |
| 287 | - ? "TDD leaderboard for AI agents on tdd.md — be the first verdict." | |
| 288 | - : `Top AI agents by TDD score on tdd.md — ${runs.length} ranked ${runs.length === 1 ? "submission" : "submissions"} graded on red→green discipline and hidden test pass rate.`; | |
| 289 | - | |
| 290 | - const html = await renderPage({ | |
| 291 | - title: "TDD leaderboard — tdd.md", | |
| 292 | - description, | |
| 293 | - bodyMarkdown: body, | |
| 294 | - ogPath: "https://tdd.md/leaderboard", | |
| 295 | - active: "leaderboard", | |
| 296 | - }); | |
| 297 | - return htmlResponse(html); | |
| 298 | -}; | |
| 299 | - | |
| 300 | -const REGISTER_BODY = `# register | |
| 301 | - | |
| 302 | -> Sign in with GitHub to create your tdd.md agent. | |
| 303 | - | |
| 304 | -## what we ask GitHub for | |
| 305 | -- your username | |
| 306 | -- your primary verified email | |
| 307 | - | |
| 308 | -That's it — no repo access, no anything else. | |
| 309 | - | |
| 310 | -## what you get | |
| 311 | -- a public agent account at \`git.tdd.md/<your-github-name>\` | |
| 312 | -- a push token (shown once) | |
| 313 | -- an empty repo for the first kata, ready to push to | |
| 314 | - | |
| 315 | -[ sign in with github → ](/auth/github/start) | |
| 316 | -`; | |
| 317 | - | |
| 318 | -const REGISTER_HTML = await renderPage({ | |
| 319 | - title: "Register your AI agent — tdd.md", | |
| 320 | - description: | |
| 321 | - "Sign in with GitHub to register your AI agent on tdd.md and start solving TDD katas. Public-signup, verified-identity, no extra forms.", | |
| 322 | - bodyMarkdown: REGISTER_BODY, | |
| 323 | - ogPath: "https://tdd.md/agents/register", | |
| 324 | - active: "agents", | |
| 325 | - noindex: true, | |
| 326 | -}); | |
| 327 | - | |
| 328 | -const htmlResponse = (html: string, status = 200) => | |
| 329 | - new Response(html, { status, headers: { "Content-Type": "text/html; charset=utf-8" } }); | |
| 330 | - | |
| 331 | -const errorPage = async (message: string, status = 400): Promise<Response> => { | |
| 332 | - const html = await renderPage({ | |
| 333 | - title: "error — tdd.md", | |
| 334 | - bodyMarkdown: `# error\n\n> ${message}\n\n[← back](/agents/register)`, | |
| 335 | - active: "agents", | |
| 336 | - }); | |
| 337 | - return htmlResponse(html, status); | |
| 338 | -}; | |
| 339 | - | |
| 340 | -const randomHex = (bytes: number): string => | |
| 341 | - Array.from(crypto.getRandomValues(new Uint8Array(bytes))) | |
| 342 | - .map((b) => b.toString(16).padStart(2, "0")) | |
| 343 | - .join(""); | |
| 344 | - | |
| 345 | -const parseCookies = (header: string | null): Record<string, string> => { | |
| 346 | - const out: Record<string, string> = {}; | |
| 347 | - if (!header) return out; | |
| 348 | - for (const part of header.split(";")) { | |
| 349 | - const idx = part.indexOf("="); | |
| 350 | - if (idx === -1) continue; | |
| 351 | - const name = part.slice(0, idx).trim(); | |
| 352 | - const value = part.slice(idx + 1).trim(); | |
| 353 | - if (name) out[name] = decodeURIComponent(value); | |
| 354 | - } | |
| 355 | - return out; | |
| 356 | -}; | |
| 357 | - | |
| 358 | -const timingSafeEqual = (a: string, b: string): boolean => { | |
| 359 | - if (a.length !== b.length) return false; | |
| 360 | - let r = 0; | |
| 361 | - for (let i = 0; i < a.length; i++) r |= a.charCodeAt(i) ^ b.charCodeAt(i); | |
| 362 | - return r === 0; | |
| 363 | -}; | |
| 364 | - | |
| 365 | -// 30 days. Long enough for everyday use, short enough that a leaked | |
| 366 | -// cookie doesn't grant indefinite access. | |
| 367 | -const SESSION_TTL_SEC = 30 * 24 * 60 * 60; | |
| 368 | -const SESSION_COOKIE = "tdd_session"; | |
| 369 | - | |
| 370 | -const sessionSecret = (): string => | |
| 371 | - process.env.SESSION_SECRET ?? process.env.WEBHOOK_SECRET ?? ""; | |
| 372 | - | |
| 373 | -const signSession = async (username: string): Promise<string> => { | |
| 374 | - const exp = Math.floor(Date.now() / 1000) + SESSION_TTL_SEC; | |
| 375 | - const payload = `${username}.${exp}`; | |
| 376 | - const sig = await hmacSha256Hex(sessionSecret(), payload); | |
| 377 | - return `${payload}.${sig}`; | |
| 378 | -}; | |
| 379 | - | |
| 380 | -const verifySession = async (cookie: string): Promise<string | null> => { | |
| 381 | - const parts = cookie.split("."); | |
| 382 | - if (parts.length !== 3) return null; | |
| 383 | - const [username, expStr, providedSig] = parts; | |
| 384 | - if (!username || !expStr || !providedSig) return null; | |
| 385 | - const exp = Number(expStr); | |
| 386 | - if (!Number.isFinite(exp) || exp < Math.floor(Date.now() / 1000)) return null; | |
| 387 | - const expectedSig = await hmacSha256Hex(sessionSecret(), `${username}.${expStr}`); | |
| 388 | - if (!timingSafeEqual(providedSig, expectedSig)) return null; | |
| 389 | - return username; | |
| 390 | -}; | |
| 391 | - | |
| 392 | -const getViewer = async (req: Request): Promise<string | null> => { | |
| 393 | - if (!sessionSecret()) return null; | |
| 394 | - const cookies = parseCookies(req.headers.get("cookie")); | |
| 395 | - const raw = cookies[SESSION_COOKIE]; | |
| 396 | - if (!raw) return null; | |
| 397 | - return verifySession(raw); | |
| 398 | -}; | |
| 399 | - | |
| 400 | -const sessionCookieHeader = (value: string, maxAge: number): string => | |
| 401 | - `${SESSION_COOKIE}=${value}; Path=/; HttpOnly; Secure; SameSite=Lax; Max-Age=${maxAge}`; | |
| 402 | - | |
| 403 | -const hmacSha256Hex = async (secret: string, body: string): Promise<string> => { | |
| 404 | - const key = await crypto.subtle.importKey( | |
| 405 | - "raw", | |
| 406 | - new TextEncoder().encode(secret), | |
| 407 | - { name: "HMAC", hash: "SHA-256" }, | |
| 408 | - false, | |
| 409 | - ["sign"], | |
| 410 | - ); | |
| 411 | - const sig = await crypto.subtle.sign("HMAC", key, new TextEncoder().encode(body)); | |
| 412 | - return Array.from(new Uint8Array(sig)) | |
| 413 | - .map((b) => b.toString(16).padStart(2, "0")) | |
| 414 | - .join(""); | |
| 415 | -}; | |
| 416 | - | |
| 417 | -// Forward git protocol + Forgejo API/asset requests to Forgejo via the host | |
| 418 | -// network. Lets us serve everything under tdd.md (GitHub-style) without | |
| 419 | -// exposing git.tdd.md externally. | |
| 420 | -const FORGEJO_INTERNAL = process.env.FORGEJO_URL ?? "https://git.tdd.md"; | |
| 421 | - | |
| 422 | -// Admin-token-authenticated headers for API calls. Agent repos are | |
| 423 | -// private by default; rendering the verdict page must still work. We | |
| 424 | -// proxy the data through the admin identity, never exposing the source | |
| 425 | -// or push protocol publicly. | |
| 426 | -const adminApiHeaders = (): HeadersInit => { | |
| 427 | - const token = process.env.FORGEJO_ADMIN_TOKEN; | |
| 428 | - return token ? { Authorization: `token ${token}` } : {}; | |
| 429 | -}; | |
| 430 | - | |
| 431 | -const HOP_BY_HOP = [ | |
| 432 | - "host", | |
| 433 | - "connection", | |
| 434 | - "keep-alive", | |
| 435 | - "transfer-encoding", | |
| 436 | - "upgrade", | |
| 437 | - "proxy-authorization", | |
| 438 | - "proxy-connection", | |
| 439 | - "te", | |
| 440 | - "trailer", | |
| 441 | -]; | |
| 442 | - | |
| 443 | -const proxyToForgejo = async (req: Request, pathAndQuery: string): Promise<Response> => { | |
| 444 | - const upstream = `${FORGEJO_INTERNAL}${pathAndQuery}`; | |
| 445 | - const headers = new Headers(req.headers); | |
| 446 | - for (const h of HOP_BY_HOP) headers.delete(h); | |
| 447 | - headers.set("X-Forwarded-Host", "tdd.md"); | |
| 448 | - headers.set("X-Forwarded-Proto", "https"); | |
| 449 | - headers.set("X-Forwarded-For", req.headers.get("cf-connecting-ip") ?? "0.0.0.0"); | |
| 450 | - | |
| 451 | - let body: ArrayBuffer | undefined; | |
| 452 | - if (req.method !== "GET" && req.method !== "HEAD") { | |
| 453 | - body = await req.arrayBuffer(); | |
| 454 | - } | |
| 455 | - | |
| 456 | - const upstreamRes = await fetch(upstream, { | |
| 457 | - method: req.method, | |
| 458 | - headers, | |
| 459 | - body, | |
| 460 | - redirect: "manual", | |
| 461 | - }); | |
| 462 | - | |
| 463 | - const responseHeaders = new Headers(upstreamRes.headers); | |
| 464 | - for (const h of HOP_BY_HOP) responseHeaders.delete(h); | |
| 465 | - | |
| 466 | - return new Response(upstreamRes.body, { | |
| 467 | - status: upstreamRes.status, | |
| 468 | - statusText: upstreamRes.statusText, | |
| 469 | - headers: responseHeaders, | |
| 470 | - }); | |
| 471 | -}; | |
| 472 | - | |
| 473 | -const isGitProtocol = (pathname: string, search: URLSearchParams): boolean => { | |
| 474 | - if (pathname.includes(".git/") || pathname.endsWith(".git")) return true; | |
| 475 | - if ( | |
| 476 | - pathname.endsWith("/info/refs") && | |
| 477 | - (search.get("service") === "git-upload-pack" || search.get("service") === "git-receive-pack") | |
| 478 | - ) { | |
| 479 | - return true; | |
| 480 | - } | |
| 481 | - if (pathname.endsWith("/git-upload-pack") || pathname.endsWith("/git-receive-pack")) { | |
| 482 | - return true; | |
| 483 | - } | |
| 484 | - return false; | |
| 485 | -}; | |
| 486 | - | |
| 487 | -interface ForgejoRepoSummary { | |
| 488 | - description: string; | |
| 489 | - clone_url: string; | |
| 490 | - empty: boolean; | |
| 491 | - private: boolean; | |
| 492 | -} | |
| 493 | - | |
| 494 | -interface ForgejoCommit { | |
| 495 | - sha: string; | |
| 496 | - commit: { message: string; author: { name: string; date: string } }; | |
| 497 | -} | |
| 498 | - | |
| 499 | -const phaseSpan = (p: Phase): string => { | |
| 500 | - const cls = p === "red" ? "red" : p === "green" ? "green" : p === "refactor" ? "blue" : "muted"; | |
| 501 | - return `<span class="${cls}">${p}</span>`; | |
| 502 | -}; | |
| 503 | - | |
| 504 | -const relativeTime = (iso: string): string => { | |
| 505 | - const ms = Date.now() - new Date(iso).getTime(); | |
| 506 | - if (ms < 60_000) return `${Math.max(0, Math.floor(ms / 1000))}s ago`; | |
| 507 | - if (ms < 3_600_000) return `${Math.floor(ms / 60_000)}m ago`; | |
| 508 | - if (ms < 86_400_000) return `${Math.floor(ms / 3_600_000)}h ago`; | |
| 509 | - return `${Math.floor(ms / 86_400_000)}d ago`; | |
| 510 | -}; | |
| 511 | - | |
| 512 | -const renderRepoView = async ( | |
| 513 | - owner: string, | |
| 514 | - repo: string, | |
| 515 | - viewer: string | null, | |
| 516 | -): Promise<Response> => { | |
| 517 | - // Private/limited owners get a 404 to anonymous visitors — but the | |
| 518 | - // owner themselves (verified via session cookie) can always see | |
| 519 | - // their own pages. | |
| 520 | - const ownerVisibility = await getUserVisibility(owner); | |
| 521 | - if (ownerVisibility !== null && ownerVisibility !== "public" && viewer !== owner) { | |
| 522 | - const html = await renderNotFound(`/${owner}/${repo}`); | |
| 523 | - return htmlResponse(html, 404); | |
| 524 | - } | |
| 525 | - | |
| 526 | - const repoApi = `${FORGEJO_INTERNAL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`; | |
| 527 | - const repoRes = await fetch(repoApi, { headers: adminApiHeaders() }); | |
| 528 | - if (repoRes.status === 404) { | |
| 529 | - const html = await renderNotFound(`/${owner}/${repo}`); | |
| 530 | - return htmlResponse(html, 404); | |
| 531 | - } | |
| 532 | - if (!repoRes.ok) { | |
| 533 | - const html = await renderPage({ | |
| 534 | - title: `${owner}/${repo} — tdd.md`, | |
| 535 | - bodyMarkdown: `# ${owner}/${repo}\n\n> repository unavailable`, | |
| 536 | - }); | |
| 537 | - return htmlResponse(html, 502); | |
| 538 | - } | |
| 539 | - const info = (await repoRes.json()) as ForgejoRepoSummary; | |
| 540 | - const cloneUrl = info.clone_url || `https://tdd.md/${owner}/${repo}.git`; | |
| 541 | - const isPrivate = info.private === true; | |
| 542 | - | |
| 543 | - // The repo name is by convention the kata id. If the kata exists, the | |
| 544 | - // header link is meaningful and we know the total step count. | |
| 545 | - let totalSteps: number | null = null; | |
| 546 | - let kataExists = false; | |
| 547 | - try { | |
| 548 | - const game = await loadGame(repo); | |
| 549 | - totalSteps = game.steps.length; | |
| 550 | - kataExists = true; | |
| 551 | - } catch { | |
| 552 | - // Repo isn't a known kata — still render, just without step totals. | |
| 553 | - } | |
| 554 | - | |
| 555 | - let commits: ForgejoCommit[] = []; | |
| 556 | - if (!info.empty) { | |
| 557 | - const commitsRes = await fetch(`${repoApi}/commits?limit=50&stat=false`, { | |
| 558 | - headers: adminApiHeaders(), | |
| 559 | - }); | |
| 560 | - if (commitsRes.ok) commits = (await commitsRes.json()) as ForgejoCommit[]; | |
| 561 | - } | |
| 562 | - const progress = computeProgress(commits); | |
| 563 | - const verified = progress.verifiedSteps.size; | |
| 564 | - | |
| 565 | - let status: string; | |
| 566 | - if (commits.length === 0) { | |
| 567 | - status = "awaiting first push"; | |
| 568 | - } else if (totalSteps !== null && verified >= totalSteps) { | |
| 569 | - status = "kata complete"; | |
| 570 | - } else if (verified > 0) { | |
| 571 | - status = "in progress"; | |
| 572 | - } else { | |
| 573 | - status = "no verified steps yet"; | |
| 574 | - } | |
| 575 | - const stepCounter = totalSteps !== null ? `${verified} / ${totalSteps}` : `${verified} / ?`; | |
| 576 | - | |
| 577 | - let phaseLog: string; | |
| 578 | - if (commits.length === 0) { | |
| 579 | - phaseLog = "_No commits yet — push your first `red:` commit to start the cycle._"; | |
| 580 | - } else { | |
| 581 | - const rows = commits.map((c) => { | |
| 582 | - const sha = c.sha.slice(0, 7); | |
| 583 | - const p = parseCommit(c.commit.message); | |
| 584 | - const subject = (p.subject || c.commit.message.split("\n")[0] || "").replace(/\|/g, "\\|"); | |
| 585 | - const stepCell = p.step ? `\`${p.step}\`` : "—"; | |
| 586 | - return `| \`${sha}\` | ${phaseSpan(p.phase)} | ${stepCell} | ${subject} | ${relativeTime(c.commit.author.date)} |`; | |
| 587 | - }); | |
| 588 | - phaseLog = `| sha | phase | step | message | when |\n|---|---|---|---|---|\n${rows.join("\n")}`; | |
| 589 | - } | |
| 590 | - | |
| 591 | - const kataLink = kataExists | |
| 592 | - ? `[\`${repo}\` →](/games/${repo})` | |
| 593 | - : `\`${repo}\``; | |
| 594 | - const privateBadge = isPrivate ? ` <span class="muted">[private]</span>` : ""; | |
| 595 | - | |
| 596 | - const verdict = latestRun(owner, repo); | |
| 597 | - const headSha = commits[0]?.sha ?? null; | |
| 598 | - const verdictStale = verdict !== null && headSha !== null && verdict.headSha !== headSha; | |
| 599 | - | |
| 600 | - let scoreSection: string; | |
| 601 | - if (verdict === null) { | |
| 602 | - scoreSection = `> Not yet judged. The next push triggers a judge run, or [run the judge now](/api/judge/${owner}/${repo}) (POST).\n\nPhase tally: <span class="red">red ${progress.redCount}</span> · <span class="green">green ${progress.greenCount}</span> · <span class="blue">refactor ${progress.refactorCount}</span>${progress.untaggedCount > 0 ? ` · <span class="muted">untagged ${progress.untaggedCount}</span>` : ""}.`; | |
| 603 | - } else { | |
| 604 | - const stale = verdictStale ? ` · <span class="muted">stale — newer commits not yet judged</span>` : ""; | |
| 605 | - const sign = verdict.totalScore >= 0 ? "+" : ""; | |
| 606 | - const statusClass = (status: string): string => { | |
| 607 | - if (status === "verified") return "green"; | |
| 608 | - if (status === "discipline-only") return "blue"; | |
| 609 | - if (status === "no-green") return "muted"; | |
| 610 | - return "red"; | |
| 611 | - }; | |
| 612 | - const modeLabel = (m: string): string => { | |
| 613 | - const cls = m === "strict" ? "red" : m === "pragmatic" ? "blue" : "green"; | |
| 614 | - return `<span class="${cls}">${m}</span>`; | |
| 615 | - }; | |
| 616 | - const rows = verdict.steps.length === 0 | |
| 617 | - ? "_No red→green pairs found yet._" | |
| 618 | - : `| step | red | green | hidden | status | points | explanation |\n|---|---|---|---|---|---|---|\n` + | |
| 619 | - verdict.steps.map((s) => { | |
| 620 | - const cls = statusClass(s.status); | |
| 621 | - const sign = s.scoreDelta >= 0 ? "+" : ""; | |
| 622 | - const hiddenCell = | |
| 623 | - s.hiddenPassed === true ? `<span class="green">pass</span>` : | |
| 624 | - s.hiddenPassed === false ? `<span class="red">fail</span>` : | |
| 625 | - `<span class="muted">—</span>`; | |
| 626 | - const explanation = (s.explanation ?? "").replace(/\|/g, "\\|"); | |
| 627 | - return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | <span class="${cls}">${s.status}</span> | ${sign}${s.scoreDelta} | ${explanation} |`; | |
| 628 | - }).join("\n"); | |
| 629 | - const refactorRows = (verdict.refactors ?? []).length === 0 | |
| 630 | - ? "" | |
| 631 | - : `\n\n### refactors\n\n| sha | step | tests | points | explanation |\n|---|---|---|---|---|\n` + | |
| 632 | - verdict.refactors.map((r) => { | |
| 633 | - const sign = r.scoreDelta >= 0 ? "+" : ""; | |
| 634 | - const cls = r.testsPassed ? "green" : "red"; | |
| 635 | - const verb = r.testsPassed ? "green" : "broke tests"; | |
| 636 | - const explanation = (r.explanation ?? "").replace(/\|/g, "\\|"); | |
| 637 | - return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | <span class="${cls}">${verb}</span> | ${sign}${r.scoreDelta} | ${explanation} |`; | |
| 638 | - }).join("\n"); | |
| 639 | - const modeLine = verdict.mode ? `**mode: ${modeLabel(verdict.mode)}** · ` : ""; | |
| 640 | - scoreSection = `${modeLine}**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`; | |
| 641 | - } | |
| 642 | - | |
| 643 | - const body = `# ${owner} · playing ${kataLink}${privateBadge} | |
| 644 | - | |
| 645 | -> ${status} | |
| 646 | -> **${stepCounter}** steps verified | |
| 647 | - | |
| 648 | -## phase log | |
| 649 | - | |
| 650 | -${phaseLog} | |
| 651 | - | |
| 652 | -## score | |
| 653 | - | |
| 654 | -${scoreSection} | |
| 655 | - | |
| 656 | -## clone | |
| 657 | - | |
| 658 | -\`\`\` | |
| 659 | -git clone ${cloneUrl} | |
| 660 | -\`\`\` | |
| 661 | - | |
| 662 | -[← /agents/${owner}](/agents/${owner})${kataExists ? ` · [kata spec →](/games/${repo})` : ""} | |
| 663 | -`; | |
| 664 | - | |
| 665 | - // Dynamic description tailored to this attempt — gives every agent | |
| 666 | - // run a unique snippet for search results and social previews instead | |
| 667 | - // of falling back to the site default. | |
| 668 | - const totalSnippet = | |
| 669 | - verdict !== null | |
| 670 | - ? `, score ${verdict.totalScore >= 0 ? "+" : ""}${verdict.totalScore}` | |
| 671 | - : ""; | |
| 672 | - const description = kataExists | |
| 673 | - ? `${owner}'s ${repo} TDD kata attempt on tdd.md — ${verified}${totalSteps !== null ? `/${totalSteps}` : ""} steps verified${totalSnippet}.` | |
| 674 | - : `${owner}/${repo} on tdd.md — ${commits.length} ${commits.length === 1 ? "commit" : "commits"} in the phase log${totalSnippet}.`; | |
| 675 | - | |
| 676 | - const html = await renderPage({ | |
| 677 | - title: `${owner} · ${repo}${kataExists ? " TDD kata" : ""} — tdd.md`, | |
| 678 | - description, | |
| 679 | - bodyMarkdown: body, | |
| 680 | - ogPath: `https://tdd.md/${owner}/${repo}`, | |
| 681 | - active: "agents", | |
| 682 | - }); | |
| 683 | - return htmlResponse(html); | |
| 684 | -}; | |
| 685 | - | |
| 686 | -const port = Number(process.env.PORT ?? 3000); | |
| 687 | - | |
| 688 | -const server = Bun.serve({ | |
| 689 | - port, | |
| 690 | - routes: { | |
| 691 | - "/": htmlResponse(HOME_HTML), | |
| 692 | - "/raw": new Response(Bun.file(HOME_MD), { | |
| 693 | - headers: { "Content-Type": "text/markdown; charset=utf-8" }, | |
| 694 | - }), | |
| 695 | - "/healthz": new Response("ok"), | |
| 696 | - | |
| 697 | - "/robots.txt": new Response( | |
| 698 | - `User-agent: *\nAllow: /\nDisallow: /auth/\nDisallow: /api/\n\nSitemap: https://tdd.md/sitemap.xml\n`, | |
| 699 | - { headers: { "Content-Type": "text/plain; charset=utf-8" } }, | |
| 700 | - ), | |
| 701 | - | |
| 702 | - "/sitemap.xml": async () => { | |
| 703 | - const today = new Date().toISOString().slice(0, 10); | |
| 704 | - const url = (loc: string, priority: string) => | |
| 705 | - `<url><loc>${loc}</loc><lastmod>${today}</lastmod><priority>${priority}</priority></url>`; | |
| 706 | - const kataUrls = ALL_GAMES.map((g) => | |
| 707 | - url(`https://tdd.md/games/${g.id}`, "0.8"), | |
| 708 | - ).join("\n"); | |
| 709 | - const guideUrls = ALL_GUIDES.map((g) => | |
| 710 | - url(`https://tdd.md/guides/${g.slug}`, "0.8"), | |
| 711 | - ).join("\n"); | |
| 712 | - const blogUrls = ALL_POSTS.map((p) => | |
| 713 | - url(`https://tdd.md/blog/${p.slug}`, "0.8"), | |
| 714 | - ).join("\n"); | |
| 715 | - const xml = `<?xml version="1.0" encoding="UTF-8"?> | |
| 716 | -<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> | |
| 717 | -${url("https://tdd.md/", "1.0")} | |
| 718 | -${url("https://tdd.md/games", "0.9")} | |
| 719 | -${kataUrls} | |
| 720 | -${url("https://tdd.md/guides", "0.9")} | |
| 721 | -${guideUrls} | |
| 722 | -${url("https://tdd.md/blog", "0.7")} | |
| 723 | -${blogUrls} | |
| 724 | -${url("https://tdd.md/agents", "0.7")} | |
| 725 | -${url("https://tdd.md/leaderboard", "0.7")} | |
| 726 | -</urlset>`; | |
| 727 | - return new Response(xml, { | |
| 728 | - headers: { "Content-Type": "application/xml; charset=utf-8" }, | |
| 729 | - }); | |
| 730 | - }, | |
| 731 | - | |
| 732 | - "/og.svg": new Response(Bun.file("./public/og.svg"), { | |
| 733 | - headers: { | |
| 734 | - "Content-Type": "image/svg+xml", | |
| 735 | - "Cache-Control": "public, max-age=3600", | |
| 736 | - }, | |
| 737 | - }), | |
| 738 | - | |
| 739 | - "/games": htmlResponse(GAMES_INDEX_HTML), | |
| 740 | - | |
| 741 | - "/blog": async () => { | |
| 742 | - const rows = ALL_POSTS | |
| 743 | - .map((p) => `| ${p.date} | [${p.title}](/blog/${p.slug}) |`) | |
| 744 | - .join("\n"); | |
| 745 | - const body = `# blog | |
| 746 | - | |
| 747 | -Notes on TDD, agentic coding, and the discipline that ties them together. | |
| 748 | - | |
| 749 | -| date | post | | |
| 750 | -|---|---| | |
| 751 | -${rows} | |
| 752 | - | |
| 753 | -> RSS feed coming when there's a second post. | |
| 754 | - | |
| 755 | -[← back to tdd.md](/) · [the guides](/guides) · [the katas](/games) | |
| 756 | -`; | |
| 757 | - const html = await renderPage({ | |
| 758 | - title: "Blog — tdd.md", | |
| 759 | - description: "Posts on test-driven development for AI coding agents — how to apply TDD with Claude Code, Cursor, and Aider, what we learn from the verdicts.", | |
| 760 | - bodyMarkdown: body, | |
| 761 | - ogPath: "https://tdd.md/blog", | |
| 762 | - active: "blog", | |
| 763 | - }); | |
| 764 | - return htmlResponse(html); | |
| 765 | - }, | |
| 766 | - | |
| 767 | - "/blog/:slug": async (req) => { | |
| 768 | - const slug = req.params.slug; | |
| 769 | - const entry = ALL_POSTS.find((p) => p.slug === slug); | |
| 770 | - if (!entry) { | |
| 771 | - const html = await renderNotFound(`/blog/${slug}`); | |
| 772 | - return htmlResponse(html, 404); | |
| 773 | - } | |
| 774 | - const file = Bun.file(`./content/blog/${slug}.md`); | |
| 775 | - if (!(await file.exists())) { | |
| 776 | - const html = await renderNotFound(`/blog/${slug}`); | |
| 777 | - return htmlResponse(html, 404); | |
| 778 | - } | |
| 779 | - const md = await file.text(); | |
| 780 | - const html = await renderPage({ | |
| 781 | - title: `${entry.title} — tdd.md`, | |
| 782 | - description: entry.description, | |
| 783 | - bodyMarkdown: md, | |
| 784 | - ogPath: `https://tdd.md/blog/${slug}`, | |
| 785 | - active: "blog", | |
| 786 | - jsonLd: { | |
| 787 | - "@context": "https://schema.org", | |
| 788 | - "@type": "BlogPosting", | |
| 789 | - headline: entry.title, | |
| 790 | - description: entry.description, | |
| 791 | - datePublished: entry.date, | |
| 792 | - url: `https://tdd.md/blog/${slug}`, | |
| 793 | - author: { "@type": "Organization", name: "tdd.md" }, | |
| 794 | - }, | |
| 795 | - }); | |
| 796 | - return htmlResponse(html); | |
| 797 | - }, | |
| 798 | - | |
| 799 | - "/projects": async () => { | |
| 800 | - const projects = listActiveProjects(); | |
| 801 | - const html = await renderPage({ | |
| 802 | - title: "Projects — tdd.md", | |
| 803 | - description: "Real repos opted in to tdd.md scoring. Each project drops .tdd-md.json at its root and gets its commits judged structurally for TDD discipline.", | |
| 804 | - bodyMarkdown: projectsLandingMd(projects), | |
| 805 | - ogPath: "https://tdd.md/projects", | |
| 806 | - }); | |
| 807 | - return htmlResponse(html); | |
| 808 | - }, | |
| 809 | - | |
| 810 | - "/projects/new": async (req) => { | |
| 811 | - const viewer = await getViewer(req); | |
| 812 | - if (req.method === "GET") { | |
| 813 | - const url = new URL(req.url); | |
| 814 | - const prefilled = url.searchParams.get("repo") ?? undefined; | |
| 815 | - const html = await renderPage({ | |
| 816 | - title: "Register a project — tdd.md", | |
| 817 | - description: "Onboard a real repo for TDD-discipline scoring. Drops .tdd-md.json at the repo root, register here, and the reports begin tracking commits on its tracked branches.", | |
| 818 | - bodyMarkdown: projectRegisterMd(viewer, prefilled), | |
| 819 | - ogPath: "https://tdd.md/projects/new", | |
| 820 | - noindex: true, | |
| 821 | - }); | |
| 822 | - return htmlResponse(html); | |
| 823 | - } | |
| 824 | - if (req.method !== "POST") return new Response("method not allowed", { status: 405 }); | |
| 825 | - if (!viewer) return new Response("unauthorized — sign in first", { status: 401 }); | |
| 826 | - | |
| 827 | - let raw = ""; | |
| 828 | - try { | |
| 829 | - const form = await req.formData(); | |
| 830 | - raw = String(form.get("repo") ?? "").trim(); | |
| 831 | - } catch { | |
| 832 | - return new Response("invalid form body", { status: 400 }); | |
| 833 | - } | |
| 834 | - | |
| 835 | - const renderError = async (message: string, status = 400): Promise<Response> => { | |
| 836 | - const html = await renderPage({ | |
| 837 | - title: "Register a project — tdd.md", | |
| 838 | - bodyMarkdown: projectRegisterMd(viewer, raw, message), | |
| 839 | - ogPath: "https://tdd.md/projects/new", | |
| 840 | - noindex: true, | |
| 841 | - }); | |
| 842 | - return htmlResponse(html, status); | |
| 843 | - }; | |
| 844 | - | |
| 845 | - let owner: string; | |
| 846 | - let repo: string; | |
| 847 | - try { | |
| 848 | - ({ owner, repo } = parseRepoIdentifier(raw)); | |
| 849 | - } catch (err) { | |
| 850 | - return renderError((err as Error).message); | |
| 851 | - } | |
| 852 | - | |
| 853 | - let config; | |
| 854 | - try { | |
| 855 | - config = await fetchProjectConfig(owner, repo); | |
| 856 | - } catch (err) { | |
| 857 | - return renderError((err as Error).message); | |
| 858 | - } | |
| 859 | - | |
| 860 | - upsertProject(viewer, owner, repo, config); | |
| 861 | - return new Response(null, { | |
| 862 | - status: 303, | |
| 863 | - headers: { Location: `/projects/${owner}/${repo}` }, | |
| 864 | - }); | |
| 865 | - }, | |
| 866 | - | |
| 867 | - "/projects/:repoOwner/:repoName": async (req) => { | |
| 868 | - const { repoOwner, repoName } = req.params; | |
| 869 | - const project = getProject(repoOwner, repoName); | |
| 870 | - if (!project) { | |
| 871 | - const html = await renderNotFound(`/projects/${repoOwner}/${repoName}`); | |
| 872 | - return htmlResponse(html, 404); | |
| 873 | - } | |
| 874 | - const html = await renderPage({ | |
| 875 | - title: `${project.displayName ?? `${project.repoOwner}/${project.repoName}`} — tdd.md`, | |
| 876 | - description: `${project.repoOwner}/${project.repoName} on tdd.md — ${project.testRunner === "none" ? "trace-mode" : project.testRunner} judging across ${project.trackedBranches.join(", ")}.`, | |
| 877 | - bodyMarkdown: projectDetailMd(project), | |
| 878 | - ogPath: `https://tdd.md/projects/${project.repoOwner}/${project.repoName}`, | |
| 879 | - }); | |
| 880 | - return htmlResponse(html); | |
| 881 | - }, | |
| 882 | - | |
| 883 | - "/reports": async () => { | |
| 884 | - const html = await renderPage({ | |
| 885 | - title: "Reports — tdd.md", | |
| 886 | - description: "Per-agent TDD-discipline reporting over real project repos: trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.", | |
| 887 | - bodyMarkdown: reportsLandingMd(), | |
| 888 | - ogPath: "https://tdd.md/reports", | |
| 889 | - noindex: true, | |
| 890 | - }); | |
| 891 | - return htmlResponse(html); | |
| 892 | - }, | |
| 893 | - | |
| 894 | - "/reports/demo": async () => { | |
| 895 | - const html = await renderPage({ | |
| 896 | - title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md", | |
| 897 | - description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.", | |
| 898 | - bodyMarkdown: execSummaryMd(), | |
| 899 | - ogPath: "https://tdd.md/reports/demo", | |
| 900 | - noindex: true, | |
| 901 | - }); | |
| 902 | - return htmlResponse(html); | |
| 903 | - }, | |
| 904 | - | |
| 905 | - "/reports/demo/tests": async () => { | |
| 906 | - const html = await renderPage({ | |
| 907 | - title: "Tests overzicht (demo) — tdd.md", | |
| 908 | - description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.", | |
| 909 | - bodyMarkdown: testsOverviewMd(), | |
| 910 | - ogPath: "https://tdd.md/reports/demo/tests", | |
| 911 | - noindex: true, | |
| 912 | - }); | |
| 913 | - return htmlResponse(html); | |
| 914 | - }, | |
| 915 | - | |
| 916 | - "/reports/demo/agents/:slug": async (req) => { | |
| 917 | - const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"]; | |
| 918 | - const md = agentDrilldownMd(slug); | |
| 919 | - if (!md) { | |
| 920 | - const html = await renderNotFound(`/reports/demo/agents/${slug}`); | |
| 921 | - return htmlResponse(html, 404); | |
| 922 | - } | |
| 923 | - const entry = DEMO_REPORTS.find((r) => r.slug === slug)!; | |
| 924 | - const html = await renderPage({ | |
| 925 | - title: `${entry.name} drill-down (demo) — tdd.md`, | |
| 926 | - description: `Per-agent drill-down mockup for ${entry.name}: trend, failure-mode breakdown, recent flagged commits with coaching links.`, | |
| 927 | - bodyMarkdown: md, | |
| 928 | - ogPath: `https://tdd.md/reports/demo/agents/${slug}`, | |
| 929 | - noindex: true, | |
| 930 | - }); | |
| 931 | - return htmlResponse(html); | |
| 932 | - }, | |
| 933 | - | |
| 934 | - "/guides": async () => { | |
| 935 | - const rows = ALL_GUIDES | |
| 936 | - .map((g) => `| [${g.title}](/guides/${g.slug}) | ${g.description} |`) | |
| 937 | - .join("\n"); | |
| 938 | - const body = `# guides | |
| 939 | - | |
| 940 | -Agent-specific walkthroughs for using tdd.md with the major agentic-coding tools. Each guide covers setup, prompt patterns that keep the agent in TDD, and the common pitfalls that cost score. | |
| 941 | - | |
| 942 | -| guide | what it covers | | |
| 943 | -|---|---| | |
| 944 | -${rows} | |
| 945 | - | |
| 946 | -> Missing your agent? [The mechanics are the same](/) — push commits tagged \`red:\` / \`green:\` / \`refactor:\` to your kata repo. Send a PR with a new guide and we'll list it here. | |
| 947 | - | |
| 948 | -[← play a kata](/games) · [register your agent →](/you) | |
| 949 | -`; | |
| 950 | - const html = await renderPage({ | |
| 951 | - title: "TDD guides for agentic coding tools — tdd.md", | |
| 952 | - description: "Practical TDD walkthroughs for Claude Code, Cursor, Aider and other AI coding agents — keep your agent honest with red→green→refactor commits, scored by tdd.md.", | |
| 953 | - bodyMarkdown: body, | |
| 954 | - ogPath: "https://tdd.md/guides", | |
| 955 | - active: "guides", | |
| 956 | - }); | |
| 957 | - return htmlResponse(html); | |
| 958 | - }, | |
| 959 | - | |
| 960 | - "/guides/:slug": async (req) => { | |
| 961 | - const slug = req.params.slug; | |
| 962 | - const entry = ALL_GUIDES.find((g) => g.slug === slug); | |
| 963 | - if (!entry) { | |
| 964 | - const html = await renderNotFound(`/guides/${slug}`); | |
| 965 | - return htmlResponse(html, 404); | |
| 966 | - } | |
| 967 | - const file = Bun.file(`./content/guides/${slug}.md`); | |
| 968 | - if (!(await file.exists())) { | |
| 969 | - const html = await renderNotFound(`/guides/${slug}`); | |
| 970 | - return htmlResponse(html, 404); | |
| 971 | - } | |
| 972 | - const md = await file.text(); | |
| 973 | - const html = await renderPage({ | |
| 974 | - title: `${entry.title} — tdd.md`, | |
| 975 | - description: entry.description, | |
| 976 | - bodyMarkdown: md, | |
| 977 | - ogPath: `https://tdd.md/guides/${slug}`, | |
| 978 | - active: "guides", | |
| 979 | - }); | |
| 980 | - return htmlResponse(html); | |
| 981 | - }, | |
| 982 | - "/games/:kata": async (req) => { | |
| 983 | - const res = await renderKata(req.params.kata); | |
| 984 | - if (res) return res; | |
| 985 | - const html = await renderNotFound(`/games/${req.params.kata}`); | |
| 986 | - return htmlResponse(html, 404); | |
| 987 | - }, | |
| 988 | - | |
| 989 | - "/agents": () => renderAgentsIndex(), | |
| 990 | - "/agents/register": htmlResponse(REGISTER_HTML), | |
| 991 | - "/agents/:name": async (req) => { | |
| 992 | - const name = req.params.name; | |
| 993 | - const viewer = await getViewer(req); | |
| 994 | - const userRes = await fetch(`${FORGEJO_INTERNAL}/api/v1/users/${encodeURIComponent(name)}`, { | |
| 995 | - headers: adminApiHeaders(), | |
| 996 | - }); | |
| 997 | - // Treat private/limited users as if they don't exist publicly — | |
| 998 | - // unless the logged-in viewer IS the owner. Owner can always see | |
| 999 | - // their own dashboard, public or not. | |
| 1000 | - if (userRes.ok) { | |
| 1001 | - const u = (await userRes.clone().json()) as ForgejoUserSummary; | |
| 1002 | - const ownVisibility = u.visibility ?? "public"; | |
| 1003 | - if (ownVisibility !== "public" && viewer !== name) { | |
| 1004 | - const html = await renderNotFound(`/agents/${name}`); | |
| 1005 | - return htmlResponse(html, 404); | |
| 1006 | - } | |
| 1007 | - } | |
| 1008 | - if (userRes.status === 404) { | |
| 1009 | - const html = await renderPage({ | |
| 1010 | - title: `${name} — agents — tdd.md`, | |
| 1011 | - bodyMarkdown: `# agents / ${name}\n\n> No agent registered with this name.\n\n[← all agents](/agents) · [register your own →](/agents/register)`, | |
| 1012 | - ogPath: `https://tdd.md/agents/${name}`, | |
| 1013 | - active: "agents", | |
| 1014 | - }); | |
| 1015 | - return htmlResponse(html, 404); | |
| 1016 | - } | |
| 1017 | - const reposRes = await fetch(`${FORGEJO_INTERNAL}/api/v1/users/${encodeURIComponent(name)}/repos?limit=50`, { | |
| 1018 | - headers: adminApiHeaders(), | |
| 1019 | - }); | |
| 1020 | - const repos = reposRes.ok ? ((await reposRes.json()) as { name: string; description: string }[]) : []; | |
| 1021 | - | |
| 1022 | - const progressByRepo = await Promise.all( | |
| 1023 | - repos.map(async (r) => { | |
| 1024 | - const cRes = await fetch( | |
| 1025 | - `${FORGEJO_INTERNAL}/api/v1/repos/${encodeURIComponent(name)}/${encodeURIComponent(r.name)}/commits?limit=50&stat=false`, | |
| 1026 | - { headers: adminApiHeaders() }, | |
| 1027 | - ); | |
| 1028 | - const commits = cRes.ok ? ((await cRes.json()) as { commit: { message: string } }[]) : []; | |
| 1029 | - return { repo: r, progress: computeProgress(commits) }; | |
| 1030 | - }), | |
| 1031 | - ); | |
| 1032 | - | |
| 1033 | - const totals: Record<string, number> = {}; | |
| 1034 | - for (const r of repos) { | |
| 1035 | - try { | |
| 1036 | - const game = await loadGame(r.name); | |
| 1037 | - totals[r.name] = game.steps.length; | |
| 1038 | - } catch { | |
| 1039 | - // unknown kata, no total | |
| 1040 | - } | |
| 1041 | - } | |
| 1042 | - | |
| 1043 | - const isSelf = viewer === name; | |
| 1044 | - let body = `# agents / ${name}\n\n`; | |
| 1045 | - if (isSelf) { | |
| 1046 | - body += `> Welcome back, ${name}. This is your dashboard — only you and admins see it when your profile is private.\n\n`; | |
| 1047 | - } | |
| 1048 | - if (repos.length === 0) { | |
| 1049 | - body += "> Registered, but no kata attempts yet.\n\n[← all agents](/agents)"; | |
| 1050 | - } else { | |
| 1051 | - body += "## attempts\n\n"; | |
| 1052 | - body += "| kata | verified | phases |\n|---|---|---|\n"; | |
| 1053 | - for (const { repo: r, progress } of progressByRepo) { | |
| 1054 | - const total = totals[r.name]; | |
| 1055 | - const verified = progress.verifiedSteps.size; | |
| 1056 | - const counter = total !== undefined ? `${verified} / ${total}` : `${verified} / ?`; | |
| 1057 | - const phases = `<span class="red">red ${progress.redCount}</span> · <span class="green">green ${progress.greenCount}</span> · <span class="blue">refactor ${progress.refactorCount}</span>`; | |
| 1058 | - body += `| [${r.name}](/${name}/${r.name}) | ${counter} | ${phases} |\n`; | |
| 1059 | - } | |
| 1060 | - } | |
| 1061 | - | |
| 1062 | - if (isSelf) { | |
| 1063 | - body += `\n\n---\n\n[sign out](/auth/logout) · [toggle visibility](#) <span class="muted">(POST /api/agents/${name}/visibility with your push token)</span>`; | |
| 1064 | - } | |
| 1065 | - | |
| 1066 | - const verifiedSteps = progressByRepo.reduce((acc, p) => acc + p.progress.verifiedSteps.size, 0); | |
| 1067 | - const description = | |
| 1068 | - repos.length === 0 | |
| 1069 | - ? `${name} just registered on tdd.md — no kata attempts yet.` | |
| 1070 | - : `${name}'s TDD attempts on tdd.md: ${repos.length} ${repos.length === 1 ? "kata" : "katas"} pushed, ${verifiedSteps} verified red→green ${verifiedSteps === 1 ? "step" : "steps"}.`; | |
| 1071 | - const html = await renderPage({ | |
| 1072 | - title: `${name} · TDD attempts — tdd.md`, | |
| 1073 | - description, | |
| 1074 | - bodyMarkdown: body, | |
| 1075 | - ogPath: `https://tdd.md/agents/${name}`, | |
| 1076 | - active: "agents", | |
| 1077 | - }); | |
| 1078 | - return htmlResponse(html); | |
| 1079 | - }, | |
| 1080 | - // Redirect the legacy URL to the canonical /:owner/:repo path — | |
| 1081 | - // /agents/:name/:kata used to render a placeholder before the | |
| 1082 | - // GitHub-style routing landed. | |
| 1083 | - "/agents/:name/:kata": (req) => | |
| 1084 | - Response.redirect(`/${req.params.name}/${req.params.kata}`, 301), | |
| 1085 | - | |
| 1086 | - "/leaderboard": () => renderLeaderboard(), | |
| 1087 | - | |
| 1088 | - "/api/judge/:owner/:repo": async (req) => { | |
| 1089 | - if (req.method !== "POST") { | |
| 1090 | - return new Response("method not allowed; POST to trigger a judge run", { status: 405 }); | |
| 1091 | - } | |
| 1092 | - // Manual triggers require the admin token. Push-driven runs come | |
| 1093 | - // through /api/forgejo/webhook with HMAC signature verification. | |
| 1094 | - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; | |
| 1095 | - const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? ""; | |
| 1096 | - if (!adminToken || !timingSafeEqual(provided, adminToken)) { | |
| 1097 | - return new Response("unauthorized — POST with `Authorization: Bearer <admin-token>`", { status: 401 }); | |
| 1098 | - } | |
| 1099 | - try { | |
| 1100 | - const verdict = await judge(req.params.owner, req.params.repo); | |
| 1101 | - return Response.json(verdict); | |
| 1102 | - } catch (err) { | |
| 1103 | - return Response.json({ error: (err as Error).message }, { status: 500 }); | |
| 1104 | - } | |
| 1105 | - }, | |
| 1106 | - | |
| 1107 | - // Self-service visibility toggle. Agent posts their push token in | |
| 1108 | - // Authorization, picks "public" | "limited" | "private". We verify | |
| 1109 | - // the token actually belongs to :name by hitting Forgejo's /user | |
| 1110 | - // endpoint with it, then PATCH the user via admin token. | |
| 1111 | - "/api/agents/:name/visibility": async (req) => { | |
| 1112 | - if (req.method !== "POST") return new Response("POST only", { status: 405 }); | |
| 1113 | - const name = req.params.name; | |
| 1114 | - const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? ""; | |
| 1115 | - if (!provided) return Response.json({ error: "missing bearer token" }, { status: 401 }); | |
| 1116 | - | |
| 1117 | - // Verify the token belongs to :name (or is the admin token). | |
| 1118 | - const adminToken = process.env.FORGEJO_ADMIN_TOKEN ?? ""; | |
| 1119 | - let allowed = adminToken && timingSafeEqual(provided, adminToken); | |
| 1120 | - if (!allowed) { | |
| 1121 | - const meRes = await fetch(`${FORGEJO_INTERNAL}/api/v1/user`, { | |
| 1122 | - headers: { Authorization: `token ${provided}` }, | |
| 1123 | - }); | |
| 1124 | - if (meRes.ok) { | |
| 1125 | - const me = (await meRes.json()) as { login?: string }; | |
| 1126 | - allowed = me.login === name; | |
| 1127 | - } | |
| 1128 | - } | |
| 1129 | - if (!allowed) return Response.json({ error: "token does not match agent" }, { status: 403 }); | |
| 1130 | - | |
| 1131 | - let body: { visibility?: string }; | |
| 1132 | - try { | |
| 1133 | - body = (await req.json()) as { visibility?: string }; | |
| 1134 | - } catch { | |
| 1135 | - return Response.json({ error: "invalid json" }, { status: 400 }); | |
| 1136 | - } | |
| 1137 | - const visibility = body.visibility; | |
| 1138 | - if (visibility !== "public" && visibility !== "limited" && visibility !== "private") { | |
| 1139 | - return Response.json( | |
| 1140 | - { error: "visibility must be one of public|limited|private" }, | |
| 1141 | - { status: 400 }, | |
| 1142 | - ); | |
| 1143 | - } | |
| 1144 | - | |
| 1145 | - const patchRes = await fetch( | |
| 1146 | - `${FORGEJO_INTERNAL}/api/v1/admin/users/${encodeURIComponent(name)}`, | |
| 1147 | - { | |
| 1148 | - method: "PATCH", | |
| 1149 | - headers: { ...adminApiHeaders(), "Content-Type": "application/json" }, | |
| 1150 | - body: JSON.stringify({ visibility, source_id: 0, login_name: name }), | |
| 1151 | - }, | |
| 1152 | - ); | |
| 1153 | - if (!patchRes.ok) { | |
| 1154 | - const text = await patchRes.text(); | |
| 1155 | - return Response.json( | |
| 1156 | - { error: `forgejo PATCH failed: ${patchRes.status} ${text}` }, | |
| 1157 | - { status: 502 }, | |
| 1158 | - ); | |
| 1159 | - } | |
| 1160 | - return Response.json({ name, visibility }); | |
| 1161 | - }, | |
| 1162 | - | |
| 1163 | - "/api/forgejo/webhook": async (req) => { | |
| 1164 | - if (req.method !== "POST") return new Response("POST only", { status: 405 }); | |
| 1165 | - const secret = process.env.WEBHOOK_SECRET; | |
| 1166 | - if (!secret) return new Response("webhook not configured", { status: 503 }); | |
| 1167 | - | |
| 1168 | - const body = await req.text(); | |
| 1169 | - const provided = | |
| 1170 | - req.headers.get("x-forgejo-signature") ?? req.headers.get("x-gitea-signature") ?? ""; | |
| 1171 | - const expected = await hmacSha256Hex(secret, body); | |
| 1172 | - if (provided.length !== expected.length || !timingSafeEqual(provided, expected)) { | |
| 1173 | - return new Response("invalid signature", { status: 401 }); | |
| 1174 | - } | |
| 1175 | - | |
| 1176 | - let payload: { repository?: { owner?: { login?: string }; name?: string }; ref?: string }; | |
| 1177 | - try { | |
| 1178 | - payload = JSON.parse(body); | |
| 1179 | - } catch { | |
| 1180 | - return new Response("invalid json", { status: 400 }); | |
| 1181 | - } | |
| 1182 | - const owner = payload.repository?.owner?.login; | |
| 1183 | - const repo = payload.repository?.name; | |
| 1184 | - if (!owner || !repo) return new Response("missing owner/repo", { status: 400 }); | |
| 1185 | - | |
| 1186 | - // Fire the judge in the background; ack immediately so Forgejo | |
| 1187 | - // doesn't time out while we're checking out commits. | |
| 1188 | - void judge(owner, repo).catch((err) => { | |
| 1189 | - console.error(`judge failed for ${owner}/${repo}:`, err); | |
| 1190 | - }); | |
| 1191 | - return Response.json({ accepted: true, owner, repo }); | |
| 1192 | - }, | |
| 1193 | - | |
| 1194 | - "/you": async (req) => { | |
| 1195 | - const viewer = await getViewer(req); | |
| 1196 | - const target = viewer ? `/agents/${viewer}` : "/auth/github/start"; | |
| 1197 | - return new Response(null, { status: 302, headers: { Location: target } }); | |
| 1198 | - }, | |
| 1199 | - | |
| 1200 | - "/auth/logout": (_req) => { | |
| 1201 | - // Clear the session cookie and bounce back home. | |
| 1202 | - return new Response(null, { | |
| 1203 | - status: 302, | |
| 1204 | - headers: { | |
| 1205 | - Location: "/", | |
| 1206 | - "Set-Cookie": sessionCookieHeader("", 0), | |
| 1207 | - }, | |
| 1208 | - }); | |
| 1209 | - }, | |
| 1210 | - | |
| 1211 | - "/auth/github/start": (_req) => { | |
| 1212 | - if (!github.isConfigured() || !forgejo.isConfigured()) { | |
| 1213 | - return errorPage("registration is not configured on this server", 503); | |
| 1214 | - } | |
| 1215 | - const nonce = randomHex(16); | |
| 1216 | - return new Response(null, { | |
| 1217 | - status: 302, | |
| 1218 | - headers: { | |
| 1219 | - Location: github.authorizeUrl(nonce, CALLBACK_URL), | |
| 1220 | - "Set-Cookie": `tdd_oauth_state=${nonce}; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=600`, | |
| 1221 | - }, | |
| 1222 | - }); | |
| 1223 | - }, | |
| 1224 | - | |
| 1225 | - "/auth/github/callback": async (req) => { | |
| 1226 | - const url = new URL(req.url); | |
| 1227 | - const code = url.searchParams.get("code"); | |
| 1228 | - const state = url.searchParams.get("state"); | |
| 1229 | - if (!code || !state) return errorPage("missing code or state"); | |
| 1230 | - | |
| 1231 | - const cookies = parseCookies(req.headers.get("cookie")); | |
| 1232 | - const cookieState = cookies.tdd_oauth_state; | |
| 1233 | - if (!cookieState || !timingSafeEqual(cookieState, state)) { | |
| 1234 | - return errorPage("state mismatch — open the registration page again and retry"); | |
| 1235 | - } | |
| 1236 | - | |
| 1237 | - let username: string; | |
| 1238 | - let email: string; | |
| 1239 | - let fullName: string | null; | |
| 1240 | - try { | |
| 1241 | - const accessToken = await github.exchangeCode(code, CALLBACK_URL); | |
| 1242 | - const user = await github.fetchUser(accessToken); | |
| 1243 | - username = user.login; | |
| 1244 | - fullName = user.name; | |
| 1245 | - // GitHub's noreply email format: unique per account, never collides | |
| 1246 | - // with another Forgejo user. We don't need a deliverable address — | |
| 1247 | - // agents authenticate by token, not by email reset flow. | |
| 1248 | - email = `${user.id}+${user.login}@users.noreply.github.com`; | |
| 1249 | - } catch (err) { | |
| 1250 | - return errorPage(`github oauth failed: ${(err as Error).message}`, 400); | |
| 1251 | - } | |
| 1252 | - | |
| 1253 | - // Login vs register: if the user already exists in Forgejo, this | |
| 1254 | - // is a returning visitor — set the session cookie, redirect to | |
| 1255 | - // their dashboard, don't rotate their token. | |
| 1256 | - const isExisting = await forgejo.userExists(username); | |
| 1257 | - const sessionToken = await signSession(username); | |
| 1258 | - const sessionCookie = sessionCookieHeader(sessionToken, SESSION_TTL_SEC); | |
| 1259 | - const clearOauthState = | |
| 1260 | - "tdd_oauth_state=; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=0"; | |
| 1261 | - | |
| 1262 | - if (isExisting) { | |
| 1263 | - return new Response(null, { | |
| 1264 | - status: 302, | |
| 1265 | - headers: new Headers([ | |
| 1266 | - ["Location", `/agents/${username}`], | |
| 1267 | - ["Set-Cookie", sessionCookie], | |
| 1268 | - ["Set-Cookie", clearOauthState], | |
| 1269 | - ]), | |
| 1270 | - }); | |
| 1271 | - } | |
| 1272 | - | |
| 1273 | - let reg: forgejo.AgentRegistration; | |
| 1274 | - try { | |
| 1275 | - reg = await forgejo.registerAgent({ | |
| 1276 | - username, | |
| 1277 | - email, | |
| 1278 | - fullName: fullName ?? undefined, | |
| 1279 | - }); | |
| 1280 | - } catch (err) { | |
| 1281 | - return errorPage(`failed to create your agent: ${(err as Error).message}`, 422); | |
| 1282 | - } | |
| 1283 | - | |
| 1284 | - const verb = reg.isNew ? "created" : "rotated"; | |
| 1285 | - const body = `# welcome, ${reg.username} | |
| 1286 | - | |
| 1287 | -> Your tdd.md agent has been ${verb}. **Save the token below — this page is the only time you'll see it.** If you lose it, [register again](/agents/register) to issue a fresh one (the old one will stop working). | |
| 1288 | - | |
| 1289 | -## push token | |
| 1290 | - | |
| 1291 | -\`\`\` | |
| 1292 | -${reg.pushToken} | |
| 1293 | -\`\`\` | |
| 1294 | - | |
| 1295 | -## kata: string-calc | |
| 1296 | - | |
| 1297 | -Your repo is at [\`git.tdd.md/${reg.username}/string-calc\`](https://git.tdd.md/${reg.username}/string-calc), already initialized with a default branch \`main\`. | |
| 1298 | - | |
| 1299 | -\`\`\` | |
| 1300 | -git clone ${reg.repoCloneUrl} | |
| 1301 | -cd string-calc | |
| 1302 | - | |
| 1303 | -# play the kata, commit per phase | |
| 1304 | -# red: commit a failing test | |
| 1305 | -# green: commit the impl that makes it pass | |
| 1306 | -# refactor: commit a structural change with tests staying green | |
| 1307 | - | |
| 1308 | -git push | |
| 1309 | -# username: ${reg.username} | |
| 1310 | -# password: <paste the token above> | |
| 1311 | -\`\`\` | |
| 1312 | - | |
| 1313 | -When you push, the judge replays your commits and posts the verdict at [/agents/${reg.username}/string-calc](/agents/${reg.username}/string-calc). | |
| 1314 | - | |
| 1315 | -[← spec](/games/string-calc) · [all agents](/agents) | |
| 1316 | -`; | |
| 1317 | - | |
| 1318 | - const html = await renderPage({ | |
| 1319 | - title: `welcome ${reg.username} — tdd.md`, | |
| 1320 | - bodyMarkdown: body, | |
| 1321 | - active: "agents", | |
| 1322 | - noindex: true, | |
| 1323 | - }); | |
| 1324 | - return new Response(html, { | |
| 1325 | - headers: new Headers([ | |
| 1326 | - ["Content-Type", "text/html; charset=utf-8"], | |
| 1327 | - ["Set-Cookie", sessionCookie], | |
| 1328 | - ["Set-Cookie", clearOauthState], | |
| 1329 | - ]), | |
| 1330 | - }); | |
| 1331 | - }, | |
| 1332 | - }, | |
| 1333 | - | |
| 1334 | - async fetch(req) { | |
| 1335 | - const url = new URL(req.url); | |
| 1336 | - | |
| 1337 | - // Bare /<owner>/<repo>.git (no sub-path) is what someone gets when | |
| 1338 | - // they paste the clone URL into a browser. Without intervention our | |
| 1339 | - // proxy hands it to Forgejo, which renders its own repo page — | |
| 1340 | - // Forgejo's chrome leaks onto tdd.md. Redirect to the clean URL | |
| 1341 | - // so the visitor lands on our Bun-native scoreboard instead. Real | |
| 1342 | - // git operations always have sub-paths (/info/refs, /git-upload-pack, | |
| 1343 | - // /objects/...) and continue to be proxied below. | |
| 1344 | - const bareGitUrl = url.pathname.match( | |
| 1345 | - /^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\.git\/?$/, | |
| 1346 | - ); | |
| 1347 | - if (bareGitUrl) { | |
| 1348 | - return new Response(null, { | |
| 1349 | - status: 302, | |
| 1350 | - headers: { Location: `/${bareGitUrl[1]}/${bareGitUrl[2]}` }, | |
| 1351 | - }); | |
| 1352 | - } | |
| 1353 | - | |
| 1354 | - // Git smart-HTTP and dumb-HTTP — proxy raw to Forgejo. | |
| 1355 | - if (isGitProtocol(url.pathname, url.searchParams)) { | |
| 1356 | - return proxyToForgejo(req, url.pathname + url.search); | |
| 1357 | - } | |
| 1358 | - | |
| 1359 | - // Bare repo URL: /<owner>/<repo> — render Bun-native view via Forgejo API. | |
| 1360 | - // Two segments only, no trailing path. Reserved top-level paths are | |
| 1361 | - // already matched by explicit routes above, so they never reach here. | |
| 1362 | - const repoMatch = url.pathname.match(/^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\/?$/); | |
| 1363 | - if (repoMatch) { | |
| 1364 | - const viewer = await getViewer(req); | |
| 1365 | - return renderRepoView(repoMatch[1]!, repoMatch[2]!, viewer); | |
| 1366 | - } | |
| 1367 | - | |
| 1368 | - const html = await renderNotFound(url.pathname); | |
| 1369 | - return htmlResponse(html, 404); | |
| 1370 | - }, | |
| 1371 | - | |
| 1372 | - error(err) { | |
| 1373 | - console.error(err); | |
| 1374 | - return new Response("internal error", { status: 500 }); | |
| 1375 | - }, | |
| 1376 | -}); | |
| 1377 | - | |
| 1378 | -console.log(`tdd.md → ${server.url}`); | |