syntaxai/tdd.md · commit ade29aa

SAMA refactor: cXX_*.ts file-prefix convention across src/

Adopts the Spalder Application Module Architecture (matching the Rust
monorepo + ~/Documents/snowplaza-info) so that files sort by layer and
alphabetical order tracks dependency direction:

  c11_server.ts        entry: env + Bun.serve(createApp())
  c13_database.ts      SQLite (runs + projects)
  c14_forgejo.ts       Forgejo HTTP + proxy
  c14_github.ts        GitHub OAuth + raw .tdd-md.json fetch
  c21_app.ts           routes + appFetch fallback + appError
  c31_blog.ts          ALL_POSTS registry
  c31_commits.ts       parseCommit + computeProgress
  c31_games.ts         kata loader
  c31_guides.ts        ALL_GUIDES registry
  c31_project_config.ts  .tdd-md.json schema + parser
  c31_reports_demo.ts  synthetic /reports/demo dataset
  c32_judge.ts         kata-judging logic
  c32_session.ts       HMAC session + cookie helpers
  c51_render.ts        page chrome + every body builder

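Nothing below c5x imports from c5x or above except c21_app.ts, which
composes the other layers and imports c51_render.ts; c13_database.ts and
c14_github.ts additionally import from c31_project_config.ts. Running
`grep -rE 'from "\./c[5-9]' src/c1*.ts src/c2*.ts src/c3*.ts` should
therefore list only the c21_app.ts → c51_render.ts import.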
Imports now carry the explicit `.ts` extension. The Bun.serve route
literal stays inline inside createApp(port) so path-parameter inference
flows through to handler types.

No behavioural change — every public route still returns its previous
status code (verified: 26 routes 200/302/404 as expected).

Co-Authored-By: Claude Opus 4.7 (1M context) <[email protected]>
author
syntaxai <[email protected]>
date
2026-05-08 12:09:09 +01:00
parent
559b0bf
commit
ade29aa6e3e797fcfcfe1680023b69205a704e17

32 files changed · +3418 −3330

modified Containerfile +1 −1
@@ -25,4 +25,4 @@ EXPOSE 3000
2525 # external probes (cloudflared upstream + the deploy script's /healthz
2626 # poll) already cover liveness.
2727
28-CMD ["bun", "src/server.ts"]
28+CMD ["bun", "src/c11_server.ts"]
modified content/games/fizzbuzz/spec.ts +1 −1
@@ -1,4 +1,4 @@
1-import type { Game } from "../../../src/games";
1+import type { Game } from "../../../src/c31_games.ts";
22
33 export const spec: Game = {
44 id: "fizzbuzz",
modified content/games/string-calc/spec.ts +1 −1
@@ -1,4 +1,4 @@
1-import type { Game } from "../../../src/games";
1+import type { Game } from "../../../src/c31_games.ts";
22
33 export const spec: Game = {
44 id: "string-calc",
modified package.json +3 −3
@@ -2,10 +2,10 @@
22 "name": "tdd.md",
33 "private": true,
44 "type": "module",
5- "module": "src/server.ts",
5+ "module": "src/c11_server.ts",
66 "scripts": {
7- "dev": "bun --hot src/server.ts",
8- "start": "bun src/server.ts"
7+ "dev": "bun --hot src/c11_server.ts",
8+ "start": "bun src/c11_server.ts"
99 },
1010 "dependencies": {
1111 "marked": "^14.1.4"
added src/c11_server.ts +10 −0
@@ -0,0 +1,10 @@
1+// c11 — server entry: env + Bun.serve startup. No route logic, no SQL,
2+// no HTML. The route table, fallback fetch, and error handler live in
3+// c21_app.ts; this file just reads PORT and asks createApp() to bind.
4+
5+import { createApp } from "./c21_app.ts";
6+
7+const port = Number(process.env.PORT ?? 3000);
8+const server = createApp(port);
9+
10+console.log(`tdd.md → ${server.url}`);
added src/c13_database.ts +214 −0
@@ -0,0 +1,214 @@
1+import { Database } from "bun:sqlite";
2+import type { ProjectConfig, TestRunner } from "./c31_project_config.ts";
3+
4+const DB_PATH = process.env.TDD_DB_PATH ?? ":memory:";
5+
6+let db: Database | null = null;
7+
8+const getDb = (): Database => {
9+ if (db) return db;
10+ db = new Database(DB_PATH, { create: true });
11+ db.exec(`
12+ CREATE TABLE IF NOT EXISTS runs (
13+ id INTEGER PRIMARY KEY AUTOINCREMENT,
14+ owner TEXT NOT NULL,
15+ repo TEXT NOT NULL,
16+ head_sha TEXT NOT NULL,
17+ judged_at INTEGER NOT NULL,
18+ verdict_json TEXT NOT NULL
19+ );
20+ CREATE INDEX IF NOT EXISTS idx_runs_owner_repo
21+ ON runs(owner, repo, judged_at DESC);
22+
23+ CREATE TABLE IF NOT EXISTS projects (
24+ id INTEGER PRIMARY KEY AUTOINCREMENT,
25+ registered_by TEXT NOT NULL,
26+ repo_owner TEXT NOT NULL,
27+ repo_name TEXT NOT NULL,
28+ test_runner TEXT NOT NULL DEFAULT 'none',
29+ tracked_branches TEXT NOT NULL,
30+ display_name TEXT,
31+ team TEXT,
32+ registered_at INTEGER NOT NULL,
33+ status TEXT NOT NULL DEFAULT 'active',
34+ UNIQUE(repo_owner, repo_name)
35+ );
36+ CREATE INDEX IF NOT EXISTS idx_projects_registered_by
37+ ON projects(registered_by);
38+ `);
39+ return db;
40+};
41+
42+export type Mode = "strict" | "pragmatic" | "learning";
43+
44+export interface StepVerdict {
45+ stepId: string;
46+ redSha: string | null;
47+ greenSha: string | null;
48+ redFailed: boolean | null;
49+ greenPassed: boolean | null;
50+ // Whether the kata's authoritative hidden tests pass against the agent's
51+ // implementation at the green commit. null when no hidden tests exist
52+ // for the step (unknown kata, or step not registered with the spec).
53+ hiddenPassed: boolean | null;
54+ status:
55+ | "verified"
56+ | "discipline-only"
57+ | "no-green"
58+ | "red-did-not-fail"
59+ | "green-did-not-pass"
60+ | "hidden-tests-failed"
61+ | "test-deleted"
62+ // Trace-only mode: tests not executed, only commit discipline checked.
63+ // Used when test_runner: "none" — language-agnostic, useful as a
64+ // CI gate on real projects where Bun can't run the test suite.
65+ | "trace-verified"
66+ | "trace-tests-shrunk";
67+ scoreDelta: number;
68+ // Coach-style explanation of the verdict — what happened, why the score
69+ // is what it is, and (when relevant) how to improve next time.
70+ explanation: string;
71+}
72+
73+export interface RefactorVerdict {
74+ sha: string;
75+ stepId: string | null;
76+ testsPassed: boolean;
77+ scoreDelta: number;
78+ explanation: string;
79+}
80+
81+export interface Verdict {
82+ headSha: string;
83+ mode: Mode;
84+ steps: StepVerdict[];
85+ refactors: RefactorVerdict[];
86+ totalScore: number;
87+ judgedAt: number;
88+}
89+
90+export const saveRun = (owner: string, repo: string, verdict: Verdict): void => {
91+ getDb().run(
92+ `INSERT INTO runs (owner, repo, head_sha, judged_at, verdict_json) VALUES (?, ?, ?, ?, ?)`,
93+ [owner, repo, verdict.headSha, verdict.judgedAt, JSON.stringify(verdict)],
94+ );
95+};
96+
97+export const latestRun = (owner: string, repo: string): Verdict | null => {
98+ const row = getDb()
99+ .query<{ verdict_json: string }, [string, string]>(
100+ `SELECT verdict_json FROM runs WHERE owner = ? AND repo = ? ORDER BY judged_at DESC LIMIT 1`,
101+ )
102+ .get(owner, repo);
103+ if (!row) return null;
104+ return JSON.parse(row.verdict_json) as Verdict;
105+};
106+
107+export interface ProjectRow {
108+ id: number;
109+ registeredBy: string;
110+ repoOwner: string;
111+ repoName: string;
112+ testRunner: TestRunner;
113+ trackedBranches: string[];
114+ displayName: string | null;
115+ team: string | null;
116+ registeredAt: number;
117+ status: "active" | "paused";
118+}
119+
120+interface ProjectDbRow {
121+ id: number;
122+ registered_by: string;
123+ repo_owner: string;
124+ repo_name: string;
125+ test_runner: string;
126+ tracked_branches: string;
127+ display_name: string | null;
128+ team: string | null;
129+ registered_at: number;
130+ status: string;
131+}
132+
133+const rowToProject = (r: ProjectDbRow): ProjectRow => ({
134+ id: r.id,
135+ registeredBy: r.registered_by,
136+ repoOwner: r.repo_owner,
137+ repoName: r.repo_name,
138+ testRunner: (r.test_runner === "bun" ? "bun" : "none") as TestRunner,
139+ trackedBranches: JSON.parse(r.tracked_branches) as string[],
140+ displayName: r.display_name,
141+ team: r.team,
142+ registeredAt: r.registered_at,
143+ status: r.status === "paused" ? "paused" : "active",
144+});
145+
146+// Inserts or updates a project. Re-registering the same repo refreshes
147+// its config (test_runner, tracked_branches, display_name, team) without
148+// duplicating the row. Returns the stored project.
149+export const upsertProject = (
150+ registeredBy: string,
151+ repoOwner: string,
152+ repoName: string,
153+ config: ProjectConfig,
154+): ProjectRow => {
155+ const now = Date.now();
156+ const branches = JSON.stringify(config.tracked_branches);
157+ const display = config.display_name ?? null;
158+ const team = config.team ?? null;
159+ getDb().run(
160+ `INSERT INTO projects (registered_by, repo_owner, repo_name, test_runner, tracked_branches, display_name, team, registered_at, status)
161+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
162+ ON CONFLICT(repo_owner, repo_name) DO UPDATE SET
163+ test_runner = excluded.test_runner,
164+ tracked_branches = excluded.tracked_branches,
165+ display_name = excluded.display_name,
166+ team = excluded.team,
167+ status = 'active'`,
168+ [registeredBy, repoOwner, repoName, config.test_runner, branches, display, team, now],
169+ );
170+ const row = getDb()
171+ .query<ProjectDbRow, [string, string]>(
172+ `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`,
173+ )
174+ .get(repoOwner, repoName);
175+ if (!row) throw new Error("project upsert returned no row");
176+ return rowToProject(row);
177+};
178+
179+export const getProject = (repoOwner: string, repoName: string): ProjectRow | null => {
180+ const row = getDb()
181+ .query<ProjectDbRow, [string, string]>(
182+ `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`,
183+ )
184+ .get(repoOwner, repoName);
185+ return row ? rowToProject(row) : null;
186+};
187+
188+export const listActiveProjects = (): ProjectRow[] => {
189+ const rows = getDb()
190+ .query<ProjectDbRow, []>(
191+ `SELECT * FROM projects WHERE status = 'active' ORDER BY registered_at DESC`,
192+ )
193+ .all();
194+ return rows.map(rowToProject);
195+};
196+
197+// Latest verdict per (owner, repo) across all agents — drives the
198+// leaderboard and the /agents index.
199+export const allLatestRuns = (): { owner: string; repo: string; verdict: Verdict }[] => {
200+ const rows = getDb()
201+ .query<{ owner: string; repo: string; verdict_json: string }, []>(
202+ `SELECT owner, repo, verdict_json FROM runs r1
203+ WHERE judged_at = (
204+ SELECT MAX(judged_at) FROM runs r2
205+ WHERE r2.owner = r1.owner AND r2.repo = r1.repo
206+ )`,
207+ )
208+ .all();
209+ return rows.map((r) => ({
210+ owner: r.owner,
211+ repo: r.repo,
212+ verdict: JSON.parse(r.verdict_json) as Verdict,
213+ }));
214+};
added src/c14_forgejo.ts +345 −0
@@ -0,0 +1,345 @@
1+// c14 — secondary I/O: HTTP client to the local Forgejo instance. Owns
2+// every URL reachable at git.tdd.md (admin API, user repos, raw git
3+// protocol, webhook setup) plus the proxy that forwards git-protocol
4+// requests through tdd.md to keep the public hostname uniform.
5+
6+// Internal URL — Bun container talks to Forgejo via host.containers.internal
7+// (rootless podman's standard hostname for the host network). Falls back to
8+// the public URL for local dev.
9+export const FORGEJO_URL = process.env.FORGEJO_URL ?? "https://git.tdd.md";
10+const ADMIN_TOKEN = process.env.FORGEJO_ADMIN_TOKEN ?? "";
11+
12+const adminAuth = (): HeadersInit => ({
13+ Authorization: `token ${ADMIN_TOKEN}`,
14+});
15+
16+const userAuth = (username: string, password: string): HeadersInit => ({
17+ Authorization: `Basic ${btoa(`${username}:${password}`)}`,
18+});
19+
20+export const isConfigured = (): boolean => ADMIN_TOKEN !== "";
21+
22+export const userExists = async (username: string): Promise<boolean> => {
23+ const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}`, {
24+ headers: adminAuth(),
25+ });
26+ return res.status === 200;
27+};
28+
29+export const createUser = async (params: {
30+ username: string;
31+ email: string;
32+ password: string;
33+ fullName?: string;
34+}): Promise<void> => {
35+ const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users`, {
36+ method: "POST",
37+ headers: { ...adminAuth(), "Content-Type": "application/json" },
38+ body: JSON.stringify({
39+ username: params.username,
40+ email: params.email,
41+ password: params.password,
42+ full_name: params.fullName ?? params.username,
43+ must_change_password: false,
44+ send_notify: false,
45+ }),
46+ });
47+ if (!res.ok) {
48+ const text = await res.text();
49+ throw new Error(`forgejo createUser ${res.status}: ${text}`);
50+ }
51+};
52+
53+export const setUserPassword = async (username: string, password: string): Promise<void> => {
54+ const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(username)}`, {
55+ method: "PATCH",
56+ headers: { ...adminAuth(), "Content-Type": "application/json" },
57+ body: JSON.stringify({
58+ password,
59+ must_change_password: false,
60+ source_id: 0,
61+ login_name: username,
62+ }),
63+ });
64+ if (!res.ok) {
65+ const text = await res.text();
66+ throw new Error(`forgejo setUserPassword ${res.status}: ${text}`);
67+ }
68+};
69+
70+export const repoExists = async (owner: string, repo: string): Promise<boolean> => {
71+ const res = await fetch(`${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`, {
72+ headers: adminAuth(),
73+ });
74+ return res.status === 200;
75+};
76+
77+// Creates a per-repo webhook that fires on push events. The webhook
78+// posts to /api/forgejo/webhook on tdd.md, signed with WEBHOOK_SECRET so
79+// our endpoint can verify it. Idempotent — checks for an existing hook
80+// with the same URL before creating.
81+export const ensureRepoWebhook = async (params: {
82+ owner: string;
83+ repo: string;
84+ webhookUrl: string;
85+ secret: string;
86+}): Promise<void> => {
87+ const base = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(params.owner)}/${encodeURIComponent(params.repo)}/hooks`;
88+ const listRes = await fetch(base, { headers: adminAuth() });
89+ if (listRes.ok) {
90+ const hooks = (await listRes.json()) as { id: number; config: { url?: string } }[];
91+ const exists = hooks.some((h) => h.config?.url === params.webhookUrl);
92+ if (exists) return;
93+ }
94+ const res = await fetch(base, {
95+ method: "POST",
96+ headers: { ...adminAuth(), "Content-Type": "application/json" },
97+ body: JSON.stringify({
98+ type: "forgejo",
99+ active: true,
100+ events: ["push"],
101+ config: {
102+ url: params.webhookUrl,
103+ content_type: "json",
104+ secret: params.secret,
105+ },
106+ }),
107+ });
108+ if (!res.ok) {
109+ const text = await res.text();
110+ throw new Error(`forgejo ensureRepoWebhook ${res.status}: ${text}`);
111+ }
112+};
113+
114+export const createRepoForUser = async (params: {
115+ username: string;
116+ name: string;
117+ description?: string;
118+}): Promise<void> => {
119+ const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(params.username)}/repos`, {
120+ method: "POST",
121+ headers: { ...adminAuth(), "Content-Type": "application/json" },
122+ body: JSON.stringify({
123+ name: params.name,
124+ description: params.description ?? "",
125+ // Private by default — the source is the agent's, not ours to
126+ // publish. Verdicts still render on tdd.md via admin-mediated
127+ // API calls; clones require the agent's push token.
128+ private: true,
129+ // No auto_init: the agent's first push becomes the genuine initial
130+ // commit. An admin-authored "Initial commit" would muddle the phase
131+ // log and break attribution on the agent's repo page.
132+ auto_init: false,
133+ default_branch: "main",
134+ }),
135+ });
136+ if (!res.ok) {
137+ const text = await res.text();
138+ throw new Error(`forgejo createRepo ${res.status}: ${text}`);
139+ }
140+};
141+
142+interface TokenInfo {
143+ id: number;
144+ name: string;
145+}
146+
147+const listTokens = async (username: string, password: string): Promise<TokenInfo[]> => {
148+ const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens`, {
149+ headers: userAuth(username, password),
150+ });
151+ if (!res.ok) return [];
152+ return (await res.json()) as TokenInfo[];
153+};
154+
155+const deleteToken = async (username: string, password: string, tokenId: number): Promise<void> => {
156+ await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens/${tokenId}`, {
157+ method: "DELETE",
158+ headers: userAuth(username, password),
159+ });
160+};
161+
162+export const createPushToken = async (params: {
163+ username: string;
164+ password: string;
165+ name: string;
166+}): Promise<string> => {
167+ // Revoke any existing tokens with the same name so re-registration always
168+ // returns a fresh one and the previous one is invalidated.
169+ const existing = await listTokens(params.username, params.password);
170+ for (const t of existing) {
171+ if (t.name === params.name) {
172+ await deleteToken(params.username, params.password, t.id);
173+ }
174+ }
175+
176+ const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(params.username)}/tokens`, {
177+ method: "POST",
178+ headers: { ...userAuth(params.username, params.password), "Content-Type": "application/json" },
179+ body: JSON.stringify({
180+ name: params.name,
181+ // write:repository for the push; read:user so the agent can
182+ // verify their own identity against tdd.md's self-service
183+ // endpoints (e.g. POST /api/agents/:name/visibility).
184+ scopes: ["write:repository", "read:user"],
185+ }),
186+ });
187+ if (!res.ok) {
188+ const text = await res.text();
189+ throw new Error(`forgejo createPushToken ${res.status}: ${text}`);
190+ }
191+ const data = (await res.json()) as { sha1: string };
192+ return data.sha1;
193+};
194+
195+const randomPassword = (): string =>
196+ Array.from(crypto.getRandomValues(new Uint8Array(32)))
197+ .map((b) => b.toString(16).padStart(2, "0"))
198+ .join("");
199+
200+export interface AgentRegistration {
201+ username: string;
202+ pushToken: string;
203+ repoCloneUrl: string;
204+ isNew: boolean;
205+}
206+
207+// Idempotent: if the user exists, reset their password and rotate the push
208+// token. Always also ensures the kata repo exists.
209+export const registerAgent = async (params: {
210+ username: string;
211+ email: string;
212+ fullName?: string;
213+ kata?: string;
214+}): Promise<AgentRegistration> => {
215+ const password = randomPassword();
216+ const isNew = !(await userExists(params.username));
217+
218+ if (isNew) {
219+ await createUser({
220+ username: params.username,
221+ email: params.email,
222+ password,
223+ fullName: params.fullName,
224+ });
225+ } else {
226+ await setUserPassword(params.username, password);
227+ }
228+
229+ const pushToken = await createPushToken({
230+ username: params.username,
231+ password,
232+ name: "tdd-md-push",
233+ });
234+
235+ const kata = params.kata ?? "string-calc";
236+ if (!(await repoExists(params.username, kata))) {
237+ await createRepoForUser({
238+ username: params.username,
239+ name: kata,
240+ description: `${params.username}'s submission for the ${kata} kata`,
241+ });
242+ }
243+
244+ const baseUrl = process.env.BASE_URL ?? "https://tdd.md";
245+ const webhookSecret = process.env.WEBHOOK_SECRET;
246+ if (webhookSecret) {
247+ try {
248+ await ensureRepoWebhook({
249+ owner: params.username,
250+ repo: kata,
251+ webhookUrl: `${baseUrl}/api/forgejo/webhook`,
252+ secret: webhookSecret,
253+ });
254+ } catch (err) {
255+ // Webhook is convenience; registration must still succeed without it.
256+ console.error(`webhook setup failed for ${params.username}/${kata}:`, err);
257+ }
258+ }
259+
260+ return {
261+ username: params.username,
262+ pushToken,
263+ repoCloneUrl: `${baseUrl}/${params.username}/${kata}.git`,
264+ isNew,
265+ };
266+};
267+
268+// ---------------------------------------------------------------------
269+// Read-side helpers used by c21 handlers + c51 rendering.
270+// ---------------------------------------------------------------------
271+
272+export interface ForgejoUserSummary {
273+ id: number;
274+ login: string;
275+ is_admin?: boolean;
276+ // Forgejo visibility levels: "public" | "limited" | "private".
277+ // Anything other than "public" is hidden from anonymous tdd.md visitors.
278+ visibility?: string;
279+}
280+
281+// Admin-token-authenticated headers for API calls. Agent repos are
282+// private by default; rendering the verdict page must still work. We
283+// proxy the data through the admin identity, never exposing the source
284+// or push protocol publicly.
285+export const adminApiHeaders = (): HeadersInit => {
286+ const token = process.env.FORGEJO_ADMIN_TOKEN;
287+ return token ? { Authorization: `token ${token}` } : {};
288+};
289+
290+// Single-user visibility lookup for /:owner/:repo and /agents/:name.
291+// Returns the raw Forgejo string (or null if the user doesn't exist).
292+export const getUserVisibility = async (name: string): Promise<string | null> => {
293+ const r = await fetch(
294+ `${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}`,
295+ { headers: adminApiHeaders() },
296+ );
297+ if (!r.ok) return null;
298+ const u = (await r.json()) as ForgejoUserSummary;
299+ return u.visibility ?? "public";
300+};
301+
302+const HOP_BY_HOP = [
303+ "host",
304+ "connection",
305+ "keep-alive",
306+ "transfer-encoding",
307+ "upgrade",
308+ "proxy-authorization",
309+ "proxy-connection",
310+ "te",
311+ "trailer",
312+];
313+
314+// Forward git protocol + Forgejo API/asset requests to Forgejo via the host
315+// network. Lets us serve everything under tdd.md (GitHub-style) without
316+// exposing git.tdd.md externally.
317+export const proxyToForgejo = async (req: Request, pathAndQuery: string): Promise<Response> => {
318+ const upstream = `${FORGEJO_URL}${pathAndQuery}`;
319+ const headers = new Headers(req.headers);
320+ for (const h of HOP_BY_HOP) headers.delete(h);
321+ headers.set("X-Forwarded-Host", "tdd.md");
322+ headers.set("X-Forwarded-Proto", "https");
323+ headers.set("X-Forwarded-For", req.headers.get("cf-connecting-ip") ?? "0.0.0.0");
324+
325+ let body: ArrayBuffer | undefined;
326+ if (req.method !== "GET" && req.method !== "HEAD") {
327+ body = await req.arrayBuffer();
328+ }
329+
330+ const upstreamRes = await fetch(upstream, {
331+ method: req.method,
332+ headers,
333+ body,
334+ redirect: "manual",
335+ });
336+
337+ const responseHeaders = new Headers(upstreamRes.headers);
338+ for (const h of HOP_BY_HOP) responseHeaders.delete(h);
339+
340+ return new Response(upstreamRes.body, {
341+ status: upstreamRes.status,
342+ statusText: upstreamRes.statusText,
343+ headers: responseHeaders,
344+ });
345+};
added src/c14_github.ts +122 −0
@@ -0,0 +1,122 @@
1+// c14 — secondary I/O: HTTP clients to GitHub. Two concerns under one roof:
2+// 1. OAuth flow for sign-in (used by /auth/github/start + callback).
3+// 2. Raw-content fetch of `.tdd-md.json` from a public repo's default
4+// branch, for project onboarding.
5+// Both talk to GitHub; both are pure HTTP, no in-process state.
6+
7+import {
8+ PROJECT_CONFIG_PATH,
9+ parseProjectConfig,
10+ type ProjectConfig,
11+} from "./c31_project_config.ts";
12+
13+const CLIENT_ID = process.env.GITHUB_CLIENT_ID ?? "";
14+const CLIENT_SECRET = process.env.GITHUB_CLIENT_SECRET ?? "";
15+
16+export interface GithubUser {
17+ login: string;
18+ id: number;
19+ email: string | null;
20+ avatar_url: string;
21+ name: string | null;
22+}
23+
24+export interface GithubEmail {
25+ email: string;
26+ primary: boolean;
27+ verified: boolean;
28+ visibility: string | null;
29+}
30+
31+export const isConfigured = (): boolean => CLIENT_ID !== "" && CLIENT_SECRET !== "";
32+
33+export const authorizeUrl = (state: string, redirectUri: string): string => {
34+ const params = new URLSearchParams({
35+ client_id: CLIENT_ID,
36+ redirect_uri: redirectUri,
37+ scope: "read:user user:email",
38+ state,
39+ allow_signup: "true",
40+ });
41+ return `https://github.com/login/oauth/authorize?${params}`;
42+};
43+
44+export const exchangeCode = async (code: string, redirectUri: string): Promise<string> => {
45+ const res = await fetch("https://github.com/login/oauth/access_token", {
46+ method: "POST",
47+ headers: {
48+ Accept: "application/json",
49+ "Content-Type": "application/json",
50+ },
51+ body: JSON.stringify({
52+ client_id: CLIENT_ID,
53+ client_secret: CLIENT_SECRET,
54+ code,
55+ redirect_uri: redirectUri,
56+ }),
57+ });
58+ if (!res.ok) {
59+ throw new Error(`github token exchange failed: ${res.status}`);
60+ }
61+ const data = (await res.json()) as { access_token?: string; error?: string; error_description?: string };
62+ if (!data.access_token) {
63+ throw new Error(`github token exchange returned no token: ${data.error_description ?? data.error ?? "unknown"}`);
64+ }
65+ return data.access_token;
66+};
67+
68+export const fetchUser = async (accessToken: string): Promise<GithubUser> => {
69+ const res = await fetch("https://api.github.com/user", {
70+ headers: {
71+ Authorization: `token ${accessToken}`,
72+ Accept: "application/vnd.github+json",
73+ "User-Agent": "tdd.md",
74+ },
75+ });
76+ if (!res.ok) throw new Error(`github user fetch failed: ${res.status}`);
77+ return (await res.json()) as GithubUser;
78+};
79+
80+export const fetchPrimaryEmail = async (accessToken: string): Promise<string | null> => {
81+ const res = await fetch("https://api.github.com/user/emails", {
82+ headers: {
83+ Authorization: `token ${accessToken}`,
84+ Accept: "application/vnd.github+json",
85+ "User-Agent": "tdd.md",
86+ },
87+ });
88+ if (!res.ok) return null;
89+ const emails = (await res.json()) as GithubEmail[];
90+ const verified = emails.filter((e) => e.verified);
91+ return verified.find((e) => e.primary)?.email ?? verified[0]?.email ?? null;
92+};
93+
94+// Pulls .tdd-md.json from a public GitHub repo's default branch via the
95+// raw-content host. No auth — public-repo only for now (private repos
96+// land when we install a GitHub App, deferred to a later sliver).
97+export const fetchProjectConfig = async (
98+ repoOwner: string,
99+ repoName: string,
100+): Promise<ProjectConfig> => {
101+ const url = `https://raw.githubusercontent.com/${encodeURIComponent(repoOwner)}/${encodeURIComponent(repoName)}/HEAD/${PROJECT_CONFIG_PATH}`;
102+ const res = await fetch(url, {
103+ headers: { Accept: "application/json", "User-Agent": "tdd.md" },
104+ });
105+ if (res.status === 404) {
106+ throw new Error(
107+ `${PROJECT_CONFIG_PATH} not found in ${repoOwner}/${repoName} on the default branch (or the repo is private; private repos aren't supported yet).`,
108+ );
109+ }
110+ if (!res.ok) {
111+ throw new Error(
112+ `Couldn't fetch ${PROJECT_CONFIG_PATH} from ${repoOwner}/${repoName}: HTTP ${res.status}`,
113+ );
114+ }
115+ let parsed: unknown;
116+ try {
117+ parsed = await res.json();
118+ } catch {
119+ throw new Error(`${PROJECT_CONFIG_PATH} in ${repoOwner}/${repoName} isn't valid JSON`);
120+ }
121+ return parseProjectConfig(parsed);
122+};
added src/c21_app.ts +1176 −0
@@ -0,0 +1,1176 @@
1+// c21 — handlers: the route table + fallback fetch. Composes the lower
2+// layers (c13 db, c14 secondary I/O, c31 models, c32 logic, c51 render)
3+// into the HTTP surface served by Bun.serve in c11_server.
4+
5+import {
6+ renderPage,
7+ renderNotFound,
8+ htmlResponse,
9+ errorPage,
10+ phaseSpan,
11+ relativeTime,
12+ reportsLandingMd,
13+ execSummaryMd,
14+ agentDrilldownMd,
15+ testsOverviewMd,
16+ projectsLandingMd,
17+ projectRegisterMd,
18+ projectDetailMd,
19+} from "./c51_render.ts";
20+import * as github from "./c14_github.ts";
21+import * as forgejo from "./c14_forgejo.ts";
22+import {
23+ FORGEJO_URL,
24+ adminApiHeaders,
25+ getUserVisibility,
26+ proxyToForgejo,
27+ type ForgejoUserSummary,
28+} from "./c14_forgejo.ts";
29+import { parseCommit, computeProgress } from "./c31_commits.ts";
30+import { loadGame, listGames } from "./c31_games.ts";
31+import { ALL_POSTS } from "./c31_blog.ts";
32+import { ALL_GUIDES } from "./c31_guides.ts";
33+import { DEMO_REPORTS } from "./c31_reports_demo.ts";
34+import { parseRepoIdentifier } from "./c31_project_config.ts";
35+import { fetchProjectConfig } from "./c14_github.ts";
36+import { judge } from "./c32_judge.ts";
37+import {
38+ SESSION_TTL_SEC,
39+ getViewer,
40+ randomHex,
41+ parseCookies,
42+ signSession,
43+ sessionCookieHeader,
44+ timingSafeEqual,
45+ hmacSha256Hex,
46+} from "./c32_session.ts";
47+import {
48+ latestRun,
49+ allLatestRuns,
50+ listActiveProjects,
51+ getProject,
52+ upsertProject,
53+} from "./c13_database.ts";
54+
// Public origin of the site; the BASE_URL environment variable overrides it.
const BASE_URL = process.env.BASE_URL ?? "https://tdd.md";
// GitHub OAuth callback endpoint, derived from BASE_URL.
const CALLBACK_URL = `${BASE_URL}/auth/github/callback`;

// On-disk content locations, relative to the process working directory.
const HOME_MD = "./content/home.md";
const GAME_DIR = "./content/games";

// Meta description shared by the home page and its JSON-LD block.
const HOME_DESCRIPTION =
  "Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline.";

// The home page is rendered once at module load.
const homeBody = await Bun.file(HOME_MD).text();
const HOME_HTML = await renderPage({
  title: "tdd.md — TDD for agentic coding",
  description: HOME_DESCRIPTION,
  bodyMarkdown: homeBody,
  active: "home",
  jsonLd: {
    "@context": "https://schema.org",
    "@type": "WebSite",
    name: "tdd.md",
    url: "https://tdd.md",
    description: HOME_DESCRIPTION,
  },
});

// Kata registry, loaded once at module load and reused by the games index.
const ALL_GAMES = await listGames();
80+
// Kata table for the games index, or a placeholder when no kata is registered.
const gamesIndexTable =
  ALL_GAMES.length === 0
    ? "_No katas registered yet._"
    : [
        "| kata | description | steps |",
        "|---|---|---|",
        ...ALL_GAMES.map(
          (g) => `| [${g.id}](/games/${g.id}) | ${g.description} | ${g.steps.length} |`,
        ),
      ].join("\n");

// Markdown body of the games index: heading, kata table, calls to action.
const gamesIndexBody = [
  "# games",
  "",
  gamesIndexTable,
  "",
  "> Ready to play? [Register your agent →](/agents/register)",
  "> Using a specific agent? See the [agent-specific guides](/guides) — Claude Code, Cursor, Aider.",
  "",
].join("\n");

// Rendered once at module load.
const GAMES_INDEX_HTML = await renderPage({
  title: "TDD katas — tdd.md",
  description:
    "Browse the TDD katas. Pick a challenge, push red→green→refactor commits, and earn a public verdict graded against hidden tests.",
  bodyMarkdown: gamesIndexBody,
  ogPath: "https://tdd.md/games",
  active: "games",
});
102+
// A kata id is used as a directory name under GAME_DIR, so only plain slugs
// are accepted: no path separators and no leading dot, which rules out `..`
// and nested paths. The character set matches the repo-name pattern used by
// the /:owner/:repo fallback route.
const KATA_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;

/**
 * Renders the spec page for one kata from `${GAME_DIR}/<kata>/spec.md`.
 *
 * @param kata Kata id taken from the request path. It is untrusted and may
 *   arrive percent-decoded (TODO confirm against the router), so it is
 *   validated before being used in a filesystem path.
 * @returns The HTML response, or `null` when the id is not a plain slug or
 *   no `spec.md` exists for it (the caller turns `null` into a 404).
 */
const renderKata = async (kata: string): Promise<Response | null> => {
  // Reject anything that could escape GAME_DIR (e.g. "../../x") up front.
  if (!KATA_ID_PATTERN.test(kata)) return null;

  const file = Bun.file(`${GAME_DIR}/${kata}/spec.md`);
  if (!(await file.exists())) return null;
  const md = await file.text();

  // Pull the kata's own description from spec.ts when available — it's
  // the canonical short copy (rendered on /games + sitemap previews).
  let description: string | undefined;
  try {
    const game = await loadGame(kata);
    description = game.description;
  } catch {
    // loadGame failed for this id; leave description undefined so the
    // site default applies.
  }

  const html = await renderPage({
    title: `${kata} TDD kata — tdd.md`,
    description,
    bodyMarkdown: md,
    ogPath: `https://tdd.md/games/${kata}`,
    active: "games",
  });
  return new Response(html, { headers: { "Content-Type": "text/html; charset=utf-8" } });
};
125+
/**
 * Builds the public /agents index: one row per public, non-admin Forgejo
 * user with attempt count and summed score from the latest run per repo.
 *
 * Without `FORGEJO_ADMIN_TOKEN`, or when the user listing is not OK, the
 * user list is empty and the "no agents" page is rendered.
 */
const renderAgentsIndex = async (): Promise<Response> => {
  const loadUsers = async (): Promise<ForgejoUserSummary[]> => {
    if (!process.env.FORGEJO_ADMIN_TOKEN) return [];
    const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, {
      headers: adminApiHeaders(),
    });
    return res.ok ? ((await res.json()) as ForgejoUserSummary[]) : [];
  };

  // Drop the admin (id 1) and anyone whose visibility isn't "public" —
  // private and limited agents stay invisible on the public index.
  const isListed = (u: ForgejoUserSummary): boolean =>
    u.id !== 1 && !u.is_admin && (u.visibility ?? "public") === "public";
  const agents = (await loadUsers()).filter(isListed);

  // Sum score and count runs per owner, using the latest run of each repo.
  const totalsByOwner = new Map<string, { score: number; runs: number }>();
  for (const { owner, verdict } of allLatestRuns()) {
    const previous = totalsByOwner.get(owner);
    totalsByOwner.set(owner, {
      score: (previous?.score ?? 0) + verdict.totalScore,
      runs: (previous?.runs ?? 0) + 1,
    });
  }

  const registerLink = "[ Register your agent → ](/agents/register)";
  const nobodyYet = agents.length === 0;

  let body: string;
  if (nobodyYet) {
    body = ["# agents", "", "> No agents registered yet. Be the first.", "", registerLink, ""].join("\n");
  } else {
    const rows = agents.map((u) => {
      const totals = totalsByOwner.get(u.login) ?? { score: 0, runs: 0 };
      const sign = totals.score >= 0 ? "+" : "";
      return `| [${u.login}](/agents/${u.login}) | ${totals.runs} | ${sign}${totals.score} |`;
    });
    body = [
      "# agents",
      "",
      "| agent | attempts | total score |",
      "|---|---|---|",
      ...rows,
      "",
      registerLink,
      "",
    ].join("\n");
  }

  let description: string;
  if (nobodyYet) {
    description =
      "AI agents doing test-driven development on tdd.md — registration is open, sign in with GitHub to play.";
  } else {
    const noun = agents.length === 1 ? "agent" : "agents";
    description = `${agents.length} AI ${noun} doing test-driven development on tdd.md, scored on red→green discipline against hidden tests for agentic coding.`;
  }

  const html = await renderPage({
    title: "AI agents on tdd.md",
    description,
    bodyMarkdown: body,
    ogPath: "https://tdd.md/agents",
    active: "agents",
  });
  return htmlResponse(html);
};
191+
/**
 * Builds the public /leaderboard page: the latest run of every repo, ranked
 * by total score, restricted to owners whose visibility is "public".
 *
 * The user list is fetched once and turned into a Set so runs can be
 * filtered without per-owner lookups. When the list cannot be obtained (no
 * `FORGEJO_ADMIN_TOKEN`, or a non-OK response) the runs are shown
 * unfiltered, as before.
 * NOTE(review): that fallback is fail-open; consider hiding runs instead
 * when the token is configured but the lookup fails.
 */
const renderLeaderboard = async (): Promise<Response> => {
  const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
  const publicOwners = new Set<string>();
  // Tracks whether the visibility lookup actually succeeded. An empty Set
  // alone is ambiguous: it also occurs when the lookup worked and no user is
  // public, in which case nothing may be listed.
  let visibilityKnown = false;
  if (adminToken) {
    const r = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, {
      headers: adminApiHeaders(),
    });
    if (r.ok) {
      const users = (await r.json()) as ForgejoUserSummary[];
      for (const u of users) {
        if ((u.visibility ?? "public") === "public") publicOwners.add(u.login);
      }
      visibilityKnown = true;
    }
  }

  // filter() returns a fresh array, so the in-place sort does not touch
  // whatever allLatestRuns() returned.
  const runs = allLatestRuns()
    .filter((r) => !visibilityKnown || publicOwners.has(r.owner))
    .sort((a, b) => b.verdict.totalScore - a.verdict.totalScore);

  let body: string;
  if (runs.length === 0) {
    body = `# leaderboard

> No verdicts yet. The first agent to push a red→green pair lands here.

[ Register your agent → ](/agents/register)
`;
  } else {
    const rows = runs
      .map((r, i) => {
        const sign = r.verdict.totalScore >= 0 ? "+" : "";
        const verified = r.verdict.steps.filter((s) => s.status === "verified").length;
        return `| ${i + 1} | [${r.owner}](/agents/${r.owner}) | [${r.repo}](/${r.owner}/${r.repo}) | ${sign}${r.verdict.totalScore} | ${verified} |`;
      })
      .join("\n");
    body = `# leaderboard

| rank | agent | kata | score | verified steps |
|---|---|---|---|---|
${rows}
`;
  }

  const description =
    runs.length === 0
      ? "TDD leaderboard for AI agents on tdd.md — be the first verdict."
      : `Top AI agents by TDD score on tdd.md — ${runs.length} ranked ${runs.length === 1 ? "submission" : "submissions"} graded on red→green discipline and hidden test pass rate.`;

  const html = await renderPage({
    title: "TDD leaderboard — tdd.md",
    description,
    bodyMarkdown: body,
    ogPath: "https://tdd.md/leaderboard",
    active: "leaderboard",
  });
  return htmlResponse(html);
};
248+
// Markdown for the registration page: what is requested from GitHub, what
// the new agent receives, and the sign-in link.
const REGISTER_BODY = [
  "# register",
  "",
  "> Sign in with GitHub to create your tdd.md agent.",
  "",
  "## what we ask GitHub for",
  "- your username",
  "- your primary verified email",
  "",
  "That's it — no repo access, no anything else.",
  "",
  "## what you get",
  "- a public agent account at `git.tdd.md/<your-github-name>`",
  "- a push token (shown once)",
  "- an empty repo for the first kata, ready to push to",
  "",
  "[ sign in with github → ](/auth/github/start)",
  "",
].join("\n");

// Rendered once at module load, with `noindex` set.
const REGISTER_HTML = await renderPage({
  title: "Register your AI agent — tdd.md",
  description:
    "Sign in with GitHub to register your AI agent on tdd.md and start solving TDD katas. Public-signup, verified-identity, no extra forms.",
  bodyMarkdown: REGISTER_BODY,
  ogPath: "https://tdd.md/agents/register",
  active: "agents",
  noindex: true,
});
276+
/** Subset of a Forgejo repository API payload that the repo view reads. */
type ForgejoRepoSummary = {
  description: string;
  /** Clone URL reported by Forgejo; the repo view falls back to a tdd.md URL when it is empty. */
  clone_url: string;
  /** When true the repo view skips fetching the commit list. */
  empty: boolean;
  /** When true the repo view shows a "[private]" badge. */
  private: boolean;
};

/** Subset of one entry of the Forgejo commits listing that the repo view reads. */
type ForgejoCommit = {
  sha: string;
  commit: {
    message: string;
    author: { name: string; date: string };
  };
};
288+
/**
 * Renders the scoreboard page for one agent repo at /:owner/:repo: phase
 * log of recent commits, latest judge verdict and clone instructions.
 *
 * @param owner Repo owner taken from the URL.
 * @param repo Repo name taken from the URL; by convention the kata id.
 * @param viewer Login of the signed-in viewer, or `null` when anonymous.
 * @returns 404 when the owner is not public (and is not the viewer) or the
 *   repo does not exist, 502 when the repo lookup fails otherwise, and the
 *   rendered page in all other cases.
 */
const renderRepoView = async (
  owner: string,
  repo: string,
  viewer: string | null,
): Promise<Response> => {
  // Commit subjects and judge explanations are pushed/derived content and
  // end up in markdown that passes inline HTML through (this page itself
  // emits <span> tags). Neutralise HTML metacharacters, collapse line
  // breaks so a cell cannot end its table row, and escape pipes.
  const tableCell = (text: string): string =>
    text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/\r?\n/g, " ")
      .replace(/\|/g, "\\|");

  // Private/limited owners get a 404 to anonymous visitors — but the
  // owner themselves (verified via session cookie) can always see
  // their own pages.
  const ownerVisibility = await getUserVisibility(owner);
  if (ownerVisibility !== null && ownerVisibility !== "public" && viewer !== owner) {
    const html = await renderNotFound(`/${owner}/${repo}`);
    return htmlResponse(html, 404);
  }

  const repoApi = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`;
  const repoRes = await fetch(repoApi, { headers: adminApiHeaders() });
  if (repoRes.status === 404) {
    const html = await renderNotFound(`/${owner}/${repo}`);
    return htmlResponse(html, 404);
  }
  if (!repoRes.ok) {
    const html = await renderPage({
      title: `${owner}/${repo} — tdd.md`,
      bodyMarkdown: `# ${owner}/${repo}\n\n> repository unavailable`,
    });
    return htmlResponse(html, 502);
  }
  const info = (await repoRes.json()) as ForgejoRepoSummary;
  const cloneUrl = info.clone_url || `https://tdd.md/${owner}/${repo}.git`;
  const isPrivate = info.private === true;

  // The repo name is by convention the kata id. If the kata exists, the
  // header link is meaningful and we know the total step count.
  let totalSteps: number | null = null;
  let kataExists = false;
  try {
    const game = await loadGame(repo);
    totalSteps = game.steps.length;
    kataExists = true;
  } catch {
    // Repo isn't a known kata — still render, just without step totals.
  }

  // Empty repos have no commits to list; a failed listing is treated the same.
  let commits: ForgejoCommit[] = [];
  if (!info.empty) {
    const commitsRes = await fetch(`${repoApi}/commits?limit=50&stat=false`, {
      headers: adminApiHeaders(),
    });
    if (commitsRes.ok) commits = (await commitsRes.json()) as ForgejoCommit[];
  }
  const progress = computeProgress(commits);
  const verified = progress.verifiedSteps.size;

  let status: string;
  if (commits.length === 0) {
    status = "awaiting first push";
  } else if (totalSteps !== null && verified >= totalSteps) {
    status = "kata complete";
  } else if (verified > 0) {
    status = "in progress";
  } else {
    status = "no verified steps yet";
  }
  const stepCounter = totalSteps !== null ? `${verified} / ${totalSteps}` : `${verified} / ?`;

  let phaseLog: string;
  if (commits.length === 0) {
    phaseLog = "_No commits yet — push your first `red:` commit to start the cycle._";
  } else {
    const rows = commits.map((c) => {
      const sha = c.sha.slice(0, 7);
      const p = parseCommit(c.commit.message);
      const subject = tableCell(p.subject || c.commit.message.split("\n")[0] || "");
      const stepCell = p.step ? `\`${p.step}\`` : "—";
      return `| \`${sha}\` | ${phaseSpan(p.phase)} | ${stepCell} | ${subject} | ${relativeTime(c.commit.author.date)} |`;
    });
    phaseLog = `| sha | phase | step | message | when |\n|---|---|---|---|---|\n${rows.join("\n")}`;
  }

  const kataLink = kataExists
    ? `[\`${repo}\` →](/games/${repo})`
    : `\`${repo}\``;
  const privateBadge = isPrivate ? ` <span class="muted">[private]</span>` : "";

  // A stored verdict is stale when it was judged at a different head commit
  // than the newest commit in the listing.
  const verdict = latestRun(owner, repo);
  const headSha = commits[0]?.sha ?? null;
  const verdictStale = verdict !== null && headSha !== null && verdict.headSha !== headSha;

  let scoreSection: string;
  if (verdict === null) {
    scoreSection = `> Not yet judged. The next push triggers a judge run, or [run the judge now](/api/judge/${owner}/${repo}) (POST).\n\nPhase tally: <span class="red">red ${progress.redCount}</span> · <span class="green">green ${progress.greenCount}</span> · <span class="blue">refactor ${progress.refactorCount}</span>${progress.untaggedCount > 0 ? ` · <span class="muted">untagged ${progress.untaggedCount}</span>` : ""}.`;
  } else {
    const stale = verdictStale ? ` · <span class="muted">stale — newer commits not yet judged</span>` : "";
    const totalSign = verdict.totalScore >= 0 ? "+" : "";
    const statusClass = (stepStatus: string): string => {
      if (stepStatus === "verified") return "green";
      if (stepStatus === "discipline-only") return "blue";
      if (stepStatus === "no-green") return "muted";
      return "red";
    };
    const modeLabel = (m: string): string => {
      const cls = m === "strict" ? "red" : m === "pragmatic" ? "blue" : "green";
      return `<span class="${cls}">${m}</span>`;
    };
    const rows = verdict.steps.length === 0
      ? "_No red→green pairs found yet._"
      : `| step | red | green | hidden | status | points | explanation |\n|---|---|---|---|---|---|---|\n` +
        verdict.steps.map((s) => {
          const cls = statusClass(s.status);
          const stepSign = s.scoreDelta >= 0 ? "+" : "";
          const hiddenCell =
            s.hiddenPassed === true ? `<span class="green">pass</span>` :
            s.hiddenPassed === false ? `<span class="red">fail</span>` :
            `<span class="muted">—</span>`;
          const explanation = tableCell(s.explanation ?? "");
          return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | <span class="${cls}">${s.status}</span> | ${stepSign}${s.scoreDelta} | ${explanation} |`;
        }).join("\n");
    // `refactors` may be absent on a stored verdict; treat that as none.
    const refactors = verdict.refactors ?? [];
    const refactorRows = refactors.length === 0
      ? ""
      : `\n\n### refactors\n\n| sha | step | tests | points | explanation |\n|---|---|---|---|---|\n` +
        refactors.map((r) => {
          const refactorSign = r.scoreDelta >= 0 ? "+" : "";
          const cls = r.testsPassed ? "green" : "red";
          const verb = r.testsPassed ? "green" : "broke tests";
          const explanation = tableCell(r.explanation ?? "");
          return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | <span class="${cls}">${verb}</span> | ${refactorSign}${r.scoreDelta} | ${explanation} |`;
        }).join("\n");
    const modeLine = verdict.mode ? `**mode: ${modeLabel(verdict.mode)}** · ` : "";
    scoreSection = `${modeLine}**total: ${totalSign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`;
  }

  const body = `# ${owner} · playing ${kataLink}${privateBadge}

> ${status}
> **${stepCounter}** steps verified

## phase log

${phaseLog}

## score

${scoreSection}

## clone

\`\`\`
git clone ${cloneUrl}
\`\`\`

[← /agents/${owner}](/agents/${owner})${kataExists ? ` · [kata spec →](/games/${repo})` : ""}
`;

  // Dynamic description tailored to this attempt — gives every agent
  // run a unique snippet for search results and social previews instead
  // of falling back to the site default.
  const totalSnippet =
    verdict !== null
      ? `, score ${verdict.totalScore >= 0 ? "+" : ""}${verdict.totalScore}`
      : "";
  const description = kataExists
    ? `${owner}'s ${repo} TDD kata attempt on tdd.md — ${verified}${totalSteps !== null ? `/${totalSteps}` : ""} steps verified${totalSnippet}.`
    : `${owner}/${repo} on tdd.md — ${commits.length} ${commits.length === 1 ? "commit" : "commits"} in the phase log${totalSnippet}.`;

  const html = await renderPage({
    title: `${owner} · ${repo}${kataExists ? " TDD kata" : ""} — tdd.md`,
    description,
    bodyMarkdown: body,
    ogPath: `https://tdd.md/${owner}/${repo}`,
    active: "agents",
  });
  return htmlResponse(html);
};
462+
/**
 * Decides whether a request path belongs to git's HTTP transport rather
 * than to a page of this site.
 *
 * A path qualifies when it ends in `.git` or contains `.git/`, when it is an
 * `/info/refs` request whose `service` query parameter names one of the two
 * git services, or when it ends in one of the service endpoints.
 *
 * @param pathname URL path of the request.
 * @param search Parsed query string of the request.
 * @returns `true` when any of the three patterns matches.
 */
const isGitProtocol = (pathname: string, search: URLSearchParams): boolean => {
  const gitServices = ["git-upload-pack", "git-receive-pack"];

  const touchesDotGit = pathname.includes(".git/") || pathname.endsWith(".git");

  const requestedService = search.get("service");
  const isRefAdvertisement =
    pathname.endsWith("/info/refs") &&
    requestedService !== null &&
    gitServices.includes(requestedService);

  const isServiceEndpoint = gitServices.some((service) => pathname.endsWith(`/${service}`));

  return touchesDotGit || isRefAdvertisement || isServiceEndpoint;
};
476+
/**
 * Fallback request handler, mounted as `fetch` on Bun.serve. In order it
 * handles: bare `/:owner/:repo.git` redirects, git-protocol requests
 * (proxied to Forgejo), the `/:owner/:repo` scoreboard view, and finally a
 * 404 page for everything else.
 */
const appFetch = async (req: Request): Promise<Response> => {
  const { pathname, search, searchParams } = new URL(req.url);

  // Bare /<owner>/<repo>.git (no sub-path) is what a browser requests when
  // someone pastes the clone URL. Proxying it would show Forgejo's own repo
  // page under tdd.md, so redirect to the clean URL and let the Bun-native
  // scoreboard render instead. Real git operations always carry a sub-path
  // (/info/refs, /git-upload-pack, /objects/...) and are proxied below.
  const bareClone = /^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\.git\/?$/.exec(
    pathname,
  );
  if (bareClone !== null) {
    const [, owner, repo] = bareClone;
    return new Response(null, {
      status: 302,
      headers: { Location: `/${owner}/${repo}` },
    });
  }

  // Git smart-HTTP and dumb-HTTP — forwarded untouched to Forgejo.
  if (isGitProtocol(pathname, searchParams)) {
    return proxyToForgejo(req, pathname + search);
  }

  // Exactly two path segments: /<owner>/<repo>. Reserved top-level paths
  // are matched by explicit routes first, so they never reach this point.
  const repoPage = /^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\/?$/.exec(pathname);
  if (repoPage !== null) {
    const [, owner, repo] = repoPage;
    return renderRepoView(owner!, repo!, await getViewer(req));
  }

  return htmlResponse(await renderNotFound(pathname), 404);
};
516+
/**
 * Error hook for Bun.serve: logs the error and answers with a plain-text
 * 500 that reveals nothing about the failure.
 */
const appError = (err: Error): Response => {
  console.error(err);
  const init = { status: 500 };
  return new Response("internal error", init);
};
521+
522+// ---------------------------------------------------------------------
523+// App factory — c11 calls createApp(port) to start the server. The
524+// routes literal stays inline here so Bun's path-parameter inference
525+// (`:slug` → `req.params.slug`) flows through to the handler types.
526+// ---------------------------------------------------------------------
527+
528+export const createApp = (port: number) => Bun.serve({
529+ port,
530+ error: appError,
531+ fetch: appFetch,
532+ routes: {
533+ "/": htmlResponse(HOME_HTML),
534+ "/raw": new Response(Bun.file(HOME_MD), {
535+ headers: { "Content-Type": "text/markdown; charset=utf-8" },
536+ }),
537+ "/healthz": new Response("ok"),
538+
539+ "/robots.txt": new Response(
540+ `User-agent: *\nAllow: /\nDisallow: /auth/\nDisallow: /api/\n\nSitemap: https://tdd.md/sitemap.xml\n`,
541+ { headers: { "Content-Type": "text/plain; charset=utf-8" } },
542+ ),
543+
544+ "/sitemap.xml": async () => {
545+ const today = new Date().toISOString().slice(0, 10);
546+ const url = (loc: string, priority: string) =>
547+ `<url><loc>${loc}</loc><lastmod>${today}</lastmod><priority>${priority}</priority></url>`;
548+ const kataUrls = ALL_GAMES.map((g) =>
549+ url(`https://tdd.md/games/${g.id}`, "0.8"),
550+ ).join("\n");
551+ const guideUrls = ALL_GUIDES.map((g) =>
552+ url(`https://tdd.md/guides/${g.slug}`, "0.8"),
553+ ).join("\n");
554+ const blogUrls = ALL_POSTS.map((p) =>
555+ url(`https://tdd.md/blog/${p.slug}`, "0.8"),
556+ ).join("\n");
557+ const xml = `<?xml version="1.0" encoding="UTF-8"?>
558+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
559+${url("https://tdd.md/", "1.0")}
560+${url("https://tdd.md/games", "0.9")}
561+${kataUrls}
562+${url("https://tdd.md/guides", "0.9")}
563+${guideUrls}
564+${url("https://tdd.md/blog", "0.7")}
565+${blogUrls}
566+${url("https://tdd.md/agents", "0.7")}
567+${url("https://tdd.md/leaderboard", "0.7")}
568+</urlset>`;
569+ return new Response(xml, {
570+ headers: { "Content-Type": "application/xml; charset=utf-8" },
571+ });
572+ },
573+
574+ "/og.svg": new Response(Bun.file("./public/og.svg"), {
575+ headers: {
576+ "Content-Type": "image/svg+xml",
577+ "Cache-Control": "public, max-age=3600",
578+ },
579+ }),
580+
581+ "/games": htmlResponse(GAMES_INDEX_HTML),
582+
583+ "/blog": async () => {
584+ const rows = ALL_POSTS
585+ .map((p) => `| ${p.date} | [${p.title}](/blog/${p.slug}) |`)
586+ .join("\n");
587+ const body = `# blog
588+
589+Notes on TDD, agentic coding, and the discipline that ties them together.
590+
591+| date | post |
592+|---|---|
593+${rows}
594+
595+> RSS feed coming when there's a second post.
596+
597+[← back to tdd.md](/) · [the guides](/guides) · [the katas](/games)
598+`;
599+ const html = await renderPage({
600+ title: "Blog — tdd.md",
601+ description: "Posts on test-driven development for AI coding agents — how to apply TDD with Claude Code, Cursor, and Aider, what we learn from the verdicts.",
602+ bodyMarkdown: body,
603+ ogPath: "https://tdd.md/blog",
604+ active: "blog",
605+ });
606+ return htmlResponse(html);
607+ },
608+
609+ "/blog/:slug": async (req) => {
610+ const slug = req.params.slug;
611+ const entry = ALL_POSTS.find((p) => p.slug === slug);
612+ if (!entry) {
613+ const html = await renderNotFound(`/blog/${slug}`);
614+ return htmlResponse(html, 404);
615+ }
616+ const file = Bun.file(`./content/blog/${slug}.md`);
617+ if (!(await file.exists())) {
618+ const html = await renderNotFound(`/blog/${slug}`);
619+ return htmlResponse(html, 404);
620+ }
621+ const md = await file.text();
622+ const html = await renderPage({
623+ title: `${entry.title} — tdd.md`,
624+ description: entry.description,
625+ bodyMarkdown: md,
626+ ogPath: `https://tdd.md/blog/${slug}`,
627+ active: "blog",
628+ jsonLd: {
629+ "@context": "https://schema.org",
630+ "@type": "BlogPosting",
631+ headline: entry.title,
632+ description: entry.description,
633+ datePublished: entry.date,
634+ url: `https://tdd.md/blog/${slug}`,
635+ author: { "@type": "Organization", name: "tdd.md" },
636+ },
637+ });
638+ return htmlResponse(html);
639+ },
640+
641+ "/projects": async () => {
642+ const projects = listActiveProjects();
643+ const html = await renderPage({
644+ title: "Projects — tdd.md",
645+ description: "Real repos opted in to tdd.md scoring. Each project drops .tdd-md.json at its root and gets its commits judged structurally for TDD discipline.",
646+ bodyMarkdown: projectsLandingMd(projects),
647+ ogPath: "https://tdd.md/projects",
648+ });
649+ return htmlResponse(html);
650+ },
651+
652+ "/projects/new": async (req) => {
653+ const viewer = await getViewer(req);
654+ if (req.method === "GET") {
655+ const url = new URL(req.url);
656+ const prefilled = url.searchParams.get("repo") ?? undefined;
657+ const html = await renderPage({
658+ title: "Register a project — tdd.md",
659+ description: "Onboard a real repo for TDD-discipline scoring. Drops .tdd-md.json at the repo root, register here, and the reports begin tracking commits on its tracked branches.",
660+ bodyMarkdown: projectRegisterMd(viewer, prefilled),
661+ ogPath: "https://tdd.md/projects/new",
662+ noindex: true,
663+ });
664+ return htmlResponse(html);
665+ }
666+ if (req.method !== "POST") return new Response("method not allowed", { status: 405 });
667+ if (!viewer) return new Response("unauthorized — sign in first", { status: 401 });
668+
669+ let raw = "";
670+ try {
671+ const form = await req.formData();
672+ raw = String(form.get("repo") ?? "").trim();
673+ } catch {
674+ return new Response("invalid form body", { status: 400 });
675+ }
676+
677+ const renderError = async (message: string, status = 400): Promise<Response> => {
678+ const html = await renderPage({
679+ title: "Register a project — tdd.md",
680+ bodyMarkdown: projectRegisterMd(viewer, raw, message),
681+ ogPath: "https://tdd.md/projects/new",
682+ noindex: true,
683+ });
684+ return htmlResponse(html, status);
685+ };
686+
687+ let owner: string;
688+ let repo: string;
689+ try {
690+ ({ owner, repo } = parseRepoIdentifier(raw));
691+ } catch (err) {
692+ return renderError((err as Error).message);
693+ }
694+
695+ let config;
696+ try {
697+ config = await fetchProjectConfig(owner, repo);
698+ } catch (err) {
699+ return renderError((err as Error).message);
700+ }
701+
702+ upsertProject(viewer, owner, repo, config);
703+ return new Response(null, {
704+ status: 303,
705+ headers: { Location: `/projects/${owner}/${repo}` },
706+ });
707+ },
708+
709+ "/projects/:repoOwner/:repoName": async (req) => {
710+ const { repoOwner, repoName } = req.params;
711+ const project = getProject(repoOwner, repoName);
712+ if (!project) {
713+ const html = await renderNotFound(`/projects/${repoOwner}/${repoName}`);
714+ return htmlResponse(html, 404);
715+ }
716+ const html = await renderPage({
717+ title: `${project.displayName ?? `${project.repoOwner}/${project.repoName}`} — tdd.md`,
718+ description: `${project.repoOwner}/${project.repoName} on tdd.md — ${project.testRunner === "none" ? "trace-mode" : project.testRunner} judging across ${project.trackedBranches.join(", ")}.`,
719+ bodyMarkdown: projectDetailMd(project),
720+ ogPath: `https://tdd.md/projects/${project.repoOwner}/${project.repoName}`,
721+ });
722+ return htmlResponse(html);
723+ },
724+
725+ "/reports": async () => {
726+ const html = await renderPage({
727+ title: "Reports — tdd.md",
728+ description: "Per-agent TDD-discipline reporting over real project repos: trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.",
729+ bodyMarkdown: reportsLandingMd(),
730+ ogPath: "https://tdd.md/reports",
731+ noindex: true,
732+ });
733+ return htmlResponse(html);
734+ },
735+
736+ "/reports/demo": async () => {
737+ const html = await renderPage({
738+ title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md",
739+ description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.",
740+ bodyMarkdown: execSummaryMd(),
741+ ogPath: "https://tdd.md/reports/demo",
742+ noindex: true,
743+ });
744+ return htmlResponse(html);
745+ },
746+
747+ "/reports/demo/tests": async () => {
748+ const html = await renderPage({
749+ title: "Tests overzicht (demo) — tdd.md",
750+ description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.",
751+ bodyMarkdown: testsOverviewMd(),
752+ ogPath: "https://tdd.md/reports/demo/tests",
753+ noindex: true,
754+ });
755+ return htmlResponse(html);
756+ },
757+
758+ "/reports/demo/agents/:slug": async (req) => {
759+ const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"];
760+ const md = agentDrilldownMd(slug);
761+ if (!md) {
762+ const html = await renderNotFound(`/reports/demo/agents/${slug}`);
763+ return htmlResponse(html, 404);
764+ }
765+ const entry = DEMO_REPORTS.find((r) => r.slug === slug)!;
766+ const html = await renderPage({
767+ title: `${entry.name} drill-down (demo) — tdd.md`,
768+ description: `Per-agent drill-down mockup for ${entry.name}: trend, failure-mode breakdown, recent flagged commits with coaching links.`,
769+ bodyMarkdown: md,
770+ ogPath: `https://tdd.md/reports/demo/agents/${slug}`,
771+ noindex: true,
772+ });
773+ return htmlResponse(html);
774+ },
775+
776+ "/guides": async () => {
777+ const rows = ALL_GUIDES
778+ .map((g) => `| [${g.title}](/guides/${g.slug}) | ${g.description} |`)
779+ .join("\n");
780+ const body = `# guides
781+
782+Agent-specific walkthroughs for using tdd.md with the major agentic-coding tools. Each guide covers setup, prompt patterns that keep the agent in TDD, and the common pitfalls that cost score.
783+
784+| guide | what it covers |
785+|---|---|
786+${rows}
787+
788+> Missing your agent? [The mechanics are the same](/) — push commits tagged \`red:\` / \`green:\` / \`refactor:\` to your kata repo. Send a PR with a new guide and we'll list it here.
789+
790+[← play a kata](/games) · [register your agent →](/you)
791+`;
792+ const html = await renderPage({
793+ title: "TDD guides for agentic coding tools — tdd.md",
794+ description: "Practical TDD walkthroughs for Claude Code, Cursor, Aider and other AI coding agents — keep your agent honest with red→green→refactor commits, scored by tdd.md.",
795+ bodyMarkdown: body,
796+ ogPath: "https://tdd.md/guides",
797+ active: "guides",
798+ });
799+ return htmlResponse(html);
800+ },
801+
802+ "/guides/:slug": async (req) => {
803+ const slug = req.params.slug;
804+ const entry = ALL_GUIDES.find((g) => g.slug === slug);
805+ if (!entry) {
806+ const html = await renderNotFound(`/guides/${slug}`);
807+ return htmlResponse(html, 404);
808+ }
809+ const file = Bun.file(`./content/guides/${slug}.md`);
810+ if (!(await file.exists())) {
811+ const html = await renderNotFound(`/guides/${slug}`);
812+ return htmlResponse(html, 404);
813+ }
814+ const md = await file.text();
815+ const html = await renderPage({
816+ title: `${entry.title} — tdd.md`,
817+ description: entry.description,
818+ bodyMarkdown: md,
819+ ogPath: `https://tdd.md/guides/${slug}`,
820+ active: "guides",
821+ });
822+ return htmlResponse(html);
823+ },
824+
825+ "/games/:kata": async (req) => {
826+ const res = await renderKata(req.params.kata);
827+ if (res) return res;
828+ const html = await renderNotFound(`/games/${req.params.kata}`);
829+ return htmlResponse(html, 404);
830+ },
831+
832+ "/agents": () => renderAgentsIndex(),
833+ "/agents/register": htmlResponse(REGISTER_HTML),
// Dashboard for a single agent: one table row per kata repo showing the
// verified-step counter and the red/green/refactor tallies.
//
// Responses:
//   404  the agent does not exist, or is private/limited and the viewer
//        is not the owner
//   502  Forgejo could not tell us whether the agent exists / is public
//   200  the rendered dashboard
"/agents/:name": async (req) => {
  const name = req.params.name;
  const viewer = await getViewer(req);
  const isSelf = viewer === name;

  const userRes = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}`, {
    headers: adminApiHeaders(),
  });
  if (userRes.status === 404) {
    const html = await renderPage({
      title: `${name} — agents — tdd.md`,
      bodyMarkdown: `# agents / ${name}\n\n> No agent registered with this name.\n\n[← all agents](/agents) · [register your own →](/agents/register)`,
      ogPath: `https://tdd.md/agents/${name}`,
      active: "agents",
    });
    return htmlResponse(html, 404);
  }
  if (!userRes.ok) {
    // Fail closed: without a successful lookup we cannot read the
    // profile's visibility, and the repo listing below is fetched with
    // admin credentials — rendering it anyway could expose a private
    // agent whenever Forgejo hiccups.
    const html = await renderPage({
      title: `${name} — agents — tdd.md`,
      bodyMarkdown: `# agents / ${name}\n\n> Couldn't load this agent right now — please try again in a moment.\n\n[← all agents](/agents)`,
      active: "agents",
      noindex: true,
    });
    return htmlResponse(html, 502);
  }

  // Treat private/limited users as if they don't exist publicly —
  // unless the logged-in viewer IS the owner. Owner can always see
  // their own dashboard, public or not.
  const u = (await userRes.json()) as ForgejoUserSummary;
  if ((u.visibility ?? "public") !== "public" && !isSelf) {
    const html = await renderNotFound(`/agents/${name}`);
    return htmlResponse(html, 404);
  }

  const reposRes = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}/repos?limit=50`, {
    headers: adminApiHeaders(),
  });
  // A failed listing degrades to "no attempts" rather than an error page.
  const repos = reposRes.ok ? ((await reposRes.json()) as { name: string; description: string }[]) : [];

  const progressByRepo = await Promise.all(
    repos.map(async (r) => {
      const cRes = await fetch(
        `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(name)}/${encodeURIComponent(r.name)}/commits?limit=50&stat=false`,
        { headers: adminApiHeaders() },
      );
      const commits = cRes.ok ? ((await cRes.json()) as { commit: { message: string } }[]) : [];
      return { repo: r, progress: computeProgress(commits) };
    }),
  );

  // Step totals per kata, loaded in parallel. Repos whose name is not a
  // known kata simply get no total and render as "n / ?".
  const totals: Record<string, number> = {};
  await Promise.all(
    repos.map(async (r) => {
      try {
        const game = await loadGame(r.name);
        totals[r.name] = game.steps.length;
      } catch {
        // unknown kata, no total
      }
    }),
  );

  let body = `# agents / ${name}\n\n`;
  if (isSelf) {
    body += `> Welcome back, ${name}. This is your dashboard — only you and admins see it when your profile is private.\n\n`;
  }
  if (repos.length === 0) {
    body += "> Registered, but no kata attempts yet.\n\n[← all agents](/agents)";
  } else {
    body += "## attempts\n\n";
    body += "| kata | verified | phases |\n|---|---|---|\n";
    for (const { repo: r, progress } of progressByRepo) {
      const total = totals[r.name];
      const verified = progress.verifiedSteps.size;
      const counter = total !== undefined ? `${verified} / ${total}` : `${verified} / ?`;
      const phases = `<span class="red">red ${progress.redCount}</span> · <span class="green">green ${progress.greenCount}</span> · <span class="blue">refactor ${progress.refactorCount}</span>`;
      body += `| [${r.name}](/${name}/${r.name}) | ${counter} | ${phases} |\n`;
    }
  }

  if (isSelf) {
    body += `\n\n---\n\n[sign out](/auth/logout) · [toggle visibility](#) <span class="muted">(POST /api/agents/${name}/visibility with your push token)</span>`;
  }

  const verifiedSteps = progressByRepo.reduce((acc, p) => acc + p.progress.verifiedSteps.size, 0);
  const description =
    repos.length === 0
      ? `${name} just registered on tdd.md — no kata attempts yet.`
      : `${name}'s TDD attempts on tdd.md: ${repos.length} ${repos.length === 1 ? "kata" : "katas"} pushed, ${verifiedSteps} verified red→green ${verifiedSteps === 1 ? "step" : "steps"}.`;
  const html = await renderPage({
    title: `${name} · TDD attempts — tdd.md`,
    description,
    bodyMarkdown: body,
    ogPath: `https://tdd.md/agents/${name}`,
    active: "agents",
  });
  return htmlResponse(html);
},
// Redirect the legacy URL to the canonical /:owner/:repo path —
// /agents/:name/:kata used to render a placeholder before the
// GitHub-style routing landed.
"/agents/:name/:kata": (req) => {
  // Re-encode each segment before building the Location header. If a
  // param arrives percent-decoded (e.g. "%2Fevil.example" → "/evil.example"),
  // raw interpolation would emit "//evil.example/…", which browsers
  // treat as a protocol-relative URL — i.e. an open redirect.
  const owner = encodeURIComponent(req.params.name);
  const repo = encodeURIComponent(req.params.kata);
  return Response.redirect(`/${owner}/${repo}`, 301);
},

"/leaderboard": () => renderLeaderboard(),
930+
// Manually trigger a judge run for one repo. POST only, and gated on the
// admin token; replies with the verdict as JSON, or `{ error }` with a
// 500 when the run throws.
"/api/judge/:owner/:repo": async (req) => {
  if (req.method !== "POST") {
    return new Response("method not allowed; POST to trigger a judge run", { status: 405 });
  }
  // Manual triggers require the admin token. Push-driven runs come
  // through /api/forgejo/webhook with HMAC signature verification.
  const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
  const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? "";
  if (!adminToken || !timingSafeEqual(provided, adminToken)) {
    return new Response("unauthorized — POST with `Authorization: Bearer <admin-token>`", { status: 401 });
  }
  try {
    const verdict = await judge(req.params.owner, req.params.repo);
    return Response.json(verdict);
  } catch (err) {
    // Anything can be thrown; narrow instead of casting so a non-Error
    // rejection still produces a populated `error` field.
    const message = err instanceof Error ? err.message : String(err);
    return Response.json({ error: message }, { status: 500 });
  }
},
949+
// Self-service visibility toggle. The caller sends a bearer token and a
// JSON body `{ "visibility": "public" | "limited" | "private" }`. The
// token is accepted when it is the admin token, or when Forgejo's /user
// endpoint reports that it belongs to :name; the change itself is then
// applied through the admin API.
"/api/agents/:name/visibility": async (req) => {
  if (req.method !== "POST") return new Response("POST only", { status: 405 });
  const name = req.params.name;
  const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? "";
  if (!provided) return Response.json({ error: "missing bearer token" }, { status: 401 });

  // Authorisation: admin token first, otherwise ask Forgejo who owns it.
  const adminToken = process.env.FORGEJO_ADMIN_TOKEN ?? "";
  let allowed = adminToken !== "" && timingSafeEqual(provided, adminToken);
  if (!allowed) {
    const meRes = await fetch(`${FORGEJO_URL}/api/v1/user`, {
      headers: { Authorization: `token ${provided}` },
    });
    if (meRes.ok) {
      const { login } = (await meRes.json()) as { login?: string };
      allowed = login === name;
    }
  }
  if (!allowed) return Response.json({ error: "token does not match agent" }, { status: 403 });

  let payload: { visibility?: string };
  try {
    payload = (await req.json()) as { visibility?: string };
  } catch {
    return Response.json({ error: "invalid json" }, { status: 400 });
  }
  const { visibility } = payload;
  const isKnownLevel = visibility === "public" || visibility === "limited" || visibility === "private";
  if (!isKnownLevel) {
    return Response.json(
      { error: "visibility must be one of public|limited|private" },
      { status: 400 },
    );
  }

  const patchUrl = `${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(name)}`;
  const patchRes = await fetch(patchUrl, {
    method: "PATCH",
    headers: { ...adminApiHeaders(), "Content-Type": "application/json" },
    body: JSON.stringify({ visibility, source_id: 0, login_name: name }),
  });
  if (patchRes.ok) return Response.json({ name, visibility });

  // Surface Forgejo's status and body so the caller can see why it failed.
  const text = await patchRes.text();
  return Response.json(
    { error: `forgejo PATCH failed: ${patchRes.status} ${text}` },
    { status: 502 },
  );
},
1005+
// Push webhook. The raw body is authenticated with an HMAC-SHA256
// signature (x-forgejo-signature, falling back to x-gitea-signature)
// before it is parsed; a valid push kicks off a judge run without
// waiting for it to finish.
"/api/forgejo/webhook": async (req) => {
  if (req.method !== "POST") return new Response("POST only", { status: 405 });
  const secret = process.env.WEBHOOK_SECRET;
  if (!secret) return new Response("webhook not configured", { status: 503 });

  // Sign exactly the bytes we received — parse only after verification.
  const rawBody = await req.text();
  const signature =
    req.headers.get("x-forgejo-signature") ?? req.headers.get("x-gitea-signature") ?? "";
  const digest = await hmacSha256Hex(secret, rawBody);
  const signatureOk = signature.length === digest.length && timingSafeEqual(signature, digest);
  if (!signatureOk) {
    return new Response("invalid signature", { status: 401 });
  }

  let payload: { repository?: { owner?: { login?: string }; name?: string }; ref?: string };
  try {
    payload = JSON.parse(rawBody);
  } catch {
    return new Response("invalid json", { status: 400 });
  }
  const owner = payload.repository?.owner?.login;
  const repo = payload.repository?.name;
  if (!(owner && repo)) return new Response("missing owner/repo", { status: 400 });

  // Fire the judge in the background; ack immediately so Forgejo
  // doesn't time out while we're checking out commits.
  void judge(owner, repo).catch((err) => {
    console.error(`judge failed for ${owner}/${repo}:`, err);
  });
  return Response.json({ accepted: true, owner, repo });
},
1036+
// Shortcut to "my dashboard": signed-in viewers go to their agent page,
// everyone else is sent into the GitHub sign-in flow.
"/you": async (req) => {
  const viewer = await getViewer(req);
  const headers = { Location: viewer ? `/agents/${viewer}` : "/auth/github/start" };
  return new Response(null, { status: 302, headers });
},
1042+
// Sign out: overwrite the session cookie with an empty value and a
// zero lifetime, then bounce back to the home page.
"/auth/logout": (_req) => {
  const headers = {
    Location: "/",
    "Set-Cookie": sessionCookieHeader("", 0),
  };
  return new Response(null, { status: 302, headers });
},
1053+
// First leg of the GitHub OAuth flow. Issues a random state nonce, pins
// it in a 10-minute cookie scoped to /auth, and redirects to GitHub's
// authorize URL; the callback compares the two.
"/auth/github/start": (_req) => {
  const ready = github.isConfigured() && forgejo.isConfigured();
  if (!ready) {
    return errorPage("registration is not configured on this server", 503);
  }
  const nonce = randomHex(16);
  const headers = {
    Location: github.authorizeUrl(nonce, CALLBACK_URL),
    "Set-Cookie": `tdd_oauth_state=${nonce}; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=600`,
  };
  return new Response(null, { status: 302, headers });
},
1067+
// Second leg of the GitHub OAuth flow.
//
//   1. Check `code`/`state` and compare `state` with the nonce cookie.
//   2. Exchange the code and fetch the GitHub user.
//   3. Returning user  → set the session cookie, redirect to the dashboard.
//      First-time user → register the agent and render the welcome page
//      that shows the push token.
//
// Both outcomes also clear the short-lived OAuth state cookie.
"/auth/github/callback": async (req) => {
  const url = new URL(req.url);
  const code = url.searchParams.get("code");
  const state = url.searchParams.get("state");
  if (!code || !state) return errorPage("missing code or state");

  const cookies = parseCookies(req.headers.get("cookie"));
  const cookieState = cookies.tdd_oauth_state;
  if (!cookieState || !timingSafeEqual(cookieState, state)) {
    return errorPage("state mismatch — open the registration page again and retry");
  }

  let username: string;
  let email: string;
  let fullName: string | null;
  try {
    const accessToken = await github.exchangeCode(code, CALLBACK_URL);
    const user = await github.fetchUser(accessToken);
    username = user.login;
    fullName = user.name;
    // GitHub's noreply email format: unique per account, never collides
    // with another Forgejo user. We don't need a deliverable address —
    // agents authenticate by token, not by email reset flow.
    email = `${user.id}+${user.login}@users.noreply.github.com`;
  } catch (err) {
    // Narrow rather than cast: a non-Error rejection should still
    // produce a readable message.
    const reason = err instanceof Error ? err.message : String(err);
    return errorPage(`github oauth failed: ${reason}`, 400);
  }

  // Login vs register: if the user already exists in Forgejo, this
  // is a returning visitor — set the session cookie, redirect to
  // their dashboard, don't rotate their token.
  const isExisting = await forgejo.userExists(username);
  const sessionToken = await signSession(username);
  const sessionCookie = sessionCookieHeader(sessionToken, SESSION_TTL_SEC);
  const clearOauthState =
    "tdd_oauth_state=; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=0";

  if (isExisting) {
    // `new Headers([...])` is used because two Set-Cookie entries are
    // needed; an object literal can hold only one value per key.
    return new Response(null, {
      status: 302,
      headers: new Headers([
        ["Location", `/agents/${username}`],
        ["Set-Cookie", sessionCookie],
        ["Set-Cookie", clearOauthState],
      ]),
    });
  }

  let reg: forgejo.AgentRegistration;
  try {
    reg = await forgejo.registerAgent({
      username,
      email,
      fullName: fullName ?? undefined,
    });
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return errorPage(`failed to create your agent: ${reason}`, 422);
  }

  const verb = reg.isNew ? "created" : "rotated";
  const body = `# welcome, ${reg.username}

> Your tdd.md agent has been ${verb}. **Save the token below — this page is the only time you'll see it.** If you lose it, [register again](/agents/register) to issue a fresh one (the old one will stop working).

## push token

\`\`\`
${reg.pushToken}
\`\`\`

## kata: string-calc

Your repo is at [\`git.tdd.md/${reg.username}/string-calc\`](https://git.tdd.md/${reg.username}/string-calc), already initialized with a default branch \`main\`.

\`\`\`
git clone ${reg.repoCloneUrl}
cd string-calc

# play the kata, commit per phase
# red: commit a failing test
# green: commit the impl that makes it pass
# refactor: commit a structural change with tests staying green

git push
# username: ${reg.username}
# password: <paste the token above>
\`\`\`

When you push, the judge replays your commits and posts the verdict at [/agents/${reg.username}/string-calc](/agents/${reg.username}/string-calc).

[← spec](/games/string-calc) · [all agents](/agents)
`;

  const html = await renderPage({
    title: `welcome ${reg.username} — tdd.md`,
    bodyMarkdown: body,
    active: "agents",
    noindex: true,
  });
  return new Response(html, {
    headers: new Headers([
      ["Content-Type", "text/html; charset=utf-8"],
      // This page embeds a credential that is shown exactly once: keep
      // it out of browser and intermediary caches.
      ["Cache-Control", "no-store"],
      ["Set-Cookie", sessionCookie],
      ["Set-Cookie", clearOauthState],
    ]),
  });
},
1175+ },
1176+});
added src/c31_blog.ts +39 −0
@@ -0,0 +1,39 @@
1+// c31 — model: blog index data. The post bodies live as markdown in
2+// content/blog/<slug>.md; this file is just the registry that drives
3+// /blog, /blog/:slug, and the sitemap. New posts: drop the .md file
4+// and add an entry here.
5+
6+export interface BlogEntry {
7+ slug: string;
8+ title: string;
9+ description: string;
10+ // ISO date for the listing + sitemap lastmod.
11+ date: string;
12+}
13+
14+export const ALL_POSTS: BlogEntry[] = [
15+ {
16+ slug: "tweag-handbook-tdd",
17+ title: "Tweag's agentic TDD handbook gets the loop right — local green still isn't enough",
18+ description: "Tweag's agentic-coding handbook describes a clean TDD loop and the right rules for AI assistants — but the validation layer it leans on (run tests, see green) misses the three failure modes most likely to show up: tautology, test deletion in refactor, and assertion weakening. Here's the gap, and what closes it.",
19+ date: "2026-05-08",
20+ },
21+ {
22+ slug: "aider-tdd",
23+ title: "Aider is the closest agent to TDD on rails — until you let it auto-fix",
24+ description: "Aider's auto-commit-per-edit and bite-sized-steps philosophy make it TDD-shaped by default. Then `--auto-test` discovers it can win by deleting tests instead of fixing the impl. Here's how Aider's strengths map onto TDD, and how to keep the auto-test loop honest.",
25+ date: "2026-05-04",
26+ },
27+ {
28+ slug: "cursor-tdd",
29+ title: "Cursor knows how to do TDD. Most users skip the parts that matter.",
30+ description: "Cursor's own agent best practices document a clean TDD workflow — but most users skip the features (Plan Mode, fresh conversations, .cursor/rules) that actually make it work. Here's how to put the pieces together, with a kata you can run end-to-end.",
31+ date: "2026-05-04",
32+ },
33+ {
34+ slug: "claude-code-tdd",
35+ title: "Claude Code does not do TDD by default — here's how to make it",
36+ description: "Claude Code writes the test and impl in one breath, so the test never fails for the right reason. Two structural changes — CLAUDE.md rules + phase-separated sessions — get the discipline back, and tdd.md can verify it.",
37+ date: "2026-05-04",
38+ },
39+];
added src/c31_commits.test.ts +52 −0
@@ -0,0 +1,52 @@
1+import { test, expect } from "bun:test";
2+import { parseCommit, computeProgress } from "./c31_commits.ts";
3+
4+test("parseCommit reads a phase prefix", () => {
5+ expect(parseCommit("red: failing test for empty")).toEqual({
6+ phase: "red",
7+ step: null,
8+ subject: "failing test for empty",
9+ });
10+});
11+
12+test("parseCommit extracts step from phase(step): form", () => {
13+ expect(parseCommit("green(single-number): return n for one number")).toEqual({
14+ phase: "green",
15+ step: "single-number",
16+ subject: "return n for one number",
17+ });
18+});
19+
20+test("parseCommit recognizes 'Initial commit' as init", () => {
21+ expect(parseCommit("Initial commit").phase).toBe("init");
22+});
23+
24+test("parseCommit returns untagged for unknown messages", () => {
25+ expect(parseCommit("wip — fixing something").phase).toBe("untagged");
26+});
27+
28+test("parseCommit recognizes spike: prefix", () => {
29+ expect(parseCommit("spike: try the regex approach").phase).toBe("spike");
30+});
31+
32+test("parseCommit extracts step from spike(step):", () => {
33+ const p = parseCommit("spike(custom-separator): explore Forge regex");
34+ expect(p.phase).toBe("spike");
35+ expect(p.step).toBe("custom-separator");
36+});
37+
38+test("computeProgress verifies a step after red→green for the same step", () => {
39+ const commits = [
40+ { commit: { message: "green(empty): returns 0" } },
41+ { commit: { message: "red(empty): empty string returns 0" } },
42+ ]; // newest first, like Forgejo
43+ const p = computeProgress(commits);
44+ expect(p.verifiedSteps).toEqual(new Set(["empty"]));
45+ expect(p.redCount).toBe(1);
46+ expect(p.greenCount).toBe(1);
47+});
48+
49+test("computeProgress does not verify green-without-prior-red", () => {
50+ const commits = [{ commit: { message: "green(empty): returns 0" } }];
51+ expect(computeProgress(commits).verifiedSteps.size).toBe(0);
52+});
added src/c31_commits.ts +65 −0
@@ -0,0 +1,65 @@
/** Category a commit falls into, derived from its subject line. */
export type Phase = "red" | "green" | "refactor" | "spike" | "init" | "untagged";

/** Result of parsing one commit message. */
export interface ParsedCommit {
  phase: Phase;
  /** Step id from the `phase(step):` form, lower-cased; null when absent. */
  step: string | null;
  /** Subject text after the phase prefix (or the whole subject line). */
  subject: string;
}

// `phase: subject` or `phase(step): subject`, matched case-insensitively.
const PHASE_RE = /^(red|green|refactor|spike)(?:\(([a-z][a-z0-9-]*)\))?:\s*(.*)$/i;

/**
 * Classifies a commit by the first line of its message.
 *
 * - `red|green|refactor|spike[(step)]: subject` → that phase (any casing)
 * - `Initial commit` (any casing)               → "init"
 * - anything else                               → "untagged"
 *
 * @param message Full commit message; only the first line is inspected.
 * @returns The phase, optional step id, and remaining subject text.
 */
export const parseCommit = (message: string): ParsedCommit => {
  // Split on any line-ending style. With a plain "\n" split, a CRLF
  // message leaves "\r" on the subject; `.` and `$` cannot match across
  // it, so a correctly tagged commit would be reported as "untagged".
  const subject = message.split(/\r\n?|\n/, 1)[0] ?? "";
  const m = PHASE_RE.exec(subject);
  if (m) {
    return {
      phase: (m[1] ?? "").toLowerCase() as Phase,
      // The pattern accepts any casing, so normalise the step the same
      // way as the phase — otherwise `red(Empty)` and `green(empty)`
      // would count as different steps.
      step: m[2]?.toLowerCase() ?? null,
      subject: m[3] ?? "",
    };
  }
  if (/^Initial commit$/i.test(subject)) {
    return { phase: "init", step: null, subject };
  }
  return { phase: "untagged", step: null, subject };
};
26+
/** Tallies produced by computeProgress for one commit history. */
export interface Progress {
  /** Step ids that received a green commit after a red for the same id. */
  verifiedSteps: Set<string>;
  redCount: number;
  greenCount: number;
  refactorCount: number;
  spikeCount: number;
  untaggedCount: number;
}

/**
 * Walks a commit list and counts commits per phase.
 *
 * A step becomes "verified" once a green commit for it appears after a
 * red commit for the same step. Refactor, spike and untagged commits are
 * only counted; "init" commits are not counted at all.
 *
 * @param commits Commits ordered newest-first (the order Forgejo returns).
 */
export const computeProgress = (commits: { commit: { message: string } }[]): Progress => {
  const tally: Progress = {
    verifiedSteps: new Set<string>(),
    redCount: 0,
    greenCount: 0,
    refactorCount: 0,
    spikeCount: 0,
    untaggedCount: 0,
  };
  // Steps that have had a red commit so far, in chronological order.
  const stepsSeenRed = new Set<string>();

  // Work on a reversed copy so history is replayed oldest-first without
  // mutating the caller's array.
  const chronological = commits.slice().reverse();
  for (const entry of chronological) {
    const { phase, step } = parseCommit(entry.commit.message);
    switch (phase) {
      case "red":
        tally.redCount += 1;
        if (step) stepsSeenRed.add(step);
        break;
      case "green":
        tally.greenCount += 1;
        if (step && stepsSeenRed.has(step)) tally.verifiedSteps.add(step);
        break;
      case "refactor":
        tally.refactorCount += 1;
        break;
      case "spike":
        tally.spikeCount += 1;
        break;
      case "untagged":
        tally.untaggedCount += 1;
        break;
      default:
        // "init" commits carry no score information.
        break;
    }
  }
  return tally;
};
added src/c31_games.test.ts +26 −0
@@ -0,0 +1,26 @@
1+import { test, expect } from "bun:test";
2+import { loadGame } from "./c31_games.ts";
3+
4+test("loadGame returns a game with the expected id", async () => {
5+ const game = await loadGame("string-calc");
6+ expect(game.id).toBe("string-calc");
7+});
8+
9+test("loadGame returns the kata's step ids in order", async () => {
10+ const game = await loadGame("string-calc");
11+ expect(game.steps.map((s) => s.id)).toEqual([
12+ "empty",
13+ "single-number",
14+ "two-numbers",
15+ "n-numbers",
16+ "newline-separator",
17+ "custom-separator",
18+ "negatives-throw",
19+ ]);
20+});
21+
22+test("loadGame throws a clear error for an unknown game", async () => {
23+ await expect(loadGame("does-not-exist")).rejects.toThrow(
24+ /unknown game: does-not-exist/,
25+ );
26+});
added src/c31_games.ts +55 −0
@@ -0,0 +1,55 @@
1+export interface Step {
2+ id: string;
3+ requirement: string;
4+ // Path (relative to the kata's spec.ts) of the authoritative test file.
5+ // The judge copies this into the agent's working tree after the green
6+ // checkout and runs it — hidden tests are how we detect cheating where
7+ // an agent writes a tautological test like `expect(true).toBe(true)`.
8+ hiddenTestFile: string;
9+}
10+
11+export interface Game {
12+ id: string;
13+ // One-line summary shown on the games index and OG previews.
14+ description: string;
15+ // Human-readable function signature the agent must export. Documented
16+ // on the kata page so authors know what to build.
17+ signature: string;
18+ // The module path the hidden tests will import from. Agents must export
19+ // their solution from this exact path (relative to repo root).
20+ importPath: string;
21+ steps: Step[];
22+}
23+
24+import { readdir } from "node:fs/promises";
25+
/**
 * Loads every kata found under content/games/, sorted by directory
 * name, so callers can enumerate katas without hard-coding their ids.
 *
 * Best-effort by design: an unreadable games directory yields an empty
 * list, and any kata whose spec fails to load is skipped.
 */
export async function listGames(): Promise<Game[]> {
  const entries = await readdir("./content/games", { withFileTypes: true }).catch(() => null);
  if (entries === null) return [];

  const kataIds = entries.flatMap((entry) => (entry.isDirectory() ? [entry.name] : [])).sort();

  const loaded: Game[] = [];
  for (const kataId of kataIds) {
    try {
      const game = await loadGame(kataId);
      loaded.push(game);
    } catch {
      // skip katas that fail to load (missing spec.ts, etc.)
    }
  }
  return loaded;
}
47+
// A kata id doubles as a directory name, so it must be one plain path
// segment: it starts with a letter or digit (which rules out "." and
// "..") and contains no slashes.
const GAME_ID_RE = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;

/**
 * Loads the spec for one kata from content/games/<id>/spec.ts.
 *
 * @param id Kata id, e.g. "string-calc".
 * @returns The `spec` export of the kata's spec module.
 * @throws Error(`unknown game: <id>`) when the id is malformed or no
 *   spec file exists for it.
 */
export async function loadGame(id: string): Promise<Game> {
  // The id can originate from a URL parameter or a repository name and
  // ends up in both a file path and a dynamic import(). Reject anything
  // that could step outside content/games before touching either.
  if (!GAME_ID_RE.test(id)) {
    throw new Error(`unknown game: ${id}`);
  }
  const file = Bun.file(`./content/games/${id}/spec.ts`);
  if (!(await file.exists())) {
    throw new Error(`unknown game: ${id}`);
  }
  const mod = await import(`../content/games/${id}/spec.ts`);
  return mod.spec as Game;
}
added src/c31_guides.ts +26 −0
@@ -0,0 +1,26 @@
1+// c31 — model: agent-specific TDD-walkthrough registry. Drives
2+// /guides + /guides/:slug. Markdown bodies live in content/guides/<slug>.md.
3+
4+export interface GuideEntry {
5+ slug: string;
6+ title: string;
7+ description: string;
8+}
9+
10+export const ALL_GUIDES: GuideEntry[] = [
11+ {
12+ slug: "claude-code",
13+ title: "TDD with Claude Code",
14+ description: "Run TDD katas through Anthropic's Claude Code with phase-separated prompts and CLAUDE.md rules so the judge scores clean red→green→refactor cycles.",
15+ },
16+ {
17+ slug: "cursor",
18+ title: "TDD with Cursor",
19+ description: "Test-driven katas through Cursor — Composer per phase, project rules pinned in .cursor/rules, fresh context for red vs green.",
20+ },
21+ {
22+ slug: "aider",
23+ title: "TDD with Aider",
24+ description: "Aider's commit-per-edit model maps directly onto red→green→refactor — prompt with phase tags and the auto-commit carries through.",
25+ },
26+];
added src/c31_project_config.ts +102 −0
@@ -0,0 +1,102 @@
1+// c31 — model: types + parser for `.tdd-md.json`, the per-repo opt-in
2+// config used by the project-tracking pipeline. Pure data, no I/O.
3+// Fetching the file lives in c14_github; persistence lives in c13_database;
4+// page rendering lives in c51_render.
5+
export const PROJECT_CONFIG_PATH = ".tdd-md.json";
export const PROJECT_CONFIG_VERSION = 1;

export type TestRunner = "none" | "bun";
export type AgentSlug = "claude-code" | "cursor" | "aider" | "unknown";

export interface ProjectConfig {
  version: number;
  // "none" → trace-mode judging only (commit discipline, no test execution).
  // "bun"  → full sandbox-runner judging (later sliver — registration accepts
  //          the value but judging stays trace-only until the runner ships).
  test_runner: TestRunner;
  // Branches whose pushes get scored. Defaults to ["main"].
  tracked_branches: string[];
  // Optional reporting metadata.
  display_name?: string;
  team?: string;
}

export const DEFAULT_CONFIG: ProjectConfig = {
  version: PROJECT_CONFIG_VERSION,
  test_runner: "none",
  tracked_branches: ["main"],
};

/**
 * Validates and normalises a parsed JSON blob into a ProjectConfig.
 *
 * Defaults: `test_runner` → "none", `tracked_branches` → ["main"].
 * `display_name` and `team` are copied only when they are non-empty
 * strings; unknown keys are ignored.
 *
 * @param raw Result of JSON-parsing the repo's `.tdd-md.json`.
 * @returns A normalised config that shares no arrays with `raw`.
 * @throws Error with a human-readable message — these are surfaced
 *   verbatim to the registering user, so they need to be useful.
 */
export const parseProjectConfig = (raw: unknown): ProjectConfig => {
  // Arrays are objects too; without the explicit check `[]` would fall
  // through to a confusing "has version undefined" error.
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
    throw new Error(".tdd-md.json must be a JSON object");
  }
  const obj = raw as Record<string, unknown>;

  const version = obj.version;
  if (typeof version !== "number" || version !== PROJECT_CONFIG_VERSION) {
    throw new Error(
      `.tdd-md.json has version ${JSON.stringify(version)}; expected ${PROJECT_CONFIG_VERSION}`,
    );
  }

  let testRunner: TestRunner = "none";
  if (obj.test_runner !== undefined) {
    if (obj.test_runner !== "none" && obj.test_runner !== "bun") {
      throw new Error(
        `.tdd-md.json: test_runner must be "none" or "bun" (got ${JSON.stringify(obj.test_runner)})`,
      );
    }
    testRunner = obj.test_runner;
  }

  let trackedBranches: string[] = ["main"];
  if (obj.tracked_branches !== undefined) {
    const branches: unknown = obj.tracked_branches;
    // An empty list would silently track nothing, so it is rejected just
    // like a non-array or a list containing blank / non-string entries.
    if (
      !Array.isArray(branches) ||
      branches.length === 0 ||
      branches.some((b) => typeof b !== "string" || !b)
    ) {
      throw new Error(".tdd-md.json: tracked_branches must be a non-empty array of branch names");
    }
    // Copy so later mutation of the input cannot change the config.
    trackedBranches = [...(branches as string[])];
  }

  const config: ProjectConfig = {
    version,
    test_runner: testRunner,
    tracked_branches: trackedBranches,
  };
  if (typeof obj.display_name === "string" && obj.display_name) {
    config.display_name = obj.display_name;
  }
  if (typeof obj.team === "string" && obj.team) {
    config.team = obj.team;
  }
  return config;
};
74+
/**
 * Parses a GitHub repo URL or owner/repo shorthand. Accepts:
 *
 *   https://github.com/syntaxai/tdd.md
 *   https://github.com/syntaxai/tdd.md.git
 *   github.com/syntaxai/tdd.md
 *   syntaxai/tdd.md
 *
 * Surrounding whitespace, a trailing slash and a trailing `.git` are
 * tolerated in any combination.
 *
 * @param raw User-supplied repository reference.
 * @returns The owner and repository name.
 * @throws Error with a precise, user-facing message when `raw` is empty
 *   or is not a two-segment GitHub reference.
 */
export const parseRepoIdentifier = (raw: string): { owner: string; repo: string } => {
  const trimmed = raw.trim();
  if (!trimmed) throw new Error("Repository URL is required.");
  let path = trimmed;
  const httpsMatch = path.match(/^https?:\/\/(?:www\.)?github\.com\/(.+)$/i);
  if (httpsMatch?.[1]) path = httpsMatch[1];
  const bareMatch = path.match(/^github\.com\/(.+)$/i);
  if (bareMatch?.[1]) path = bareMatch[1];
  // Drop trailing slashes *before* the `.git` suffix; in the opposite
  // order "owner/repo.git/" keeps its suffix and parses as "repo.git".
  path = path.replace(/\/+$/, "").replace(/\.git$/i, "");
  const parts = path.split("/").filter(Boolean);
  const owner = parts[0];
  const repo = parts[1];
  // "." and ".." pass the character whitelist below but are path
  // navigation, not names.
  const isDotSegment = (segment: string): boolean => segment === "." || segment === "..";
  if (parts.length !== 2 || !owner || !repo || isDotSegment(owner) || isDotSegment(repo)) {
    throw new Error(
      `Couldn't parse "${raw}" as a GitHub repo. Use a URL like https://github.com/owner/name or the shorthand owner/name.`,
    );
  }
  if (!/^[A-Za-z0-9._-]+$/.test(owner) || !/^[A-Za-z0-9._-]+$/.test(repo)) {
    throw new Error(`"${raw}" contains characters that aren't valid for a GitHub owner/repo.`);
  }
  return { owner, repo };
};
added src/c31_reports_demo.ts +201 −0
@@ -0,0 +1,201 @@
1+// c31 — model: synthetic dataset for the reporting mockups. Pure data,
2+// no I/O, no rendering. The c51_render builders consume these to produce
3+// the demo views at /reports/demo/*. When the real ingest pipeline ships
4+// the same shape gets populated from c13_database queries instead.
5+
6+export interface RecentFlagged {
7+ date: string;
8+ repo: string;
9+ sha: string;
10+ phase: "red" | "green" | "refactor";
11+ failure: string;
12+ pts: number;
13+}
14+
15+export interface FailureSlice {
16+ label: string;
17+ pct: number;
18+ tone: "red" | "green" | "muted" | "accent";
19+}
20+
21+export interface AgentReport {
22+ slug: "claude-code" | "cursor" | "aider";
23+ name: string;
24+ score: number;
25+ delta: number;
26+ commits: number;
27+ phaseCoveragePct: number;
28+ streak: number;
29+ streakBroken: boolean;
30+ topIssueLabel: string;
31+ topIssuePct: number;
32+ failureMix: FailureSlice[];
33+ trend: number[];
34+ recent: RecentFlagged[];
35+}
36+
37+export interface TestFailure {
38+ test: string;
39+ since: string;
40+ flaky?: boolean;
41+}
42+
43+export interface TestSnapshot {
44+ repo: string;
45+ branch: string;
46+ total: number;
47+ passing: number;
48+ failing: number;
49+ failures: TestFailure[];
50+}
51+
52+export interface TestStability {
53+ test: string;
54+ repo: string;
55+ pass: number;
56+ fail: number;
57+ deleted: number;
58+ lastBrokenBy: AgentReport["slug"];
59+ flagged?: boolean;
60+}
61+
62+export const DEMO_PERIOD = "2026-01-01 → 2026-03-31";
63+export const DEMO_ORG = "acme-corp";
64+export const DEMO_REPOS = 4;
65+
66+export const DEMO_SNAPSHOTS: TestSnapshot[] = [
67+ {
68+ repo: "api-gateway",
69+ branch: "main",
70+ total: 247,
71+ passing: 245,
72+ failing: 2,
73+ failures: [
74+ { test: "rate-limit.spec.ts > resets at midnight UTC", since: "2026-03-26" },
75+ { test: "webhook.spec.ts > retries on 5xx with backoff", since: "2026-03-28" },
76+ ],
77+ },
78+ {
79+ repo: "billing-service",
80+ branch: "main",
81+ total: 89,
82+ passing: 89,
83+ failing: 0,
84+ failures: [],
85+ },
86+ {
87+ repo: "data-pipeline",
88+ branch: "main",
89+ total: 156,
90+ passing: 154,
91+ failing: 2,
92+ failures: [
93+ { test: "ingest.spec.ts > handles malformed CSV row", since: "2026-03-22" },
94+ { test: "ingest.spec.ts > deduplicates by hash", since: "2026-03-22" },
95+ ],
96+ },
97+ {
98+ repo: "frontend-web",
99+ branch: "main",
100+ total: 312,
101+ passing: 310,
102+ failing: 2,
103+ failures: [
104+ { test: "checkout.spec.ts > handles network timeout", since: "2026-03-15", flaky: true },
105+ { test: "login.spec.ts > redirects after auth", since: "2026-03-11", flaky: true },
106+ ],
107+ },
108+];
109+
110+export const DEMO_STABILITY: TestStability[] = [
111+ { test: "webhook.spec.ts > retries on 5xx with backoff", repo: "api-gateway", pass: 33, fail: 11, deleted: 0, lastBrokenBy: "cursor", flagged: true },
112+ { test: "checkout.spec.ts > handles network timeout", repo: "frontend-web", pass: 51, fail: 8, deleted: 0, lastBrokenBy: "cursor", flagged: true },
113+ { test: "rate-limit.spec.ts > resets at midnight UTC", repo: "api-gateway", pass: 42, fail: 6, deleted: 0, lastBrokenBy: "claude-code" },
114+ { test: "login.spec.ts > redirects after auth", repo: "frontend-web", pass: 44, fail: 5, deleted: 1, lastBrokenBy: "cursor", flagged: true },
115+ { test: "ingest.spec.ts > handles malformed CSV row", repo: "data-pipeline", pass: 38, fail: 4, deleted: 0, lastBrokenBy: "aider" },
116+ { test: "auth.spec.ts > validates JWT signature", repo: "api-gateway", pass: 47, fail: 3, deleted: 0, lastBrokenBy: "claude-code" },
117+ { test: "ingest.spec.ts > deduplicates by hash", repo: "data-pipeline", pass: 30, fail: 3, deleted: 0, lastBrokenBy: "aider" },
118+ { test: "billing.spec.ts > applies tax bracket", repo: "billing-service", pass: 29, fail: 2, deleted: 0, lastBrokenBy: "claude-code" },
119+ { test: "webhook.spec.ts > signs payload with HMAC", repo: "api-gateway", pass: 35, fail: 2, deleted: 1, lastBrokenBy: "cursor", flagged: true },
120+ { test: "billing.spec.ts > computes monthly total", repo: "billing-service", pass: 28, fail: 1, deleted: 1, lastBrokenBy: "cursor", flagged: true },
121+ { test: "invoice.spec.ts > generates PDF receipt", repo: "billing-service", pass: 25, fail: 1, deleted: 0, lastBrokenBy: "claude-code" },
122+ { test: "pricing.spec.ts > rounds to nearest cent", repo: "billing-service", pass: 26, fail: 1, deleted: 0, lastBrokenBy: "aider" },
123+];
124+
125+export const DEMO_REPORTS: AgentReport[] = [
126+ {
127+ slug: "claude-code",
128+ name: "Claude Code",
129+ score: 78,
130+ delta: +6,
131+ commits: 612,
132+ phaseCoveragePct: 92,
133+ streak: 47,
134+ streakBroken: false,
135+ topIssueLabel: "red-did-not-fail",
136+ topIssuePct: 8,
137+ failureMix: [
138+ { label: "clean cycles", pct: 84, tone: "green" },
139+ { label: "red-did-not-fail", pct: 8, tone: "red" },
140+ { label: "broken refactor", pct: 4, tone: "red" },
141+ { label: "test-deleted", pct: 2, tone: "red" },
142+ { label: "no phase tag", pct: 2, tone: "muted" },
143+ ],
144+ trend: [72, 73, 71, 74, 72, 75, 73, 75, 77, 76, 75, 76, 78, 77, 79, 78, 77, 79, 80, 78, 79, 80, 79, 81, 80, 82, 81, 80, 79, 78],
145+ recent: [
146+ { date: "2026-03-29", repo: "api-gateway", sha: "f1c8b3a", phase: "red", failure: "red-did-not-fail", pts: -5 },
147+ { date: "2026-03-24", repo: "billing-service", sha: "9d2e1f4", phase: "refactor", failure: "broken refactor", pts: -5 },
148+ { date: "2026-03-18", repo: "data-pipeline", sha: "62a9cb7", phase: "green", failure: "no phase tag (parent)", pts: 0 },
149+ ],
150+ },
151+ {
152+ slug: "cursor",
153+ name: "Cursor",
154+ score: 54,
155+ delta: -15,
156+ commits: 489,
157+ phaseCoveragePct: 71,
158+ streak: 3,
159+ streakBroken: true,
160+ topIssueLabel: "test-deleted in refactor",
161+ topIssuePct: 14,
162+ failureMix: [
163+ { label: "clean cycles", pct: 64, tone: "green" },
164+ { label: "test-deleted", pct: 14, tone: "red" },
165+ { label: "red-did-not-fail", pct: 9, tone: "red" },
166+ { label: "broken refactor", pct: 7, tone: "red" },
167+ { label: "no phase tag", pct: 6, tone: "muted" },
168+ ],
169+ trend: [69, 70, 71, 72, 70, 71, 72, 73, 72, 71, 72, 70, 68, 65, 60, 55, 50, 52, 54, 53, 56, 54, 52, 55, 53, 54, 56, 55, 54, 54],
170+ recent: [
171+ { date: "2026-03-28", repo: "api-gateway", sha: "a1b2c3d", phase: "refactor", failure: "test-deleted", pts: -20 },
172+ { date: "2026-03-26", repo: "api-gateway", sha: "4e5f6a7", phase: "green", failure: "broken refactor", pts: -5 },
173+ { date: "2026-03-23", repo: "billing-service", sha: "8b9c0d1", phase: "red", failure: "red-did-not-fail", pts: -5 },
174+ { date: "2026-03-21", repo: "api-gateway", sha: "2e3f4a5", phase: "refactor", failure: "test-deleted", pts: -20 },
175+ { date: "2026-03-19", repo: "data-pipeline", sha: "6b7c8d9", phase: "refactor", failure: "broken refactor", pts: -5 },
176+ ],
177+ },
178+ {
179+ slug: "aider",
180+ name: "Aider",
181+ score: 89,
182+ delta: +2,
183+ commits: 146,
184+ phaseCoveragePct: 96,
185+ streak: 89,
186+ streakBroken: false,
187+ topIssueLabel: "broken refactor",
188+ topIssuePct: 3,
189+ failureMix: [
190+ { label: "clean cycles", pct: 94, tone: "green" },
191+ { label: "broken refactor", pct: 3, tone: "red" },
192+ { label: "red-did-not-fail", pct: 2, tone: "red" },
193+ { label: "no phase tag", pct: 1, tone: "muted" },
194+ ],
195+ trend: [87, 88, 89, 88, 87, 89, 90, 89, 88, 89, 90, 88, 89, 90, 91, 89, 88, 89, 90, 89, 90, 91, 89, 88, 89, 90, 89, 90, 89, 89],
196+ recent: [
197+ { date: "2026-03-27", repo: "data-pipeline", sha: "3a4b5c6", phase: "refactor", failure: "broken refactor", pts: -5 },
198+ { date: "2026-03-15", repo: "billing-service", sha: "7d8e9f0", phase: "red", failure: "red-did-not-fail", pts: -5 },
199+ ],
200+ },
201+];
added src/c32_judge.ts +370 −0
@@ -0,0 +1,370 @@
1+import { mkdtempSync, rmSync } from "fs";
2+import { join } from "path";
3+import { tmpdir } from "os";
4+import { parseCommit, type Phase } from "./c31_commits.ts";
5+import { saveRun, type Verdict, type StepVerdict, type RefactorVerdict, type Mode } from "./c13_database.ts";
6+import { loadGame, type Game } from "./c31_games.ts";
7+
8+type TestRunner = "bun" | "none";
9+
10+interface TddConfig {
11+ mode: Mode;
12+ testRunner: TestRunner;
13+}
14+
// Reads tdd.config.json from the root of the agent's checkout and
// returns the scoring mode and test runner it selects. Example file:
//
//   { "mode": "pragmatic", "test_runner": "none" }
//
// Only "pragmatic" / "learning" override the default mode ("strict"),
// and only "none" overrides the default runner ("bun"). A missing or
// unparseable file yields the defaults. With test_runner "none" the
// judge scores steps from the commit trace instead of running tests.
const readConfig = async (cwd: string): Promise<TddConfig> => {
  const defaults: TddConfig = { mode: "strict", testRunner: "bun" };
  const configFile = Bun.file(join(cwd, "tdd.config.json"));
  if (!(await configFile.exists())) return defaults;
  try {
    const raw = (await configFile.json()) as { mode?: string; test_runner?: string };
    const mode: Mode = raw.mode === "pragmatic" || raw.mode === "learning" ? raw.mode : "strict";
    const testRunner: TestRunner = raw.test_runner === "none" ? "none" : "bun";
    return { mode, testRunner };
  } catch {
    // Best effort: a broken config file never fails the run.
    return defaults;
  }
};
38+
// Adjusts a score delta for the scoring mode. Non-negative deltas pass
// through untouched in every mode. Penalties are zeroed in "learning",
// halved (rounded toward zero) in "pragmatic", and kept as-is otherwise.
const applyMode = (delta: number, mode: Mode): number => {
  if (delta >= 0) return delta;
  switch (mode) {
    case "learning":
      return 0;
    case "pragmatic":
      return Math.ceil(delta / 2);
    default:
      return delta;
  }
};
47+
// Builds the one-paragraph, plain-language explanation attached to a
// step verdict. The text addresses the agent whose repo was judged.
// Only `status`, `mode` and `hiddenPassed` influence the wording;
// `redSha` / `greenSha` are accepted but not read here.
const explainStep = (params: {
  status: StepVerdict["status"];
  redSha: string | null;
  greenSha: string | null;
  hiddenPassed: boolean | null;
  mode: Mode;
}): string => {
  switch (params.status) {
    case "verified":
      return "Red failed as expected, green passes your tests, and the kata's hidden tests confirm the implementation matches the requirement.";
    case "discipline-only":
      return "Red→green discipline holds, but this kata didn't ship hidden tests for the step. Partial credit awarded; full +20 isn't possible without authoritative verification.";
    case "no-green":
      return "Red commit landed; the matching green(<step>) commit hasn't been pushed yet. Push your green to lock in the score.";
    case "red-did-not-fail": {
      // Pragmatic mode tolerates a combined red+green commit.
      if (params.mode === "pragmatic") {
        return "Combined red+green commit detected. Pragmatic mode allows this — the cycle still counts, just with a softer score than a clean separation.";
      }
      return "Red commit's tests already passed when the step was first introduced — meaning the implementation was added before the test, or the test is tautological. Switch to pragmatic mode if you commit red+green together intentionally.";
    }
    case "green-did-not-pass":
      return "Green commit's own tests still fail. The implementation doesn't yet satisfy the test you wrote — fix the impl, or reconsider whether the test reflects the requirement.";
    case "hidden-tests-failed": {
      // Anything other than an explicit `false` is reported as inconclusive.
      if (params.hiddenPassed === false) {
        return "Your tests pass, but the kata's hidden tests don't — this is the classic tautology trap. Tighten your test to mirror the requirement (e.g., assert the actual return value, not just that it runs).";
      }
      return "Your tests pass, but hidden verification was inconclusive. Re-push to retry.";
    }
    case "test-deleted":
      return "Test count dropped between red and green for this step. Once a test exists it must keep existing — refactor it, don't delete it. If the test was wrong, replace it in a separate commit before resuming the cycle.";
    case "trace-verified":
      return "Trace-only mode: red→green pair found in the commit log. Tests weren't executed (test_runner: \"none\"). Switch to bun runner for behaviour verification.";
    case "trace-tests-shrunk":
      return "Trace-only mode: the green commit's tree has fewer test files than the red commit's tree — looks like deletion. If you renamed or split test files, the tally still drops.";
  }
};
84+
// Plain-language explanation for a refactor verdict: one sentence for a
// refactor that kept the tests green, another for one that broke them.
const explainRefactor = (passed: boolean): string => {
  if (!passed) {
    return "Refactor commit broke at least one test. Either revert the refactor or write a new red→green to capture the changed behavior.";
  }
  return "Tests stayed green through the refactor — structural change without behavior change, the canonical refactor.";
};
89+
// Base URL used to build clone URLs. An unset, empty or whitespace-only
// FORGEJO_URL falls back to the public host, and trailing slashes are
// dropped so `${FORGEJO_INTERNAL}/owner/repo.git` never contains `//`
// and never degrades into a bare `/owner/repo.git` path.
const FORGEJO_INTERNAL = (process.env.FORGEJO_URL?.trim() || "https://git.tdd.md").replace(/\/+$/, "");
// Upper bound, in milliseconds, for a single `bun test` invocation.
const TEST_TIMEOUT_MS = 8000;
92+
// Environment handed to the git and bun subprocesses. It is built from
// scratch rather than copied from process.env, so it contains only the
// four variables listed here — none of the parent's secrets. PATH is
// fixed; HOME and TMPDIR both point at the per-run directory `cwd`.
const sandboxEnv = (cwd: string): Record<string, string> => {
  const env: Record<string, string> = {
    PATH: "/usr/local/bin:/usr/bin:/bin",
    HOME: cwd,
    TMPDIR: cwd,
    NODE_ENV: "test",
  };
  return env;
};
103+
// Runs `cmd` in `cwd` with the sandboxed environment and captures its
// output.
//
// cmd       - argv of the program to run.
// cwd       - working directory; also passed to sandboxEnv().
// timeoutMs - after this many milliseconds the child is SIGKILLed and
//             `timedOut` is reported as true.
//
// Returns trimmed stdout/stderr, the exit code and the timeout flag.
//
// stdout and stderr are drained while waiting for the exit instead of
// afterwards: a child that writes more than the pipe can hold would
// otherwise stall on the write and only end via the timeout kill.
const runProc = async (
  cmd: string[],
  cwd: string,
  timeoutMs: number,
): Promise<{ stdout: string; stderr: string; exitCode: number; timedOut: boolean }> => {
  const proc = Bun.spawn(cmd, {
    cwd,
    stdout: "pipe",
    stderr: "pipe",
    env: sandboxEnv(cwd),
  });
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    proc.kill("SIGKILL");
  }, timeoutMs);
  // Disarm the timer as soon as the process itself has exited, so a
  // normal exit is never reported as a timeout while output is read.
  const exited = proc.exited.finally(() => clearTimeout(timer));
  const [exitCode, stdout, stderr] = await Promise.all([
    exited,
    new Response(proc.stdout).text(),
    new Response(proc.stderr).text(),
  ]);
  return { stdout: stdout.trim(), stderr: stderr.trim(), exitCode, timedOut };
};
126+
// Runs `bun test` in `cwd`. Resolves to true only when the run finished
// within TEST_TIMEOUT_MS and exited with code 0.
const runTests = async (cwd: string): Promise<boolean> => {
  const { timedOut, exitCode } = await runProc(["bun", "test"], cwd, TEST_TIMEOUT_MS);
  if (timedOut) return false;
  return exitCode === 0;
};
132+
// Language-agnostic test-file counter for trace-only mode. Lists the
// tree at `sha` with `git ls-tree`, so no checkout is needed, and counts
// paths whose final segment follows a conventional test-file name:
// foo.test.ts, foo.spec.ts, FooTests.cs, FooTest.java, test_foo.py,
// foo_test.go, foo_spec.rb, FooSpec.scala, or any file directly inside
// a test/ or tests/ directory.
//
// Returns 0 when `git ls-tree` exits non-zero.
const countTestFiles = async (cwd: string, sha: string): Promise<number> => {
  const r = await runProc(["git", "ls-tree", "-r", "--name-only", sha], cwd, 5000);
  if (r.exitCode !== 0) return 0;
  // The `_spec.<ext>` and `Spec.scala` alternatives cover the Ruby and
  // Scala conventions named above, which were previously not matched.
  const re =
    /(?:^|\/)(?:[^/]*\.(?:test|spec)\.[a-z]+|[Tt]ests?\/[^/]+|test_[^/]+|[^/]+_(?:test|spec)\.[a-z]+|[^/]+[Tt]ests?\.cs|[^/]+[Tt]est\.java|[^/]+Spec\.scala)$/;
  let count = 0;
  for (const line of r.stdout.split("\n")) {
    if (re.test(line)) count++;
  }
  return count;
};
148+
// Counts `test(` / `it(` call sites across the tracked *.test.ts files
// of the current checkout, skipping injected `__hidden_` files. The
// judge compares this tally between the red and green commits to spot
// deleted tests. Returns 0 when `git ls-files` exits non-zero; a file
// that cannot be read contributes nothing.
const countTests = async (cwd: string): Promise<number> => {
  const listing = await runProc(["git", "ls-files", "*.test.ts"], cwd, 5000);
  if (listing.exitCode !== 0) return 0;
  let total = 0;
  for (const relPath of listing.stdout.split("\n")) {
    if (!relPath || relPath.includes("__hidden_")) continue;
    const handle = Bun.file(join(cwd, relPath));
    let source: string;
    try {
      source = await handle.text();
    } catch {
      source = "";
    }
    total += source.match(/\b(?:test|it)\s*\(/g)?.length ?? 0;
  }
  return total;
};
166+
// Runs the kata's hidden test file for `stepId` against whatever commit
// is currently checked out in `cwd`. The file is copied into the working
// tree as `__hidden_<stepId>__.test.ts`, only that file is executed, and
// the copy is removed afterwards whatever the outcome.
//
// Resolves to null when the spec has no such step or the hidden test
// file does not exist; otherwise to whether the run exited 0 in time.
const runHiddenTests = async (cwd: string, spec: Game, stepId: string): Promise<boolean | null> => {
  const step = spec.steps.find((candidate) => candidate.id === stepId);
  if (!step) return null;
  const hidden = Bun.file(`./content/games/${spec.id}/${step.hiddenTestFile}`);
  const available = await hidden.exists();
  if (!available) return null;
  const body = await hidden.text();
  const injectedName = `__hidden_${stepId}__.test.ts`;
  const injectedPath = join(cwd, injectedName);
  await Bun.write(injectedPath, body);
  try {
    const { timedOut, exitCode } = await runProc(["bun", "test", injectedName], cwd, TEST_TIMEOUT_MS);
    return !timedOut && exitCode === 0;
  } finally {
    try {
      rmSync(injectedPath, { force: true });
    } catch {
      // best effort — a leftover file must not mask the test result
    }
  }
};
193+
// One commit as the judge sees it: its sha plus the phase and step that
// parseCommit() extracted from the commit message.
interface CommitInfo {
  sha: string;
  phase: Phase;
  step: string | null;
}

// Lists the commits reachable from HEAD, oldest first. `git log` emits
// each commit as `<sha> 0x1f <full message> 0x1e`, so records can be
// split without ambiguity on multi-line messages. Returns an empty list
// when `git log` exits non-zero.
const readCommits = async (cwd: string): Promise<CommitInfo[]> => {
  const log = await runProc(["git", "log", "--reverse", "--pretty=format:%H%x1f%B%x1e"], cwd, 10000);
  if (log.exitCode !== 0) return [];
  return log.stdout
    .split("\x1e")
    .map((record) => record.trim())
    .filter((record) => record !== "")
    .flatMap((record): CommitInfo[] => {
      const [sha, message = ""] = record.split("\x1f");
      if (!sha) return [];
      const parsed = parseCommit(message);
      return [{ sha, phase: parsed.phase, step: parsed.step }];
    });
};
214+
// Judges one agent repository end to end and stores the verdict.
//
// owner, repo - identify the Forgejo repository to clone; `repo` is also
//               used as the kata id when loading the spec.
//
// Flow: clone into a fresh temp dir, read the commit log, pair the first
// red commit of each step with the first green commit that follows it,
// score every step (by running tests, or from the trace alone when
// test_runner is "none"), score refactor commits, save the run with
// saveRun() and return the verdict.
//
// Throws when the clone fails. The temp dir is removed in every case.
export const judge = async (owner: string, repo: string): Promise<Verdict> => {
  const cwd = mkdtempSync(join(tmpdir(), `judge-${owner}-${repo}-`));
  try {
    // Agent repos default to private. The admin token is supplied with
    // `git -c http.extraheader=…` on the command line, so it is not
    // written into the cloned repo's config.
    const cloneUrl = `${FORGEJO_INTERNAL}/${owner}/${repo}.git`;
    const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
    const gitArgs = adminToken
      ? ["-c", `http.extraheader=Authorization: token ${adminToken}`, "clone", "--quiet", cloneUrl, "."]
      : ["clone", "--quiet", cloneUrl, "."];
    const cloneR = await runProc(["git", ...gitArgs], cwd, 30000);
    if (cloneR.exitCode !== 0) {
      throw new Error(`clone failed: ${cloneR.stderr || cloneR.stdout}`);
    }

    const commits = await readCommits(cwd);
    const headR = await runProc(["git", "rev-parse", "HEAD"], cwd, 5000);
    const headSha = headR.stdout;

    // First red per step + first green-after-red per step (chronological).
    const stepRed = new Map<string, string>();
    const stepGreen = new Map<string, string>();
    for (const c of commits) {
      if (!c.step) continue;
      if (c.phase === "red" && !stepRed.has(c.step)) {
        stepRed.set(c.step, c.sha);
      } else if (c.phase === "green" && stepRed.has(c.step) && !stepGreen.has(c.step)) {
        stepGreen.set(c.step, c.sha);
      }
    }

    // Read the agent's mode + runner preferences from tdd.config.json.
    const { mode, testRunner } = await readConfig(cwd);

    // Load the kata's authoritative spec — used to fetch hidden tests
    // per step. Repos that don't match a known kata get scored on
    // red→green discipline only (no hidden-test verification).
    let spec: Game | null = null;
    try {
      spec = await loadGame(repo);
    } catch {
      spec = null;
    }

    const steps: StepVerdict[] = [];
    for (const [stepId, redSha] of stepRed) {
      const greenSha = stepGreen.get(stepId) ?? null;

      if (testRunner === "none") {
        // Trace-only path: don't checkout, don't run anything. Score
        // purely from the commit log + a language-agnostic test-file
        // count via `git ls-tree`. Useful for non-Bun projects.
        const redFiles = await countTestFiles(cwd, redSha);
        const greenFiles = greenSha ? await countTestFiles(cwd, greenSha) : redFiles;
        const filesShrank = greenSha !== null && greenFiles < redFiles;

        let status: StepVerdict["status"];
        let baseDelta = 0;
        if (greenSha === null) {
          status = "no-green";
        } else if (filesShrank) {
          status = "trace-tests-shrunk";
          baseDelta = -10;
        } else {
          status = "trace-verified";
          baseDelta = 10;
        }
        const scoreDelta = applyMode(baseDelta, mode);
        const explanation = explainStep({ status, redSha, greenSha, hiddenPassed: null, mode });
        steps.push({
          stepId, redSha, greenSha,
          redFailed: null, greenPassed: null, hiddenPassed: null,
          status, scoreDelta, explanation,
        });
        continue;
      }

      // Replay the red commit: its tests are expected to fail.
      await runProc(["git", "checkout", "--quiet", redSha], cwd, 5000);
      const redTestCount = await countTests(cwd);
      const redPassed = await runTests(cwd);
      const redFailed = !redPassed;
      let greenPassed: boolean | null = null;
      let hiddenPassed: boolean | null = null;
      let testsDeleted = false;
      if (greenSha) {
        await runProc(["git", "checkout", "--quiet", greenSha], cwd, 5000);
        const greenTestCount = await countTests(cwd);
        testsDeleted = greenTestCount < redTestCount;
        greenPassed = await runTests(cwd);
        // Hidden tests only run for a green that passes and kept its tests.
        if (greenPassed && spec && !testsDeleted) {
          hiddenPassed = await runHiddenTests(cwd, spec, stepId);
        }
      }

      // The first matching rule wins, so the order below is the
      // precedence of the verdicts.
      let status: StepVerdict["status"];
      let baseDelta = 0;
      if (greenSha === null) {
        status = "no-green";
      } else if (testsDeleted) {
        status = "test-deleted";
        baseDelta = -20;
      } else if (!redFailed) {
        status = "red-did-not-fail";
        baseDelta = -5;
      } else if (greenPassed === false) {
        status = "green-did-not-pass";
        baseDelta = -5;
      } else if (hiddenPassed === false) {
        status = "hidden-tests-failed";
        baseDelta = 0;
      } else if (hiddenPassed === true) {
        status = "verified";
        baseDelta = 20;
      } else {
        status = "discipline-only";
        baseDelta = 5;
      }
      const scoreDelta = applyMode(baseDelta, mode);
      const explanation = explainStep({ status, redSha, greenSha, hiddenPassed, mode });
      steps.push({ stepId, redSha, greenSha, redFailed, greenPassed, hiddenPassed, status, scoreDelta, explanation });
    }

    // Refactor commits aren't tied to red→green pairs: any refactor that
    // keeps the existing tests green earns credit, and one that breaks
    // them is penalised.
    //
    // In trace-only mode nothing is checked out or executed, so refactor
    // commits are not replayed. Running `bun test` against a repo whose
    // suite Bun cannot run would report every refactor as broken.
    const refactors: RefactorVerdict[] = [];
    if (testRunner !== "none") {
      for (const c of commits) {
        if (c.phase !== "refactor") continue;
        await runProc(["git", "checkout", "--quiet", c.sha], cwd, 5000);
        const passed = await runTests(cwd);
        const baseDelta = passed ? 5 : -5;
        refactors.push({
          sha: c.sha,
          stepId: c.step,
          testsPassed: passed,
          scoreDelta: applyMode(baseDelta, mode),
          explanation: explainRefactor(passed),
        });
      }
    }

    const totalScore =
      steps.reduce((a, s) => a + s.scoreDelta, 0) +
      refactors.reduce((a, r) => a + r.scoreDelta, 0);
    const verdict: Verdict = { headSha, mode, steps, refactors, totalScore, judgedAt: Date.now() };
    saveRun(owner, repo, verdict);
    return verdict;
  } finally {
    try {
      rmSync(cwd, { recursive: true, force: true });
    } catch {
      // best effort cleanup
    }
  }
};
added src/c32_session.ts +81 −0
@@ -0,0 +1,81 @@
1+// c32 — logic: session signing/verification + cookie helpers. Pure
2+// HMAC over the session payload, no I/O. Handlers (c21) pull a viewer
3+// off the request via getViewer(), and the OAuth callback issues a
4+// session cookie via sessionCookieHeader + signSession.
5+
// Session lifetime in seconds (30 days): long enough for everyday use,
// short enough that a leaked cookie doesn't grant indefinite access.
export const SESSION_TTL_SEC = 60 * 60 * 24 * 30;
// Name of the cookie that carries the signed session value.
const SESSION_COOKIE = "tdd_session";

// Key used to sign sessions: SESSION_SECRET if set, otherwise
// WEBHOOK_SECRET, otherwise the empty string.
const sessionSecret = (): string => {
  const { SESSION_SECRET, WEBHOOK_SECRET } = process.env;
  return SESSION_SECRET ?? WEBHOOK_SECRET ?? "";
};
13+
// Returns `bytes` random bytes from crypto.getRandomValues, encoded as
// a lowercase hex string of length `bytes * 2`.
export const randomHex = (bytes: number): string => {
  const buffer = crypto.getRandomValues(new Uint8Array(bytes));
  let hex = "";
  for (const byte of buffer) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
};
18+
// Parses a Cookie request header into a name → value map.
//
// header - raw header value, or null when the request carried none.
//
// Pairs without `=` or with an empty name are skipped. Values are
// percent-decoded; a value with a malformed escape sequence is kept
// verbatim rather than throwing, because the header is client-controlled
// and one bad cookie must not abort handling of the whole request.
// When a name repeats, the last occurrence wins.
export const parseCookies = (header: string | null): Record<string, string> => {
  const out: Record<string, string> = {};
  if (!header) return out;
  for (const part of header.split(";")) {
    const idx = part.indexOf("=");
    if (idx === -1) continue;
    const name = part.slice(0, idx).trim();
    if (!name) continue;
    const value = part.slice(idx + 1).trim();
    try {
      out[name] = decodeURIComponent(value);
    } catch {
      // decodeURIComponent throws URIError on input such as "%zz".
      out[name] = value;
    }
  }
  return out;
};
31+
// Compares two strings without stopping at the first differing code
// unit. Strings of different length are rejected up front; otherwise
// every position is XOR-ed into one accumulator that is checked once.
export const timingSafeEqual = (a: string, b: string): boolean => {
  const length = a.length;
  if (length !== b.length) return false;
  let diff = 0;
  let i = 0;
  while (i < length) {
    diff |= a.charCodeAt(i) ^ b.charCodeAt(i);
    i += 1;
  }
  return diff === 0;
};
38+
// Computes HMAC-SHA-256 of `body` keyed with `secret` (both encoded by
// TextEncoder) via WebCrypto and returns the MAC as lowercase hex.
export const hmacSha256Hex = async (secret: string, body: string): Promise<string> => {
  const encoder = new TextEncoder();
  const algorithm = { name: "HMAC", hash: "SHA-256" };
  const key = await crypto.subtle.importKey("raw", encoder.encode(secret), algorithm, false, ["sign"]);
  const mac = new Uint8Array(await crypto.subtle.sign("HMAC", key, encoder.encode(body)));
  let hex = "";
  for (const byte of mac) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
};
52+
// Issues a session value of the form `<username>.<exp>.<sig>`, where
// `exp` is the expiry in epoch seconds (now + SESSION_TTL_SEC) and
// `sig` is the hex HMAC of `<username>.<exp>` under the session secret.
export const signSession = async (username: string): Promise<string> => {
  const nowSec = Math.floor(Date.now() / 1000);
  const payload = [username, nowSec + SESSION_TTL_SEC].join(".");
  const signature = await hmacSha256Hex(sessionSecret(), payload);
  return [payload, signature].join(".");
};
59+
// Validates a `<username>.<exp>.<sig>` session value. Resolves to the
// username when the value has exactly three non-empty fields, the
// expiry is a finite number not in the past, and the signature matches
// the HMAC recomputed over `<username>.<exp>`. Otherwise resolves to null.
export const verifySession = async (cookie: string): Promise<string | null> => {
  const fields = cookie.split(".");
  if (fields.length !== 3) return null;
  const [username, expStr, providedSig] = fields;
  if (!username || !expStr || !providedSig) return null;
  const expiresAt = Number(expStr);
  if (!Number.isFinite(expiresAt)) return null;
  const nowSec = Math.floor(Date.now() / 1000);
  if (expiresAt < nowSec) return null;
  const expectedSig = await hmacSha256Hex(sessionSecret(), `${username}.${expStr}`);
  return timingSafeEqual(providedSig, expectedSig) ? username : null;
};
71+
// Resolves the signed-in username for a request, or null. Sessions are
// treated as disabled when no secret is configured; otherwise the
// session cookie is read from the Cookie header and verified.
export const getViewer = async (req: Request): Promise<string | null> => {
  if (sessionSecret() === "") return null;
  const token = parseCookies(req.headers.get("cookie"))[SESSION_COOKIE];
  if (!token) return null;
  return verifySession(token);
};
79+
// Builds the Set-Cookie header value for the session cookie: site-wide
// path, HttpOnly, Secure, SameSite=Lax, with the given Max-Age (seconds).
export const sessionCookieHeader = (value: string, maxAge: number): string => {
  const attributes = [
    `${SESSION_COOKIE}=${value}`,
    "Path=/",
    "HttpOnly",
    "Secure",
    "SameSite=Lax",
    `Max-Age=${maxAge}`,
  ];
  return attributes.join("; ");
};
added src/c51_render.ts +528 −0
@@ -0,0 +1,528 @@
1+// c51 — UI: HTML rendering. Page chrome (renderPage / renderNotFound)
2+// plus all per-page body builders. Imports types from c13/c31; never
3+// from c11 or c21 (lower-numbered layers can be imported, higher ones
4+// cannot).
5+
6+import { marked } from "marked";
7+import type { ProjectRow } from "./c13_database.ts";
8+import { PROJECT_CONFIG_PATH } from "./c31_project_config.ts";
9+import type { Phase } from "./c31_commits.ts";
10+import {
11+ DEMO_PERIOD,
12+ DEMO_ORG,
13+ DEMO_REPOS,
14+ DEMO_REPORTS,
15+ DEMO_SNAPSHOTS,
16+ DEMO_STABILITY,
17+ type AgentReport,
18+ type FailureSlice,
19+ type TestSnapshot,
20+ type TestStability,
21+} from "./c31_reports_demo.ts";
22+
23+const STYLE_CSS = "./public/style.css";
24+const css = await Bun.file(STYLE_CSS).text();
25+
26+export type Section = "home" | "games" | "guides" | "blog" | "agents" | "leaderboard";
27+
28+export interface PageOptions {
29+ title: string;
30+ bodyMarkdown: string;
31+ description?: string;
32+ ogPath?: string;
33+ active?: Section;
34+ noindex?: boolean;
35+ jsonLd?: Record<string, unknown>;
36+}
37+
38+const SITE_DESCRIPTION = "Test-driven development for agentic coding. Scored katas, public verdicts.";
39+
// HTML-escapes `&`, `"`, `<` and `>` in one pass. Single quotes are
// left untouched, so the result is only safe inside double-quoted
// attributes or element content.
const escape = (s: string): string => {
  const entities: Record<string, string> = {
    "&": "&amp;",
    '"': "&quot;",
    "<": "&lt;",
    ">": "&gt;",
  };
  return s.replace(/[&"<>]/g, (ch) => entities[ch] ?? ch);
};
42+
// Renders one navigation anchor; the active entry gets `nav-active`.
const navLink = (href: string, label: string, active: boolean): string =>
  active ? `<a href="${href}" class="nav-active">${label}</a>` : `<a href="${href}">${label}</a>`;

// Renders the site navigation bar: six links separated by a middle-dot
// span, with the link for `active` (if any) highlighted.
const nav = (active?: Section): string => {
  const entries: Array<[string, string, Section]> = [
    ["/", "tdd.md", "home"],
    ["/games", "games", "games"],
    ["/guides", "guides", "guides"],
    ["/blog", "blog", "blog"],
    ["/agents", "agents", "agents"],
    ["/leaderboard", "leaderboard", "leaderboard"],
  ];
  const separator = ' <span class="md-nav-sep">·</span> ';
  const links = entries.map(([href, label, section]) => navLink(href, label, active === section));
  return `<nav class="md-nav">${links.join(separator)}</nav>`;
};
49+
// Renders a complete HTML document around a markdown body.
//
// opts.title        - used for <title> and the og/twitter titles.
// opts.bodyMarkdown - converted to HTML by marked (GFM, no hard breaks)
//                     and inserted unescaped inside <main>.
// opts.description  - meta description; defaults to SITE_DESCRIPTION.
// opts.ogPath       - canonical / og:url value; defaults to the site root.
// opts.active       - navigation entry to highlight.
// opts.noindex      - adds a robots noindex,nofollow meta tag.
// opts.jsonLd       - emitted as an application/ld+json script block.
//
// Title, description and URL are HTML-escaped. The JSON-LD payload has
// every `<` written as the JSON escape \u003c, so a string value
// containing `</script>` cannot end the script element early; the
// parsed JSON is unchanged.
export const renderPage = async (opts: PageOptions): Promise<string> => {
  const body = await marked.parse(opts.bodyMarkdown, { gfm: true, breaks: false });
  const description = opts.description ?? SITE_DESCRIPTION;
  const ogPath = opts.ogPath ?? "https://tdd.md";
  const robots = opts.noindex ? `<meta name="robots" content="noindex,nofollow">\n` : "";
  const jsonLd = opts.jsonLd
    ? `<script type="application/ld+json">${JSON.stringify(opts.jsonLd).replace(/</g, "\\u003c")}</script>\n`
    : "";
  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="color-scheme" content="dark light">
<meta name="description" content="${escape(description)}">
${robots}<link rel="canonical" href="${escape(ogPath)}">
<meta property="og:title" content="${escape(opts.title)}">
<meta property="og:description" content="${escape(description)}">
<meta property="og:type" content="website">
<meta property="og:url" content="${escape(ogPath)}">
<meta property="og:image" content="https://tdd.md/og.svg">
<meta property="og:image:type" content="image/svg+xml">
<meta property="og:image:width" content="1200">
<meta property="og:image:height" content="630">
<meta property="og:site_name" content="tdd.md">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="${escape(opts.title)}">
<meta name="twitter:description" content="${escape(description)}">
<meta name="twitter:image" content="https://tdd.md/og.svg">
<title>${escape(opts.title)}</title>
${jsonLd}<style>${css}</style>
</head>
<body>
${nav(opts.active)}
<main class="md">
${body}
</main>
</body>
</html>`;
};
90+
// Renders the 404 page for `path`.
//
// `path` comes from the request, so it is shown inside a markdown code
// span that it cannot break out of: line breaks are flattened to spaces
// (a blank line would end the blockquote) and the span is delimited by
// one more backtick than the longest backtick run in the path. Paths
// without backticks produce the usual single-backtick span.
export const renderNotFound = async (path: string): Promise<string> => {
  const flat = path.replace(/[\r\n]+/g, " ");
  const longestRun = (flat.match(/`+/g) ?? []).reduce((max, run) => Math.max(max, run.length), 0);
  const fence = "`".repeat(longestRun + 1);
  // A longer fence needs space padding so a path that starts or ends
  // with a backtick stays separate from the delimiter.
  const shown = longestRun === 0 ? `\`${flat}\`` : `${fence} ${flat} ${fence}`;
  return renderPage({
    title: "404 — tdd.md",
    bodyMarkdown: `# 404\n\n> No such path: ${shown}\n\nTry [home](/), [games](/games), [agents](/agents), or [leaderboard](/leaderboard).`,
    noindex: true,
  });
};
97+
98+// ---------------------------------------------------------------------
99+// Small response/formatting helpers used by c21 handlers.
100+// ---------------------------------------------------------------------
101+
// Wraps an HTML string in a Response with a UTF-8 text/html content
// type. `status` defaults to 200.
export const htmlResponse = (html: string, status = 200): Response => {
  const headers = { "Content-Type": "text/html; charset=utf-8" };
  return new Response(html, { status, headers });
};
104+
// Renders a small error page (agents section highlighted) with a link
// back to /agents/register and returns it as an HTML response.
// `status` defaults to 400.
// NOTE(review): `message` is interpolated into the markdown body as-is;
// callers presumably pass trusted text — confirm.
export const errorPage = async (message: string, status = 400): Promise<Response> => {
  const bodyMarkdown = `# error\n\n> ${message}\n\n[← back](/agents/register)`;
  const page = await renderPage({ title: "error — tdd.md", bodyMarkdown, active: "agents" });
  return htmlResponse(page, status);
};
113+
// Wraps a phase name in a span whose class colours it: red, green,
// blue for refactor, and muted for anything else.
export const phaseSpan = (p: Phase): string => {
  let cls: string;
  switch (p) {
    case "red":
      cls = "red";
      break;
    case "green":
      cls = "green";
      break;
    case "refactor":
      cls = "blue";
      break;
    default:
      cls = "muted";
  }
  return `<span class="${cls}">${p}</span>`;
};
118+
// Formats how long ago `iso` was as a short label: "42s ago",
// "5m ago", "3h ago" or "12d ago".
//
// iso - a timestamp string accepted by the Date constructor.
//
// Timestamps in the future clamp to "0s ago". A string that does not
// parse as a date yields "unknown" instead of the "NaNd ago" that the
// arithmetic would otherwise fall through to.
export const relativeTime = (iso: string): string => {
  const ms = Date.now() - new Date(iso).getTime();
  if (Number.isNaN(ms)) return "unknown";
  if (ms < 60_000) return `${Math.max(0, Math.floor(ms / 1000))}s ago`;
  if (ms < 3_600_000) return `${Math.floor(ms / 60_000)}m ago`;
  if (ms < 86_400_000) return `${Math.floor(ms / 3_600_000)}h ago`;
  return `${Math.floor(ms / 86_400_000)}d ago`;
};
126+
127+// ---------------------------------------------------------------------
128+// Body builders for /projects.
129+// ---------------------------------------------------------------------
130+
// Renders one markdown table row for the /projects listing: a link to
// the project page (display name, falling back to owner/name, plus the
// team when set), the tracked branches as code spans, and the runner
// ("trace-only" when the runner is "none").
// NOTE(review): display name and team are HTML-escaped but not
// markdown-escaped, and branch names are not escaped at all; presumably
// the config parser restricts these values — confirm.
const projectListRow = (p: ProjectRow): string => {
  const { repoOwner, repoName } = p;
  const label = p.displayName ?? `${repoOwner}/${repoName}`;
  const teamSuffix = p.team ? ` <span class="muted">· ${escape(p.team)}</span>` : "";
  const branchList = p.trackedBranches.map((branch) => `\`${branch}\``).join(", ");
  const runnerLabel = p.testRunner === "none" ? "trace-only" : p.testRunner;
  return `| [${escape(label)}](/projects/${repoOwner}/${repoName}) ${teamSuffix} | ${branchList} | ${runnerLabel} |`;
};
139+
// Builds the markdown body of the /projects landing page: the table of
// registered projects (or a placeholder row when there are none), then
// static sections on registering a repo and on the config file format.
export const projectsLandingMd = (projects: ProjectRow[]): string => {
  let rows: string;
  if (projects.length > 0) {
    rows = projects.map((project) => projectListRow(project)).join("\n");
  } else {
    rows = `| _no projects yet — [register one](/projects/new)_ | | |`;
  }
  return `# projects

> Real repos that opted in to tdd.md scoring. Each project drops \`${PROJECT_CONFIG_PATH}\` at its root, registers here, and from then on its commits on tracked branches get judged structurally — red-fails, green-passes, no test-deletion, no regression. The aggregated scores feed [the reports](/reports).

## tracked

| project | branches | runner |
|---|---|---|
${rows}

## register a repo

[Register a project →](/projects/new) — paste a public GitHub URL; tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from the default branch and onboards it.

## the config file

Drop \`${PROJECT_CONFIG_PATH}\` at the root of your repo's default branch:

\`\`\`json
{
 "version": 1,
 "test_runner": "none",
 "tracked_branches": ["main"],
 "display_name": "API Gateway",
 "team": "platform"
}
\`\`\`

- **\`test_runner\`** — \`"none"\` for trace-mode (commit-discipline only, language-agnostic). \`"bun"\` will run the test suite once the sandbox-runner ships.
- **\`tracked_branches\`** — pushes to these branches get scored. Defaults to \`["main"]\`.
- **\`display_name\`** / **\`team\`** — optional, only used in the reporting UI.

## what comes next

Registration just stores the project. Per-commit judging (the part that produces score data for the reports) lands in the next sliver — until then the [report pages](/reports) keep showing the demo dataset.

[← back to tdd.md](/) · [the reports](/reports)
`;
};
183+
// Builds the markdown body of /projects/new.
//
// viewer       - signed-in username, or null; anonymous visitors only
//                get a sign-in prompt.
// prefilled    - optional value put back into the repo input.
// errorMessage - optional failure text shown above the form.
//
// All three values are HTML-escaped wherever they are interpolated.
export const projectRegisterMd = (
  viewer: string | null,
  prefilled?: string,
  errorMessage?: string,
): string => {
  if (!viewer) {
    return `# register a project

> You need to sign in before registering a project. We use your GitHub identity to record who onboarded the repo.

[ sign in with github → ](/auth/github/start)

[← all projects](/projects)
`;
  }
  let error = "";
  if (errorMessage) {
    error = `<div class="project-form-error"><strong>Couldn't register that repo:</strong><br>${escape(errorMessage)}</div>`;
  }
  let value = "";
  if (prefilled) {
    value = ` value="${escape(prefilled)}"`;
  }
  return `# register a project

> Paste a public GitHub URL. tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from its default branch, validates it, and onboards the repo. Re-register the same repo to refresh the config.

${error}

<form method="post" action="/projects/new" class="project-form">
 <label for="repo-url">Repository URL or <code>owner/name</code></label>
 <input id="repo-url" name="repo" type="text" required
 placeholder="https://github.com/owner/name"
 autocomplete="off" autocapitalize="off" autocorrect="off"${value} />
 <button type="submit">Register</button>
</form>

> Signed in as <code>${escape(viewer)}</code>. Don't have \`${PROJECT_CONFIG_PATH}\` yet? [See the format on /projects](/projects#the-config-file).

[← all projects](/projects)
`;
};
222+
223+// ---------------------------------------------------------------------
224+// Body builders for /reports.
225+// ---------------------------------------------------------------------
226+
// Maps a score delta to an arrow glyph and CSS class: up for positive,
// down for negative, flat otherwise.
const trendArrow = (delta: number): { glyph: string; cls: string } => {
  if (delta > 0) return { glyph: "↑", cls: "up" };
  if (delta < 0) return { glyph: "↓", cls: "down" };
  return { glyph: "→", cls: "flat" };
};
229+
// Renders `values` as an inline SVG polyline. X positions are spread
// evenly over `width`; Y positions are scaled between the series
// minimum and maximum (range at least 1) with 6 units of padding at top
// and bottom. Returns an empty string for an empty series.
const sparkline = (values: number[], height = 60, width = 320): string => {
  if (values.length === 0) return "";
  const min = Math.min(...values);
  const max = Math.max(...values);
  const range = Math.max(1, max - min);
  const stepX = width / Math.max(1, values.length - 1);
  const pad = 6;
  const innerH = height - pad * 2;
  const coords: string[] = [];
  values.forEach((v, i) => {
    const x = (i * stepX).toFixed(1);
    // SVG's y axis points down, so larger values get smaller y.
    const y = (pad + innerH - ((v - min) / range) * innerH).toFixed(1);
    coords.push(`${x},${y}`);
  });
  const points = coords.join(" ");
  return `<svg class="report-sparkline" viewBox="0 0 ${width} ${height}" preserveAspectRatio="none" aria-hidden="true">
 <polyline fill="none" stroke="currentColor" stroke-width="1.5" points="${points}" />
</svg>`;
};
249+
// Renders the summary tile for one agent: linked name, score out of
// 100, signed trend delta with arrow, commit volume and the top issue.
const tile = (a: AgentReport): string => {
  const { glyph, cls } = trendArrow(a.delta);
  const signedDelta = `${a.delta > 0 ? "+" : ""}${a.delta}`;
  return `<div class="report-tile">
 <p class="report-tile-name"><a href="/reports/demo/agents/${a.slug}">${escape(a.name)}</a></p>
 <p class="report-tile-score">${a.score}<span class="report-tile-score-suffix"> / 100</span></p>
 <p class="report-tile-trend ${cls}">${glyph} ${escape(signedDelta)}</p>
 <p class="report-tile-volume">${a.commits.toLocaleString()} commits</p>
 <div class="report-tile-issue">top issue: <strong>${escape(a.topIssueLabel)}</strong> (${a.topIssuePct}%)</div>
</div>`;
};
261+
// Renders a failure-mix breakdown as horizontal bars: one row per
// slice with its label, a fill as wide as the slice's percentage and
// coloured by its tone, and the percentage as text.
const bars = (mix: FailureSlice[]): string => {
  const renderRow = (slice: FailureSlice): string => `<div class="report-bar-row">
 <span class="report-bar-label">${escape(slice.label)}</span>
 <span class="report-bar-track"><span class="report-bar-fill ${slice.tone}" style="width: ${slice.pct}%"></span></span>
 <span class="report-bar-pct">${slice.pct}%</span>
</div>`;
  const rendered: string[] = [];
  for (const slice of mix) rendered.push(renderRow(slice));
  return `<div class="report-bars">${rendered.join("\n")}</div>`;
};
274+
// Renders the streak badge for an agent. A broken streak gets the
// "broken" class and label; an unbroken streak of 30 or more gets
// "long"; anything else gets no extra class.
const streakBox = (a: AgentReport): string => {
  let cls = "";
  let label = "consecutive clean cycles";
  if (a.streakBroken) {
    cls = "broken";
    label = "recent break";
  } else if (a.streak >= 30) {
    cls = "long";
  }
  return `<span class="report-streak ${cls}"><span class="report-streak-num">${a.streak}</span> ${label}</span>`;
};
280+
281+const mockBanner = `<div class="report-mockup-banner">demo data — real reporting wires up when the project-tracking pipeline ships. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a> · <a href="/reports">about reporting</a></div>`;
282+
// Renders the test snapshot card for one repo/branch: header, totals,
// and a list that is either a single "all passing" entry or one entry
// per failing test followed by a collapsed count of passing tests.
// The card is classed "ok" when nothing fails and "bad" otherwise.
const snapshotBlock = (s: TestSnapshot): string => {
  const items: string[] = [];
  if (s.failures.length === 0) {
    items.push(`<li class="test-list-pass">all ${s.passing} tests groen</li>`);
  } else {
    for (const f of s.failures) {
      const flakyNote = f.flaky ? "intermittent · " : "";
      items.push(
        `<li class="test-list-fail">${escape(f.test)} <span class="test-list-meta">${flakyNote}sinds ${f.since}</span></li>`,
      );
    }
    items.push(`<li class="test-list-collapsed">+ ${s.passing.toLocaleString()} passing tests</li>`);
  }
  const failuresHtml = items.join("\n");
  const statusCls = s.failing === 0 ? "ok" : "bad";
  const failingPart =
    s.failing > 0 ? ` · <span class="red">${s.failing.toLocaleString()} failing</span>` : "";
  return `<div class="test-snapshot ${statusCls}">
 <p class="test-snapshot-head"><strong>${escape(s.repo)}</strong> <span class="test-snapshot-branch">@ ${escape(s.branch)}</span></p>
 <p class="test-snapshot-stats">${s.total.toLocaleString()} tests · <span class="green">${s.passing.toLocaleString()} passing</span>${failingPart}</p>
 <ul class="test-list">
${failuresHtml}
 </ul>
</div>`;
};
302+
/**
 * Renders a link tag pointing at an agent's demo drill-down page.
 * The label is the agent's name from DEMO_REPORTS, or the slug itself when
 * no report matches.
 */
const agentTagHtml = (slug: AgentReport["slug"]): string => {
  const report = DEMO_REPORTS.find((candidate) => candidate.slug === slug);
  const label = report?.name ?? slug;
  return `<a class="agent-tag" href="/reports/demo/agents/${slug}">${escape(label)}</a>`;
};
307+
/**
 * Renders one row of the test-stability table.
 *
 * Flagged tests get the `flagged` row class and a warning glyph after the
 * agent tag. The fail cell turns red from 8 failures upward and the deleted
 * cell turns red as soon as there is at least one deletion.
 */
const stabilityRow = (s: TestStability): string => {
  const rowClass = s.flagged ? "test-stab-row flagged" : "test-stab-row";
  const warning = s.flagged
    ? ` <span class="test-stab-warn" title="test-deletion of weakening dit kwartaal">⚠</span>`
    : "";
  const failTone = s.fail >= 8 ? "red" : "";
  const deletedTone = s.deleted > 0 ? "red" : "";
  return [
    `<tr class="${rowClass}">`,
    ` <td class="test-stab-name">${escape(s.test)}<div class="test-stab-repo">${escape(s.repo)}</div></td>`,
    ` <td class="test-stab-num green">${s.pass}</td>`,
    ` <td class="test-stab-num ${failTone}">${s.fail}</td>`,
    ` <td class="test-stab-num ${deletedTone}">${s.deleted}</td>`,
    ` <td class="test-stab-by">${agentTagHtml(s.lastBrokenBy)}${warning}</td>`,
    `</tr>`,
  ].join("\n");
};
319+
/**
 * Markdown body for the /reports landing page.
 *
 * The page is fully static; it is assembled from three sections (intro,
 * "what gets measured", "reading the data") that are concatenated in order.
 * Each section literal ends with the blank line that separates it from the
 * next one.
 */
export const reportsLandingMd = (): string => {
  const intro = `# reports

> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.

This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data:

- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents
- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits
- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam

Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers.

`;

  const measured = `## what gets measured

This layer measures **discipline**, not code-quality. Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch:

| failure mode | what triggers it | what it costs |
|---|---|---|
| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit |
| \`test-deleted\` | test count drops between commits | -20 / commit |
| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit |
| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % |

The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip.

`;

  const reading = `## reading the data

For management:
- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page.

For team-leads:
- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts.

[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games)
`;

  return intro + measured + reading;
};
355+
/**
 * Markdown body for the demo exec summary (/reports/demo).
 *
 * Sums the commit counts of every demo agent for the scope line, renders one
 * tile per agent, and wraps both in a fixed quarterly narrative.
 */
export const execSummaryMd = (): string => {
  let totalCommits = 0;
  for (const agent of DEMO_REPORTS) {
    totalCommits += agent.commits;
  }
  const tiles = DEMO_REPORTS.map(tile).join("\n");

  const header = `# tdd-discipline rapport · q1 2026

${mockBanner}

> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}.

<div class="report-tiles">
${tiles}
</div>

`;

  const narrative = `## wat veranderde dit kwartaal

Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.

## wat we doen

- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)).
- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren.
- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.

## wat dit getal *niet* meet

Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy.

`;

  const footer = `---

[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)
`;

  return header + narrative + footer;
};
388+
/**
 * Markdown body for one agent's demo drill-down page.
 *
 * @param slug - agent identifier looked up in DEMO_REPORTS.
 * @returns the page markdown, or `null` when no demo report has that slug.
 */
export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => {
  const a = DEMO_REPORTS.find((r) => r.slug === slug);
  if (!a) return null;

  const arr = trendArrow(a.delta);
  // Positive deltas get an explicit "+"; zero and negatives print as-is.
  const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`;

  // Colour of the sparkline wrapper follows the trend direction.
  let trendTone = "muted";
  if (arr.cls === "down") {
    trendTone = "red";
  } else if (arr.cls === "up") {
    trendTone = "green";
  }

  const recentRows = a.recent
    .map((r) => `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`)
    .join("\n");

  // One agent-specific coaching link; anything that is neither claude-code
  // nor cursor falls through to the Aider post.
  let coaching: string;
  if (a.slug === "claude-code") {
    coaching = `[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent \`red-did-not-fail\`.`;
  } else if (a.slug === "cursor") {
    coaching = `[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, \`.cursor/rules\` to stop test-deletion.`;
  } else {
    coaching = `[Aider is the closest agent to TDD on rails — until \`--auto-test\`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.`;
  }

  return `# ${a.name} · drill-down

${mockBanner}

> Discipline-score **${a.score} / 100** <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**.

## trend (30 dagen)

<div class="${trendTone}">
${sparkline(a.trend)}
</div>

${streakBox(a)}

## failure-mode breakdown

${bars(a.failureMix)}

Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits).

## recent flagged

| date | repo | sha | phase | failure | pts |
|---|---|---|---|---|---|
${recentRows}

## coaching

- ${coaching}
- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough.

---

[← exec summary](/reports/demo) · [back to /reports](/reports)
`;
};
436+
/**
 * Markdown body for the demo tests overview (/reports/demo/tests).
 *
 * Shows one snapshot card per repo with grand totals underneath, followed by
 * the per-test stability table and a short reading guide.
 */
export const testsOverviewMd = (): string => {
  // Grand totals over all repo snapshots, accumulated in a single pass.
  let total = 0;
  let passing = 0;
  let failing = 0;
  for (const snapshot of DEMO_SNAPSHOTS) {
    total += snapshot.total;
    passing += snapshot.passing;
    failing += snapshot.failing;
  }
  const failingTone = failing > 0 ? "red" : "muted";

  const snapshots = DEMO_SNAPSHOTS.map((snapshot) => snapshotBlock(snapshot)).join("\n");
  const stabRows = DEMO_STABILITY.map((entry) => stabilityRow(entry)).join("\n");

  return `# tests overzicht

${mockBanner}

> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen".

## huidige stand · per repo

<div class="test-snapshots">
${snapshots}
</div>

**Totaal**: ${total.toLocaleString()} tests · <span class="green">${passing.toLocaleString()} passing</span> · <span class="${failingTone}">${failing.toLocaleString()} failing</span>.

## test-stabiliteit · q1 2026

Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken.

<table class="test-stability">
<thead>
 <tr>
 <th>test</th>
 <th class="num">pass</th>
 <th class="num">fail</th>
 <th class="num">del</th>
 <th>laatst gebroken door</th>
 </tr>
</thead>
<tbody>
${stabRows}
</tbody>
</table>

> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test.

## hoe lees je dit

- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm.
- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt.
- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon).
- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem.

---

[← exec summary](/reports/demo) · [back to /reports](/reports)
`;
};
490+
// ---------------------------------------------------------------------
// Body builder for /projects/:owner/:repo.
// ---------------------------------------------------------------------

/**
 * Markdown body for a registered project's detail page.
 *
 * Every repo-supplied value is treated as untrusted: text goes through
 * `escape` (tracked branch names included, matching how display_name and
 * team are handled) and anything placed inside a URL path is percent-encoded
 * so it cannot alter the link target.
 *
 * @param p - the stored project row to render.
 * @returns the page markdown.
 */
export const projectDetailMd = (p: ProjectRow): string => {
  const display = p.displayName ?? `${p.repoOwner}/${p.repoName}`;
  // registeredAt is rendered as a plain YYYY-MM-DD date (UTC, via toISOString).
  const registeredAt = new Date(p.registeredAt).toISOString().slice(0, 10);
  const branches = p.trackedBranches.map((b) => `\`${escape(b)}\``).join(", ");
  const runnerNote = p.testRunner === "none"
    ? "Trace-mode — judging looks at commit phase tags, test-count drift, and refactor stability. No test execution."
    : "Bun runner — test suite executes in a sandbox at every tracked-branch commit. (Sandbox-runner ships in the next sliver; meanwhile this falls back to trace-mode.)";

  // Percent-encode path segments; a no-op for ordinary owner/repo/user names.
  const githubUrl = `https://github.com/${encodeURIComponent(p.repoOwner)}/${encodeURIComponent(p.repoName)}`;
  const agentUrl = `/agents/${encodeURIComponent(p.registeredBy)}`;

  return `# ${escape(display)}

> [${escape(p.repoOwner)}/${escape(p.repoName)}](${githubUrl}) · registered by [${escape(p.registeredBy)}](${agentUrl}) on ${registeredAt}.

## config

| key | value |
|---|---|
| test_runner | \`${p.testRunner}\` |
| tracked_branches | ${branches} |
| display_name | ${p.displayName ? `\`${escape(p.displayName)}\`` : "_(none)_"} |
| team | ${p.team ? `\`${escape(p.team)}\`` : "_(none)_"} |
| status | \`${p.status}\` |

${runnerNote}

## scored commits

> _No commits judged yet._ The webhook ingest + judging pipeline lands in the next sliver — once it does, scored commits for tracked branches will appear here grouped by agent.

## refresh

Push an updated \`${PROJECT_CONFIG_PATH}\` to your default branch and [re-register](/projects/new?repo=${encodeURIComponent(`${p.repoOwner}/${p.repoName}`)}) to pick up the new config.

[← all projects](/projects)
`;
};
removed src/commits.test.ts +0 −52
@@ -1,52 +0,0 @@
1-import { test, expect } from "bun:test";
2-import { parseCommit, computeProgress } from "./commits";
3-
// parseCommit: phase prefix, optional step, and the two fallbacks.

test("parseCommit reads a phase prefix", () => {
  const parsed = parseCommit("red: failing test for empty");
  expect(parsed).toEqual({ phase: "red", step: null, subject: "failing test for empty" });
});

test("parseCommit extracts step from phase(step): form", () => {
  const parsed = parseCommit("green(single-number): return n for one number");
  expect(parsed).toEqual({
    phase: "green",
    step: "single-number",
    subject: "return n for one number",
  });
});

test("parseCommit recognizes 'Initial commit' as init", () => {
  const { phase } = parseCommit("Initial commit");
  expect(phase).toBe("init");
});

test("parseCommit returns untagged for unknown messages", () => {
  const { phase } = parseCommit("wip — fixing something");
  expect(phase).toBe("untagged");
});

test("parseCommit recognizes spike: prefix", () => {
  const { phase } = parseCommit("spike: try the regex approach");
  expect(phase).toBe("spike");
});

test("parseCommit extracts step from spike(step):", () => {
  const { phase, step } = parseCommit("spike(custom-separator): explore Forge regex");
  expect(phase).toBe("spike");
  expect(step).toBe("custom-separator");
});

// computeProgress: a step is verified only by red followed by green.

test("computeProgress verifies a step after red→green for the same step", () => {
  // Listed newest first, like Forgejo returns them.
  const newestFirst = [
    { commit: { message: "green(empty): returns 0" } },
    { commit: { message: "red(empty): empty string returns 0" } },
  ];
  const progress = computeProgress(newestFirst);
  expect(progress.verifiedSteps).toEqual(new Set(["empty"]));
  expect(progress.redCount).toBe(1);
  expect(progress.greenCount).toBe(1);
});

test("computeProgress does not verify green-without-prior-red", () => {
  const onlyGreen = [{ commit: { message: "green(empty): returns 0" } }];
  const { verifiedSteps } = computeProgress(onlyGreen);
  expect(verifiedSteps.size).toBe(0);
});
removed src/commits.ts +0 −65
@@ -1,65 +0,0 @@
/** TDD phase a commit belongs to, derived from its subject line. */
export type Phase = "red" | "green" | "refactor" | "spike" | "init" | "untagged";

/** Result of parsing a commit message's subject line. */
export interface ParsedCommit {
  phase: Phase;
  /** Step id from the `phase(step):` form, or null when absent. */
  step: string | null;
  /** Text after the phase prefix, or the whole subject when there is none. */
  subject: string;
}

// `phase:` or `phase(step):` followed by the rest of the line, case-insensitive.
const PHASE_RE = /^(red|green|refactor|spike)(?:\(([a-z][a-z0-9-]*)\))?:\s*(.*)$/i;

/**
 * Classifies a commit by the first line of its message.
 *
 * The subject is cut at the first line break of any flavour (LF, CRLF or a
 * lone CR). Cutting on "\n" alone leaves a trailing "\r" on CRLF messages,
 * which neither `.` nor `$` in the patterns below can consume, so a properly
 * tagged commit would be reported as untagged.
 *
 * @param message - full commit message.
 * @returns the phase (lower-cased), the optional step id, and the subject.
 */
export const parseCommit = (message: string): ParsedCommit => {
  const subject = message.split(/\r\n?|\n/, 1)[0] ?? "";
  const m = subject.match(PHASE_RE);
  if (m) {
    return {
      phase: m[1]!.toLowerCase() as Phase,
      step: m[2] ?? null,
      subject: m[3] ?? "",
    };
  }
  if (/^Initial commit$/i.test(subject)) {
    return { phase: "init", step: null, subject };
  }
  return { phase: "untagged", step: null, subject };
};
26-
/** Per-phase commit tallies plus the set of steps that completed red→green. */
export interface Progress {
  verifiedSteps: Set<string>;
  redCount: number;
  greenCount: number;
  refactorCount: number;
  spikeCount: number;
  untaggedCount: number;
}

/**
 * Tallies commits by phase and works out which steps are verified.
 *
 * A step is verified once a `green` commit for it appears after a `red`
 * commit for the same step. Refactor, spike and untagged commits are only
 * counted; `init` commits are ignored entirely.
 *
 * @param commits - commit list, newest first (the order Forgejo returns).
 */
export const computeProgress = (commits: { commit: { message: string } }[]): Progress => {
  const progress: Progress = {
    verifiedSteps: new Set<string>(),
    redCount: 0,
    greenCount: 0,
    refactorCount: 0,
    spikeCount: 0,
    untaggedCount: 0,
  };
  const awaitingGreen = new Set<string>();

  // Work on a reversed copy so the walk is oldest-first and the caller's
  // array is left untouched.
  for (const entry of commits.slice().reverse()) {
    const { phase, step } = parseCommit(entry.commit.message);
    switch (phase) {
      case "red":
        progress.redCount++;
        if (step) awaitingGreen.add(step);
        break;
      case "green":
        progress.greenCount++;
        if (step && awaitingGreen.has(step)) progress.verifiedSteps.add(step);
        break;
      case "refactor":
        progress.refactorCount++;
        break;
      case "spike":
        progress.spikeCount++;
        break;
      case "untagged":
        progress.untaggedCount++;
        break;
      default:
        // "init" commits are not tallied.
        break;
    }
  }
  return progress;
};
removed src/db.ts +0 −214
@@ -1,214 +0,0 @@
1-import { Database } from "bun:sqlite";
2-import type { ProjectConfig, TestRunner } from "./projects";
3-
// Database location; without TDD_DB_PATH an in-memory database is used.
const DB_PATH = process.env.TDD_DB_PATH ?? ":memory:";

// Lazily created connection, shared by every helper in this module.
let db: Database | null = null;

/**
 * Returns the shared database handle, opening it on first use.
 * The first call also runs the idempotent schema setup for the `runs` and
 * `projects` tables and their indexes.
 */
const getDb = (): Database => {
  if (db === null) {
    db = new Database(DB_PATH, { create: true });
    db.exec(`
 CREATE TABLE IF NOT EXISTS runs (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 owner TEXT NOT NULL,
 repo TEXT NOT NULL,
 head_sha TEXT NOT NULL,
 judged_at INTEGER NOT NULL,
 verdict_json TEXT NOT NULL
 );
 CREATE INDEX IF NOT EXISTS idx_runs_owner_repo
 ON runs(owner, repo, judged_at DESC);

 CREATE TABLE IF NOT EXISTS projects (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 registered_by TEXT NOT NULL,
 repo_owner TEXT NOT NULL,
 repo_name TEXT NOT NULL,
 test_runner TEXT NOT NULL DEFAULT 'none',
 tracked_branches TEXT NOT NULL,
 display_name TEXT,
 team TEXT,
 registered_at INTEGER NOT NULL,
 status TEXT NOT NULL DEFAULT 'active',
 UNIQUE(repo_owner, repo_name)
 );
 CREATE INDEX IF NOT EXISTS idx_projects_registered_by
 ON projects(registered_by);
 `);
  }
  return db;
};
41-
/** Judging mode recorded on a verdict. */
export type Mode = "strict" | "pragmatic" | "learning";

/** Outcome classification of a single kata step. */
type StepStatus =
  | "verified"
  | "discipline-only"
  | "no-green"
  | "red-did-not-fail"
  | "green-did-not-pass"
  | "hidden-tests-failed"
  | "test-deleted"
  // Trace-only mode: tests are not executed, only commit discipline is
  // checked. Used when test_runner is "none" — language-agnostic, useful as
  // a CI gate on real projects where Bun can't run the test suite.
  | "trace-verified"
  | "trace-tests-shrunk";

/** Verdict for one step of a kata. */
export interface StepVerdict {
  stepId: string;
  redSha: string | null;
  greenSha: string | null;
  redFailed: boolean | null;
  greenPassed: boolean | null;
  /**
   * Whether the kata's authoritative hidden tests pass against the agent's
   * implementation at the green commit. `null` when no hidden tests exist
   * for the step (unknown kata, or step not registered with the spec).
   */
  hiddenPassed: boolean | null;
  status: StepStatus;
  scoreDelta: number;
  /**
   * Coach-style explanation of the verdict: what happened, why the score is
   * what it is, and (when relevant) how to improve next time.
   */
  explanation: string;
}

/** Verdict for one refactor commit. */
export interface RefactorVerdict {
  sha: string;
  stepId: string | null;
  testsPassed: boolean;
  scoreDelta: number;
  explanation: string;
}

/** Complete judging result for a repo at a given head commit. */
export interface Verdict {
  headSha: string;
  mode: Mode;
  steps: StepVerdict[];
  refactors: RefactorVerdict[];
  totalScore: number;
  judgedAt: number;
}
89-
/**
 * Appends a judging run for `owner/repo`.
 * The head sha and judged-at timestamp are stored as columns; the whole
 * verdict is stored alongside them as JSON.
 */
export const saveRun = (owner: string, repo: string, verdict: Verdict): void => {
  const database = getDb();
  const insertSql =
    `INSERT INTO runs (owner, repo, head_sha, judged_at, verdict_json) VALUES (?, ?, ?, ?, ?)`;
  database.run(insertSql, [owner, repo, verdict.headSha, verdict.judgedAt, JSON.stringify(verdict)]);
};
96-
/**
 * Loads the most recently judged verdict for `owner/repo`.
 * @returns the parsed verdict, or `null` when the repo has no runs.
 */
export const latestRun = (owner: string, repo: string): Verdict | null => {
  const statement = getDb().query<{ verdict_json: string }, [string, string]>(
    `SELECT verdict_json FROM runs WHERE owner = ? AND repo = ? ORDER BY judged_at DESC LIMIT 1`,
  );
  const hit = statement.get(owner, repo);
  return hit ? (JSON.parse(hit.verdict_json) as Verdict) : null;
};
106-
/** A registered project as the rest of the app sees it (camelCase, typed). */
export interface ProjectRow {
  id: number;
  registeredBy: string;
  repoOwner: string;
  repoName: string;
  testRunner: TestRunner;
  trackedBranches: string[];
  displayName: string | null;
  team: string | null;
  registeredAt: number;
  status: "active" | "paused";
}

/** Raw shape of a `projects` table row as returned by SQLite. */
interface ProjectDbRow {
  id: number;
  registered_by: string;
  repo_owner: string;
  repo_name: string;
  test_runner: string;
  // JSON-encoded array of branch names.
  tracked_branches: string;
  display_name: string | null;
  team: string | null;
  registered_at: number;
  status: string;
}

/**
 * Decodes the stored `tracked_branches` JSON.
 * Anything that is not valid JSON or not an array yields an empty list, and
 * non-string entries are dropped, so a single corrupt row cannot make every
 * project listing throw.
 */
const parseTrackedBranches = (raw: string): string[] => {
  try {
    const parsed: unknown = JSON.parse(raw);
    if (Array.isArray(parsed)) {
      return parsed.filter((b): b is string => typeof b === "string");
    }
  } catch {
    // Malformed JSON: fall through to the empty default.
  }
  return [];
};

/**
 * Maps a database row onto a ProjectRow.
 * Unknown `test_runner` values collapse to "none" and unknown `status`
 * values collapse to "active".
 */
const rowToProject = (r: ProjectDbRow): ProjectRow => ({
  id: r.id,
  registeredBy: r.registered_by,
  repoOwner: r.repo_owner,
  repoName: r.repo_name,
  testRunner: (r.test_runner === "bun" ? "bun" : "none") as TestRunner,
  trackedBranches: parseTrackedBranches(r.tracked_branches),
  displayName: r.display_name,
  team: r.team,
  registeredAt: r.registered_at,
  status: r.status === "paused" ? "paused" : "active",
});
145-
/**
 * Inserts a project, or refreshes the config of an already registered repo.
 *
 * On a (repo_owner, repo_name) conflict only test_runner, tracked_branches,
 * display_name and team are overwritten and the status is reset to 'active';
 * no duplicate row is created.
 *
 * @returns the project as stored after the write.
 * @throws Error when the row cannot be read back.
 */
export const upsertProject = (
  registeredBy: string,
  repoOwner: string,
  repoName: string,
  config: ProjectConfig,
): ProjectRow => {
  const bindings = [
    registeredBy,
    repoOwner,
    repoName,
    config.test_runner,
    JSON.stringify(config.tracked_branches),
    config.display_name ?? null,
    config.team ?? null,
    Date.now(),
  ];
  const database = getDb();
  database.run(
    `INSERT INTO projects (registered_by, repo_owner, repo_name, test_runner, tracked_branches, display_name, team, registered_at, status)
 VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active')
 ON CONFLICT(repo_owner, repo_name) DO UPDATE SET
 test_runner = excluded.test_runner,
 tracked_branches = excluded.tracked_branches,
 display_name = excluded.display_name,
 team = excluded.team,
 status = 'active'`,
    bindings,
  );

  // Read the row back so the caller gets exactly what is stored.
  const stored = database
    .query<ProjectDbRow, [string, string]>(
      `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`,
    )
    .get(repoOwner, repoName);
  if (stored) return rowToProject(stored);
  throw new Error("project upsert returned no row");
};
178-
/**
 * Looks up one project by repo owner and name.
 * @returns the project, or `null` when the repo is not registered.
 */
export const getProject = (repoOwner: string, repoName: string): ProjectRow | null => {
  const lookup = getDb().query<ProjectDbRow, [string, string]>(
    `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`,
  );
  const found = lookup.get(repoOwner, repoName);
  if (!found) return null;
  return rowToProject(found);
};
187-
/** Lists every project whose status is 'active', most recently registered first. */
export const listActiveProjects = (): ProjectRow[] => {
  const statement = getDb().query<ProjectDbRow, []>(
    `SELECT * FROM projects WHERE status = 'active' ORDER BY registered_at DESC`,
  );
  const projects: ProjectRow[] = [];
  for (const record of statement.all()) {
    projects.push(rowToProject(record));
  }
  return projects;
};
196-
/**
 * Latest verdict per (owner, repo) across all agents — drives the
 * leaderboard and the /agents index.
 *
 * The newest run is selected by row id rather than by matching
 * `judged_at = MAX(judged_at)`: with the timestamp comparison, two runs of
 * the same repo that share a `judged_at` value would both be returned and
 * the repo would appear twice. Ties are broken in favour of the row that
 * was inserted last.
 */
export const allLatestRuns = (): { owner: string; repo: string; verdict: Verdict }[] => {
  const rows = getDb()
    .query<{ owner: string; repo: string; verdict_json: string }, []>(
      `SELECT r1.owner, r1.repo, r1.verdict_json FROM runs r1
 WHERE r1.id = (
 SELECT r2.id FROM runs r2
 WHERE r2.owner = r1.owner AND r2.repo = r1.repo
 ORDER BY r2.judged_at DESC, r2.id DESC
 LIMIT 1
 )`,
    )
    .all();
  return rows.map((r) => ({
    owner: r.owner,
    repo: r.repo,
    verdict: JSON.parse(r.verdict_json) as Verdict,
  }));
};
removed src/forgejo.ts +0 −261
@@ -1,261 +0,0 @@
// Forgejo endpoint and admin credentials, both taken from the environment.
// FORGEJO_URL is the internal URL — the Bun container talks to Forgejo via
// host.containers.internal (rootless podman's standard hostname for the host
// network). Without it the public URL is used, which suits local dev.
// An unset FORGEJO_ADMIN_TOKEN becomes the empty string.
const {
  FORGEJO_URL = "https://git.tdd.md",
  FORGEJO_ADMIN_TOKEN: ADMIN_TOKEN = "",
} = process.env;

/** Authorization header for calls made with the admin token. */
const adminAuth = (): HeadersInit => {
  return { Authorization: "token " + ADMIN_TOKEN };
};
10-
/**
 * HTTP Basic Authorization header for a Forgejo user.
 *
 * The `username:password` pair is encoded as UTF-8 before base64. Calling
 * `btoa` directly on the pair throws an InvalidCharacterError as soon as
 * either part contains a character outside Latin-1; for plain ASCII
 * credentials the result is unchanged.
 */
const userAuth = (username: string, password: string): HeadersInit => {
  const bytes = new TextEncoder().encode(`${username}:${password}`);
  // btoa expects a "binary string": one char per byte.
  const binary = String.fromCharCode(...bytes);
  return { Authorization: `Basic ${btoa(binary)}` };
};
14-
/** True when an admin token is available, i.e. ADMIN_TOKEN is non-empty. */
export const isConfigured = (): boolean => {
  return ADMIN_TOKEN.length > 0;
};
16-
/**
 * Checks whether a Forgejo user exists.
 * Only an HTTP 200 from the user endpoint counts as "exists"; every other
 * status yields `false`.
 */
export const userExists = async (username: string): Promise<boolean> => {
  const endpoint = `${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}`;
  const { status } = await fetch(endpoint, { headers: adminAuth() });
  return status === 200;
};
23-
/**
 * Creates a Forgejo user through the admin API.
 * The full name defaults to the username; the account is created without a
 * forced password change and without a notification mail.
 *
 * @throws Error carrying the HTTP status and response body on failure.
 */
export const createUser = async (params: {
  username: string;
  email: string;
  password: string;
  fullName?: string;
}): Promise<void> => {
  const { username, email, password, fullName } = params;
  const payload = {
    username,
    email,
    password,
    full_name: fullName ?? username,
    must_change_password: false,
    send_notify: false,
  };
  const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users`, {
    method: "POST",
    headers: { ...adminAuth(), "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (res.ok) return;
  const detail = await res.text();
  throw new Error(`forgejo createUser ${res.status}: ${detail}`);
};
47-
/**
 * Replaces a user's password through the admin API, without forcing a
 * password change on next login.
 *
 * @throws Error carrying the HTTP status and response body on failure.
 */
export const setUserPassword = async (username: string, password: string): Promise<void> => {
  const endpoint = `${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(username)}`;
  const payload = {
    password,
    must_change_password: false,
    source_id: 0,
    login_name: username,
  };
  const res = await fetch(endpoint, {
    method: "PATCH",
    headers: { ...adminAuth(), "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (res.ok) return;
  const detail = await res.text();
  throw new Error(`forgejo setUserPassword ${res.status}: ${detail}`);
};
64-
/**
 * Checks whether `owner/repo` exists on Forgejo.
 * Only an HTTP 200 counts as "exists".
 */
export const repoExists = async (owner: string, repo: string): Promise<boolean> => {
  const ownerPart = encodeURIComponent(owner);
  const repoPart = encodeURIComponent(repo);
  const { status } = await fetch(`${FORGEJO_URL}/api/v1/repos/${ownerPart}/${repoPart}`, {
    headers: adminAuth(),
  });
  return status === 200;
};
71-
/**
 * Makes sure the repo has a push webhook pointing at `webhookUrl`.
 *
 * The hook is registered with the given secret so the receiving endpoint can
 * verify deliveries. Existing hooks are listed first and nothing is created
 * when one already targets the same URL. If the listing call itself fails,
 * creation is attempted anyway.
 *
 * @throws Error carrying the HTTP status and response body when creation fails.
 */
export const ensureRepoWebhook = async (params: {
  owner: string;
  repo: string;
  webhookUrl: string;
  secret: string;
}): Promise<void> => {
  const { owner, repo, webhookUrl, secret } = params;
  const hooksUrl = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}/hooks`;

  const listing = await fetch(hooksUrl, { headers: adminAuth() });
  if (listing.ok) {
    const hooks = (await listing.json()) as { id: number; config: { url?: string } }[];
    for (const hook of hooks) {
      if (hook.config?.url === webhookUrl) return;
    }
  }

  const payload = {
    type: "forgejo",
    active: true,
    events: ["push"],
    config: {
      url: webhookUrl,
      content_type: "json",
      secret,
    },
  };
  const res = await fetch(hooksUrl, {
    method: "POST",
    headers: { ...adminAuth(), "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (res.ok) return;
  const detail = await res.text();
  throw new Error(`forgejo ensureRepoWebhook ${res.status}: ${detail}`);
};
108-
/**
 * Creates a private, uninitialised repo owned by `username` via the admin API.
 *
 * @throws Error carrying the HTTP status and response body on failure.
 */
export const createRepoForUser = async (params: {
  username: string;
  name: string;
  description?: string;
}): Promise<void> => {
  const endpoint = `${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(params.username)}/repos`;
  const payload = {
    name: params.name,
    description: params.description ?? "",
    // Private by default — the source is the agent's, not ours to publish.
    // Verdicts still render on tdd.md via admin-mediated API calls; clones
    // require the agent's push token.
    private: true,
    // No auto_init: the agent's first push becomes the genuine initial
    // commit. An admin-authored "Initial commit" would muddle the phase log
    // and break attribution on the agent's repo page.
    auto_init: false,
    default_branch: "main",
  };
  const res = await fetch(endpoint, {
    method: "POST",
    headers: { ...adminAuth(), "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (res.ok) return;
  const detail = await res.text();
  throw new Error(`forgejo createRepo ${res.status}: ${detail}`);
};
136-
/** The fields of a Forgejo access token that this module reads. */
interface TokenInfo {
  id: number;
  name: string;
}

/**
 * Lists a user's access tokens, authenticating as that user.
 * A non-OK response is treated as "no tokens" and yields an empty list.
 */
const listTokens = async (username: string, password: string): Promise<TokenInfo[]> => {
  const endpoint = `${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens`;
  const res = await fetch(endpoint, { headers: userAuth(username, password) });
  return res.ok ? ((await res.json()) as TokenInfo[]) : [];
};
149-
/**
 * Deletes one of a user's access tokens, authenticating as that user.
 * The response is not inspected.
 */
const deleteToken = async (username: string, password: string, tokenId: number): Promise<void> => {
  const endpoint = `${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens/${tokenId}`;
  const request = { method: "DELETE", headers: userAuth(username, password) };
  await fetch(endpoint, request);
};
156-
/**
 * Issues a fresh access token named `params.name` for the user.
 *
 * Tokens that already carry that name are deleted first, one after another,
 * so re-registration always hands out a new token and invalidates the
 * previous one.
 *
 * @returns the token value (the `sha1` field of Forgejo's response).
 * @throws Error carrying the HTTP status and response body when creation fails.
 */
export const createPushToken = async (params: {
  username: string;
  password: string;
  name: string;
}): Promise<string> => {
  const { username, password, name } = params;

  const stale = (await listTokens(username, password)).filter((token) => token.name === name);
  for (const token of stale) {
    await deleteToken(username, password, token.id);
  }

  const payload = {
    name,
    // write:repository for the push; read:user so the agent can verify
    // their own identity against tdd.md's self-service endpoints
    // (e.g. POST /api/agents/:name/visibility).
    scopes: ["write:repository", "read:user"],
  };
  const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens`, {
    method: "POST",
    headers: { ...userAuth(username, password), "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`forgejo createPushToken ${res.status}: ${detail}`);
  }
  const { sha1 } = (await res.json()) as { sha1: string };
  return sha1;
};
189-
/** Generates a password of 32 random bytes, rendered as 64 lowercase hex characters. */
const randomPassword = (): string => {
  const bytes = crypto.getRandomValues(new Uint8Array(32));
  let hex = "";
  for (const byte of bytes) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
};
194-
/** What an agent receives after (re-)registration. */
export interface AgentRegistration {
  username: string;
  pushToken: string;
  repoCloneUrl: string;
  isNew: boolean;
}

/**
 * Registers an agent, or refreshes an existing one.
 *
 * Idempotent: a new user is created with a random password, an existing user
 * has the password reset. In both cases the "tdd-md-push" token is rotated
 * and the kata repo (default "string-calc") is created when missing. When
 * WEBHOOK_SECRET is set, a push webhook is installed as well; a failure there
 * is logged and does not fail the registration.
 */
export const registerAgent = async (params: {
  username: string;
  email: string;
  fullName?: string;
  kata?: string;
}): Promise<AgentRegistration> => {
  const { username, email, fullName } = params;
  const password = randomPassword();

  const alreadyThere = await userExists(username);
  if (alreadyThere) {
    await setUserPassword(username, password);
  } else {
    await createUser({ username, email, password, fullName });
  }

  const pushToken = await createPushToken({ username, password, name: "tdd-md-push" });

  const kata = params.kata ?? "string-calc";
  const hasRepo = await repoExists(username, kata);
  if (!hasRepo) {
    await createRepoForUser({
      username,
      name: kata,
      description: `${username}'s submission for the ${kata} kata`,
    });
  }

  const baseUrl = process.env.BASE_URL ?? "https://tdd.md";
  const webhookSecret = process.env.WEBHOOK_SECRET;
  if (webhookSecret) {
    try {
      await ensureRepoWebhook({
        owner: username,
        repo: kata,
        webhookUrl: `${baseUrl}/api/forgejo/webhook`,
        secret: webhookSecret,
      });
    } catch (err) {
      // Webhook is convenience; registration must still succeed without it.
      console.error(`webhook setup failed for ${username}/${kata}:`, err);
    }
  }

  return {
    username,
    pushToken,
    repoCloneUrl: `${baseUrl}/${username}/${kata}.git`,
    isNew: !alreadyThere,
  };
};
removed src/games.test.ts +0 −26
@@ -1,26 +0,0 @@
1-import { test, expect } from "bun:test";
2-import { loadGame } from "./games";
3-
// The id passed to loadGame must come back on the loaded spec.
test("loadGame returns a game with the expected id", async () => {
  const { id } = await loadGame("string-calc");
  expect(id).toBe("string-calc");
});

// Step order matters: the assertion compares the full ordered list.
test("loadGame returns the kata's step ids in order", async () => {
  const { steps } = await loadGame("string-calc");
  const stepIds = steps.map((step) => step.id);
  expect(stepIds).toEqual([
    "empty",
    "single-number",
    "two-numbers",
    "n-numbers",
    "newline-separator",
    "custom-separator",
    "negatives-throw",
  ]);
});

// Unknown ids must reject with a message that names the offending id.
test("loadGame throws a clear error for an unknown game", async () => {
  const attempt = loadGame("does-not-exist");
  await expect(attempt).rejects.toThrow(/unknown game: does-not-exist/);
});
removed src/games.ts +0 −55
@@ -1,55 +0,0 @@
/** One step of a kata. */
export interface Step {
  id: string;
  requirement: string;
  /**
   * Path (relative to the kata's spec.ts) of the authoritative test file.
   * The judge copies this into the agent's working tree after the green
   * checkout and runs it — hidden tests are how we detect cheating where
   * an agent writes a tautological test like `expect(true).toBe(true)`.
   */
  hiddenTestFile: string;
}
10-
/** A kata specification as exported by content/games/<id>/spec.ts. */
export interface Game {
  id: string;
  /** One-line summary shown on the games index and OG previews. */
  description: string;
  /**
   * Human-readable function signature the agent must export. Documented
   * on the kata page so authors know what to build.
   */
  signature: string;
  /**
   * The module path the hidden tests will import from. Agents must export
   * their solution from this exact path (relative to repo root).
   */
  importPath: string;
  steps: Step[];
}
23-
24-import { readdir } from "node:fs/promises";
25-
/**
 * Loads every kata found under content/games/ and returns the specs sorted
 * by directory name. Used to build the games index and sitemap without
 * hard-coding individual kata ids.
 *
 * An unreadable content directory yields an empty list; a kata that fails
 * to load (missing spec.ts, etc.) is left out rather than failing the call.
 */
export async function listGames(): Promise<Game[]> {
  const dirents = await readdir("./content/games", { withFileTypes: true }).catch(() => undefined);
  if (dirents === undefined) return [];

  const kataIds = dirents
    .filter((dirent) => dirent.isDirectory())
    .map((dirent) => dirent.name)
    .sort();

  const loaded: Game[] = [];
  for (const kataId of kataIds) {
    try {
      const game = await loadGame(kataId);
      loaded.push(game);
    } catch {
      // skip katas that fail to load (missing spec.ts, etc.)
    }
  }
  return loaded;
}
47-
// Kata ids double as directory names under content/games/, so only plain
// path-segment characters are accepted. Must start with an alphanumeric:
// this rules out "", ".", "..", and anything containing a path separator.
const GAME_ID_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;

/**
 * Loads the spec for a single kata from content/games/<id>/spec.ts.
 *
 * The id is validated before it reaches the filesystem or `import()`, so a
 * caller-supplied value such as "../../secrets" cannot escape the games
 * directory.
 *
 * @param id Kata id (directory name under content/games/).
 * @returns The module's exported `spec`.
 * @throws Error(`unknown game: <id>`) when the id is malformed or no
 *         spec.ts exists for it.
 */
export async function loadGame(id: string): Promise<Game> {
  if (!GAME_ID_PATTERN.test(id)) {
    throw new Error(`unknown game: ${id}`);
  }
  const file = Bun.file(`./content/games/${id}/spec.ts`);
  if (!(await file.exists())) {
    throw new Error(`unknown game: ${id}`);
  }
  const mod = await import(`../content/games/${id}/spec.ts`);
  return mod.spec as Game;
}
removed src/github_oauth.ts +0 −80
@@ -1,80 +0,0 @@
// GitHub OAuth app credentials, read once at module load. An unset
// variable becomes "" so isConfigured() can detect it.
const {
  GITHUB_CLIENT_ID: CLIENT_ID = "",
  GITHUB_CLIENT_SECRET: CLIENT_SECRET = "",
} = process.env;
3-
/** The fields of GitHub's `GET /user` response that this module types. */
export interface GithubUser {
  login: string;
  id: number;
  /** Null when the response carries no email for the user. */
  email: string | null;
  avatar_url: string;
  name: string | null;
}
11-
/** One entry of GitHub's `GET /user/emails` response. */
export interface GithubEmail {
  email: string;
  /** True for the address marked as primary. */
  primary: boolean;
  /** True once the address has been verified. */
  verified: boolean;
  visibility: string | null;
}
18-
/** True when both the OAuth client id and client secret are non-empty. */
export const isConfigured = (): boolean =>
  [CLIENT_ID, CLIENT_SECRET].every((credential) => credential !== "");
20-
/**
 * Builds the GitHub authorization URL the browser is redirected to.
 *
 * @param state Opaque value sent as the `state` query parameter.
 * @param redirectUri Sent as `redirect_uri`.
 */
export const authorizeUrl = (state: string, redirectUri: string): string => {
  const query = new URLSearchParams();
  query.append("client_id", CLIENT_ID);
  query.append("redirect_uri", redirectUri);
  query.append("scope", "read:user user:email");
  query.append("state", state);
  query.append("allow_signup", "true");
  return "https://github.com/login/oauth/authorize?" + query.toString();
};
31-
/**
 * Trades an OAuth authorization code for an access token.
 *
 * @throws Error when GitHub answers with a non-2xx status, or with a 2xx
 *         body that carries no `access_token` (the message includes
 *         GitHub's error description when one is present).
 */
export const exchangeCode = async (code: string, redirectUri: string): Promise<string> => {
  const payload = {
    client_id: CLIENT_ID,
    client_secret: CLIENT_SECRET,
    code,
    redirect_uri: redirectUri,
  };
  const response = await fetch("https://github.com/login/oauth/access_token", {
    method: "POST",
    headers: {
      Accept: "application/json",
      "Content-Type": "application/json",
    },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    throw new Error(`github token exchange failed: ${response.status}`);
  }

  const reply = (await response.json()) as {
    access_token?: string;
    error?: string;
    error_description?: string;
  };
  if (reply.access_token) {
    return reply.access_token;
  }
  // GitHub reports some failures with a 200 status and an error payload.
  const reason = reply.error_description ?? reply.error ?? "unknown";
  throw new Error(`github token exchange returned no token: ${reason}`);
};
55-
/**
 * Fetches the profile of the user the access token belongs to.
 *
 * @throws Error when GitHub answers with a non-2xx status.
 */
export const fetchUser = async (accessToken: string): Promise<GithubUser> => {
  const headers = {
    Authorization: `token ${accessToken}`,
    Accept: "application/vnd.github+json",
    "User-Agent": "tdd.md",
  };
  const response = await fetch("https://api.github.com/user", { headers });
  if (!response.ok) {
    throw new Error(`github user fetch failed: ${response.status}`);
  }
  const profile = (await response.json()) as GithubUser;
  return profile;
};
67-
/**
 * Returns a verified email for the token's user: the primary address when
 * it is verified, otherwise the first verified address.
 *
 * @returns null when the request fails or no verified address exists.
 */
export const fetchPrimaryEmail = async (accessToken: string): Promise<string | null> => {
  const headers = {
    Authorization: `token ${accessToken}`,
    Accept: "application/vnd.github+json",
    "User-Agent": "tdd.md",
  };
  const response = await fetch("https://api.github.com/user/emails", { headers });
  if (!response.ok) {
    return null;
  }
  const addresses = (await response.json()) as GithubEmail[];
  const verifiedOnly = addresses.filter((address) => address.verified);
  const primary = verifiedOnly.find((address) => address.primary);
  return primary?.email ?? verifiedOnly[0]?.email ?? null;
};
removed src/judge.ts +0 −370
@@ -1,370 +0,0 @@
1-import { mkdtempSync, rmSync } from "fs";
2-import { join } from "path";
3-import { tmpdir } from "os";
4-import { parseCommit, type Phase } from "./commits";
5-import { saveRun, type Verdict, type StepVerdict, type RefactorVerdict, type Mode } from "./db";
6-import { loadGame, type Game } from "./games";
7-
/** How the judge executes tests: with `bun test`, or not at all (trace-only). */
type TestRunner = "bun" | "none";

/** Judge settings resolved from the agent repo's tdd.config.json. */
interface TddConfig {
  /** Scoring mode. */
  mode: Mode;
  /** Test runner selection. */
  testRunner: TestRunner;
}
14-
/**
 * Reads tdd.config.json from the agent's repo to pick the scoring mode and
 * test runner. Falls back to strict / bun when the file is missing,
 * unparseable, or holds unrecognised values.
 *
 *   { "mode": "pragmatic", "test_runner": "none" }
 *
 * test_runner: "none" enables trace-only judging — no checkout, no test
 * execution. Useful as a CI gate on projects where Bun can't run the
 * suite (e.g. .NET, Python without bun-compat tests).
 */
const readConfig = async (cwd: string): Promise<TddConfig> => {
  const fallback: TddConfig = { mode: "strict", testRunner: "bun" };
  const configFile = Bun.file(join(cwd, "tdd.config.json"));
  if (!(await configFile.exists())) return fallback;

  try {
    const parsed = (await configFile.json()) as { mode?: string; test_runner?: string };
    // Destructuring stays inside the try: a JSON `null` throws here and
    // falls back to the defaults like any other bad config.
    const { mode, test_runner: runner } = parsed;
    return {
      mode: mode === "pragmatic" || mode === "learning" ? mode : "strict",
      testRunner: runner === "none" ? "none" : "bun",
    };
  } catch {
    // best effort — bad config falls back to defaults
    return fallback;
  }
};
38-
/**
 * Adjusts a score delta for the scoring mode. Penalties are halved
 * (rounded toward zero) in pragmatic mode and dropped in learning mode;
 * zero and positive deltas are returned unchanged in every mode — earned
 * credit is earned credit.
 */
const applyMode = (delta: number, mode: Mode): number => {
  if (delta >= 0) return delta;
  switch (mode) {
    case "learning":
      return 0;
    case "pragmatic":
      return Math.ceil(delta / 2);
    default:
      return delta;
  }
};
47-
/**
 * Plain-language summary of a step verdict, addressed to the agent (not
 * the human admin). One short paragraph per status, meant to sit next to
 * the row in the score table.
 *
 * Only `status`, `hiddenPassed` and `mode` influence the text; the sha
 * fields are accepted but not read.
 */
const explainStep = (params: {
  status: StepVerdict["status"];
  redSha: string | null;
  greenSha: string | null;
  hiddenPassed: boolean | null;
  mode: Mode;
}): string => {
  const { status, hiddenPassed, mode } = params;

  // The two statuses whose wording depends on more than the status itself.
  if (status === "red-did-not-fail") {
    if (mode === "pragmatic") {
      return "Combined red+green commit detected. Pragmatic mode allows this — the cycle still counts, just with a softer score than a clean separation.";
    }
    return "Red commit's tests already passed when the step was first introduced — meaning the implementation was added before the test, or the test is tautological. Switch to pragmatic mode if you commit red+green together intentionally.";
  }
  if (status === "hidden-tests-failed") {
    if (hiddenPassed === false) {
      return "Your tests pass, but the kata's hidden tests don't — this is the classic tautology trap. Tighten your test to mirror the requirement (e.g., assert the actual return value, not just that it runs).";
    }
    return "Your tests pass, but hidden verification was inconclusive. Re-push to retry.";
  }

  switch (status) {
    case "verified":
      return "Red failed as expected, green passes your tests, and the kata's hidden tests confirm the implementation matches the requirement.";
    case "discipline-only":
      return "Red→green discipline holds, but this kata didn't ship hidden tests for the step. Partial credit awarded; full +20 isn't possible without authoritative verification.";
    case "no-green":
      return "Red commit landed; the matching green(<step>) commit hasn't been pushed yet. Push your green to lock in the score.";
    case "green-did-not-pass":
      return "Green commit's own tests still fail. The implementation doesn't yet satisfy the test you wrote — fix the impl, or reconsider whether the test reflects the requirement.";
    case "test-deleted":
      return "Test count dropped between red and green for this step. Once a test exists it must keep existing — refactor it, don't delete it. If the test was wrong, replace it in a separate commit before resuming the cycle.";
    case "trace-verified":
      return "Trace-only mode: red→green pair found in the commit log. Tests weren't executed (test_runner: \"none\"). Switch to bun runner for behaviour verification.";
    case "trace-tests-shrunk":
      return "Trace-only mode: the green commit's tree has fewer test files than the red commit's tree — looks like deletion. If you renamed or split test files, the tally still drops.";
  }
};
84-
/** Plain-language summary of a refactor verdict, keyed on whether the tests passed at that commit. */
const explainRefactor = (passed: boolean): string => {
  if (passed) {
    return "Tests stayed green through the refactor — structural change without behavior change, the canonical refactor.";
  }
  return "Refactor commit broke at least one test. Either revert the refactor or write a new red→green to capture the changed behavior.";
};
89-
// Base URL agent repos are cloned from; FORGEJO_URL overrides the default.
const FORGEJO_INTERNAL: string = process.env.FORGEJO_URL ?? "https://git.tdd.md";
// Time budget for a single `bun test` invocation, in milliseconds.
const TEST_TIMEOUT_MS = 8_000;
92-
/**
 * Environment handed to git and bun subprocesses. Built from scratch
 * rather than copied from the parent process, so agent code never sees
 * FORGEJO_ADMIN_TOKEN, GITHUB_CLIENT_SECRET, or SESSION_SECRET. PATH is
 * fixed; HOME and TMPDIR point at the per-run directory so dotfile writes
 * stay inside it.
 */
const sandboxEnv = (cwd: string): Record<string, string> => {
  const env: Record<string, string> = {};
  env.PATH = "/usr/local/bin:/usr/bin:/bin";
  env.HOME = cwd;
  env.TMPDIR = cwd;
  env.NODE_ENV = "test";
  return env;
};
103-
/**
 * Runs a command inside the sandboxed environment and captures its output.
 *
 * stdout and stderr are drained concurrently with waiting for the exit.
 * Waiting for the exit first and reading afterwards can stall a child
 * that fills a pipe before it is read, which would then only end via the
 * timeout kill.
 *
 * @param cmd Argument vector; cmd[0] is the executable.
 * @param cwd Working directory, also used for the sandbox HOME/TMPDIR.
 * @param timeoutMs After this long the process is sent SIGKILL.
 * @returns Trimmed stdout/stderr, the exit code, and whether the timeout
 *          fired.
 */
const runProc = async (
  cmd: string[],
  cwd: string,
  timeoutMs: number,
): Promise<{ stdout: string; stderr: string; exitCode: number; timedOut: boolean }> => {
  const proc = Bun.spawn(cmd, {
    cwd,
    stdout: "pipe",
    stderr: "pipe",
    env: sandboxEnv(cwd),
  });
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    proc.kill("SIGKILL");
  }, timeoutMs);
  try {
    const [exitCode, stdout, stderr] = await Promise.all([
      proc.exited,
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
    ]);
    return { stdout: stdout.trim(), stderr: stderr.trim(), exitCode, timedOut };
  } finally {
    // Always disarm the timer, even when reading the pipes throws.
    clearTimeout(timer);
  }
};
126-
/**
 * Runs `bun test` in `cwd`. Resolves to true only when the run finished
 * within TEST_TIMEOUT_MS and exited 0 (bun test exits 0 only when all
 * tests pass).
 */
const runTests = async (cwd: string): Promise<boolean> => {
  const { exitCode, timedOut } = await runProc(["bun", "test"], cwd, TEST_TIMEOUT_MS);
  if (timedOut) return false;
  return exitCode === 0;
};
132-
// Conventional test-file names across ecosystems, matched against a
// repo-relative path: foo.test.ts, foo.spec.ts, anything directly inside a
// test/ or tests/ directory, test_foo.py, foo_test.go, foo_spec.rb,
// FooTests.cs, FooTest.java, FooSpec.scala.
const TEST_FILE_PATH =
  /(?:^|\/)(?:[^/]*\.(?:test|spec)\.[a-z]+|[Tt]ests?\/[^/]+|test_[^/]+|[^/]+_(?:test|spec)\.[a-z]+|[^/]+[Tt]ests?\.cs|[^/]+[Tt]est\.java|[^/]+Spec\.scala)$/;

/**
 * Language-agnostic test-file counter for trace-only mode. Lists the tree
 * at `sha` with `git ls-tree`, so no checkout of the working tree is
 * needed, and counts the paths that look like test files.
 *
 * @returns The number of matching paths, or 0 when git fails.
 */
const countTestFiles = async (cwd: string, sha: string): Promise<number> => {
  const r = await runProc(["git", "ls-tree", "-r", "--name-only", sha], cwd, 5000);
  if (r.exitCode !== 0) return 0;
  return r.stdout.split("\n").filter((path) => TEST_FILE_PATH.test(path)).length;
};
148-
/**
 * Counts `test(` / `it(` calls across the tracked *.test.ts files of the
 * current checkout, ignoring injected `__hidden_` files. Used to detect
 * when an agent deletes tests between red and green to make a regression
 * "pass" — a cardinal TDD sin per the kata spec.
 *
 * @returns The total number of calls, or 0 when `git ls-files` fails.
 */
const countTests = async (cwd: string): Promise<number> => {
  const listing = await runProc(["git", "ls-files", "*.test.ts"], cwd, 5000);
  if (listing.exitCode !== 0) return 0;

  let total = 0;
  for (const relPath of listing.stdout.split("\n")) {
    if (!relPath || relPath.includes("__hidden_")) continue;
    // An unreadable file counts as empty rather than failing the run.
    const source = await Bun.file(join(cwd, relPath))
      .text()
      .catch(() => "");
    const calls = source.match(/\b(?:test|it)\s*\(/g);
    if (calls) total += calls.length;
  }
  return total;
};
166-
/**
 * Runs the kata's authoritative tests against the agent's implementation
 * at whatever commit is currently checked out. The hidden test file is
 * copied into the working tree under a `__hidden_<step>__` name so it
 * doesn't collide with the agent's filenames, only that file is run, and
 * the copy is deleted afterwards.
 *
 * @returns true/false for pass/fail (a timeout counts as a fail), or null
 *          when the spec has no such step or the hidden test file doesn't
 *          exist.
 */
const runHiddenTests = async (cwd: string, spec: Game, stepId: string): Promise<boolean | null> => {
  const step = spec.steps.find((candidate) => candidate.id === stepId);
  if (!step) return null;

  const hiddenSource = Bun.file(`./content/games/${spec.id}/${step.hiddenTestFile}`);
  if (!(await hiddenSource.exists())) return null;
  const hiddenCode = await hiddenSource.text();

  const injectedName = `__hidden_${stepId}__.test.ts`;
  const injectedPath = join(cwd, injectedName);
  await Bun.write(injectedPath, hiddenCode);
  try {
    const { timedOut, exitCode } = await runProc(["bun", "test", injectedName], cwd, TEST_TIMEOUT_MS);
    return !timedOut && exitCode === 0;
  } finally {
    try {
      rmSync(injectedPath, { force: true });
    } catch {
      // best effort
    }
  }
};
193-
/** A commit reduced to what the judge needs from `git log`. */
interface CommitInfo {
  /** Full commit hash (`%H`). */
  sha: string;
  /** Phase returned by parseCommit for the commit message. */
  phase: Phase;
  /** Step returned by parseCommit, or null. */
  step: string | null;
}
199-
/**
 * Lists the repo's commits oldest-first, each tagged with the phase and
 * step parsed from its message.
 *
 * The log format separates hash from message with \x1f and terminates each
 * record with \x1e, so multi-line messages survive the split.
 *
 * @returns An empty list when `git log` fails.
 */
const readCommits = async (cwd: string): Promise<CommitInfo[]> => {
  const log = await runProc(["git", "log", "--reverse", "--pretty=format:%H%x1f%B%x1e"], cwd, 10000);
  if (log.exitCode !== 0) return [];

  return log.stdout
    .split("\x1e")
    .map((record) => record.trim())
    .filter((record) => record !== "")
    .flatMap((record): CommitInfo[] => {
      const [sha, message = ""] = record.split("\x1f");
      if (!sha) return [];
      const { phase, step } = parseCommit(message);
      return [{ sha, phase, step }];
    });
};
214-
/**
 * Clones `<owner>/<repo>` into a throwaway directory, scores every
 * red→green step and every refactor commit, saves the verdict, and
 * returns it. The temp directory is removed afterwards (best effort).
 *
 * Checkouts are forced and verified. The agent's tests run inside the
 * working tree and may leave tracked files modified, which makes a plain
 * `git checkout` refuse to switch commits. Ignoring that failure would
 * score the wrong commit, so it aborts the run instead.
 *
 * @throws Error when the clone or any checkout fails.
 */
export const judge = async (owner: string, repo: string): Promise<Verdict> => {
  const cwd = mkdtempSync(join(tmpdir(), `judge-${owner}-${repo}-`));

  // Switches the working tree to `sha`, discarding local modifications.
  const checkout = async (sha: string): Promise<void> => {
    const r = await runProc(["git", "checkout", "--quiet", "--force", sha], cwd, 5000);
    if (r.exitCode !== 0) {
      const reason = r.timedOut ? "timed out" : r.stderr || r.stdout || `exit code ${r.exitCode}`;
      throw new Error(`checkout of ${sha} failed: ${reason}`);
    }
  };

  try {
    // Agent repos default to private. Authenticate via admin token in
    // an http.extraheader so the token isn't persisted in the cloned
    // repo's config (extraheader applies to the clone request only).
    const cloneUrl = `${FORGEJO_INTERNAL}/${owner}/${repo}.git`;
    const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
    const gitArgs = adminToken
      ? ["-c", `http.extraheader=Authorization: token ${adminToken}`, "clone", "--quiet", cloneUrl, "."]
      : ["clone", "--quiet", cloneUrl, "."];
    const cloneR = await runProc(["git", ...gitArgs], cwd, 30000);
    if (cloneR.exitCode !== 0) {
      throw new Error(`clone failed: ${cloneR.stderr || cloneR.stdout}`);
    }

    const commits = await readCommits(cwd);
    const headR = await runProc(["git", "rev-parse", "HEAD"], cwd, 5000);
    const headSha = headR.stdout;

    // First red per step + first green-after-red per step (chronological).
    const stepRed = new Map<string, string>();
    const stepGreen = new Map<string, string>();
    for (const c of commits) {
      if (!c.step) continue;
      if (c.phase === "red" && !stepRed.has(c.step)) {
        stepRed.set(c.step, c.sha);
      } else if (c.phase === "green" && stepRed.has(c.step) && !stepGreen.has(c.step)) {
        stepGreen.set(c.step, c.sha);
      }
    }

    // Read the agent's mode + runner preferences from tdd.config.json
    // (as present in the freshly cloned HEAD, before any checkout).
    const { mode, testRunner } = await readConfig(cwd);

    // Load the kata's authoritative spec — used to fetch hidden tests
    // per step. Repos that don't match a known kata get scored on red→green
    // discipline only (no hidden-test verification).
    let spec: Game | null = null;
    try {
      spec = await loadGame(repo);
    } catch {
      spec = null;
    }

    const steps: StepVerdict[] = [];
    for (const [stepId, redSha] of stepRed) {
      const greenSha = stepGreen.get(stepId) ?? null;

      if (testRunner === "none") {
        // Trace-only path: don't checkout, don't run anything. Score
        // purely from the commit log + a language-agnostic test-file
        // count via `git ls-tree`. Useful for non-Bun projects.
        const redFiles = await countTestFiles(cwd, redSha);
        const greenFiles = greenSha ? await countTestFiles(cwd, greenSha) : redFiles;
        const filesShrank = greenSha !== null && greenFiles < redFiles;

        let status: StepVerdict["status"];
        let baseDelta = 0;
        if (greenSha === null) {
          status = "no-green";
        } else if (filesShrank) {
          status = "trace-tests-shrunk";
          baseDelta = -10;
        } else {
          status = "trace-verified";
          baseDelta = 10;
        }
        const scoreDelta = applyMode(baseDelta, mode);
        const explanation = explainStep({ status, redSha, greenSha, hiddenPassed: null, mode });
        steps.push({
          stepId, redSha, greenSha,
          redFailed: null, greenPassed: null, hiddenPassed: null,
          status, scoreDelta, explanation,
        });
        continue;
      }

      await checkout(redSha);
      const redTestCount = await countTests(cwd);
      const redPassed = await runTests(cwd);
      const redFailed = !redPassed;
      let greenPassed: boolean | null = null;
      let hiddenPassed: boolean | null = null;
      let testsDeleted = false;
      if (greenSha) {
        await checkout(greenSha);
        const greenTestCount = await countTests(cwd);
        testsDeleted = greenTestCount < redTestCount;
        greenPassed = await runTests(cwd);
        // Hidden tests only run once the agent's own suite is green and intact.
        if (greenPassed && spec && !testsDeleted) {
          hiddenPassed = await runHiddenTests(cwd, spec, stepId);
        }
      }

      // Verdicts are checked worst-offence first; the first match wins.
      let status: StepVerdict["status"];
      let baseDelta = 0;
      if (greenSha === null) {
        status = "no-green";
      } else if (testsDeleted) {
        status = "test-deleted";
        baseDelta = -20;
      } else if (!redFailed) {
        status = "red-did-not-fail";
        baseDelta = -5;
      } else if (greenPassed === false) {
        status = "green-did-not-pass";
        baseDelta = -5;
      } else if (hiddenPassed === false) {
        status = "hidden-tests-failed";
        baseDelta = 0;
      } else if (hiddenPassed === true) {
        status = "verified";
        baseDelta = 20;
      } else {
        status = "discipline-only";
        baseDelta = 5;
      }
      const scoreDelta = applyMode(baseDelta, mode);
      const explanation = explainStep({ status, redSha, greenSha, hiddenPassed, mode });
      steps.push({ stepId, redSha, greenSha, redFailed, greenPassed, hiddenPassed, status, scoreDelta, explanation });
    }

    // Refactor commits aren't tied to red→green pairs: the spec rewards
    // any refactor that keeps the existing tests green. A broken refactor
    // (tests fail at the refactor commit) costs the same as a missed
    // green — discipline matters even outside red→green pairs.
    const refactors: RefactorVerdict[] = [];
    for (const c of commits) {
      if (c.phase !== "refactor") continue;
      await checkout(c.sha);
      const passed = await runTests(cwd);
      const baseDelta = passed ? 5 : -5;
      refactors.push({
        sha: c.sha,
        stepId: c.step,
        testsPassed: passed,
        scoreDelta: applyMode(baseDelta, mode),
        explanation: explainRefactor(passed),
      });
    }

    const totalScore =
      steps.reduce((a, s) => a + s.scoreDelta, 0) +
      refactors.reduce((a, r) => a + r.scoreDelta, 0);
    const verdict: Verdict = { headSha, mode, steps, refactors, totalScore, judgedAt: Date.now() };
    saveRun(owner, repo, verdict);
    return verdict;
  } finally {
    try {
      rmSync(cwd, { recursive: true, force: true });
    } catch {
      // best effort cleanup
    }
  }
};
removed src/projects.ts +0 −271
@@ -1,271 +0,0 @@
1-import type { ProjectRow } from "./db";
2-
3-// Project-tracking ingest contract — block 1 of the reporting pipeline.
4-//
5-// A "project" is a real repo whose pushes get scored on TDD discipline.
6-// Distinct from a kata: katas are the practice ground (fixed steps,
7-// hidden tests); projects are production code judged purely structurally.
8-//
9-// Onboarding: a repo opts in by adding `.tdd-md.json` at its root on the
10-// default branch. tdd.md fetches the file (via raw.githubusercontent),
11-// validates it, and registers the project in our SQLite store. Per-commit
12-// judging follows in a later sliver — this module covers config + ingest
13-// of the registration itself.
14-
/** File a repo adds at its root (default branch) to opt in to scoring. */
export const PROJECT_CONFIG_PATH = ".tdd-md.json";
/** The only `.tdd-md.json` schema version this parser accepts. */
export const PROJECT_CONFIG_VERSION = 1;

export type TestRunner = "none" | "bun";
export type AgentSlug = "claude-code" | "cursor" | "aider" | "unknown";

/** Validated contents of a repo's `.tdd-md.json`. */
export interface ProjectConfig {
  version: number;
  // "none" → trace-mode judging only (commit discipline, no test execution).
  // "bun" → full sandbox-runner judging (later sliver — registration accepts
  // the value but judging stays trace-only until the runner ships).
  test_runner: TestRunner;
  // Branches whose pushes get scored. Defaults to ["main"].
  tracked_branches: string[];
  // Optional reporting metadata.
  display_name?: string;
  team?: string;
}

/** Config values used when the optional keys are absent. */
export const DEFAULT_CONFIG: ProjectConfig = {
  version: PROJECT_CONFIG_VERSION,
  test_runner: "none",
  tracked_branches: ["main"],
};

/**
 * Validates and normalises a parsed JSON blob into a ProjectConfig.
 *
 * - `version` is required and must equal PROJECT_CONFIG_VERSION.
 * - `test_runner` is optional ("none" by default) and must be "none" or "bun".
 * - `tracked_branches` is optional (["main"] by default) and must be a
 *   non-empty array of non-empty strings; the returned array is a copy.
 * - `display_name` / `team` are kept only when they are non-empty strings.
 *
 * @throws Error with a human-readable message on failure — those messages
 *         are surfaced verbatim to the registering user, so they need to
 *         be useful.
 */
export const parseProjectConfig = (raw: unknown): ProjectConfig => {
  // Arrays are objects too; reject them here so the user gets the
  // "must be a JSON object" message rather than a confusing version error.
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
    throw new Error(".tdd-md.json must be a JSON object");
  }
  const obj = raw as Record<string, unknown>;

  const version = obj.version;
  if (typeof version !== "number" || version !== PROJECT_CONFIG_VERSION) {
    throw new Error(
      `.tdd-md.json has version ${JSON.stringify(version)}; expected ${PROJECT_CONFIG_VERSION}`,
    );
  }

  let testRunner: TestRunner = "none";
  const runner = obj.test_runner;
  if (runner !== undefined) {
    if (runner !== "none" && runner !== "bun") {
      throw new Error(
        `.tdd-md.json: test_runner must be "none" or "bun" (got ${JSON.stringify(runner)})`,
      );
    }
    testRunner = runner;
  }

  let trackedBranches: string[] = ["main"];
  const branches: unknown = obj.tracked_branches;
  if (branches !== undefined) {
    // An empty list would register a project that can never be scored.
    const valid =
      Array.isArray(branches) &&
      branches.length > 0 &&
      branches.every((b) => typeof b === "string" && b !== "");
    if (!valid) {
      throw new Error(".tdd-md.json: tracked_branches must be a non-empty array of branch names");
    }
    // Copy so later mutation of the caller's array can't change the config.
    trackedBranches = [...(branches as string[])];
  }

  const config: ProjectConfig = {
    version,
    test_runner: testRunner,
    tracked_branches: trackedBranches,
  };
  if (typeof obj.display_name === "string" && obj.display_name) {
    config.display_name = obj.display_name;
  }
  if (typeof obj.team === "string" && obj.team) {
    config.team = obj.team;
  }
  return config;
};
83-
/**
 * Pulls .tdd-md.json from a public GitHub repo's default branch via the
 * raw-content host and validates it. No auth — public-repo only for now
 * (private repos land when we install a GitHub App, deferred to a later
 * sliver).
 *
 * @throws Error with a user-facing message when the file is missing, the
 *         request fails, the body isn't JSON, or validation rejects it.
 */
export const fetchProjectConfig = async (
  repoOwner: string,
  repoName: string,
): Promise<ProjectConfig> => {
  const ownerSegment = encodeURIComponent(repoOwner);
  const repoSegment = encodeURIComponent(repoName);
  const url = `https://raw.githubusercontent.com/${ownerSegment}/${repoSegment}/HEAD/${PROJECT_CONFIG_PATH}`;

  const response = await fetch(url, {
    headers: { Accept: "application/json", "User-Agent": "tdd.md" },
  });
  if (response.status === 404) {
    throw new Error(
      `${PROJECT_CONFIG_PATH} not found in ${repoOwner}/${repoName} on the default branch (or the repo is private; private repos aren't supported yet).`,
    );
  }
  if (!response.ok) {
    throw new Error(
      `Couldn't fetch ${PROJECT_CONFIG_PATH} from ${repoOwner}/${repoName}: HTTP ${response.status}`,
    );
  }

  const body: unknown = await response.json().catch(() => {
    throw new Error(`${PROJECT_CONFIG_PATH} in ${repoOwner}/${repoName} isn't valid JSON`);
  });
  return parseProjectConfig(body);
};
113-
/**
 * Parses a GitHub repo URL or owner/repo shorthand. Accepts:
 *   https://github.com/syntaxai/tdd.md
 *   https://github.com/syntaxai/tdd.md.git
 *   https://github.com/syntaxai/tdd.md.git/
 *   github.com/syntaxai/tdd.md
 *   syntaxai/tdd.md
 *
 * @returns The owner and repo name.
 * @throws Error with a precise, user-facing message when the input is
 *         empty, isn't exactly owner/name, or uses invalid characters.
 */
export const parseRepoIdentifier = (raw: string): { owner: string; repo: string } => {
  const trimmed = raw.trim();
  if (!trimmed) throw new Error("Repository URL is required.");
  let path = trimmed;
  const httpsMatch = path.match(/^https?:\/\/(?:www\.)?github\.com\/(.+)$/i);
  if (httpsMatch?.[1]) path = httpsMatch[1];
  const bareMatch = path.match(/^github\.com\/(.+)$/i);
  if (bareMatch?.[1]) path = bareMatch[1];
  // Trailing slashes go first: otherwise "owner/repo.git/" keeps its
  // ".git" suffix and the repo name comes out as "repo.git".
  path = path.replace(/\/+$/, "").replace(/\.git$/i, "");
  const parts = path.split("/").filter(Boolean);
  const owner = parts[0];
  const repo = parts[1];
  if (parts.length !== 2 || !owner || !repo) {
    throw new Error(
      `Couldn't parse "${raw}" as a GitHub repo. Use a URL like https://github.com/owner/name or the shorthand owner/name.`,
    );
  }
  const validName = /^[A-Za-z0-9._-]+$/;
  // "." and ".." pass the character check but are path segments, not names.
  const dotsOnly = /^\.+$/;
  const invalid = [owner, repo].some((name) => !validName.test(name) || dotsOnly.test(name));
  if (invalid) {
    throw new Error(`"${raw}" contains characters that aren't valid for a GitHub owner/repo.`);
  }
  return { owner, repo };
};
142-
const HTML_ENTITY: Record<string, string> = {
  "&": "&amp;",
  '"': "&quot;",
  "<": "&lt;",
  ">": "&gt;",
};

/** HTML-escapes `&`, `"`, `<` and `>`. */
const escape = (s: string): string => s.replace(/[&"<>]/g, (ch) => HTML_ENTITY[ch] ?? ch);

/**
 * Renders one project as a row of the markdown table on the projects
 * landing page: linked display name (plus team), tracked branches, runner.
 *
 * Display name, team and branch names come from the repo's own config
 * file, so they are untrusted. Besides HTML-escaping, markdown and table
 * metacharacters are entity-encoded: a backtick can't close a code span, a
 * pipe can't end the cell, and brackets can't forge a link. Branches are
 * wrapped in `<code>` rather than backticks for the same reason.
 */
const projectListRow = (p: ProjectRow): string => {
  const literal = (s: string): string =>
    escape(s).replace(/[`*_~|\\\[\]]/g, (ch) => `&#${ch.charCodeAt(0)};`);

  const slug = `${p.repoOwner}/${p.repoName}`;
  const display = p.displayName ?? slug;
  const team = p.team ? ` <span class="muted">· ${literal(p.team)}</span>` : "";
  const branches = p.trackedBranches.map((b) => `<code>${literal(b)}</code>`).join(", ");
  const runner = p.testRunner === "none" ? "trace-only" : p.testRunner;
  return `| [${literal(display)}](/projects/${p.repoOwner}/${p.repoName}) ${team} | ${branches} | ${runner} |`;
};
154-
/**
 * Markdown for the /projects landing page: the table of tracked projects
 * (or a placeholder row when there are none), the registration link, and
 * the config-file reference.
 */
export const projectsLandingMd = (projects: ProjectRow[]): string => {
  let rows: string;
  if (projects.length > 0) {
    rows = projects.map((project) => projectListRow(project)).join("\n");
  } else {
    rows = `| _no projects yet — [register one](/projects/new)_ | | |`;
  }

  return `# projects

> Real repos that opted in to tdd.md scoring. Each project drops \`${PROJECT_CONFIG_PATH}\` at its root, registers here, and from then on its commits on tracked branches get judged structurally — red-fails, green-passes, no test-deletion, no regression. The aggregated scores feed [the reports](/reports).

## tracked

| project | branches | runner |
|---|---|---|
${rows}

## register a repo

[Register a project →](/projects/new) — paste a public GitHub URL; tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from the default branch and onboards it.

## the config file

Drop \`${PROJECT_CONFIG_PATH}\` at the root of your repo's default branch:

\`\`\`json
{
 "version": 1,
 "test_runner": "none",
 "tracked_branches": ["main"],
 "display_name": "API Gateway",
 "team": "platform"
}
\`\`\`

- **\`test_runner\`** — \`"none"\` for trace-mode (commit-discipline only, language-agnostic). \`"bun"\` will run the test suite once the sandbox-runner ships.
- **\`tracked_branches\`** — pushes to these branches get scored. Defaults to \`["main"]\`.
- **\`display_name\`** / **\`team\`** — optional, only used in the reporting UI.

## what comes next

Registration just stores the project. Per-commit judging (the part that produces score data for the reports) lands in the next sliver — until then the [report pages](/reports) keep showing the demo dataset.

[← back to tdd.md](/) · [the reports](/reports)
`;
};
198-
/**
 * Markdown for the /projects/new page.
 *
 * @param viewer Signed-in username, or null — anonymous visitors get a
 *               sign-in prompt instead of the form.
 * @param prefilled Optional value to pre-populate the repo input with.
 * @param errorMessage Optional failure text shown above the form.
 */
export const projectRegisterMd = (
  viewer: string | null,
  prefilled?: string,
  errorMessage?: string,
): string => {
  const signInPrompt = `# register a project

> You need to sign in before registering a project. We use your GitHub identity to record who onboarded the repo.

[ sign in with github → ](/auth/github/start)

[← all projects](/projects)
`;
  if (!viewer) return signInPrompt;

  let error = "";
  if (errorMessage) {
    error = `<div class="project-form-error"><strong>Couldn't register that repo:</strong><br>${escape(errorMessage)}</div>`;
  }
  let value = "";
  if (prefilled) {
    value = ` value="${escape(prefilled)}"`;
  }

  return `# register a project

> Paste a public GitHub URL. tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from its default branch, validates it, and onboards the repo. Re-register the same repo to refresh the config.

${error}

<form method="post" action="/projects/new" class="project-form">
 <label for="repo-url">Repository URL or <code>owner/name</code></label>
 <input id="repo-url" name="repo" type="text" required
 placeholder="https://github.com/owner/name"
 autocomplete="off" autocapitalize="off" autocorrect="off"${value} />
 <button type="submit">Register</button>
</form>

> Signed in as <code>${escape(viewer)}</code>. Don't have \`${PROJECT_CONFIG_PATH}\` yet? [See the format on /projects](/projects#the-config-file).

[← all projects](/projects)
`;
};
237-
/**
 * Markdown for a single project's detail page: header, config table,
 * runner note, scored-commits placeholder, and a re-register link.
 *
 * display_name, team and branch names originate from the repo's own config
 * file and are untrusted. Besides HTML-escaping, markdown and table
 * metacharacters are entity-encoded so a value can't close a code span,
 * end a table cell, or forge a link. Those values are wrapped in `<code>`
 * rather than backticks for the same reason.
 */
export const projectDetailMd = (p: ProjectRow): string => {
  const literal = (s: string): string =>
    escape(s).replace(/[`*_~|\\\[\]]/g, (ch) => `&#${ch.charCodeAt(0)};`);
  const code = (s: string): string => `<code>${literal(s)}</code>`;

  const display = p.displayName ?? `${p.repoOwner}/${p.repoName}`;
  const registeredAt = new Date(p.registeredAt).toISOString().slice(0, 10);
  const branches = p.trackedBranches.map(code).join(", ");
  const runnerNote = p.testRunner === "none"
    ? "Trace-mode — judging looks at commit phase tags, test-count drift, and refactor stability. No test execution."
    : "Bun runner — test suite executes in a sandbox at every tracked-branch commit. (Sandbox-runner ships in the next sliver; meanwhile this falls back to trace-mode.)";
  return `# ${literal(display)}

> [${escape(p.repoOwner)}/${escape(p.repoName)}](https://github.com/${p.repoOwner}/${p.repoName}) · registered by [${escape(p.registeredBy)}](/agents/${p.registeredBy}) on ${registeredAt}.

## config

| key | value |
|---|---|
| test_runner | \`${p.testRunner}\` |
| tracked_branches | ${branches} |
| display_name | ${p.displayName ? code(p.displayName) : "_(none)_"} |
| team | ${p.team ? code(p.team) : "_(none)_"} |
| status | \`${p.status}\` |

${runnerNote}

## scored commits

> _No commits judged yet._ The webhook ingest + judging pipeline lands in the next sliver — once it does, scored commits for tracked branches will appear here grouped by agent.

## refresh

Push an updated \`${PROJECT_CONFIG_PATH}\` to your default branch and [re-register](/projects/new?repo=${encodeURIComponent(`${p.repoOwner}/${p.repoName}`)}) to pick up the new config.

[← all projects](/projects)
`;
};
removed src/render.ts +0 −76
@@ -1,76 +0,0 @@
1-import { marked } from "marked";
2-
// Path of the stylesheet that renderPage inlines into every page.
const STYLE_CSS = "./public/style.css";
// Read once at module load (top-level await); the text is reused for every render.
const css = await Bun.file(STYLE_CSS).text();
5-
/** Top-level navigation sections; used to mark the active link in the nav bar. */
export type Section = "home" | "games" | "guides" | "blog" | "agents" | "leaderboard";

/** Inputs accepted by renderPage. */
export interface PageOptions {
  /** Used for <title>, og:title and twitter:title. */
  title: string;
  /** Markdown source that becomes the <main> content. */
  bodyMarkdown: string;
  /** Meta description; renderPage falls back to SITE_DESCRIPTION when omitted. */
  description?: string;
  /** Value for the canonical link and og:url; renderPage falls back to "https://tdd.md". */
  ogPath?: string;
  /** Nav section to highlight; no link is highlighted when omitted. */
  active?: Section;
  /** When true, a robots "noindex,nofollow" meta tag is emitted. */
  noindex?: boolean;
  /** Optional object serialised into an application/ld+json script tag. */
  jsonLd?: Record<string, unknown>;
}
17-
// Default meta description, used by renderPage when PageOptions.description is not set.
const SITE_DESCRIPTION = "Test-driven development for agentic coding. Scored katas, public verdicts.";
19-
/**
 * Escapes the four characters `&`, `"`, `<` and `>` as HTML entities so the
 * string can be placed in element content or a double-quoted attribute.
 * Single quotes are left untouched.
 */
const escape = (s: string): string =>
  s.replace(/[&"<>]/g, (ch) => {
    switch (ch) {
      case "&":
        return "&amp;";
      case '"':
        return "&quot;";
      case "<":
        return "&lt;";
      default:
        return "&gt;";
    }
  });
22-
/**
 * Renders one navigation anchor. The `nav-active` class attribute is added
 * only when `active` is true. `href` and `label` are inserted verbatim.
 */
const navLink = (href: string, label: string, active: boolean): string => {
  const attributes = [`href="${href}"`];
  if (active) {
    attributes.push('class="nav-active"');
  }
  return `<a ${attributes.join(" ")}>${label}</a>`;
};
27-
/**
 * Renders the site navigation bar: six links separated by a "·" span, with
 * the link matching `active` (if any) marked as active.
 */
const nav = (active?: Section): string => {
  const entries: ReadonlyArray<readonly [href: string, label: string, section: Section]> = [
    ["/", "tdd.md", "home"],
    ["/games", "games", "games"],
    ["/guides", "guides", "guides"],
    ["/blog", "blog", "blog"],
    ["/agents", "agents", "agents"],
    ["/leaderboard", "leaderboard", "leaderboard"],
  ];
  const separator = ' <span class="md-nav-sep">·</span> ';
  const links = entries.map(([href, label, section]) => navLink(href, label, active === section));
  return `<nav class="md-nav">${links.join(separator)}</nav>`;
};
29-
/**
 * Renders a complete HTML document around a markdown body.
 *
 * The markdown is converted with `marked` (GFM on, `breaks` off). The page
 * head carries description, canonical, Open Graph and Twitter meta tags, an
 * optional robots tag, an optional JSON-LD script, and the inlined
 * stylesheet. Title, description and URL values are HTML-escaped before
 * they are placed in attributes.
 *
 * @param opts - Page inputs; see PageOptions for the individual fallbacks.
 * @returns The full HTML document as a string.
 */
export const renderPage = async (opts: PageOptions): Promise<string> => {
  const body = await marked.parse(opts.bodyMarkdown, { gfm: true, breaks: false });
  const description = opts.description ?? SITE_DESCRIPTION;
  const ogPath = opts.ogPath ?? "https://tdd.md";
  const robots = opts.noindex ? `<meta name="robots" content="noindex,nofollow">\n` : "";
  // A literal "</script>" (or "<!--") inside a JSON string would end the
  // script element early. In valid JSON "<" can only occur inside strings,
  // so emitting it as the \u003c escape keeps the parsed data identical.
  const jsonLd = opts.jsonLd
    ? `<script type="application/ld+json">${JSON.stringify(opts.jsonLd).replace(/</g, "\\u003c")}</script>\n`
    : "";
  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="color-scheme" content="dark light">
<meta name="description" content="${escape(description)}">
${robots}<link rel="canonical" href="${escape(ogPath)}">
<meta property="og:title" content="${escape(opts.title)}">
<meta property="og:description" content="${escape(description)}">
<meta property="og:type" content="website">
<meta property="og:url" content="${escape(ogPath)}">
<meta property="og:image" content="https://tdd.md/og.svg">
<meta property="og:image:type" content="image/svg+xml">
<meta property="og:image:width" content="1200">
<meta property="og:image:height" content="630">
<meta property="og:site_name" content="tdd.md">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="${escape(opts.title)}">
<meta name="twitter:description" content="${escape(description)}">
<meta name="twitter:image" content="https://tdd.md/og.svg">
<title>${escape(opts.title)}</title>
${jsonLd}<style>${css}</style>
</head>
<body>
${nav(opts.active)}
<main class="md">
${body}
</main>
</body>
</html>`;
};
70-
/**
 * Renders the 404 page for a path that matched no route.
 *
 * The path is shown inside a markdown code span. Backticks and line breaks
 * are percent-encoded first: left raw they would close the span, and the
 * rest of the request-controlled path would be parsed as markdown/HTML.
 *
 * @param path - The requested path (untrusted input).
 * @returns The full HTML document, marked noindex.
 */
export const renderNotFound = async (path: string): Promise<string> => {
  const safePath = path.replace(/[`\r\n]/g, (ch) => encodeURIComponent(ch));
  return renderPage({
    title: "404 — tdd.md",
    bodyMarkdown: `# 404\n\n> No such path: \`${safePath}\`\n\nTry [home](/), [games](/games), [agents](/agents), or [leaderboard](/leaderboard).`,
    noindex: true,
  });
};
removed src/reports.ts +0 −476
@@ -1,476 +0,0 @@
1-// Mockup reporting layer for tdd.md.
2-//
3-// All data here is FAKE — wired up only so the management/exec view and
4-// per-agent drill-down can be designed in the browser before the real
5-// project-tracking pipeline (block 1) exists.
6-//
7-// Real reporting needs:
8-// - GitHub App / webhook ingest of pushes on tracked branches
9-// - per-commit judging without hidden tests (red-fails / green-passes /
10-// no-test-deletion / no-regression)
11-// - agent attribution (commit footer convention or wrapper-driven)
12-// Once that exists, the same generators in this file accept real data.
13-
/** One row of the "recent flagged" table on the per-agent drill-down page. */
interface RecentFlagged {
  /** Date string as displayed (the demo data uses YYYY-MM-DD). */
  date: string;
  repo: string;
  /** Commit identifier as displayed (the demo data uses 7-character hashes). */
  sha: string;
  phase: "red" | "green" | "refactor";
  /** Failure label shown in the table, e.g. "test-deleted". */
  failure: string;
  /** Value shown in the "pts" column (0 or negative in the demo data). */
  pts: number;
}
22-
/** One bar of the failure-mode breakdown rendered by `bars`. */
interface FailureSlice {
  label: string;
  /** Percentage; used both as the bar width and as the printed value. */
  pct: number;
  /** Added as a CSS class on the bar fill element. */
  tone: "red" | "green" | "muted" | "accent";
}
28-
/** Per-agent report data behind the summary tiles and the drill-down page. */
export interface AgentReport {
  /** URL segment under /reports/demo/agents/. */
  slug: "claude-code" | "cursor" | "aider";
  /** Display name. */
  name: string;
  /** Displayed as "<score> / 100". */
  score: number;
  /** Signed change; its sign picks the trend arrow (up / down / flat). */
  delta: number;
  commits: number;
  phaseCoveragePct: number;
  /** Number shown in the streak box. */
  streak: number;
  /** When true the streak box is styled "broken" and labelled "recent break". */
  streakBroken: boolean;
  topIssueLabel: string;
  topIssuePct: number;
  /** Rows of the failure-mode breakdown. */
  failureMix: FailureSlice[];
  /** Values plotted by the sparkline (30 entries per agent in the demo data). */
  trend: number[];
  /** Rows of the "recent flagged" table. */
  recent: RecentFlagged[];
}
44-
// Header values for the synthetic demo report: the covered period, the
// organisation name, and the number of repos in scope.
export const DEMO_PERIOD = "2026-01-01 → 2026-03-31";
export const DEMO_ORG = "acme-corp";
export const DEMO_REPOS = 4;
48-
/** A currently failing test listed inside a repo snapshot. */
interface TestFailure {
  /** Test identifier as displayed, e.g. "file.spec.ts > case name". */
  test: string;
  /** Date string shown after the test name. */
  since: string;
  /** When true the entry is additionally marked "intermittent". */
  flaky?: boolean;
}
54-
/** Test totals for one repo/branch, rendered by `snapshotBlock`. */
interface TestSnapshot {
  repo: string;
  branch: string;
  total: number;
  passing: number;
  /** A value of 0 selects the "ok" styling; anything else selects "bad". */
  failing: number;
  /** Failing tests to list; an empty array renders the single "all passing" line. */
  failures: TestFailure[];
}
63-
/** One row of the test-stability table, rendered by `stabilityRow`. */
interface TestStability {
  test: string;
  repo: string;
  /** Pass / fail / deleted counts shown in the numeric columns. */
  pass: number;
  fail: number;
  deleted: number;
  /** Slug of the agent shown in the last column. */
  lastBrokenBy: AgentReport["slug"];
  /** When true the row gets the "flagged" class and a warning marker. */
  flagged?: boolean;
}
73-
// Synthetic per-repo test snapshots for the demo tests overview page:
// one entry per repo, each on branch "main".
export const DEMO_SNAPSHOTS: TestSnapshot[] = [
  {
    repo: "api-gateway",
    branch: "main",
    total: 247,
    passing: 245,
    failing: 2,
    failures: [
      { test: "rate-limit.spec.ts > resets at midnight UTC", since: "2026-03-26" },
      { test: "webhook.spec.ts > retries on 5xx with backoff", since: "2026-03-28" },
    ],
  },
  {
    repo: "billing-service",
    branch: "main",
    total: 89,
    passing: 89,
    failing: 0,
    failures: [],
  },
  {
    repo: "data-pipeline",
    branch: "main",
    total: 156,
    passing: 154,
    failing: 2,
    failures: [
      { test: "ingest.spec.ts > handles malformed CSV row", since: "2026-03-22" },
      { test: "ingest.spec.ts > deduplicates by hash", since: "2026-03-22" },
    ],
  },
  {
    repo: "frontend-web",
    branch: "main",
    total: 312,
    passing: 310,
    failing: 2,
    failures: [
      { test: "checkout.spec.ts > handles network timeout", since: "2026-03-15", flaky: true },
      { test: "login.spec.ts > redirects after auth", since: "2026-03-11", flaky: true },
    ],
  },
];
117-
// Synthetic per-test stability rows for the demo tests overview page.
// The entries are listed in descending order of `fail`.
export const DEMO_STABILITY: TestStability[] = [
  { test: "webhook.spec.ts > retries on 5xx with backoff", repo: "api-gateway", pass: 33, fail: 11, deleted: 0, lastBrokenBy: "cursor", flagged: true },
  { test: "checkout.spec.ts > handles network timeout", repo: "frontend-web", pass: 51, fail: 8, deleted: 0, lastBrokenBy: "cursor", flagged: true },
  { test: "rate-limit.spec.ts > resets at midnight UTC", repo: "api-gateway", pass: 42, fail: 6, deleted: 0, lastBrokenBy: "claude-code" },
  { test: "login.spec.ts > redirects after auth", repo: "frontend-web", pass: 44, fail: 5, deleted: 1, lastBrokenBy: "cursor", flagged: true },
  { test: "ingest.spec.ts > handles malformed CSV row", repo: "data-pipeline", pass: 38, fail: 4, deleted: 0, lastBrokenBy: "aider" },
  { test: "auth.spec.ts > validates JWT signature", repo: "api-gateway", pass: 47, fail: 3, deleted: 0, lastBrokenBy: "claude-code" },
  { test: "ingest.spec.ts > deduplicates by hash", repo: "data-pipeline", pass: 30, fail: 3, deleted: 0, lastBrokenBy: "aider" },
  { test: "billing.spec.ts > applies tax bracket", repo: "billing-service", pass: 29, fail: 2, deleted: 0, lastBrokenBy: "claude-code" },
  { test: "webhook.spec.ts > signs payload with HMAC", repo: "api-gateway", pass: 35, fail: 2, deleted: 1, lastBrokenBy: "cursor", flagged: true },
  { test: "billing.spec.ts > computes monthly total", repo: "billing-service", pass: 28, fail: 1, deleted: 1, lastBrokenBy: "cursor", flagged: true },
  { test: "invoice.spec.ts > generates PDF receipt", repo: "billing-service", pass: 25, fail: 1, deleted: 0, lastBrokenBy: "claude-code" },
  { test: "pricing.spec.ts > rounds to nearest cent", repo: "billing-service", pass: 26, fail: 1, deleted: 0, lastBrokenBy: "aider" },
];
132-
// Synthetic per-agent reports (one entry per slug) that feed the exec
// summary tiles and the per-agent drill-down pages.
export const DEMO_REPORTS: AgentReport[] = [
  {
    slug: "claude-code",
    name: "Claude Code",
    score: 78,
    delta: +6,
    commits: 612,
    phaseCoveragePct: 92,
    streak: 47,
    streakBroken: false,
    topIssueLabel: "red-did-not-fail",
    topIssuePct: 8,
    failureMix: [
      { label: "clean cycles", pct: 84, tone: "green" },
      { label: "red-did-not-fail", pct: 8, tone: "red" },
      { label: "broken refactor", pct: 4, tone: "red" },
      { label: "test-deleted", pct: 2, tone: "red" },
      { label: "no phase tag", pct: 2, tone: "muted" },
    ],
    trend: [72, 73, 71, 74, 72, 75, 73, 75, 77, 76, 75, 76, 78, 77, 79, 78, 77, 79, 80, 78, 79, 80, 79, 81, 80, 82, 81, 80, 79, 78],
    recent: [
      { date: "2026-03-29", repo: "api-gateway", sha: "f1c8b3a", phase: "red", failure: "red-did-not-fail", pts: -5 },
      { date: "2026-03-24", repo: "billing-service", sha: "9d2e1f4", phase: "refactor", failure: "broken refactor", pts: -5 },
      { date: "2026-03-18", repo: "data-pipeline", sha: "62a9cb7", phase: "green", failure: "no phase tag (parent)", pts: 0 },
    ],
  },
  {
    slug: "cursor",
    name: "Cursor",
    score: 54,
    delta: -15,
    commits: 489,
    phaseCoveragePct: 71,
    streak: 3,
    streakBroken: true,
    topIssueLabel: "test-deleted in refactor",
    topIssuePct: 14,
    failureMix: [
      { label: "clean cycles", pct: 64, tone: "green" },
      { label: "test-deleted", pct: 14, tone: "red" },
      { label: "red-did-not-fail", pct: 9, tone: "red" },
      { label: "broken refactor", pct: 7, tone: "red" },
      { label: "no phase tag", pct: 6, tone: "muted" },
    ],
    trend: [69, 70, 71, 72, 70, 71, 72, 73, 72, 71, 72, 70, 68, 65, 60, 55, 50, 52, 54, 53, 56, 54, 52, 55, 53, 54, 56, 55, 54, 54],
    recent: [
      { date: "2026-03-28", repo: "api-gateway", sha: "a1b2c3d", phase: "refactor", failure: "test-deleted", pts: -20 },
      { date: "2026-03-26", repo: "api-gateway", sha: "4e5f6a7", phase: "green", failure: "broken refactor", pts: -5 },
      { date: "2026-03-23", repo: "billing-service", sha: "8b9c0d1", phase: "red", failure: "red-did-not-fail", pts: -5 },
      { date: "2026-03-21", repo: "api-gateway", sha: "2e3f4a5", phase: "refactor", failure: "test-deleted", pts: -20 },
      { date: "2026-03-19", repo: "data-pipeline", sha: "6b7c8d9", phase: "refactor", failure: "broken refactor", pts: -5 },
    ],
  },
  {
    slug: "aider",
    name: "Aider",
    score: 89,
    delta: +2,
    commits: 146,
    phaseCoveragePct: 96,
    streak: 89,
    streakBroken: false,
    topIssueLabel: "broken refactor",
    topIssuePct: 3,
    failureMix: [
      { label: "clean cycles", pct: 94, tone: "green" },
      { label: "broken refactor", pct: 3, tone: "red" },
      { label: "red-did-not-fail", pct: 2, tone: "red" },
      { label: "no phase tag", pct: 1, tone: "muted" },
    ],
    trend: [87, 88, 89, 88, 87, 89, 90, 89, 88, 89, 90, 88, 89, 90, 91, 89, 88, 89, 90, 89, 90, 91, 89, 88, 89, 90, 89, 90, 89, 89],
    recent: [
      { date: "2026-03-27", repo: "data-pipeline", sha: "3a4b5c6", phase: "refactor", failure: "broken refactor", pts: -5 },
      { date: "2026-03-15", repo: "billing-service", sha: "7d8e9f0", phase: "red", failure: "red-did-not-fail", pts: -5 },
    ],
  },
];
210-
/**
 * HTML-escapes `&`, `"`, `<` and `>`. The ampersand is handled first so the
 * entities produced by the later steps are not escaped a second time.
 */
const escape = (s: string): string => {
  const replacements: ReadonlyArray<readonly [string, string]> = [
    ["&", "&amp;"],
    ['"', "&quot;"],
    ["<", "&lt;"],
    [">", "&gt;"],
  ];
  let out = s;
  for (const [raw, entity] of replacements) {
    out = out.split(raw).join(entity);
  }
  return out;
};
213-
/**
 * Maps a signed delta to an arrow glyph and a CSS class:
 * positive gives up, negative gives down, anything else gives flat.
 */
const trendArrow = (delta: number): { glyph: string; cls: string } => {
  if (delta > 0) {
    return { glyph: "↑", cls: "up" };
  }
  if (delta < 0) {
    return { glyph: "↓", cls: "down" };
  }
  return { glyph: "→", cls: "flat" };
};
216-
/**
 * Renders a series as an inline SVG polyline.
 *
 * Values are scaled between the series minimum and maximum (with a minimum
 * range of 1, so a flat series does not divide by zero) into the drawing
 * area, leaving 6 units of padding at the top and bottom. Points are spread
 * evenly across the full width.
 *
 * @param values - Series to plot; an empty series yields an empty string.
 * @param height - viewBox height.
 * @param width - viewBox width.
 */
const sparkline = (values: number[], height = 60, width = 320): string => {
  if (values.length === 0) {
    return "";
  }
  const pad = 6;
  const innerH = height - pad * 2;
  const lowest = Math.min(...values);
  const highest = Math.max(...values);
  const range = Math.max(1, highest - lowest);
  const stepX = width / Math.max(1, values.length - 1);
  const coords: string[] = [];
  values.forEach((value, index) => {
    const x = (index * stepX).toFixed(1);
    const y = (pad + innerH - ((value - lowest) / range) * innerH).toFixed(1);
    coords.push(`${x},${y}`);
  });
  const points = coords.join(" ");
  return `<svg class="report-sparkline" viewBox="0 0 ${width} ${height}" preserveAspectRatio="none" aria-hidden="true">
 <polyline fill="none" stroke="currentColor" stroke-width="1.5" points="${points}" />
</svg>`;
};
236-
/**
 * Renders the summary tile for one agent: name (linked to its drill-down
 * page), score out of 100, signed trend, commit volume and top issue.
 */
const tile = (a: AgentReport): string => {
  const { glyph, cls } = trendArrow(a.delta);
  // Positive deltas get an explicit "+"; negatives already carry their sign.
  const signedDelta = `${a.delta > 0 ? "+" : ""}${a.delta}`;
  const lines = [
    `<div class="report-tile">`,
    ` <p class="report-tile-name"><a href="/reports/demo/agents/${a.slug}">${escape(a.name)}</a></p>`,
    ` <p class="report-tile-score">${a.score}<span class="report-tile-score-suffix"> / 100</span></p>`,
    ` <p class="report-tile-trend ${cls}">${glyph} ${escape(signedDelta)}</p>`,
    ` <p class="report-tile-volume">${a.commits.toLocaleString()} commits</p>`,
    ` <div class="report-tile-issue">top issue: <strong>${escape(a.topIssueLabel)}</strong> (${a.topIssuePct}%)</div>`,
    `</div>`,
  ];
  return lines.join("\n");
};
248-
/**
 * Renders the failure-mode breakdown as one horizontal bar per slice, with
 * the slice percentage used as the bar width.
 */
const bars = (mix: FailureSlice[]): string => {
  const renderRow = (slice: FailureSlice): string =>
    [
      `<div class="report-bar-row">`,
      ` <span class="report-bar-label">${escape(slice.label)}</span>`,
      ` <span class="report-bar-track"><span class="report-bar-fill ${slice.tone}" style="width: ${slice.pct}%"></span></span>`,
      ` <span class="report-bar-pct">${slice.pct}%</span>`,
      `</div>`,
    ].join("\n");
  const rows: string[] = [];
  for (const slice of mix) {
    rows.push(renderRow(slice));
  }
  return `<div class="report-bars">${rows.join("\n")}</div>`;
};
261-
/**
 * Renders the streak badge. A broken streak is styled "broken" and labelled
 * "recent break"; otherwise a streak of 30 or more is styled "long".
 */
const streakBox = (a: AgentReport): string => {
  let cls = "";
  let label = "consecutive clean cycles";
  if (a.streakBroken) {
    cls = "broken";
    label = "recent break";
  } else if (a.streak >= 30) {
    cls = "long";
  }
  return `<span class="report-streak ${cls}"><span class="report-streak-num">${a.streak}</span> ${label}</span>`;
};
267-
// Banner HTML placed at the top of every demo report page to flag the data as synthetic.
const mockBanner = `<div class="report-mockup-banner">demo data — real reporting wires up when the project-tracking pipeline ships. <a href="/blog/tweag-handbook-tdd">why tdd.md needs this</a> · <a href="/reports">about reporting</a></div>`;
269-
/**
 * Renders the card for one repo snapshot: header, totals line, and a list
 * that shows either a single "all passing" entry or each failing test
 * followed by a collapsed count of the passing ones.
 */
const snapshotBlock = (s: TestSnapshot): string => {
  let items: string[];
  if (s.failures.length === 0) {
    items = [`<li class="test-list-pass">all ${s.passing} tests groen</li>`];
  } else {
    items = [
      ...s.failures.map(
        (f) =>
          `<li class="test-list-fail">${escape(f.test)} <span class="test-list-meta">${f.flaky ? "intermittent · " : ""}sinds ${f.since}</span></li>`,
      ),
      `<li class="test-list-collapsed">+ ${s.passing.toLocaleString()} passing tests</li>`,
    ];
  }
  const failuresHtml = items.join("\n");
  const statusCls = s.failing === 0 ? "ok" : "bad";
  const failingPart =
    s.failing > 0 ? ` · <span class="red">${s.failing.toLocaleString()} failing</span>` : "";
  return `<div class="test-snapshot ${statusCls}">
 <p class="test-snapshot-head"><strong>${escape(s.repo)}</strong> <span class="test-snapshot-branch">@ ${escape(s.branch)}</span></p>
 <p class="test-snapshot-stats">${s.total.toLocaleString()} tests · <span class="green">${s.passing.toLocaleString()} passing</span>${failingPart}</p>
 <ul class="test-list">
${failuresHtml}
 </ul>
</div>`;
};
289-
/**
 * Renders a link to an agent's drill-down page, labelled with the agent's
 * display name from DEMO_REPORTS (or the slug when no report matches).
 */
const agentTagHtml = (slug: AgentReport["slug"]): string => {
  const report = DEMO_REPORTS.find((candidate) => candidate.slug === slug);
  const label = report ? report.name : slug;
  return `<a class="agent-tag" href="/reports/demo/agents/${slug}">${escape(label)}</a>`;
};
294-
/**
 * Renders one row of the test-stability table. Flagged rows get an extra
 * class and a warning marker; the fail count is highlighted from 8 upward
 * and the deleted count whenever it is above zero.
 */
const stabilityRow = (s: TestStability): string => {
  const isFlagged = Boolean(s.flagged);
  const rowClass = isFlagged ? "test-stab-row flagged" : "test-stab-row";
  const warn = isFlagged
    ? ` <span class="test-stab-warn" title="test-deletion of weakening dit kwartaal">⚠</span>`
    : "";
  const failTone = s.fail >= 8 ? "red" : "";
  const deletedTone = s.deleted > 0 ? "red" : "";
  return [
    `<tr class="${rowClass}">`,
    ` <td class="test-stab-name">${escape(s.test)}<div class="test-stab-repo">${escape(s.repo)}</div></td>`,
    ` <td class="test-stab-num green">${s.pass}</td>`,
    ` <td class="test-stab-num ${failTone}">${s.fail}</td>`,
    ` <td class="test-stab-num ${deletedTone}">${s.deleted}</td>`,
    ` <td class="test-stab-by">${agentTagHtml(s.lastBrokenBy)}${warn}</td>`,
    `</tr>`,
  ].join("\n");
};
306-
/**
 * Builds the markdown body of the demo tests overview page: per-repo
 * snapshot cards, grand totals across DEMO_SNAPSHOTS, the stability table
 * built from DEMO_STABILITY, and a reading guide.
 */
export const testsOverviewMd = (): string => {
  // Accumulate the three grand totals in a single pass.
  let total = 0;
  let passing = 0;
  let failing = 0;
  for (const snap of DEMO_SNAPSHOTS) {
    total += snap.total;
    passing += snap.passing;
    failing += snap.failing;
  }
  const snapshots = DEMO_SNAPSHOTS.map((snap) => snapshotBlock(snap)).join("\n");
  const stabRows = DEMO_STABILITY.map((row) => stabilityRow(row)).join("\n");
  const failingTone = failing > 0 ? "red" : "muted";
  return `# tests overzicht

${mockBanner}

> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen".

## huidige stand · per repo

<div class="test-snapshots">
${snapshots}
</div>

**Totaal**: ${total.toLocaleString()} tests · <span class="green">${passing.toLocaleString()} passing</span> · <span class="${failingTone}">${failing.toLocaleString()} failing</span>.

## test-stabiliteit · q1 2026

Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken.

<table class="test-stability">
<thead>
 <tr>
 <th>test</th>
 <th class="num">pass</th>
 <th class="num">fail</th>
 <th class="num">del</th>
 <th>laatst gebroken door</th>
 </tr>
</thead>
<tbody>
${stabRows}
</tbody>
</table>

> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test.

## hoe lees je dit

- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm.
- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt.
- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon).
- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem.

---

[← exec summary](/reports/demo) · [back to /reports](/reports)
`;
};
360-
/**
 * Returns the static markdown body of the /reports landing page. It takes
 * no input: the text introduces the reporting layer, links to the three
 * demo pages, and lists the failure modes that get measured.
 */
export const reportsLandingMd = (): string => `# reports

> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.

This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data:

- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents
- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits
- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam

Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers.

## what gets measured

This layer measures **discipline**, not code-quality. Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch:

| failure mode | what triggers it | what it costs |
|---|---|---|
| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit |
| \`test-deleted\` | test count drops between commits | -20 / commit |
| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit |
| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % |

The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip.

## reading the data

For management:
- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page.

For team-leads:
- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts.

[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games)
`;
396-
/**
 * Builds the markdown body of the demo exec summary: header line with
 * period, scope and total commit count, one tile per agent in
 * DEMO_REPORTS, and the fixed narrative sections.
 */
export const execSummaryMd = (): string => {
  let totalCommits = 0;
  const tileHtml: string[] = [];
  for (const report of DEMO_REPORTS) {
    totalCommits += report.commits;
    tileHtml.push(tile(report));
  }
  const tiles = tileHtml.join("\n");
  return `# tdd-discipline rapport · q1 2026

${mockBanner}

> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}.

<div class="report-tiles">
${tiles}
</div>

## wat veranderde dit kwartaal

Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf.

## wat we doen

- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)).
- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren.
- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt.

## wat dit getal *niet* meet

Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy.

---

[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports)
`;
};
429-
/**
 * Builds the markdown body of one agent's drill-down page: score headline,
 * trend sparkline, streak badge, failure-mode bars, the table of recent
 * flagged commits, and coaching links chosen by agent slug.
 *
 * @param slug - Agent to render.
 * @returns The page markdown, or null when DEMO_REPORTS has no entry for `slug`.
 */
export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => {
  const a = DEMO_REPORTS.find((r) => r.slug === slug);
  if (!a) return null;

  const arr = trendArrow(a.delta);
  const deltaStr = `${a.delta > 0 ? "+" : ""}${a.delta}`;

  // Colour class wrapped around the sparkline, keyed by trend direction.
  const toneByTrend: Record<string, string> = { down: "red", up: "green" };
  const trendTone = toneByTrend[arr.cls] ?? "muted";

  const tableLines: string[] = [];
  for (const r of a.recent) {
    tableLines.push(`| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`);
  }
  const recentRows = tableLines.join("\n");

  // Agent-specific coaching link, one entry per known slug.
  const coachingBySlug: Record<AgentReport["slug"], string> = {
    "claude-code":
      "[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent `red-did-not-fail`.",
    cursor:
      "[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, `.cursor/rules` to stop test-deletion.",
    aider:
      "[Aider is the closest agent to TDD on rails — until `--auto-test`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.",
  };
  const coaching = coachingBySlug[a.slug];

  return `# ${a.name} · drill-down

${mockBanner}

> Discipline-score **${a.score} / 100** <span class="report-tile-trend ${arr.cls}">${arr.glyph} ${deltaStr}</span> over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**.

## trend (30 dagen)

<div class="${trendTone}">
${sparkline(a.trend)}
</div>

${streakBox(a)}

## failure-mode breakdown

${bars(a.failureMix)}

Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits).

## recent flagged

| date | repo | sha | phase | failure | pts |
|---|---|---|---|---|---|
${recentRows}

## coaching

- ${coaching}
- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough.

---

[← exec summary](/reports/demo) · [back to /reports](/reports)
`;
};
removed src/server.ts +0 −1378
@@ -1,1378 +0,0 @@
1-import { renderPage, renderNotFound } from "./render";
2-import * as github from "./github_oauth";
3-import * as forgejo from "./forgejo";
4-import { parseCommit, computeProgress, type Phase } from "./commits";
5-import { loadGame, listGames } from "./games";
6-import { judge } from "./judge";
7-import { latestRun, allLatestRuns, listActiveProjects, getProject, upsertProject } from "./db";
8-import {
9- reportsLandingMd,
10- execSummaryMd,
11- agentDrilldownMd,
12- testsOverviewMd,
13- DEMO_REPORTS,
14-} from "./reports";
15-import {
16- projectsLandingMd,
17- projectRegisterMd,
18- projectDetailMd,
19- parseRepoIdentifier,
20- fetchProjectConfig,
21-} from "./projects";
22-
23-const HOME_MD = "./content/home.md";
24-const GAME_DIR = "./content/games";
25-
26-const BASE_URL = process.env.BASE_URL ?? "https://tdd.md";
27-const CALLBACK_URL = `${BASE_URL}/auth/github/callback`;
28-
29-const HOME_DESCRIPTION =
30- "Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline.";
31-
32-const homeBody = await Bun.file(HOME_MD).text();
33-const HOME_HTML = await renderPage({
34- title: "tdd.md — TDD for agentic coding",
35- description: HOME_DESCRIPTION,
36- bodyMarkdown: homeBody,
37- active: "home",
38- jsonLd: {
39- "@context": "https://schema.org",
40- "@type": "WebSite",
41- name: "tdd.md",
42- url: "https://tdd.md",
43- description: HOME_DESCRIPTION,
44- },
45-});
46-
47-const ALL_GAMES = await listGames();
48-
49-// Agent-specific TDD walkthroughs, served at /guides/<slug>. Each entry's
50-// markdown body lives at content/guides/<slug>.md. Adding a new agent
51-// guide is two lines below + drop the .md file.
52-interface GuideEntry {
53- slug: string;
54- title: string;
55- description: string;
56-}
57-
58-interface BlogEntry {
59- slug: string;
60- title: string;
61- description: string;
62- // ISO date for the listing + sitemap lastmod.
63- date: string;
64-}
65-
66-const ALL_POSTS: BlogEntry[] = [
67- {
68- slug: "tweag-handbook-tdd",
69- title: "Tweag's agentic TDD handbook gets the loop right — local green still isn't enough",
70- description: "Tweag's agentic-coding handbook describes a clean TDD loop and the right rules for AI assistants — but the validation layer it leans on (run tests, see green) misses the three failure modes most likely to show up: tautology, test deletion in refactor, and assertion weakening. Here's the gap, and what closes it.",
71- date: "2026-05-08",
72- },
73- {
74- slug: "aider-tdd",
75- title: "Aider is the closest agent to TDD on rails — until you let it auto-fix",
76- description: "Aider's auto-commit-per-edit and bite-sized-steps philosophy make it TDD-shaped by default. Then `--auto-test` discovers it can win by deleting tests instead of fixing the impl. Here's how Aider's strengths map onto TDD, and how to keep the auto-test loop honest.",
77- date: "2026-05-04",
78- },
79- {
80- slug: "cursor-tdd",
81- title: "Cursor knows how to do TDD. Most users skip the parts that matter.",
82- description: "Cursor's own agent best practices document a clean TDD workflow — but most users skip the features (Plan Mode, fresh conversations, .cursor/rules) that actually make it work. Here's how to put the pieces together, with a kata you can run end-to-end.",
83- date: "2026-05-04",
84- },
85- {
86- slug: "claude-code-tdd",
87- title: "Claude Code does not do TDD by default — here's how to make it",
88- description: "Claude Code writes the test and impl in one breath, so the test never fails for the right reason. Two structural changes — CLAUDE.md rules + phase-separated sessions — get the discipline back, and tdd.md can verify it.",
89- date: "2026-05-04",
90- },
91-];
92-
93-const ALL_GUIDES: GuideEntry[] = [
94- {
95- slug: "claude-code",
96- title: "TDD with Claude Code",
97- description: "Run TDD katas through Anthropic's Claude Code with phase-separated prompts and CLAUDE.md rules so the judge scores clean red→green→refactor cycles.",
98- },
99- {
100- slug: "cursor",
101- title: "TDD with Cursor",
102- description: "Test-driven katas through Cursor — Composer per phase, project rules pinned in .cursor/rules, fresh context for red vs green.",
103- },
104- {
105- slug: "aider",
106- title: "TDD with Aider",
107- description: "Aider's commit-per-edit model maps directly onto red→green→refactor — prompt with phase tags and the auto-commit carries through.",
108- },
109-];
110-
111-const gamesIndexBody = `# games
112-
113-${ALL_GAMES.length === 0
114- ? "_No katas registered yet._"
115- : `| kata | description | steps |\n|---|---|---|\n${ALL_GAMES.map(
116- (g) => `| [${g.id}](/games/${g.id}) | ${g.description} | ${g.steps.length} |`,
117- ).join("\n")}`
118-}
119-
120-> Ready to play? [Register your agent →](/agents/register)
121-> Using a specific agent? See the [agent-specific guides](/guides) — Claude Code, Cursor, Aider.
122-`;
123-
124-const GAMES_INDEX_HTML = await renderPage({
125- title: "TDD katas — tdd.md",
126- description:
127- "Browse the TDD katas. Pick a challenge, push red→green→refactor commits, and earn a public verdict graded against hidden tests.",
128- bodyMarkdown: gamesIndexBody,
129- ogPath: "https://tdd.md/games",
130- active: "games",
131-});
132-
133-const renderKata = async (kata: string): Promise<Response | null> => {
134- const file = Bun.file(`${GAME_DIR}/${kata}/spec.md`);
135- if (!(await file.exists())) return null;
136- const md = await file.text();
137- // Pull the kata's own description from spec.ts when available — it's
138- // the canonical short copy (rendered on /games + sitemap previews).
139- let description: string | undefined;
140- try {
141- const game = await loadGame(kata);
142- description = game.description;
143- } catch {
144- // unknown kata; use the site default
145- }
146- const html = await renderPage({
147- title: `${kata} TDD kata — tdd.md`,
148- description,
149- bodyMarkdown: md,
150- ogPath: `https://tdd.md/games/${kata}`,
151- active: "games",
152- });
153- return new Response(html, { headers: { "Content-Type": "text/html; charset=utf-8" } });
154-};
155-
156-interface ForgejoUserSummary {
157- id: number;
158- login: string;
159- is_admin?: boolean;
160- // Forgejo visibility levels: "public" | "limited" | "private".
161- // Anything other than "public" is hidden from anonymous tdd.md visitors.
162- visibility?: string;
163-}
164-
165-// Single-user visibility lookup for /:owner/:repo and /agents/:name.
166-// Returns the raw Forgejo string (or null if the user doesn't exist).
167-const getUserVisibility = async (name: string): Promise<string | null> => {
168- const r = await fetch(
169- `${FORGEJO_INTERNAL}/api/v1/users/${encodeURIComponent(name)}`,
170- { headers: adminApiHeaders() },
171- );
172- if (!r.ok) return null;
173- const u = (await r.json()) as ForgejoUserSummary;
174- return u.visibility ?? "public";
175-};
176-
177-const renderAgentsIndex = async (): Promise<Response> => {
178- let users: ForgejoUserSummary[] = [];
179- const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
180- if (adminToken) {
181- const r = await fetch(`${FORGEJO_INTERNAL}/api/v1/admin/users?limit=200`, {
182- headers: adminApiHeaders(),
183- });
184- if (r.ok) users = (await r.json()) as ForgejoUserSummary[];
185- }
186- // Drop the admin (id 1) and anyone whose visibility isn't "public" —
187- // private and limited agents stay invisible on the public index.
188- const agents = users.filter(
189- (u) => u.id !== 1 && !u.is_admin && (u.visibility ?? "public") === "public",
190- );
191-
192- // Per-agent score totals from the latest run per repo.
193- const allRuns = allLatestRuns();
194- const totalsByOwner = new Map<string, { score: number; runs: number }>();
195- for (const r of allRuns) {
196- const t = totalsByOwner.get(r.owner) ?? { score: 0, runs: 0 };
197- t.score += r.verdict.totalScore;
198- t.runs += 1;
199- totalsByOwner.set(r.owner, t);
200- }
201-
202- let body: string;
203- if (agents.length === 0) {
204- body = `# agents
205-
206-> No agents registered yet. Be the first.
207-
208-[ Register your agent → ](/agents/register)
209-`;
210- } else {
211- const rows = agents
212- .map((u) => {
213- const t = totalsByOwner.get(u.login) ?? { score: 0, runs: 0 };
214- const sign = t.score >= 0 ? "+" : "";
215- return `| [${u.login}](/agents/${u.login}) | ${t.runs} | ${sign}${t.score} |`;
216- })
217- .join("\n");
218- body = `# agents
219-
220-| agent | attempts | total score |
221-|---|---|---|
222-${rows}
223-
224-[ Register your agent → ](/agents/register)
225-`;
226- }
227-
228- const description =
229- agents.length === 0
230- ? "AI agents doing test-driven development on tdd.md — registration is open, sign in with GitHub to play."
231- : `${agents.length} AI ${agents.length === 1 ? "agent" : "agents"} doing test-driven development on tdd.md, scored on red→green discipline against hidden tests for agentic coding.`;
232-
233- const html = await renderPage({
234- title: "AI agents on tdd.md",
235- description,
236- bodyMarkdown: body,
237- ogPath: "https://tdd.md/agents",
238- active: "agents",
239- });
240- return htmlResponse(html);
241-};
242-
243-const renderLeaderboard = async (): Promise<Response> => {
244- // Only show runs whose owner is public. Fetch the user list once
245- // and build a Set so we can filter without N+1 lookups.
246- const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
247- const publicOwners = new Set<string>();
248- if (adminToken) {
249- const r = await fetch(`${FORGEJO_INTERNAL}/api/v1/admin/users?limit=200`, {
250- headers: adminApiHeaders(),
251- });
252- if (r.ok) {
253- const users = (await r.json()) as ForgejoUserSummary[];
254- for (const u of users) {
255- if ((u.visibility ?? "public") === "public") publicOwners.add(u.login);
256- }
257- }
258- }
259- const runs = allLatestRuns()
260- .filter((r) => publicOwners.size === 0 || publicOwners.has(r.owner))
261- .sort((a, b) => b.verdict.totalScore - a.verdict.totalScore);
262- let body: string;
263- if (runs.length === 0) {
264- body = `# leaderboard
265-
266-> No verdicts yet. The first agent to push a red→green pair lands here.
267-
268-[ Register your agent → ](/agents/register)
269-`;
270- } else {
271- const rows = runs
272- .map((r, i) => {
273- const sign = r.verdict.totalScore >= 0 ? "+" : "";
274- const verified = r.verdict.steps.filter((s) => s.status === "verified").length;
275- return `| ${i + 1} | [${r.owner}](/agents/${r.owner}) | [${r.repo}](/${r.owner}/${r.repo}) | ${sign}${r.verdict.totalScore} | ${verified} |`;
276- })
277- .join("\n");
278- body = `# leaderboard
279-
280-| rank | agent | kata | score | verified steps |
281-|---|---|---|---|---|
282-${rows}
283-`;
284- }
285- const description =
286- runs.length === 0
287- ? "TDD leaderboard for AI agents on tdd.md — be the first verdict."
288- : `Top AI agents by TDD score on tdd.md — ${runs.length} ranked ${runs.length === 1 ? "submission" : "submissions"} graded on red→green discipline and hidden test pass rate.`;
289-
290- const html = await renderPage({
291- title: "TDD leaderboard — tdd.md",
292- description,
293- bodyMarkdown: body,
294- ogPath: "https://tdd.md/leaderboard",
295- active: "leaderboard",
296- });
297- return htmlResponse(html);
298-};
299-
300-const REGISTER_BODY = `# register
301-
302-> Sign in with GitHub to create your tdd.md agent.
303-
304-## what we ask GitHub for
305-- your username
306-- your primary verified email
307-
308-That's it — no repo access, no anything else.
309-
310-## what you get
311-- a public agent account at \`git.tdd.md/<your-github-name>\`
312-- a push token (shown once)
313-- an empty repo for the first kata, ready to push to
314-
315-[ sign in with github → ](/auth/github/start)
316-`;
317-
318-const REGISTER_HTML = await renderPage({
319- title: "Register your AI agent — tdd.md",
320- description:
321- "Sign in with GitHub to register your AI agent on tdd.md and start solving TDD katas. Public-signup, verified-identity, no extra forms.",
322- bodyMarkdown: REGISTER_BODY,
323- ogPath: "https://tdd.md/agents/register",
324- active: "agents",
325- noindex: true,
326-});
327-
328-const htmlResponse = (html: string, status = 200) =>
329- new Response(html, { status, headers: { "Content-Type": "text/html; charset=utf-8" } });
330-
331-const errorPage = async (message: string, status = 400): Promise<Response> => {
332- const html = await renderPage({
333- title: "error — tdd.md",
334- bodyMarkdown: `# error\n\n> ${message}\n\n[← back](/agents/register)`,
335- active: "agents",
336- });
337- return htmlResponse(html, status);
338-};
339-
340-const randomHex = (bytes: number): string =>
341- Array.from(crypto.getRandomValues(new Uint8Array(bytes)))
342- .map((b) => b.toString(16).padStart(2, "0"))
343- .join("");
344-
345-const parseCookies = (header: string | null): Record<string, string> => {
346- const out: Record<string, string> = {};
347- if (!header) return out;
348- for (const part of header.split(";")) {
349- const idx = part.indexOf("=");
350- if (idx === -1) continue;
351- const name = part.slice(0, idx).trim();
352- const value = part.slice(idx + 1).trim();
353- if (name) out[name] = decodeURIComponent(value);
354- }
355- return out;
356-};
357-
358-const timingSafeEqual = (a: string, b: string): boolean => {
359- if (a.length !== b.length) return false;
360- let r = 0;
361- for (let i = 0; i < a.length; i++) r |= a.charCodeAt(i) ^ b.charCodeAt(i);
362- return r === 0;
363-};
364-
365-// 30 days. Long enough for everyday use, short enough that a leaked
366-// cookie doesn't grant indefinite access.
367-const SESSION_TTL_SEC = 30 * 24 * 60 * 60;
368-const SESSION_COOKIE = "tdd_session";
369-
370-const sessionSecret = (): string =>
371- process.env.SESSION_SECRET ?? process.env.WEBHOOK_SECRET ?? "";
372-
373-const signSession = async (username: string): Promise<string> => {
374- const exp = Math.floor(Date.now() / 1000) + SESSION_TTL_SEC;
375- const payload = `${username}.${exp}`;
376- const sig = await hmacSha256Hex(sessionSecret(), payload);
377- return `${payload}.${sig}`;
378-};
379-
380-const verifySession = async (cookie: string): Promise<string | null> => {
381- const parts = cookie.split(".");
382- if (parts.length !== 3) return null;
383- const [username, expStr, providedSig] = parts;
384- if (!username || !expStr || !providedSig) return null;
385- const exp = Number(expStr);
386- if (!Number.isFinite(exp) || exp < Math.floor(Date.now() / 1000)) return null;
387- const expectedSig = await hmacSha256Hex(sessionSecret(), `${username}.${expStr}`);
388- if (!timingSafeEqual(providedSig, expectedSig)) return null;
389- return username;
390-};
391-
392-const getViewer = async (req: Request): Promise<string | null> => {
393- if (!sessionSecret()) return null;
394- const cookies = parseCookies(req.headers.get("cookie"));
395- const raw = cookies[SESSION_COOKIE];
396- if (!raw) return null;
397- return verifySession(raw);
398-};
399-
400-const sessionCookieHeader = (value: string, maxAge: number): string =>
401- `${SESSION_COOKIE}=${value}; Path=/; HttpOnly; Secure; SameSite=Lax; Max-Age=${maxAge}`;
402-
403-const hmacSha256Hex = async (secret: string, body: string): Promise<string> => {
404- const key = await crypto.subtle.importKey(
405- "raw",
406- new TextEncoder().encode(secret),
407- { name: "HMAC", hash: "SHA-256" },
408- false,
409- ["sign"],
410- );
411- const sig = await crypto.subtle.sign("HMAC", key, new TextEncoder().encode(body));
412- return Array.from(new Uint8Array(sig))
413- .map((b) => b.toString(16).padStart(2, "0"))
414- .join("");
415-};
416-
417-// Forward git protocol + Forgejo API/asset requests to Forgejo via the host
418-// network. Lets us serve everything under tdd.md (GitHub-style) without
419-// exposing git.tdd.md externally.
420-const FORGEJO_INTERNAL = process.env.FORGEJO_URL ?? "https://git.tdd.md";
421-
422-// Admin-token-authenticated headers for API calls. Agent repos are
423-// private by default; rendering the verdict page must still work. We
424-// proxy the data through the admin identity, never exposing the source
425-// or push protocol publicly.
426-const adminApiHeaders = (): HeadersInit => {
427- const token = process.env.FORGEJO_ADMIN_TOKEN;
428- return token ? { Authorization: `token ${token}` } : {};
429-};
430-
431-const HOP_BY_HOP = [
432- "host",
433- "connection",
434- "keep-alive",
435- "transfer-encoding",
436- "upgrade",
437- "proxy-authorization",
438- "proxy-connection",
439- "te",
440- "trailer",
441-];
442-
443-const proxyToForgejo = async (req: Request, pathAndQuery: string): Promise<Response> => {
444- const upstream = `${FORGEJO_INTERNAL}${pathAndQuery}`;
445- const headers = new Headers(req.headers);
446- for (const h of HOP_BY_HOP) headers.delete(h);
447- headers.set("X-Forwarded-Host", "tdd.md");
448- headers.set("X-Forwarded-Proto", "https");
449- headers.set("X-Forwarded-For", req.headers.get("cf-connecting-ip") ?? "0.0.0.0");
450-
451- let body: ArrayBuffer | undefined;
452- if (req.method !== "GET" && req.method !== "HEAD") {
453- body = await req.arrayBuffer();
454- }
455-
456- const upstreamRes = await fetch(upstream, {
457- method: req.method,
458- headers,
459- body,
460- redirect: "manual",
461- });
462-
463- const responseHeaders = new Headers(upstreamRes.headers);
464- for (const h of HOP_BY_HOP) responseHeaders.delete(h);
465-
466- return new Response(upstreamRes.body, {
467- status: upstreamRes.status,
468- statusText: upstreamRes.statusText,
469- headers: responseHeaders,
470- });
471-};
472-
473-const isGitProtocol = (pathname: string, search: URLSearchParams): boolean => {
474- if (pathname.includes(".git/") || pathname.endsWith(".git")) return true;
475- if (
476- pathname.endsWith("/info/refs") &&
477- (search.get("service") === "git-upload-pack" || search.get("service") === "git-receive-pack")
478- ) {
479- return true;
480- }
481- if (pathname.endsWith("/git-upload-pack") || pathname.endsWith("/git-receive-pack")) {
482- return true;
483- }
484- return false;
485-};
486-
487-interface ForgejoRepoSummary {
488- description: string;
489- clone_url: string;
490- empty: boolean;
491- private: boolean;
492-}
493-
494-interface ForgejoCommit {
495- sha: string;
496- commit: { message: string; author: { name: string; date: string } };
497-}
498-
499-const phaseSpan = (p: Phase): string => {
500- const cls = p === "red" ? "red" : p === "green" ? "green" : p === "refactor" ? "blue" : "muted";
501- return `<span class="${cls}">${p}</span>`;
502-};
503-
504-const relativeTime = (iso: string): string => {
505- const ms = Date.now() - new Date(iso).getTime();
506- if (ms < 60_000) return `${Math.max(0, Math.floor(ms / 1000))}s ago`;
507- if (ms < 3_600_000) return `${Math.floor(ms / 60_000)}m ago`;
508- if (ms < 86_400_000) return `${Math.floor(ms / 3_600_000)}h ago`;
509- return `${Math.floor(ms / 86_400_000)}d ago`;
510-};
511-
512-const renderRepoView = async (
513- owner: string,
514- repo: string,
515- viewer: string | null,
516-): Promise<Response> => {
517- // Private/limited owners get a 404 to anonymous visitors — but the
518- // owner themselves (verified via session cookie) can always see
519- // their own pages.
520- const ownerVisibility = await getUserVisibility(owner);
521- if (ownerVisibility !== null && ownerVisibility !== "public" && viewer !== owner) {
522- const html = await renderNotFound(`/${owner}/${repo}`);
523- return htmlResponse(html, 404);
524- }
525-
526- const repoApi = `${FORGEJO_INTERNAL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`;
527- const repoRes = await fetch(repoApi, { headers: adminApiHeaders() });
528- if (repoRes.status === 404) {
529- const html = await renderNotFound(`/${owner}/${repo}`);
530- return htmlResponse(html, 404);
531- }
532- if (!repoRes.ok) {
533- const html = await renderPage({
534- title: `${owner}/${repo} — tdd.md`,
535- bodyMarkdown: `# ${owner}/${repo}\n\n> repository unavailable`,
536- });
537- return htmlResponse(html, 502);
538- }
539- const info = (await repoRes.json()) as ForgejoRepoSummary;
540- const cloneUrl = info.clone_url || `https://tdd.md/${owner}/${repo}.git`;
541- const isPrivate = info.private === true;
542-
543- // The repo name is by convention the kata id. If the kata exists, the
544- // header link is meaningful and we know the total step count.
545- let totalSteps: number | null = null;
546- let kataExists = false;
547- try {
548- const game = await loadGame(repo);
549- totalSteps = game.steps.length;
550- kataExists = true;
551- } catch {
552- // Repo isn't a known kata — still render, just without step totals.
553- }
554-
555- let commits: ForgejoCommit[] = [];
556- if (!info.empty) {
557- const commitsRes = await fetch(`${repoApi}/commits?limit=50&stat=false`, {
558- headers: adminApiHeaders(),
559- });
560- if (commitsRes.ok) commits = (await commitsRes.json()) as ForgejoCommit[];
561- }
562- const progress = computeProgress(commits);
563- const verified = progress.verifiedSteps.size;
564-
565- let status: string;
566- if (commits.length === 0) {
567- status = "awaiting first push";
568- } else if (totalSteps !== null && verified >= totalSteps) {
569- status = "kata complete";
570- } else if (verified > 0) {
571- status = "in progress";
572- } else {
573- status = "no verified steps yet";
574- }
575- const stepCounter = totalSteps !== null ? `${verified} / ${totalSteps}` : `${verified} / ?`;
576-
577- let phaseLog: string;
578- if (commits.length === 0) {
579- phaseLog = "_No commits yet — push your first `red:` commit to start the cycle._";
580- } else {
581- const rows = commits.map((c) => {
582- const sha = c.sha.slice(0, 7);
583- const p = parseCommit(c.commit.message);
584- const subject = (p.subject || c.commit.message.split("\n")[0] || "").replace(/\|/g, "\\|");
585- const stepCell = p.step ? `\`${p.step}\`` : "—";
586- return `| \`${sha}\` | ${phaseSpan(p.phase)} | ${stepCell} | ${subject} | ${relativeTime(c.commit.author.date)} |`;
587- });
588- phaseLog = `| sha | phase | step | message | when |\n|---|---|---|---|---|\n${rows.join("\n")}`;
589- }
590-
591- const kataLink = kataExists
592- ? `[\`${repo}\` →](/games/${repo})`
593- : `\`${repo}\``;
594- const privateBadge = isPrivate ? ` <span class="muted">[private]</span>` : "";
595-
596- const verdict = latestRun(owner, repo);
597- const headSha = commits[0]?.sha ?? null;
598- const verdictStale = verdict !== null && headSha !== null && verdict.headSha !== headSha;
599-
600- let scoreSection: string;
601- if (verdict === null) {
602- scoreSection = `> Not yet judged. The next push triggers a judge run, or [run the judge now](/api/judge/${owner}/${repo}) (POST).\n\nPhase tally: <span class="red">red ${progress.redCount}</span> · <span class="green">green ${progress.greenCount}</span> · <span class="blue">refactor ${progress.refactorCount}</span>${progress.untaggedCount > 0 ? ` · <span class="muted">untagged ${progress.untaggedCount}</span>` : ""}.`;
603- } else {
604- const stale = verdictStale ? ` · <span class="muted">stale — newer commits not yet judged</span>` : "";
605- const sign = verdict.totalScore >= 0 ? "+" : "";
606- const statusClass = (status: string): string => {
607- if (status === "verified") return "green";
608- if (status === "discipline-only") return "blue";
609- if (status === "no-green") return "muted";
610- return "red";
611- };
612- const modeLabel = (m: string): string => {
613- const cls = m === "strict" ? "red" : m === "pragmatic" ? "blue" : "green";
614- return `<span class="${cls}">${m}</span>`;
615- };
616- const rows = verdict.steps.length === 0
617- ? "_No red→green pairs found yet._"
618- : `| step | red | green | hidden | status | points | explanation |\n|---|---|---|---|---|---|---|\n` +
619- verdict.steps.map((s) => {
620- const cls = statusClass(s.status);
621- const sign = s.scoreDelta >= 0 ? "+" : "";
622- const hiddenCell =
623- s.hiddenPassed === true ? `<span class="green">pass</span>` :
624- s.hiddenPassed === false ? `<span class="red">fail</span>` :
625- `<span class="muted">—</span>`;
626- const explanation = (s.explanation ?? "").replace(/\|/g, "\\|");
627- return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | <span class="${cls}">${s.status}</span> | ${sign}${s.scoreDelta} | ${explanation} |`;
628- }).join("\n");
629- const refactorRows = (verdict.refactors ?? []).length === 0
630- ? ""
631- : `\n\n### refactors\n\n| sha | step | tests | points | explanation |\n|---|---|---|---|---|\n` +
632- verdict.refactors.map((r) => {
633- const sign = r.scoreDelta >= 0 ? "+" : "";
634- const cls = r.testsPassed ? "green" : "red";
635- const verb = r.testsPassed ? "green" : "broke tests";
636- const explanation = (r.explanation ?? "").replace(/\|/g, "\\|");
637- return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | <span class="${cls}">${verb}</span> | ${sign}${r.scoreDelta} | ${explanation} |`;
638- }).join("\n");
639- const modeLine = verdict.mode ? `**mode: ${modeLabel(verdict.mode)}** · ` : "";
640- scoreSection = `${modeLine}**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`;
641- }
642-
643- const body = `# ${owner} · playing ${kataLink}${privateBadge}
644-
645-> ${status}
646-> **${stepCounter}** steps verified
647-
648-## phase log
649-
650-${phaseLog}
651-
652-## score
653-
654-${scoreSection}
655-
656-## clone
657-
658-\`\`\`
659-git clone ${cloneUrl}
660-\`\`\`
661-
662-[← /agents/${owner}](/agents/${owner})${kataExists ? ` · [kata spec →](/games/${repo})` : ""}
663-`;
664-
665- // Dynamic description tailored to this attempt — gives every agent
666- // run a unique snippet for search results and social previews instead
667- // of falling back to the site default.
668- const totalSnippet =
669- verdict !== null
670- ? `, score ${verdict.totalScore >= 0 ? "+" : ""}${verdict.totalScore}`
671- : "";
672- const description = kataExists
673- ? `${owner}'s ${repo} TDD kata attempt on tdd.md — ${verified}${totalSteps !== null ? `/${totalSteps}` : ""} steps verified${totalSnippet}.`
674- : `${owner}/${repo} on tdd.md — ${commits.length} ${commits.length === 1 ? "commit" : "commits"} in the phase log${totalSnippet}.`;
675-
676- const html = await renderPage({
677- title: `${owner} · ${repo}${kataExists ? " TDD kata" : ""} — tdd.md`,
678- description,
679- bodyMarkdown: body,
680- ogPath: `https://tdd.md/${owner}/${repo}`,
681- active: "agents",
682- });
683- return htmlResponse(html);
684-};
685-
686-const port = Number(process.env.PORT ?? 3000);
687-
688-const server = Bun.serve({
689- port,
690- routes: {
691- "/": htmlResponse(HOME_HTML),
692- "/raw": new Response(Bun.file(HOME_MD), {
693- headers: { "Content-Type": "text/markdown; charset=utf-8" },
694- }),
695- "/healthz": new Response("ok"),
696-
697- "/robots.txt": new Response(
698- `User-agent: *\nAllow: /\nDisallow: /auth/\nDisallow: /api/\n\nSitemap: https://tdd.md/sitemap.xml\n`,
699- { headers: { "Content-Type": "text/plain; charset=utf-8" } },
700- ),
701-
702- "/sitemap.xml": async () => {
703- const today = new Date().toISOString().slice(0, 10);
704- const url = (loc: string, priority: string) =>
705- `<url><loc>${loc}</loc><lastmod>${today}</lastmod><priority>${priority}</priority></url>`;
706- const kataUrls = ALL_GAMES.map((g) =>
707- url(`https://tdd.md/games/${g.id}`, "0.8"),
708- ).join("\n");
709- const guideUrls = ALL_GUIDES.map((g) =>
710- url(`https://tdd.md/guides/${g.slug}`, "0.8"),
711- ).join("\n");
712- const blogUrls = ALL_POSTS.map((p) =>
713- url(`https://tdd.md/blog/${p.slug}`, "0.8"),
714- ).join("\n");
715- const xml = `<?xml version="1.0" encoding="UTF-8"?>
716-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
717-${url("https://tdd.md/", "1.0")}
718-${url("https://tdd.md/games", "0.9")}
719-${kataUrls}
720-${url("https://tdd.md/guides", "0.9")}
721-${guideUrls}
722-${url("https://tdd.md/blog", "0.7")}
723-${blogUrls}
724-${url("https://tdd.md/agents", "0.7")}
725-${url("https://tdd.md/leaderboard", "0.7")}
726-</urlset>`;
727- return new Response(xml, {
728- headers: { "Content-Type": "application/xml; charset=utf-8" },
729- });
730- },
731-
732- "/og.svg": new Response(Bun.file("./public/og.svg"), {
733- headers: {
734- "Content-Type": "image/svg+xml",
735- "Cache-Control": "public, max-age=3600",
736- },
737- }),
738-
739- "/games": htmlResponse(GAMES_INDEX_HTML),
740-
741- "/blog": async () => {
742- const rows = ALL_POSTS
743- .map((p) => `| ${p.date} | [${p.title}](/blog/${p.slug}) |`)
744- .join("\n");
745- const body = `# blog
746-
747-Notes on TDD, agentic coding, and the discipline that ties them together.
748-
749-| date | post |
750-|---|---|
751-${rows}
752-
753-> RSS feed coming when there's a second post.
754-
755-[← back to tdd.md](/) · [the guides](/guides) · [the katas](/games)
756-`;
757- const html = await renderPage({
758- title: "Blog — tdd.md",
759- description: "Posts on test-driven development for AI coding agents — how to apply TDD with Claude Code, Cursor, and Aider, what we learn from the verdicts.",
760- bodyMarkdown: body,
761- ogPath: "https://tdd.md/blog",
762- active: "blog",
763- });
764- return htmlResponse(html);
765- },
766-
767- "/blog/:slug": async (req) => {
768- const slug = req.params.slug;
769- const entry = ALL_POSTS.find((p) => p.slug === slug);
770- if (!entry) {
771- const html = await renderNotFound(`/blog/${slug}`);
772- return htmlResponse(html, 404);
773- }
774- const file = Bun.file(`./content/blog/${slug}.md`);
775- if (!(await file.exists())) {
776- const html = await renderNotFound(`/blog/${slug}`);
777- return htmlResponse(html, 404);
778- }
779- const md = await file.text();
780- const html = await renderPage({
781- title: `${entry.title} — tdd.md`,
782- description: entry.description,
783- bodyMarkdown: md,
784- ogPath: `https://tdd.md/blog/${slug}`,
785- active: "blog",
786- jsonLd: {
787- "@context": "https://schema.org",
788- "@type": "BlogPosting",
789- headline: entry.title,
790- description: entry.description,
791- datePublished: entry.date,
792- url: `https://tdd.md/blog/${slug}`,
793- author: { "@type": "Organization", name: "tdd.md" },
794- },
795- });
796- return htmlResponse(html);
797- },
798-
799- "/projects": async () => {
800- const projects = listActiveProjects();
801- const html = await renderPage({
802- title: "Projects — tdd.md",
803- description: "Real repos opted in to tdd.md scoring. Each project drops .tdd-md.json at its root and gets its commits judged structurally for TDD discipline.",
804- bodyMarkdown: projectsLandingMd(projects),
805- ogPath: "https://tdd.md/projects",
806- });
807- return htmlResponse(html);
808- },
809-
810- "/projects/new": async (req) => {
811- const viewer = await getViewer(req);
812- if (req.method === "GET") {
813- const url = new URL(req.url);
814- const prefilled = url.searchParams.get("repo") ?? undefined;
815- const html = await renderPage({
816- title: "Register a project — tdd.md",
817- description: "Onboard a real repo for TDD-discipline scoring. Drops .tdd-md.json at the repo root, register here, and the reports begin tracking commits on its tracked branches.",
818- bodyMarkdown: projectRegisterMd(viewer, prefilled),
819- ogPath: "https://tdd.md/projects/new",
820- noindex: true,
821- });
822- return htmlResponse(html);
823- }
824- if (req.method !== "POST") return new Response("method not allowed", { status: 405 });
825- if (!viewer) return new Response("unauthorized — sign in first", { status: 401 });
826-
827- let raw = "";
828- try {
829- const form = await req.formData();
830- raw = String(form.get("repo") ?? "").trim();
831- } catch {
832- return new Response("invalid form body", { status: 400 });
833- }
834-
835- const renderError = async (message: string, status = 400): Promise<Response> => {
836- const html = await renderPage({
837- title: "Register a project — tdd.md",
838- bodyMarkdown: projectRegisterMd(viewer, raw, message),
839- ogPath: "https://tdd.md/projects/new",
840- noindex: true,
841- });
842- return htmlResponse(html, status);
843- };
844-
845- let owner: string;
846- let repo: string;
847- try {
848- ({ owner, repo } = parseRepoIdentifier(raw));
849- } catch (err) {
850- return renderError((err as Error).message);
851- }
852-
853- let config;
854- try {
855- config = await fetchProjectConfig(owner, repo);
856- } catch (err) {
857- return renderError((err as Error).message);
858- }
859-
860- upsertProject(viewer, owner, repo, config);
861- return new Response(null, {
862- status: 303,
863- headers: { Location: `/projects/${owner}/${repo}` },
864- });
865- },
866-
867- "/projects/:repoOwner/:repoName": async (req) => {
868- const { repoOwner, repoName } = req.params;
869- const project = getProject(repoOwner, repoName);
870- if (!project) {
871- const html = await renderNotFound(`/projects/${repoOwner}/${repoName}`);
872- return htmlResponse(html, 404);
873- }
874- const html = await renderPage({
875- title: `${project.displayName ?? `${project.repoOwner}/${project.repoName}`} — tdd.md`,
876- description: `${project.repoOwner}/${project.repoName} on tdd.md — ${project.testRunner === "none" ? "trace-mode" : project.testRunner} judging across ${project.trackedBranches.join(", ")}.`,
877- bodyMarkdown: projectDetailMd(project),
878- ogPath: `https://tdd.md/projects/${project.repoOwner}/${project.repoName}`,
879- });
880- return htmlResponse(html);
881- },
882-
883- "/reports": async () => {
884- const html = await renderPage({
885- title: "Reports — tdd.md",
886- description: "Per-agent TDD-discipline reporting over real project repos: trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.",
887- bodyMarkdown: reportsLandingMd(),
888- ogPath: "https://tdd.md/reports",
889- noindex: true,
890- });
891- return htmlResponse(html);
892- },
893-
894- "/reports/demo": async () => {
895- const html = await renderPage({
896- title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md",
897- description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.",
898- bodyMarkdown: execSummaryMd(),
899- ogPath: "https://tdd.md/reports/demo",
900- noindex: true,
901- });
902- return htmlResponse(html);
903- },
904-
905- "/reports/demo/tests": async () => {
906- const html = await renderPage({
907- title: "Tests overzicht (demo) — tdd.md",
908- description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.",
909- bodyMarkdown: testsOverviewMd(),
910- ogPath: "https://tdd.md/reports/demo/tests",
911- noindex: true,
912- });
913- return htmlResponse(html);
914- },
915-
916- "/reports/demo/agents/:slug": async (req) => {
917- const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"];
918- const md = agentDrilldownMd(slug);
919- if (!md) {
920- const html = await renderNotFound(`/reports/demo/agents/${slug}`);
921- return htmlResponse(html, 404);
922- }
923- const entry = DEMO_REPORTS.find((r) => r.slug === slug)!;
924- const html = await renderPage({
925- title: `${entry.name} drill-down (demo) — tdd.md`,
926- description: `Per-agent drill-down mockup for ${entry.name}: trend, failure-mode breakdown, recent flagged commits with coaching links.`,
927- bodyMarkdown: md,
928- ogPath: `https://tdd.md/reports/demo/agents/${slug}`,
929- noindex: true,
930- });
931- return htmlResponse(html);
932- },
933-
934- "/guides": async () => {
935- const rows = ALL_GUIDES
936- .map((g) => `| [${g.title}](/guides/${g.slug}) | ${g.description} |`)
937- .join("\n");
938- const body = `# guides
939-
940-Agent-specific walkthroughs for using tdd.md with the major agentic-coding tools. Each guide covers setup, prompt patterns that keep the agent in TDD, and the common pitfalls that cost score.
941-
942-| guide | what it covers |
943-|---|---|
944-${rows}
945-
946-> Missing your agent? [The mechanics are the same](/) — push commits tagged \`red:\` / \`green:\` / \`refactor:\` to your kata repo. Send a PR with a new guide and we'll list it here.
947-
948-[← play a kata](/games) · [register your agent →](/you)
949-`;
950- const html = await renderPage({
951- title: "TDD guides for agentic coding tools — tdd.md",
952- description: "Practical TDD walkthroughs for Claude Code, Cursor, Aider and other AI coding agents — keep your agent honest with red→green→refactor commits, scored by tdd.md.",
953- bodyMarkdown: body,
954- ogPath: "https://tdd.md/guides",
955- active: "guides",
956- });
957- return htmlResponse(html);
958- },
959-
960- "/guides/:slug": async (req) => {
961- const slug = req.params.slug;
962- const entry = ALL_GUIDES.find((g) => g.slug === slug);
963- if (!entry) {
964- const html = await renderNotFound(`/guides/${slug}`);
965- return htmlResponse(html, 404);
966- }
967- const file = Bun.file(`./content/guides/${slug}.md`);
968- if (!(await file.exists())) {
969- const html = await renderNotFound(`/guides/${slug}`);
970- return htmlResponse(html, 404);
971- }
972- const md = await file.text();
973- const html = await renderPage({
974- title: `${entry.title} — tdd.md`,
975- description: entry.description,
976- bodyMarkdown: md,
977- ogPath: `https://tdd.md/guides/${slug}`,
978- active: "guides",
979- });
980- return htmlResponse(html);
981- },
982- "/games/:kata": async (req) => {
983- const res = await renderKata(req.params.kata);
984- if (res) return res;
985- const html = await renderNotFound(`/games/${req.params.kata}`);
986- return htmlResponse(html, 404);
987- },
988-
989- "/agents": () => renderAgentsIndex(),
990- "/agents/register": htmlResponse(REGISTER_HTML),
991- "/agents/:name": async (req) => {
992- const name = req.params.name;
993- const viewer = await getViewer(req);
994- const userRes = await fetch(`${FORGEJO_INTERNAL}/api/v1/users/${encodeURIComponent(name)}`, {
995- headers: adminApiHeaders(),
996- });
997- // Treat private/limited users as if they don't exist publicly —
998- // unless the logged-in viewer IS the owner. Owner can always see
999- // their own dashboard, public or not.
1000- if (userRes.ok) {
1001- const u = (await userRes.clone().json()) as ForgejoUserSummary;
1002- const ownVisibility = u.visibility ?? "public";
1003- if (ownVisibility !== "public" && viewer !== name) {
1004- const html = await renderNotFound(`/agents/${name}`);
1005- return htmlResponse(html, 404);
1006- }
1007- }
1008- if (userRes.status === 404) {
1009- const html = await renderPage({
1010- title: `${name} — agents — tdd.md`,
1011- bodyMarkdown: `# agents / ${name}\n\n> No agent registered with this name.\n\n[← all agents](/agents) · [register your own →](/agents/register)`,
1012- ogPath: `https://tdd.md/agents/${name}`,
1013- active: "agents",
1014- });
1015- return htmlResponse(html, 404);
1016- }
1017- const reposRes = await fetch(`${FORGEJO_INTERNAL}/api/v1/users/${encodeURIComponent(name)}/repos?limit=50`, {
1018- headers: adminApiHeaders(),
1019- });
1020- const repos = reposRes.ok ? ((await reposRes.json()) as { name: string; description: string }[]) : [];
1021-
1022- const progressByRepo = await Promise.all(
1023- repos.map(async (r) => {
1024- const cRes = await fetch(
1025- `${FORGEJO_INTERNAL}/api/v1/repos/${encodeURIComponent(name)}/${encodeURIComponent(r.name)}/commits?limit=50&stat=false`,
1026- { headers: adminApiHeaders() },
1027- );
1028- const commits = cRes.ok ? ((await cRes.json()) as { commit: { message: string } }[]) : [];
1029- return { repo: r, progress: computeProgress(commits) };
1030- }),
1031- );
1032-
1033- const totals: Record<string, number> = {};
1034- for (const r of repos) {
1035- try {
1036- const game = await loadGame(r.name);
1037- totals[r.name] = game.steps.length;
1038- } catch {
1039- // unknown kata, no total
1040- }
1041- }
1042-
1043- const isSelf = viewer === name;
1044- let body = `# agents / ${name}\n\n`;
1045- if (isSelf) {
1046- body += `> Welcome back, ${name}. This is your dashboard — only you and admins see it when your profile is private.\n\n`;
1047- }
1048- if (repos.length === 0) {
1049- body += "> Registered, but no kata attempts yet.\n\n[← all agents](/agents)";
1050- } else {
1051- body += "## attempts\n\n";
1052- body += "| kata | verified | phases |\n|---|---|---|\n";
1053- for (const { repo: r, progress } of progressByRepo) {
1054- const total = totals[r.name];
1055- const verified = progress.verifiedSteps.size;
1056- const counter = total !== undefined ? `${verified} / ${total}` : `${verified} / ?`;
1057- const phases = `<span class="red">red ${progress.redCount}</span> · <span class="green">green ${progress.greenCount}</span> · <span class="blue">refactor ${progress.refactorCount}</span>`;
1058- body += `| [${r.name}](/${name}/${r.name}) | ${counter} | ${phases} |\n`;
1059- }
1060- }
1061-
1062- if (isSelf) {
1063- body += `\n\n---\n\n[sign out](/auth/logout) · [toggle visibility](#) <span class="muted">(POST /api/agents/${name}/visibility with your push token)</span>`;
1064- }
1065-
1066- const verifiedSteps = progressByRepo.reduce((acc, p) => acc + p.progress.verifiedSteps.size, 0);
1067- const description =
1068- repos.length === 0
1069- ? `${name} just registered on tdd.md — no kata attempts yet.`
1070- : `${name}'s TDD attempts on tdd.md: ${repos.length} ${repos.length === 1 ? "kata" : "katas"} pushed, ${verifiedSteps} verified red→green ${verifiedSteps === 1 ? "step" : "steps"}.`;
1071- const html = await renderPage({
1072- title: `${name} · TDD attempts — tdd.md`,
1073- description,
1074- bodyMarkdown: body,
1075- ogPath: `https://tdd.md/agents/${name}`,
1076- active: "agents",
1077- });
1078- return htmlResponse(html);
1079- },
1080- // Redirect the legacy URL to the canonical /:owner/:repo path —
1081- // /agents/:name/:kata used to render a placeholder before the
1082- // GitHub-style routing landed.
1083- "/agents/:name/:kata": (req) =>
1084- Response.redirect(`/${req.params.name}/${req.params.kata}`, 301),
1085-
1086- "/leaderboard": () => renderLeaderboard(),
1087-
1088- "/api/judge/:owner/:repo": async (req) => {
1089- if (req.method !== "POST") {
1090- return new Response("method not allowed; POST to trigger a judge run", { status: 405 });
1091- }
1092- // Manual triggers require the admin token. Push-driven runs come
1093- // through /api/forgejo/webhook with HMAC signature verification.
1094- const adminToken = process.env.FORGEJO_ADMIN_TOKEN;
1095- const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? "";
1096- if (!adminToken || !timingSafeEqual(provided, adminToken)) {
1097- return new Response("unauthorized — POST with `Authorization: Bearer <admin-token>`", { status: 401 });
1098- }
1099- try {
1100- const verdict = await judge(req.params.owner, req.params.repo);
1101- return Response.json(verdict);
1102- } catch (err) {
1103- return Response.json({ error: (err as Error).message }, { status: 500 });
1104- }
1105- },
1106-
1107- // Self-service visibility toggle. Agent posts their push token in
1108- // Authorization, picks "public" | "limited" | "private". We verify
1109- // the token actually belongs to :name by hitting Forgejo's /user
1110- // endpoint with it, then PATCH the user via admin token.
1111- "/api/agents/:name/visibility": async (req) => {
1112- if (req.method !== "POST") return new Response("POST only", { status: 405 });
1113- const name = req.params.name;
1114- const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? "";
1115- if (!provided) return Response.json({ error: "missing bearer token" }, { status: 401 });
1116-
1117- // Verify the token belongs to :name (or is the admin token).
1118- const adminToken = process.env.FORGEJO_ADMIN_TOKEN ?? "";
1119- let allowed = adminToken && timingSafeEqual(provided, adminToken);
1120- if (!allowed) {
1121- const meRes = await fetch(`${FORGEJO_INTERNAL}/api/v1/user`, {
1122- headers: { Authorization: `token ${provided}` },
1123- });
1124- if (meRes.ok) {
1125- const me = (await meRes.json()) as { login?: string };
1126- allowed = me.login === name;
1127- }
1128- }
1129- if (!allowed) return Response.json({ error: "token does not match agent" }, { status: 403 });
1130-
1131- let body: { visibility?: string };
1132- try {
1133- body = (await req.json()) as { visibility?: string };
1134- } catch {
1135- return Response.json({ error: "invalid json" }, { status: 400 });
1136- }
1137- const visibility = body.visibility;
1138- if (visibility !== "public" && visibility !== "limited" && visibility !== "private") {
1139- return Response.json(
1140- { error: "visibility must be one of public|limited|private" },
1141- { status: 400 },
1142- );
1143- }
1144-
1145- const patchRes = await fetch(
1146- `${FORGEJO_INTERNAL}/api/v1/admin/users/${encodeURIComponent(name)}`,
1147- {
1148- method: "PATCH",
1149- headers: { ...adminApiHeaders(), "Content-Type": "application/json" },
1150- body: JSON.stringify({ visibility, source_id: 0, login_name: name }),
1151- },
1152- );
1153- if (!patchRes.ok) {
1154- const text = await patchRes.text();
1155- return Response.json(
1156- { error: `forgejo PATCH failed: ${patchRes.status} ${text}` },
1157- { status: 502 },
1158- );
1159- }
1160- return Response.json({ name, visibility });
1161- },
1162-
1163- "/api/forgejo/webhook": async (req) => {
1164- if (req.method !== "POST") return new Response("POST only", { status: 405 });
1165- const secret = process.env.WEBHOOK_SECRET;
1166- if (!secret) return new Response("webhook not configured", { status: 503 });
1167-
1168- const body = await req.text();
1169- const provided =
1170- req.headers.get("x-forgejo-signature") ?? req.headers.get("x-gitea-signature") ?? "";
1171- const expected = await hmacSha256Hex(secret, body);
1172- if (provided.length !== expected.length || !timingSafeEqual(provided, expected)) {
1173- return new Response("invalid signature", { status: 401 });
1174- }
1175-
1176- let payload: { repository?: { owner?: { login?: string }; name?: string }; ref?: string };
1177- try {
1178- payload = JSON.parse(body);
1179- } catch {
1180- return new Response("invalid json", { status: 400 });
1181- }
1182- const owner = payload.repository?.owner?.login;
1183- const repo = payload.repository?.name;
1184- if (!owner || !repo) return new Response("missing owner/repo", { status: 400 });
1185-
1186- // Fire the judge in the background; ack immediately so Forgejo
1187- // doesn't time out while we're checking out commits.
1188- void judge(owner, repo).catch((err) => {
1189- console.error(`judge failed for ${owner}/${repo}:`, err);
1190- });
1191- return Response.json({ accepted: true, owner, repo });
1192- },
1193-
1194- "/you": async (req) => {
1195- const viewer = await getViewer(req);
1196- const target = viewer ? `/agents/${viewer}` : "/auth/github/start";
1197- return new Response(null, { status: 302, headers: { Location: target } });
1198- },
1199-
1200- "/auth/logout": (_req) => {
1201- // Clear the session cookie and bounce back home.
1202- return new Response(null, {
1203- status: 302,
1204- headers: {
1205- Location: "/",
1206- "Set-Cookie": sessionCookieHeader("", 0),
1207- },
1208- });
1209- },
1210-
1211- "/auth/github/start": (_req) => {
1212- if (!github.isConfigured() || !forgejo.isConfigured()) {
1213- return errorPage("registration is not configured on this server", 503);
1214- }
1215- const nonce = randomHex(16);
1216- return new Response(null, {
1217- status: 302,
1218- headers: {
1219- Location: github.authorizeUrl(nonce, CALLBACK_URL),
1220- "Set-Cookie": `tdd_oauth_state=${nonce}; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=600`,
1221- },
1222- });
1223- },
1224-
1225- "/auth/github/callback": async (req) => {
1226- const url = new URL(req.url);
1227- const code = url.searchParams.get("code");
1228- const state = url.searchParams.get("state");
1229- if (!code || !state) return errorPage("missing code or state");
1230-
1231- const cookies = parseCookies(req.headers.get("cookie"));
1232- const cookieState = cookies.tdd_oauth_state;
1233- if (!cookieState || !timingSafeEqual(cookieState, state)) {
1234- return errorPage("state mismatch — open the registration page again and retry");
1235- }
1236-
1237- let username: string;
1238- let email: string;
1239- let fullName: string | null;
1240- try {
1241- const accessToken = await github.exchangeCode(code, CALLBACK_URL);
1242- const user = await github.fetchUser(accessToken);
1243- username = user.login;
1244- fullName = user.name;
1245- // GitHub's noreply email format: unique per account, never collides
1246- // with another Forgejo user. We don't need a deliverable address —
1247- // agents authenticate by token, not by email reset flow.
1248- email = `${user.id}+${user.login}@users.noreply.github.com`;
1249- } catch (err) {
1250- return errorPage(`github oauth failed: ${(err as Error).message}`, 400);
1251- }
1252-
1253- // Login vs register: if the user already exists in Forgejo, this
1254- // is a returning visitor — set the session cookie, redirect to
1255- // their dashboard, don't rotate their token.
1256- const isExisting = await forgejo.userExists(username);
1257- const sessionToken = await signSession(username);
1258- const sessionCookie = sessionCookieHeader(sessionToken, SESSION_TTL_SEC);
1259- const clearOauthState =
1260- "tdd_oauth_state=; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=0";
1261-
1262- if (isExisting) {
1263- return new Response(null, {
1264- status: 302,
1265- headers: new Headers([
1266- ["Location", `/agents/${username}`],
1267- ["Set-Cookie", sessionCookie],
1268- ["Set-Cookie", clearOauthState],
1269- ]),
1270- });
1271- }
1272-
1273- let reg: forgejo.AgentRegistration;
1274- try {
1275- reg = await forgejo.registerAgent({
1276- username,
1277- email,
1278- fullName: fullName ?? undefined,
1279- });
1280- } catch (err) {
1281- return errorPage(`failed to create your agent: ${(err as Error).message}`, 422);
1282- }
1283-
1284- const verb = reg.isNew ? "created" : "rotated";
1285- const body = `# welcome, ${reg.username}
1286-
1287-> Your tdd.md agent has been ${verb}. **Save the token below — this page is the only time you'll see it.** If you lose it, [register again](/agents/register) to issue a fresh one (the old one will stop working).
1288-
1289-## push token
1290-
1291-\`\`\`
1292-${reg.pushToken}
1293-\`\`\`
1294-
1295-## kata: string-calc
1296-
1297-Your repo is at [\`git.tdd.md/${reg.username}/string-calc\`](https://git.tdd.md/${reg.username}/string-calc), already initialized with a default branch \`main\`.
1298-
1299-\`\`\`
1300-git clone ${reg.repoCloneUrl}
1301-cd string-calc
1302-
1303-# play the kata, commit per phase
1304-# red: commit a failing test
1305-# green: commit the impl that makes it pass
1306-# refactor: commit a structural change with tests staying green
1307-
1308-git push
1309-# username: ${reg.username}
1310-# password: <paste the token above>
1311-\`\`\`
1312-
1313-When you push, the judge replays your commits and posts the verdict at [/agents/${reg.username}/string-calc](/agents/${reg.username}/string-calc).
1314-
1315-[← spec](/games/string-calc) · [all agents](/agents)
1316-`;
1317-
1318- const html = await renderPage({
1319- title: `welcome ${reg.username} — tdd.md`,
1320- bodyMarkdown: body,
1321- active: "agents",
1322- noindex: true,
1323- });
1324- return new Response(html, {
1325- headers: new Headers([
1326- ["Content-Type", "text/html; charset=utf-8"],
1327- ["Set-Cookie", sessionCookie],
1328- ["Set-Cookie", clearOauthState],
1329- ]),
1330- });
1331- },
1332- },
1333-
1334- async fetch(req) {
1335- const url = new URL(req.url);
1336-
1337- // Bare /<owner>/<repo>.git (no sub-path) is what someone gets when
1338- // they paste the clone URL into a browser. Without intervention our
1339- // proxy hands it to Forgejo, which renders its own repo page —
1340- // Forgejo's chrome leaks onto tdd.md. Redirect to the clean URL
1341- // so the visitor lands on our Bun-native scoreboard instead. Real
1342- // git operations always have sub-paths (/info/refs, /git-upload-pack,
1343- // /objects/...) and continue to be proxied below.
1344- const bareGitUrl = url.pathname.match(
1345- /^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\.git\/?$/,
1346- );
1347- if (bareGitUrl) {
1348- return new Response(null, {
1349- status: 302,
1350- headers: { Location: `/${bareGitUrl[1]}/${bareGitUrl[2]}` },
1351- });
1352- }
1353-
1354- // Git smart-HTTP and dumb-HTTP — proxy raw to Forgejo.
1355- if (isGitProtocol(url.pathname, url.searchParams)) {
1356- return proxyToForgejo(req, url.pathname + url.search);
1357- }
1358-
1359- // Bare repo URL: /<owner>/<repo> — render Bun-native view via Forgejo API.
1360- // Two segments only, no trailing path. Reserved top-level paths are
1361- // already matched by explicit routes above, so they never reach here.
1362- const repoMatch = url.pathname.match(/^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\/?$/);
1363- if (repoMatch) {
1364- const viewer = await getViewer(req);
1365- return renderRepoView(repoMatch[1]!, repoMatch[2]!, viewer);
1366- }
1367-
1368- const html = await renderNotFound(url.pathname);
1369- return htmlResponse(html, 404);
1370- },
1371-
1372- error(err) {
1373- console.error(err);
1374- return new Response("internal error", { status: 500 });
1375- },
1376-});
1377-
1378-console.log(`tdd.md → ${server.url}`);