ade29aa6e3e797fcfcfe1680023b69205a704e17 diff --git a/Containerfile b/Containerfile index 02ecd24ff9aa83541d94a08aa39c3f18331ab977..a4425e20d760283036dee1b37335739c091d49cf 100644 --- a/Containerfile +++ b/Containerfile @@ -25,4 +25,4 @@ EXPOSE 3000 # external probes (cloudflared upstream + the deploy script's /healthz # poll) already cover liveness. -CMD ["bun", "src/server.ts"] +CMD ["bun", "src/c11_server.ts"] diff --git a/content/games/fizzbuzz/spec.ts b/content/games/fizzbuzz/spec.ts index 1776a5f1036db0d12a24f428d56c38f46062858a..2089d9b3874ee3ab005c5655bc46335dd4fb7d83 100644 --- a/content/games/fizzbuzz/spec.ts +++ b/content/games/fizzbuzz/spec.ts @@ -1,4 +1,4 @@ -import type { Game } from "../../../src/games"; +import type { Game } from "../../../src/c31_games.ts"; export const spec: Game = { id: "fizzbuzz", diff --git a/content/games/string-calc/spec.ts b/content/games/string-calc/spec.ts index 2f824842345d0ef756d3aaa09088e7a71031f891..e81fbcc56fa44a8abbe09a4227128f9d7a5069fd 100644 --- a/content/games/string-calc/spec.ts +++ b/content/games/string-calc/spec.ts @@ -1,4 +1,4 @@ -import type { Game } from "../../../src/games"; +import type { Game } from "../../../src/c31_games.ts"; export const spec: Game = { id: "string-calc", diff --git a/package.json b/package.json index 5e4a8c494ec9aef02cd133ec222da483a572ddce..48b2124564e7bcec7b8cd2f0a6d64b8f73c71991 100644 --- a/package.json +++ b/package.json @@ -2,10 +2,10 @@ "name": "tdd.md", "private": true, "type": "module", - "module": "src/server.ts", + "module": "src/c11_server.ts", "scripts": { - "dev": "bun --hot src/server.ts", - "start": "bun src/server.ts" + "dev": "bun --hot src/c11_server.ts", + "start": "bun src/c11_server.ts" }, "dependencies": { "marked": "^14.1.4" diff --git a/src/c11_server.ts b/src/c11_server.ts new file mode 100644 index 0000000000000000000000000000000000000000..11e9533458f3d67c449ede48929121afde8038fa --- /dev/null +++ b/src/c11_server.ts @@ -0,0 +1,10 @@ +// c11 — server entry: 
env + Bun.serve startup. No route logic, no SQL, +// no HTML. The route table, fallback fetch, and error handler live in +// c21_app.ts; this file just reads PORT and asks createApp() to bind. + +import { createApp } from "./c21_app.ts"; + +const port = Number(process.env.PORT ?? 3000); +const server = createApp(port); + +console.log(`tdd.md → ${server.url}`); diff --git a/src/c13_database.ts b/src/c13_database.ts new file mode 100644 index 0000000000000000000000000000000000000000..c565cfbaaab796146705329a0deda509e4c99186 --- /dev/null +++ b/src/c13_database.ts @@ -0,0 +1,214 @@ +import { Database } from "bun:sqlite"; +import type { ProjectConfig, TestRunner } from "./c31_project_config.ts"; + +const DB_PATH = process.env.TDD_DB_PATH ?? ":memory:"; + +let db: Database | null = null; + +const getDb = (): Database => { + if (db) return db; + db = new Database(DB_PATH, { create: true }); + db.exec(` + CREATE TABLE IF NOT EXISTS runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + owner TEXT NOT NULL, + repo TEXT NOT NULL, + head_sha TEXT NOT NULL, + judged_at INTEGER NOT NULL, + verdict_json TEXT NOT NULL + ); + CREATE INDEX IF NOT EXISTS idx_runs_owner_repo + ON runs(owner, repo, judged_at DESC); + + CREATE TABLE IF NOT EXISTS projects ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + registered_by TEXT NOT NULL, + repo_owner TEXT NOT NULL, + repo_name TEXT NOT NULL, + test_runner TEXT NOT NULL DEFAULT 'none', + tracked_branches TEXT NOT NULL, + display_name TEXT, + team TEXT, + registered_at INTEGER NOT NULL, + status TEXT NOT NULL DEFAULT 'active', + UNIQUE(repo_owner, repo_name) + ); + CREATE INDEX IF NOT EXISTS idx_projects_registered_by + ON projects(registered_by); + `); + return db; +}; + +export type Mode = "strict" | "pragmatic" | "learning"; + +export interface StepVerdict { + stepId: string; + redSha: string | null; + greenSha: string | null; + redFailed: boolean | null; + greenPassed: boolean | null; + // Whether the kata's authoritative hidden tests pass against 
the agent's + // implementation at the green commit. null when no hidden tests exist + // for the step (unknown kata, or step not registered with the spec). + hiddenPassed: boolean | null; + status: + | "verified" + | "discipline-only" + | "no-green" + | "red-did-not-fail" + | "green-did-not-pass" + | "hidden-tests-failed" + | "test-deleted" + // Trace-only mode: tests not executed, only commit discipline checked. + // Used when test_runner: "none" — language-agnostic, useful as a + // CI gate on real projects where Bun can't run the test suite. + | "trace-verified" + | "trace-tests-shrunk"; + scoreDelta: number; + // Coach-style explanation of the verdict — what happened, why the score + // is what it is, and (when relevant) how to improve next time. + explanation: string; +} + +export interface RefactorVerdict { + sha: string; + stepId: string | null; + testsPassed: boolean; + scoreDelta: number; + explanation: string; +} + +export interface Verdict { + headSha: string; + mode: Mode; + steps: StepVerdict[]; + refactors: RefactorVerdict[]; + totalScore: number; + judgedAt: number; +} + +export const saveRun = (owner: string, repo: string, verdict: Verdict): void => { + getDb().run( + `INSERT INTO runs (owner, repo, head_sha, judged_at, verdict_json) VALUES (?, ?, ?, ?, ?)`, + [owner, repo, verdict.headSha, verdict.judgedAt, JSON.stringify(verdict)], + ); +}; + +export const latestRun = (owner: string, repo: string): Verdict | null => { + const row = getDb() + .query<{ verdict_json: string }, [string, string]>( + `SELECT verdict_json FROM runs WHERE owner = ? AND repo = ? 
ORDER BY judged_at DESC LIMIT 1`, + ) + .get(owner, repo); + if (!row) return null; + return JSON.parse(row.verdict_json) as Verdict; +}; + +export interface ProjectRow { + id: number; + registeredBy: string; + repoOwner: string; + repoName: string; + testRunner: TestRunner; + trackedBranches: string[]; + displayName: string | null; + team: string | null; + registeredAt: number; + status: "active" | "paused"; +} + +interface ProjectDbRow { + id: number; + registered_by: string; + repo_owner: string; + repo_name: string; + test_runner: string; + tracked_branches: string; + display_name: string | null; + team: string | null; + registered_at: number; + status: string; +} + +const rowToProject = (r: ProjectDbRow): ProjectRow => ({ + id: r.id, + registeredBy: r.registered_by, + repoOwner: r.repo_owner, + repoName: r.repo_name, + testRunner: (r.test_runner === "bun" ? "bun" : "none") as TestRunner, + trackedBranches: JSON.parse(r.tracked_branches) as string[], + displayName: r.display_name, + team: r.team, + registeredAt: r.registered_at, + status: r.status === "paused" ? "paused" : "active", +}); + +// Inserts or updates a project. Re-registering the same repo refreshes +// its config (test_runner, tracked_branches, display_name, team) without +// duplicating the row. Returns the stored project. +export const upsertProject = ( + registeredBy: string, + repoOwner: string, + repoName: string, + config: ProjectConfig, +): ProjectRow => { + const now = Date.now(); + const branches = JSON.stringify(config.tracked_branches); + const display = config.display_name ?? null; + const team = config.team ?? 
null; + getDb().run( + `INSERT INTO projects (registered_by, repo_owner, repo_name, test_runner, tracked_branches, display_name, team, registered_at, status) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active') + ON CONFLICT(repo_owner, repo_name) DO UPDATE SET + test_runner = excluded.test_runner, + tracked_branches = excluded.tracked_branches, + display_name = excluded.display_name, + team = excluded.team, + status = 'active'`, + [registeredBy, repoOwner, repoName, config.test_runner, branches, display, team, now], + ); + const row = getDb() + .query( + `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`, + ) + .get(repoOwner, repoName); + if (!row) throw new Error("project upsert returned no row"); + return rowToProject(row); +}; + +export const getProject = (repoOwner: string, repoName: string): ProjectRow | null => { + const row = getDb() + .query( + `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`, + ) + .get(repoOwner, repoName); + return row ? rowToProject(row) : null; +}; + +export const listActiveProjects = (): ProjectRow[] => { + const rows = getDb() + .query( + `SELECT * FROM projects WHERE status = 'active' ORDER BY registered_at DESC`, + ) + .all(); + return rows.map(rowToProject); +}; + +// Latest verdict per (owner, repo) across all agents — drives the +// leaderboard and the /agents index. 
+export const allLatestRuns = (): { owner: string; repo: string; verdict: Verdict }[] => { + const rows = getDb() + .query<{ owner: string; repo: string; verdict_json: string }, []>( + `SELECT owner, repo, verdict_json FROM runs r1 + WHERE judged_at = ( + SELECT MAX(judged_at) FROM runs r2 + WHERE r2.owner = r1.owner AND r2.repo = r1.repo + )`, + ) + .all(); + return rows.map((r) => ({ + owner: r.owner, + repo: r.repo, + verdict: JSON.parse(r.verdict_json) as Verdict, + })); +}; diff --git a/src/c14_forgejo.ts b/src/c14_forgejo.ts new file mode 100644 index 0000000000000000000000000000000000000000..cbcd36745ff8f483807400ea446a3829804b369d --- /dev/null +++ b/src/c14_forgejo.ts @@ -0,0 +1,345 @@ +// c14 — secondary I/O: HTTP client to the local Forgejo instance. Owns +// every URL reachable at git.tdd.md (admin API, user repos, raw git +// protocol, webhook setup) plus the proxy that forwards git-protocol +// requests through tdd.md to keep the public hostname uniform. + +// Internal URL — Bun container talks to Forgejo via host.containers.internal +// (rootless podman's standard hostname for the host network). Falls back to +// the public URL for local dev. +export const FORGEJO_URL = process.env.FORGEJO_URL ?? "https://git.tdd.md"; +const ADMIN_TOKEN = process.env.FORGEJO_ADMIN_TOKEN ?? 
""; + +const adminAuth = (): HeadersInit => ({ + Authorization: `token ${ADMIN_TOKEN}`, +}); + +const userAuth = (username: string, password: string): HeadersInit => ({ + Authorization: `Basic ${btoa(`${username}:${password}`)}`, +}); + +export const isConfigured = (): boolean => ADMIN_TOKEN !== ""; + +export const userExists = async (username: string): Promise => { + const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}`, { + headers: adminAuth(), + }); + return res.status === 200; +}; + +export const createUser = async (params: { + username: string; + email: string; + password: string; + fullName?: string; +}): Promise => { + const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users`, { + method: "POST", + headers: { ...adminAuth(), "Content-Type": "application/json" }, + body: JSON.stringify({ + username: params.username, + email: params.email, + password: params.password, + full_name: params.fullName ?? params.username, + must_change_password: false, + send_notify: false, + }), + }); + if (!res.ok) { + const text = await res.text(); + throw new Error(`forgejo createUser ${res.status}: ${text}`); + } +}; + +export const setUserPassword = async (username: string, password: string): Promise => { + const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(username)}`, { + method: "PATCH", + headers: { ...adminAuth(), "Content-Type": "application/json" }, + body: JSON.stringify({ + password, + must_change_password: false, + source_id: 0, + login_name: username, + }), + }); + if (!res.ok) { + const text = await res.text(); + throw new Error(`forgejo setUserPassword ${res.status}: ${text}`); + } +}; + +export const repoExists = async (owner: string, repo: string): Promise => { + const res = await fetch(`${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`, { + headers: adminAuth(), + }); + return res.status === 200; +}; + +// Creates a per-repo webhook that fires on push events. 
The webhook +// posts to /api/forgejo/webhook on tdd.md, signed with WEBHOOK_SECRET so +// our endpoint can verify it. Idempotent — checks for an existing hook +// with the same URL before creating. +export const ensureRepoWebhook = async (params: { + owner: string; + repo: string; + webhookUrl: string; + secret: string; +}): Promise => { + const base = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(params.owner)}/${encodeURIComponent(params.repo)}/hooks`; + const listRes = await fetch(base, { headers: adminAuth() }); + if (listRes.ok) { + const hooks = (await listRes.json()) as { id: number; config: { url?: string } }[]; + const exists = hooks.some((h) => h.config?.url === params.webhookUrl); + if (exists) return; + } + const res = await fetch(base, { + method: "POST", + headers: { ...adminAuth(), "Content-Type": "application/json" }, + body: JSON.stringify({ + type: "forgejo", + active: true, + events: ["push"], + config: { + url: params.webhookUrl, + content_type: "json", + secret: params.secret, + }, + }), + }); + if (!res.ok) { + const text = await res.text(); + throw new Error(`forgejo ensureRepoWebhook ${res.status}: ${text}`); + } +}; + +export const createRepoForUser = async (params: { + username: string; + name: string; + description?: string; +}): Promise => { + const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(params.username)}/repos`, { + method: "POST", + headers: { ...adminAuth(), "Content-Type": "application/json" }, + body: JSON.stringify({ + name: params.name, + description: params.description ?? "", + // Private by default — the source is the agent's, not ours to + // publish. Verdicts still render on tdd.md via admin-mediated + // API calls; clones require the agent's push token. + private: true, + // No auto_init: the agent's first push becomes the genuine initial + // commit. An admin-authored "Initial commit" would muddle the phase + // log and break attribution on the agent's repo page. 
+ auto_init: false, + default_branch: "main", + }), + }); + if (!res.ok) { + const text = await res.text(); + throw new Error(`forgejo createRepo ${res.status}: ${text}`); + } +}; + +interface TokenInfo { + id: number; + name: string; +} + +const listTokens = async (username: string, password: string): Promise => { + const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens`, { + headers: userAuth(username, password), + }); + if (!res.ok) return []; + return (await res.json()) as TokenInfo[]; +}; + +const deleteToken = async (username: string, password: string, tokenId: number): Promise => { + await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens/${tokenId}`, { + method: "DELETE", + headers: userAuth(username, password), + }); +}; + +export const createPushToken = async (params: { + username: string; + password: string; + name: string; +}): Promise => { + // Revoke any existing tokens with the same name so re-registration always + // returns a fresh one and the previous one is invalidated. + const existing = await listTokens(params.username, params.password); + for (const t of existing) { + if (t.name === params.name) { + await deleteToken(params.username, params.password, t.id); + } + } + + const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(params.username)}/tokens`, { + method: "POST", + headers: { ...userAuth(params.username, params.password), "Content-Type": "application/json" }, + body: JSON.stringify({ + name: params.name, + // write:repository for the push; read:user so the agent can + // verify their own identity against tdd.md's self-service + // endpoints (e.g. POST /api/agents/:name/visibility). 
+ scopes: ["write:repository", "read:user"], + }), + }); + if (!res.ok) { + const text = await res.text(); + throw new Error(`forgejo createPushToken ${res.status}: ${text}`); + } + const data = (await res.json()) as { sha1: string }; + return data.sha1; +}; + +const randomPassword = (): string => + Array.from(crypto.getRandomValues(new Uint8Array(32))) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + +export interface AgentRegistration { + username: string; + pushToken: string; + repoCloneUrl: string; + isNew: boolean; +} + +// Idempotent: if the user exists, reset their password and rotate the push +// token. Always also ensures the kata repo exists. +export const registerAgent = async (params: { + username: string; + email: string; + fullName?: string; + kata?: string; +}): Promise => { + const password = randomPassword(); + const isNew = !(await userExists(params.username)); + + if (isNew) { + await createUser({ + username: params.username, + email: params.email, + password, + fullName: params.fullName, + }); + } else { + await setUserPassword(params.username, password); + } + + const pushToken = await createPushToken({ + username: params.username, + password, + name: "tdd-md-push", + }); + + const kata = params.kata ?? "string-calc"; + if (!(await repoExists(params.username, kata))) { + await createRepoForUser({ + username: params.username, + name: kata, + description: `${params.username}'s submission for the ${kata} kata`, + }); + } + + const baseUrl = process.env.BASE_URL ?? "https://tdd.md"; + const webhookSecret = process.env.WEBHOOK_SECRET; + if (webhookSecret) { + try { + await ensureRepoWebhook({ + owner: params.username, + repo: kata, + webhookUrl: `${baseUrl}/api/forgejo/webhook`, + secret: webhookSecret, + }); + } catch (err) { + // Webhook is convenience; registration must still succeed without it. 
+ console.error(`webhook setup failed for ${params.username}/${kata}:`, err); + } + } + + return { + username: params.username, + pushToken, + repoCloneUrl: `${baseUrl}/${params.username}/${kata}.git`, + isNew, + }; +}; + +// --------------------------------------------------------------------- +// Read-side helpers used by c21 handlers + c51 rendering. +// --------------------------------------------------------------------- + +export interface ForgejoUserSummary { + id: number; + login: string; + is_admin?: boolean; + // Forgejo visibility levels: "public" | "limited" | "private". + // Anything other than "public" is hidden from anonymous tdd.md visitors. + visibility?: string; +} + +// Admin-token-authenticated headers for API calls. Agent repos are +// private by default; rendering the verdict page must still work. We +// proxy the data through the admin identity, never exposing the source +// or push protocol publicly. +export const adminApiHeaders = (): HeadersInit => { + const token = process.env.FORGEJO_ADMIN_TOKEN; + return token ? { Authorization: `token ${token}` } : {}; +}; + +// Single-user visibility lookup for /:owner/:repo and /agents/:name. +// Returns the raw Forgejo string (or null if the user doesn't exist). +export const getUserVisibility = async (name: string): Promise => { + const r = await fetch( + `${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}`, + { headers: adminApiHeaders() }, + ); + if (!r.ok) return null; + const u = (await r.json()) as ForgejoUserSummary; + return u.visibility ?? "public"; +}; + +const HOP_BY_HOP = [ + "host", + "connection", + "keep-alive", + "transfer-encoding", + "upgrade", + "proxy-authorization", + "proxy-connection", + "te", + "trailer", +]; + +// Forward git protocol + Forgejo API/asset requests to Forgejo via the host +// network. Lets us serve everything under tdd.md (GitHub-style) without +// exposing git.tdd.md externally. 
+export const proxyToForgejo = async (req: Request, pathAndQuery: string): Promise => { + const upstream = `${FORGEJO_URL}${pathAndQuery}`; + const headers = new Headers(req.headers); + for (const h of HOP_BY_HOP) headers.delete(h); + headers.set("X-Forwarded-Host", "tdd.md"); + headers.set("X-Forwarded-Proto", "https"); + headers.set("X-Forwarded-For", req.headers.get("cf-connecting-ip") ?? "0.0.0.0"); + + let body: ArrayBuffer | undefined; + if (req.method !== "GET" && req.method !== "HEAD") { + body = await req.arrayBuffer(); + } + + const upstreamRes = await fetch(upstream, { + method: req.method, + headers, + body, + redirect: "manual", + }); + + const responseHeaders = new Headers(upstreamRes.headers); + for (const h of HOP_BY_HOP) responseHeaders.delete(h); + + return new Response(upstreamRes.body, { + status: upstreamRes.status, + statusText: upstreamRes.statusText, + headers: responseHeaders, + }); +}; diff --git a/src/c14_github.ts b/src/c14_github.ts new file mode 100644 index 0000000000000000000000000000000000000000..330cbe82ae9abbbc78727397d08253021f039939 --- /dev/null +++ b/src/c14_github.ts @@ -0,0 +1,122 @@ +// c14 — secondary I/O: HTTP clients to GitHub. Two concerns under one roof: +// 1. OAuth flow for sign-in (used by /auth/github/start + callback). +// 2. Raw-content fetch of `.tdd-md.json` from a public repo's default +// branch, for project onboarding. +// Both talk to GitHub; both are pure HTTP, no in-process state. + +import { + PROJECT_CONFIG_PATH, + parseProjectConfig, + type ProjectConfig, +} from "./c31_project_config.ts"; + +const CLIENT_ID = process.env.GITHUB_CLIENT_ID ?? ""; +const CLIENT_SECRET = process.env.GITHUB_CLIENT_SECRET ?? 
""; + +export interface GithubUser { + login: string; + id: number; + email: string | null; + avatar_url: string; + name: string | null; +} + +export interface GithubEmail { + email: string; + primary: boolean; + verified: boolean; + visibility: string | null; +} + +export const isConfigured = (): boolean => CLIENT_ID !== "" && CLIENT_SECRET !== ""; + +export const authorizeUrl = (state: string, redirectUri: string): string => { + const params = new URLSearchParams({ + client_id: CLIENT_ID, + redirect_uri: redirectUri, + scope: "read:user user:email", + state, + allow_signup: "true", + }); + return `https://github.com/login/oauth/authorize?${params}`; +}; + +export const exchangeCode = async (code: string, redirectUri: string): Promise => { + const res = await fetch("https://github.com/login/oauth/access_token", { + method: "POST", + headers: { + Accept: "application/json", + "Content-Type": "application/json", + }, + body: JSON.stringify({ + client_id: CLIENT_ID, + client_secret: CLIENT_SECRET, + code, + redirect_uri: redirectUri, + }), + }); + if (!res.ok) { + throw new Error(`github token exchange failed: ${res.status}`); + } + const data = (await res.json()) as { access_token?: string; error?: string; error_description?: string }; + if (!data.access_token) { + throw new Error(`github token exchange returned no token: ${data.error_description ?? data.error ?? 
"unknown"}`); + } + return data.access_token; +}; + +export const fetchUser = async (accessToken: string): Promise => { + const res = await fetch("https://api.github.com/user", { + headers: { + Authorization: `token ${accessToken}`, + Accept: "application/vnd.github+json", + "User-Agent": "tdd.md", + }, + }); + if (!res.ok) throw new Error(`github user fetch failed: ${res.status}`); + return (await res.json()) as GithubUser; +}; + +export const fetchPrimaryEmail = async (accessToken: string): Promise => { + const res = await fetch("https://api.github.com/user/emails", { + headers: { + Authorization: `token ${accessToken}`, + Accept: "application/vnd.github+json", + "User-Agent": "tdd.md", + }, + }); + if (!res.ok) return null; + const emails = (await res.json()) as GithubEmail[]; + const verified = emails.filter((e) => e.verified); + return verified.find((e) => e.primary)?.email ?? verified[0]?.email ?? null; +}; + +// Pulls .tdd-md.json from a public GitHub repo's default branch via the +// raw-content host. No auth — public-repo only for now (private repos +// land when we install a GitHub App, deferred to a later sliver). 
+export const fetchProjectConfig = async ( + repoOwner: string, + repoName: string, +): Promise => { + const url = `https://raw.githubusercontent.com/${encodeURIComponent(repoOwner)}/${encodeURIComponent(repoName)}/HEAD/${PROJECT_CONFIG_PATH}`; + const res = await fetch(url, { + headers: { Accept: "application/json", "User-Agent": "tdd.md" }, + }); + if (res.status === 404) { + throw new Error( + `${PROJECT_CONFIG_PATH} not found in ${repoOwner}/${repoName} on the default branch (or the repo is private; private repos aren't supported yet).`, + ); + } + if (!res.ok) { + throw new Error( + `Couldn't fetch ${PROJECT_CONFIG_PATH} from ${repoOwner}/${repoName}: HTTP ${res.status}`, + ); + } + let parsed: unknown; + try { + parsed = await res.json(); + } catch { + throw new Error(`${PROJECT_CONFIG_PATH} in ${repoOwner}/${repoName} isn't valid JSON`); + } + return parseProjectConfig(parsed); +}; diff --git a/src/c21_app.ts b/src/c21_app.ts new file mode 100644 index 0000000000000000000000000000000000000000..1ec1ba4053a322453576ce15ff0416435412cd58 --- /dev/null +++ b/src/c21_app.ts @@ -0,0 +1,1176 @@ +// c21 — handlers: the route table + fallback fetch. Composes the lower +// layers (c13 db, c14 secondary I/O, c31 models, c32 logic, c51 render) +// into the HTTP surface served by Bun.serve in c11_server. 
+ +import { + renderPage, + renderNotFound, + htmlResponse, + errorPage, + phaseSpan, + relativeTime, + reportsLandingMd, + execSummaryMd, + agentDrilldownMd, + testsOverviewMd, + projectsLandingMd, + projectRegisterMd, + projectDetailMd, +} from "./c51_render.ts"; +import * as github from "./c14_github.ts"; +import * as forgejo from "./c14_forgejo.ts"; +import { + FORGEJO_URL, + adminApiHeaders, + getUserVisibility, + proxyToForgejo, + type ForgejoUserSummary, +} from "./c14_forgejo.ts"; +import { parseCommit, computeProgress } from "./c31_commits.ts"; +import { loadGame, listGames } from "./c31_games.ts"; +import { ALL_POSTS } from "./c31_blog.ts"; +import { ALL_GUIDES } from "./c31_guides.ts"; +import { DEMO_REPORTS } from "./c31_reports_demo.ts"; +import { parseRepoIdentifier } from "./c31_project_config.ts"; +import { fetchProjectConfig } from "./c14_github.ts"; +import { judge } from "./c32_judge.ts"; +import { + SESSION_TTL_SEC, + getViewer, + randomHex, + parseCookies, + signSession, + sessionCookieHeader, + timingSafeEqual, + hmacSha256Hex, +} from "./c32_session.ts"; +import { + latestRun, + allLatestRuns, + listActiveProjects, + getProject, + upsertProject, +} from "./c13_database.ts"; + +const HOME_MD = "./content/home.md"; +const GAME_DIR = "./content/games"; + +const BASE_URL = process.env.BASE_URL ?? "https://tdd.md"; +const CALLBACK_URL = `${BASE_URL}/auth/github/callback`; + +const HOME_DESCRIPTION = + "Test-driven development for agentic coding. 
Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline."; + +const homeBody = await Bun.file(HOME_MD).text(); +const HOME_HTML = await renderPage({ + title: "tdd.md — TDD for agentic coding", + description: HOME_DESCRIPTION, + bodyMarkdown: homeBody, + active: "home", + jsonLd: { + "@context": "https://schema.org", + "@type": "WebSite", + name: "tdd.md", + url: "https://tdd.md", + description: HOME_DESCRIPTION, + }, +}); + +const ALL_GAMES = await listGames(); + +const gamesIndexBody = `# games + +${ALL_GAMES.length === 0 + ? "_No katas registered yet._" + : `| kata | description | steps |\n|---|---|---|\n${ALL_GAMES.map( + (g) => `| [${g.id}](/games/${g.id}) | ${g.description} | ${g.steps.length} |`, + ).join("\n")}` +} + +> Ready to play? [Register your agent →](/agents/register) +> Using a specific agent? See the [agent-specific guides](/guides) — Claude Code, Cursor, Aider. +`; + +const GAMES_INDEX_HTML = await renderPage({ + title: "TDD katas — tdd.md", + description: + "Browse the TDD katas. Pick a challenge, push red→green→refactor commits, and earn a public verdict graded against hidden tests.", + bodyMarkdown: gamesIndexBody, + ogPath: "https://tdd.md/games", + active: "games", +}); + +const renderKata = async (kata: string): Promise => { + const file = Bun.file(`${GAME_DIR}/${kata}/spec.md`); + if (!(await file.exists())) return null; + const md = await file.text(); + // Pull the kata's own description from spec.ts when available — it's + // the canonical short copy (rendered on /games + sitemap previews). 
+ let description: string | undefined; + try { + const game = await loadGame(kata); + description = game.description; + } catch { + // unknown kata; use the site default + } + const html = await renderPage({ + title: `${kata} TDD kata — tdd.md`, + description, + bodyMarkdown: md, + ogPath: `https://tdd.md/games/${kata}`, + active: "games", + }); + return new Response(html, { headers: { "Content-Type": "text/html; charset=utf-8" } }); +}; + +const renderAgentsIndex = async (): Promise => { + let users: ForgejoUserSummary[] = []; + const adminToken = process.env.FORGEJO_ADMIN_TOKEN; + if (adminToken) { + const r = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, { + headers: adminApiHeaders(), + }); + if (r.ok) users = (await r.json()) as ForgejoUserSummary[]; + } + // Drop the admin (id 1) and anyone whose visibility isn't "public" — + // private and limited agents stay invisible on the public index. + const agents = users.filter( + (u) => u.id !== 1 && !u.is_admin && (u.visibility ?? "public") === "public", + ); + + // Per-agent score totals from the latest run per repo. + const allRuns = allLatestRuns(); + const totalsByOwner = new Map(); + for (const r of allRuns) { + const t = totalsByOwner.get(r.owner) ?? { score: 0, runs: 0 }; + t.score += r.verdict.totalScore; + t.runs += 1; + totalsByOwner.set(r.owner, t); + } + + let body: string; + if (agents.length === 0) { + body = `# agents + +> No agents registered yet. Be the first. + +[ Register your agent → ](/agents/register) +`; + } else { + const rows = agents + .map((u) => { + const t = totalsByOwner.get(u.login) ?? { score: 0, runs: 0 }; + const sign = t.score >= 0 ? "+" : ""; + return `| [${u.login}](/agents/${u.login}) | ${t.runs} | ${sign}${t.score} |`; + }) + .join("\n"); + body = `# agents + +| agent | attempts | total score | +|---|---|---| +${rows} + +[ Register your agent → ](/agents/register) +`; + } + + const description = + agents.length === 0 + ? 
"AI agents doing test-driven development on tdd.md — registration is open, sign in with GitHub to play." + : `${agents.length} AI ${agents.length === 1 ? "agent" : "agents"} doing test-driven development on tdd.md, scored on red→green discipline against hidden tests for agentic coding.`; + + const html = await renderPage({ + title: "AI agents on tdd.md", + description, + bodyMarkdown: body, + ogPath: "https://tdd.md/agents", + active: "agents", + }); + return htmlResponse(html); +}; + +const renderLeaderboard = async (): Promise => { + // Only show runs whose owner is public. Fetch the user list once + // and build a Set so we can filter without N+1 lookups. + const adminToken = process.env.FORGEJO_ADMIN_TOKEN; + const publicOwners = new Set(); + if (adminToken) { + const r = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, { + headers: adminApiHeaders(), + }); + if (r.ok) { + const users = (await r.json()) as ForgejoUserSummary[]; + for (const u of users) { + if ((u.visibility ?? "public") === "public") publicOwners.add(u.login); + } + } + } + const runs = allLatestRuns() + .filter((r) => publicOwners.size === 0 || publicOwners.has(r.owner)) + .sort((a, b) => b.verdict.totalScore - a.verdict.totalScore); + let body: string; + if (runs.length === 0) { + body = `# leaderboard + +> No verdicts yet. The first agent to push a red→green pair lands here. + +[ Register your agent → ](/agents/register) +`; + } else { + const rows = runs + .map((r, i) => { + const sign = r.verdict.totalScore >= 0 ? "+" : ""; + const verified = r.verdict.steps.filter((s) => s.status === "verified").length; + return `| ${i + 1} | [${r.owner}](/agents/${r.owner}) | [${r.repo}](/${r.owner}/${r.repo}) | ${sign}${r.verdict.totalScore} | ${verified} |`; + }) + .join("\n"); + body = `# leaderboard + +| rank | agent | kata | score | verified steps | +|---|---|---|---|---| +${rows} +`; + } + const description = + runs.length === 0 + ? 
"TDD leaderboard for AI agents on tdd.md — be the first verdict." + : `Top AI agents by TDD score on tdd.md — ${runs.length} ranked ${runs.length === 1 ? "submission" : "submissions"} graded on red→green discipline and hidden test pass rate.`; + + const html = await renderPage({ + title: "TDD leaderboard — tdd.md", + description, + bodyMarkdown: body, + ogPath: "https://tdd.md/leaderboard", + active: "leaderboard", + }); + return htmlResponse(html); +}; + +const REGISTER_BODY = `# register + +> Sign in with GitHub to create your tdd.md agent. + +## what we ask GitHub for +- your username +- your primary verified email + +That's it — no repo access, no anything else. + +## what you get +- a public agent account at \`git.tdd.md/\` +- a push token (shown once) +- an empty repo for the first kata, ready to push to + +[ sign in with github → ](/auth/github/start) +`; + +const REGISTER_HTML = await renderPage({ + title: "Register your AI agent — tdd.md", + description: + "Sign in with GitHub to register your AI agent on tdd.md and start solving TDD katas. Public-signup, verified-identity, no extra forms.", + bodyMarkdown: REGISTER_BODY, + ogPath: "https://tdd.md/agents/register", + active: "agents", + noindex: true, +}); + +interface ForgejoRepoSummary { + description: string; + clone_url: string; + empty: boolean; + private: boolean; +} + +interface ForgejoCommit { + sha: string; + commit: { message: string; author: { name: string; date: string } }; +} + +const renderRepoView = async ( + owner: string, + repo: string, + viewer: string | null, +): Promise => { + // Private/limited owners get a 404 to anonymous visitors — but the + // owner themselves (verified via session cookie) can always see + // their own pages. 
+ const ownerVisibility = await getUserVisibility(owner); + if (ownerVisibility !== null && ownerVisibility !== "public" && viewer !== owner) { + const html = await renderNotFound(`/${owner}/${repo}`); + return htmlResponse(html, 404); + } + + const repoApi = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`; + const repoRes = await fetch(repoApi, { headers: adminApiHeaders() }); + if (repoRes.status === 404) { + const html = await renderNotFound(`/${owner}/${repo}`); + return htmlResponse(html, 404); + } + if (!repoRes.ok) { + const html = await renderPage({ + title: `${owner}/${repo} — tdd.md`, + bodyMarkdown: `# ${owner}/${repo}\n\n> repository unavailable`, + }); + return htmlResponse(html, 502); + } + const info = (await repoRes.json()) as ForgejoRepoSummary; + const cloneUrl = info.clone_url || `https://tdd.md/${owner}/${repo}.git`; + const isPrivate = info.private === true; + + // The repo name is by convention the kata id. If the kata exists, the + // header link is meaningful and we know the total step count. + let totalSteps: number | null = null; + let kataExists = false; + try { + const game = await loadGame(repo); + totalSteps = game.steps.length; + kataExists = true; + } catch { + // Repo isn't a known kata — still render, just without step totals. + } + + let commits: ForgejoCommit[] = []; + if (!info.empty) { + const commitsRes = await fetch(`${repoApi}/commits?limit=50&stat=false`, { + headers: adminApiHeaders(), + }); + if (commitsRes.ok) commits = (await commitsRes.json()) as ForgejoCommit[]; + } + const progress = computeProgress(commits); + const verified = progress.verifiedSteps.size; + + let status: string; + if (commits.length === 0) { + status = "awaiting first push"; + } else if (totalSteps !== null && verified >= totalSteps) { + status = "kata complete"; + } else if (verified > 0) { + status = "in progress"; + } else { + status = "no verified steps yet"; + } + const stepCounter = totalSteps !== null ? 
`${verified} / ${totalSteps}` : `${verified} / ?`; + + let phaseLog: string; + if (commits.length === 0) { + phaseLog = "_No commits yet — push your first `red:` commit to start the cycle._"; + } else { + const rows = commits.map((c) => { + const sha = c.sha.slice(0, 7); + const p = parseCommit(c.commit.message); + const subject = (p.subject || c.commit.message.split("\n")[0] || "").replace(/\|/g, "\\|"); + const stepCell = p.step ? `\`${p.step}\`` : "—"; + return `| \`${sha}\` | ${phaseSpan(p.phase)} | ${stepCell} | ${subject} | ${relativeTime(c.commit.author.date)} |`; + }); + phaseLog = `| sha | phase | step | message | when |\n|---|---|---|---|---|\n${rows.join("\n")}`; + } + + const kataLink = kataExists + ? `[\`${repo}\` →](/games/${repo})` + : `\`${repo}\``; + const privateBadge = isPrivate ? ` [private]` : ""; + + const verdict = latestRun(owner, repo); + const headSha = commits[0]?.sha ?? null; + const verdictStale = verdict !== null && headSha !== null && verdict.headSha !== headSha; + + let scoreSection: string; + if (verdict === null) { + scoreSection = `> Not yet judged. The next push triggers a judge run, or [run the judge now](/api/judge/${owner}/${repo}) (POST).\n\nPhase tally: red ${progress.redCount} · green ${progress.greenCount} · refactor ${progress.refactorCount}${progress.untaggedCount > 0 ? ` · untagged ${progress.untaggedCount}` : ""}.`; + } else { + const stale = verdictStale ? ` · stale — newer commits not yet judged` : ""; + const sign = verdict.totalScore >= 0 ? "+" : ""; + const statusClass = (status: string): string => { + if (status === "verified") return "green"; + if (status === "discipline-only") return "blue"; + if (status === "no-green") return "muted"; + return "red"; + }; + const modeLabel = (m: string): string => { + const cls = m === "strict" ? "red" : m === "pragmatic" ? "blue" : "green"; + return `${m}`; + }; + const rows = verdict.steps.length === 0 + ? 
"_No red→green pairs found yet._" + : `| step | red | green | hidden | status | points | explanation |\n|---|---|---|---|---|---|---|\n` + + verdict.steps.map((s) => { + const cls = statusClass(s.status); + const sign = s.scoreDelta >= 0 ? "+" : ""; + const hiddenCell = + s.hiddenPassed === true ? `pass` : + s.hiddenPassed === false ? `fail` : + ``; + const explanation = (s.explanation ?? "").replace(/\|/g, "\\|"); + return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | ${s.status} | ${sign}${s.scoreDelta} | ${explanation} |`; + }).join("\n"); + const refactorRows = (verdict.refactors ?? []).length === 0 + ? "" + : `\n\n### refactors\n\n| sha | step | tests | points | explanation |\n|---|---|---|---|---|\n` + + verdict.refactors.map((r) => { + const sign = r.scoreDelta >= 0 ? "+" : ""; + const cls = r.testsPassed ? "green" : "red"; + const verb = r.testsPassed ? "green" : "broke tests"; + const explanation = (r.explanation ?? "").replace(/\|/g, "\\|"); + return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | ${verb} | ${sign}${r.scoreDelta} | ${explanation} |`; + }).join("\n"); + const modeLine = verdict.mode ? `**mode: ${modeLabel(verdict.mode)}** · ` : ""; + scoreSection = `${modeLine}**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`; + } + + const body = `# ${owner} · playing ${kataLink}${privateBadge} + +> ${status} +> **${stepCounter}** steps verified + +## phase log + +${phaseLog} + +## score + +${scoreSection} + +## clone + +\`\`\` +git clone ${cloneUrl} +\`\`\` + +[← /agents/${owner}](/agents/${owner})${kataExists ? ` · [kata spec →](/games/${repo})` : ""} +`; + + // Dynamic description tailored to this attempt — gives every agent + // run a unique snippet for search results and social previews instead + // of falling back to the site default. 
/**
 * Decide whether a request targets a git transport endpoint rather than
 * one of the site's own pages.
 *
 * A request counts as git traffic when any of these holds:
 * - the path ends in ".git" or contains a ".git/" segment;
 * - the path ends in "/info/refs" and the `service` query parameter is
 *   "git-upload-pack" or "git-receive-pack";
 * - the path ends in "/git-upload-pack" or "/git-receive-pack".
 *
 * @param pathname URL path of the incoming request.
 * @param search   parsed query string of the incoming request.
 * @returns true when the request matches one of the patterns above.
 */
const isGitProtocol = (pathname: string, search: URLSearchParams): boolean => {
  const targetsDotGitPath = pathname.endsWith(".git") || pathname.includes(".git/");

  const service = search.get("service");
  const isRefAdvertisement =
    pathname.endsWith("/info/refs") &&
    (service === "git-upload-pack" || service === "git-receive-pack");

  const isRpcEndpoint = ["/git-upload-pack", "/git-receive-pack"].some((suffix) =>
    pathname.endsWith(suffix),
  );

  return targetsDotGitPath || isRefAdvertisement || isRpcEndpoint;
};
/**
 * Server-level error handler: log the error and answer with a generic
 * 500 so no error detail is sent to the client.
 *
 * @param err the error that reached the server's error hook.
 * @returns a plain "internal error" response with status 500.
 */
const appError = (err: Error): Response => {
  console.error(err);
  const init = { status: 500 };
  return new Response("internal error", init);
};
+// --------------------------------------------------------------------- + +export const createApp = (port: number) => Bun.serve({ + port, + error: appError, + fetch: appFetch, + routes: { + "/": htmlResponse(HOME_HTML), + "/raw": new Response(Bun.file(HOME_MD), { + headers: { "Content-Type": "text/markdown; charset=utf-8" }, + }), + "/healthz": new Response("ok"), + + "/robots.txt": new Response( + `User-agent: *\nAllow: /\nDisallow: /auth/\nDisallow: /api/\n\nSitemap: https://tdd.md/sitemap.xml\n`, + { headers: { "Content-Type": "text/plain; charset=utf-8" } }, + ), + + "/sitemap.xml": async () => { + const today = new Date().toISOString().slice(0, 10); + const url = (loc: string, priority: string) => + `${loc}${today}${priority}`; + const kataUrls = ALL_GAMES.map((g) => + url(`https://tdd.md/games/${g.id}`, "0.8"), + ).join("\n"); + const guideUrls = ALL_GUIDES.map((g) => + url(`https://tdd.md/guides/${g.slug}`, "0.8"), + ).join("\n"); + const blogUrls = ALL_POSTS.map((p) => + url(`https://tdd.md/blog/${p.slug}`, "0.8"), + ).join("\n"); + const xml = ` + +${url("https://tdd.md/", "1.0")} +${url("https://tdd.md/games", "0.9")} +${kataUrls} +${url("https://tdd.md/guides", "0.9")} +${guideUrls} +${url("https://tdd.md/blog", "0.7")} +${blogUrls} +${url("https://tdd.md/agents", "0.7")} +${url("https://tdd.md/leaderboard", "0.7")} +`; + return new Response(xml, { + headers: { "Content-Type": "application/xml; charset=utf-8" }, + }); + }, + + "/og.svg": new Response(Bun.file("./public/og.svg"), { + headers: { + "Content-Type": "image/svg+xml", + "Cache-Control": "public, max-age=3600", + }, + }), + + "/games": htmlResponse(GAMES_INDEX_HTML), + + "/blog": async () => { + const rows = ALL_POSTS + .map((p) => `| ${p.date} | [${p.title}](/blog/${p.slug}) |`) + .join("\n"); + const body = `# blog + +Notes on TDD, agentic coding, and the discipline that ties them together. + +| date | post | +|---|---| +${rows} + +> RSS feed coming when there's a second post. 
+ +[← back to tdd.md](/) · [the guides](/guides) · [the katas](/games) +`; + const html = await renderPage({ + title: "Blog — tdd.md", + description: "Posts on test-driven development for AI coding agents — how to apply TDD with Claude Code, Cursor, and Aider, what we learn from the verdicts.", + bodyMarkdown: body, + ogPath: "https://tdd.md/blog", + active: "blog", + }); + return htmlResponse(html); + }, + + "/blog/:slug": async (req) => { + const slug = req.params.slug; + const entry = ALL_POSTS.find((p) => p.slug === slug); + if (!entry) { + const html = await renderNotFound(`/blog/${slug}`); + return htmlResponse(html, 404); + } + const file = Bun.file(`./content/blog/${slug}.md`); + if (!(await file.exists())) { + const html = await renderNotFound(`/blog/${slug}`); + return htmlResponse(html, 404); + } + const md = await file.text(); + const html = await renderPage({ + title: `${entry.title} — tdd.md`, + description: entry.description, + bodyMarkdown: md, + ogPath: `https://tdd.md/blog/${slug}`, + active: "blog", + jsonLd: { + "@context": "https://schema.org", + "@type": "BlogPosting", + headline: entry.title, + description: entry.description, + datePublished: entry.date, + url: `https://tdd.md/blog/${slug}`, + author: { "@type": "Organization", name: "tdd.md" }, + }, + }); + return htmlResponse(html); + }, + + "/projects": async () => { + const projects = listActiveProjects(); + const html = await renderPage({ + title: "Projects — tdd.md", + description: "Real repos opted in to tdd.md scoring. Each project drops .tdd-md.json at its root and gets its commits judged structurally for TDD discipline.", + bodyMarkdown: projectsLandingMd(projects), + ogPath: "https://tdd.md/projects", + }); + return htmlResponse(html); + }, + + "/projects/new": async (req) => { + const viewer = await getViewer(req); + if (req.method === "GET") { + const url = new URL(req.url); + const prefilled = url.searchParams.get("repo") ?? 
undefined; + const html = await renderPage({ + title: "Register a project — tdd.md", + description: "Onboard a real repo for TDD-discipline scoring. Drops .tdd-md.json at the repo root, register here, and the reports begin tracking commits on its tracked branches.", + bodyMarkdown: projectRegisterMd(viewer, prefilled), + ogPath: "https://tdd.md/projects/new", + noindex: true, + }); + return htmlResponse(html); + } + if (req.method !== "POST") return new Response("method not allowed", { status: 405 }); + if (!viewer) return new Response("unauthorized — sign in first", { status: 401 }); + + let raw = ""; + try { + const form = await req.formData(); + raw = String(form.get("repo") ?? "").trim(); + } catch { + return new Response("invalid form body", { status: 400 }); + } + + const renderError = async (message: string, status = 400): Promise => { + const html = await renderPage({ + title: "Register a project — tdd.md", + bodyMarkdown: projectRegisterMd(viewer, raw, message), + ogPath: "https://tdd.md/projects/new", + noindex: true, + }); + return htmlResponse(html, status); + }; + + let owner: string; + let repo: string; + try { + ({ owner, repo } = parseRepoIdentifier(raw)); + } catch (err) { + return renderError((err as Error).message); + } + + let config; + try { + config = await fetchProjectConfig(owner, repo); + } catch (err) { + return renderError((err as Error).message); + } + + upsertProject(viewer, owner, repo, config); + return new Response(null, { + status: 303, + headers: { Location: `/projects/${owner}/${repo}` }, + }); + }, + + "/projects/:repoOwner/:repoName": async (req) => { + const { repoOwner, repoName } = req.params; + const project = getProject(repoOwner, repoName); + if (!project) { + const html = await renderNotFound(`/projects/${repoOwner}/${repoName}`); + return htmlResponse(html, 404); + } + const html = await renderPage({ + title: `${project.displayName ?? 
`${project.repoOwner}/${project.repoName}`} — tdd.md`, + description: `${project.repoOwner}/${project.repoName} on tdd.md — ${project.testRunner === "none" ? "trace-mode" : project.testRunner} judging across ${project.trackedBranches.join(", ")}.`, + bodyMarkdown: projectDetailMd(project), + ogPath: `https://tdd.md/projects/${project.repoOwner}/${project.repoName}`, + }); + return htmlResponse(html); + }, + + "/reports": async () => { + const html = await renderPage({ + title: "Reports — tdd.md", + description: "Per-agent TDD-discipline reporting over real project repos: trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.", + bodyMarkdown: reportsLandingMd(), + ogPath: "https://tdd.md/reports", + noindex: true, + }); + return htmlResponse(html); + }, + + "/reports/demo": async () => { + const html = await renderPage({ + title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md", + description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.", + bodyMarkdown: execSummaryMd(), + ogPath: "https://tdd.md/reports/demo", + noindex: true, + }); + return htmlResponse(html); + }, + + "/reports/demo/tests": async () => { + const html = await renderPage({ + title: "Tests overzicht (demo) — tdd.md", + description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.", + bodyMarkdown: testsOverviewMd(), + ogPath: "https://tdd.md/reports/demo/tests", + noindex: true, + }); + return htmlResponse(html); + }, + + "/reports/demo/agents/:slug": async (req) => { + const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"]; + const md = agentDrilldownMd(slug); + if (!md) { + const html = await renderNotFound(`/reports/demo/agents/${slug}`); + return htmlResponse(html, 404); + } + const entry = DEMO_REPORTS.find((r) => r.slug === slug)!; + const html = await renderPage({ + title: `${entry.name} drill-down (demo) — tdd.md`, + 
description: `Per-agent drill-down mockup for ${entry.name}: trend, failure-mode breakdown, recent flagged commits with coaching links.`, + bodyMarkdown: md, + ogPath: `https://tdd.md/reports/demo/agents/${slug}`, + noindex: true, + }); + return htmlResponse(html); + }, + + "/guides": async () => { + const rows = ALL_GUIDES + .map((g) => `| [${g.title}](/guides/${g.slug}) | ${g.description} |`) + .join("\n"); + const body = `# guides + +Agent-specific walkthroughs for using tdd.md with the major agentic-coding tools. Each guide covers setup, prompt patterns that keep the agent in TDD, and the common pitfalls that cost score. + +| guide | what it covers | +|---|---| +${rows} + +> Missing your agent? [The mechanics are the same](/) — push commits tagged \`red:\` / \`green:\` / \`refactor:\` to your kata repo. Send a PR with a new guide and we'll list it here. + +[← play a kata](/games) · [register your agent →](/you) +`; + const html = await renderPage({ + title: "TDD guides for agentic coding tools — tdd.md", + description: "Practical TDD walkthroughs for Claude Code, Cursor, Aider and other AI coding agents — keep your agent honest with red→green→refactor commits, scored by tdd.md.", + bodyMarkdown: body, + ogPath: "https://tdd.md/guides", + active: "guides", + }); + return htmlResponse(html); + }, + + "/guides/:slug": async (req) => { + const slug = req.params.slug; + const entry = ALL_GUIDES.find((g) => g.slug === slug); + if (!entry) { + const html = await renderNotFound(`/guides/${slug}`); + return htmlResponse(html, 404); + } + const file = Bun.file(`./content/guides/${slug}.md`); + if (!(await file.exists())) { + const html = await renderNotFound(`/guides/${slug}`); + return htmlResponse(html, 404); + } + const md = await file.text(); + const html = await renderPage({ + title: `${entry.title} — tdd.md`, + description: entry.description, + bodyMarkdown: md, + ogPath: `https://tdd.md/guides/${slug}`, + active: "guides", + }); + return htmlResponse(html); + }, + 
+ "/games/:kata": async (req) => { + const res = await renderKata(req.params.kata); + if (res) return res; + const html = await renderNotFound(`/games/${req.params.kata}`); + return htmlResponse(html, 404); + }, + + "/agents": () => renderAgentsIndex(), + "/agents/register": htmlResponse(REGISTER_HTML), + "/agents/:name": async (req) => { + const name = req.params.name; + const viewer = await getViewer(req); + const userRes = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}`, { + headers: adminApiHeaders(), + }); + // Treat private/limited users as if they don't exist publicly — + // unless the logged-in viewer IS the owner. Owner can always see + // their own dashboard, public or not. + if (userRes.ok) { + const u = (await userRes.clone().json()) as ForgejoUserSummary; + const ownVisibility = u.visibility ?? "public"; + if (ownVisibility !== "public" && viewer !== name) { + const html = await renderNotFound(`/agents/${name}`); + return htmlResponse(html, 404); + } + } + if (userRes.status === 404) { + const html = await renderPage({ + title: `${name} — agents — tdd.md`, + bodyMarkdown: `# agents / ${name}\n\n> No agent registered with this name.\n\n[← all agents](/agents) · [register your own →](/agents/register)`, + ogPath: `https://tdd.md/agents/${name}`, + active: "agents", + }); + return htmlResponse(html, 404); + } + const reposRes = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}/repos?limit=50`, { + headers: adminApiHeaders(), + }); + const repos = reposRes.ok ? ((await reposRes.json()) as { name: string; description: string }[]) : []; + + const progressByRepo = await Promise.all( + repos.map(async (r) => { + const cRes = await fetch( + `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(name)}/${encodeURIComponent(r.name)}/commits?limit=50&stat=false`, + { headers: adminApiHeaders() }, + ); + const commits = cRes.ok ? 
((await cRes.json()) as { commit: { message: string } }[]) : []; + return { repo: r, progress: computeProgress(commits) }; + }), + ); + + const totals: Record = {}; + for (const r of repos) { + try { + const game = await loadGame(r.name); + totals[r.name] = game.steps.length; + } catch { + // unknown kata, no total + } + } + + const isSelf = viewer === name; + let body = `# agents / ${name}\n\n`; + if (isSelf) { + body += `> Welcome back, ${name}. This is your dashboard — only you and admins see it when your profile is private.\n\n`; + } + if (repos.length === 0) { + body += "> Registered, but no kata attempts yet.\n\n[← all agents](/agents)"; + } else { + body += "## attempts\n\n"; + body += "| kata | verified | phases |\n|---|---|---|\n"; + for (const { repo: r, progress } of progressByRepo) { + const total = totals[r.name]; + const verified = progress.verifiedSteps.size; + const counter = total !== undefined ? `${verified} / ${total}` : `${verified} / ?`; + const phases = `red ${progress.redCount} · green ${progress.greenCount} · refactor ${progress.refactorCount}`; + body += `| [${r.name}](/${name}/${r.name}) | ${counter} | ${phases} |\n`; + } + } + + if (isSelf) { + body += `\n\n---\n\n[sign out](/auth/logout) · [toggle visibility](#) (POST /api/agents/${name}/visibility with your push token)`; + } + + const verifiedSteps = progressByRepo.reduce((acc, p) => acc + p.progress.verifiedSteps.size, 0); + const description = + repos.length === 0 + ? `${name} just registered on tdd.md — no kata attempts yet.` + : `${name}'s TDD attempts on tdd.md: ${repos.length} ${repos.length === 1 ? "kata" : "katas"} pushed, ${verifiedSteps} verified red→green ${verifiedSteps === 1 ? 
"step" : "steps"}.`; + const html = await renderPage({ + title: `${name} · TDD attempts — tdd.md`, + description, + bodyMarkdown: body, + ogPath: `https://tdd.md/agents/${name}`, + active: "agents", + }); + return htmlResponse(html); + }, + // Redirect the legacy URL to the canonical /:owner/:repo path — + // /agents/:name/:kata used to render a placeholder before the + // GitHub-style routing landed. + "/agents/:name/:kata": (req) => + Response.redirect(`/${req.params.name}/${req.params.kata}`, 301), + + "/leaderboard": () => renderLeaderboard(), + + "/api/judge/:owner/:repo": async (req) => { + if (req.method !== "POST") { + return new Response("method not allowed; POST to trigger a judge run", { status: 405 }); + } + // Manual triggers require the admin token. Push-driven runs come + // through /api/forgejo/webhook with HMAC signature verification. + const adminToken = process.env.FORGEJO_ADMIN_TOKEN; + const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? ""; + if (!adminToken || !timingSafeEqual(provided, adminToken)) { + return new Response("unauthorized — POST with `Authorization: Bearer `", { status: 401 }); + } + try { + const verdict = await judge(req.params.owner, req.params.repo); + return Response.json(verdict); + } catch (err) { + return Response.json({ error: (err as Error).message }, { status: 500 }); + } + }, + + // Self-service visibility toggle. Agent posts their push token in + // Authorization, picks "public" | "limited" | "private". We verify + // the token actually belongs to :name by hitting Forgejo's /user + // endpoint with it, then PATCH the user via admin token. + "/api/agents/:name/visibility": async (req) => { + if (req.method !== "POST") return new Response("POST only", { status: 405 }); + const name = req.params.name; + const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? 
""; + if (!provided) return Response.json({ error: "missing bearer token" }, { status: 401 }); + + // Verify the token belongs to :name (or is the admin token). + const adminToken = process.env.FORGEJO_ADMIN_TOKEN ?? ""; + let allowed = adminToken && timingSafeEqual(provided, adminToken); + if (!allowed) { + const meRes = await fetch(`${FORGEJO_URL}/api/v1/user`, { + headers: { Authorization: `token ${provided}` }, + }); + if (meRes.ok) { + const me = (await meRes.json()) as { login?: string }; + allowed = me.login === name; + } + } + if (!allowed) return Response.json({ error: "token does not match agent" }, { status: 403 }); + + let body: { visibility?: string }; + try { + body = (await req.json()) as { visibility?: string }; + } catch { + return Response.json({ error: "invalid json" }, { status: 400 }); + } + const visibility = body.visibility; + if (visibility !== "public" && visibility !== "limited" && visibility !== "private") { + return Response.json( + { error: "visibility must be one of public|limited|private" }, + { status: 400 }, + ); + } + + const patchRes = await fetch( + `${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(name)}`, + { + method: "PATCH", + headers: { ...adminApiHeaders(), "Content-Type": "application/json" }, + body: JSON.stringify({ visibility, source_id: 0, login_name: name }), + }, + ); + if (!patchRes.ok) { + const text = await patchRes.text(); + return Response.json( + { error: `forgejo PATCH failed: ${patchRes.status} ${text}` }, + { status: 502 }, + ); + } + return Response.json({ name, visibility }); + }, + + "/api/forgejo/webhook": async (req) => { + if (req.method !== "POST") return new Response("POST only", { status: 405 }); + const secret = process.env.WEBHOOK_SECRET; + if (!secret) return new Response("webhook not configured", { status: 503 }); + + const body = await req.text(); + const provided = + req.headers.get("x-forgejo-signature") ?? req.headers.get("x-gitea-signature") ?? 
""; + const expected = await hmacSha256Hex(secret, body); + if (provided.length !== expected.length || !timingSafeEqual(provided, expected)) { + return new Response("invalid signature", { status: 401 }); + } + + let payload: { repository?: { owner?: { login?: string }; name?: string }; ref?: string }; + try { + payload = JSON.parse(body); + } catch { + return new Response("invalid json", { status: 400 }); + } + const owner = payload.repository?.owner?.login; + const repo = payload.repository?.name; + if (!owner || !repo) return new Response("missing owner/repo", { status: 400 }); + + // Fire the judge in the background; ack immediately so Forgejo + // doesn't time out while we're checking out commits. + void judge(owner, repo).catch((err) => { + console.error(`judge failed for ${owner}/${repo}:`, err); + }); + return Response.json({ accepted: true, owner, repo }); + }, + + "/you": async (req) => { + const viewer = await getViewer(req); + const target = viewer ? `/agents/${viewer}` : "/auth/github/start"; + return new Response(null, { status: 302, headers: { Location: target } }); + }, + + "/auth/logout": (_req) => { + // Clear the session cookie and bounce back home. 
+ return new Response(null, { + status: 302, + headers: { + Location: "/", + "Set-Cookie": sessionCookieHeader("", 0), + }, + }); + }, + + "/auth/github/start": (_req) => { + if (!github.isConfigured() || !forgejo.isConfigured()) { + return errorPage("registration is not configured on this server", 503); + } + const nonce = randomHex(16); + return new Response(null, { + status: 302, + headers: { + Location: github.authorizeUrl(nonce, CALLBACK_URL), + "Set-Cookie": `tdd_oauth_state=${nonce}; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=600`, + }, + }); + }, + + "/auth/github/callback": async (req) => { + const url = new URL(req.url); + const code = url.searchParams.get("code"); + const state = url.searchParams.get("state"); + if (!code || !state) return errorPage("missing code or state"); + + const cookies = parseCookies(req.headers.get("cookie")); + const cookieState = cookies.tdd_oauth_state; + if (!cookieState || !timingSafeEqual(cookieState, state)) { + return errorPage("state mismatch — open the registration page again and retry"); + } + + let username: string; + let email: string; + let fullName: string | null; + try { + const accessToken = await github.exchangeCode(code, CALLBACK_URL); + const user = await github.fetchUser(accessToken); + username = user.login; + fullName = user.name; + // GitHub's noreply email format: unique per account, never collides + // with another Forgejo user. We don't need a deliverable address — + // agents authenticate by token, not by email reset flow. + email = `${user.id}+${user.login}@users.noreply.github.com`; + } catch (err) { + return errorPage(`github oauth failed: ${(err as Error).message}`, 400); + } + + // Login vs register: if the user already exists in Forgejo, this + // is a returning visitor — set the session cookie, redirect to + // their dashboard, don't rotate their token. 
+ const isExisting = await forgejo.userExists(username); + const sessionToken = await signSession(username); + const sessionCookie = sessionCookieHeader(sessionToken, SESSION_TTL_SEC); + const clearOauthState = + "tdd_oauth_state=; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=0"; + + if (isExisting) { + return new Response(null, { + status: 302, + headers: new Headers([ + ["Location", `/agents/${username}`], + ["Set-Cookie", sessionCookie], + ["Set-Cookie", clearOauthState], + ]), + }); + } + + let reg: forgejo.AgentRegistration; + try { + reg = await forgejo.registerAgent({ + username, + email, + fullName: fullName ?? undefined, + }); + } catch (err) { + return errorPage(`failed to create your agent: ${(err as Error).message}`, 422); + } + + const verb = reg.isNew ? "created" : "rotated"; + const body = `# welcome, ${reg.username} + +> Your tdd.md agent has been ${verb}. **Save the token below — this page is the only time you'll see it.** If you lose it, [register again](/agents/register) to issue a fresh one (the old one will stop working). + +## push token + +\`\`\` +${reg.pushToken} +\`\`\` + +## kata: string-calc + +Your repo is at [\`git.tdd.md/${reg.username}/string-calc\`](https://git.tdd.md/${reg.username}/string-calc), already initialized with a default branch \`main\`. + +\`\`\` +git clone ${reg.repoCloneUrl} +cd string-calc + +# play the kata, commit per phase +# red: commit a failing test +# green: commit the impl that makes it pass +# refactor: commit a structural change with tests staying green + +git push +# username: ${reg.username} +# password: +\`\`\` + +When you push, the judge replays your commits and posts the verdict at [/agents/${reg.username}/string-calc](/agents/${reg.username}/string-calc). 
// c31 — model: blog index data. The post bodies live as markdown in
// content/blog/[slug].md; this file is just the registry that drives
// /blog, /blog/:slug, and the sitemap. New posts: drop the .md file
// and add an entry here.

/** One registry row describing a published blog post. */
export interface BlogEntry {
  // URL segment under /blog/ and base name of the markdown file
  // (content/blog/[slug].md).
  slug: string;
  // Human-readable headline of the post.
  title: string;
  // Short summary of the post.
  description: string;
  // ISO date (YYYY-MM-DD) for the listing.
  // NOTE(review): previously documented as also feeding the sitemap
  // lastmod — confirm against the /sitemap.xml handler before relying on it.
  date: string;
}

// Registry of all posts. Entries are currently written newest-first;
// presumably consumers render them in array order — verify before
// reordering.
export const ALL_POSTS: BlogEntry[] = [
  {
    slug: "tweag-handbook-tdd",
    title: "Tweag's agentic TDD handbook gets the loop right — local green still isn't enough",
    description: "Tweag's agentic-coding handbook describes a clean TDD loop and the right rules for AI assistants — but the validation layer it leans on (run tests, see green) misses the three failure modes most likely to show up: tautology, test deletion in refactor, and assertion weakening. Here's the gap, and what closes it.",
    date: "2026-05-08",
  },
  {
    slug: "aider-tdd",
    title: "Aider is the closest agent to TDD on rails — until you let it auto-fix",
    description: "Aider's auto-commit-per-edit and bite-sized-steps philosophy make it TDD-shaped by default. Then `--auto-test` discovers it can win by deleting tests instead of fixing the impl. Here's how Aider's strengths map onto TDD, and how to keep the auto-test loop honest.",
    date: "2026-05-04",
  },
  {
    slug: "cursor-tdd",
    title: "Cursor knows how to do TDD. Most users skip the parts that matter.",
    description: "Cursor's own agent best practices document a clean TDD workflow — but most users skip the features (Plan Mode, fresh conversations, .cursor/rules) that actually make it work. Here's how to put the pieces together, with a kata you can run end-to-end.",
    date: "2026-05-04",
  },
  {
    slug: "claude-code-tdd",
    title: "Claude Code does not do TDD by default — here's how to make it",
    description: "Claude Code writes the test and impl in one breath, so the test never fails for the right reason. Two structural changes — CLAUDE.md rules + phase-separated sessions — get the discipline back, and tdd.md can verify it.",
    date: "2026-05-04",
  },
];
+ expect(parseCommit("spike: try the regex approach").phase).toBe("spike"); +}); + +test("parseCommit extracts step from spike(step):", () => { + const p = parseCommit("spike(custom-separator): explore Forge regex"); + expect(p.phase).toBe("spike"); + expect(p.step).toBe("custom-separator"); +}); + +test("computeProgress verifies a step after red→green for the same step", () => { + const commits = [ + { commit: { message: "green(empty): returns 0" } }, + { commit: { message: "red(empty): empty string returns 0" } }, + ]; // newest first, like Forgejo + const p = computeProgress(commits); + expect(p.verifiedSteps).toEqual(new Set(["empty"])); + expect(p.redCount).toBe(1); + expect(p.greenCount).toBe(1); +}); + +test("computeProgress does not verify green-without-prior-red", () => { + const commits = [{ commit: { message: "green(empty): returns 0" } }]; + expect(computeProgress(commits).verifiedSteps.size).toBe(0); +}); diff --git a/src/c31_commits.ts b/src/c31_commits.ts new file mode 100644 index 0000000000000000000000000000000000000000..89e5c1950bf9976d0ee5806501333d0775f007e4 --- /dev/null +++ b/src/c31_commits.ts @@ -0,0 +1,65 @@ +export type Phase = "red" | "green" | "refactor" | "spike" | "init" | "untagged"; + +export interface ParsedCommit { + phase: Phase; + step: string | null; + subject: string; +} + +const PHASE_RE = /^(red|green|refactor|spike)(?:\(([a-z][a-z0-9-]*)\))?:\s*(.*)$/i; + +export const parseCommit = (message: string): ParsedCommit => { + const subject = message.split("\n")[0] ?? ""; + const m = subject.match(PHASE_RE); + if (m) { + return { + phase: m[1]!.toLowerCase() as Phase, + step: m[2] ?? null, + subject: m[3] ?? 
"", + }; + } + if (/^Initial commit$/i.test(subject)) { + return { phase: "init", step: null, subject }; + } + return { phase: "untagged", step: null, subject }; +}; + +export interface Progress { + verifiedSteps: Set; + redCount: number; + greenCount: number; + refactorCount: number; + spikeCount: number; + untaggedCount: number; +} + +// A step counts as "verified" when its red commit is followed by a green +// for the same step. Refactor and untagged commits are tallied separately +// for the score breakdown but don't move verification. +export const computeProgress = (commits: { commit: { message: string } }[]): Progress => { + const pendingRed = new Set(); + const verifiedSteps = new Set(); + let redCount = 0; + let greenCount = 0; + let refactorCount = 0; + let spikeCount = 0; + let untaggedCount = 0; + // Forgejo returns commits newest-first; walk oldest-first to get sequence. + for (const c of [...commits].reverse()) { + const p = parseCommit(c.commit.message); + if (p.phase === "red") { + redCount++; + if (p.step) pendingRed.add(p.step); + } else if (p.phase === "green") { + greenCount++; + if (p.step && pendingRed.has(p.step)) verifiedSteps.add(p.step); + } else if (p.phase === "refactor") { + refactorCount++; + } else if (p.phase === "spike") { + spikeCount++; + } else if (p.phase === "untagged") { + untaggedCount++; + } + } + return { verifiedSteps, redCount, greenCount, refactorCount, spikeCount, untaggedCount }; +}; diff --git a/src/c31_games.test.ts b/src/c31_games.test.ts new file mode 100644 index 0000000000000000000000000000000000000000..97468badf44114c5145e088a78b586b3547500e7 --- /dev/null +++ b/src/c31_games.test.ts @@ -0,0 +1,26 @@ +import { test, expect } from "bun:test"; +import { loadGame } from "./c31_games.ts"; + +test("loadGame returns a game with the expected id", async () => { + const game = await loadGame("string-calc"); + expect(game.id).toBe("string-calc"); +}); + +test("loadGame returns the kata's step ids in order", async () => { 
+ const game = await loadGame("string-calc"); + expect(game.steps.map((s) => s.id)).toEqual([ + "empty", + "single-number", + "two-numbers", + "n-numbers", + "newline-separator", + "custom-separator", + "negatives-throw", + ]); +}); + +test("loadGame throws a clear error for an unknown game", async () => { + await expect(loadGame("does-not-exist")).rejects.toThrow( + /unknown game: does-not-exist/, + ); +}); diff --git a/src/c31_games.ts b/src/c31_games.ts new file mode 100644 index 0000000000000000000000000000000000000000..4294a10a7aee4750b4009929f03dc9b8656bf2c8 --- /dev/null +++ b/src/c31_games.ts @@ -0,0 +1,55 @@ +export interface Step { + id: string; + requirement: string; + // Path (relative to the kata's spec.ts) of the authoritative test file. + // The judge copies this into the agent's working tree after the green + // checkout and runs it — hidden tests are how we detect cheating where + // an agent writes a tautological test like `expect(true).toBe(true)`. + hiddenTestFile: string; +} + +export interface Game { + id: string; + // One-line summary shown on the games index and OG previews. + description: string; + // Human-readable function signature the agent must export. Documented + // on the kata page so authors know what to build. + signature: string; + // The module path the hidden tests will import from. Agents must export + // their solution from this exact path (relative to repo root). + importPath: string; + steps: Step[]; +} + +import { readdir } from "node:fs/promises"; + +// Reads every kata under content/games/ and returns the loaded specs in +// alphabetical order. Used to build the games index and sitemap without +// hard-coding individual kata ids. 
+export async function listGames(): Promise { + let entries; + try { + entries = await readdir("./content/games", { withFileTypes: true }); + } catch { + return []; + } + const ids = entries.filter((e) => e.isDirectory()).map((e) => e.name).sort(); + const games: Game[] = []; + for (const id of ids) { + try { + games.push(await loadGame(id)); + } catch { + // skip katas that fail to load (missing spec.ts, etc.) + } + } + return games; +} + +export async function loadGame(id: string): Promise { + const file = Bun.file(`./content/games/${id}/spec.ts`); + if (!(await file.exists())) { + throw new Error(`unknown game: ${id}`); + } + const mod = await import(`../content/games/${id}/spec.ts`); + return mod.spec as Game; +} diff --git a/src/c31_guides.ts b/src/c31_guides.ts new file mode 100644 index 0000000000000000000000000000000000000000..a1c29beb3e50e8346e5b1255193a55f6d1ae76a6 --- /dev/null +++ b/src/c31_guides.ts @@ -0,0 +1,26 @@ +// c31 — model: agent-specific TDD-walkthrough registry. Drives +// /guides + /guides/:slug. Markdown bodies live in content/guides/.md. 
+ +export interface GuideEntry { + slug: string; + title: string; + description: string; +} + +export const ALL_GUIDES: GuideEntry[] = [ + { + slug: "claude-code", + title: "TDD with Claude Code", + description: "Run TDD katas through Anthropic's Claude Code with phase-separated prompts and CLAUDE.md rules so the judge scores clean red→green→refactor cycles.", + }, + { + slug: "cursor", + title: "TDD with Cursor", + description: "Test-driven katas through Cursor — Composer per phase, project rules pinned in .cursor/rules, fresh context for red vs green.", + }, + { + slug: "aider", + title: "TDD with Aider", + description: "Aider's commit-per-edit model maps directly onto red→green→refactor — prompt with phase tags and the auto-commit carries through.", + }, +]; diff --git a/src/c31_project_config.ts b/src/c31_project_config.ts new file mode 100644 index 0000000000000000000000000000000000000000..95a1b5c36b334cbba67db2f97312347227093ea5 --- /dev/null +++ b/src/c31_project_config.ts @@ -0,0 +1,102 @@ +// c31 — model: types + parser for `.tdd-md.json`, the per-repo opt-in +// config used by the project-tracking pipeline. Pure data, no I/O. +// Fetching the file lives in c14_github; persistence lives in c13_database; +// page rendering lives in c51_render. + +export const PROJECT_CONFIG_PATH = ".tdd-md.json"; +export const PROJECT_CONFIG_VERSION = 1; + +export type TestRunner = "none" | "bun"; +export type AgentSlug = "claude-code" | "cursor" | "aider" | "unknown"; + +export interface ProjectConfig { + version: number; + // "none" → trace-mode judging only (commit discipline, no test execution). + // "bun" → full sandbox-runner judging (later sliver — registration accepts + // the value but judging stays trace-only until the runner ships). + test_runner: TestRunner; + // Branches whose pushes get scored. Defaults to ["main"]. + tracked_branches: string[]; + // Optional reporting metadata. 
+ display_name?: string; + team?: string; +} + +export const DEFAULT_CONFIG: ProjectConfig = { + version: PROJECT_CONFIG_VERSION, + test_runner: "none", + tracked_branches: ["main"], +}; + +// Validates and normalises a parsed JSON blob into a ProjectConfig. +// Throws with a human-readable message on failure — those messages are +// surfaced verbatim to the registering user, so they need to be useful. +export const parseProjectConfig = (raw: unknown): ProjectConfig => { + if (!raw || typeof raw !== "object") { + throw new Error(".tdd-md.json must be a JSON object"); + } + const obj = raw as Record; + const version = obj.version; + if (typeof version !== "number" || version !== PROJECT_CONFIG_VERSION) { + throw new Error( + `.tdd-md.json has version ${JSON.stringify(version)}; expected ${PROJECT_CONFIG_VERSION}`, + ); + } + let testRunner: TestRunner = "none"; + if (obj.test_runner !== undefined) { + if (obj.test_runner !== "none" && obj.test_runner !== "bun") { + throw new Error( + `.tdd-md.json: test_runner must be "none" or "bun" (got ${JSON.stringify(obj.test_runner)})`, + ); + } + testRunner = obj.test_runner; + } + let trackedBranches: string[] = ["main"]; + if (obj.tracked_branches !== undefined) { + if (!Array.isArray(obj.tracked_branches) || obj.tracked_branches.some((b) => typeof b !== "string" || !b)) { + throw new Error(".tdd-md.json: tracked_branches must be a non-empty array of branch names"); + } + trackedBranches = obj.tracked_branches as string[]; + } + const config: ProjectConfig = { + version, + test_runner: testRunner, + tracked_branches: trackedBranches, + }; + if (typeof obj.display_name === "string" && obj.display_name) { + config.display_name = obj.display_name; + } + if (typeof obj.team === "string" && obj.team) { + config.team = obj.team; + } + return config; +}; + +// Parse a GitHub repo URL or owner/repo shorthand. 
Accepts: +// https://github.com/syntaxai/tdd.md +// https://github.com/syntaxai/tdd.md.git +// github.com/syntaxai/tdd.md +// syntaxai/tdd.md +// Returns the owner + repo or throws with a precise message. +export const parseRepoIdentifier = (raw: string): { owner: string; repo: string } => { + const trimmed = raw.trim(); + if (!trimmed) throw new Error("Repository URL is required."); + let path = trimmed; + const httpsMatch = path.match(/^https?:\/\/(?:www\.)?github\.com\/(.+)$/i); + if (httpsMatch?.[1]) path = httpsMatch[1]; + const bareMatch = path.match(/^github\.com\/(.+)$/i); + if (bareMatch?.[1]) path = bareMatch[1]; + path = path.replace(/\.git$/i, "").replace(/\/+$/, ""); + const parts = path.split("/").filter(Boolean); + const owner = parts[0]; + const repo = parts[1]; + if (parts.length !== 2 || !owner || !repo) { + throw new Error( + `Couldn't parse "${raw}" as a GitHub repo. Use a URL like https://github.com/owner/name or the shorthand owner/name.`, + ); + } + if (!/^[A-Za-z0-9._-]+$/.test(owner) || !/^[A-Za-z0-9._-]+$/.test(repo)) { + throw new Error(`"${raw}" contains characters that aren't valid for a GitHub owner/repo.`); + } + return { owner, repo }; +}; diff --git a/src/c31_reports_demo.ts b/src/c31_reports_demo.ts new file mode 100644 index 0000000000000000000000000000000000000000..f39ce1b65e89521317c4ae9c1d139fb3c897aa1d --- /dev/null +++ b/src/c31_reports_demo.ts @@ -0,0 +1,201 @@ +// c31 — model: synthetic dataset for the reporting mockups. Pure data, +// no I/O, no rendering. The c51_render builders consume these to produce +// the demo views at /reports/demo/*. When the real ingest pipeline ships +// the same shape gets populated from c13_database queries instead. 
+ +export interface RecentFlagged { + date: string; + repo: string; + sha: string; + phase: "red" | "green" | "refactor"; + failure: string; + pts: number; +} + +export interface FailureSlice { + label: string; + pct: number; + tone: "red" | "green" | "muted" | "accent"; +} + +export interface AgentReport { + slug: "claude-code" | "cursor" | "aider"; + name: string; + score: number; + delta: number; + commits: number; + phaseCoveragePct: number; + streak: number; + streakBroken: boolean; + topIssueLabel: string; + topIssuePct: number; + failureMix: FailureSlice[]; + trend: number[]; + recent: RecentFlagged[]; +} + +export interface TestFailure { + test: string; + since: string; + flaky?: boolean; +} + +export interface TestSnapshot { + repo: string; + branch: string; + total: number; + passing: number; + failing: number; + failures: TestFailure[]; +} + +export interface TestStability { + test: string; + repo: string; + pass: number; + fail: number; + deleted: number; + lastBrokenBy: AgentReport["slug"]; + flagged?: boolean; +} + +export const DEMO_PERIOD = "2026-01-01 → 2026-03-31"; +export const DEMO_ORG = "acme-corp"; +export const DEMO_REPOS = 4; + +export const DEMO_SNAPSHOTS: TestSnapshot[] = [ + { + repo: "api-gateway", + branch: "main", + total: 247, + passing: 245, + failing: 2, + failures: [ + { test: "rate-limit.spec.ts > resets at midnight UTC", since: "2026-03-26" }, + { test: "webhook.spec.ts > retries on 5xx with backoff", since: "2026-03-28" }, + ], + }, + { + repo: "billing-service", + branch: "main", + total: 89, + passing: 89, + failing: 0, + failures: [], + }, + { + repo: "data-pipeline", + branch: "main", + total: 156, + passing: 154, + failing: 2, + failures: [ + { test: "ingest.spec.ts > handles malformed CSV row", since: "2026-03-22" }, + { test: "ingest.spec.ts > deduplicates by hash", since: "2026-03-22" }, + ], + }, + { + repo: "frontend-web", + branch: "main", + total: 312, + passing: 310, + failing: 2, + failures: [ + { test: 
"checkout.spec.ts > handles network timeout", since: "2026-03-15", flaky: true }, + { test: "login.spec.ts > redirects after auth", since: "2026-03-11", flaky: true }, + ], + }, +]; + +export const DEMO_STABILITY: TestStability[] = [ + { test: "webhook.spec.ts > retries on 5xx with backoff", repo: "api-gateway", pass: 33, fail: 11, deleted: 0, lastBrokenBy: "cursor", flagged: true }, + { test: "checkout.spec.ts > handles network timeout", repo: "frontend-web", pass: 51, fail: 8, deleted: 0, lastBrokenBy: "cursor", flagged: true }, + { test: "rate-limit.spec.ts > resets at midnight UTC", repo: "api-gateway", pass: 42, fail: 6, deleted: 0, lastBrokenBy: "claude-code" }, + { test: "login.spec.ts > redirects after auth", repo: "frontend-web", pass: 44, fail: 5, deleted: 1, lastBrokenBy: "cursor", flagged: true }, + { test: "ingest.spec.ts > handles malformed CSV row", repo: "data-pipeline", pass: 38, fail: 4, deleted: 0, lastBrokenBy: "aider" }, + { test: "auth.spec.ts > validates JWT signature", repo: "api-gateway", pass: 47, fail: 3, deleted: 0, lastBrokenBy: "claude-code" }, + { test: "ingest.spec.ts > deduplicates by hash", repo: "data-pipeline", pass: 30, fail: 3, deleted: 0, lastBrokenBy: "aider" }, + { test: "billing.spec.ts > applies tax bracket", repo: "billing-service", pass: 29, fail: 2, deleted: 0, lastBrokenBy: "claude-code" }, + { test: "webhook.spec.ts > signs payload with HMAC", repo: "api-gateway", pass: 35, fail: 2, deleted: 1, lastBrokenBy: "cursor", flagged: true }, + { test: "billing.spec.ts > computes monthly total", repo: "billing-service", pass: 28, fail: 1, deleted: 1, lastBrokenBy: "cursor", flagged: true }, + { test: "invoice.spec.ts > generates PDF receipt", repo: "billing-service", pass: 25, fail: 1, deleted: 0, lastBrokenBy: "claude-code" }, + { test: "pricing.spec.ts > rounds to nearest cent", repo: "billing-service", pass: 26, fail: 1, deleted: 0, lastBrokenBy: "aider" }, +]; + +export const DEMO_REPORTS: AgentReport[] = [ + { + slug: 
"claude-code", + name: "Claude Code", + score: 78, + delta: +6, + commits: 612, + phaseCoveragePct: 92, + streak: 47, + streakBroken: false, + topIssueLabel: "red-did-not-fail", + topIssuePct: 8, + failureMix: [ + { label: "clean cycles", pct: 84, tone: "green" }, + { label: "red-did-not-fail", pct: 8, tone: "red" }, + { label: "broken refactor", pct: 4, tone: "red" }, + { label: "test-deleted", pct: 2, tone: "red" }, + { label: "no phase tag", pct: 2, tone: "muted" }, + ], + trend: [72, 73, 71, 74, 72, 75, 73, 75, 77, 76, 75, 76, 78, 77, 79, 78, 77, 79, 80, 78, 79, 80, 79, 81, 80, 82, 81, 80, 79, 78], + recent: [ + { date: "2026-03-29", repo: "api-gateway", sha: "f1c8b3a", phase: "red", failure: "red-did-not-fail", pts: -5 }, + { date: "2026-03-24", repo: "billing-service", sha: "9d2e1f4", phase: "refactor", failure: "broken refactor", pts: -5 }, + { date: "2026-03-18", repo: "data-pipeline", sha: "62a9cb7", phase: "green", failure: "no phase tag (parent)", pts: 0 }, + ], + }, + { + slug: "cursor", + name: "Cursor", + score: 54, + delta: -15, + commits: 489, + phaseCoveragePct: 71, + streak: 3, + streakBroken: true, + topIssueLabel: "test-deleted in refactor", + topIssuePct: 14, + failureMix: [ + { label: "clean cycles", pct: 64, tone: "green" }, + { label: "test-deleted", pct: 14, tone: "red" }, + { label: "red-did-not-fail", pct: 9, tone: "red" }, + { label: "broken refactor", pct: 7, tone: "red" }, + { label: "no phase tag", pct: 6, tone: "muted" }, + ], + trend: [69, 70, 71, 72, 70, 71, 72, 73, 72, 71, 72, 70, 68, 65, 60, 55, 50, 52, 54, 53, 56, 54, 52, 55, 53, 54, 56, 55, 54, 54], + recent: [ + { date: "2026-03-28", repo: "api-gateway", sha: "a1b2c3d", phase: "refactor", failure: "test-deleted", pts: -20 }, + { date: "2026-03-26", repo: "api-gateway", sha: "4e5f6a7", phase: "green", failure: "broken refactor", pts: -5 }, + { date: "2026-03-23", repo: "billing-service", sha: "8b9c0d1", phase: "red", failure: "red-did-not-fail", pts: -5 }, + { date: 
"2026-03-21", repo: "api-gateway", sha: "2e3f4a5", phase: "refactor", failure: "test-deleted", pts: -20 }, + { date: "2026-03-19", repo: "data-pipeline", sha: "6b7c8d9", phase: "refactor", failure: "broken refactor", pts: -5 }, + ], + }, + { + slug: "aider", + name: "Aider", + score: 89, + delta: +2, + commits: 146, + phaseCoveragePct: 96, + streak: 89, + streakBroken: false, + topIssueLabel: "broken refactor", + topIssuePct: 3, + failureMix: [ + { label: "clean cycles", pct: 94, tone: "green" }, + { label: "broken refactor", pct: 3, tone: "red" }, + { label: "red-did-not-fail", pct: 2, tone: "red" }, + { label: "no phase tag", pct: 1, tone: "muted" }, + ], + trend: [87, 88, 89, 88, 87, 89, 90, 89, 88, 89, 90, 88, 89, 90, 91, 89, 88, 89, 90, 89, 90, 91, 89, 88, 89, 90, 89, 90, 89, 89], + recent: [ + { date: "2026-03-27", repo: "data-pipeline", sha: "3a4b5c6", phase: "refactor", failure: "broken refactor", pts: -5 }, + { date: "2026-03-15", repo: "billing-service", sha: "7d8e9f0", phase: "red", failure: "red-did-not-fail", pts: -5 }, + ], + }, +]; diff --git a/src/c32_judge.ts b/src/c32_judge.ts new file mode 100644 index 0000000000000000000000000000000000000000..68c307273e2951f1955552bf0ff114f9dba70532 --- /dev/null +++ b/src/c32_judge.ts @@ -0,0 +1,370 @@ +import { mkdtempSync, rmSync } from "fs"; +import { join } from "path"; +import { tmpdir } from "os"; +import { parseCommit, type Phase } from "./c31_commits.ts"; +import { saveRun, type Verdict, type StepVerdict, type RefactorVerdict, type Mode } from "./c13_database.ts"; +import { loadGame, type Game } from "./c31_games.ts"; + +type TestRunner = "bun" | "none"; + +interface TddConfig { + mode: Mode; + testRunner: TestRunner; +} + +// tdd.config.json from the agent's repo selects the scoring mode and +// test runner. Falls back to strict / bun when missing or unparseable. 
+// +// { "mode": "pragmatic", "test_runner": "none" } +// +// test_runner: "none" enables trace-only judging — no checkout, no test +// execution. Useful as a CI gate on projects where Bun can't run the +// suite (e.g. .NET, Python without bun-compat tests). +const readConfig = async (cwd: string): Promise => { + const file = Bun.file(join(cwd, "tdd.config.json")); + let mode: Mode = "strict"; + let testRunner: TestRunner = "bun"; + if (await file.exists()) { + try { + const cfg = (await file.json()) as { mode?: string; test_runner?: string }; + if (cfg.mode === "pragmatic" || cfg.mode === "learning") mode = cfg.mode; + if (cfg.test_runner === "none") testRunner = "none"; + } catch { + // best effort — bad config falls back to defaults + } + } + return { mode, testRunner }; +}; + +// Penalty halving for pragmatic, zeroing for learning. Positive deltas +// are unchanged across modes — earned credit is earned credit. +const applyMode = (delta: number, mode: Mode): number => { + if (delta >= 0) return delta; + if (mode === "learning") return 0; + if (mode === "pragmatic") return Math.ceil(delta / 2); + return delta; +}; + +// Plain-language summary of a step verdict, written to the agent (not +// the human admin). One short paragraph; named intentionally so callers +// can see it next to the row in the score table. +const explainStep = (params: { + status: StepVerdict["status"]; + redSha: string | null; + greenSha: string | null; + hiddenPassed: boolean | null; + mode: Mode; +}): string => { + const { status, hiddenPassed, mode } = params; + switch (status) { + case "verified": + return "Red failed as expected, green passes your tests, and the kata's hidden tests confirm the implementation matches the requirement."; + case "discipline-only": + return "Red→green discipline holds, but this kata didn't ship hidden tests for the step. 
Partial credit awarded; full +20 isn't possible without authoritative verification."; + case "no-green": + return "Red commit landed; the matching green() commit hasn't been pushed yet. Push your green to lock in the score."; + case "red-did-not-fail": + return mode === "pragmatic" + ? "Combined red+green commit detected. Pragmatic mode allows this — the cycle still counts, just with a softer score than a clean separation." + : "Red commit's tests already passed when the step was first introduced — meaning the implementation was added before the test, or the test is tautological. Switch to pragmatic mode if you commit red+green together intentionally."; + case "green-did-not-pass": + return "Green commit's own tests still fail. The implementation doesn't yet satisfy the test you wrote — fix the impl, or reconsider whether the test reflects the requirement."; + case "hidden-tests-failed": + return hiddenPassed === false + ? "Your tests pass, but the kata's hidden tests don't — this is the classic tautology trap. Tighten your test to mirror the requirement (e.g., assert the actual return value, not just that it runs)." + : "Your tests pass, but hidden verification was inconclusive. Re-push to retry."; + case "test-deleted": + return "Test count dropped between red and green for this step. Once a test exists it must keep existing — refactor it, don't delete it. If the test was wrong, replace it in a separate commit before resuming the cycle."; + case "trace-verified": + return "Trace-only mode: red→green pair found in the commit log. Tests weren't executed (test_runner: \"none\"). Switch to bun runner for behaviour verification."; + case "trace-tests-shrunk": + return "Trace-only mode: the green commit's tree has fewer test files than the red commit's tree — looks like deletion. If you renamed or split test files, the tally still drops."; + } +}; + +const explainRefactor = (passed: boolean): string => + passed + ? 
"Tests stayed green through the refactor — structural change without behavior change, the canonical refactor." + : "Refactor commit broke at least one test. Either revert the refactor or write a new red→green to capture the changed behavior."; + +const FORGEJO_INTERNAL = process.env.FORGEJO_URL ?? "https://git.tdd.md"; +const TEST_TIMEOUT_MS = 8000; + +// Sandboxed env passed to git and bun subprocesses. Strips every secret +// from the parent process — agent code never sees FORGEJO_ADMIN_TOKEN, +// GITHUB_CLIENT_SECRET, or SESSION_SECRET. PATH is fixed; HOME and TMPDIR +// stay inside the per-run temp dir so dotfile writes can't escape. +const sandboxEnv = (cwd: string): Record => ({ + PATH: "/usr/local/bin:/usr/bin:/bin", + HOME: cwd, + TMPDIR: cwd, + NODE_ENV: "test", +}); + +const runProc = async ( + cmd: string[], + cwd: string, + timeoutMs: number, +): Promise<{ stdout: string; stderr: string; exitCode: number; timedOut: boolean }> => { + const proc = Bun.spawn(cmd, { + cwd, + stdout: "pipe", + stderr: "pipe", + env: sandboxEnv(cwd), + }); + let timedOut = false; + const timer = setTimeout(() => { + timedOut = true; + proc.kill("SIGKILL"); + }, timeoutMs); + const exitCode = await proc.exited; + clearTimeout(timer); + const stdout = await new Response(proc.stdout).text(); + const stderr = await new Response(proc.stderr).text(); + return { stdout: stdout.trim(), stderr: stderr.trim(), exitCode, timedOut }; +}; + +const runTests = async (cwd: string): Promise => { + const r = await runProc(["bun", "test"], cwd, TEST_TIMEOUT_MS); + // Bun test exits 0 only when all tests pass. + return !r.timedOut && r.exitCode === 0; +}; + +// Language-agnostic test-file counter for trace-only mode. Uses git +// ls-tree at the given sha so we don't have to checkout the working +// tree. Matches conventional test-file naming across ecosystems: +// foo.test.ts, foo.spec.ts, FooTests.cs, FooTest.java, test_foo.py, +// foo_test.go, FooSpec.scala, foo_spec.rb. 
+const countTestFiles = async (cwd: string, sha: string): Promise => { + const r = await runProc(["git", "ls-tree", "-r", "--name-only", sha], cwd, 5000); + if (r.exitCode !== 0) return 0; + const re = /(?:^|\/)(?:[^/]*\.(?:test|spec)\.[a-z]+|[Tt]ests?\/[^/]+|test_[^/]+|[^/]+_test\.[a-z]+|[^/]+[Tt]ests?\.cs|[^/]+[Tt]est\.java)$/; + let count = 0; + for (const line of r.stdout.split("\n")) { + if (re.test(line)) count++; + } + return count; +}; + +// Count `test(` / `it(` calls in tracked *.test.ts files. Used to detect +// when an agent deletes tests between red and green to make a regression +// "pass" — a cardinal TDD sin per the kata spec. +const countTests = async (cwd: string): Promise => { + const r = await runProc(["git", "ls-files", "*.test.ts"], cwd, 5000); + if (r.exitCode !== 0) return 0; + const files = r.stdout.split("\n").filter((f) => f && !f.includes("__hidden_")); + let count = 0; + for (const f of files) { + const content = await Bun.file(join(cwd, f)) + .text() + .catch(() => ""); + const matches = content.match(/\b(?:test|it)\s*\(/g); + if (matches) count += matches.length; + } + return count; +}; + +// Runs the kata's authoritative tests against the agent's implementation +// at whatever commit is currently checked out. Copies the hidden test +// file into the working tree under a __hidden__ prefix so it doesn't +// collide with the agent's filenames, runs only that file, then deletes +// it. Returns null if the kata doesn't have hidden tests for this step. 
+const runHiddenTests = async (cwd: string, spec: Game, stepId: string): Promise => { + const stepDef = spec.steps.find((s) => s.id === stepId); + if (!stepDef) return null; + const sourcePath = `./content/games/${spec.id}/${stepDef.hiddenTestFile}`; + const sourceFile = Bun.file(sourcePath); + if (!(await sourceFile.exists())) return null; + const content = await sourceFile.text(); + const targetName = `__hidden_${stepId}__.test.ts`; + const targetPath = join(cwd, targetName); + await Bun.write(targetPath, content); + try { + const r = await runProc(["bun", "test", targetName], cwd, TEST_TIMEOUT_MS); + return !r.timedOut && r.exitCode === 0; + } finally { + try { + rmSync(targetPath, { force: true }); + } catch { + // best effort + } + } +}; + +interface CommitInfo { + sha: string; + phase: Phase; + step: string | null; +} + +const readCommits = async (cwd: string): Promise => { + const r = await runProc(["git", "log", "--reverse", "--pretty=format:%H%x1f%B%x1e"], cwd, 10000); + if (r.exitCode !== 0) return []; + const out: CommitInfo[] = []; + for (const block of r.stdout.split("\x1e")) { + const t = block.trim(); + if (!t) continue; + const [sha, message = ""] = t.split("\x1f"); + if (!sha) continue; + const p = parseCommit(message); + out.push({ sha, phase: p.phase, step: p.step }); + } + return out; +}; + +export const judge = async (owner: string, repo: string): Promise => { + const cwd = mkdtempSync(join(tmpdir(), `judge-${owner}-${repo}-`)); + try { + // Agent repos default to private. Authenticate via admin token in + // an http.extraheader so the token isn't persisted in the cloned + // repo's config (extraheader applies to the clone request only). + const cloneUrl = `${FORGEJO_INTERNAL}/${owner}/${repo}.git`; + const adminToken = process.env.FORGEJO_ADMIN_TOKEN; + const gitArgs = adminToken + ? 
["-c", `http.extraheader=Authorization: token ${adminToken}`, "clone", "--quiet", cloneUrl, "."] + : ["clone", "--quiet", cloneUrl, "."]; + const cloneR = await runProc(["git", ...gitArgs], cwd, 30000); + if (cloneR.exitCode !== 0) { + throw new Error(`clone failed: ${cloneR.stderr || cloneR.stdout}`); + } + + const commits = await readCommits(cwd); + const headR = await runProc(["git", "rev-parse", "HEAD"], cwd, 5000); + const headSha = headR.stdout; + + // First red per step + first green-after-red per step (chronological). + const stepRed = new Map(); + const stepGreen = new Map(); + for (const c of commits) { + if (!c.step) continue; + if (c.phase === "red" && !stepRed.has(c.step)) { + stepRed.set(c.step, c.sha); + } else if (c.phase === "green" && stepRed.has(c.step) && !stepGreen.has(c.step)) { + stepGreen.set(c.step, c.sha); + } + } + + // Read the agent's mode + runner preferences from tdd.config.json. + const { mode, testRunner } = await readConfig(cwd); + + // Load the kata's authoritative spec — used to fetch hidden tests + // per step. Repos that don't match a known kata get scored on red→green + // discipline only (no hidden-test verification). + let spec: Game | null = null; + try { + spec = await loadGame(repo); + } catch { + spec = null; + } + + const steps: StepVerdict[] = []; + for (const [stepId, redSha] of stepRed) { + const greenSha = stepGreen.get(stepId) ?? null; + + if (testRunner === "none") { + // Trace-only path: don't checkout, don't run anything. Score + // purely from the commit log + a language-agnostic test-file + // count via `git ls-tree`. Useful for non-Bun projects. + const redFiles = await countTestFiles(cwd, redSha); + const greenFiles = greenSha ? 
await countTestFiles(cwd, greenSha) : redFiles; + const filesShrank = greenSha !== null && greenFiles < redFiles; + + let status: StepVerdict["status"]; + let baseDelta = 0; + if (greenSha === null) { + status = "no-green"; + } else if (filesShrank) { + status = "trace-tests-shrunk"; + baseDelta = -10; + } else { + status = "trace-verified"; + baseDelta = 10; + } + const scoreDelta = applyMode(baseDelta, mode); + const explanation = explainStep({ status, redSha, greenSha, hiddenPassed: null, mode }); + steps.push({ + stepId, redSha, greenSha, + redFailed: null, greenPassed: null, hiddenPassed: null, + status, scoreDelta, explanation, + }); + continue; + } + + await runProc(["git", "checkout", "--quiet", redSha], cwd, 5000); + const redTestCount = await countTests(cwd); + const redPassed = await runTests(cwd); + const redFailed = !redPassed; + let greenPassed: boolean | null = null; + let hiddenPassed: boolean | null = null; + let testsDeleted = false; + if (greenSha) { + await runProc(["git", "checkout", "--quiet", greenSha], cwd, 5000); + const greenTestCount = await countTests(cwd); + testsDeleted = greenTestCount < redTestCount; + greenPassed = await runTests(cwd); + if (greenPassed && spec && !testsDeleted) { + hiddenPassed = await runHiddenTests(cwd, spec, stepId); + } + } + + let status: StepVerdict["status"]; + let baseDelta = 0; + if (greenSha === null) { + status = "no-green"; + } else if (testsDeleted) { + status = "test-deleted"; + baseDelta = -20; + } else if (!redFailed) { + status = "red-did-not-fail"; + baseDelta = -5; + } else if (greenPassed === false) { + status = "green-did-not-pass"; + baseDelta = -5; + } else if (hiddenPassed === false) { + status = "hidden-tests-failed"; + baseDelta = 0; + } else if (hiddenPassed === true) { + status = "verified"; + baseDelta = 20; + } else { + status = "discipline-only"; + baseDelta = 5; + } + const scoreDelta = applyMode(baseDelta, mode); + const explanation = explainStep({ status, redSha, greenSha, 
hiddenPassed, mode }); + steps.push({ stepId, redSha, greenSha, redFailed, greenPassed, hiddenPassed, status, scoreDelta, explanation }); + } + + // Refactor commits aren't tied to red→green pairs: the spec rewards + // any refactor that keeps the existing tests green. A broken refactor + // (tests fail at the refactor commit) costs the same as a missed + // green — discipline matters even outside red→green pairs. + const refactors: RefactorVerdict[] = []; + for (const c of commits) { + if (c.phase !== "refactor") continue; + await runProc(["git", "checkout", "--quiet", c.sha], cwd, 5000); + const passed = await runTests(cwd); + const baseDelta = passed ? 5 : -5; + refactors.push({ + sha: c.sha, + stepId: c.step, + testsPassed: passed, + scoreDelta: applyMode(baseDelta, mode), + explanation: explainRefactor(passed), + }); + } + + const totalScore = + steps.reduce((a, s) => a + s.scoreDelta, 0) + + refactors.reduce((a, r) => a + r.scoreDelta, 0); + const verdict: Verdict = { headSha, mode, steps, refactors, totalScore, judgedAt: Date.now() }; + saveRun(owner, repo, verdict); + return verdict; + } finally { + try { + rmSync(cwd, { recursive: true, force: true }); + } catch { + // best effort cleanup + } + } +}; diff --git a/src/c32_session.ts b/src/c32_session.ts new file mode 100644 index 0000000000000000000000000000000000000000..47006e72a46a994a89b28ccf2ecb680c2b134c0a --- /dev/null +++ b/src/c32_session.ts @@ -0,0 +1,81 @@ +// c32 — logic: session signing/verification + cookie helpers. Pure +// HMAC over the session payload, no I/O. Handlers (c21) pull a viewer +// off the request via getViewer(), and the OAuth callback issues a +// session cookie via sessionCookieHeader + signSession. + +// 30 days. Long enough for everyday use, short enough that a leaked +// cookie doesn't grant indefinite access. +export const SESSION_TTL_SEC = 30 * 24 * 60 * 60; +const SESSION_COOKIE = "tdd_session"; + +const sessionSecret = (): string => + process.env.SESSION_SECRET ?? 
process.env.WEBHOOK_SECRET ?? ""; + +export const randomHex = (bytes: number): string => + Array.from(crypto.getRandomValues(new Uint8Array(bytes))) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + +export const parseCookies = (header: string | null): Record => { + const out: Record = {}; + if (!header) return out; + for (const part of header.split(";")) { + const idx = part.indexOf("="); + if (idx === -1) continue; + const name = part.slice(0, idx).trim(); + const value = part.slice(idx + 1).trim(); + if (name) out[name] = decodeURIComponent(value); + } + return out; +}; + +export const timingSafeEqual = (a: string, b: string): boolean => { + if (a.length !== b.length) return false; + let r = 0; + for (let i = 0; i < a.length; i++) r |= a.charCodeAt(i) ^ b.charCodeAt(i); + return r === 0; +}; + +export const hmacSha256Hex = async (secret: string, body: string): Promise => { + const key = await crypto.subtle.importKey( + "raw", + new TextEncoder().encode(secret), + { name: "HMAC", hash: "SHA-256" }, + false, + ["sign"], + ); + const sig = await crypto.subtle.sign("HMAC", key, new TextEncoder().encode(body)); + return Array.from(new Uint8Array(sig)) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); +}; + +export const signSession = async (username: string): Promise => { + const exp = Math.floor(Date.now() / 1000) + SESSION_TTL_SEC; + const payload = `${username}.${exp}`; + const sig = await hmacSha256Hex(sessionSecret(), payload); + return `${payload}.${sig}`; +}; + +export const verifySession = async (cookie: string): Promise => { + const parts = cookie.split("."); + if (parts.length !== 3) return null; + const [username, expStr, providedSig] = parts; + if (!username || !expStr || !providedSig) return null; + const exp = Number(expStr); + if (!Number.isFinite(exp) || exp < Math.floor(Date.now() / 1000)) return null; + const expectedSig = await hmacSha256Hex(sessionSecret(), `${username}.${expStr}`); + if (!timingSafeEqual(providedSig, 
expectedSig)) return null; + return username; +}; + +export const getViewer = async (req: Request): Promise => { + if (!sessionSecret()) return null; + const cookies = parseCookies(req.headers.get("cookie")); + const raw = cookies[SESSION_COOKIE]; + if (!raw) return null; + return verifySession(raw); +}; + +export const sessionCookieHeader = (value: string, maxAge: number): string => + `${SESSION_COOKIE}=${value}; Path=/; HttpOnly; Secure; SameSite=Lax; Max-Age=${maxAge}`; diff --git a/src/c51_render.ts b/src/c51_render.ts new file mode 100644 index 0000000000000000000000000000000000000000..0019baaf703c93998d3f6ee40c43500351fb392b --- /dev/null +++ b/src/c51_render.ts @@ -0,0 +1,528 @@ +// c51 — UI: HTML rendering. Page chrome (renderPage / renderNotFound) +// plus all per-page body builders. Imports types from c13/c31; never +// from c11 or c21 (lower-numbered layers can be imported, higher ones +// cannot). + +import { marked } from "marked"; +import type { ProjectRow } from "./c13_database.ts"; +import { PROJECT_CONFIG_PATH } from "./c31_project_config.ts"; +import type { Phase } from "./c31_commits.ts"; +import { + DEMO_PERIOD, + DEMO_ORG, + DEMO_REPOS, + DEMO_REPORTS, + DEMO_SNAPSHOTS, + DEMO_STABILITY, + type AgentReport, + type FailureSlice, + type TestSnapshot, + type TestStability, +} from "./c31_reports_demo.ts"; + +const STYLE_CSS = "./public/style.css"; +const css = await Bun.file(STYLE_CSS).text(); + +export type Section = "home" | "games" | "guides" | "blog" | "agents" | "leaderboard"; + +export interface PageOptions { + title: string; + bodyMarkdown: string; + description?: string; + ogPath?: string; + active?: Section; + noindex?: boolean; + jsonLd?: Record; +} + +const SITE_DESCRIPTION = "Test-driven development for agentic coding. 
Scored katas, public verdicts."; + +const escape = (s: string): string => + s.replace(/&/g, "&").replace(/"/g, """).replace(//g, ">"); + +const navLink = (href: string, label: string, active: boolean): string => { + const cls = active ? ' class="nav-active"' : ""; + return `${label}`; +}; + +const nav = (active?: Section): string => ``; + +export const renderPage = async (opts: PageOptions): Promise => { + const body = await marked.parse(opts.bodyMarkdown, { gfm: true, breaks: false }); + const description = opts.description ?? SITE_DESCRIPTION; + const ogPath = opts.ogPath ?? "https://tdd.md"; + const robots = opts.noindex ? `\n` : ""; + const jsonLd = opts.jsonLd + ? `\n` + : ""; + return ` + + + + + + +${robots} + + + + + + + + + + + + + +${escape(opts.title)} +${jsonLd} + + +${nav(opts.active)} +
+${body} +
+ +`; +}; + +export const renderNotFound = async (path: string): Promise => + renderPage({ + title: "404 — tdd.md", + bodyMarkdown: `# 404\n\n> No such path: \`${path}\`\n\nTry [home](/), [games](/games), [agents](/agents), or [leaderboard](/leaderboard).`, + noindex: true, + }); + +// --------------------------------------------------------------------- +// Small response/formatting helpers used by c21 handlers. +// --------------------------------------------------------------------- + +export const htmlResponse = (html: string, status = 200): Response => + new Response(html, { status, headers: { "Content-Type": "text/html; charset=utf-8" } }); + +export const errorPage = async (message: string, status = 400): Promise => { + const html = await renderPage({ + title: "error — tdd.md", + bodyMarkdown: `# error\n\n> ${message}\n\n[← back](/agents/register)`, + active: "agents", + }); + return htmlResponse(html, status); +}; + +export const phaseSpan = (p: Phase): string => { + const cls = p === "red" ? "red" : p === "green" ? "green" : p === "refactor" ? "blue" : "muted"; + return `${p}`; +}; + +export const relativeTime = (iso: string): string => { + const ms = Date.now() - new Date(iso).getTime(); + if (ms < 60_000) return `${Math.max(0, Math.floor(ms / 1000))}s ago`; + if (ms < 3_600_000) return `${Math.floor(ms / 60_000)}m ago`; + if (ms < 86_400_000) return `${Math.floor(ms / 3_600_000)}h ago`; + return `${Math.floor(ms / 86_400_000)}d ago`; +}; + +// --------------------------------------------------------------------- +// Body builders for /projects. +// --------------------------------------------------------------------- + +const projectListRow = (p: ProjectRow): string => { + const slug = `${p.repoOwner}/${p.repoName}`; + const display = p.displayName ?? slug; + const team = p.team ? ` · ${escape(p.team)}` : ""; + const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); + const runner = p.testRunner === "none" ? 
"trace-only" : p.testRunner; + return `| [${escape(display)}](/projects/${p.repoOwner}/${p.repoName}) ${team} | ${branches} | ${runner} |`; +}; + +export const projectsLandingMd = (projects: ProjectRow[]): string => { + const rows = projects.length === 0 + ? `| _no projects yet — [register one](/projects/new)_ | | |` + : projects.map(projectListRow).join("\n"); + return `# projects + +> Real repos that opted in to tdd.md scoring. Each project drops \`${PROJECT_CONFIG_PATH}\` at its root, registers here, and from then on its commits on tracked branches get judged structurally — red-fails, green-passes, no test-deletion, no regression. The aggregated scores feed [the reports](/reports). + +## tracked + +| project | branches | runner | +|---|---|---| +${rows} + +## register a repo + +[Register a project →](/projects/new) — paste a public GitHub URL; tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from the default branch and onboards it. + +## the config file + +Drop \`${PROJECT_CONFIG_PATH}\` at the root of your repo's default branch: + +\`\`\`json +{ + "version": 1, + "test_runner": "none", + "tracked_branches": ["main"], + "display_name": "API Gateway", + "team": "platform" +} +\`\`\` + +- **\`test_runner\`** — \`"none"\` for trace-mode (commit-discipline only, language-agnostic). \`"bun"\` will run the test suite once the sandbox-runner ships. +- **\`tracked_branches\`** — pushes to these branches get scored. Defaults to \`["main"]\`. +- **\`display_name\`** / **\`team\`** — optional, only used in the reporting UI. + +## what comes next + +Registration just stores the project. Per-commit judging (the part that produces score data for the reports) lands in the next sliver — until then the [report pages](/reports) keep showing the demo dataset. 
+ +[← back to tdd.md](/) · [the reports](/reports) +`; +}; + +export const projectRegisterMd = ( + viewer: string | null, + prefilled?: string, + errorMessage?: string, +): string => { + if (!viewer) { + return `# register a project + +> You need to sign in before registering a project. We use your GitHub identity to record who onboarded the repo. + +[ sign in with github → ](/auth/github/start) + +[← all projects](/projects) +`; + } + const error = errorMessage + ? `
Couldn't register that repo:
${escape(errorMessage)}
` + : ""; + const value = prefilled ? ` value="${escape(prefilled)}"` : ""; + return `# register a project + +> Paste a public GitHub URL. tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from its default branch, validates it, and onboards the repo. Re-register the same repo to refresh the config. + +${error} + +
+ + + +
+ +> Signed in as ${escape(viewer)}. Don't have \`${PROJECT_CONFIG_PATH}\` yet? [See the format on /projects](/projects#the-config-file). + +[← all projects](/projects) +`; +}; + +// --------------------------------------------------------------------- +// Body builders for /reports. +// --------------------------------------------------------------------- + +const trendArrow = (delta: number): { glyph: string; cls: string } => + delta > 0 ? { glyph: "↑", cls: "up" } : delta < 0 ? { glyph: "↓", cls: "down" } : { glyph: "→", cls: "flat" }; + +const sparkline = (values: number[], height = 60, width = 320): string => { + if (values.length === 0) return ""; + const min = Math.min(...values); + const max = Math.max(...values); + const range = Math.max(1, max - min); + const stepX = width / Math.max(1, values.length - 1); + const pad = 6; + const innerH = height - pad * 2; + const points = values + .map((v, i) => { + const x = (i * stepX).toFixed(1); + const y = (pad + innerH - ((v - min) / range) * innerH).toFixed(1); + return `${x},${y}`; + }) + .join(" "); + return ``; +}; + +const tile = (a: AgentReport): string => { + const arr = trendArrow(a.delta); + const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; + return `
+

${escape(a.name)}

+

${a.score} / 100

+

${arr.glyph} ${escape(deltaStr)}

+

${a.commits.toLocaleString()} commits

+
top issue: ${escape(a.topIssueLabel)} (${a.topIssuePct}%)
+
`; +}; + +const bars = (mix: FailureSlice[]): string => { + const rows = mix + .map( + (s) => `
+ ${escape(s.label)} + + ${s.pct}% +
`, + ) + .join("\n"); + return `
${rows}
`; +}; + +const streakBox = (a: AgentReport): string => { + const cls = a.streakBroken ? "broken" : a.streak >= 30 ? "long" : ""; + const label = a.streakBroken ? "recent break" : "consecutive clean cycles"; + return `${a.streak} ${label}`; +}; + +const mockBanner = `
demo data — real reporting wires up when the project-tracking pipeline ships. why tdd.md needs this · about reporting
`; + +const snapshotBlock = (s: TestSnapshot): string => { + const failuresHtml = s.failures.length === 0 + ? `
  • all ${s.passing} tests groen
  • ` + : s.failures + .map( + (f) => + `
  • ${escape(f.test)} ${f.flaky ? "intermittent · " : ""}sinds ${f.since}
  • `, + ) + .concat([`
  • + ${s.passing.toLocaleString()} passing tests
  • `]) + .join("\n"); + const statusCls = s.failing === 0 ? "ok" : "bad"; + return `
    +

    ${escape(s.repo)} @ ${escape(s.branch)}

    +

    ${s.total.toLocaleString()} tests · ${s.passing.toLocaleString()} passing${s.failing > 0 ? ` · ${s.failing.toLocaleString()} failing` : ""}

    +
      +${failuresHtml} +
    +
    `; +}; + +const agentTagHtml = (slug: AgentReport["slug"]): string => { + const name = DEMO_REPORTS.find((r) => r.slug === slug)?.name ?? slug; + return `${escape(name)}`; +}; + +const stabilityRow = (s: TestStability): string => { + const cls = s.flagged ? "test-stab-row flagged" : "test-stab-row"; + const warn = s.flagged ? ` ` : ""; + return ` + ${escape(s.test)}
    ${escape(s.repo)}
    + ${s.pass} + ${s.fail} + ${s.deleted} + ${agentTagHtml(s.lastBrokenBy)}${warn} +`; +}; + +export const reportsLandingMd = (): string => `# reports + +> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout. + +This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data: + +- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents +- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits +- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam + +Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers. + +## what gets measured + +This layer measures **discipline**, not code-quality. Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch: + +| failure mode | what triggers it | what it costs | +|---|---|---| +| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit | +| \`test-deleted\` | test count drops between commits | -20 / commit | +| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit | +| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % | + +The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. 
An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip. + +## reading the data + +For management: +- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page. + +For team-leads: +- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts. + +[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games) +`; + +export const execSummaryMd = (): string => { + const totalCommits = DEMO_REPORTS.reduce((s, a) => s + a.commits, 0); + const tiles = DEMO_REPORTS.map(tile).join("\n"); + return `# tdd-discipline rapport · q1 2026 + +${mockBanner} + +> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}. + +
    +${tiles} +
    + +## wat veranderde dit kwartaal + +Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf. + +## wat we doen + +- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)). +- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren. +- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt. + +## wat dit getal *niet* meet + +Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy. + +--- + +[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports) +`; +}; + +export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => { + const a = DEMO_REPORTS.find((r) => r.slug === slug); + if (!a) return null; + const arr = trendArrow(a.delta); + const deltaStr = a.delta > 0 ? 
`+${a.delta}` : `${a.delta}`; + const recentRows = a.recent + .map( + (r) => + `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, + ) + .join("\n"); + return `# ${a.name} · drill-down + +${mockBanner} + +> Discipline-score **${a.score} / 100** ${arr.glyph} ${deltaStr} over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. + +## trend (30 dagen) + +
    +${sparkline(a.trend)} +
    + +${streakBox(a)} + +## failure-mode breakdown + +${bars(a.failureMix)} + +Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits). + +## recent flagged + +| date | repo | sha | phase | failure | pts | +|---|---|---|---|---|---| +${recentRows} + +## coaching + +- ${a.slug === "claude-code" ? `[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent \`red-did-not-fail\`.` : a.slug === "cursor" ? `[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, \`.cursor/rules\` to stop test-deletion.` : `[Aider is the closest agent to TDD on rails — until \`--auto-test\`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.`} +- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough. + +--- + +[← exec summary](/reports/demo) · [back to /reports](/reports) +`; +}; + +export const testsOverviewMd = (): string => { + const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0); + const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0); + const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0); + const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n"); + const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n"); + return `# tests overzicht + +${mockBanner} + +> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". + +## huidige stand · per repo + +
    +${snapshots} +
    + +**Totaal**: ${total.toLocaleString()} tests · ${passing.toLocaleString()} passing · ${failing.toLocaleString()} failing. + +## test-stabiliteit · q1 2026 + +Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken. + + + + + + + + + + + + +${stabRows} + +
    testpassfaildellaatst gebroken door
    + +> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test. + +## hoe lees je dit + +- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm. +- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt. +- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon). +- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem. + +--- + +[← exec summary](/reports/demo) · [back to /reports](/reports) +`; +}; + +// --------------------------------------------------------------------- +// Body builder for /projects/:owner/:repo. +// --------------------------------------------------------------------- + +export const projectDetailMd = (p: ProjectRow): string => { + const display = p.displayName ?? `${p.repoOwner}/${p.repoName}`; + const registeredAt = new Date(p.registeredAt).toISOString().slice(0, 10); + const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); + const runnerNote = p.testRunner === "none" + ? "Trace-mode — judging looks at commit phase tags, test-count drift, and refactor stability. No test execution." + : "Bun runner — test suite executes in a sandbox at every tracked-branch commit. (Sandbox-runner ships in the next sliver; meanwhile this falls back to trace-mode.)"; + return `# ${escape(display)} + +> [${escape(p.repoOwner)}/${escape(p.repoName)}](https://github.com/${p.repoOwner}/${p.repoName}) · registered by [${escape(p.registeredBy)}](/agents/${p.registeredBy}) on ${registeredAt}. + +## config + +| key | value | +|---|---| +| test_runner | \`${p.testRunner}\` | +| tracked_branches | ${branches} | +| display_name | ${p.displayName ? `\`${escape(p.displayName)}\`` : "_(none)_"} | +| team | ${p.team ? 
`\`${escape(p.team)}\`` : "_(none)_"} | +| status | \`${p.status}\` | + +${runnerNote} + +## scored commits + +> _No commits judged yet._ The webhook ingest + judging pipeline lands in the next sliver — once it does, scored commits for tracked branches will appear here grouped by agent. + +## refresh + +Push an updated \`${PROJECT_CONFIG_PATH}\` to your default branch and [re-register](/projects/new?repo=${encodeURIComponent(`${p.repoOwner}/${p.repoName}`)}) to pick up the new config. + +[← all projects](/projects) +`; +}; diff --git a/src/commits.test.ts b/src/commits.test.ts deleted file mode 100644 index a667164527e77d6c9ce01436806effa81fc0cf89..0000000000000000000000000000000000000000 --- a/src/commits.test.ts +++ /dev/null @@ -1,52 +0,0 @@ -import { test, expect } from "bun:test"; -import { parseCommit, computeProgress } from "./commits"; - -test("parseCommit reads a phase prefix", () => { - expect(parseCommit("red: failing test for empty")).toEqual({ - phase: "red", - step: null, - subject: "failing test for empty", - }); -}); - -test("parseCommit extracts step from phase(step): form", () => { - expect(parseCommit("green(single-number): return n for one number")).toEqual({ - phase: "green", - step: "single-number", - subject: "return n for one number", - }); -}); - -test("parseCommit recognizes 'Initial commit' as init", () => { - expect(parseCommit("Initial commit").phase).toBe("init"); -}); - -test("parseCommit returns untagged for unknown messages", () => { - expect(parseCommit("wip — fixing something").phase).toBe("untagged"); -}); - -test("parseCommit recognizes spike: prefix", () => { - expect(parseCommit("spike: try the regex approach").phase).toBe("spike"); -}); - -test("parseCommit extracts step from spike(step):", () => { - const p = parseCommit("spike(custom-separator): explore Forge regex"); - expect(p.phase).toBe("spike"); - expect(p.step).toBe("custom-separator"); -}); - -test("computeProgress verifies a step after red→green for the same step", 
() => { - const commits = [ - { commit: { message: "green(empty): returns 0" } }, - { commit: { message: "red(empty): empty string returns 0" } }, - ]; // newest first, like Forgejo - const p = computeProgress(commits); - expect(p.verifiedSteps).toEqual(new Set(["empty"])); - expect(p.redCount).toBe(1); - expect(p.greenCount).toBe(1); -}); - -test("computeProgress does not verify green-without-prior-red", () => { - const commits = [{ commit: { message: "green(empty): returns 0" } }]; - expect(computeProgress(commits).verifiedSteps.size).toBe(0); -}); diff --git a/src/commits.ts b/src/commits.ts deleted file mode 100644 index 89e5c1950bf9976d0ee5806501333d0775f007e4..0000000000000000000000000000000000000000 --- a/src/commits.ts +++ /dev/null @@ -1,65 +0,0 @@ -export type Phase = "red" | "green" | "refactor" | "spike" | "init" | "untagged"; - -export interface ParsedCommit { - phase: Phase; - step: string | null; - subject: string; -} - -const PHASE_RE = /^(red|green|refactor|spike)(?:\(([a-z][a-z0-9-]*)\))?:\s*(.*)$/i; - -export const parseCommit = (message: string): ParsedCommit => { - const subject = message.split("\n")[0] ?? ""; - const m = subject.match(PHASE_RE); - if (m) { - return { - phase: m[1]!.toLowerCase() as Phase, - step: m[2] ?? null, - subject: m[3] ?? "", - }; - } - if (/^Initial commit$/i.test(subject)) { - return { phase: "init", step: null, subject }; - } - return { phase: "untagged", step: null, subject }; -}; - -export interface Progress { - verifiedSteps: Set; - redCount: number; - greenCount: number; - refactorCount: number; - spikeCount: number; - untaggedCount: number; -} - -// A step counts as "verified" when its red commit is followed by a green -// for the same step. Refactor and untagged commits are tallied separately -// for the score breakdown but don't move verification. 
-export const computeProgress = (commits: { commit: { message: string } }[]): Progress => { - const pendingRed = new Set(); - const verifiedSteps = new Set(); - let redCount = 0; - let greenCount = 0; - let refactorCount = 0; - let spikeCount = 0; - let untaggedCount = 0; - // Forgejo returns commits newest-first; walk oldest-first to get sequence. - for (const c of [...commits].reverse()) { - const p = parseCommit(c.commit.message); - if (p.phase === "red") { - redCount++; - if (p.step) pendingRed.add(p.step); - } else if (p.phase === "green") { - greenCount++; - if (p.step && pendingRed.has(p.step)) verifiedSteps.add(p.step); - } else if (p.phase === "refactor") { - refactorCount++; - } else if (p.phase === "spike") { - spikeCount++; - } else if (p.phase === "untagged") { - untaggedCount++; - } - } - return { verifiedSteps, redCount, greenCount, refactorCount, spikeCount, untaggedCount }; -}; diff --git a/src/db.ts b/src/db.ts deleted file mode 100644 index 2091e827d77d5b6704883bc92c0aa20ff5ba8a9c..0000000000000000000000000000000000000000 --- a/src/db.ts +++ /dev/null @@ -1,214 +0,0 @@ -import { Database } from "bun:sqlite"; -import type { ProjectConfig, TestRunner } from "./projects"; - -const DB_PATH = process.env.TDD_DB_PATH ?? 
":memory:"; - -let db: Database | null = null; - -const getDb = (): Database => { - if (db) return db; - db = new Database(DB_PATH, { create: true }); - db.exec(` - CREATE TABLE IF NOT EXISTS runs ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - owner TEXT NOT NULL, - repo TEXT NOT NULL, - head_sha TEXT NOT NULL, - judged_at INTEGER NOT NULL, - verdict_json TEXT NOT NULL - ); - CREATE INDEX IF NOT EXISTS idx_runs_owner_repo - ON runs(owner, repo, judged_at DESC); - - CREATE TABLE IF NOT EXISTS projects ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - registered_by TEXT NOT NULL, - repo_owner TEXT NOT NULL, - repo_name TEXT NOT NULL, - test_runner TEXT NOT NULL DEFAULT 'none', - tracked_branches TEXT NOT NULL, - display_name TEXT, - team TEXT, - registered_at INTEGER NOT NULL, - status TEXT NOT NULL DEFAULT 'active', - UNIQUE(repo_owner, repo_name) - ); - CREATE INDEX IF NOT EXISTS idx_projects_registered_by - ON projects(registered_by); - `); - return db; -}; - -export type Mode = "strict" | "pragmatic" | "learning"; - -export interface StepVerdict { - stepId: string; - redSha: string | null; - greenSha: string | null; - redFailed: boolean | null; - greenPassed: boolean | null; - // Whether the kata's authoritative hidden tests pass against the agent's - // implementation at the green commit. null when no hidden tests exist - // for the step (unknown kata, or step not registered with the spec). - hiddenPassed: boolean | null; - status: - | "verified" - | "discipline-only" - | "no-green" - | "red-did-not-fail" - | "green-did-not-pass" - | "hidden-tests-failed" - | "test-deleted" - // Trace-only mode: tests not executed, only commit discipline checked. - // Used when test_runner: "none" — language-agnostic, useful as a - // CI gate on real projects where Bun can't run the test suite. 
- | "trace-verified" - | "trace-tests-shrunk"; - scoreDelta: number; - // Coach-style explanation of the verdict — what happened, why the score - // is what it is, and (when relevant) how to improve next time. - explanation: string; -} - -export interface RefactorVerdict { - sha: string; - stepId: string | null; - testsPassed: boolean; - scoreDelta: number; - explanation: string; -} - -export interface Verdict { - headSha: string; - mode: Mode; - steps: StepVerdict[]; - refactors: RefactorVerdict[]; - totalScore: number; - judgedAt: number; -} - -export const saveRun = (owner: string, repo: string, verdict: Verdict): void => { - getDb().run( - `INSERT INTO runs (owner, repo, head_sha, judged_at, verdict_json) VALUES (?, ?, ?, ?, ?)`, - [owner, repo, verdict.headSha, verdict.judgedAt, JSON.stringify(verdict)], - ); -}; - -export const latestRun = (owner: string, repo: string): Verdict | null => { - const row = getDb() - .query<{ verdict_json: string }, [string, string]>( - `SELECT verdict_json FROM runs WHERE owner = ? AND repo = ? ORDER BY judged_at DESC LIMIT 1`, - ) - .get(owner, repo); - if (!row) return null; - return JSON.parse(row.verdict_json) as Verdict; -}; - -export interface ProjectRow { - id: number; - registeredBy: string; - repoOwner: string; - repoName: string; - testRunner: TestRunner; - trackedBranches: string[]; - displayName: string | null; - team: string | null; - registeredAt: number; - status: "active" | "paused"; -} - -interface ProjectDbRow { - id: number; - registered_by: string; - repo_owner: string; - repo_name: string; - test_runner: string; - tracked_branches: string; - display_name: string | null; - team: string | null; - registered_at: number; - status: string; -} - -const rowToProject = (r: ProjectDbRow): ProjectRow => ({ - id: r.id, - registeredBy: r.registered_by, - repoOwner: r.repo_owner, - repoName: r.repo_name, - testRunner: (r.test_runner === "bun" ? 
"bun" : "none") as TestRunner, - trackedBranches: JSON.parse(r.tracked_branches) as string[], - displayName: r.display_name, - team: r.team, - registeredAt: r.registered_at, - status: r.status === "paused" ? "paused" : "active", -}); - -// Inserts or updates a project. Re-registering the same repo refreshes -// its config (test_runner, tracked_branches, display_name, team) without -// duplicating the row. Returns the stored project. -export const upsertProject = ( - registeredBy: string, - repoOwner: string, - repoName: string, - config: ProjectConfig, -): ProjectRow => { - const now = Date.now(); - const branches = JSON.stringify(config.tracked_branches); - const display = config.display_name ?? null; - const team = config.team ?? null; - getDb().run( - `INSERT INTO projects (registered_by, repo_owner, repo_name, test_runner, tracked_branches, display_name, team, registered_at, status) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active') - ON CONFLICT(repo_owner, repo_name) DO UPDATE SET - test_runner = excluded.test_runner, - tracked_branches = excluded.tracked_branches, - display_name = excluded.display_name, - team = excluded.team, - status = 'active'`, - [registeredBy, repoOwner, repoName, config.test_runner, branches, display, team, now], - ); - const row = getDb() - .query( - `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`, - ) - .get(repoOwner, repoName); - if (!row) throw new Error("project upsert returned no row"); - return rowToProject(row); -}; - -export const getProject = (repoOwner: string, repoName: string): ProjectRow | null => { - const row = getDb() - .query( - `SELECT * FROM projects WHERE repo_owner = ? AND repo_name = ?`, - ) - .get(repoOwner, repoName); - return row ? 
rowToProject(row) : null; -}; - -export const listActiveProjects = (): ProjectRow[] => { - const rows = getDb() - .query( - `SELECT * FROM projects WHERE status = 'active' ORDER BY registered_at DESC`, - ) - .all(); - return rows.map(rowToProject); -}; - -// Latest verdict per (owner, repo) across all agents — drives the -// leaderboard and the /agents index. -export const allLatestRuns = (): { owner: string; repo: string; verdict: Verdict }[] => { - const rows = getDb() - .query<{ owner: string; repo: string; verdict_json: string }, []>( - `SELECT owner, repo, verdict_json FROM runs r1 - WHERE judged_at = ( - SELECT MAX(judged_at) FROM runs r2 - WHERE r2.owner = r1.owner AND r2.repo = r1.repo - )`, - ) - .all(); - return rows.map((r) => ({ - owner: r.owner, - repo: r.repo, - verdict: JSON.parse(r.verdict_json) as Verdict, - })); -}; diff --git a/src/forgejo.ts b/src/forgejo.ts deleted file mode 100644 index 6a9358e0a47d01450505c0e364361c0b807afc6d..0000000000000000000000000000000000000000 --- a/src/forgejo.ts +++ /dev/null @@ -1,261 +0,0 @@ -// Internal URL — Bun container talks to Forgejo via host.containers.internal -// (rootless podman's standard hostname for the host network). Falls back to -// the public URL for local dev. -const FORGEJO_URL = process.env.FORGEJO_URL ?? "https://git.tdd.md"; -const ADMIN_TOKEN = process.env.FORGEJO_ADMIN_TOKEN ?? 
""; - -const adminAuth = (): HeadersInit => ({ - Authorization: `token ${ADMIN_TOKEN}`, -}); - -const userAuth = (username: string, password: string): HeadersInit => ({ - Authorization: `Basic ${btoa(`${username}:${password}`)}`, -}); - -export const isConfigured = (): boolean => ADMIN_TOKEN !== ""; - -export const userExists = async (username: string): Promise => { - const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}`, { - headers: adminAuth(), - }); - return res.status === 200; -}; - -export const createUser = async (params: { - username: string; - email: string; - password: string; - fullName?: string; -}): Promise => { - const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users`, { - method: "POST", - headers: { ...adminAuth(), "Content-Type": "application/json" }, - body: JSON.stringify({ - username: params.username, - email: params.email, - password: params.password, - full_name: params.fullName ?? params.username, - must_change_password: false, - send_notify: false, - }), - }); - if (!res.ok) { - const text = await res.text(); - throw new Error(`forgejo createUser ${res.status}: ${text}`); - } -}; - -export const setUserPassword = async (username: string, password: string): Promise => { - const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(username)}`, { - method: "PATCH", - headers: { ...adminAuth(), "Content-Type": "application/json" }, - body: JSON.stringify({ - password, - must_change_password: false, - source_id: 0, - login_name: username, - }), - }); - if (!res.ok) { - const text = await res.text(); - throw new Error(`forgejo setUserPassword ${res.status}: ${text}`); - } -}; - -export const repoExists = async (owner: string, repo: string): Promise => { - const res = await fetch(`${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`, { - headers: adminAuth(), - }); - return res.status === 200; -}; - -// Creates a per-repo webhook that fires on push events. 
The webhook -// posts to /api/forgejo/webhook on tdd.md, signed with WEBHOOK_SECRET so -// our endpoint can verify it. Idempotent — checks for an existing hook -// with the same URL before creating. -export const ensureRepoWebhook = async (params: { - owner: string; - repo: string; - webhookUrl: string; - secret: string; -}): Promise => { - const base = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(params.owner)}/${encodeURIComponent(params.repo)}/hooks`; - const listRes = await fetch(base, { headers: adminAuth() }); - if (listRes.ok) { - const hooks = (await listRes.json()) as { id: number; config: { url?: string } }[]; - const exists = hooks.some((h) => h.config?.url === params.webhookUrl); - if (exists) return; - } - const res = await fetch(base, { - method: "POST", - headers: { ...adminAuth(), "Content-Type": "application/json" }, - body: JSON.stringify({ - type: "forgejo", - active: true, - events: ["push"], - config: { - url: params.webhookUrl, - content_type: "json", - secret: params.secret, - }, - }), - }); - if (!res.ok) { - const text = await res.text(); - throw new Error(`forgejo ensureRepoWebhook ${res.status}: ${text}`); - } -}; - -export const createRepoForUser = async (params: { - username: string; - name: string; - description?: string; -}): Promise => { - const res = await fetch(`${FORGEJO_URL}/api/v1/admin/users/${encodeURIComponent(params.username)}/repos`, { - method: "POST", - headers: { ...adminAuth(), "Content-Type": "application/json" }, - body: JSON.stringify({ - name: params.name, - description: params.description ?? "", - // Private by default — the source is the agent's, not ours to - // publish. Verdicts still render on tdd.md via admin-mediated - // API calls; clones require the agent's push token. - private: true, - // No auto_init: the agent's first push becomes the genuine initial - // commit. An admin-authored "Initial commit" would muddle the phase - // log and break attribution on the agent's repo page. 
- auto_init: false, - default_branch: "main", - }), - }); - if (!res.ok) { - const text = await res.text(); - throw new Error(`forgejo createRepo ${res.status}: ${text}`); - } -}; - -interface TokenInfo { - id: number; - name: string; -} - -const listTokens = async (username: string, password: string): Promise => { - const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens`, { - headers: userAuth(username, password), - }); - if (!res.ok) return []; - return (await res.json()) as TokenInfo[]; -}; - -const deleteToken = async (username: string, password: string, tokenId: number): Promise => { - await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(username)}/tokens/${tokenId}`, { - method: "DELETE", - headers: userAuth(username, password), - }); -}; - -export const createPushToken = async (params: { - username: string; - password: string; - name: string; -}): Promise => { - // Revoke any existing tokens with the same name so re-registration always - // returns a fresh one and the previous one is invalidated. - const existing = await listTokens(params.username, params.password); - for (const t of existing) { - if (t.name === params.name) { - await deleteToken(params.username, params.password, t.id); - } - } - - const res = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(params.username)}/tokens`, { - method: "POST", - headers: { ...userAuth(params.username, params.password), "Content-Type": "application/json" }, - body: JSON.stringify({ - name: params.name, - // write:repository for the push; read:user so the agent can - // verify their own identity against tdd.md's self-service - // endpoints (e.g. POST /api/agents/:name/visibility). 
- scopes: ["write:repository", "read:user"], - }), - }); - if (!res.ok) { - const text = await res.text(); - throw new Error(`forgejo createPushToken ${res.status}: ${text}`); - } - const data = (await res.json()) as { sha1: string }; - return data.sha1; -}; - -const randomPassword = (): string => - Array.from(crypto.getRandomValues(new Uint8Array(32))) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - -export interface AgentRegistration { - username: string; - pushToken: string; - repoCloneUrl: string; - isNew: boolean; -} - -// Idempotent: if the user exists, reset their password and rotate the push -// token. Always also ensures the kata repo exists. -export const registerAgent = async (params: { - username: string; - email: string; - fullName?: string; - kata?: string; -}): Promise => { - const password = randomPassword(); - const isNew = !(await userExists(params.username)); - - if (isNew) { - await createUser({ - username: params.username, - email: params.email, - password, - fullName: params.fullName, - }); - } else { - await setUserPassword(params.username, password); - } - - const pushToken = await createPushToken({ - username: params.username, - password, - name: "tdd-md-push", - }); - - const kata = params.kata ?? "string-calc"; - if (!(await repoExists(params.username, kata))) { - await createRepoForUser({ - username: params.username, - name: kata, - description: `${params.username}'s submission for the ${kata} kata`, - }); - } - - const baseUrl = process.env.BASE_URL ?? "https://tdd.md"; - const webhookSecret = process.env.WEBHOOK_SECRET; - if (webhookSecret) { - try { - await ensureRepoWebhook({ - owner: params.username, - repo: kata, - webhookUrl: `${baseUrl}/api/forgejo/webhook`, - secret: webhookSecret, - }); - } catch (err) { - // Webhook is convenience; registration must still succeed without it. 
- console.error(`webhook setup failed for ${params.username}/${kata}:`, err); - } - } - - return { - username: params.username, - pushToken, - repoCloneUrl: `${baseUrl}/${params.username}/${kata}.git`, - isNew, - }; -}; diff --git a/src/games.test.ts b/src/games.test.ts deleted file mode 100644 index 7a4e80781912fb1baee12af5223fc95520ccebb1..0000000000000000000000000000000000000000 --- a/src/games.test.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { test, expect } from "bun:test"; -import { loadGame } from "./games"; - -test("loadGame returns a game with the expected id", async () => { - const game = await loadGame("string-calc"); - expect(game.id).toBe("string-calc"); -}); - -test("loadGame returns the kata's step ids in order", async () => { - const game = await loadGame("string-calc"); - expect(game.steps.map((s) => s.id)).toEqual([ - "empty", - "single-number", - "two-numbers", - "n-numbers", - "newline-separator", - "custom-separator", - "negatives-throw", - ]); -}); - -test("loadGame throws a clear error for an unknown game", async () => { - await expect(loadGame("does-not-exist")).rejects.toThrow( - /unknown game: does-not-exist/, - ); -}); diff --git a/src/games.ts b/src/games.ts deleted file mode 100644 index 4294a10a7aee4750b4009929f03dc9b8656bf2c8..0000000000000000000000000000000000000000 --- a/src/games.ts +++ /dev/null @@ -1,55 +0,0 @@ -export interface Step { - id: string; - requirement: string; - // Path (relative to the kata's spec.ts) of the authoritative test file. - // The judge copies this into the agent's working tree after the green - // checkout and runs it — hidden tests are how we detect cheating where - // an agent writes a tautological test like `expect(true).toBe(true)`. - hiddenTestFile: string; -} - -export interface Game { - id: string; - // One-line summary shown on the games index and OG previews. - description: string; - // Human-readable function signature the agent must export. 
Documented - // on the kata page so authors know what to build. - signature: string; - // The module path the hidden tests will import from. Agents must export - // their solution from this exact path (relative to repo root). - importPath: string; - steps: Step[]; -} - -import { readdir } from "node:fs/promises"; - -// Reads every kata under content/games/ and returns the loaded specs in -// alphabetical order. Used to build the games index and sitemap without -// hard-coding individual kata ids. -export async function listGames(): Promise { - let entries; - try { - entries = await readdir("./content/games", { withFileTypes: true }); - } catch { - return []; - } - const ids = entries.filter((e) => e.isDirectory()).map((e) => e.name).sort(); - const games: Game[] = []; - for (const id of ids) { - try { - games.push(await loadGame(id)); - } catch { - // skip katas that fail to load (missing spec.ts, etc.) - } - } - return games; -} - -export async function loadGame(id: string): Promise { - const file = Bun.file(`./content/games/${id}/spec.ts`); - if (!(await file.exists())) { - throw new Error(`unknown game: ${id}`); - } - const mod = await import(`../content/games/${id}/spec.ts`); - return mod.spec as Game; -} diff --git a/src/github_oauth.ts b/src/github_oauth.ts deleted file mode 100644 index 675ad139851ca18c2cdeeb72c6b0e32f4ccdd133..0000000000000000000000000000000000000000 --- a/src/github_oauth.ts +++ /dev/null @@ -1,80 +0,0 @@ -const CLIENT_ID = process.env.GITHUB_CLIENT_ID ?? ""; -const CLIENT_SECRET = process.env.GITHUB_CLIENT_SECRET ?? 
""; - -export interface GithubUser { - login: string; - id: number; - email: string | null; - avatar_url: string; - name: string | null; -} - -export interface GithubEmail { - email: string; - primary: boolean; - verified: boolean; - visibility: string | null; -} - -export const isConfigured = (): boolean => CLIENT_ID !== "" && CLIENT_SECRET !== ""; - -export const authorizeUrl = (state: string, redirectUri: string): string => { - const params = new URLSearchParams({ - client_id: CLIENT_ID, - redirect_uri: redirectUri, - scope: "read:user user:email", - state, - allow_signup: "true", - }); - return `https://github.com/login/oauth/authorize?${params}`; -}; - -export const exchangeCode = async (code: string, redirectUri: string): Promise => { - const res = await fetch("https://github.com/login/oauth/access_token", { - method: "POST", - headers: { - Accept: "application/json", - "Content-Type": "application/json", - }, - body: JSON.stringify({ - client_id: CLIENT_ID, - client_secret: CLIENT_SECRET, - code, - redirect_uri: redirectUri, - }), - }); - if (!res.ok) { - throw new Error(`github token exchange failed: ${res.status}`); - } - const data = (await res.json()) as { access_token?: string; error?: string; error_description?: string }; - if (!data.access_token) { - throw new Error(`github token exchange returned no token: ${data.error_description ?? data.error ?? 
"unknown"}`); - } - return data.access_token; -}; - -export const fetchUser = async (accessToken: string): Promise => { - const res = await fetch("https://api.github.com/user", { - headers: { - Authorization: `token ${accessToken}`, - Accept: "application/vnd.github+json", - "User-Agent": "tdd.md", - }, - }); - if (!res.ok) throw new Error(`github user fetch failed: ${res.status}`); - return (await res.json()) as GithubUser; -}; - -export const fetchPrimaryEmail = async (accessToken: string): Promise => { - const res = await fetch("https://api.github.com/user/emails", { - headers: { - Authorization: `token ${accessToken}`, - Accept: "application/vnd.github+json", - "User-Agent": "tdd.md", - }, - }); - if (!res.ok) return null; - const emails = (await res.json()) as GithubEmail[]; - const verified = emails.filter((e) => e.verified); - return verified.find((e) => e.primary)?.email ?? verified[0]?.email ?? null; -}; diff --git a/src/judge.ts b/src/judge.ts deleted file mode 100644 index ebe57dd51fa8346e39b4f8f9264ed7ea3cb152b6..0000000000000000000000000000000000000000 --- a/src/judge.ts +++ /dev/null @@ -1,370 +0,0 @@ -import { mkdtempSync, rmSync } from "fs"; -import { join } from "path"; -import { tmpdir } from "os"; -import { parseCommit, type Phase } from "./commits"; -import { saveRun, type Verdict, type StepVerdict, type RefactorVerdict, type Mode } from "./db"; -import { loadGame, type Game } from "./games"; - -type TestRunner = "bun" | "none"; - -interface TddConfig { - mode: Mode; - testRunner: TestRunner; -} - -// tdd.config.json from the agent's repo selects the scoring mode and -// test runner. Falls back to strict / bun when missing or unparseable. -// -// { "mode": "pragmatic", "test_runner": "none" } -// -// test_runner: "none" enables trace-only judging — no checkout, no test -// execution. Useful as a CI gate on projects where Bun can't run the -// suite (e.g. .NET, Python without bun-compat tests). 
-const readConfig = async (cwd: string): Promise => { - const file = Bun.file(join(cwd, "tdd.config.json")); - let mode: Mode = "strict"; - let testRunner: TestRunner = "bun"; - if (await file.exists()) { - try { - const cfg = (await file.json()) as { mode?: string; test_runner?: string }; - if (cfg.mode === "pragmatic" || cfg.mode === "learning") mode = cfg.mode; - if (cfg.test_runner === "none") testRunner = "none"; - } catch { - // best effort — bad config falls back to defaults - } - } - return { mode, testRunner }; -}; - -// Penalty halving for pragmatic, zeroing for learning. Positive deltas -// are unchanged across modes — earned credit is earned credit. -const applyMode = (delta: number, mode: Mode): number => { - if (delta >= 0) return delta; - if (mode === "learning") return 0; - if (mode === "pragmatic") return Math.ceil(delta / 2); - return delta; -}; - -// Plain-language summary of a step verdict, written to the agent (not -// the human admin). One short paragraph; named intentionally so callers -// can see it next to the row in the score table. -const explainStep = (params: { - status: StepVerdict["status"]; - redSha: string | null; - greenSha: string | null; - hiddenPassed: boolean | null; - mode: Mode; -}): string => { - const { status, hiddenPassed, mode } = params; - switch (status) { - case "verified": - return "Red failed as expected, green passes your tests, and the kata's hidden tests confirm the implementation matches the requirement."; - case "discipline-only": - return "Red→green discipline holds, but this kata didn't ship hidden tests for the step. Partial credit awarded; full +20 isn't possible without authoritative verification."; - case "no-green": - return "Red commit landed; the matching green() commit hasn't been pushed yet. Push your green to lock in the score."; - case "red-did-not-fail": - return mode === "pragmatic" - ? "Combined red+green commit detected. 
Pragmatic mode allows this — the cycle still counts, just with a softer score than a clean separation." - : "Red commit's tests already passed when the step was first introduced — meaning the implementation was added before the test, or the test is tautological. Switch to pragmatic mode if you commit red+green together intentionally."; - case "green-did-not-pass": - return "Green commit's own tests still fail. The implementation doesn't yet satisfy the test you wrote — fix the impl, or reconsider whether the test reflects the requirement."; - case "hidden-tests-failed": - return hiddenPassed === false - ? "Your tests pass, but the kata's hidden tests don't — this is the classic tautology trap. Tighten your test to mirror the requirement (e.g., assert the actual return value, not just that it runs)." - : "Your tests pass, but hidden verification was inconclusive. Re-push to retry."; - case "test-deleted": - return "Test count dropped between red and green for this step. Once a test exists it must keep existing — refactor it, don't delete it. If the test was wrong, replace it in a separate commit before resuming the cycle."; - case "trace-verified": - return "Trace-only mode: red→green pair found in the commit log. Tests weren't executed (test_runner: \"none\"). Switch to bun runner for behaviour verification."; - case "trace-tests-shrunk": - return "Trace-only mode: the green commit's tree has fewer test files than the red commit's tree — looks like deletion. If you renamed or split test files, the tally still drops."; - } -}; - -const explainRefactor = (passed: boolean): string => - passed - ? "Tests stayed green through the refactor — structural change without behavior change, the canonical refactor." - : "Refactor commit broke at least one test. Either revert the refactor or write a new red→green to capture the changed behavior."; - -const FORGEJO_INTERNAL = process.env.FORGEJO_URL ?? 
"https://git.tdd.md"; -const TEST_TIMEOUT_MS = 8000; - -// Sandboxed env passed to git and bun subprocesses. Strips every secret -// from the parent process — agent code never sees FORGEJO_ADMIN_TOKEN, -// GITHUB_CLIENT_SECRET, or SESSION_SECRET. PATH is fixed; HOME and TMPDIR -// stay inside the per-run temp dir so dotfile writes can't escape. -const sandboxEnv = (cwd: string): Record => ({ - PATH: "/usr/local/bin:/usr/bin:/bin", - HOME: cwd, - TMPDIR: cwd, - NODE_ENV: "test", -}); - -const runProc = async ( - cmd: string[], - cwd: string, - timeoutMs: number, -): Promise<{ stdout: string; stderr: string; exitCode: number; timedOut: boolean }> => { - const proc = Bun.spawn(cmd, { - cwd, - stdout: "pipe", - stderr: "pipe", - env: sandboxEnv(cwd), - }); - let timedOut = false; - const timer = setTimeout(() => { - timedOut = true; - proc.kill("SIGKILL"); - }, timeoutMs); - const exitCode = await proc.exited; - clearTimeout(timer); - const stdout = await new Response(proc.stdout).text(); - const stderr = await new Response(proc.stderr).text(); - return { stdout: stdout.trim(), stderr: stderr.trim(), exitCode, timedOut }; -}; - -const runTests = async (cwd: string): Promise => { - const r = await runProc(["bun", "test"], cwd, TEST_TIMEOUT_MS); - // Bun test exits 0 only when all tests pass. - return !r.timedOut && r.exitCode === 0; -}; - -// Language-agnostic test-file counter for trace-only mode. Uses git -// ls-tree at the given sha so we don't have to checkout the working -// tree. Matches conventional test-file naming across ecosystems: -// foo.test.ts, foo.spec.ts, FooTests.cs, FooTest.java, test_foo.py, -// foo_test.go, FooSpec.scala, foo_spec.rb. 
-const countTestFiles = async (cwd: string, sha: string): Promise => { - const r = await runProc(["git", "ls-tree", "-r", "--name-only", sha], cwd, 5000); - if (r.exitCode !== 0) return 0; - const re = /(?:^|\/)(?:[^/]*\.(?:test|spec)\.[a-z]+|[Tt]ests?\/[^/]+|test_[^/]+|[^/]+_test\.[a-z]+|[^/]+[Tt]ests?\.cs|[^/]+[Tt]est\.java)$/; - let count = 0; - for (const line of r.stdout.split("\n")) { - if (re.test(line)) count++; - } - return count; -}; - -// Count `test(` / `it(` calls in tracked *.test.ts files. Used to detect -// when an agent deletes tests between red and green to make a regression -// "pass" — a cardinal TDD sin per the kata spec. -const countTests = async (cwd: string): Promise => { - const r = await runProc(["git", "ls-files", "*.test.ts"], cwd, 5000); - if (r.exitCode !== 0) return 0; - const files = r.stdout.split("\n").filter((f) => f && !f.includes("__hidden_")); - let count = 0; - for (const f of files) { - const content = await Bun.file(join(cwd, f)) - .text() - .catch(() => ""); - const matches = content.match(/\b(?:test|it)\s*\(/g); - if (matches) count += matches.length; - } - return count; -}; - -// Runs the kata's authoritative tests against the agent's implementation -// at whatever commit is currently checked out. Copies the hidden test -// file into the working tree under a __hidden__ prefix so it doesn't -// collide with the agent's filenames, runs only that file, then deletes -// it. Returns null if the kata doesn't have hidden tests for this step. 
-const runHiddenTests = async (cwd: string, spec: Game, stepId: string): Promise => { - const stepDef = spec.steps.find((s) => s.id === stepId); - if (!stepDef) return null; - const sourcePath = `./content/games/${spec.id}/${stepDef.hiddenTestFile}`; - const sourceFile = Bun.file(sourcePath); - if (!(await sourceFile.exists())) return null; - const content = await sourceFile.text(); - const targetName = `__hidden_${stepId}__.test.ts`; - const targetPath = join(cwd, targetName); - await Bun.write(targetPath, content); - try { - const r = await runProc(["bun", "test", targetName], cwd, TEST_TIMEOUT_MS); - return !r.timedOut && r.exitCode === 0; - } finally { - try { - rmSync(targetPath, { force: true }); - } catch { - // best effort - } - } -}; - -interface CommitInfo { - sha: string; - phase: Phase; - step: string | null; -} - -const readCommits = async (cwd: string): Promise => { - const r = await runProc(["git", "log", "--reverse", "--pretty=format:%H%x1f%B%x1e"], cwd, 10000); - if (r.exitCode !== 0) return []; - const out: CommitInfo[] = []; - for (const block of r.stdout.split("\x1e")) { - const t = block.trim(); - if (!t) continue; - const [sha, message = ""] = t.split("\x1f"); - if (!sha) continue; - const p = parseCommit(message); - out.push({ sha, phase: p.phase, step: p.step }); - } - return out; -}; - -export const judge = async (owner: string, repo: string): Promise => { - const cwd = mkdtempSync(join(tmpdir(), `judge-${owner}-${repo}-`)); - try { - // Agent repos default to private. Authenticate via admin token in - // an http.extraheader so the token isn't persisted in the cloned - // repo's config (extraheader applies to the clone request only). - const cloneUrl = `${FORGEJO_INTERNAL}/${owner}/${repo}.git`; - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; - const gitArgs = adminToken - ? 
["-c", `http.extraheader=Authorization: token ${adminToken}`, "clone", "--quiet", cloneUrl, "."] - : ["clone", "--quiet", cloneUrl, "."]; - const cloneR = await runProc(["git", ...gitArgs], cwd, 30000); - if (cloneR.exitCode !== 0) { - throw new Error(`clone failed: ${cloneR.stderr || cloneR.stdout}`); - } - - const commits = await readCommits(cwd); - const headR = await runProc(["git", "rev-parse", "HEAD"], cwd, 5000); - const headSha = headR.stdout; - - // First red per step + first green-after-red per step (chronological). - const stepRed = new Map(); - const stepGreen = new Map(); - for (const c of commits) { - if (!c.step) continue; - if (c.phase === "red" && !stepRed.has(c.step)) { - stepRed.set(c.step, c.sha); - } else if (c.phase === "green" && stepRed.has(c.step) && !stepGreen.has(c.step)) { - stepGreen.set(c.step, c.sha); - } - } - - // Read the agent's mode + runner preferences from tdd.config.json. - const { mode, testRunner } = await readConfig(cwd); - - // Load the kata's authoritative spec — used to fetch hidden tests - // per step. Repos that don't match a known kata get scored on red→green - // discipline only (no hidden-test verification). - let spec: Game | null = null; - try { - spec = await loadGame(repo); - } catch { - spec = null; - } - - const steps: StepVerdict[] = []; - for (const [stepId, redSha] of stepRed) { - const greenSha = stepGreen.get(stepId) ?? null; - - if (testRunner === "none") { - // Trace-only path: don't checkout, don't run anything. Score - // purely from the commit log + a language-agnostic test-file - // count via `git ls-tree`. Useful for non-Bun projects. - const redFiles = await countTestFiles(cwd, redSha); - const greenFiles = greenSha ? 
await countTestFiles(cwd, greenSha) : redFiles; - const filesShrank = greenSha !== null && greenFiles < redFiles; - - let status: StepVerdict["status"]; - let baseDelta = 0; - if (greenSha === null) { - status = "no-green"; - } else if (filesShrank) { - status = "trace-tests-shrunk"; - baseDelta = -10; - } else { - status = "trace-verified"; - baseDelta = 10; - } - const scoreDelta = applyMode(baseDelta, mode); - const explanation = explainStep({ status, redSha, greenSha, hiddenPassed: null, mode }); - steps.push({ - stepId, redSha, greenSha, - redFailed: null, greenPassed: null, hiddenPassed: null, - status, scoreDelta, explanation, - }); - continue; - } - - await runProc(["git", "checkout", "--quiet", redSha], cwd, 5000); - const redTestCount = await countTests(cwd); - const redPassed = await runTests(cwd); - const redFailed = !redPassed; - let greenPassed: boolean | null = null; - let hiddenPassed: boolean | null = null; - let testsDeleted = false; - if (greenSha) { - await runProc(["git", "checkout", "--quiet", greenSha], cwd, 5000); - const greenTestCount = await countTests(cwd); - testsDeleted = greenTestCount < redTestCount; - greenPassed = await runTests(cwd); - if (greenPassed && spec && !testsDeleted) { - hiddenPassed = await runHiddenTests(cwd, spec, stepId); - } - } - - let status: StepVerdict["status"]; - let baseDelta = 0; - if (greenSha === null) { - status = "no-green"; - } else if (testsDeleted) { - status = "test-deleted"; - baseDelta = -20; - } else if (!redFailed) { - status = "red-did-not-fail"; - baseDelta = -5; - } else if (greenPassed === false) { - status = "green-did-not-pass"; - baseDelta = -5; - } else if (hiddenPassed === false) { - status = "hidden-tests-failed"; - baseDelta = 0; - } else if (hiddenPassed === true) { - status = "verified"; - baseDelta = 20; - } else { - status = "discipline-only"; - baseDelta = 5; - } - const scoreDelta = applyMode(baseDelta, mode); - const explanation = explainStep({ status, redSha, greenSha, 
hiddenPassed, mode }); - steps.push({ stepId, redSha, greenSha, redFailed, greenPassed, hiddenPassed, status, scoreDelta, explanation }); - } - - // Refactor commits aren't tied to red→green pairs: the spec rewards - // any refactor that keeps the existing tests green. A broken refactor - // (tests fail at the refactor commit) costs the same as a missed - // green — discipline matters even outside red→green pairs. - const refactors: RefactorVerdict[] = []; - for (const c of commits) { - if (c.phase !== "refactor") continue; - await runProc(["git", "checkout", "--quiet", c.sha], cwd, 5000); - const passed = await runTests(cwd); - const baseDelta = passed ? 5 : -5; - refactors.push({ - sha: c.sha, - stepId: c.step, - testsPassed: passed, - scoreDelta: applyMode(baseDelta, mode), - explanation: explainRefactor(passed), - }); - } - - const totalScore = - steps.reduce((a, s) => a + s.scoreDelta, 0) + - refactors.reduce((a, r) => a + r.scoreDelta, 0); - const verdict: Verdict = { headSha, mode, steps, refactors, totalScore, judgedAt: Date.now() }; - saveRun(owner, repo, verdict); - return verdict; - } finally { - try { - rmSync(cwd, { recursive: true, force: true }); - } catch { - // best effort cleanup - } - } -}; diff --git a/src/projects.ts b/src/projects.ts deleted file mode 100644 index f49b34957824250ea00f49c2593fa1d2501e645a..0000000000000000000000000000000000000000 --- a/src/projects.ts +++ /dev/null @@ -1,271 +0,0 @@ -import type { ProjectRow } from "./db"; - -// Project-tracking ingest contract — block 1 of the reporting pipeline. -// -// A "project" is a real repo whose pushes get scored on TDD discipline. -// Distinct from a kata: katas are the practice ground (fixed steps, -// hidden tests); projects are production code judged purely structurally. -// -// Onboarding: a repo opts in by adding `.tdd-md.json` at its root on the -// default branch. 
tdd.md fetches the file (via raw.githubusercontent), -// validates it, and registers the project in our SQLite store. Per-commit -// judging follows in a later sliver — this module covers config + ingest -// of the registration itself. - -export const PROJECT_CONFIG_PATH = ".tdd-md.json"; -export const PROJECT_CONFIG_VERSION = 1; - -export type TestRunner = "none" | "bun"; -export type AgentSlug = "claude-code" | "cursor" | "aider" | "unknown"; - -export interface ProjectConfig { - version: number; - // "none" → trace-mode judging only (commit discipline, no test execution). - // "bun" → full sandbox-runner judging (later sliver — registration accepts - // the value but judging stays trace-only until the runner ships). - test_runner: TestRunner; - // Branches whose pushes get scored. Defaults to ["main"]. - tracked_branches: string[]; - // Optional reporting metadata. - display_name?: string; - team?: string; -} - -export const DEFAULT_CONFIG: ProjectConfig = { - version: PROJECT_CONFIG_VERSION, - test_runner: "none", - tracked_branches: ["main"], -}; - -// Validates and normalises a parsed JSON blob into a ProjectConfig. -// Throws with a human-readable message on failure — those messages are -// surfaced verbatim to the registering user, so they need to be useful. 
-export const parseProjectConfig = (raw: unknown): ProjectConfig => { - if (!raw || typeof raw !== "object") { - throw new Error(".tdd-md.json must be a JSON object"); - } - const obj = raw as Record; - const version = obj.version; - if (typeof version !== "number" || version !== PROJECT_CONFIG_VERSION) { - throw new Error( - `.tdd-md.json has version ${JSON.stringify(version)}; expected ${PROJECT_CONFIG_VERSION}`, - ); - } - let testRunner: TestRunner = "none"; - if (obj.test_runner !== undefined) { - if (obj.test_runner !== "none" && obj.test_runner !== "bun") { - throw new Error( - `.tdd-md.json: test_runner must be "none" or "bun" (got ${JSON.stringify(obj.test_runner)})`, - ); - } - testRunner = obj.test_runner; - } - let trackedBranches: string[] = ["main"]; - if (obj.tracked_branches !== undefined) { - if (!Array.isArray(obj.tracked_branches) || obj.tracked_branches.some((b) => typeof b !== "string" || !b)) { - throw new Error(".tdd-md.json: tracked_branches must be a non-empty array of branch names"); - } - trackedBranches = obj.tracked_branches as string[]; - } - const config: ProjectConfig = { - version, - test_runner: testRunner, - tracked_branches: trackedBranches, - }; - if (typeof obj.display_name === "string" && obj.display_name) { - config.display_name = obj.display_name; - } - if (typeof obj.team === "string" && obj.team) { - config.team = obj.team; - } - return config; -}; - -// Pulls .tdd-md.json from a public GitHub repo's default branch via the -// raw-content host. No auth — public-repo only for now (private repos -// land when we install a GitHub App, deferred to a later sliver). 
-export const fetchProjectConfig = async ( - repoOwner: string, - repoName: string, -): Promise => { - const url = `https://raw.githubusercontent.com/${encodeURIComponent(repoOwner)}/${encodeURIComponent(repoName)}/HEAD/${PROJECT_CONFIG_PATH}`; - const res = await fetch(url, { - headers: { Accept: "application/json", "User-Agent": "tdd.md" }, - }); - if (res.status === 404) { - throw new Error( - `${PROJECT_CONFIG_PATH} not found in ${repoOwner}/${repoName} on the default branch (or the repo is private; private repos aren't supported yet).`, - ); - } - if (!res.ok) { - throw new Error( - `Couldn't fetch ${PROJECT_CONFIG_PATH} from ${repoOwner}/${repoName}: HTTP ${res.status}`, - ); - } - let parsed: unknown; - try { - parsed = await res.json(); - } catch { - throw new Error(`${PROJECT_CONFIG_PATH} in ${repoOwner}/${repoName} isn't valid JSON`); - } - return parseProjectConfig(parsed); -}; - -// Parse a GitHub repo URL or owner/repo shorthand. Accepts: -// https://github.com/syntaxai/tdd.md -// https://github.com/syntaxai/tdd.md.git -// github.com/syntaxai/tdd.md -// syntaxai/tdd.md -// Returns the owner + repo or throws with a precise message. -export const parseRepoIdentifier = (raw: string): { owner: string; repo: string } => { - const trimmed = raw.trim(); - if (!trimmed) throw new Error("Repository URL is required."); - let path = trimmed; - const httpsMatch = path.match(/^https?:\/\/(?:www\.)?github\.com\/(.+)$/i); - if (httpsMatch?.[1]) path = httpsMatch[1]; - const bareMatch = path.match(/^github\.com\/(.+)$/i); - if (bareMatch?.[1]) path = bareMatch[1]; - path = path.replace(/\.git$/i, "").replace(/\/+$/, ""); - const parts = path.split("/").filter(Boolean); - const owner = parts[0]; - const repo = parts[1]; - if (parts.length !== 2 || !owner || !repo) { - throw new Error( - `Couldn't parse "${raw}" as a GitHub repo. 
Use a URL like https://github.com/owner/name or the shorthand owner/name.`, - ); - } - if (!/^[A-Za-z0-9._-]+$/.test(owner) || !/^[A-Za-z0-9._-]+$/.test(repo)) { - throw new Error(`"${raw}" contains characters that aren't valid for a GitHub owner/repo.`); - } - return { owner, repo }; -}; - -const escape = (s: string): string => - s.replace(/&/g, "&").replace(/"/g, """).replace(//g, ">"); - -const projectListRow = (p: ProjectRow): string => { - const slug = `${p.repoOwner}/${p.repoName}`; - const display = p.displayName ?? slug; - const team = p.team ? ` · ${escape(p.team)}` : ""; - const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); - const runner = p.testRunner === "none" ? "trace-only" : p.testRunner; - return `| [${escape(display)}](/projects/${p.repoOwner}/${p.repoName}) ${team} | ${branches} | ${runner} |`; -}; - -export const projectsLandingMd = (projects: ProjectRow[]): string => { - const rows = projects.length === 0 - ? `| _no projects yet — [register one](/projects/new)_ | | |` - : projects.map(projectListRow).join("\n"); - return `# projects - -> Real repos that opted in to tdd.md scoring. Each project drops \`${PROJECT_CONFIG_PATH}\` at its root, registers here, and from then on its commits on tracked branches get judged structurally — red-fails, green-passes, no test-deletion, no regression. The aggregated scores feed [the reports](/reports). - -## tracked - -| project | branches | runner | -|---|---|---| -${rows} - -## register a repo - -[Register a project →](/projects/new) — paste a public GitHub URL; tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from the default branch and onboards it. - -## the config file - -Drop \`${PROJECT_CONFIG_PATH}\` at the root of your repo's default branch: - -\`\`\`json -{ - "version": 1, - "test_runner": "none", - "tracked_branches": ["main"], - "display_name": "API Gateway", - "team": "platform" -} -\`\`\` - -- **\`test_runner\`** — \`"none"\` for trace-mode (commit-discipline only, language-agnostic). 
\`"bun"\` will run the test suite once the sandbox-runner ships. -- **\`tracked_branches\`** — pushes to these branches get scored. Defaults to \`["main"]\`. -- **\`display_name\`** / **\`team\`** — optional, only used in the reporting UI. - -## what comes next - -Registration just stores the project. Per-commit judging (the part that produces score data for the reports) lands in the next sliver — until then the [report pages](/reports) keep showing the demo dataset. - -[← back to tdd.md](/) · [the reports](/reports) -`; -}; - -export const projectRegisterMd = ( - viewer: string | null, - prefilled?: string, - errorMessage?: string, -): string => { - if (!viewer) { - return `# register a project - -> You need to sign in before registering a project. We use your GitHub identity to record who onboarded the repo. - -[ sign in with github → ](/auth/github/start) - -[← all projects](/projects) -`; - } - const error = errorMessage - ? `
    Couldn't register that repo:
    ${escape(errorMessage)}
    ` - : ""; - const value = prefilled ? ` value="${escape(prefilled)}"` : ""; - return `# register a project - -> Paste a public GitHub URL. tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from its default branch, validates it, and onboards the repo. Re-register the same repo to refresh the config. - -${error} - -
    - - - -
    - -> Signed in as ${escape(viewer)}. Don't have \`${PROJECT_CONFIG_PATH}\` yet? [See the format on /projects](/projects#the-config-file). - -[← all projects](/projects) -`; -}; - -export const projectDetailMd = (p: ProjectRow): string => { - const display = p.displayName ?? `${p.repoOwner}/${p.repoName}`; - const registeredAt = new Date(p.registeredAt).toISOString().slice(0, 10); - const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); - const runnerNote = p.testRunner === "none" - ? "Trace-mode — judging looks at commit phase tags, test-count drift, and refactor stability. No test execution." - : "Bun runner — test suite executes in a sandbox at every tracked-branch commit. (Sandbox-runner ships in the next sliver; meanwhile this falls back to trace-mode.)"; - return `# ${escape(display)} - -> [${escape(p.repoOwner)}/${escape(p.repoName)}](https://github.com/${p.repoOwner}/${p.repoName}) · registered by [${escape(p.registeredBy)}](/agents/${p.registeredBy}) on ${registeredAt}. - -## config - -| key | value | -|---|---| -| test_runner | \`${p.testRunner}\` | -| tracked_branches | ${branches} | -| display_name | ${p.displayName ? `\`${escape(p.displayName)}\`` : "_(none)_"} | -| team | ${p.team ? `\`${escape(p.team)}\`` : "_(none)_"} | -| status | \`${p.status}\` | - -${runnerNote} - -## scored commits - -> _No commits judged yet._ The webhook ingest + judging pipeline lands in the next sliver — once it does, scored commits for tracked branches will appear here grouped by agent. - -## refresh - -Push an updated \`${PROJECT_CONFIG_PATH}\` to your default branch and [re-register](/projects/new?repo=${encodeURIComponent(`${p.repoOwner}/${p.repoName}`)}) to pick up the new config. 
- -[← all projects](/projects) -`; -}; diff --git a/src/render.ts b/src/render.ts deleted file mode 100644 index a3017242a786405aa52c49086211bcd1d6b8e2d4..0000000000000000000000000000000000000000 --- a/src/render.ts +++ /dev/null @@ -1,76 +0,0 @@ -import { marked } from "marked"; - -const STYLE_CSS = "./public/style.css"; -const css = await Bun.file(STYLE_CSS).text(); - -export type Section = "home" | "games" | "guides" | "blog" | "agents" | "leaderboard"; - -export interface PageOptions { - title: string; - bodyMarkdown: string; - description?: string; - ogPath?: string; - active?: Section; - noindex?: boolean; - jsonLd?: Record; -} - -const SITE_DESCRIPTION = "Test-driven development for agentic coding. Scored katas, public verdicts."; - -const escape = (s: string): string => - s.replace(/&/g, "&").replace(/"/g, """).replace(//g, ">"); - -const navLink = (href: string, label: string, active: boolean): string => { - const cls = active ? ' class="nav-active"' : ""; - return `${label}`; -}; - -const nav = (active?: Section): string => ``; - -export const renderPage = async (opts: PageOptions): Promise => { - const body = await marked.parse(opts.bodyMarkdown, { gfm: true, breaks: false }); - const description = opts.description ?? SITE_DESCRIPTION; - const ogPath = opts.ogPath ?? "https://tdd.md"; - const robots = opts.noindex ? `\n` : ""; - const jsonLd = opts.jsonLd - ? `\n` - : ""; - return ` - - - - - - -${robots} - - - - - - - - - - - - - -${escape(opts.title)} -${jsonLd} - - -${nav(opts.active)} -
    -${body} -
    - -`; -}; - -export const renderNotFound = async (path: string): Promise => - renderPage({ - title: "404 — tdd.md", - bodyMarkdown: `# 404\n\n> No such path: \`${path}\`\n\nTry [home](/), [games](/games), [agents](/agents), or [leaderboard](/leaderboard).`, - noindex: true, - }); diff --git a/src/reports.ts b/src/reports.ts deleted file mode 100644 index 7b71618c29bf37f979724231250ceef6fbb41799..0000000000000000000000000000000000000000 --- a/src/reports.ts +++ /dev/null @@ -1,476 +0,0 @@ -// Mockup reporting layer for tdd.md. -// -// All data here is FAKE — wired up only so the management/exec view and -// per-agent drill-down can be designed in the browser before the real -// project-tracking pipeline (block 1) exists. -// -// Real reporting needs: -// - GitHub App / webhook ingest of pushes on tracked branches -// - per-commit judging without hidden tests (red-fails / green-passes / -// no-test-deletion / no-regression) -// - agent attribution (commit footer convention or wrapper-driven) -// Once that exists, the same generators in this file accept real data. 
- -interface RecentFlagged { - date: string; - repo: string; - sha: string; - phase: "red" | "green" | "refactor"; - failure: string; - pts: number; -} - -interface FailureSlice { - label: string; - pct: number; - tone: "red" | "green" | "muted" | "accent"; -} - -export interface AgentReport { - slug: "claude-code" | "cursor" | "aider"; - name: string; - score: number; - delta: number; - commits: number; - phaseCoveragePct: number; - streak: number; - streakBroken: boolean; - topIssueLabel: string; - topIssuePct: number; - failureMix: FailureSlice[]; - trend: number[]; - recent: RecentFlagged[]; -} - -export const DEMO_PERIOD = "2026-01-01 → 2026-03-31"; -export const DEMO_ORG = "acme-corp"; -export const DEMO_REPOS = 4; - -interface TestFailure { - test: string; - since: string; - flaky?: boolean; -} - -interface TestSnapshot { - repo: string; - branch: string; - total: number; - passing: number; - failing: number; - failures: TestFailure[]; -} - -interface TestStability { - test: string; - repo: string; - pass: number; - fail: number; - deleted: number; - lastBrokenBy: AgentReport["slug"]; - flagged?: boolean; -} - -export const DEMO_SNAPSHOTS: TestSnapshot[] = [ - { - repo: "api-gateway", - branch: "main", - total: 247, - passing: 245, - failing: 2, - failures: [ - { test: "rate-limit.spec.ts > resets at midnight UTC", since: "2026-03-26" }, - { test: "webhook.spec.ts > retries on 5xx with backoff", since: "2026-03-28" }, - ], - }, - { - repo: "billing-service", - branch: "main", - total: 89, - passing: 89, - failing: 0, - failures: [], - }, - { - repo: "data-pipeline", - branch: "main", - total: 156, - passing: 154, - failing: 2, - failures: [ - { test: "ingest.spec.ts > handles malformed CSV row", since: "2026-03-22" }, - { test: "ingest.spec.ts > deduplicates by hash", since: "2026-03-22" }, - ], - }, - { - repo: "frontend-web", - branch: "main", - total: 312, - passing: 310, - failing: 2, - failures: [ - { test: "checkout.spec.ts > handles network timeout", 
since: "2026-03-15", flaky: true }, - { test: "login.spec.ts > redirects after auth", since: "2026-03-11", flaky: true }, - ], - }, -]; - -export const DEMO_STABILITY: TestStability[] = [ - { test: "webhook.spec.ts > retries on 5xx with backoff", repo: "api-gateway", pass: 33, fail: 11, deleted: 0, lastBrokenBy: "cursor", flagged: true }, - { test: "checkout.spec.ts > handles network timeout", repo: "frontend-web", pass: 51, fail: 8, deleted: 0, lastBrokenBy: "cursor", flagged: true }, - { test: "rate-limit.spec.ts > resets at midnight UTC", repo: "api-gateway", pass: 42, fail: 6, deleted: 0, lastBrokenBy: "claude-code" }, - { test: "login.spec.ts > redirects after auth", repo: "frontend-web", pass: 44, fail: 5, deleted: 1, lastBrokenBy: "cursor", flagged: true }, - { test: "ingest.spec.ts > handles malformed CSV row", repo: "data-pipeline", pass: 38, fail: 4, deleted: 0, lastBrokenBy: "aider" }, - { test: "auth.spec.ts > validates JWT signature", repo: "api-gateway", pass: 47, fail: 3, deleted: 0, lastBrokenBy: "claude-code" }, - { test: "ingest.spec.ts > deduplicates by hash", repo: "data-pipeline", pass: 30, fail: 3, deleted: 0, lastBrokenBy: "aider" }, - { test: "billing.spec.ts > applies tax bracket", repo: "billing-service", pass: 29, fail: 2, deleted: 0, lastBrokenBy: "claude-code" }, - { test: "webhook.spec.ts > signs payload with HMAC", repo: "api-gateway", pass: 35, fail: 2, deleted: 1, lastBrokenBy: "cursor", flagged: true }, - { test: "billing.spec.ts > computes monthly total", repo: "billing-service", pass: 28, fail: 1, deleted: 1, lastBrokenBy: "cursor", flagged: true }, - { test: "invoice.spec.ts > generates PDF receipt", repo: "billing-service", pass: 25, fail: 1, deleted: 0, lastBrokenBy: "claude-code" }, - { test: "pricing.spec.ts > rounds to nearest cent", repo: "billing-service", pass: 26, fail: 1, deleted: 0, lastBrokenBy: "aider" }, -]; - -export const DEMO_REPORTS: AgentReport[] = [ - { - slug: "claude-code", - name: "Claude Code", - score: 
78, - delta: +6, - commits: 612, - phaseCoveragePct: 92, - streak: 47, - streakBroken: false, - topIssueLabel: "red-did-not-fail", - topIssuePct: 8, - failureMix: [ - { label: "clean cycles", pct: 84, tone: "green" }, - { label: "red-did-not-fail", pct: 8, tone: "red" }, - { label: "broken refactor", pct: 4, tone: "red" }, - { label: "test-deleted", pct: 2, tone: "red" }, - { label: "no phase tag", pct: 2, tone: "muted" }, - ], - trend: [72, 73, 71, 74, 72, 75, 73, 75, 77, 76, 75, 76, 78, 77, 79, 78, 77, 79, 80, 78, 79, 80, 79, 81, 80, 82, 81, 80, 79, 78], - recent: [ - { date: "2026-03-29", repo: "api-gateway", sha: "f1c8b3a", phase: "red", failure: "red-did-not-fail", pts: -5 }, - { date: "2026-03-24", repo: "billing-service", sha: "9d2e1f4", phase: "refactor", failure: "broken refactor", pts: -5 }, - { date: "2026-03-18", repo: "data-pipeline", sha: "62a9cb7", phase: "green", failure: "no phase tag (parent)", pts: 0 }, - ], - }, - { - slug: "cursor", - name: "Cursor", - score: 54, - delta: -15, - commits: 489, - phaseCoveragePct: 71, - streak: 3, - streakBroken: true, - topIssueLabel: "test-deleted in refactor", - topIssuePct: 14, - failureMix: [ - { label: "clean cycles", pct: 64, tone: "green" }, - { label: "test-deleted", pct: 14, tone: "red" }, - { label: "red-did-not-fail", pct: 9, tone: "red" }, - { label: "broken refactor", pct: 7, tone: "red" }, - { label: "no phase tag", pct: 6, tone: "muted" }, - ], - trend: [69, 70, 71, 72, 70, 71, 72, 73, 72, 71, 72, 70, 68, 65, 60, 55, 50, 52, 54, 53, 56, 54, 52, 55, 53, 54, 56, 55, 54, 54], - recent: [ - { date: "2026-03-28", repo: "api-gateway", sha: "a1b2c3d", phase: "refactor", failure: "test-deleted", pts: -20 }, - { date: "2026-03-26", repo: "api-gateway", sha: "4e5f6a7", phase: "green", failure: "broken refactor", pts: -5 }, - { date: "2026-03-23", repo: "billing-service", sha: "8b9c0d1", phase: "red", failure: "red-did-not-fail", pts: -5 }, - { date: "2026-03-21", repo: "api-gateway", sha: "2e3f4a5", phase: 
"refactor", failure: "test-deleted", pts: -20 }, - { date: "2026-03-19", repo: "data-pipeline", sha: "6b7c8d9", phase: "refactor", failure: "broken refactor", pts: -5 }, - ], - }, - { - slug: "aider", - name: "Aider", - score: 89, - delta: +2, - commits: 146, - phaseCoveragePct: 96, - streak: 89, - streakBroken: false, - topIssueLabel: "broken refactor", - topIssuePct: 3, - failureMix: [ - { label: "clean cycles", pct: 94, tone: "green" }, - { label: "broken refactor", pct: 3, tone: "red" }, - { label: "red-did-not-fail", pct: 2, tone: "red" }, - { label: "no phase tag", pct: 1, tone: "muted" }, - ], - trend: [87, 88, 89, 88, 87, 89, 90, 89, 88, 89, 90, 88, 89, 90, 91, 89, 88, 89, 90, 89, 90, 91, 89, 88, 89, 90, 89, 90, 89, 89], - recent: [ - { date: "2026-03-27", repo: "data-pipeline", sha: "3a4b5c6", phase: "refactor", failure: "broken refactor", pts: -5 }, - { date: "2026-03-15", repo: "billing-service", sha: "7d8e9f0", phase: "red", failure: "red-did-not-fail", pts: -5 }, - ], - }, -]; - -const escape = (s: string): string => - s.replace(/&/g, "&").replace(/"/g, """).replace(//g, ">"); - -const trendArrow = (delta: number): { glyph: string; cls: string } => - delta > 0 ? { glyph: "↑", cls: "up" } : delta < 0 ? { glyph: "↓", cls: "down" } : { glyph: "→", cls: "flat" }; - -const sparkline = (values: number[], height = 60, width = 320): string => { - if (values.length === 0) return ""; - const min = Math.min(...values); - const max = Math.max(...values); - const range = Math.max(1, max - min); - const stepX = width / Math.max(1, values.length - 1); - const pad = 6; - const innerH = height - pad * 2; - const points = values - .map((v, i) => { - const x = (i * stepX).toFixed(1); - const y = (pad + innerH - ((v - min) / range) * innerH).toFixed(1); - return `${x},${y}`; - }) - .join(" "); - return ``; -}; - -const tile = (a: AgentReport): string => { - const arr = trendArrow(a.delta); - const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; - return `
    -

    ${escape(a.name)}

    -

    ${a.score} / 100

    -

    ${arr.glyph} ${escape(deltaStr)}

    -

    ${a.commits.toLocaleString()} commits

    -
    top issue: ${escape(a.topIssueLabel)} (${a.topIssuePct}%)
    -
    `; -}; - -const bars = (mix: FailureSlice[]): string => { - const rows = mix - .map( - (s) => `
    - ${escape(s.label)} - - ${s.pct}% -
    `, - ) - .join("\n"); - return `
    ${rows}
    `; -}; - -const streakBox = (a: AgentReport): string => { - const cls = a.streakBroken ? "broken" : a.streak >= 30 ? "long" : ""; - const label = a.streakBroken ? "recent break" : "consecutive clean cycles"; - return `${a.streak} ${label}`; -}; - -const mockBanner = `
    demo data — real reporting wires up when the project-tracking pipeline ships. why tdd.md needs this · about reporting
    `; - -const snapshotBlock = (s: TestSnapshot): string => { - const failuresHtml = s.failures.length === 0 - ? `
  • all ${s.passing} tests groen
  • ` - : s.failures - .map( - (f) => - `
  • ${escape(f.test)} ${f.flaky ? "intermittent · " : ""}sinds ${f.since}
  • `, - ) - .concat([`
  • + ${s.passing.toLocaleString()} passing tests
  • `]) - .join("\n"); - const statusCls = s.failing === 0 ? "ok" : "bad"; - return `
    -

    ${escape(s.repo)} @ ${escape(s.branch)}

    -

    ${s.total.toLocaleString()} tests · ${s.passing.toLocaleString()} passing${s.failing > 0 ? ` · ${s.failing.toLocaleString()} failing` : ""}

    -
      -${failuresHtml} -
    -
    `; -}; - -const agentTagHtml = (slug: AgentReport["slug"]): string => { - const name = DEMO_REPORTS.find((r) => r.slug === slug)?.name ?? slug; - return `${escape(name)}`; -}; - -const stabilityRow = (s: TestStability): string => { - const cls = s.flagged ? "test-stab-row flagged" : "test-stab-row"; - const warn = s.flagged ? ` ` : ""; - return ` - ${escape(s.test)}
    ${escape(s.repo)}
    - ${s.pass} - ${s.fail} - ${s.deleted} - ${agentTagHtml(s.lastBrokenBy)}${warn} -`; -}; - -export const testsOverviewMd = (): string => { - const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0); - const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0); - const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0); - const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n"); - const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n"); - return `# tests overzicht - -${mockBanner} - -> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". - -## huidige stand · per repo - -
    -${snapshots} -
    - -**Totaal**: ${total.toLocaleString()} tests · ${passing.toLocaleString()} passing · ${failing.toLocaleString()} failing. - -## test-stabiliteit · q1 2026 - -Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken. - - - - - - - - - - - - -${stabRows} - -
    testpassfaildellaatst gebroken door
    - -> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test. - -## hoe lees je dit - -- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm. -- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt. -- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon). -- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem. - ---- - -[← exec summary](/reports/demo) · [back to /reports](/reports) -`; -}; - -export const reportsLandingMd = (): string => `# reports - -> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout. - -This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data: - -- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents -- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits -- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam - -Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers. - -## what gets measured - -This layer measures **discipline**, not code-quality. 
Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch: - -| failure mode | what triggers it | what it costs | -|---|---|---| -| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit | -| \`test-deleted\` | test count drops between commits | -20 / commit | -| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit | -| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % | - -The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip. - -## reading the data - -For management: -- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page. - -For team-leads: -- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts. - -[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games) -`; - -export const execSummaryMd = (): string => { - const totalCommits = DEMO_REPORTS.reduce((s, a) => s + a.commits, 0); - const tiles = DEMO_REPORTS.map(tile).join("\n"); - return `# tdd-discipline rapport · q1 2026 - -${mockBanner} - -> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}. - -
    -${tiles} -
    - -## wat veranderde dit kwartaal - -Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf. - -## wat we doen - -- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)). -- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren. -- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt. - -## wat dit getal *niet* meet - -Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy. - ---- - -[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports) -`; -}; - -export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => { - const a = DEMO_REPORTS.find((r) => r.slug === slug); - if (!a) return null; - const arr = trendArrow(a.delta); - const deltaStr = a.delta > 0 ? 
`+${a.delta}` : `${a.delta}`; - const recentRows = a.recent - .map( - (r) => - `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, - ) - .join("\n"); - return `# ${a.name} · drill-down - -${mockBanner} - -> Discipline-score **${a.score} / 100** ${arr.glyph} ${deltaStr} over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. - -## trend (30 dagen) - -
    -${sparkline(a.trend)} -
    - -${streakBox(a)} - -## failure-mode breakdown - -${bars(a.failureMix)} - -Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits). - -## recent flagged - -| date | repo | sha | phase | failure | pts | -|---|---|---|---|---|---| -${recentRows} - -## coaching - -- ${a.slug === "claude-code" ? `[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent \`red-did-not-fail\`.` : a.slug === "cursor" ? `[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, \`.cursor/rules\` to stop test-deletion.` : `[Aider is the closest agent to TDD on rails — until \`--auto-test\`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.`} -- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough. - ---- - -[← exec summary](/reports/demo) · [back to /reports](/reports) -`; -}; diff --git a/src/server.ts b/src/server.ts deleted file mode 100644 index b270b6f3eaf49d3722a1ff737872872065650ef1..0000000000000000000000000000000000000000 --- a/src/server.ts +++ /dev/null @@ -1,1378 +0,0 @@ -import { renderPage, renderNotFound } from "./render"; -import * as github from "./github_oauth"; -import * as forgejo from "./forgejo"; -import { parseCommit, computeProgress, type Phase } from "./commits"; -import { loadGame, listGames } from "./games"; -import { judge } from "./judge"; -import { latestRun, allLatestRuns, listActiveProjects, getProject, upsertProject } from "./db"; -import { - reportsLandingMd, - execSummaryMd, - agentDrilldownMd, - testsOverviewMd, - DEMO_REPORTS, -} from "./reports"; -import { - projectsLandingMd, - projectRegisterMd, - projectDetailMd, - parseRepoIdentifier, - fetchProjectConfig, -} from "./projects"; - -const HOME_MD = "./content/home.md"; -const GAME_DIR = "./content/games"; - -const BASE_URL = process.env.BASE_URL ?? 
"https://tdd.md"; -const CALLBACK_URL = `${BASE_URL}/auth/github/callback`; - -const HOME_DESCRIPTION = - "Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline."; - -const homeBody = await Bun.file(HOME_MD).text(); -const HOME_HTML = await renderPage({ - title: "tdd.md — TDD for agentic coding", - description: HOME_DESCRIPTION, - bodyMarkdown: homeBody, - active: "home", - jsonLd: { - "@context": "https://schema.org", - "@type": "WebSite", - name: "tdd.md", - url: "https://tdd.md", - description: HOME_DESCRIPTION, - }, -}); - -const ALL_GAMES = await listGames(); - -// Agent-specific TDD walkthroughs, served at /guides/. Each entry's -// markdown body lives at content/guides/.md. Adding a new agent -// guide is two lines below + drop the .md file. -interface GuideEntry { - slug: string; - title: string; - description: string; -} - -interface BlogEntry { - slug: string; - title: string; - description: string; - // ISO date for the listing + sitemap lastmod. - date: string; -} - -const ALL_POSTS: BlogEntry[] = [ - { - slug: "tweag-handbook-tdd", - title: "Tweag's agentic TDD handbook gets the loop right — local green still isn't enough", - description: "Tweag's agentic-coding handbook describes a clean TDD loop and the right rules for AI assistants — but the validation layer it leans on (run tests, see green) misses the three failure modes most likely to show up: tautology, test deletion in refactor, and assertion weakening. Here's the gap, and what closes it.", - date: "2026-05-08", - }, - { - slug: "aider-tdd", - title: "Aider is the closest agent to TDD on rails — until you let it auto-fix", - description: "Aider's auto-commit-per-edit and bite-sized-steps philosophy make it TDD-shaped by default. Then `--auto-test` discovers it can win by deleting tests instead of fixing the impl. 
Here's how Aider's strengths map onto TDD, and how to keep the auto-test loop honest.", - date: "2026-05-04", - }, - { - slug: "cursor-tdd", - title: "Cursor knows how to do TDD. Most users skip the parts that matter.", - description: "Cursor's own agent best practices document a clean TDD workflow — but most users skip the features (Plan Mode, fresh conversations, .cursor/rules) that actually make it work. Here's how to put the pieces together, with a kata you can run end-to-end.", - date: "2026-05-04", - }, - { - slug: "claude-code-tdd", - title: "Claude Code does not do TDD by default — here's how to make it", - description: "Claude Code writes the test and impl in one breath, so the test never fails for the right reason. Two structural changes — CLAUDE.md rules + phase-separated sessions — get the discipline back, and tdd.md can verify it.", - date: "2026-05-04", - }, -]; - -const ALL_GUIDES: GuideEntry[] = [ - { - slug: "claude-code", - title: "TDD with Claude Code", - description: "Run TDD katas through Anthropic's Claude Code with phase-separated prompts and CLAUDE.md rules so the judge scores clean red→green→refactor cycles.", - }, - { - slug: "cursor", - title: "TDD with Cursor", - description: "Test-driven katas through Cursor — Composer per phase, project rules pinned in .cursor/rules, fresh context for red vs green.", - }, - { - slug: "aider", - title: "TDD with Aider", - description: "Aider's commit-per-edit model maps directly onto red→green→refactor — prompt with phase tags and the auto-commit carries through.", - }, -]; - -const gamesIndexBody = `# games - -${ALL_GAMES.length === 0 - ? "_No katas registered yet._" - : `| kata | description | steps |\n|---|---|---|\n${ALL_GAMES.map( - (g) => `| [${g.id}](/games/${g.id}) | ${g.description} | ${g.steps.length} |`, - ).join("\n")}` -} - -> Ready to play? [Register your agent →](/agents/register) -> Using a specific agent? See the [agent-specific guides](/guides) — Claude Code, Cursor, Aider. 
-`; - -const GAMES_INDEX_HTML = await renderPage({ - title: "TDD katas — tdd.md", - description: - "Browse the TDD katas. Pick a challenge, push red→green→refactor commits, and earn a public verdict graded against hidden tests.", - bodyMarkdown: gamesIndexBody, - ogPath: "https://tdd.md/games", - active: "games", -}); - -const renderKata = async (kata: string): Promise => { - const file = Bun.file(`${GAME_DIR}/${kata}/spec.md`); - if (!(await file.exists())) return null; - const md = await file.text(); - // Pull the kata's own description from spec.ts when available — it's - // the canonical short copy (rendered on /games + sitemap previews). - let description: string | undefined; - try { - const game = await loadGame(kata); - description = game.description; - } catch { - // unknown kata; use the site default - } - const html = await renderPage({ - title: `${kata} TDD kata — tdd.md`, - description, - bodyMarkdown: md, - ogPath: `https://tdd.md/games/${kata}`, - active: "games", - }); - return new Response(html, { headers: { "Content-Type": "text/html; charset=utf-8" } }); -}; - -interface ForgejoUserSummary { - id: number; - login: string; - is_admin?: boolean; - // Forgejo visibility levels: "public" | "limited" | "private". - // Anything other than "public" is hidden from anonymous tdd.md visitors. - visibility?: string; -} - -// Single-user visibility lookup for /:owner/:repo and /agents/:name. -// Returns the raw Forgejo string (or null if the user doesn't exist). -const getUserVisibility = async (name: string): Promise => { - const r = await fetch( - `${FORGEJO_INTERNAL}/api/v1/users/${encodeURIComponent(name)}`, - { headers: adminApiHeaders() }, - ); - if (!r.ok) return null; - const u = (await r.json()) as ForgejoUserSummary; - return u.visibility ?? 
"public"; -}; - -const renderAgentsIndex = async (): Promise => { - let users: ForgejoUserSummary[] = []; - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; - if (adminToken) { - const r = await fetch(`${FORGEJO_INTERNAL}/api/v1/admin/users?limit=200`, { - headers: adminApiHeaders(), - }); - if (r.ok) users = (await r.json()) as ForgejoUserSummary[]; - } - // Drop the admin (id 1) and anyone whose visibility isn't "public" — - // private and limited agents stay invisible on the public index. - const agents = users.filter( - (u) => u.id !== 1 && !u.is_admin && (u.visibility ?? "public") === "public", - ); - - // Per-agent score totals from the latest run per repo. - const allRuns = allLatestRuns(); - const totalsByOwner = new Map(); - for (const r of allRuns) { - const t = totalsByOwner.get(r.owner) ?? { score: 0, runs: 0 }; - t.score += r.verdict.totalScore; - t.runs += 1; - totalsByOwner.set(r.owner, t); - } - - let body: string; - if (agents.length === 0) { - body = `# agents - -> No agents registered yet. Be the first. - -[ Register your agent → ](/agents/register) -`; - } else { - const rows = agents - .map((u) => { - const t = totalsByOwner.get(u.login) ?? { score: 0, runs: 0 }; - const sign = t.score >= 0 ? "+" : ""; - return `| [${u.login}](/agents/${u.login}) | ${t.runs} | ${sign}${t.score} |`; - }) - .join("\n"); - body = `# agents - -| agent | attempts | total score | -|---|---|---| -${rows} - -[ Register your agent → ](/agents/register) -`; - } - - const description = - agents.length === 0 - ? "AI agents doing test-driven development on tdd.md — registration is open, sign in with GitHub to play." - : `${agents.length} AI ${agents.length === 1 ? 
"agent" : "agents"} doing test-driven development on tdd.md, scored on red→green discipline against hidden tests for agentic coding.`; - - const html = await renderPage({ - title: "AI agents on tdd.md", - description, - bodyMarkdown: body, - ogPath: "https://tdd.md/agents", - active: "agents", - }); - return htmlResponse(html); -}; - -const renderLeaderboard = async (): Promise => { - // Only show runs whose owner is public. Fetch the user list once - // and build a Set so we can filter without N+1 lookups. - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; - const publicOwners = new Set(); - if (adminToken) { - const r = await fetch(`${FORGEJO_INTERNAL}/api/v1/admin/users?limit=200`, { - headers: adminApiHeaders(), - }); - if (r.ok) { - const users = (await r.json()) as ForgejoUserSummary[]; - for (const u of users) { - if ((u.visibility ?? "public") === "public") publicOwners.add(u.login); - } - } - } - const runs = allLatestRuns() - .filter((r) => publicOwners.size === 0 || publicOwners.has(r.owner)) - .sort((a, b) => b.verdict.totalScore - a.verdict.totalScore); - let body: string; - if (runs.length === 0) { - body = `# leaderboard - -> No verdicts yet. The first agent to push a red→green pair lands here. - -[ Register your agent → ](/agents/register) -`; - } else { - const rows = runs - .map((r, i) => { - const sign = r.verdict.totalScore >= 0 ? "+" : ""; - const verified = r.verdict.steps.filter((s) => s.status === "verified").length; - return `| ${i + 1} | [${r.owner}](/agents/${r.owner}) | [${r.repo}](/${r.owner}/${r.repo}) | ${sign}${r.verdict.totalScore} | ${verified} |`; - }) - .join("\n"); - body = `# leaderboard - -| rank | agent | kata | score | verified steps | -|---|---|---|---|---| -${rows} -`; - } - const description = - runs.length === 0 - ? "TDD leaderboard for AI agents on tdd.md — be the first verdict." - : `Top AI agents by TDD score on tdd.md — ${runs.length} ranked ${runs.length === 1 ? 
"submission" : "submissions"} graded on red→green discipline and hidden test pass rate.`; - - const html = await renderPage({ - title: "TDD leaderboard — tdd.md", - description, - bodyMarkdown: body, - ogPath: "https://tdd.md/leaderboard", - active: "leaderboard", - }); - return htmlResponse(html); -}; - -const REGISTER_BODY = `# register - -> Sign in with GitHub to create your tdd.md agent. - -## what we ask GitHub for -- your username -- your primary verified email - -That's it — no repo access, no anything else. - -## what you get -- a public agent account at \`git.tdd.md/\` -- a push token (shown once) -- an empty repo for the first kata, ready to push to - -[ sign in with github → ](/auth/github/start) -`; - -const REGISTER_HTML = await renderPage({ - title: "Register your AI agent — tdd.md", - description: - "Sign in with GitHub to register your AI agent on tdd.md and start solving TDD katas. Public-signup, verified-identity, no extra forms.", - bodyMarkdown: REGISTER_BODY, - ogPath: "https://tdd.md/agents/register", - active: "agents", - noindex: true, -}); - -const htmlResponse = (html: string, status = 200) => - new Response(html, { status, headers: { "Content-Type": "text/html; charset=utf-8" } }); - -const errorPage = async (message: string, status = 400): Promise => { - const html = await renderPage({ - title: "error — tdd.md", - bodyMarkdown: `# error\n\n> ${message}\n\n[← back](/agents/register)`, - active: "agents", - }); - return htmlResponse(html, status); -}; - -const randomHex = (bytes: number): string => - Array.from(crypto.getRandomValues(new Uint8Array(bytes))) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - -const parseCookies = (header: string | null): Record => { - const out: Record = {}; - if (!header) return out; - for (const part of header.split(";")) { - const idx = part.indexOf("="); - if (idx === -1) continue; - const name = part.slice(0, idx).trim(); - const value = part.slice(idx + 1).trim(); - if (name) out[name] = 
decodeURIComponent(value); - } - return out; -}; - -const timingSafeEqual = (a: string, b: string): boolean => { - if (a.length !== b.length) return false; - let r = 0; - for (let i = 0; i < a.length; i++) r |= a.charCodeAt(i) ^ b.charCodeAt(i); - return r === 0; -}; - -// 30 days. Long enough for everyday use, short enough that a leaked -// cookie doesn't grant indefinite access. -const SESSION_TTL_SEC = 30 * 24 * 60 * 60; -const SESSION_COOKIE = "tdd_session"; - -const sessionSecret = (): string => - process.env.SESSION_SECRET ?? process.env.WEBHOOK_SECRET ?? ""; - -const signSession = async (username: string): Promise => { - const exp = Math.floor(Date.now() / 1000) + SESSION_TTL_SEC; - const payload = `${username}.${exp}`; - const sig = await hmacSha256Hex(sessionSecret(), payload); - return `${payload}.${sig}`; -}; - -const verifySession = async (cookie: string): Promise => { - const parts = cookie.split("."); - if (parts.length !== 3) return null; - const [username, expStr, providedSig] = parts; - if (!username || !expStr || !providedSig) return null; - const exp = Number(expStr); - if (!Number.isFinite(exp) || exp < Math.floor(Date.now() / 1000)) return null; - const expectedSig = await hmacSha256Hex(sessionSecret(), `${username}.${expStr}`); - if (!timingSafeEqual(providedSig, expectedSig)) return null; - return username; -}; - -const getViewer = async (req: Request): Promise => { - if (!sessionSecret()) return null; - const cookies = parseCookies(req.headers.get("cookie")); - const raw = cookies[SESSION_COOKIE]; - if (!raw) return null; - return verifySession(raw); -}; - -const sessionCookieHeader = (value: string, maxAge: number): string => - `${SESSION_COOKIE}=${value}; Path=/; HttpOnly; Secure; SameSite=Lax; Max-Age=${maxAge}`; - -const hmacSha256Hex = async (secret: string, body: string): Promise => { - const key = await crypto.subtle.importKey( - "raw", - new TextEncoder().encode(secret), - { name: "HMAC", hash: "SHA-256" }, - false, - ["sign"], - ); 
- const sig = await crypto.subtle.sign("HMAC", key, new TextEncoder().encode(body)); - return Array.from(new Uint8Array(sig)) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); -}; - -// Forward git protocol + Forgejo API/asset requests to Forgejo via the host -// network. Lets us serve everything under tdd.md (GitHub-style) without -// exposing git.tdd.md externally. -const FORGEJO_INTERNAL = process.env.FORGEJO_URL ?? "https://git.tdd.md"; - -// Admin-token-authenticated headers for API calls. Agent repos are -// private by default; rendering the verdict page must still work. We -// proxy the data through the admin identity, never exposing the source -// or push protocol publicly. -const adminApiHeaders = (): HeadersInit => { - const token = process.env.FORGEJO_ADMIN_TOKEN; - return token ? { Authorization: `token ${token}` } : {}; -}; - -const HOP_BY_HOP = [ - "host", - "connection", - "keep-alive", - "transfer-encoding", - "upgrade", - "proxy-authorization", - "proxy-connection", - "te", - "trailer", -]; - -const proxyToForgejo = async (req: Request, pathAndQuery: string): Promise => { - const upstream = `${FORGEJO_INTERNAL}${pathAndQuery}`; - const headers = new Headers(req.headers); - for (const h of HOP_BY_HOP) headers.delete(h); - headers.set("X-Forwarded-Host", "tdd.md"); - headers.set("X-Forwarded-Proto", "https"); - headers.set("X-Forwarded-For", req.headers.get("cf-connecting-ip") ?? 
"0.0.0.0"); - - let body: ArrayBuffer | undefined; - if (req.method !== "GET" && req.method !== "HEAD") { - body = await req.arrayBuffer(); - } - - const upstreamRes = await fetch(upstream, { - method: req.method, - headers, - body, - redirect: "manual", - }); - - const responseHeaders = new Headers(upstreamRes.headers); - for (const h of HOP_BY_HOP) responseHeaders.delete(h); - - return new Response(upstreamRes.body, { - status: upstreamRes.status, - statusText: upstreamRes.statusText, - headers: responseHeaders, - }); -}; - -const isGitProtocol = (pathname: string, search: URLSearchParams): boolean => { - if (pathname.includes(".git/") || pathname.endsWith(".git")) return true; - if ( - pathname.endsWith("/info/refs") && - (search.get("service") === "git-upload-pack" || search.get("service") === "git-receive-pack") - ) { - return true; - } - if (pathname.endsWith("/git-upload-pack") || pathname.endsWith("/git-receive-pack")) { - return true; - } - return false; -}; - -interface ForgejoRepoSummary { - description: string; - clone_url: string; - empty: boolean; - private: boolean; -} - -interface ForgejoCommit { - sha: string; - commit: { message: string; author: { name: string; date: string } }; -} - -const phaseSpan = (p: Phase): string => { - const cls = p === "red" ? "red" : p === "green" ? "green" : p === "refactor" ? 
"blue" : "muted"; - return `${p}`; -}; - -const relativeTime = (iso: string): string => { - const ms = Date.now() - new Date(iso).getTime(); - if (ms < 60_000) return `${Math.max(0, Math.floor(ms / 1000))}s ago`; - if (ms < 3_600_000) return `${Math.floor(ms / 60_000)}m ago`; - if (ms < 86_400_000) return `${Math.floor(ms / 3_600_000)}h ago`; - return `${Math.floor(ms / 86_400_000)}d ago`; -}; - -const renderRepoView = async ( - owner: string, - repo: string, - viewer: string | null, -): Promise => { - // Private/limited owners get a 404 to anonymous visitors — but the - // owner themselves (verified via session cookie) can always see - // their own pages. - const ownerVisibility = await getUserVisibility(owner); - if (ownerVisibility !== null && ownerVisibility !== "public" && viewer !== owner) { - const html = await renderNotFound(`/${owner}/${repo}`); - return htmlResponse(html, 404); - } - - const repoApi = `${FORGEJO_INTERNAL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`; - const repoRes = await fetch(repoApi, { headers: adminApiHeaders() }); - if (repoRes.status === 404) { - const html = await renderNotFound(`/${owner}/${repo}`); - return htmlResponse(html, 404); - } - if (!repoRes.ok) { - const html = await renderPage({ - title: `${owner}/${repo} — tdd.md`, - bodyMarkdown: `# ${owner}/${repo}\n\n> repository unavailable`, - }); - return htmlResponse(html, 502); - } - const info = (await repoRes.json()) as ForgejoRepoSummary; - const cloneUrl = info.clone_url || `https://tdd.md/${owner}/${repo}.git`; - const isPrivate = info.private === true; - - // The repo name is by convention the kata id. If the kata exists, the - // header link is meaningful and we know the total step count. - let totalSteps: number | null = null; - let kataExists = false; - try { - const game = await loadGame(repo); - totalSteps = game.steps.length; - kataExists = true; - } catch { - // Repo isn't a known kata — still render, just without step totals. 
- } - - let commits: ForgejoCommit[] = []; - if (!info.empty) { - const commitsRes = await fetch(`${repoApi}/commits?limit=50&stat=false`, { - headers: adminApiHeaders(), - }); - if (commitsRes.ok) commits = (await commitsRes.json()) as ForgejoCommit[]; - } - const progress = computeProgress(commits); - const verified = progress.verifiedSteps.size; - - let status: string; - if (commits.length === 0) { - status = "awaiting first push"; - } else if (totalSteps !== null && verified >= totalSteps) { - status = "kata complete"; - } else if (verified > 0) { - status = "in progress"; - } else { - status = "no verified steps yet"; - } - const stepCounter = totalSteps !== null ? `${verified} / ${totalSteps}` : `${verified} / ?`; - - let phaseLog: string; - if (commits.length === 0) { - phaseLog = "_No commits yet — push your first `red:` commit to start the cycle._"; - } else { - const rows = commits.map((c) => { - const sha = c.sha.slice(0, 7); - const p = parseCommit(c.commit.message); - const subject = (p.subject || c.commit.message.split("\n")[0] || "").replace(/\|/g, "\\|"); - const stepCell = p.step ? `\`${p.step}\`` : "—"; - return `| \`${sha}\` | ${phaseSpan(p.phase)} | ${stepCell} | ${subject} | ${relativeTime(c.commit.author.date)} |`; - }); - phaseLog = `| sha | phase | step | message | when |\n|---|---|---|---|---|\n${rows.join("\n")}`; - } - - const kataLink = kataExists - ? `[\`${repo}\` →](/games/${repo})` - : `\`${repo}\``; - const privateBadge = isPrivate ? ` [private]` : ""; - - const verdict = latestRun(owner, repo); - const headSha = commits[0]?.sha ?? null; - const verdictStale = verdict !== null && headSha !== null && verdict.headSha !== headSha; - - let scoreSection: string; - if (verdict === null) { - scoreSection = `> Not yet judged. 
The next push triggers a judge run, or [run the judge now](/api/judge/${owner}/${repo}) (POST).\n\nPhase tally: red ${progress.redCount} · green ${progress.greenCount} · refactor ${progress.refactorCount}${progress.untaggedCount > 0 ? ` · untagged ${progress.untaggedCount}` : ""}.`; - } else { - const stale = verdictStale ? ` · stale — newer commits not yet judged` : ""; - const sign = verdict.totalScore >= 0 ? "+" : ""; - const statusClass = (status: string): string => { - if (status === "verified") return "green"; - if (status === "discipline-only") return "blue"; - if (status === "no-green") return "muted"; - return "red"; - }; - const modeLabel = (m: string): string => { - const cls = m === "strict" ? "red" : m === "pragmatic" ? "blue" : "green"; - return `${m}`; - }; - const rows = verdict.steps.length === 0 - ? "_No red→green pairs found yet._" - : `| step | red | green | hidden | status | points | explanation |\n|---|---|---|---|---|---|---|\n` + - verdict.steps.map((s) => { - const cls = statusClass(s.status); - const sign = s.scoreDelta >= 0 ? "+" : ""; - const hiddenCell = - s.hiddenPassed === true ? `pass` : - s.hiddenPassed === false ? `fail` : - ``; - const explanation = (s.explanation ?? "").replace(/\|/g, "\\|"); - return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | ${s.status} | ${sign}${s.scoreDelta} | ${explanation} |`; - }).join("\n"); - const refactorRows = (verdict.refactors ?? []).length === 0 - ? "" - : `\n\n### refactors\n\n| sha | step | tests | points | explanation |\n|---|---|---|---|---|\n` + - verdict.refactors.map((r) => { - const sign = r.scoreDelta >= 0 ? "+" : ""; - const cls = r.testsPassed ? "green" : "red"; - const verb = r.testsPassed ? "green" : "broke tests"; - const explanation = (r.explanation ?? "").replace(/\|/g, "\\|"); - return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? 
`\`${r.stepId}\`` : "—"} | ${verb} | ${sign}${r.scoreDelta} | ${explanation} |`; - }).join("\n"); - const modeLine = verdict.mode ? `**mode: ${modeLabel(verdict.mode)}** · ` : ""; - scoreSection = `${modeLine}**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`; - } - - const body = `# ${owner} · playing ${kataLink}${privateBadge} - -> ${status} -> **${stepCounter}** steps verified - -## phase log - -${phaseLog} - -## score - -${scoreSection} - -## clone - -\`\`\` -git clone ${cloneUrl} -\`\`\` - -[← /agents/${owner}](/agents/${owner})${kataExists ? ` · [kata spec →](/games/${repo})` : ""} -`; - - // Dynamic description tailored to this attempt — gives every agent - // run a unique snippet for search results and social previews instead - // of falling back to the site default. - const totalSnippet = - verdict !== null - ? `, score ${verdict.totalScore >= 0 ? "+" : ""}${verdict.totalScore}` - : ""; - const description = kataExists - ? `${owner}'s ${repo} TDD kata attempt on tdd.md — ${verified}${totalSteps !== null ? `/${totalSteps}` : ""} steps verified${totalSnippet}.` - : `${owner}/${repo} on tdd.md — ${commits.length} ${commits.length === 1 ? "commit" : "commits"} in the phase log${totalSnippet}.`; - - const html = await renderPage({ - title: `${owner} · ${repo}${kataExists ? " TDD kata" : ""} — tdd.md`, - description, - bodyMarkdown: body, - ogPath: `https://tdd.md/${owner}/${repo}`, - active: "agents", - }); - return htmlResponse(html); -}; - -const port = Number(process.env.PORT ?? 
3000); - -const server = Bun.serve({ - port, - routes: { - "/": htmlResponse(HOME_HTML), - "/raw": new Response(Bun.file(HOME_MD), { - headers: { "Content-Type": "text/markdown; charset=utf-8" }, - }), - "/healthz": new Response("ok"), - - "/robots.txt": new Response( - `User-agent: *\nAllow: /\nDisallow: /auth/\nDisallow: /api/\n\nSitemap: https://tdd.md/sitemap.xml\n`, - { headers: { "Content-Type": "text/plain; charset=utf-8" } }, - ), - - "/sitemap.xml": async () => { - const today = new Date().toISOString().slice(0, 10); - const url = (loc: string, priority: string) => - `${loc}${today}${priority}`; - const kataUrls = ALL_GAMES.map((g) => - url(`https://tdd.md/games/${g.id}`, "0.8"), - ).join("\n"); - const guideUrls = ALL_GUIDES.map((g) => - url(`https://tdd.md/guides/${g.slug}`, "0.8"), - ).join("\n"); - const blogUrls = ALL_POSTS.map((p) => - url(`https://tdd.md/blog/${p.slug}`, "0.8"), - ).join("\n"); - const xml = ` - -${url("https://tdd.md/", "1.0")} -${url("https://tdd.md/games", "0.9")} -${kataUrls} -${url("https://tdd.md/guides", "0.9")} -${guideUrls} -${url("https://tdd.md/blog", "0.7")} -${blogUrls} -${url("https://tdd.md/agents", "0.7")} -${url("https://tdd.md/leaderboard", "0.7")} -`; - return new Response(xml, { - headers: { "Content-Type": "application/xml; charset=utf-8" }, - }); - }, - - "/og.svg": new Response(Bun.file("./public/og.svg"), { - headers: { - "Content-Type": "image/svg+xml", - "Cache-Control": "public, max-age=3600", - }, - }), - - "/games": htmlResponse(GAMES_INDEX_HTML), - - "/blog": async () => { - const rows = ALL_POSTS - .map((p) => `| ${p.date} | [${p.title}](/blog/${p.slug}) |`) - .join("\n"); - const body = `# blog - -Notes on TDD, agentic coding, and the discipline that ties them together. - -| date | post | -|---|---| -${rows} - -> RSS feed coming when there's a second post. 
- -[← back to tdd.md](/) · [the guides](/guides) · [the katas](/games) -`; - const html = await renderPage({ - title: "Blog — tdd.md", - description: "Posts on test-driven development for AI coding agents — how to apply TDD with Claude Code, Cursor, and Aider, what we learn from the verdicts.", - bodyMarkdown: body, - ogPath: "https://tdd.md/blog", - active: "blog", - }); - return htmlResponse(html); - }, - - "/blog/:slug": async (req) => { - const slug = req.params.slug; - const entry = ALL_POSTS.find((p) => p.slug === slug); - if (!entry) { - const html = await renderNotFound(`/blog/${slug}`); - return htmlResponse(html, 404); - } - const file = Bun.file(`./content/blog/${slug}.md`); - if (!(await file.exists())) { - const html = await renderNotFound(`/blog/${slug}`); - return htmlResponse(html, 404); - } - const md = await file.text(); - const html = await renderPage({ - title: `${entry.title} — tdd.md`, - description: entry.description, - bodyMarkdown: md, - ogPath: `https://tdd.md/blog/${slug}`, - active: "blog", - jsonLd: { - "@context": "https://schema.org", - "@type": "BlogPosting", - headline: entry.title, - description: entry.description, - datePublished: entry.date, - url: `https://tdd.md/blog/${slug}`, - author: { "@type": "Organization", name: "tdd.md" }, - }, - }); - return htmlResponse(html); - }, - - "/projects": async () => { - const projects = listActiveProjects(); - const html = await renderPage({ - title: "Projects — tdd.md", - description: "Real repos opted in to tdd.md scoring. Each project drops .tdd-md.json at its root and gets its commits judged structurally for TDD discipline.", - bodyMarkdown: projectsLandingMd(projects), - ogPath: "https://tdd.md/projects", - }); - return htmlResponse(html); - }, - - "/projects/new": async (req) => { - const viewer = await getViewer(req); - if (req.method === "GET") { - const url = new URL(req.url); - const prefilled = url.searchParams.get("repo") ?? 
undefined; - const html = await renderPage({ - title: "Register a project — tdd.md", - description: "Onboard a real repo for TDD-discipline scoring. Drops .tdd-md.json at the repo root, register here, and the reports begin tracking commits on its tracked branches.", - bodyMarkdown: projectRegisterMd(viewer, prefilled), - ogPath: "https://tdd.md/projects/new", - noindex: true, - }); - return htmlResponse(html); - } - if (req.method !== "POST") return new Response("method not allowed", { status: 405 }); - if (!viewer) return new Response("unauthorized — sign in first", { status: 401 }); - - let raw = ""; - try { - const form = await req.formData(); - raw = String(form.get("repo") ?? "").trim(); - } catch { - return new Response("invalid form body", { status: 400 }); - } - - const renderError = async (message: string, status = 400): Promise => { - const html = await renderPage({ - title: "Register a project — tdd.md", - bodyMarkdown: projectRegisterMd(viewer, raw, message), - ogPath: "https://tdd.md/projects/new", - noindex: true, - }); - return htmlResponse(html, status); - }; - - let owner: string; - let repo: string; - try { - ({ owner, repo } = parseRepoIdentifier(raw)); - } catch (err) { - return renderError((err as Error).message); - } - - let config; - try { - config = await fetchProjectConfig(owner, repo); - } catch (err) { - return renderError((err as Error).message); - } - - upsertProject(viewer, owner, repo, config); - return new Response(null, { - status: 303, - headers: { Location: `/projects/${owner}/${repo}` }, - }); - }, - - "/projects/:repoOwner/:repoName": async (req) => { - const { repoOwner, repoName } = req.params; - const project = getProject(repoOwner, repoName); - if (!project) { - const html = await renderNotFound(`/projects/${repoOwner}/${repoName}`); - return htmlResponse(html, 404); - } - const html = await renderPage({ - title: `${project.displayName ?? 
`${project.repoOwner}/${project.repoName}`} — tdd.md`, - description: `${project.repoOwner}/${project.repoName} on tdd.md — ${project.testRunner === "none" ? "trace-mode" : project.testRunner} judging across ${project.trackedBranches.join(", ")}.`, - bodyMarkdown: projectDetailMd(project), - ogPath: `https://tdd.md/projects/${project.repoOwner}/${project.repoName}`, - }); - return htmlResponse(html); - }, - - "/reports": async () => { - const html = await renderPage({ - title: "Reports — tdd.md", - description: "Per-agent TDD-discipline reporting over real project repos: trend, failure-mode breakdown, and an exec summary fit for a quarterly readout.", - bodyMarkdown: reportsLandingMd(), - ogPath: "https://tdd.md/reports", - noindex: true, - }); - return htmlResponse(html); - }, - - "/reports/demo": async () => { - const html = await renderPage({ - title: "TDD-discipline rapport · Q1 2026 (demo) — tdd.md", - description: "Mockup of the management-level TDD-discipline report — single page, three agents, with trend and narrative.", - bodyMarkdown: execSummaryMd(), - ogPath: "https://tdd.md/reports/demo", - noindex: true, - }); - return htmlResponse(html); - }, - - "/reports/demo/tests": async () => { - const html = await renderPage({ - title: "Tests overzicht (demo) — tdd.md", - description: "Mockup of the per-test overview: current pass/fail snapshot per repo plus test stability over the quarter.", - bodyMarkdown: testsOverviewMd(), - ogPath: "https://tdd.md/reports/demo/tests", - noindex: true, - }); - return htmlResponse(html); - }, - - "/reports/demo/agents/:slug": async (req) => { - const slug = req.params.slug as (typeof DEMO_REPORTS)[number]["slug"]; - const md = agentDrilldownMd(slug); - if (!md) { - const html = await renderNotFound(`/reports/demo/agents/${slug}`); - return htmlResponse(html, 404); - } - const entry = DEMO_REPORTS.find((r) => r.slug === slug)!; - const html = await renderPage({ - title: `${entry.name} drill-down (demo) — tdd.md`, - 
description: `Per-agent drill-down mockup for ${entry.name}: trend, failure-mode breakdown, recent flagged commits with coaching links.`, - bodyMarkdown: md, - ogPath: `https://tdd.md/reports/demo/agents/${slug}`, - noindex: true, - }); - return htmlResponse(html); - }, - - "/guides": async () => { - const rows = ALL_GUIDES - .map((g) => `| [${g.title}](/guides/${g.slug}) | ${g.description} |`) - .join("\n"); - const body = `# guides - -Agent-specific walkthroughs for using tdd.md with the major agentic-coding tools. Each guide covers setup, prompt patterns that keep the agent in TDD, and the common pitfalls that cost score. - -| guide | what it covers | -|---|---| -${rows} - -> Missing your agent? [The mechanics are the same](/) — push commits tagged \`red:\` / \`green:\` / \`refactor:\` to your kata repo. Send a PR with a new guide and we'll list it here. - -[← play a kata](/games) · [register your agent →](/you) -`; - const html = await renderPage({ - title: "TDD guides for agentic coding tools — tdd.md", - description: "Practical TDD walkthroughs for Claude Code, Cursor, Aider and other AI coding agents — keep your agent honest with red→green→refactor commits, scored by tdd.md.", - bodyMarkdown: body, - ogPath: "https://tdd.md/guides", - active: "guides", - }); - return htmlResponse(html); - }, - - "/guides/:slug": async (req) => { - const slug = req.params.slug; - const entry = ALL_GUIDES.find((g) => g.slug === slug); - if (!entry) { - const html = await renderNotFound(`/guides/${slug}`); - return htmlResponse(html, 404); - } - const file = Bun.file(`./content/guides/${slug}.md`); - if (!(await file.exists())) { - const html = await renderNotFound(`/guides/${slug}`); - return htmlResponse(html, 404); - } - const md = await file.text(); - const html = await renderPage({ - title: `${entry.title} — tdd.md`, - description: entry.description, - bodyMarkdown: md, - ogPath: `https://tdd.md/guides/${slug}`, - active: "guides", - }); - return htmlResponse(html); - }, - 
"/games/:kata": async (req) => { - const res = await renderKata(req.params.kata); - if (res) return res; - const html = await renderNotFound(`/games/${req.params.kata}`); - return htmlResponse(html, 404); - }, - - "/agents": () => renderAgentsIndex(), - "/agents/register": htmlResponse(REGISTER_HTML), - "/agents/:name": async (req) => { - const name = req.params.name; - const viewer = await getViewer(req); - const userRes = await fetch(`${FORGEJO_INTERNAL}/api/v1/users/${encodeURIComponent(name)}`, { - headers: adminApiHeaders(), - }); - // Treat private/limited users as if they don't exist publicly — - // unless the logged-in viewer IS the owner. Owner can always see - // their own dashboard, public or not. - if (userRes.ok) { - const u = (await userRes.clone().json()) as ForgejoUserSummary; - const ownVisibility = u.visibility ?? "public"; - if (ownVisibility !== "public" && viewer !== name) { - const html = await renderNotFound(`/agents/${name}`); - return htmlResponse(html, 404); - } - } - if (userRes.status === 404) { - const html = await renderPage({ - title: `${name} — agents — tdd.md`, - bodyMarkdown: `# agents / ${name}\n\n> No agent registered with this name.\n\n[← all agents](/agents) · [register your own →](/agents/register)`, - ogPath: `https://tdd.md/agents/${name}`, - active: "agents", - }); - return htmlResponse(html, 404); - } - const reposRes = await fetch(`${FORGEJO_INTERNAL}/api/v1/users/${encodeURIComponent(name)}/repos?limit=50`, { - headers: adminApiHeaders(), - }); - const repos = reposRes.ok ? ((await reposRes.json()) as { name: string; description: string }[]) : []; - - const progressByRepo = await Promise.all( - repos.map(async (r) => { - const cRes = await fetch( - `${FORGEJO_INTERNAL}/api/v1/repos/${encodeURIComponent(name)}/${encodeURIComponent(r.name)}/commits?limit=50&stat=false`, - { headers: adminApiHeaders() }, - ); - const commits = cRes.ok ? 
((await cRes.json()) as { commit: { message: string } }[]) : []; - return { repo: r, progress: computeProgress(commits) }; - }), - ); - - const totals: Record = {}; - for (const r of repos) { - try { - const game = await loadGame(r.name); - totals[r.name] = game.steps.length; - } catch { - // unknown kata, no total - } - } - - const isSelf = viewer === name; - let body = `# agents / ${name}\n\n`; - if (isSelf) { - body += `> Welcome back, ${name}. This is your dashboard — only you and admins see it when your profile is private.\n\n`; - } - if (repos.length === 0) { - body += "> Registered, but no kata attempts yet.\n\n[← all agents](/agents)"; - } else { - body += "## attempts\n\n"; - body += "| kata | verified | phases |\n|---|---|---|\n"; - for (const { repo: r, progress } of progressByRepo) { - const total = totals[r.name]; - const verified = progress.verifiedSteps.size; - const counter = total !== undefined ? `${verified} / ${total}` : `${verified} / ?`; - const phases = `red ${progress.redCount} · green ${progress.greenCount} · refactor ${progress.refactorCount}`; - body += `| [${r.name}](/${name}/${r.name}) | ${counter} | ${phases} |\n`; - } - } - - if (isSelf) { - body += `\n\n---\n\n[sign out](/auth/logout) · [toggle visibility](#) (POST /api/agents/${name}/visibility with your push token)`; - } - - const verifiedSteps = progressByRepo.reduce((acc, p) => acc + p.progress.verifiedSteps.size, 0); - const description = - repos.length === 0 - ? `${name} just registered on tdd.md — no kata attempts yet.` - : `${name}'s TDD attempts on tdd.md: ${repos.length} ${repos.length === 1 ? "kata" : "katas"} pushed, ${verifiedSteps} verified red→green ${verifiedSteps === 1 ? 
"step" : "steps"}.`; - const html = await renderPage({ - title: `${name} · TDD attempts — tdd.md`, - description, - bodyMarkdown: body, - ogPath: `https://tdd.md/agents/${name}`, - active: "agents", - }); - return htmlResponse(html); - }, - // Redirect the legacy URL to the canonical /:owner/:repo path — - // /agents/:name/:kata used to render a placeholder before the - // GitHub-style routing landed. - "/agents/:name/:kata": (req) => - Response.redirect(`/${req.params.name}/${req.params.kata}`, 301), - - "/leaderboard": () => renderLeaderboard(), - - "/api/judge/:owner/:repo": async (req) => { - if (req.method !== "POST") { - return new Response("method not allowed; POST to trigger a judge run", { status: 405 }); - } - // Manual triggers require the admin token. Push-driven runs come - // through /api/forgejo/webhook with HMAC signature verification. - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; - const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? ""; - if (!adminToken || !timingSafeEqual(provided, adminToken)) { - return new Response("unauthorized — POST with `Authorization: Bearer `", { status: 401 }); - } - try { - const verdict = await judge(req.params.owner, req.params.repo); - return Response.json(verdict); - } catch (err) { - return Response.json({ error: (err as Error).message }, { status: 500 }); - } - }, - - // Self-service visibility toggle. Agent posts their push token in - // Authorization, picks "public" | "limited" | "private". We verify - // the token actually belongs to :name by hitting Forgejo's /user - // endpoint with it, then PATCH the user via admin token. - "/api/agents/:name/visibility": async (req) => { - if (req.method !== "POST") return new Response("POST only", { status: 405 }); - const name = req.params.name; - const provided = req.headers.get("authorization")?.replace(/^[Bb]earer\s+/, "") ?? 
""; - if (!provided) return Response.json({ error: "missing bearer token" }, { status: 401 }); - - // Verify the token belongs to :name (or is the admin token). - const adminToken = process.env.FORGEJO_ADMIN_TOKEN ?? ""; - let allowed = adminToken && timingSafeEqual(provided, adminToken); - if (!allowed) { - const meRes = await fetch(`${FORGEJO_INTERNAL}/api/v1/user`, { - headers: { Authorization: `token ${provided}` }, - }); - if (meRes.ok) { - const me = (await meRes.json()) as { login?: string }; - allowed = me.login === name; - } - } - if (!allowed) return Response.json({ error: "token does not match agent" }, { status: 403 }); - - let body: { visibility?: string }; - try { - body = (await req.json()) as { visibility?: string }; - } catch { - return Response.json({ error: "invalid json" }, { status: 400 }); - } - const visibility = body.visibility; - if (visibility !== "public" && visibility !== "limited" && visibility !== "private") { - return Response.json( - { error: "visibility must be one of public|limited|private" }, - { status: 400 }, - ); - } - - const patchRes = await fetch( - `${FORGEJO_INTERNAL}/api/v1/admin/users/${encodeURIComponent(name)}`, - { - method: "PATCH", - headers: { ...adminApiHeaders(), "Content-Type": "application/json" }, - body: JSON.stringify({ visibility, source_id: 0, login_name: name }), - }, - ); - if (!patchRes.ok) { - const text = await patchRes.text(); - return Response.json( - { error: `forgejo PATCH failed: ${patchRes.status} ${text}` }, - { status: 502 }, - ); - } - return Response.json({ name, visibility }); - }, - - "/api/forgejo/webhook": async (req) => { - if (req.method !== "POST") return new Response("POST only", { status: 405 }); - const secret = process.env.WEBHOOK_SECRET; - if (!secret) return new Response("webhook not configured", { status: 503 }); - - const body = await req.text(); - const provided = - req.headers.get("x-forgejo-signature") ?? req.headers.get("x-gitea-signature") ?? 
""; - const expected = await hmacSha256Hex(secret, body); - if (provided.length !== expected.length || !timingSafeEqual(provided, expected)) { - return new Response("invalid signature", { status: 401 }); - } - - let payload: { repository?: { owner?: { login?: string }; name?: string }; ref?: string }; - try { - payload = JSON.parse(body); - } catch { - return new Response("invalid json", { status: 400 }); - } - const owner = payload.repository?.owner?.login; - const repo = payload.repository?.name; - if (!owner || !repo) return new Response("missing owner/repo", { status: 400 }); - - // Fire the judge in the background; ack immediately so Forgejo - // doesn't time out while we're checking out commits. - void judge(owner, repo).catch((err) => { - console.error(`judge failed for ${owner}/${repo}:`, err); - }); - return Response.json({ accepted: true, owner, repo }); - }, - - "/you": async (req) => { - const viewer = await getViewer(req); - const target = viewer ? `/agents/${viewer}` : "/auth/github/start"; - return new Response(null, { status: 302, headers: { Location: target } }); - }, - - "/auth/logout": (_req) => { - // Clear the session cookie and bounce back home. 
- return new Response(null, { - status: 302, - headers: { - Location: "/", - "Set-Cookie": sessionCookieHeader("", 0), - }, - }); - }, - - "/auth/github/start": (_req) => { - if (!github.isConfigured() || !forgejo.isConfigured()) { - return errorPage("registration is not configured on this server", 503); - } - const nonce = randomHex(16); - return new Response(null, { - status: 302, - headers: { - Location: github.authorizeUrl(nonce, CALLBACK_URL), - "Set-Cookie": `tdd_oauth_state=${nonce}; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=600`, - }, - }); - }, - - "/auth/github/callback": async (req) => { - const url = new URL(req.url); - const code = url.searchParams.get("code"); - const state = url.searchParams.get("state"); - if (!code || !state) return errorPage("missing code or state"); - - const cookies = parseCookies(req.headers.get("cookie")); - const cookieState = cookies.tdd_oauth_state; - if (!cookieState || !timingSafeEqual(cookieState, state)) { - return errorPage("state mismatch — open the registration page again and retry"); - } - - let username: string; - let email: string; - let fullName: string | null; - try { - const accessToken = await github.exchangeCode(code, CALLBACK_URL); - const user = await github.fetchUser(accessToken); - username = user.login; - fullName = user.name; - // GitHub's noreply email format: unique per account, never collides - // with another Forgejo user. We don't need a deliverable address — - // agents authenticate by token, not by email reset flow. - email = `${user.id}+${user.login}@users.noreply.github.com`; - } catch (err) { - return errorPage(`github oauth failed: ${(err as Error).message}`, 400); - } - - // Login vs register: if the user already exists in Forgejo, this - // is a returning visitor — set the session cookie, redirect to - // their dashboard, don't rotate their token. 
- const isExisting = await forgejo.userExists(username); - const sessionToken = await signSession(username); - const sessionCookie = sessionCookieHeader(sessionToken, SESSION_TTL_SEC); - const clearOauthState = - "tdd_oauth_state=; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=0"; - - if (isExisting) { - return new Response(null, { - status: 302, - headers: new Headers([ - ["Location", `/agents/${username}`], - ["Set-Cookie", sessionCookie], - ["Set-Cookie", clearOauthState], - ]), - }); - } - - let reg: forgejo.AgentRegistration; - try { - reg = await forgejo.registerAgent({ - username, - email, - fullName: fullName ?? undefined, - }); - } catch (err) { - return errorPage(`failed to create your agent: ${(err as Error).message}`, 422); - } - - const verb = reg.isNew ? "created" : "rotated"; - const body = `# welcome, ${reg.username} - -> Your tdd.md agent has been ${verb}. **Save the token below — this page is the only time you'll see it.** If you lose it, [register again](/agents/register) to issue a fresh one (the old one will stop working). - -## push token - -\`\`\` -${reg.pushToken} -\`\`\` - -## kata: string-calc - -Your repo is at [\`git.tdd.md/${reg.username}/string-calc\`](https://git.tdd.md/${reg.username}/string-calc), already initialized with a default branch \`main\`. - -\`\`\` -git clone ${reg.repoCloneUrl} -cd string-calc - -# play the kata, commit per phase -# red: commit a failing test -# green: commit the impl that makes it pass -# refactor: commit a structural change with tests staying green - -git push -# username: ${reg.username} -# password: -\`\`\` - -When you push, the judge replays your commits and posts the verdict at [/agents/${reg.username}/string-calc](/agents/${reg.username}/string-calc). 
- -[← spec](/games/string-calc) · [all agents](/agents) -`; - - const html = await renderPage({ - title: `welcome ${reg.username} — tdd.md`, - bodyMarkdown: body, - active: "agents", - noindex: true, - }); - return new Response(html, { - headers: new Headers([ - ["Content-Type", "text/html; charset=utf-8"], - ["Set-Cookie", sessionCookie], - ["Set-Cookie", clearOauthState], - ]), - }); - }, - }, - - async fetch(req) { - const url = new URL(req.url); - - // Bare //.git (no sub-path) is what someone gets when - // they paste the clone URL into a browser. Without intervention our - // proxy hands it to Forgejo, which renders its own repo page — - // Forgejo's chrome leaks onto tdd.md. Redirect to the clean URL - // so the visitor lands on our Bun-native scoreboard instead. Real - // git operations always have sub-paths (/info/refs, /git-upload-pack, - // /objects/...) and continue to be proxied below. - const bareGitUrl = url.pathname.match( - /^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\.git\/?$/, - ); - if (bareGitUrl) { - return new Response(null, { - status: 302, - headers: { Location: `/${bareGitUrl[1]}/${bareGitUrl[2]}` }, - }); - } - - // Git smart-HTTP and dumb-HTTP — proxy raw to Forgejo. - if (isGitProtocol(url.pathname, url.searchParams)) { - return proxyToForgejo(req, url.pathname + url.search); - } - - // Bare repo URL: // — render Bun-native view via Forgejo API. - // Two segments only, no trailing path. Reserved top-level paths are - // already matched by explicit routes above, so they never reach here. 
- const repoMatch = url.pathname.match(/^\/([A-Za-z0-9][A-Za-z0-9-]*)\/([A-Za-z0-9][A-Za-z0-9._-]*)\/?$/); - if (repoMatch) { - const viewer = await getViewer(req); - return renderRepoView(repoMatch[1]!, repoMatch[2]!, viewer); - } - - const html = await renderNotFound(url.pathname); - return htmlResponse(html, 404); - }, - - error(err) { - console.error(err); - return new Response("internal error", { status: 500 }); - }, -}); - -console.log(`tdd.md → ${server.url}`);