${escape(opts.title)}

a6c69dca5ba70bf06d0d45a651e0b4120ff10685 diff --git a/.gitignore b/.gitignore index 8f5f93722836ac975b24970911d47a2273614444..a10315de6708e7ea4368a7aa128fe9e9b08de60c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ node_modules/ .env .env.local .bun-cache/ +.claude/ diff --git a/src/c21_app.ts b/src/c21_app.ts index 1ec1ba4053a322453576ce15ff0416435412cd58..2d6909b00d9bf8f5c31d70610a251c3698e53eb5 100644 --- a/src/c21_app.ts +++ b/src/c21_app.ts @@ -6,58 +6,49 @@ import { renderPage, renderNotFound, htmlResponse, - errorPage, - phaseSpan, - relativeTime, +} from "./c51_render_layout.ts"; +import { + projectsLandingMd, + projectRegisterMd, + projectDetailMd, +} from "./c51_render_projects.ts"; +import { reportsLandingMd, execSummaryMd, agentDrilldownMd, testsOverviewMd, - projectsLandingMd, - projectRegisterMd, - projectDetailMd, -} from "./c51_render.ts"; -import * as github from "./c14_github.ts"; -import * as forgejo from "./c14_forgejo.ts"; +} from "./c51_render_reports.ts"; import { FORGEJO_URL, adminApiHeaders, - getUserVisibility, proxyToForgejo, - type ForgejoUserSummary, } from "./c14_forgejo.ts"; -import { parseCommit, computeProgress } from "./c31_commits.ts"; -import { loadGame, listGames } from "./c31_games.ts"; +import { fetchProjectConfig } from "./c14_github.ts"; +import { listGames, loadGame } from "./c31_games.ts"; import { ALL_POSTS } from "./c31_blog.ts"; import { ALL_GUIDES } from "./c31_guides.ts"; import { DEMO_REPORTS } from "./c31_reports_demo.ts"; import { parseRepoIdentifier } from "./c31_project_config.ts"; -import { fetchProjectConfig } from "./c14_github.ts"; import { judge } from "./c32_judge.ts"; import { - SESSION_TTL_SEC, getViewer, - randomHex, - parseCookies, - signSession, sessionCookieHeader, timingSafeEqual, hmacSha256Hex, } from "./c32_session.ts"; import { - latestRun, - allLatestRuns, listActiveProjects, getProject, upsertProject, } from "./c13_database.ts"; +import { renderRepoView } from "./c21_handlers_repo_view.ts"; +import { renderAgentsIndex, renderAgentDetail } from "./c21_handlers_agents.ts"; +import { renderLeaderboard } from "./c21_handlers_leaderboard.ts"; +import { startGithubOauth, handleGithubCallback } from "./c21_handlers_auth.ts"; const HOME_MD = "./content/home.md"; const GAME_DIR = "./content/games"; -const BASE_URL = process.env.BASE_URL ?? "https://tdd.md"; -const CALLBACK_URL = `${BASE_URL}/auth/github/callback`; - const HOME_DESCRIPTION = "Test-driven development for agentic coding. Your AI agent practices on scored katas; the judge replays its commits against hidden tests and posts a public verdict on the discipline."; @@ -123,129 +114,6 @@ const renderKata = async (kata: string): Promise => { return new Response(html, { headers: { "Content-Type": "text/html; charset=utf-8" } }); }; -const renderAgentsIndex = async (): Promise => { - let users: ForgejoUserSummary[] = []; - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; - if (adminToken) { - const r = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, { - headers: adminApiHeaders(), - }); - if (r.ok) users = (await r.json()) as ForgejoUserSummary[]; - } - // Drop the admin (id 1) and anyone whose visibility isn't "public" — - // private and limited agents stay invisible on the public index. - const agents = users.filter( - (u) => u.id !== 1 && !u.is_admin && (u.visibility ?? "public") === "public", - ); - - // Per-agent score totals from the latest run per repo. - const allRuns = allLatestRuns(); - const totalsByOwner = new Map(); - for (const r of allRuns) { - const t = totalsByOwner.get(r.owner) ?? { score: 0, runs: 0 }; - t.score += r.verdict.totalScore; - t.runs += 1; - totalsByOwner.set(r.owner, t); - } - - let body: string; - if (agents.length === 0) { - body = `# agents - -> No agents registered yet. Be the first. - -[ Register your agent → ](/agents/register) -`; - } else { - const rows = agents - .map((u) => { - const t = totalsByOwner.get(u.login) ?? { score: 0, runs: 0 }; - const sign = t.score >= 0 ? "+" : ""; - return `| [${u.login}](/agents/${u.login}) | ${t.runs} | ${sign}${t.score} |`; - }) - .join("\n"); - body = `# agents - -| agent | attempts | total score | -|---|---|---| -${rows} - -[ Register your agent → ](/agents/register) -`; - } - - const description = - agents.length === 0 - ? "AI agents doing test-driven development on tdd.md — registration is open, sign in with GitHub to play." - : `${agents.length} AI ${agents.length === 1 ? "agent" : "agents"} doing test-driven development on tdd.md, scored on red→green discipline against hidden tests for agentic coding.`; - - const html = await renderPage({ - title: "AI agents on tdd.md", - description, - bodyMarkdown: body, - ogPath: "https://tdd.md/agents", - active: "agents", - }); - return htmlResponse(html); -}; - -const renderLeaderboard = async (): Promise => { - // Only show runs whose owner is public. Fetch the user list once - // and build a Set so we can filter without N+1 lookups. - const adminToken = process.env.FORGEJO_ADMIN_TOKEN; - const publicOwners = new Set(); - if (adminToken) { - const r = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, { - headers: adminApiHeaders(), - }); - if (r.ok) { - const users = (await r.json()) as ForgejoUserSummary[]; - for (const u of users) { - if ((u.visibility ?? "public") === "public") publicOwners.add(u.login); - } - } - } - const runs = allLatestRuns() - .filter((r) => publicOwners.size === 0 || publicOwners.has(r.owner)) - .sort((a, b) => b.verdict.totalScore - a.verdict.totalScore); - let body: string; - if (runs.length === 0) { - body = `# leaderboard - -> No verdicts yet. The first agent to push a red→green pair lands here. - -[ Register your agent → ](/agents/register) -`; - } else { - const rows = runs - .map((r, i) => { - const sign = r.verdict.totalScore >= 0 ? "+" : ""; - const verified = r.verdict.steps.filter((s) => s.status === "verified").length; - return `| ${i + 1} | [${r.owner}](/agents/${r.owner}) | [${r.repo}](/${r.owner}/${r.repo}) | ${sign}${r.verdict.totalScore} | ${verified} |`; - }) - .join("\n"); - body = `# leaderboard - -| rank | agent | kata | score | verified steps | -|---|---|---|---|---| -${rows} -`; - } - const description = - runs.length === 0 - ? "TDD leaderboard for AI agents on tdd.md — be the first verdict." - : `Top AI agents by TDD score on tdd.md — ${runs.length} ranked ${runs.length === 1 ? "submission" : "submissions"} graded on red→green discipline and hidden test pass rate.`; - - const html = await renderPage({ - title: "TDD leaderboard — tdd.md", - description, - bodyMarkdown: body, - ogPath: "https://tdd.md/leaderboard", - active: "leaderboard", - }); - return htmlResponse(html); -}; - const REGISTER_BODY = `# register > Sign in with GitHub to create your tdd.md agent. @@ -274,191 +142,6 @@ const REGISTER_HTML = await renderPage({ noindex: true, }); -interface ForgejoRepoSummary { - description: string; - clone_url: string; - empty: boolean; - private: boolean; -} - -interface ForgejoCommit { - sha: string; - commit: { message: string; author: { name: string; date: string } }; -} - -const renderRepoView = async ( - owner: string, - repo: string, - viewer: string | null, -): Promise => { - // Private/limited owners get a 404 to anonymous visitors — but the - // owner themselves (verified via session cookie) can always see - // their own pages. - const ownerVisibility = await getUserVisibility(owner); - if (ownerVisibility !== null && ownerVisibility !== "public" && viewer !== owner) { - const html = await renderNotFound(`/${owner}/${repo}`); - return htmlResponse(html, 404); - } - - const repoApi = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`; - const repoRes = await fetch(repoApi, { headers: adminApiHeaders() }); - if (repoRes.status === 404) { - const html = await renderNotFound(`/${owner}/${repo}`); - return htmlResponse(html, 404); - } - if (!repoRes.ok) { - const html = await renderPage({ - title: `${owner}/${repo} — tdd.md`, - bodyMarkdown: `# ${owner}/${repo}\n\n> repository unavailable`, - }); - return htmlResponse(html, 502); - } - const info = (await repoRes.json()) as ForgejoRepoSummary; - const cloneUrl = info.clone_url || `https://tdd.md/${owner}/${repo}.git`; - const isPrivate = info.private === true; - - // The repo name is by convention the kata id. If the kata exists, the - // header link is meaningful and we know the total step count. - let totalSteps: number | null = null; - let kataExists = false; - try { - const game = await loadGame(repo); - totalSteps = game.steps.length; - kataExists = true; - } catch { - // Repo isn't a known kata — still render, just without step totals. - } - - let commits: ForgejoCommit[] = []; - if (!info.empty) { - const commitsRes = await fetch(`${repoApi}/commits?limit=50&stat=false`, { - headers: adminApiHeaders(), - }); - if (commitsRes.ok) commits = (await commitsRes.json()) as ForgejoCommit[]; - } - const progress = computeProgress(commits); - const verified = progress.verifiedSteps.size; - - let status: string; - if (commits.length === 0) { - status = "awaiting first push"; - } else if (totalSteps !== null && verified >= totalSteps) { - status = "kata complete"; - } else if (verified > 0) { - status = "in progress"; - } else { - status = "no verified steps yet"; - } - const stepCounter = totalSteps !== null ? `${verified} / ${totalSteps}` : `${verified} / ?`; - - let phaseLog: string; - if (commits.length === 0) { - phaseLog = "_No commits yet — push your first `red:` commit to start the cycle._"; - } else { - const rows = commits.map((c) => { - const sha = c.sha.slice(0, 7); - const p = parseCommit(c.commit.message); - const subject = (p.subject || c.commit.message.split("\n")[0] || "").replace(/\|/g, "\\|"); - const stepCell = p.step ? `\`${p.step}\`` : "—"; - return `| \`${sha}\` | ${phaseSpan(p.phase)} | ${stepCell} | ${subject} | ${relativeTime(c.commit.author.date)} |`; - }); - phaseLog = `| sha | phase | step | message | when |\n|---|---|---|---|---|\n${rows.join("\n")}`; - } - - const kataLink = kataExists - ? `[\`${repo}\` →](/games/${repo})` - : `\`${repo}\``; - const privateBadge = isPrivate ? ` [private]` : ""; - - const verdict = latestRun(owner, repo); - const headSha = commits[0]?.sha ?? null; - const verdictStale = verdict !== null && headSha !== null && verdict.headSha !== headSha; - - let scoreSection: string; - if (verdict === null) { - scoreSection = `> Not yet judged. The next push triggers a judge run, or [run the judge now](/api/judge/${owner}/${repo}) (POST).\n\nPhase tally: red ${progress.redCount} · green ${progress.greenCount} · refactor ${progress.refactorCount}${progress.untaggedCount > 0 ? ` · untagged ${progress.untaggedCount}` : ""}.`; - } else { - const stale = verdictStale ? ` · stale — newer commits not yet judged` : ""; - const sign = verdict.totalScore >= 0 ? "+" : ""; - const statusClass = (status: string): string => { - if (status === "verified") return "green"; - if (status === "discipline-only") return "blue"; - if (status === "no-green") return "muted"; - return "red"; - }; - const modeLabel = (m: string): string => { - const cls = m === "strict" ? "red" : m === "pragmatic" ? "blue" : "green"; - return `${m}`; - }; - const rows = verdict.steps.length === 0 - ? "_No red→green pairs found yet._" - : `| step | red | green | hidden | status | points | explanation |\n|---|---|---|---|---|---|---|\n` + - verdict.steps.map((s) => { - const cls = statusClass(s.status); - const sign = s.scoreDelta >= 0 ? "+" : ""; - const hiddenCell = - s.hiddenPassed === true ? `pass` : - s.hiddenPassed === false ? `fail` : - `—`; - const explanation = (s.explanation ?? "").replace(/\|/g, "\\|"); - return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | ${s.status} | ${sign}${s.scoreDelta} | ${explanation} |`; - }).join("\n"); - const refactorRows = (verdict.refactors ?? []).length === 0 - ? "" - : `\n\n### refactors\n\n| sha | step | tests | points | explanation |\n|---|---|---|---|---|\n` + - verdict.refactors.map((r) => { - const sign = r.scoreDelta >= 0 ? "+" : ""; - const cls = r.testsPassed ? "green" : "red"; - const verb = r.testsPassed ? "green" : "broke tests"; - const explanation = (r.explanation ?? "").replace(/\|/g, "\\|"); - return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | ${verb} | ${sign}${r.scoreDelta} | ${explanation} |`; - }).join("\n"); - const modeLine = verdict.mode ? `**mode: ${modeLabel(verdict.mode)}** · ` : ""; - scoreSection = `${modeLine}**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`; - } - - const body = `# ${owner} · playing ${kataLink}${privateBadge} - -> ${status} -> **${stepCounter}** steps verified - -## phase log - -${phaseLog} - -## score - -${scoreSection} - -## clone - -\`\`\` -git clone ${cloneUrl} -\`\`\` - -[← /agents/${owner}](/agents/${owner})${kataExists ? ` · [kata spec →](/games/${repo})` : ""} -`; - - // Dynamic description tailored to this attempt — gives every agent - // run a unique snippet for search results and social previews instead - // of falling back to the site default. - const totalSnippet = - verdict !== null - ? `, score ${verdict.totalScore >= 0 ? "+" : ""}${verdict.totalScore}` - : ""; - const description = kataExists - ? `${owner}'s ${repo} TDD kata attempt on tdd.md — ${verified}${totalSteps !== null ? `/${totalSteps}` : ""} steps verified${totalSnippet}.` - : `${owner}/${repo} on tdd.md — ${commits.length} ${commits.length === 1 ? "commit" : "commits"} in the phase log${totalSnippet}.`; - - const html = await renderPage({ - title: `${owner} · ${repo}${kataExists ? " TDD kata" : ""} — tdd.md`, - description, - bodyMarkdown: body, - ogPath: `https://tdd.md/${owner}/${repo}`, - active: "agents", - }); - return htmlResponse(html); -}; const isGitProtocol = (pathname: string, search: URLSearchParams): boolean => { if (pathname.includes(".git/") || pathname.endsWith(".git")) return true; @@ -832,93 +515,8 @@ ${rows} "/agents": () => renderAgentsIndex(), "/agents/register": htmlResponse(REGISTER_HTML), "/agents/:name": async (req) => { - const name = req.params.name; const viewer = await getViewer(req); - const userRes = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}`, { - headers: adminApiHeaders(), - }); - // Treat private/limited users as if they don't exist publicly — - // unless the logged-in viewer IS the owner. Owner can always see - // their own dashboard, public or not. - if (userRes.ok) { - const u = (await userRes.clone().json()) as ForgejoUserSummary; - const ownVisibility = u.visibility ?? "public"; - if (ownVisibility !== "public" && viewer !== name) { - const html = await renderNotFound(`/agents/${name}`); - return htmlResponse(html, 404); - } - } - if (userRes.status === 404) { - const html = await renderPage({ - title: `${name} — agents — tdd.md`, - bodyMarkdown: `# agents / ${name}\n\n> No agent registered with this name.\n\n[← all agents](/agents) · [register your own →](/agents/register)`, - ogPath: `https://tdd.md/agents/${name}`, - active: "agents", - }); - return htmlResponse(html, 404); - } - const reposRes = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}/repos?limit=50`, { - headers: adminApiHeaders(), - }); - const repos = reposRes.ok ? ((await reposRes.json()) as { name: string; description: string }[]) : []; - - const progressByRepo = await Promise.all( - repos.map(async (r) => { - const cRes = await fetch( - `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(name)}/${encodeURIComponent(r.name)}/commits?limit=50&stat=false`, - { headers: adminApiHeaders() }, - ); - const commits = cRes.ok ? ((await cRes.json()) as { commit: { message: string } }[]) : []; - return { repo: r, progress: computeProgress(commits) }; - }), - ); - - const totals: Record = {}; - for (const r of repos) { - try { - const game = await loadGame(r.name); - totals[r.name] = game.steps.length; - } catch { - // unknown kata, no total - } - } - - const isSelf = viewer === name; - let body = `# agents / ${name}\n\n`; - if (isSelf) { - body += `> Welcome back, ${name}. This is your dashboard — only you and admins see it when your profile is private.\n\n`; - } - if (repos.length === 0) { - body += "> Registered, but no kata attempts yet.\n\n[← all agents](/agents)"; - } else { - body += "## attempts\n\n"; - body += "| kata | verified | phases |\n|---|---|---|\n"; - for (const { repo: r, progress } of progressByRepo) { - const total = totals[r.name]; - const verified = progress.verifiedSteps.size; - const counter = total !== undefined ? `${verified} / ${total}` : `${verified} / ?`; - const phases = `red ${progress.redCount} · green ${progress.greenCount} · refactor ${progress.refactorCount}`; - body += `| [${r.name}](/${name}/${r.name}) | ${counter} | ${phases} |\n`; - } - } - - if (isSelf) { - body += `\n\n---\n\n[sign out](/auth/logout) · [toggle visibility](#) (POST /api/agents/${name}/visibility with your push token)`; - } - - const verifiedSteps = progressByRepo.reduce((acc, p) => acc + p.progress.verifiedSteps.size, 0); - const description = - repos.length === 0 - ? `${name} just registered on tdd.md — no kata attempts yet.` - : `${name}'s TDD attempts on tdd.md: ${repos.length} ${repos.length === 1 ? "kata" : "katas"} pushed, ${verifiedSteps} verified red→green ${verifiedSteps === 1 ? "step" : "steps"}.`; - const html = await renderPage({ - title: `${name} · TDD attempts — tdd.md`, - description, - bodyMarkdown: body, - ogPath: `https://tdd.md/agents/${name}`, - active: "agents", - }); - return htmlResponse(html); + return renderAgentDetail(req.params.name, viewer); }, // Redirect the legacy URL to the canonical /:owner/:repo path — // /agents/:name/:kata used to render a placeholder before the @@ -1051,126 +649,9 @@ ${rows} }); }, - "/auth/github/start": (_req) => { - if (!github.isConfigured() || !forgejo.isConfigured()) { - return errorPage("registration is not configured on this server", 503); - } - const nonce = randomHex(16); - return new Response(null, { - status: 302, - headers: { - Location: github.authorizeUrl(nonce, CALLBACK_URL), - "Set-Cookie": `tdd_oauth_state=${nonce}; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=600`, - }, - }); - }, - - "/auth/github/callback": async (req) => { - const url = new URL(req.url); - const code = url.searchParams.get("code"); - const state = url.searchParams.get("state"); - if (!code || !state) return errorPage("missing code or state"); - - const cookies = parseCookies(req.headers.get("cookie")); - const cookieState = cookies.tdd_oauth_state; - if (!cookieState || !timingSafeEqual(cookieState, state)) { - return errorPage("state mismatch — open the registration page again and retry"); - } - - let username: string; - let email: string; - let fullName: string | null; - try { - const accessToken = await github.exchangeCode(code, CALLBACK_URL); - const user = await github.fetchUser(accessToken); - username = user.login; - fullName = user.name; - // GitHub's noreply email format: unique per account, never collides - // with another Forgejo user. We don't need a deliverable address — - // agents authenticate by token, not by email reset flow. - email = `${user.id}+${user.login}@users.noreply.github.com`; - } catch (err) { - return errorPage(`github oauth failed: ${(err as Error).message}`, 400); - } - - // Login vs register: if the user already exists in Forgejo, this - // is a returning visitor — set the session cookie, redirect to - // their dashboard, don't rotate their token. - const isExisting = await forgejo.userExists(username); - const sessionToken = await signSession(username); - const sessionCookie = sessionCookieHeader(sessionToken, SESSION_TTL_SEC); - const clearOauthState = - "tdd_oauth_state=; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=0"; - - if (isExisting) { - return new Response(null, { - status: 302, - headers: new Headers([ - ["Location", `/agents/${username}`], - ["Set-Cookie", sessionCookie], - ["Set-Cookie", clearOauthState], - ]), - }); - } - - let reg: forgejo.AgentRegistration; - try { - reg = await forgejo.registerAgent({ - username, - email, - fullName: fullName ?? undefined, - }); - } catch (err) { - return errorPage(`failed to create your agent: ${(err as Error).message}`, 422); - } - - const verb = reg.isNew ? "created" : "rotated"; - const body = `# welcome, ${reg.username} - -> Your tdd.md agent has been ${verb}. **Save the token below — this page is the only time you'll see it.** If you lose it, [register again](/agents/register) to issue a fresh one (the old one will stop working). - -## push token - -\`\`\` -${reg.pushToken} -\`\`\` - -## kata: string-calc - -Your repo is at [\`git.tdd.md/${reg.username}/string-calc\`](https://git.tdd.md/${reg.username}/string-calc), already initialized with a default branch \`main\`. - -\`\`\` -git clone ${reg.repoCloneUrl} -cd string-calc - -# play the kata, commit per phase -# red: commit a failing test -# green: commit the impl that makes it pass -# refactor: commit a structural change with tests staying green + "/auth/github/start": (_req) => startGithubOauth(), -git push -# username: ${reg.username} -# password: -\`\`\` + "/auth/github/callback": async (req) => handleGithubCallback(req), -When you push, the judge replays your commits and posts the verdict at [/agents/${reg.username}/string-calc](/agents/${reg.username}/string-calc). - -[← spec](/games/string-calc) · [all agents](/agents) -`; - - const html = await renderPage({ - title: `welcome ${reg.username} — tdd.md`, - bodyMarkdown: body, - active: "agents", - noindex: true, - }); - return new Response(html, { - headers: new Headers([ - ["Content-Type", "text/html; charset=utf-8"], - ["Set-Cookie", sessionCookie], - ["Set-Cookie", clearOauthState], - ]), - }); - }, }, }); diff --git a/src/c21_handlers_agents.ts b/src/c21_handlers_agents.ts new file mode 100644 index 0000000000000000000000000000000000000000..99486b75960829dcbcfd3103418074846c212e01 --- /dev/null +++ b/src/c21_handlers_agents.ts @@ -0,0 +1,175 @@ +// c21 (agents) — handlers for /agents (index) and /agents/:name (detail). +// Both compose Forgejo admin lookups (c14) with kata progress (c31) and +// the verdict store (c13). The route table in c21_app.ts forwards the +// matching path here. + +import { + FORGEJO_URL, + adminApiHeaders, + type ForgejoUserSummary, +} from "./c14_forgejo.ts"; +import { computeProgress } from "./c31_commits.ts"; +import { loadGame } from "./c31_games.ts"; +import { allLatestRuns } from "./c13_database.ts"; +import { + renderPage, + renderNotFound, + htmlResponse, +} from "./c51_render_layout.ts"; + +export const renderAgentsIndex = async (): Promise => { + let users: ForgejoUserSummary[] = []; + const adminToken = process.env.FORGEJO_ADMIN_TOKEN; + if (adminToken) { + const r = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, { + headers: adminApiHeaders(), + }); + if (r.ok) users = (await r.json()) as ForgejoUserSummary[]; + } + // Drop the admin (id 1) and anyone whose visibility isn't "public" — + // private and limited agents stay invisible on the public index. + const agents = users.filter( + (u) => u.id !== 1 && !u.is_admin && (u.visibility ?? "public") === "public", + ); + + // Per-agent score totals from the latest run per repo. + const allRuns = allLatestRuns(); + const totalsByOwner = new Map(); + for (const r of allRuns) { + const t = totalsByOwner.get(r.owner) ?? { score: 0, runs: 0 }; + t.score += r.verdict.totalScore; + t.runs += 1; + totalsByOwner.set(r.owner, t); + } + + let body: string; + if (agents.length === 0) { + body = `# agents + +> No agents registered yet. Be the first. + +[ Register your agent → ](/agents/register) +`; + } else { + const rows = agents + .map((u) => { + const t = totalsByOwner.get(u.login) ?? { score: 0, runs: 0 }; + const sign = t.score >= 0 ? "+" : ""; + return `| [${u.login}](/agents/${u.login}) | ${t.runs} | ${sign}${t.score} |`; + }) + .join("\n"); + body = `# agents + +| agent | attempts | total score | +|---|---|---| +${rows} + +[ Register your agent → ](/agents/register) +`; + } + + const description = + agents.length === 0 + ? "AI agents doing test-driven development on tdd.md — registration is open, sign in with GitHub to play." + : `${agents.length} AI ${agents.length === 1 ? "agent" : "agents"} doing test-driven development on tdd.md, scored on red→green discipline against hidden tests for agentic coding.`; + + const html = await renderPage({ + title: "AI agents on tdd.md", + description, + bodyMarkdown: body, + ogPath: "https://tdd.md/agents", + active: "agents", + }); + return htmlResponse(html); +}; + +export const renderAgentDetail = async ( + name: string, + viewer: string | null, +): Promise => { + const userRes = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}`, { + headers: adminApiHeaders(), + }); + // Treat private/limited users as if they don't exist publicly — + // unless the logged-in viewer IS the owner. Owner can always see + // their own dashboard, public or not. + if (userRes.ok) { + const u = (await userRes.clone().json()) as ForgejoUserSummary; + const ownVisibility = u.visibility ?? "public"; + if (ownVisibility !== "public" && viewer !== name) { + const html = await renderNotFound(`/agents/${name}`); + return htmlResponse(html, 404); + } + } + if (userRes.status === 404) { + const html = await renderPage({ + title: `${name} — agents — tdd.md`, + bodyMarkdown: `# agents / ${name}\n\n> No agent registered with this name.\n\n[← all agents](/agents) · [register your own →](/agents/register)`, + ogPath: `https://tdd.md/agents/${name}`, + active: "agents", + }); + return htmlResponse(html, 404); + } + const reposRes = await fetch(`${FORGEJO_URL}/api/v1/users/${encodeURIComponent(name)}/repos?limit=50`, { + headers: adminApiHeaders(), + }); + const repos = reposRes.ok ? ((await reposRes.json()) as { name: string; description: string }[]) : []; + + const progressByRepo = await Promise.all( + repos.map(async (r) => { + const cRes = await fetch( + `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(name)}/${encodeURIComponent(r.name)}/commits?limit=50&stat=false`, + { headers: adminApiHeaders() }, + ); + const commits = cRes.ok ? ((await cRes.json()) as { commit: { message: string } }[]) : []; + return { repo: r, progress: computeProgress(commits) }; + }), + ); + + const totals: Record = {}; + for (const r of repos) { + try { + const game = await loadGame(r.name); + totals[r.name] = game.steps.length; + } catch { + // unknown kata, no total + } + } + + const isSelf = viewer === name; + let body = `# agents / ${name}\n\n`; + if (isSelf) { + body += `> Welcome back, ${name}. This is your dashboard — only you and admins see it when your profile is private.\n\n`; + } + if (repos.length === 0) { + body += "> Registered, but no kata attempts yet.\n\n[← all agents](/agents)"; + } else { + body += "## attempts\n\n"; + body += "| kata | verified | phases |\n|---|---|---|\n"; + for (const { repo: r, progress } of progressByRepo) { + const total = totals[r.name]; + const verified = progress.verifiedSteps.size; + const counter = total !== undefined ? `${verified} / ${total}` : `${verified} / ?`; + const phases = `red ${progress.redCount} · green ${progress.greenCount} · refactor ${progress.refactorCount}`; + body += `| [${r.name}](/${name}/${r.name}) | ${counter} | ${phases} |\n`; + } + } + + if (isSelf) { + body += `\n\n---\n\n[sign out](/auth/logout) · [toggle visibility](#) (POST /api/agents/${name}/visibility with your push token)`; + } + + const verifiedSteps = progressByRepo.reduce((acc, p) => acc + p.progress.verifiedSteps.size, 0); + const description = + repos.length === 0 + ? `${name} just registered on tdd.md — no kata attempts yet.` + : `${name}'s TDD attempts on tdd.md: ${repos.length} ${repos.length === 1 ? "kata" : "katas"} pushed, ${verifiedSteps} verified red→green ${verifiedSteps === 1 ? "step" : "steps"}.`; + const html = await renderPage({ + title: `${name} · TDD attempts — tdd.md`, + description, + bodyMarkdown: body, + ogPath: `https://tdd.md/agents/${name}`, + active: "agents", + }); + return htmlResponse(html); +}; diff --git a/src/c21_handlers_auth.ts b/src/c21_handlers_auth.ts new file mode 100644 index 0000000000000000000000000000000000000000..d52b5342163f503f7ab90c68123123c38b0c9cd0 --- /dev/null +++ b/src/c21_handlers_auth.ts @@ -0,0 +1,145 @@ +// c21 (auth) — GitHub OAuth start + callback handlers. Composes +// c14_github (token exchange + user fetch), c14_forgejo (existence check +// + agent registration), c32_session (sign + cookie), c51 layout for +// the welcome page rendered after first-time registration. + +import * as github from "./c14_github.ts"; +import * as forgejo from "./c14_forgejo.ts"; +import { + SESSION_TTL_SEC, + parseCookies, + signSession, + sessionCookieHeader, + timingSafeEqual, + randomHex, +} from "./c32_session.ts"; +import { renderPage, errorPage } from "./c51_render_layout.ts"; + +const BASE_URL = process.env.BASE_URL ?? "https://tdd.md"; +const CALLBACK_URL = `${BASE_URL}/auth/github/callback`; + +const CLEAR_OAUTH_STATE = + "tdd_oauth_state=; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=0"; + +export const startGithubOauth = (): Response => { + if (!github.isConfigured() || !forgejo.isConfigured()) { + // errorPage is async; we wrap below. + return new Response("registration is not configured on this server", { status: 503 }); + } + const nonce = randomHex(16); + return new Response(null, { + status: 302, + headers: { + Location: github.authorizeUrl(nonce, CALLBACK_URL), + "Set-Cookie": `tdd_oauth_state=${nonce}; Path=/auth; HttpOnly; Secure; SameSite=Lax; Max-Age=600`, + }, + }); +}; + +const welcomeBody = (reg: forgejo.AgentRegistration): string => { + const verb = reg.isNew ? "created" : "rotated"; + return `# welcome, ${reg.username} + +> Your tdd.md agent has been ${verb}. **Save the token below — this page is the only time you'll see it.** If you lose it, [register again](/agents/register) to issue a fresh one (the old one will stop working). + +## push token + +\`\`\` +${reg.pushToken} +\`\`\` + +## kata: string-calc + +Your repo is at [\`git.tdd.md/${reg.username}/string-calc\`](https://git.tdd.md/${reg.username}/string-calc), already initialized with a default branch \`main\`. + +\`\`\` +git clone ${reg.repoCloneUrl} +cd string-calc + +# play the kata, commit per phase +# red: commit a failing test +# green: commit the impl that makes it pass +# refactor: commit a structural change with tests staying green + +git push +# username: ${reg.username} +# password: +\`\`\` + +When you push, the judge replays your commits and posts the verdict at [/agents/${reg.username}/string-calc](/agents/${reg.username}/string-calc). + +[← spec](/games/string-calc) · [all agents](/agents) +`; +}; + +export const handleGithubCallback = async (req: Request): Promise => { + const url = new URL(req.url); + const code = url.searchParams.get("code"); + const state = url.searchParams.get("state"); + if (!code || !state) return errorPage("missing code or state"); + + const cookies = parseCookies(req.headers.get("cookie")); + const cookieState = cookies.tdd_oauth_state; + if (!cookieState || !timingSafeEqual(cookieState, state)) { + return errorPage("state mismatch — open the registration page again and retry"); + } + + let username: string; + let email: string; + let fullName: string | null; + try { + const accessToken = await github.exchangeCode(code, CALLBACK_URL); + const user = await github.fetchUser(accessToken); + username = user.login; + fullName = user.name; + // GitHub's noreply email format: unique per account, never collides + // with another Forgejo user. We don't need a deliverable address — + // agents authenticate by token, not by email reset flow. + email = `${user.id}+${user.login}@users.noreply.github.com`; + } catch (err) { + return errorPage(`github oauth failed: ${(err as Error).message}`, 400); + } + + // Login vs register: if the user already exists in Forgejo, this + // is a returning visitor — set the session cookie, redirect to + // their dashboard, don't rotate their token. + const isExisting = await forgejo.userExists(username); + const sessionToken = await signSession(username); + const sessionCookie = sessionCookieHeader(sessionToken, SESSION_TTL_SEC); + + if (isExisting) { + return new Response(null, { + status: 302, + headers: new Headers([ + ["Location", `/agents/${username}`], + ["Set-Cookie", sessionCookie], + ["Set-Cookie", CLEAR_OAUTH_STATE], + ]), + }); + } + + let reg: forgejo.AgentRegistration; + try { + reg = await forgejo.registerAgent({ + username, + email, + fullName: fullName ?? undefined, + }); + } catch (err) { + return errorPage(`failed to create your agent: ${(err as Error).message}`, 422); + } + + const html = await renderPage({ + title: `welcome ${reg.username} — tdd.md`, + bodyMarkdown: welcomeBody(reg), + active: "agents", + noindex: true, + }); + return new Response(html, { + headers: new Headers([ + ["Content-Type", "text/html; charset=utf-8"], + ["Set-Cookie", sessionCookie], + ["Set-Cookie", CLEAR_OAUTH_STATE], + ]), + }); +}; diff --git a/src/c21_handlers_leaderboard.ts b/src/c21_handlers_leaderboard.ts new file mode 100644 index 0000000000000000000000000000000000000000..3619449414ab1afdbbc3d44cebdf2c5dc3f61007 --- /dev/null +++ b/src/c21_handlers_leaderboard.ts @@ -0,0 +1,71 @@ +// c21 (leaderboard) — handler that ranks tracked agents by their kata +// verdict totals. Forgejo admin lookup gives us the public/limited +// filter; c13 supplies the per-repo verdicts. + +import { + FORGEJO_URL, + adminApiHeaders, + type ForgejoUserSummary, +} from "./c14_forgejo.ts"; +import { allLatestRuns } from "./c13_database.ts"; +import { + renderPage, + htmlResponse, +} from "./c51_render_layout.ts"; + +export const renderLeaderboard = async (): Promise => { + // Only show runs whose owner is public. Fetch the user list once + // and build a Set so we can filter without N+1 lookups. + const adminToken = process.env.FORGEJO_ADMIN_TOKEN; + const publicOwners = new Set(); + if (adminToken) { + const r = await fetch(`${FORGEJO_URL}/api/v1/admin/users?limit=200`, { + headers: adminApiHeaders(), + }); + if (r.ok) { + const users = (await r.json()) as ForgejoUserSummary[]; + for (const u of users) { + if ((u.visibility ?? "public") === "public") publicOwners.add(u.login); + } + } + } + const runs = allLatestRuns() + .filter((r) => publicOwners.size === 0 || publicOwners.has(r.owner)) + .sort((a, b) => b.verdict.totalScore - a.verdict.totalScore); + let body: string; + if (runs.length === 0) { + body = `# leaderboard + +> No verdicts yet. The first agent to push a red→green pair lands here. + +[ Register your agent → ](/agents/register) +`; + } else { + const rows = runs + .map((r, i) => { + const sign = r.verdict.totalScore >= 0 ? "+" : ""; + const verified = r.verdict.steps.filter((s) => s.status === "verified").length; + return `| ${i + 1} | [${r.owner}](/agents/${r.owner}) | [${r.repo}](/${r.owner}/${r.repo}) | ${sign}${r.verdict.totalScore} | ${verified} |`; + }) + .join("\n"); + body = `# leaderboard + +| rank | agent | kata | score | verified steps | +|---|---|---|---|---| +${rows} +`; + } + const description = + runs.length === 0 + ? "TDD leaderboard for AI agents on tdd.md — be the first verdict." + : `Top AI agents by TDD score on tdd.md — ${runs.length} ranked ${runs.length === 1 ? "submission" : "submissions"} graded on red→green discipline and hidden test pass rate.`; + + const html = await renderPage({ + title: "TDD leaderboard — tdd.md", + description, + bodyMarkdown: body, + ogPath: "https://tdd.md/leaderboard", + active: "leaderboard", + }); + return htmlResponse(html); +}; diff --git a/src/c21_handlers_repo_view.ts b/src/c21_handlers_repo_view.ts new file mode 100644 index 0000000000000000000000000000000000000000..63dc7009a9e5b038bb9c2a500ff8e6ceb8b4c0e2 --- /dev/null +++ b/src/c21_handlers_repo_view.ts @@ -0,0 +1,207 @@ +// c21 (repo-view) — handler that renders the bare /:owner/:repo page. +// Composes c14_forgejo (repo + commits via admin API), c31 commits + +// games (parsing, kata lookup), c13 verdict store, c51 layout helpers. +// Exposed via the c21_app.ts fallback fetch — reserved top-level routes +// are matched first, this is the catch-all for //. + +import { + FORGEJO_URL, + adminApiHeaders, + getUserVisibility, +} from "./c14_forgejo.ts"; +import { parseCommit, computeProgress } from "./c31_commits.ts"; +import { loadGame } from "./c31_games.ts"; +import { latestRun } from "./c13_database.ts"; +import { + renderPage, + renderNotFound, + htmlResponse, + phaseSpan, + relativeTime, +} from "./c51_render_layout.ts"; + +interface ForgejoRepoSummary { + description: string; + clone_url: string; + empty: boolean; + private: boolean; +} + +interface ForgejoCommit { + sha: string; + commit: { message: string; author: { name: string; date: string } }; +} + +export const renderRepoView = async ( + owner: string, + repo: string, + viewer: string | null, +): Promise => { + // Private/limited owners get a 404 to anonymous visitors — but the + // owner themselves (verified via session cookie) can always see + // their own pages. + const ownerVisibility = await getUserVisibility(owner); + if (ownerVisibility !== null && ownerVisibility !== "public" && viewer !== owner) { + const html = await renderNotFound(`/${owner}/${repo}`); + return htmlResponse(html, 404); + } + + const repoApi = `${FORGEJO_URL}/api/v1/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`; + const repoRes = await fetch(repoApi, { headers: adminApiHeaders() }); + if (repoRes.status === 404) { + const html = await renderNotFound(`/${owner}/${repo}`); + return htmlResponse(html, 404); + } + if (!repoRes.ok) { + const html = await renderPage({ + title: `${owner}/${repo} — tdd.md`, + bodyMarkdown: `# ${owner}/${repo}\n\n> repository unavailable`, + }); + return htmlResponse(html, 502); + } + const info = (await repoRes.json()) as ForgejoRepoSummary; + const cloneUrl = info.clone_url || `https://tdd.md/${owner}/${repo}.git`; + const isPrivate = info.private === true; + + // The repo name is by convention the kata id. If the kata exists, the + // header link is meaningful and we know the total step count. + let totalSteps: number | null = null; + let kataExists = false; + try { + const game = await loadGame(repo); + totalSteps = game.steps.length; + kataExists = true; + } catch { + // Repo isn't a known kata — still render, just without step totals. + } + + let commits: ForgejoCommit[] = []; + if (!info.empty) { + const commitsRes = await fetch(`${repoApi}/commits?limit=50&stat=false`, { + headers: adminApiHeaders(), + }); + if (commitsRes.ok) commits = (await commitsRes.json()) as ForgejoCommit[]; + } + const progress = computeProgress(commits); + const verified = progress.verifiedSteps.size; + + let status: string; + if (commits.length === 0) { + status = "awaiting first push"; + } else if (totalSteps !== null && verified >= totalSteps) { + status = "kata complete"; + } else if (verified > 0) { + status = "in progress"; + } else { + status = "no verified steps yet"; + } + const stepCounter = totalSteps !== null ? `${verified} / ${totalSteps}` : `${verified} / ?`; + + let phaseLog: string; + if (commits.length === 0) { + phaseLog = "_No commits yet — push your first `red:` commit to start the cycle._"; + } else { + const rows = commits.map((c) => { + const sha = c.sha.slice(0, 7); + const p = parseCommit(c.commit.message); + const subject = (p.subject || c.commit.message.split("\n")[0] || "").replace(/\|/g, "\\|"); + const stepCell = p.step ? `\`${p.step}\`` : "—"; + return `| \`${sha}\` | ${phaseSpan(p.phase)} | ${stepCell} | ${subject} | ${relativeTime(c.commit.author.date)} |`; + }); + phaseLog = `| sha | phase | step | message | when |\n|---|---|---|---|---|\n${rows.join("\n")}`; + } + + const kataLink = kataExists + ? `[\`${repo}\` →](/games/${repo})` + : `\`${repo}\``; + const privateBadge = isPrivate ? ` [private]` : ""; + + const verdict = latestRun(owner, repo); + const headSha = commits[0]?.sha ?? null; + const verdictStale = verdict !== null && headSha !== null && verdict.headSha !== headSha; + + let scoreSection: string; + if (verdict === null) { + scoreSection = `> Not yet judged. The next push triggers a judge run, or [run the judge now](/api/judge/${owner}/${repo}) (POST).\n\nPhase tally: red ${progress.redCount} · green ${progress.greenCount} · refactor ${progress.refactorCount}${progress.untaggedCount > 0 ? ` · untagged ${progress.untaggedCount}` : ""}.`; + } else { + const stale = verdictStale ? ` · stale — newer commits not yet judged` : ""; + const sign = verdict.totalScore >= 0 ? "+" : ""; + const statusClass = (status: string): string => { + if (status === "verified") return "green"; + if (status === "discipline-only") return "blue"; + if (status === "no-green") return "muted"; + return "red"; + }; + const modeLabel = (m: string): string => { + const cls = m === "strict" ? "red" : m === "pragmatic" ? "blue" : "green"; + return `${m}`; + }; + const rows = verdict.steps.length === 0 + ? "_No red→green pairs found yet._" + : `| step | red | green | hidden | status | points | explanation |\n|---|---|---|---|---|---|---|\n` + + verdict.steps.map((s) => { + const cls = statusClass(s.status); + const sign = s.scoreDelta >= 0 ? "+" : ""; + const hiddenCell = + s.hiddenPassed === true ? `pass` : + s.hiddenPassed === false ? `fail` : + `—`; + const explanation = (s.explanation ?? "").replace(/\|/g, "\\|"); + return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | ${s.status} | ${sign}${s.scoreDelta} | ${explanation} |`; + }).join("\n"); + const refactorRows = (verdict.refactors ?? []).length === 0 + ? "" + : `\n\n### refactors\n\n| sha | step | tests | points | explanation |\n|---|---|---|---|---|\n` + + verdict.refactors.map((r) => { + const sign = r.scoreDelta >= 0 ? "+" : ""; + const cls = r.testsPassed ? "green" : "red"; + const verb = r.testsPassed ? "green" : "broke tests"; + const explanation = (r.explanation ?? "").replace(/\|/g, "\\|"); + return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | ${verb} | ${sign}${r.scoreDelta} | ${explanation} |`; + }).join("\n"); + const modeLine = verdict.mode ? `**mode: ${modeLabel(verdict.mode)}** · ` : ""; + scoreSection = `${modeLine}**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`; + } + + const body = `# ${owner} · playing ${kataLink}${privateBadge} + +> ${status} +> **${stepCounter}** steps verified + +## phase log + +${phaseLog} + +## score + +${scoreSection} + +## clone + +\`\`\` +git clone ${cloneUrl} +\`\`\` + +[← /agents/${owner}](/agents/${owner})${kataExists ? ` · [kata spec →](/games/${repo})` : ""} +`; + + // Dynamic description tailored to this attempt — gives every agent + // run a unique snippet for search results and social previews instead + // of falling back to the site default. + const totalSnippet = + verdict !== null + ? `, score ${verdict.totalScore >= 0 ? "+" : ""}${verdict.totalScore}` + : ""; + const description = kataExists + ? `${owner}'s ${repo} TDD kata attempt on tdd.md — ${verified}${totalSteps !== null ? `/${totalSteps}` : ""} steps verified${totalSnippet}.` + : `${owner}/${repo} on tdd.md — ${commits.length} ${commits.length === 1 ? "commit" : "commits"} in the phase log${totalSnippet}.`; + + const html = await renderPage({ + title: `${owner} · ${repo}${kataExists ? " TDD kata" : ""} — tdd.md`, + description, + bodyMarkdown: body, + ogPath: `https://tdd.md/${owner}/${repo}`, + active: "agents", + }); + return htmlResponse(html); +}; diff --git a/src/c51_render.ts b/src/c51_render.ts deleted file mode 100644 index 0019baaf703c93998d3f6ee40c43500351fb392b..0000000000000000000000000000000000000000 --- a/src/c51_render.ts +++ /dev/null @@ -1,528 +0,0 @@ -// c51 — UI: HTML rendering. Page chrome (renderPage / renderNotFound) -// plus all per-page body builders. Imports types from c13/c31; never -// from c11 or c21 (lower-numbered layers can be imported, higher ones -// cannot). - -import { marked } from "marked"; -import type { ProjectRow } from "./c13_database.ts"; -import { PROJECT_CONFIG_PATH } from "./c31_project_config.ts"; -import type { Phase } from "./c31_commits.ts"; -import { - DEMO_PERIOD, - DEMO_ORG, - DEMO_REPOS, - DEMO_REPORTS, - DEMO_SNAPSHOTS, - DEMO_STABILITY, - type AgentReport, - type FailureSlice, - type TestSnapshot, - type TestStability, -} from "./c31_reports_demo.ts"; - -const STYLE_CSS = "./public/style.css"; -const css = await Bun.file(STYLE_CSS).text(); - -export type Section = "home" | "games" | "guides" | "blog" | "agents" | "leaderboard"; - -export interface PageOptions { - title: string; - bodyMarkdown: string; - description?: string; - ogPath?: string; - active?: Section; - noindex?: boolean; - jsonLd?: Record; -} - -const SITE_DESCRIPTION = "Test-driven development for agentic coding. Scored katas, public verdicts."; - -const escape = (s: string): string => - s.replace(/&/g, "&").replace(/"/g, """).replace(//g, ">"); - -const navLink = (href: string, label: string, active: boolean): string => { - const cls = active ? ' class="nav-active"' : ""; - return `${label}`; -}; - -const nav = (active?: Section): string => ``; - -export const renderPage = async (opts: PageOptions): Promise => { - const body = await marked.parse(opts.bodyMarkdown, { gfm: true, breaks: false }); - const description = opts.description ?? SITE_DESCRIPTION; - const ogPath = opts.ogPath ?? "https://tdd.md"; - const robots = opts.noindex ? `\n` : ""; - const jsonLd = opts.jsonLd - ? `\n` - : ""; - return ` - - - - - - -${robots} - - - - - - - - - - - - - -${escape(opts.title)} -${jsonLd} - - -${nav(opts.active)} -

-${body} -

- -`; -}; - -export const renderNotFound = async (path: string): Promise => - renderPage({ - title: "404 — tdd.md", - bodyMarkdown: `# 404\n\n> No such path: \`${path}\`\n\nTry [home](/), [games](/games), [agents](/agents), or [leaderboard](/leaderboard).`, - noindex: true, - }); - -// --------------------------------------------------------------------- -// Small response/formatting helpers used by c21 handlers. -// --------------------------------------------------------------------- - -export const htmlResponse = (html: string, status = 200): Response => - new Response(html, { status, headers: { "Content-Type": "text/html; charset=utf-8" } }); - -export const errorPage = async (message: string, status = 400): Promise => { - const html = await renderPage({ - title: "error — tdd.md", - bodyMarkdown: `# error\n\n> ${message}\n\n[← back](/agents/register)`, - active: "agents", - }); - return htmlResponse(html, status); -}; - -export const phaseSpan = (p: Phase): string => { - const cls = p === "red" ? "red" : p === "green" ? "green" : p === "refactor" ? "blue" : "muted"; - return `${p}`; -}; - -export const relativeTime = (iso: string): string => { - const ms = Date.now() - new Date(iso).getTime(); - if (ms < 60_000) return `${Math.max(0, Math.floor(ms / 1000))}s ago`; - if (ms < 3_600_000) return `${Math.floor(ms / 60_000)}m ago`; - if (ms < 86_400_000) return `${Math.floor(ms / 3_600_000)}h ago`; - return `${Math.floor(ms / 86_400_000)}d ago`; -}; - -// --------------------------------------------------------------------- -// Body builders for /projects. -// --------------------------------------------------------------------- - -const projectListRow = (p: ProjectRow): string => { - const slug = `${p.repoOwner}/${p.repoName}`; - const display = p.displayName ?? slug; - const team = p.team ? ` · ${escape(p.team)}` : ""; - const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); - const runner = p.testRunner === "none" ? "trace-only" : p.testRunner; - return `| [${escape(display)}](/projects/${p.repoOwner}/${p.repoName}) ${team} | ${branches} | ${runner} |`; -}; - -export const projectsLandingMd = (projects: ProjectRow[]): string => { - const rows = projects.length === 0 - ? `| _no projects yet — [register one](/projects/new)_ | | |` - : projects.map(projectListRow).join("\n"); - return `# projects - -> Real repos that opted in to tdd.md scoring. Each project drops \`${PROJECT_CONFIG_PATH}\` at its root, registers here, and from then on its commits on tracked branches get judged structurally — red-fails, green-passes, no test-deletion, no regression. The aggregated scores feed [the reports](/reports). - -## tracked - -| project | branches | runner | -|---|---|---| -${rows} - -## register a repo - -[Register a project →](/projects/new) — paste a public GitHub URL; tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from the default branch and onboards it. - -## the config file - -Drop \`${PROJECT_CONFIG_PATH}\` at the root of your repo's default branch: - -\`\`\`json -{ - "version": 1, - "test_runner": "none", - "tracked_branches": ["main"], - "display_name": "API Gateway", - "team": "platform" -} -\`\`\` - -- **\`test_runner\`** — \`"none"\` for trace-mode (commit-discipline only, language-agnostic). \`"bun"\` will run the test suite once the sandbox-runner ships. -- **\`tracked_branches\`** — pushes to these branches get scored. Defaults to \`["main"]\`. -- **\`display_name\`** / **\`team\`** — optional, only used in the reporting UI. - -## what comes next - -Registration just stores the project. Per-commit judging (the part that produces score data for the reports) lands in the next sliver — until then the [report pages](/reports) keep showing the demo dataset. - -[← back to tdd.md](/) · [the reports](/reports) -`; -}; - -export const projectRegisterMd = ( - viewer: string | null, - prefilled?: string, - errorMessage?: string, -): string => { - if (!viewer) { - return `# register a project - -> You need to sign in before registering a project. We use your GitHub identity to record who onboarded the repo. - -[ sign in with github → ](/auth/github/start) - -[← all projects](/projects) -`; - } - const error = errorMessage - ? `

Couldn't register that repo:
${escape(errorMessage)}

` - : ""; - const value = prefilled ? ` value="${escape(prefilled)}"` : ""; - return `# register a project - -> Paste a public GitHub URL. tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from its default branch, validates it, and onboards the repo. Re-register the same repo to refresh the config. - -${error} - - - -> Signed in as ${escape(viewer)}. Don't have \`${PROJECT_CONFIG_PATH}\` yet? [See the format on /projects](/projects#the-config-file). - -[← all projects](/projects) -`; -}; - -// --------------------------------------------------------------------- -// Body builders for /reports. -// --------------------------------------------------------------------- - -const trendArrow = (delta: number): { glyph: string; cls: string } => - delta > 0 ? { glyph: "↑", cls: "up" } : delta < 0 ? { glyph: "↓", cls: "down" } : { glyph: "→", cls: "flat" }; - -const sparkline = (values: number[], height = 60, width = 320): string => { - if (values.length === 0) return ""; - const min = Math.min(...values); - const max = Math.max(...values); - const range = Math.max(1, max - min); - const stepX = width / Math.max(1, values.length - 1); - const pad = 6; - const innerH = height - pad * 2; - const points = values - .map((v, i) => { - const x = (i * stepX).toFixed(1); - const y = (pad + innerH - ((v - min) / range) * innerH).toFixed(1); - return `${x},${y}`; - }) - .join(" "); - return ``; -}; - -const tile = (a: AgentReport): string => { - const arr = trendArrow(a.delta); - const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; - return `

${escape(a.name)}

${a.score} / 100

${arr.glyph} ${escape(deltaStr)}

${a.commits.toLocaleString()} commits

top issue: ${escape(a.topIssueLabel)} (${a.topIssuePct}%)

`; -}; - -const bars = (mix: FailureSlice[]): string => { - const rows = mix - .map( - (s) => `

- ${escape(s.label)} - - ${s.pct}% -

`, - ) - .join("\n"); - return `

${rows}

`; -}; - -const streakBox = (a: AgentReport): string => { - const cls = a.streakBroken ? "broken" : a.streak >= 30 ? "long" : ""; - const label = a.streakBroken ? "recent break" : "consecutive clean cycles"; - return `${a.streak} ${label}`; -}; - -const mockBanner = ``; - -const snapshotBlock = (s: TestSnapshot): string => { - const failuresHtml = s.failures.length === 0 - ? `

all ${s.passing} tests groen

` - : s.failures - .map( - (f) => - `

${escape(f.test)} ${f.flaky ? "intermittent · " : ""}sinds ${f.since}

`, - ) - .concat([`

+ ${s.passing.toLocaleString()} passing tests

`]) - .join("\n"); - const statusCls = s.failing === 0 ? "ok" : "bad"; - return `

${escape(s.repo)} @ ${escape(s.branch)}

${s.total.toLocaleString()} tests · ${s.passing.toLocaleString()} passing${s.failing > 0 ? ` · ${s.failing.toLocaleString()} failing` : ""}

-${failuresHtml} - -

`; -}; - -const agentTagHtml = (slug: AgentReport["slug"]): string => { - const name = DEMO_REPORTS.find((r) => r.slug === slug)?.name ?? slug; - return `${escape(name)}`; -}; - -const stabilityRow = (s: TestStability): string => { - const cls = s.flagged ? "test-stab-row flagged" : "test-stab-row"; - const warn = s.flagged ? ` ⚠` : ""; - return ` - ${escape(s.test)}

${escape(s.repo)}

- ${s.pass} - ${s.fail} - ${s.deleted} - ${agentTagHtml(s.lastBrokenBy)}${warn} -`; -}; - -export const reportsLandingMd = (): string => `# reports - -> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout. - -This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data: - -- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents -- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits -- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam - -Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers. - -## what gets measured - -This layer measures **discipline**, not code-quality. Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch: - -| failure mode | what triggers it | what it costs | -|---|---|---| -| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit | -| \`test-deleted\` | test count drops between commits | -20 / commit | -| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit | -| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % | - -The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip. - -## reading the data - -For management: -- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page. - -For team-leads: -- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts. - -[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games) -`; - -export const execSummaryMd = (): string => { - const totalCommits = DEMO_REPORTS.reduce((s, a) => s + a.commits, 0); - const tiles = DEMO_REPORTS.map(tile).join("\n"); - return `# tdd-discipline rapport · q1 2026 - -${mockBanner} - -> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}. - -

-${tiles} -

- -## wat veranderde dit kwartaal - -Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf. - -## wat we doen - -- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)). -- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren. -- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt. - -## wat dit getal *niet* meet - -Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy. - ---- - -[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports) -`; -}; - -export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => { - const a = DEMO_REPORTS.find((r) => r.slug === slug); - if (!a) return null; - const arr = trendArrow(a.delta); - const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; - const recentRows = a.recent - .map( - (r) => - `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, - ) - .join("\n"); - return `# ${a.name} · drill-down - -${mockBanner} - -> Discipline-score **${a.score} / 100** ${arr.glyph} ${deltaStr} over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. - -## trend (30 dagen) - -

-${sparkline(a.trend)} -

- -${streakBox(a)} - -## failure-mode breakdown - -${bars(a.failureMix)} - -Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits). - -## recent flagged - -| date | repo | sha | phase | failure | pts | -|---|---|---|---|---|---| -${recentRows} - -## coaching - -- ${a.slug === "claude-code" ? `[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent \`red-did-not-fail\`.` : a.slug === "cursor" ? `[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, \`.cursor/rules\` to stop test-deletion.` : `[Aider is the closest agent to TDD on rails — until \`--auto-test\`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.`} -- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough. - ---- - -[← exec summary](/reports/demo) · [back to /reports](/reports) -`; -}; - -export const testsOverviewMd = (): string => { - const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0); - const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0); - const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0); - const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n"); - const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n"); - return `# tests overzicht - -${mockBanner} - -> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". - -## huidige stand · per repo - -

-${snapshots} -

- -**Totaal**: ${total.toLocaleString()} tests · ${passing.toLocaleString()} passing · ${failing.toLocaleString()} failing. - -## test-stabiliteit · q1 2026 - -Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken. - - - - - - - - - - - - -${stabRows} - -

test	pass	fail	del	laatst gebroken door

- -> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test. - -## hoe lees je dit - -- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm. -- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt. -- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon). -- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem. - ---- - -[← exec summary](/reports/demo) · [back to /reports](/reports) -`; -}; - -// --------------------------------------------------------------------- -// Body builder for /projects/:owner/:repo. -// --------------------------------------------------------------------- - -export const projectDetailMd = (p: ProjectRow): string => { - const display = p.displayName ?? `${p.repoOwner}/${p.repoName}`; - const registeredAt = new Date(p.registeredAt).toISOString().slice(0, 10); - const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); - const runnerNote = p.testRunner === "none" - ? "Trace-mode — judging looks at commit phase tags, test-count drift, and refactor stability. No test execution." - : "Bun runner — test suite executes in a sandbox at every tracked-branch commit. (Sandbox-runner ships in the next sliver; meanwhile this falls back to trace-mode.)"; - return `# ${escape(display)} - -> [${escape(p.repoOwner)}/${escape(p.repoName)}](https://github.com/${p.repoOwner}/${p.repoName}) · registered by [${escape(p.registeredBy)}](/agents/${p.registeredBy}) on ${registeredAt}. - -## config - -| key | value | -|---|---| -| test_runner | \`${p.testRunner}\` | -| tracked_branches | ${branches} | -| display_name | ${p.displayName ? `\`${escape(p.displayName)}\`` : "_(none)_"} | -| team | ${p.team ? `\`${escape(p.team)}\`` : "_(none)_"} | -| status | \`${p.status}\` | - -${runnerNote} - -## scored commits - -> _No commits judged yet._ The webhook ingest + judging pipeline lands in the next sliver — once it does, scored commits for tracked branches will appear here grouped by agent. - -## refresh - -Push an updated \`${PROJECT_CONFIG_PATH}\` to your default branch and [re-register](/projects/new?repo=${encodeURIComponent(`${p.repoOwner}/${p.repoName}`)}) to pick up the new config. - -[← all projects](/projects) -`; -}; diff --git a/src/c51_render_layout.ts b/src/c51_render_layout.ts new file mode 100644 index 0000000000000000000000000000000000000000..0d853e387153eff29fd1cc14610ef324225e9e2b --- /dev/null +++ b/src/c51_render_layout.ts @@ -0,0 +1,113 @@ +// c51 (layout) — UI: page chrome + small response/format helpers shared +// across every domain. Bigger per-domain body builders live next to this +// file as `c51_render_.ts` (projects, reports). Layout exports +// `escape`, `renderPage`, `renderNotFound`, `htmlResponse`, `errorPage`, +// `phaseSpan`, `relativeTime`, plus the `Section` + `PageOptions` types. +// Per the SAMA convention, lower layers don't import from this one. + +import { marked } from "marked"; +import type { Phase } from "./c31_commits.ts"; + +const STYLE_CSS = "./public/style.css"; +const css = await Bun.file(STYLE_CSS).text(); + +export type Section = "home" | "games" | "guides" | "blog" | "agents" | "leaderboard"; + +export interface PageOptions { + title: string; + bodyMarkdown: string; + description?: string; + ogPath?: string; + active?: Section; + noindex?: boolean; + jsonLd?: Record; +} + +const SITE_DESCRIPTION = "Test-driven development for agentic coding. Scored katas, public verdicts."; + +export const escape = (s: string): string => + s.replace(/&/g, "&").replace(/"/g, """).replace(//g, ">"); + +const navLink = (href: string, label: string, active: boolean): string => { + const cls = active ? ' class="nav-active"' : ""; + return `${label}`; +}; + +const nav = (active?: Section): string => ``; + +export const renderPage = async (opts: PageOptions): Promise => { + const body = await marked.parse(opts.bodyMarkdown, { gfm: true, breaks: false }); + const description = opts.description ?? SITE_DESCRIPTION; + const ogPath = opts.ogPath ?? "https://tdd.md"; + const robots = opts.noindex ? `\n` : ""; + const jsonLd = opts.jsonLd + ? `\n` + : ""; + return ` + + + + + + +${robots} + + + + + + + + + + + + + +${escape(opts.title)} +${jsonLd} + + +${nav(opts.active)} +

+${body} +

+ +`; +}; + +export const renderNotFound = async (path: string): Promise => + renderPage({ + title: "404 — tdd.md", + bodyMarkdown: `# 404\n\n> No such path: \`${path}\`\n\nTry [home](/), [games](/games), [agents](/agents), or [leaderboard](/leaderboard).`, + noindex: true, + }); + +// --------------------------------------------------------------------- +// Small response/formatting helpers used by c21 handlers + domain renders. +// --------------------------------------------------------------------- + +export const htmlResponse = (html: string, status = 200): Response => + new Response(html, { status, headers: { "Content-Type": "text/html; charset=utf-8" } }); + +export const errorPage = async (message: string, status = 400): Promise => { + const html = await renderPage({ + title: "error — tdd.md", + bodyMarkdown: `# error\n\n> ${message}\n\n[← back](/agents/register)`, + active: "agents", + }); + return htmlResponse(html, status); +}; + +export const phaseSpan = (p: Phase): string => { + const cls = p === "red" ? "red" : p === "green" ? "green" : p === "refactor" ? "blue" : "muted"; + return `${p}`; +}; + +export const relativeTime = (iso: string): string => { + const ms = Date.now() - new Date(iso).getTime(); + if (ms < 60_000) return `${Math.max(0, Math.floor(ms / 1000))}s ago`; + if (ms < 3_600_000) return `${Math.floor(ms / 60_000)}m ago`; + if (ms < 86_400_000) return `${Math.floor(ms / 3_600_000)}h ago`; + return `${Math.floor(ms / 86_400_000)}d ago`; +}; diff --git a/src/c51_render_projects.ts b/src/c51_render_projects.ts new file mode 100644 index 0000000000000000000000000000000000000000..e74463d7d53b97b95526a2eb5d59c1d2132a0ba1 --- /dev/null +++ b/src/c51_render_projects.ts @@ -0,0 +1,133 @@ +// c51 (projects) — body builders for /projects, /projects/new, +// /projects/:owner/:repo. Imports chrome helpers from c51_render_layout. + +import type { ProjectRow } from "./c13_database.ts"; +import { PROJECT_CONFIG_PATH } from "./c31_project_config.ts"; +import { escape } from "./c51_render_layout.ts"; + +const projectListRow = (p: ProjectRow): string => { + const slug = `${p.repoOwner}/${p.repoName}`; + const display = p.displayName ?? slug; + const team = p.team ? ` · ${escape(p.team)}` : ""; + const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); + const runner = p.testRunner === "none" ? "trace-only" : p.testRunner; + return `| [${escape(display)}](/projects/${p.repoOwner}/${p.repoName}) ${team} | ${branches} | ${runner} |`; +}; + +export const projectsLandingMd = (projects: ProjectRow[]): string => { + const rows = projects.length === 0 + ? `| _no projects yet — [register one](/projects/new)_ | | |` + : projects.map(projectListRow).join("\n"); + return `# projects + +> Real repos that opted in to tdd.md scoring. Each project drops \`${PROJECT_CONFIG_PATH}\` at its root, registers here, and from then on its commits on tracked branches get judged structurally — red-fails, green-passes, no test-deletion, no regression. The aggregated scores feed [the reports](/reports). + +## tracked + +| project | branches | runner | +|---|---|---| +${rows} + +## register a repo + +[Register a project →](/projects/new) — paste a public GitHub URL; tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from the default branch and onboards it. + +## the config file + +Drop \`${PROJECT_CONFIG_PATH}\` at the root of your repo's default branch: + +\`\`\`json +{ + "version": 1, + "test_runner": "none", + "tracked_branches": ["main"], + "display_name": "API Gateway", + "team": "platform" +} +\`\`\` + +- **\`test_runner\`** — \`"none"\` for trace-mode (commit-discipline only, language-agnostic). \`"bun"\` will run the test suite once the sandbox-runner ships. +- **\`tracked_branches\`** — pushes to these branches get scored. Defaults to \`["main"]\`. +- **\`display_name\`** / **\`team\`** — optional, only used in the reporting UI. + +## what comes next + +Registration just stores the project. Per-commit judging (the part that produces score data for the reports) lands in the next sliver — until then the [report pages](/reports) keep showing the demo dataset. + +[← back to tdd.md](/) · [the reports](/reports) +`; +}; + +export const projectRegisterMd = ( + viewer: string | null, + prefilled?: string, + errorMessage?: string, +): string => { + if (!viewer) { + return `# register a project + +> You need to sign in before registering a project. We use your GitHub identity to record who onboarded the repo. + +[ sign in with github → ](/auth/github/start) + +[← all projects](/projects) +`; + } + const error = errorMessage + ? `

Couldn't register that repo:
${escape(errorMessage)}

` + : ""; + const value = prefilled ? ` value="${escape(prefilled)}"` : ""; + return `# register a project + +> Paste a public GitHub URL. tdd.md fetches \`${PROJECT_CONFIG_PATH}\` from its default branch, validates it, and onboards the repo. Re-register the same repo to refresh the config. + +${error} + + + +> Signed in as ${escape(viewer)}. Don't have \`${PROJECT_CONFIG_PATH}\` yet? [See the format on /projects](/projects#the-config-file). + +[← all projects](/projects) +`; +}; + +export const projectDetailMd = (p: ProjectRow): string => { + const display = p.displayName ?? `${p.repoOwner}/${p.repoName}`; + const registeredAt = new Date(p.registeredAt).toISOString().slice(0, 10); + const branches = p.trackedBranches.map((b) => `\`${b}\``).join(", "); + const runnerNote = p.testRunner === "none" + ? "Trace-mode — judging looks at commit phase tags, test-count drift, and refactor stability. No test execution." + : "Bun runner — test suite executes in a sandbox at every tracked-branch commit. (Sandbox-runner ships in the next sliver; meanwhile this falls back to trace-mode.)"; + return `# ${escape(display)} + +> [${escape(p.repoOwner)}/${escape(p.repoName)}](https://github.com/${p.repoOwner}/${p.repoName}) · registered by [${escape(p.registeredBy)}](/agents/${p.registeredBy}) on ${registeredAt}. + +## config + +| key | value | +|---|---| +| test_runner | \`${p.testRunner}\` | +| tracked_branches | ${branches} | +| display_name | ${p.displayName ? `\`${escape(p.displayName)}\`` : "_(none)_"} | +| team | ${p.team ? `\`${escape(p.team)}\`` : "_(none)_"} | +| status | \`${p.status}\` | + +${runnerNote} + +## scored commits + +> _No commits judged yet._ The webhook ingest + judging pipeline lands in the next sliver — once it does, scored commits for tracked branches will appear here grouped by agent. + +## refresh + +Push an updated \`${PROJECT_CONFIG_PATH}\` to your default branch and [re-register](/projects/new?repo=${encodeURIComponent(`${p.repoOwner}/${p.repoName}`)}) to pick up the new config. + +[← all projects](/projects) +`; +}; diff --git a/src/c51_render_reports.ts b/src/c51_render_reports.ts new file mode 100644 index 0000000000000000000000000000000000000000..b34901f6d307c8544790e3aa228d6b2275e03988 --- /dev/null +++ b/src/c51_render_reports.ts @@ -0,0 +1,281 @@ +// c51 (reports) — body builders for /reports, /reports/demo, +// /reports/demo/agents/:slug, /reports/demo/tests. All synthetic data +// comes from c31_reports_demo; chrome helpers come from c51_render_layout. + +import { + DEMO_PERIOD, + DEMO_ORG, + DEMO_REPOS, + DEMO_REPORTS, + DEMO_SNAPSHOTS, + DEMO_STABILITY, + type AgentReport, + type FailureSlice, + type TestSnapshot, + type TestStability, +} from "./c31_reports_demo.ts"; +import { escape } from "./c51_render_layout.ts"; + +const trendArrow = (delta: number): { glyph: string; cls: string } => + delta > 0 ? { glyph: "↑", cls: "up" } : delta < 0 ? { glyph: "↓", cls: "down" } : { glyph: "→", cls: "flat" }; + +const sparkline = (values: number[], height = 60, width = 320): string => { + if (values.length === 0) return ""; + const min = Math.min(...values); + const max = Math.max(...values); + const range = Math.max(1, max - min); + const stepX = width / Math.max(1, values.length - 1); + const pad = 6; + const innerH = height - pad * 2; + const points = values + .map((v, i) => { + const x = (i * stepX).toFixed(1); + const y = (pad + innerH - ((v - min) / range) * innerH).toFixed(1); + return `${x},${y}`; + }) + .join(" "); + return ``; +}; + +const tile = (a: AgentReport): string => { + const arr = trendArrow(a.delta); + const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; + return `

${escape(a.name)}

${a.score} / 100

${arr.glyph} ${escape(deltaStr)}

${a.commits.toLocaleString()} commits

top issue: ${escape(a.topIssueLabel)} (${a.topIssuePct}%)

`; +}; + +const bars = (mix: FailureSlice[]): string => { + const rows = mix + .map( + (s) => `

+ ${escape(s.label)} + + ${s.pct}% +

`, + ) + .join("\n"); + return `

${rows}

`; +}; + +const streakBox = (a: AgentReport): string => { + const cls = a.streakBroken ? "broken" : a.streak >= 30 ? "long" : ""; + const label = a.streakBroken ? "recent break" : "consecutive clean cycles"; + return `${a.streak} ${label}`; +}; + +const mockBanner = ``; + +const snapshotBlock = (s: TestSnapshot): string => { + const failuresHtml = s.failures.length === 0 + ? `

all ${s.passing} tests groen

` + : s.failures + .map( + (f) => + `

${escape(f.test)} ${f.flaky ? "intermittent · " : ""}sinds ${f.since}

`, + ) + .concat([`

+ ${s.passing.toLocaleString()} passing tests

`]) + .join("\n"); + const statusCls = s.failing === 0 ? "ok" : "bad"; + return `

${escape(s.repo)} @ ${escape(s.branch)}

${s.total.toLocaleString()} tests · ${s.passing.toLocaleString()} passing${s.failing > 0 ? ` · ${s.failing.toLocaleString()} failing` : ""}

+${failuresHtml} + +

`; +}; + +const agentTagHtml = (slug: AgentReport["slug"]): string => { + const name = DEMO_REPORTS.find((r) => r.slug === slug)?.name ?? slug; + return `${escape(name)}`; +}; + +const stabilityRow = (s: TestStability): string => { + const cls = s.flagged ? "test-stab-row flagged" : "test-stab-row"; + const warn = s.flagged ? ` ⚠` : ""; + return ` + ${escape(s.test)}

${escape(s.repo)}

+ ${s.pass} + ${s.fail} + ${s.deleted} + ${agentTagHtml(s.lastBrokenBy)}${warn} +`; +}; + +export const reportsLandingMd = (): string => `# reports + +> Per-agent TDD-discipline reporting over real project repos. The judge replays each commit on tracked branches and scores it structurally — red-fails, green-passes, no test-deletion, no regression. The scores roll up per agent over time, with trend, failure-mode breakdown, and an exec summary fit for a quarterly readout. + +This is a design preview. The pipeline that ingests real repos isn't wired yet; what you can navigate today is a mockup with synthetic data: + +- [exec summary mockup →](/reports/demo) — single page, 1 quarter, 3 agents +- [per-agent drill-down →](/reports/demo/agents/cursor) — trend, failure mix, recent flagged commits +- [tests overzicht →](/reports/demo/tests) — huidige stand per repo + test-stabiliteit per test-naam + +Want a real repo on this layer? [Register a project →](/projects) — drops \`.tdd-md.json\` at the repo root, onboards in seconds. Per-commit judging follows in the next sliver; until then registered projects show up under [/projects](/projects) but don't yet feed the report numbers. + +## what gets measured + +This layer measures **discipline**, not code-quality. Without hidden tests (those only exist on katas), tdd.md can't catch tautologies or weakened assertions on real repos. It *can* catch: + +| failure mode | what triggers it | what it costs | +|---|---|---| +| \`red-did-not-fail\` | commit tagged \`red:\` but tests pass | -5 / commit | +| \`test-deleted\` | test count drops between commits | -20 / commit | +| \`broken refactor\` | tests fail at a \`refactor:\` commit | -5 / commit | +| \`no phase tag\` | tracked-branch commit missing \`red\\|green\\|refactor:\` | counts against phase-coverage % | + +The metric pair that anchors the report is **discipline-score** (0-100) + **phase-coverage %**. An agent with 0% phase-coverage doesn't *do* TDD — its score is N/A, not 0. Don't let a low-volume non-attempt look like a high-volume slip. + +## reading the data + +For management: +- the [exec summary](/reports/demo) gives one number per agent + a narrative paragraph. Prints to one page. + +For team-leads: +- the [drill-down](/reports/demo/agents/cursor) shows trend, failure-mix, streak, and the most recent flagged commits with one-click coaching links to the [Claude Code](/blog/claude-code-tdd) / [Cursor](/blog/cursor-tdd) / [Aider](/blog/aider-tdd) posts. + +[← back to tdd.md](/) · [the blog](/blog) · [the katas](/games) +`; + +export const execSummaryMd = (): string => { + const totalCommits = DEMO_REPORTS.reduce((s, a) => s + a.commits, 0); + const tiles = DEMO_REPORTS.map(tile).join("\n"); + return `# tdd-discipline rapport · q1 2026 + +${mockBanner} + +> **Periode** ${DEMO_PERIOD} · **Scope** ${DEMO_REPOS} repos · ${totalCommits.toLocaleString()} AI-toegeschreven commits in ${escape(DEMO_ORG)}. + +

+${tiles} +

+ +## wat veranderde dit kwartaal + +Cursor's score zakte 15 punten nadat agent-mode in maart default werd; test-deletion-incidenten stegen van 2% naar 14% van refactor-commits, geconcentreerd in de \`api-gateway\` repo. Claude Code's score steeg na invoering van phase-getagde commit-prefix in CLAUDE.md aan het einde van januari. Aider blijft stabiel hoog — auto-commit-per-edit voorkomt het meeste cross-phase bedrog vanzelf. + +## wat we doen + +- **Cursor in \`api-gateway\`**: agent-mode gedeactiveerd voor refactor-prompts, CONVENTIONS-regel "never delete a test in a refactor commit" gepind ([details →](/reports/demo/agents/cursor)). +- **Claude Code uitrollen**: het CLAUDE.md-template dat in \`billing-service\` werkte naar de andere drie repos kopiëren. +- **Volgende meting**: 2026-04-30, mid-Q2, om te zien of de Cursor-fix vasthoudt. + +## wat dit getal *niet* meet + +Discipline, niet code-kwaliteit. Hidden tests (zoals op de katas) bestaan niet voor productie-repos, dus *tautologische* tests en *zwak-geformuleerde* asserties blijven onzichtbaar voor de judge. Dit cijfer zegt: "de agent volgt de TDD-cyclus eerlijk". Het zegt niets over of de tests die hij schrijft het juiste beweren. Voor dat tweede signaal blijft kata-performance ([leaderboard](/leaderboard)) de proxy. + +--- + +[per-agent drill-down: Claude Code](/reports/demo/agents/claude-code) · [Cursor](/reports/demo/agents/cursor) · [Aider](/reports/demo/agents/aider) · [tests overzicht](/reports/demo/tests) · [back to /reports](/reports) +`; +}; + +export const agentDrilldownMd = (slug: AgentReport["slug"]): string | null => { + const a = DEMO_REPORTS.find((r) => r.slug === slug); + if (!a) return null; + const arr = trendArrow(a.delta); + const deltaStr = a.delta > 0 ? `+${a.delta}` : `${a.delta}`; + const recentRows = a.recent + .map( + (r) => + `| ${r.date} | \`${r.repo}\` | \`${r.sha}\` | ${r.phase} | ${r.failure} | ${r.pts} |`, + ) + .join("\n"); + return `# ${a.name} · drill-down + +${mockBanner} + +> Discipline-score **${a.score} / 100** ${arr.glyph} ${deltaStr} over ${DEMO_PERIOD}. ${a.commits.toLocaleString()} commits geanalyseerd, phase-coverage **${a.phaseCoveragePct}%**. + +## trend (30 dagen) + +

+${sparkline(a.trend)} +

+ +${streakBox(a)} + +## failure-mode breakdown + +${bars(a.failureMix)} + +Top issue dit kwartaal: **${escape(a.topIssueLabel)}** (${a.topIssuePct}% van commits). + +## recent flagged + +| date | repo | sha | phase | failure | pts | +|---|---|---|---|---|---| +${recentRows} + +## coaching + +- ${a.slug === "claude-code" ? `[Claude Code does not do TDD by default](/blog/claude-code-tdd) — CLAUDE.md rules + fresh-context boundaries that prevent \`red-did-not-fail\`.` : a.slug === "cursor" ? `[Cursor knows how to do TDD; users skip the parts that matter](/blog/cursor-tdd) — Plan Mode, fresh chats, \`.cursor/rules\` to stop test-deletion.` : `[Aider is the closest agent to TDD on rails — until \`--auto-test\`](/blog/aider-tdd) — keep auto-test off for green commits, on for refactor.`} +- [Tweag's TDD handbook needs a judge](/blog/tweag-handbook-tdd) — why local green isn't enough. + +--- + +[← exec summary](/reports/demo) · [back to /reports](/reports) +`; +}; + +export const testsOverviewMd = (): string => { + const total = DEMO_SNAPSHOTS.reduce((s, r) => s + r.total, 0); + const passing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.passing, 0); + const failing = DEMO_SNAPSHOTS.reduce((s, r) => s + r.failing, 0); + const snapshots = DEMO_SNAPSHOTS.map(snapshotBlock).join("\n"); + const stabRows = DEMO_STABILITY.map(stabilityRow).join("\n"); + return `# tests overzicht + +${mockBanner} + +> Snapshot van de huidige test-stand per repo + stabiliteit van individuele tests over ${DEMO_PERIOD}. Een hoge fail-count zonder deletion betekent dat de test echte regressies vangt; hoge fail+deletion is het signaal dat een test onder druk komt te staan — vaak het spoor van een agent die het makkelijker maakt zichzelf te laten "winnen". + +## huidige stand · per repo + +

+${snapshots} +

+ +**Totaal**: ${total.toLocaleString()} tests · ${passing.toLocaleString()} passing · ${failing.toLocaleString()} failing. + +## test-stabiliteit · q1 2026 + +Top 12 meest-flappende tests dit kwartaal, met aantal pass/fail/deleted-events en de agent die de test het laatst heeft gebroken. + + + + + + + + + + + + +${stabRows} + +

test	pass	fail	del	laatst gebroken door

+ +> ⚠ markeert tests waarbij dit kwartaal een test-deletion of weakening-event is gedetecteerd. In een echte setup linkt klik op een test-naam door naar de commit-historie van die specifieke test. + +## hoe lees je dit + +- **Veel pass, weinig fail, 0 del**: gezond. Test doet wat hij moet, niemand sloopt 'm. +- **Veel fail, 0 del**: test vangt actief regressies. Goed nieuws — discipline werkt. +- **Fail én del > 0**: test wordt onder druk gezet. Coach de agent die 'm gebroken heeft (klik op het tag-icoon). +- **Snapshot rood + stabiliteit hoog**: bekende, langlopende kapotte test. Apart onderwerp, niet per se een agent-probleem. + +--- + +[← exec summary](/reports/demo) · [back to /reports](/reports) +`; +};