Implement /sitemap.xml per SAMA v2 — Layer 1 helper + sibling test
The previous inline sitemap had three correctness bugs that the new helper fixes:
- Every URL got `<lastmod>` = today (so search engines saw the entire site as freshly modified on every crawl). Now blog URLs carry their actual `date` field; other URLs omit `<lastmod>`.
- No XML escaping on `<loc>` values, so an `&` in a URL would produce invalid XML.
- All XML construction lived at Layer 3 (d21), violating §1.2.

New shape:
- Layer 0: SITE_BASE_URL added to a31_site_config.ts.
- Layer 1: b32_sitemap.ts — `renderSitemap(urls)` is pure, no I/O. The eleven-entry STATIC_PATHS const matches the /goal load-bearing list.
- Layer 1 sibling test: 12 cases — XML escape (& < > " '), empty list, single URL with/without lastmod, order preservation, determinism, declaration/closing tag, STATIC_PATHS shape.
- Layer 3: handler closure imports registries + helper, emits `Cache-Control: public, max-age=3600`.

URL coverage:
- 11 static (per /goal)
- ALL_POSTS → /blog/<slug> with lastmod = date
- ALL_SAMA → /sama/<slug>
- ALL_GUIDES → /guides/<slug>

Drops /agents and per-game URLs from the previous inline sitemap — neither was in the /goal's enumerated list. Adds /sama/v2, /sama/v2/verify, /sama/v2/example-crud, /sama/v2/example-wordpress.

Tests: 379/379 pass (367 → 379, +12 new). robots.txt already references the sitemap (unchanged).

Co-Authored-By: Claude Opus 4.7 <[email protected]>
4 files changed · +200 −32
src/a31_site_config.ts
+4
−0
| @@ -3,6 +3,10 @@ | ||
| 3 | 3 | // reports/live, sitemap, etc.) reference the same values without |
| 4 | 4 | // circular imports between c21_handlers_*. |
| 5 | 5 | |
| 6 | +// Canonical absolute base for every public URL the site emits | |
| 7 | +// (sitemap, og:url, canonical link, RSS, etc.). No trailing slash. | |
| 8 | +export const SITE_BASE_URL = "https://tdd.md"; | |
| 9 | + | |
| 6 | 10 | export const LIVE_REPO_OWNER = "syntaxai"; |
| 7 | 11 | export const LIVE_REPO_NAME = "tdd.md"; |
| 8 | 12 | // Number of recent commits the live-reports view samples from the |
src/b32_sitemap.test.ts
+113
−0
| @@ -0,0 +1,113 @@ | ||
| 1 | +import { describe, expect, test } from "bun:test"; | |
| 2 | +import { | |
| 3 | + escapeXml, | |
| 4 | + renderSitemap, | |
| 5 | + STATIC_PATHS, | |
| 6 | + type SitemapUrl, | |
| 7 | +} from "./b32_sitemap.ts"; | |
| 8 | + | |
| 9 | +describe("escapeXml", () => { | |
| 10 | + test("escapes the five named entities", () => { | |
| 11 | + expect(escapeXml("&")).toBe("&amp;"); | |
| 12 | + expect(escapeXml("<")).toBe("&lt;"); | |
| 13 | + expect(escapeXml(">")).toBe("&gt;"); | |
| 14 | + expect(escapeXml('"')).toBe("&quot;"); | |
| 15 | + expect(escapeXml("'")).toBe("&apos;"); | |
| 16 | + }); | |
| 17 | + | |
| 18 | + test("leaves regular URL characters untouched", () => { | |
| 19 | + const s = "https://tdd.md/blog/sama-v2-workingset-cross-repo-baseline"; | |
| 20 | + expect(escapeXml(s)).toBe(s); | |
| 21 | + }); | |
| 22 | + | |
| 23 | + test("ampersand always escapes first (no double-escape)", () => { | |
| 24 | + expect(escapeXml("a & b < c")).toBe("a &amp; b &lt; c"); | |
| 25 | + }); | |
| 26 | +}); | |
| 27 | + | |
| 28 | +describe("renderSitemap", () => { | |
| 29 | + test("empty list → valid urlset with no <url> children", () => { | |
| 30 | + const xml = renderSitemap([]); | |
| 31 | + expect(xml).toBe(`<?xml version="1.0" encoding="UTF-8"?> | |
| 32 | +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> | |
| 33 | +</urlset>`); | |
| 34 | + }); | |
| 35 | + | |
| 36 | + test("single URL with lastmod", () => { | |
| 37 | + const xml = renderSitemap([ | |
| 38 | + { loc: "https://tdd.md/blog/x", lastmod: "2026-05-25" }, | |
| 39 | + ]); | |
| 40 | + expect(xml).toBe(`<?xml version="1.0" encoding="UTF-8"?> | |
| 41 | +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> | |
| 42 | + <url><loc>https://tdd.md/blog/x</loc><lastmod>2026-05-25</lastmod></url> | |
| 43 | +</urlset>`); | |
| 44 | + }); | |
| 45 | + | |
| 46 | + test("single URL without lastmod omits the <lastmod> element", () => { | |
| 47 | + const xml = renderSitemap([{ loc: "https://tdd.md/sama" }]); | |
| 48 | + expect(xml).toContain("<url><loc>https://tdd.md/sama</loc></url>"); | |
| 49 | + expect(xml).not.toContain("<lastmod>"); | |
| 50 | + }); | |
| 51 | + | |
| 52 | + test("multiple URLs preserve input order", () => { | |
| 53 | + const xml = renderSitemap([ | |
| 54 | + { loc: "https://tdd.md/a" }, | |
| 55 | + { loc: "https://tdd.md/b" }, | |
| 56 | + { loc: "https://tdd.md/c" }, | |
| 57 | + ]); | |
| 58 | + const aIdx = xml.indexOf("/a</loc>"); | |
| 59 | + const bIdx = xml.indexOf("/b</loc>"); | |
| 60 | + const cIdx = xml.indexOf("/c</loc>"); | |
| 61 | + expect(aIdx).toBeGreaterThan(-1); | |
| 62 | + expect(aIdx).toBeLessThan(bIdx); | |
| 63 | + expect(bIdx).toBeLessThan(cIdx); | |
| 64 | + }); | |
| 65 | + | |
| 66 | + test("XML-escapes & and < inside <loc> values", () => { | |
| 67 | + const xml = renderSitemap([ | |
| 68 | + { loc: "https://tdd.md/q?a=1&b=2" }, | |
| 69 | + { loc: "https://tdd.md/<weird>" }, | |
| 70 | + ]); | |
| 71 | + expect(xml).toContain("<loc>https://tdd.md/q?a=1&amp;b=2</loc>"); | |
| 72 | + expect(xml).toContain("<loc>https://tdd.md/&lt;weird&gt;</loc>"); | |
| 73 | + expect(xml).not.toContain("a=1&b=2"); | |
| 74 | + }); | |
| 75 | + | |
| 76 | + test("opens with the XML declaration and closes with </urlset>", () => { | |
| 77 | + const xml = renderSitemap([{ loc: "https://tdd.md/" }]); | |
| 78 | + expect(xml.startsWith('<?xml version="1.0" encoding="UTF-8"?>')).toBe(true); | |
| 79 | + expect(xml.endsWith("</urlset>")).toBe(true); | |
| 80 | + }); | |
| 81 | + | |
| 82 | + test("deterministic — same input twice → byte-identical output", () => { | |
| 83 | + const urls: ReadonlyArray<SitemapUrl> = [ | |
| 84 | + { loc: "https://tdd.md/", lastmod: "2026-05-25" }, | |
| 85 | + { loc: "https://tdd.md/blog" }, | |
| 86 | + ]; | |
| 87 | + expect(renderSitemap(urls)).toBe(renderSitemap(urls)); | |
| 88 | + }); | |
| 89 | +}); | |
| 90 | + | |
| 91 | +describe("STATIC_PATHS", () => { | |
| 92 | + test("covers the eleven load-bearing routes from the /goal", () => { | |
| 93 | + expect(STATIC_PATHS).toEqual([ | |
| 94 | + "/", | |
| 95 | + "/blog", | |
| 96 | + "/games", | |
| 97 | + "/leaderboard", | |
| 98 | + "/sama", | |
| 99 | + "/sama/v2", | |
| 100 | + "/sama/v2/verify", | |
| 101 | + "/sama/v2/example-crud", | |
| 102 | + "/sama/v2/example-wordpress", | |
| 103 | + "/sama/skill", | |
| 104 | + "/guides", | |
| 105 | + ]); | |
| 106 | + }); | |
| 107 | + | |
| 108 | + test("each path is absolute (starts with /)", () => { | |
| 109 | + for (const p of STATIC_PATHS) { | |
| 110 | + expect(p.startsWith("/")).toBe(true); | |
| 111 | + } | |
| 112 | + }); | |
| 113 | +}); | |
src/b32_sitemap.ts
+53
−0
| @@ -0,0 +1,53 @@ | ||
| 1 | +// b32 — Layer 1 pure helper: render a sitemaps.org 0.9 urlset. | |
| 2 | +// No I/O. Deterministic: same input array → same output bytes. | |
| 3 | +// Caller (d21_app.ts) composes the URL list from ALL_POSTS, | |
| 4 | +// ALL_SAMA, ALL_GUIDES, and STATIC_PATHS, then asks this module | |
| 5 | +// for the XML string. Sibling test pins escape behaviour and | |
| 6 | +// shape; the verifier's §4.3 modeled-tests check requires it. | |
| 7 | + | |
| 8 | +export interface SitemapUrl { | |
| 9 | + readonly loc: string; | |
| 10 | + readonly lastmod?: string; | |
| 11 | +} | |
| 12 | + | |
| 13 | +// The eleven load-bearing static URLs that don't come from a | |
| 14 | +// registry. Each must correspond to a literal route registered | |
| 15 | +// in d21_app.ts; the handler iterates this list verbatim. | |
| 16 | +export const STATIC_PATHS: ReadonlyArray<string> = [ | |
| 17 | + "/", | |
| 18 | + "/blog", | |
| 19 | + "/games", | |
| 20 | + "/leaderboard", | |
| 21 | + "/sama", | |
| 22 | + "/sama/v2", | |
| 23 | + "/sama/v2/verify", | |
| 24 | + "/sama/v2/example-crud", | |
| 25 | + "/sama/v2/example-wordpress", | |
| 26 | + "/sama/skill", | |
| 27 | + "/guides", | |
| 28 | +]; | |
| 29 | + | |
| 30 | +// Minimal XML 1.0 escape for character data + attribute values. | |
| 31 | +// The five named entities are the canonical set; anything else | |
| 32 | +// is left as-is (the sitemap spec requires UTF-8, not ASCII). | |
| 33 | +export const escapeXml = (s: string): string => | |
| 34 | + s | |
| 35 | + .replace(/&/g, "&amp;") | |
| 36 | + .replace(/</g, "&lt;") | |
| 37 | + .replace(/>/g, "&gt;") | |
| 38 | + .replace(/"/g, "&quot;") | |
| 39 | + .replace(/'/g, "&apos;"); | |
| 40 | + | |
| 41 | +const renderUrl = (u: SitemapUrl): string => { | |
| 42 | + const loc = `<loc>${escapeXml(u.loc)}</loc>`; | |
| 43 | + const lastmod = | |
| 44 | + u.lastmod !== undefined ? `<lastmod>${escapeXml(u.lastmod)}</lastmod>` : ""; | |
| 45 | + return ` <url>${loc}${lastmod}</url>`; | |
| 46 | +}; | |
| 47 | + | |
| 48 | +export const renderSitemap = (urls: ReadonlyArray<SitemapUrl>): string => { | |
| 49 | + const body = urls.map(renderUrl).join("\n"); | |
| 50 | + const inner = body.length > 0 ? `\n${body}\n` : "\n"; | |
| 51 | + return `<?xml version="1.0" encoding="UTF-8"?> | |
| 52 | +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">${inner}</urlset>`; | |
| 53 | +}; | |
src/d21_app.ts
+30
−32
| @@ -12,6 +12,12 @@ import { listGames, loadGame } from "./a31_games.ts"; | ||
| 12 | 12 | import { ALL_POSTS } from "./a31_blog.ts"; |
| 13 | 13 | import { ALL_GUIDES } from "./a31_guides.ts"; |
| 14 | 14 | import { ALL_SAMA } from "./a31_sama.ts"; |
| 15 | +import { SITE_BASE_URL } from "./a31_site_config.ts"; | |
| 16 | +import { | |
| 17 | + renderSitemap, | |
| 18 | + STATIC_PATHS, | |
| 19 | + type SitemapUrl, | |
| 20 | +} from "./b32_sitemap.ts"; | |
| 15 | 21 | import { |
| 16 | 22 | getViewer, |
| 17 | 23 | sessionCookieHeader, |
| @@ -181,39 +187,31 @@ export const createApp = (port: number) => Bun.serve({ | ||
| 181 | 187 | { headers: { "Content-Type": "text/plain; charset=utf-8" } }, |
| 182 | 188 | ), |
| 183 | 189 | |
| 184 | - "/sitemap.xml": async () => { | |
| 185 | - const today = new Date().toISOString().slice(0, 10); | |
| 186 | - const url = (loc: string, priority: string) => | |
| 187 | - `<url><loc>${loc}</loc><lastmod>${today}</lastmod><priority>${priority}</priority></url>`; | |
| 188 | - const kataUrls = ALL_GAMES.map((g) => | |
| 189 | - url(`https://tdd.md/games/${g.id}`, "0.8"), | |
| 190 | - ).join("\n"); | |
| 191 | - const guideUrls = ALL_GUIDES.map((g) => | |
| 192 | - url(`https://tdd.md/guides/${g.slug}`, "0.8"), | |
| 193 | - ).join("\n"); | |
| 194 | - const samaUrls = ALL_SAMA.map((d) => | |
| 195 | - url(`https://tdd.md/sama/${d.slug}`, "0.8"), | |
| 196 | - ).join("\n"); | |
| 197 | - const blogUrls = ALL_POSTS.map((p) => | |
| 198 | - url(`https://tdd.md/blog/${p.slug}`, "0.8"), | |
| 199 | - ).join("\n"); | |
| 200 | - const xml = `<?xml version="1.0" encoding="UTF-8"?> | |
| 201 | -<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> | |
| 202 | -${url("https://tdd.md/", "1.0")} | |
| 203 | -${url("https://tdd.md/games", "0.9")} | |
| 204 | -${kataUrls} | |
| 205 | -${url("https://tdd.md/guides", "0.9")} | |
| 206 | -${guideUrls} | |
| 207 | -${url("https://tdd.md/sama", "0.9")} | |
| 208 | -${samaUrls} | |
| 209 | -${url("https://tdd.md/sama/skill", "0.8")} | |
| 210 | -${url("https://tdd.md/blog", "0.7")} | |
| 211 | -${blogUrls} | |
| 212 | -${url("https://tdd.md/agents", "0.7")} | |
| 213 | -${url("https://tdd.md/leaderboard", "0.7")} | |
| 214 | -</urlset>`; | |
| 190 | + "/sitemap.xml": () => { | |
| 191 | + const staticUrls: SitemapUrl[] = STATIC_PATHS.map((p) => ({ | |
| 192 | + loc: `${SITE_BASE_URL}${p}`, | |
| 193 | + })); | |
| 194 | + const blogUrls: SitemapUrl[] = ALL_POSTS.map((p) => ({ | |
| 195 | + loc: `${SITE_BASE_URL}/blog/${p.slug}`, | |
| 196 | + lastmod: p.date, | |
| 197 | + })); | |
| 198 | + const samaUrls: SitemapUrl[] = ALL_SAMA.map((d) => ({ | |
| 199 | + loc: `${SITE_BASE_URL}/sama/${d.slug}`, | |
| 200 | + })); | |
| 201 | + const guideUrls: SitemapUrl[] = ALL_GUIDES.map((g) => ({ | |
| 202 | + loc: `${SITE_BASE_URL}/guides/${g.slug}`, | |
| 203 | + })); | |
| 204 | + const xml = renderSitemap([ | |
| 205 | + ...staticUrls, | |
| 206 | + ...blogUrls, | |
| 207 | + ...samaUrls, | |
| 208 | + ...guideUrls, | |
| 209 | + ]); | |
| 215 | 210 | return new Response(xml, { |
| 216 | - headers: { "Content-Type": "application/xml; charset=utf-8" }, | |
| 211 | + headers: { | |
| 212 | + "Content-Type": "application/xml; charset=utf-8", | |
| 213 | + "Cache-Control": "public, max-age=3600", | |
| 214 | + }, | |
| 217 | 215 | }); |
| 218 | 216 | }, |
| 219 | 217 | |