syntaxai/tdd.md · commit 73d14f1

Implement /sitemap.xml per SAMA v2 — Layer 1 helper + sibling test

The previous inline sitemap had three defects that this change
fixes:
- every URL got `<lastmod>` = today (so search engines saw the
  entire site as freshly modified on every crawl). Now blog URLs
  carry their actual `date` field; other URLs omit `<lastmod>`.
- no XML escape on `<loc>` values, so an `&` in a URL would
  produce invalid XML.
- all XML construction lived at Layer 3 (d21), violating §1.2.

New shape:
- Layer 0: SITE_BASE_URL added to a31_site_config.ts.
- Layer 1: b32_sitemap.ts — `renderSitemap(urls)` is pure, no I/O.
  The eleven-entry STATIC_PATHS const matches the /goal
  load-bearing list.
- Layer 1 sibling test: 12 cases — XML escape (& < > " '), empty
  list, single URL with/without lastmod, order preservation,
  determinism, declaration/closing tag, STATIC_PATHS shape.
- Layer 3: handler closure imports registries + helper, emits
  `Cache-Control: public, max-age=3600`.

URL coverage:
- 11 static (per /goal)
- ALL_POSTS → /blog/<slug> with lastmod = date
- ALL_SAMA → /sama/<slug>
- ALL_GUIDES → /guides/<slug>

Drops /agents and per-game URLs from the previous inline sitemap —
neither was in the /goal's enumerated list — along with the
per-URL `<priority>` element. Adds /sama/v2, /sama/v2/verify,
/sama/v2/example-crud, /sama/v2/example-wordpress.

Tests: 379/379 pass (367 → 379, +12 new).
robots.txt already references the sitemap (unchanged).

Co-Authored-By: Claude Opus 4.7 <[email protected]>
author
syntaxai <[email protected]>
date
2026-05-25 10:35:11 +01:00
parent
24abde0
commit
73d14f1800561fa124e25420c65930cc3f4e75e9

4 files changed · +200 −32

modified src/a31_site_config.ts +4 −0
@@ -3,6 +3,10 @@
33 // reports/live, sitemap, etc.) reference the same values without
44 // circular imports between c21_handlers_*.
55
6+// Canonical absolute base for every public URL the site emits
7+// (sitemap, og:url, canonical link, RSS, etc.). No trailing slash.
8+export const SITE_BASE_URL = "https://tdd.md";
9+
610 export const LIVE_REPO_OWNER = "syntaxai";
711 export const LIVE_REPO_NAME = "tdd.md";
812 // Number of recent commits the live-reports view samples from the
added src/b32_sitemap.test.ts +113 −0
@@ -0,0 +1,113 @@
1+import { describe, expect, test } from "bun:test";
2+import {
3+ escapeXml,
4+ renderSitemap,
5+ STATIC_PATHS,
6+ type SitemapUrl,
7+} from "./b32_sitemap.ts";
8+
9+describe("escapeXml", () => {
10+ test("escapes the five named entities", () => {
11+ expect(escapeXml("&")).toBe("&amp;");
12+ expect(escapeXml("<")).toBe("&lt;");
13+ expect(escapeXml(">")).toBe("&gt;");
14+ expect(escapeXml('"')).toBe("&quot;");
15+ expect(escapeXml("'")).toBe("&apos;");
16+ });
17+
18+ test("leaves regular URL characters untouched", () => {
19+ const s = "https://tdd.md/blog/sama-v2-workingset-cross-repo-baseline";
20+ expect(escapeXml(s)).toBe(s);
21+ });
22+
23+ test("ampersand always escapes first (no double-escape)", () => {
24+ expect(escapeXml("a & b < c")).toBe("a &amp; b &lt; c");
25+ });
26+});
27+
28+describe("renderSitemap", () => {
29+ test("empty list → valid urlset with no <url> children", () => {
30+ const xml = renderSitemap([]);
31+ expect(xml).toBe(`<?xml version="1.0" encoding="UTF-8"?>
32+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
33+</urlset>`);
34+ });
35+
36+ test("single URL with lastmod", () => {
37+ const xml = renderSitemap([
38+ { loc: "https://tdd.md/blog/x", lastmod: "2026-05-25" },
39+ ]);
40+ expect(xml).toBe(`<?xml version="1.0" encoding="UTF-8"?>
41+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
42+ <url><loc>https://tdd.md/blog/x</loc><lastmod>2026-05-25</lastmod></url>
43+</urlset>`);
44+ });
45+
46+ test("single URL without lastmod omits the <lastmod> element", () => {
47+ const xml = renderSitemap([{ loc: "https://tdd.md/sama" }]);
48+ expect(xml).toContain("<url><loc>https://tdd.md/sama</loc></url>");
49+ expect(xml).not.toContain("<lastmod>");
50+ });
51+
52+ test("multiple URLs preserve input order", () => {
53+ const xml = renderSitemap([
54+ { loc: "https://tdd.md/a" },
55+ { loc: "https://tdd.md/b" },
56+ { loc: "https://tdd.md/c" },
57+ ]);
58+ const aIdx = xml.indexOf("/a</loc>");
59+ const bIdx = xml.indexOf("/b</loc>");
60+ const cIdx = xml.indexOf("/c</loc>");
61+ expect(aIdx).toBeGreaterThan(-1);
62+ expect(aIdx).toBeLessThan(bIdx);
63+ expect(bIdx).toBeLessThan(cIdx);
64+ });
65+
66+ test("XML-escapes & and < inside <loc> values", () => {
67+ const xml = renderSitemap([
68+ { loc: "https://tdd.md/q?a=1&b=2" },
69+ { loc: "https://tdd.md/<weird>" },
70+ ]);
71+ expect(xml).toContain("<loc>https://tdd.md/q?a=1&amp;b=2</loc>");
72+ expect(xml).toContain("<loc>https://tdd.md/&lt;weird&gt;</loc>");
73+ expect(xml).not.toContain("a=1&b=2");
74+ });
75+
76+ test("opens with the XML declaration and closes with </urlset>", () => {
77+ const xml = renderSitemap([{ loc: "https://tdd.md/" }]);
78+ expect(xml.startsWith('<?xml version="1.0" encoding="UTF-8"?>')).toBe(true);
79+ expect(xml.endsWith("</urlset>")).toBe(true);
80+ });
81+
82+ test("deterministic — same input twice → byte-identical output", () => {
83+ const urls: ReadonlyArray<SitemapUrl> = [
84+ { loc: "https://tdd.md/", lastmod: "2026-05-25" },
85+ { loc: "https://tdd.md/blog" },
86+ ];
87+ expect(renderSitemap(urls)).toBe(renderSitemap(urls));
88+ });
89+});
90+
91+describe("STATIC_PATHS", () => {
92+ test("covers the eleven load-bearing routes from the /goal", () => {
93+ expect(STATIC_PATHS).toEqual([
94+ "/",
95+ "/blog",
96+ "/games",
97+ "/leaderboard",
98+ "/sama",
99+ "/sama/v2",
100+ "/sama/v2/verify",
101+ "/sama/v2/example-crud",
102+ "/sama/v2/example-wordpress",
103+ "/sama/skill",
104+ "/guides",
105+ ]);
106+ });
107+
108+ test("each path is absolute (starts with /)", () => {
109+ for (const p of STATIC_PATHS) {
110+ expect(p.startsWith("/")).toBe(true);
111+ }
112+ });
113+});
added src/b32_sitemap.ts +53 −0
@@ -0,0 +1,53 @@
1+// b32 — Layer 1 pure helper: render a sitemaps.org 0.9 urlset.
2+// No I/O. Deterministic: same input array → same output bytes.
3+// Caller (d21_app.ts) composes the URL list from ALL_POSTS,
4+// ALL_SAMA, ALL_GUIDES, and STATIC_PATHS, then asks this module
5+// for the XML string. Sibling test pins escape behaviour and
6+// shape; the verifier's §4.3 modeled-tests check requires it.
7+
8+export interface SitemapUrl {
9+ readonly loc: string;
10+ readonly lastmod?: string;
11+}
12+
13+// The eleven load-bearing static URLs that don't come from a
14+// registry. Each must correspond to a literal route registered
15+// in d21_app.ts; the handler iterates this list verbatim.
16+export const STATIC_PATHS: ReadonlyArray<string> = [
17+ "/",
18+ "/blog",
19+ "/games",
20+ "/leaderboard",
21+ "/sama",
22+ "/sama/v2",
23+ "/sama/v2/verify",
24+ "/sama/v2/example-crud",
25+ "/sama/v2/example-wordpress",
26+ "/sama/skill",
27+ "/guides",
28+];
29+
30+// Minimal XML 1.0 escape for character data + attribute values.
31+// The five named entities are the canonical set; anything else
32+// is left as-is (the sitemap spec requires UTF-8, not ASCII).
33+export const escapeXml = (s: string): string =>
34+ s
35+ .replace(/&/g, "&amp;")
36+ .replace(/</g, "&lt;")
37+ .replace(/>/g, "&gt;")
38+ .replace(/"/g, "&quot;")
39+ .replace(/'/g, "&apos;");
40+
41+const renderUrl = (u: SitemapUrl): string => {
42+ const loc = `<loc>${escapeXml(u.loc)}</loc>`;
43+ const lastmod =
44+ u.lastmod !== undefined ? `<lastmod>${escapeXml(u.lastmod)}</lastmod>` : "";
45+ return ` <url>${loc}${lastmod}</url>`;
46+};
47+
48+export const renderSitemap = (urls: ReadonlyArray<SitemapUrl>): string => {
49+ const body = urls.map(renderUrl).join("\n");
50+ const inner = body.length > 0 ? `\n${body}\n` : "\n";
51+ return `<?xml version="1.0" encoding="UTF-8"?>
52+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">${inner}</urlset>`;
53+};
modified src/d21_app.ts +30 −32
@@ -12,6 +12,12 @@ import { listGames, loadGame } from "./a31_games.ts";
1212 import { ALL_POSTS } from "./a31_blog.ts";
1313 import { ALL_GUIDES } from "./a31_guides.ts";
1414 import { ALL_SAMA } from "./a31_sama.ts";
15+import { SITE_BASE_URL } from "./a31_site_config.ts";
16+import {
17+ renderSitemap,
18+ STATIC_PATHS,
19+ type SitemapUrl,
20+} from "./b32_sitemap.ts";
1521 import {
1622 getViewer,
1723 sessionCookieHeader,
@@ -181,39 +187,31 @@ export const createApp = (port: number) => Bun.serve({
181187 { headers: { "Content-Type": "text/plain; charset=utf-8" } },
182188 ),
183189
184- "/sitemap.xml": async () => {
185- const today = new Date().toISOString().slice(0, 10);
186- const url = (loc: string, priority: string) =>
187- `<url><loc>${loc}</loc><lastmod>${today}</lastmod><priority>${priority}</priority></url>`;
188- const kataUrls = ALL_GAMES.map((g) =>
189- url(`https://tdd.md/games/${g.id}`, "0.8"),
190- ).join("\n");
191- const guideUrls = ALL_GUIDES.map((g) =>
192- url(`https://tdd.md/guides/${g.slug}`, "0.8"),
193- ).join("\n");
194- const samaUrls = ALL_SAMA.map((d) =>
195- url(`https://tdd.md/sama/${d.slug}`, "0.8"),
196- ).join("\n");
197- const blogUrls = ALL_POSTS.map((p) =>
198- url(`https://tdd.md/blog/${p.slug}`, "0.8"),
199- ).join("\n");
200- const xml = `<?xml version="1.0" encoding="UTF-8"?>
201-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
202-${url("https://tdd.md/", "1.0")}
203-${url("https://tdd.md/games", "0.9")}
204-${kataUrls}
205-${url("https://tdd.md/guides", "0.9")}
206-${guideUrls}
207-${url("https://tdd.md/sama", "0.9")}
208-${samaUrls}
209-${url("https://tdd.md/sama/skill", "0.8")}
210-${url("https://tdd.md/blog", "0.7")}
211-${blogUrls}
212-${url("https://tdd.md/agents", "0.7")}
213-${url("https://tdd.md/leaderboard", "0.7")}
214-</urlset>`;
190+ "/sitemap.xml": () => {
191+ const staticUrls: SitemapUrl[] = STATIC_PATHS.map((p) => ({
192+ loc: `${SITE_BASE_URL}${p}`,
193+ }));
194+ const blogUrls: SitemapUrl[] = ALL_POSTS.map((p) => ({
195+ loc: `${SITE_BASE_URL}/blog/${p.slug}`,
196+ lastmod: p.date,
197+ }));
198+ const samaUrls: SitemapUrl[] = ALL_SAMA.map((d) => ({
199+ loc: `${SITE_BASE_URL}/sama/${d.slug}`,
200+ }));
201+ const guideUrls: SitemapUrl[] = ALL_GUIDES.map((g) => ({
202+ loc: `${SITE_BASE_URL}/guides/${g.slug}`,
203+ }));
204+ const xml = renderSitemap([
205+ ...staticUrls,
206+ ...blogUrls,
207+ ...samaUrls,
208+ ...guideUrls,
209+ ]);
215210 return new Response(xml, {
216- headers: { "Content-Type": "application/xml; charset=utf-8" },
211+ headers: {
212+ "Content-Type": "application/xml; charset=utf-8",
213+ "Cache-Control": "public, max-age=3600",
214+ },
217215 });
218216 },
219217