83a1f700f38684082b4c2119c9b231240ed65a07 diff --git a/content/blog/sama-v2-git-url-refactor-plan.md b/content/blog/sama-v2-git-url-refactor-plan.md new file mode 100644 index 0000000000000000000000000000000000000000..31cd11031eb8ceef8a9615139fed950dfc6e122d --- /dev/null +++ b/content/blog/sama-v2-git-url-refactor-plan.md @@ -0,0 +1,135 @@ +# Shortening `/GIT/` URLs: a single-tenant URL has a redundant segment + +Every link on this site that points at the source code passes through `/GIT/:owner/:repo/...`. The owner segment is always `syntaxai`. The repo segment is always `tdd.md`. The handler validates both, 404s anything else, and never reads them again. The user-visible URL is doing structural work for a multi-tenant case that doesn't exist. + +Concrete example — the verifier source link: + +``` +before: https://tdd.md/GIT/syntaxai/tdd.md/blob/main/src/b32_sama_v2_verify.ts +after: https://tdd.md/GIT/tdd.md/blob/main/src/b32_sama_v2_verify.ts +``` + +Nine characters shorter. The change is small but the workflow it sits inside is the same one this site is built around — `/goal` slash command as contract, SAMA v2 as discipline, the verifier as anti-fudge gate. This post is the **plan**, written before the `/goal` fires. + +![URL anatomy — the owner segment is policy overhead, not data](/images/git-url-anatomy.png?v=1) + +## Why dropping the owner is safe + +The relevant code is twenty-one lines down [`src/d21_handlers_repo_browse.ts`](/GIT/syntaxai/tdd.md/blob/main/src/d21_handlers_repo_browse.ts): + +```ts +const isAllowedRepo = (owner: string, repo: string): boolean => + owner === LIVE_REPO_OWNER && // "syntaxai" + repo === LIVE_REPO_NAME && // "tdd.md" + SAFE_OWNER_REPO.test(owner) && + SAFE_OWNER_REPO.test(repo); +``` + +The check is structural — there is exactly one allowed pair, and any deviation produces a 404. So the owner segment carries no information the handler couldn't supply itself. 
It's a position in the URL that exists only to make the URL look like a GitHub URL — which, given that the data is *not* on GitHub, is a costume rather than a contract. + +The signature also drops to `isAllowedRepo(repo)`. `LIVE_REPO_OWNER` stays in `src/a31_site_config.ts` — it's still the truthful owner for the backing git operations, the Forgejo proxy, and any future feature that needs to talk about provenance. It just stops showing up in user-facing URLs. + +## The interesting design decision — one regex, not 49 redirects + +A grep across the repo finds **49 references** to the old URL form across **10 source files** and **7 content files** — link builders, hard-coded markdown in `/sama/v2/verify`, blog posts that point at specific files for their empirical claims, the verifier page itself. + +Naive approach: hand-maintain a list of 49 old-URL → new-URL mappings as a redirect table. Cost: rewrites work today, but the list rots the next time someone adds a new file or blog post (49 grows to 60 grows to 100). Anti-pattern. + +The right shape is **one regex in the fallback handler** that matches the *pattern* of the old URL and rewrites to the new one: + +![Shipping the URL change — old URL → regex matcher → 301 → new URL](/images/git-url-redirect.png?v=1) + +```ts +const oldGitUrl = url.pathname.match( + /^\/GIT\/syntaxai\/tdd\.md\/(.+)$/, +); +if (oldGitUrl) { + return new Response(null, { + status: 301, + headers: { + Location: `/GIT/tdd.md/${oldGitUrl[1]}`, + "Cache-Control": "public, max-age=86400", + }, + }); +} +``` + +Twelve lines. Covers all 49 known references and every future URL with the same shape. Cost: one commit. Lifetime maintenance: zero. + +The 301 (permanent redirect) is the load-bearing detail — search engines treat 301 as "update your index"; they treat 302 as "this is temporary, keep the old URL." We want the index to converge on the new URL, so 301 it is.
+ +## How this maps onto SAMA v2 + +Checked against all four layers, the changes land in just one of them: + +| Layer | What changes | +|---|---| +| **Layer 0 · Pure** (`a31_site_config.ts`) | `LIVE_REPO_OWNER` stays exported — still the truthful owner constant, just no longer used to build URLs | +| **Layer 1 · Core** | No changes — there are no Layer-1 helpers in the `/GIT/` flow; the URL surface is pure routing | +| **Layer 2 · Adapter** (`c14_git.ts`) | No changes — `lsTree` and `readBlobAtRef` already take `(ref, path)`, never owner/repo | +| **Layer 3 · Entry** | All the changes live here — `parseRepoBrowsePath` callers, `repoBrowseHandler` signature, the Bun explicit route `/GIT/:repo/commit/:sha`, the new 301 redirect, and the link builders in `b51_render_*.ts` | + +The layer surface tells you the refactor is contained — no Adapter changes, no business-logic changes, no test-of-pure-helper changes. Only the routing/rendering surface moves. That's the "small refactor" smell the [Layer 2 stays empty](/blog/sama-v2-sitemap-implementation-plan) sitemap post identified — when the change is genuinely about the URL surface, the deeper layers don't need to move. + +## Anti-fudge — what the `/goal` rules out + +The plan deliberately doesn't do these things, even though each is locally appealing: + +- **No hand-maintained list of redirects.** One regex pattern covers all 49 current references and every future one. If the regex grows into "a list", the anti-fudge clause has been violated. +- **No removal of `LIVE_REPO_OWNER`.** The constant has callers beyond URL construction (the live-reports view, the Forgejo proxy hostname). Removing it from `a31_site_config.ts` would be a different, larger refactor that the URL change shouldn't drag in.
+- **No touching of git-protocol URLs.** `/syntaxai/tdd.md.git` and the bare-repo view at `/syntaxai/tdd.md` go through `isGitProtocol` + `repoMatch` in [`d21_handlers_fallback.ts`](/GIT/syntaxai/tdd.md/blob/main/src/d21_handlers_fallback.ts). Those URLs are git-client-facing — agents and humans have copy-pasted them into clone commands, into CI configs, into other agents' system prompts. Changing them risks breakage for cosmetics. They stay. +- **No alias.** Both URL forms working forever creates two canonical URLs and lets the old one quietly remain in new code. The 301 is what forces consolidation — search engines update, internal code paths rewrite themselves, and a year from now the old form is just a redirect line in one file. +- **No verifier change.** `/sama/v2/verify` stays at 7/7 ✓ across the merge. The §4 check logic is frozen; if a structural choice the refactor wants to make would fail the verifier, the choice changes — not the verifier. + +## The work, sized + +Three categories of file change: + +- **Wiring (4 files)**: the fallback handler gets the new redirect + the parse regex drops `owner`; the explicit Bun commit route in `d21_app.ts` becomes `/GIT/:repo/commit/:sha`; `repoBrowseHandler` and `commitViewHandler` lose the `owner` argument; `isAllowedRepo` collapses to one argument. +- **Link builders (3 files)**: `b51_render_repo.ts` (eight call sites — breadcrumbs, parent-dir, raw/source links), `b51_render_commit.ts` (two call sites), `b51_render_edit.ts` (one hard-coded URL). +- **Hard-coded markdown (7 files)**: `content/home.md`, `content/sama/v2.md`, four blog posts that point at specific source files for their empirical claims, `src/d21_handlers_sama.ts:137` (markdown embedded in the verifier page body). One sed pass, all done. + +The test files (`b51_render_repo.test.ts`, `b51_render_commit.test.ts`) pin the rendered URL strings — those expectations update mechanically with the link-builder changes. 
Test count stays at 379+; no test count regression. + +## Live-verify clauses + +What the `/goal` requires to verify *after deploy*, not just *in CI*: + +```bash +$ curl -I https://tdd.md/GIT/syntaxai/tdd.md/blob/main/src/b32_sama_v2_verify.ts +HTTP/2 301 +location: /GIT/tdd.md/blob/main/src/b32_sama_v2_verify.ts + +$ curl -L https://tdd.md/GIT/syntaxai/tdd.md/blob/main/src/b32_sama_v2_verify.ts +HTTP/2 200 +< file content > + +$ curl -s https://tdd.md/GIT/tdd.md/tree/main | head -1 +< 200, directory listing HTML > + +$ curl -s https://tdd.md/sama/v2/verify | grep -o '7/7' +7/7 +``` + +Plus the silent live check: every blog post on the site has its `/GIT/` links rewritten, so clicking any "view source" link in any of the empirical-chain posts lands on a working URL — no broken navigation surfaced after the merge. + +## What lands when this ships + +After deploy: + +- Every `/GIT/` URL on the site uses the new shape. +- The verifier source — the URL search engines and AI crawlers should index as "the artifact this site's argument rests on" — gets shorter and more readable. +- Old URLs already indexed by Google, cached by Twitter card scrapers, sitting in someone else's blog post, or pasted into someone's notes file all permanently-redirect to the new form. Index reconverges in a search-engine refresh cycle. +- `/sama/v2/verify` continues to report **7 ✓ / 7**. +- One new pattern — the regex-as-redirect — surfaces a reusable shape for future URL refactors. If the site renames `/sama/v2/example-crud` to `/sama/v2/examples/crud` next month, the same shape applies. + +## Companion postmortem + +This is the plan. The postmortem will follow after the merge with: + +- The actual file diff (likely tight — most line changes are mechanical `s/syntaxai\/tdd\.md/tdd.md/g` substitutions). +- Whether the regex caught everything (especially in places `grep` missed — embedded HTML strings, multi-line URLs, etc.). +- The `/sama/v2/verify` output before and after the merge. 
+- Anything the anti-fudge clauses caught that the plan missed. + +If the refactor lands cleanly with the regex absorbing all 49 references — that's the data point: pattern-as-redirect is a reusable shape, and the next URL refactor needs ten lines plus a sed pass. diff --git a/public/images/git-url-anatomy.png b/public/images/git-url-anatomy.png new file mode 100644 index 0000000000000000000000000000000000000000..61de6dfca5ebc8a9b6e8ae02a41a9044ff094ea5 Binary files /dev/null and b/public/images/git-url-anatomy.png differ diff --git a/public/images/git-url-anatomy.svg b/public/images/git-url-anatomy.svg new file mode 100644 index 0000000000000000000000000000000000000000..61befa8957a9239eacbdb650b3c34d636a8478e0 --- /dev/null +++ b/public/images/git-url-anatomy.svg @@ -0,0 +1,45 @@ + + + + + + /GIT/ URL anatomy — one segment is doing no work + isAllowedRepo() already enforces a single tenant. + The owner segment is checked, rejected if anything else, and never read again. It's policy overhead, not data. 
+ + + + BEFORE + + + https://tdd.md/GIT/syntaxai/tdd.md/blob/main/src/b32_sama_v2_verify.ts + + + ↑ redundant — always "syntaxai", validated then ignored + + + + + // src/d21_handlers_repo_browse.ts:26 + const isAllowedRepo = (owner: string, repo: string): boolean => + owner === LIVE_REPO_OWNER && // "syntaxai" — checked but never user-supplied in practice + repo === LIVE_REPO_NAME && // "tdd.md" + SAFE_OWNER_REPO.test(owner) && SAFE_OWNER_REPO.test(repo); + + + + AFTER + + + https://tdd.md/GIT/tdd.md/blob/main/src/b32_sama_v2_verify.ts + + + + 49 references touched · 10 source files · 7 content files · 1 regex 301-redirect + + + + https://tdd.md + diff --git a/public/images/git-url-redirect.png b/public/images/git-url-redirect.png new file mode 100644 index 0000000000000000000000000000000000000000..842d9776dbef3cb1fcf7faf2e7cce5f61ff43664 Binary files /dev/null and b/public/images/git-url-redirect.png differ diff --git a/public/images/git-url-redirect.svg b/public/images/git-url-redirect.svg new file mode 100644 index 0000000000000000000000000000000000000000..b086c17bcbfeb56998a484c9c539f041ecedb8fd --- /dev/null +++ b/public/images/git-url-redirect.svg @@ -0,0 +1,67 @@ + + + + + + Shipping the URL change — one regex, not 49 redirects + Inbound links survive. SEO juice carries over. + A single regex in d21_handlers_fallback.ts rewrites every old URL to the new shape with a 301. No hand-maintained map. 
+ + + + + + INBOUND · cached externally + GET /GIT/syntaxai/tdd.md/ + blob/main/src/ + b32_sama_v2_verify.ts + + + + + + + + + + FALLBACK HANDLER · one regex + /^\/GIT\/syntaxai\/tdd\.md\/ + (.+)$/ + → rewrite /GIT/tdd.md/$1, 301 + + + + + + + + + + 301 · Location header + /GIT/tdd.md/blob/main/ + src/ + b32_sama_v2_verify.ts + + + + + + $ curl -I https://tdd.md/GIT/syntaxai/tdd.md/blob/main/src/b32_sama_v2_verify.ts + HTTP/2 301 + location: /GIT/tdd.md/blob/main/src/b32_sama_v2_verify.ts + cache-control: public, max-age=86400 + $ curl -L https://tdd.md/GIT/syntaxai/tdd.md/blob/main/src/b32_sama_v2_verify.ts + HTTP/2 200 + + + + + Why one regex, not 49 entries + A hand-maintained URL map would need updating every time a new file is added (49 grows to 50, 60, 80…). + The regex matches the path PATTERN, so any future URL — even ones that don't exist yet — automatically redirects. + Cost: 1 commit. Lifetime maintenance: 0. The anti-fudge clause in /goal forbids the hand-maintained alternative. + + + + https://tdd.md + diff --git a/src/a31_blog.ts b/src/a31_blog.ts index 1fe4f45b16b702562d4d34e2420f83fd8eb6f30f..9661bbb94312198e9a1e6f79e83cbcc126d0444b 100644 --- a/src/a31_blog.ts +++ b/src/a31_blog.ts @@ -12,6 +12,12 @@ export interface BlogEntry { } export const ALL_POSTS: BlogEntry[] = [ + { + slug: "sama-v2-git-url-refactor-plan", + title: "Shortening /GIT/ URLs: a single-tenant URL has a redundant segment", + description: "Every link on the site that points at source code goes through /GIT/:owner/:repo/... and the owner is always 'syntaxai', the repo is always 'tdd.md', and the handler 404s anything else. The user-visible URL does structural work for a multi-tenant case that doesn't exist. This post is the implementation PLAN for dropping the owner segment: /GIT/syntaxai/tdd.md/blob/main/src/b32_sama_v2_verify.ts → /GIT/tdd.md/blob/main/src/b32_sama_v2_verify.ts. 
The interesting design decision is the redirect strategy — 49 references to the old form across 10 source files + 7 content files, and the temptation is to hand-maintain a redirect table. The right shape is ONE regex in the fallback handler that matches the path PATTERN, not the path values, and 301s to the new form. Cost: one commit. Lifetime maintenance: zero. Includes two visualizations (URL anatomy + redirect flow) and walks the SAMA layer surface to show the refactor is contained to Layer 3 routing/rendering — no Adapter, no Core, no Pure changes. Anti-fudge clauses called out: no hand-maintained URL list, no removal of LIVE_REPO_OWNER (still needed for git operations + Forgejo proxy), no touching of git-protocol URLs (which agents and humans have copy-pasted into clone commands), no alias mode (both URLs working forever lets the old form quietly remain canonical), no verifier change. Postmortem to follow after the /goal fires and the refactor merges — pattern-as-redirect promises to be a reusable shape for future URL refactors (the next time /sama/v2/example-crud becomes /sama/v2/examples/crud, the same ten lines apply).", + date: "2026-05-25", + }, { slug: "sama-v2-sitemap-implementation-plan", title: "Building /sitemap.xml under SAMA v2 — a Claude Code /goal walkthrough", diff --git a/src/d21_handlers_fallback.ts b/src/d21_handlers_fallback.ts index c7c1154742f1d5eb3016f6b37c8fcdacc4a06a0c..60b3a9e9831bc5836d25759d2d76aaa2753bf056 100644 --- a/src/d21_handlers_fallback.ts +++ b/src/d21_handlers_fallback.ts @@ -48,6 +48,32 @@ export const appFetch = async (req: Request): Promise => { } const url = urlR.value; + // Static images under /images/.. Convention: every new + // site image lives at public/images/ and is served from /images/. + // The whitelist of extensions + the strict filename pattern blocks + // path traversal (no slashes after /images/, no leading dots). 
+ const imagesMatch = url.pathname.match( + /^\/images\/([A-Za-z0-9][A-Za-z0-9._-]*)\.(svg|png|webp|jpg|jpeg|gif)$/, + ); + if (imagesMatch) { + const file = Bun.file(`./public/images/${imagesMatch[1]}.${imagesMatch[2]}`); + if (await file.exists()) { + const ext = imagesMatch[2]!; + const contentType = + ext === "svg" ? "image/svg+xml" : + ext === "png" ? "image/png" : + ext === "webp" ? "image/webp" : + ext === "gif" ? "image/gif" : + "image/jpeg"; + return new Response(file, { + headers: { + "Content-Type": contentType, + "Cache-Control": "public, max-age=3600", + }, + }); + } + } + // Admin edit/delete on multi-segment slugs (company/about, docs/spec/grammar // etc.). Bun's `:slug` param can't span "/" so anything with two-or-more // segments after the type slot ends up here. Single-segment is handled