// c32 — pure: parse rendered HTML and extract anchor entries for
// h2/h3 headings. Used by the docs layout to build the right-rail
// "on this page" navigator. No I/O; given a string in, returns a
// list of anchors out.
//
// Input shape: HTML produced by `marked` (which adds `id` attrs to
// headings via the GFM-slugger by default in our config). When an
// id is missing, we slug-ify the heading text ourselves so the
// anchor link still works.

/** One entry in the "on this page" navigator. */
export interface Anchor {
  /** Heading depth: 2 for `<h2>`, 3 for `<h3>`. */
  level: 2 | 3;
  /** Heading text with markup removed and basic entities decoded. */
  text: string;
  /** Fragment id to link to (without the leading `#`). */
  id: string;
}

// Matches any HTML tag, opening or closing.
const TAG_RE = /<[^>]*>/g;

// The handful of character references we decode in heading text.
const ENTITY_RE = /&(?:amp|lt|gt|quot|apos|#39|#x27);/g;

const ENTITY_MAP: Record<string, string> = {
  "&amp;": "&",
  "&lt;": "<",
  "&gt;": ">",
  "&quot;": '"',
  "&apos;": "'",
  "&#39;": "'",
  "&#x27;": "'",
};

// An <h2>/<h3> element: group 1 = level digit, group 2 = raw attribute
// text (if any), group 3 = inner HTML. The `\1` backreference makes sure
// the closing tag has the same level as the opening tag.
const HEADING_RE = /<h([23])(?:\s+([^>]*))?>([\s\S]*?)<\/h\1>/g;

// A non-empty `id` attribute, single- or double-quoted. Anchored on
// start-of-attrs or whitespace so that `data-id="…"` is not mistaken
// for the element id (a plain `\b` would match after the hyphen).
const ID_ATTR_RE = /(?:^|\s)id=(?:"([^"]+)"|'([^']+)')/;

/**
 * Turns heading text into a URL-fragment-safe slug: lower-cased, tags
 * removed, named entities turned into spaces, everything outside
 * `[a-z0-9\s-]` dropped, and whitespace runs collapsed to single hyphens.
 *
 * @returns The slug, or an empty string when nothing sluggable remains
 *   (for example a heading made up only of non-ASCII characters).
 */
const slugify = (raw: string): string =>
  raw
    .toLowerCase()
    .replace(/<[^>]*>/g, "")
    .replace(/&[a-z]+;/g, " ")
    .replace(/[^a-z0-9\s-]/g, "")
    .trim()
    .replace(/\s+/g, "-");

/**
 * Removes HTML tags from `s`, decodes the basic character references and
 * trims the result.
 *
 * Tags are removed before decoding so that an escaped `&lt;b&gt;` in the
 * text survives as a literal `<b>`. Decoding is done in a single pass so
 * that `&amp;lt;` becomes `&lt;` rather than being decoded twice into `<`.
 */
const stripTags = (s: string): string =>
  s
    .replace(TAG_RE, "")
    .replace(ENTITY_RE, (entity) => ENTITY_MAP[entity] ?? entity)
    .trim();

/**
 * Extracts one {@link Anchor} per `<h2>`/`<h3>` element found in `html`,
 * in document order.
 *
 * The anchor id is taken from the heading's `id` attribute when present;
 * otherwise it is derived from the heading text via `slugify`. Headings
 * whose text is empty after stripping markup, or for which no id can be
 * determined, are skipped.
 *
 * @param html Rendered HTML to scan.
 * @returns The anchors found; an empty array when there are none.
 */
export const extractAnchors = (html: string): Anchor[] => {
  const anchors: Anchor[] = [];

  // `matchAll` works on a clone of the regex, so the shared module-level
  // pattern never carries `lastIndex` state between calls.
  for (const match of html.matchAll(HEADING_RE)) {
    const level: 2 | 3 = match[1] === "3" ? 3 : 2;
    const attrs = match[2] ?? "";
    const inner = match[3] ?? "";

    const text = stripTags(inner);
    if (!text) continue;

    const idMatch = ID_ATTR_RE.exec(attrs);
    const id = idMatch?.[1] ?? idMatch?.[2] ?? slugify(text);
    if (!id) continue;

    anchors.push({ level, text, id });
  }

  return anchors;
};