// c32 — pure: parse rendered HTML and extract anchor entries for
// h2/h3 headings. Used by the docs layout to build the right-rail
// "on this page" navigator. No I/O; given a string in, returns a
// list of anchors out.
//
// Input shape: HTML produced by `marked` (which adds `id` attrs to
// headings via the GFM-slugger by default in our config). When an
// id is missing, we slug-ify the heading text ourselves so the
// anchor link still works.
/** One entry in the "on this page" navigator, derived from an h2 or h3 heading. */
export interface Anchor {
/** Heading depth: 2 for an h2 heading, 3 for an h3 heading. */
level: 2 | 3;
/** Heading text with markup tags removed and surrounding whitespace trimmed. */
text: string;
/** Fragment identifier: the heading's own id attribute, or a slug of `text` when it has none. */
id: string;
}
/**
 * Builds a fragment-safe slug from heading text.
 *
 * Steps, in order: lowercase, drop markup tags, turn character references
 * into separators, drop everything that is not an ASCII letter, digit,
 * whitespace or hyphen, trim, then collapse each whitespace run into a
 * single hyphen.
 *
 * @param raw Heading text; may still contain tags or character references.
 * @returns The slug, or an empty string when nothing usable remains.
 */
const slugify = (raw: string): string =>
  raw
    .toLowerCase()
    .replace(/<[^>]*>/g, "")
    // Match named references (including names with digits) as well as decimal
    // and hex numeric references. Without the numeric forms, the digits of an
    // escaped character would survive the filter below and leak into the slug.
    // Input is already lowercased, so the hex marker and digits are lowercase.
    .replace(/&(?:#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/g, " ")
    .replace(/[^a-z0-9\s-]/g, "")
    .trim()
    .replace(/\s+/g, "-");
// Replacement text for the escaped-character references that stripTags
// decodes: the named forms for ampersand, angle brackets, double quote and
// apostrophe, plus the decimal and hex numeric forms of the apostrophe.
const HTML_ENTITY_TEXT: Readonly<Record<string, string>> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
  "#39": "'",
  "#x27": "'",
};

/**
 * Reduces an HTML fragment to plain display text.
 *
 * Tags are removed first, then the common escaped-character references are
 * decoded, then surrounding whitespace is trimmed. Decoding happens in a
 * single pass so that an escaped ampersand followed by `lt;` stays a literal
 * reference instead of being decoded twice into `<`. References outside the
 * lookup table are left untouched.
 *
 * @param s HTML fragment, e.g. the inner HTML of a heading.
 * @returns The plain text; empty when the fragment has no text content.
 */
const stripTags = (s: string): string =>
  s
    .replace(/<[^>]*>/g, "")
    .replace(
      /&(amp|lt|gt|quot|apos|#39|#[xX]27);/g,
      (ref: string, name: string) => HTML_ENTITY_TEXT[name.toLowerCase()] ?? ref,
    )
    .trim();
/**
 * Extracts navigator anchors from rendered HTML.
 *
 * Scans for h2 and h3 elements in document order. Each heading yields an
 * anchor whose text is the heading content with tags stripped, and whose id
 * is the heading's own id attribute or, when that is missing or empty, a
 * slug of the text. Headings with no text, or for which no id can be
 * produced, are skipped.
 *
 * @param html Rendered HTML to scan.
 * @returns Anchors in the order their headings appear; empty when none match.
 */
export const extractAnchors = (html: string): Anchor[] => {
  const out: Anchor[] = [];
  // Group 1: heading level (2 or 3). Group 2: raw attribute text, if any.
  // Group 3: inner HTML, matched lazily up to the closing tag of the same level.
  const headingRe = /<h([23])(?:\s+([^>]*))?>([\s\S]*?)<\/h\1\s*>/gi;
  // Double- or single-quoted id attribute. Anchoring on start-or-whitespace
  // keeps attributes that merely end in "id" (such as data-id) from matching.
  const idRe = /(?:^|\s)id\s*=\s*(?:"([^"]*)"|'([^']*)')/i;

  for (const m of html.matchAll(headingRe)) {
    const level = m[1] === "3" ? 3 : 2;
    const attrs = m[2] ?? "";
    const text = stripTags(m[3] ?? "");
    if (!text) continue;

    const idMatch = idRe.exec(attrs);
    // An empty id attribute is treated as absent so the slug fallback applies.
    const id = (idMatch?.[1] ?? idMatch?.[2]) || slugify(text);
    if (!id) continue;

    out.push({ level, text, id });
  }
  return out;
};