// Source: syntaxai/tdd.md · main · src/a31_sxdoc_parse.test.ts
import { test, expect } from "bun:test";
import { htmlToSx } from "./a31_sxdoc_parse.ts";
import { SX_DOC_VERSION } from "./a31_sxdoc.ts";
// Empty input must still yield a versioned document that contains no blocks.
test("returns an empty document for empty input", () => {
  const { v, blocks } = htmlToSx("");

  expect(v).toBe(SX_DOC_VERSION);
  expect(blocks).toEqual([]);
});
// A lone <p> becomes exactly one "p" block holding a single unmarked text run.
test("parses a simple paragraph", () => {
  const html = "<p>Hello world</p>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toHaveLength(1);
  expect(blocks[0]).toEqual({
    t: "p",
    c: [{ t: "text", v: "Hello world" }],
  });
});
// Every heading tag h1..h6 maps to an "h" block whose `level` matches the tag number.
test("parses headings with correct level for h1-h6", () => {
  ([1, 2, 3, 4, 5, 6] as const).forEach((level) => {
    const tag = `h${level}`;
    const { blocks } = htmlToSx(`<${tag}>Title ${level}</${tag}>`);

    expect(blocks).toHaveLength(1);
    expect(blocks[0]).toEqual({
      t: "h",
      level,
      c: [{ t: "text", v: `Title ${level}` }],
    });
  });
});
// Each <li> of a <ul> is expected as its own item: an array holding one "p" block.
test("parses unordered list with items wrapped as paragraphs", () => {
  const html = "<ul><li>one</li><li>two</li></ul>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toHaveLength(1);
  expect(blocks[0]).toEqual({
    t: "ul",
    items: [
      [{ t: "p", c: [{ t: "text", v: "one" }] }],
      [{ t: "p", c: [{ t: "text", v: "two" }] }],
    ],
  });
});
// An <ol> yields an "ol" block; its single item is wrapped in a paragraph.
test("parses ordered list", () => {
  const { blocks } = htmlToSx("<ol><li>first</li></ol>");
  const list = blocks[0];

  expect(list.t).toBe("ol");

  // Only `items` is inspected, so the block is narrowed just enough to read it.
  const { items } = list as { items: unknown };
  expect(items).toEqual([
    [{ t: "p", c: [{ t: "text", v: "first" }] }],
  ]);
});
// A list nested inside an <li> appears as a second block of that item, after its text.
test("parses nested lists inside a list item", () => {
  const html = "<ul><li>outer<ul><li>inner</li></ul></li></ul>";
  const outerList = htmlToSx(html).blocks[0] as { t: "ul"; items: unknown[][] };

  expect(outerList.t).toBe("ul");

  const firstItem = outerList.items[0];
  expect(firstItem).toHaveLength(2);

  // Position 0: the item's own text, wrapped in a paragraph.
  expect(firstItem[0]).toEqual({ t: "p", c: [{ t: "text", v: "outer" }] });

  // Position 1: the nested list, parsed the same way as a top-level one.
  expect(firstItem[1]).toEqual({
    t: "ul",
    items: [[{ t: "p", c: [{ t: "text", v: "inner" }] }]],
  });
});
// <blockquote> maps to a "quote" block whose children are the contained block elements.
test("parses blockquote with paragraph inside", () => {
  const { blocks } = htmlToSx("<blockquote><p>quoted</p></blockquote>");

  expect(blocks).toEqual([
    {
      t: "quote",
      c: [{ t: "p", c: [{ t: "text", v: "quoted" }] }],
    },
  ]);
});
// Bare text directly inside <blockquote> is expected to be wrapped in a paragraph.
test("parses blockquote with loose text wraps it in a paragraph", () => {
  const [quote] = htmlToSx("<blockquote>loose</blockquote>").blocks;

  expect(quote).toEqual({
    t: "quote",
    c: [{ t: "p", c: [{ t: "text", v: "loose" }] }],
  });
});
// The `language-xx` class on <code> is expected to surface as the block's `lang`.
test("parses pre>code with language hint", () => {
  const html = `<pre><code class="language-ts">const x = 1;</code></pre>`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "code",
    lang: "ts",
    src: "const x = 1;",
  });
});
// A <pre> with no inner <code> still becomes a "code" block, with an empty `lang`.
test("parses pre without inner code element", () => {
  const [codeBlock] = htmlToSx("<pre>raw text</pre>").blocks;

  expect(codeBlock).toEqual({
    t: "code",
    lang: "",
    src: "raw text",
  });
});
// Entities inside <pre><code> must come through parsing in their encoded form.
// The input has to spell them as character references: a literal "<p>" would be
// ordinary markup, so no entity would ever reach the parser and the test would
// not exercise what its name describes.
// NOTE(review): the expected `src` is taken from the test name ("preserves
// encoded entities"); confirm against the parser that code blocks keep
// `&lt;`/`&gt;` verbatim rather than decoding them.
test("preserves encoded entities in code blocks", () => {
  const doc = htmlToSx(`<pre><code>&lt;p&gt;</code></pre>`);
  expect(doc.blocks[0]).toEqual({
    t: "code", lang: "", src: "&lt;p&gt;",
  });
});
// `src` and `alt` attributes are carried over onto the "img" block unchanged.
test("parses img with src and alt", () => {
  const html = `<img src="/x.png" alt="x icon">`;
  const [image] = htmlToSx(html).blocks;

  expect(image).toEqual({ t: "img", src: "/x.png", alt: "x icon" });
});
// width/height attribute strings are expected as numeric `w`/`h` fields.
test("parses img with width and height attributes", () => {
  const html = `<img src="/a.jpg" width="200" height="100">`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "img",
    src: "/a.jpg",
    w: 200,
    h: 100,
  });
});
// An <img> whose src is empty should produce no block at all.
test("skips img with empty src", () => {
  const { blocks } = htmlToSx(`<img src="">`);

  expect(blocks).toEqual([]);
});
// A <figure> collapses into its image, with the <figcaption> text as `caption`.
test("parses figure with figcaption", () => {
  const html = `<figure><img src="/y.png"><figcaption>nice y</figcaption></figure>`;
  const [figure] = htmlToSx(html).blocks;

  expect(figure).toEqual({
    t: "img",
    src: "/y.png",
    caption: "nice y",
  });
});
// <hr> maps to a bare "hr" block with no other fields.
test("parses hr", () => {
  const [rule] = htmlToSx("<hr>").blocks;

  expect(rule).toEqual({ t: "hr" });
});
// <strong> and <em> become "b" / "i" marks on their text runs; the plain text
// between them stays an unmarked run of its own.
test("parses inline bold and italic marks", () => {
  const html = "<p><strong>bold</strong> and <em>ital</em></p>";
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "p",
    c: [
      { t: "text", v: "bold", m: ["b"] },
      { t: "text", v: " and " },
      { t: "text", v: "ital", m: ["i"] },
    ],
  });
});
// Nested mark elements merge into one mark array, listed outermost first.
test("composes nested marks into a single mark array", () => {
  const [paragraph] = htmlToSx("<p><strong><em>both</em></strong></p>").blocks;

  expect(paragraph).toEqual({
    t: "p",
    c: [{ t: "text", v: "both", m: ["b", "i"] }],
  });
});
// <b> and <strong> both map to "b"; nesting them must not repeat the mark.
test("dedupes repeated marks across nested wrappers", () => {
  const { blocks } = htmlToSx("<p><b><strong>x</strong></b></p>");
  const paragraph = blocks[0] as { c: Array<{ m?: string[] }> };
  const firstRun = paragraph.c[0];

  expect(firstRun.m).toEqual(["b"]);
});
// <br> turns into its own "\n" text run between the surrounding runs.
// This input has no mark elements, so none of the expected runs carries an `m` field.
test("treats <br> as a newline text run carrying marks", () => {
  const [paragraph] = htmlToSx("<p>a<br>b</p>").blocks;

  expect(paragraph).toEqual({
    t: "p",
    c: [
      { t: "text", v: "a" },
      { t: "text", v: "\n" },
      { t: "text", v: "b" },
    ],
  });
});
// <a href> becomes an inline "a" node that wraps its own child text runs.
test("parses anchor links with href", () => {
  const html = `<p><a href="/x">click</a></p>`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "p",
    c: [
      {
        t: "a",
        href: "/x",
        c: [{ t: "text", v: "click" }],
      },
    ],
  });
});
// Unknown inline wrappers such as <span> are dropped, but their text is kept as a
// separate run (the three runs are not merged into one).
test("strips unknown inline wrappers like span and keeps content", () => {
  const html = `<p>before <span class="x">middle</span> after</p>`;
  const [paragraph] = htmlToSx(html).blocks;

  expect(paragraph).toEqual({
    t: "p",
    c: [
      { t: "text", v: "before " },
      { t: "text", v: "middle" },
      { t: "text", v: " after" },
    ],
  });
});
// A paragraph holding only a [[sx:...]] token is replaced by a "shortcode" block.
test("parses a standalone shortcode out of plain text", () => {
  const { blocks } = htmlToSx("<p>[[sx:event-count]]</p>");

  expect(blocks).toEqual([
    {
      t: "shortcode",
      name: "event-count",
      args: {},
    },
  ]);
});
// Quoted and bare shortcode arguments both end up as string values in `args`.
test("parses a shortcode with quoted and bare args", () => {
  const html = `<p>[[sx:list tag="blog" limit=5]]</p>`;
  const { blocks } = htmlToSx(html);

  expect(blocks).toEqual([
    {
      t: "shortcode",
      name: "list",
      args: { tag: "blog", limit: "5" },
    },
  ]);
});
// A shortcode embedded in running text splits the paragraph in two, with the
// shortcode lifted out as its own block in between; surrounding spaces are kept.
test("lifts a shortcode out of a mixed paragraph", () => {
  const { blocks } = htmlToSx("<p>before [[sx:x]] after</p>");

  expect(blocks).toEqual([
    { t: "p", c: [{ t: "text", v: "before " }] },
    { t: "shortcode", name: "x", args: {} },
    { t: "p", c: [{ t: "text", v: " after" }] },
  ]);
});
// Generic containers contribute no block of their own; their children are
// flattened into the top-level block list in document order.
test("recurses into div/section/article containers", () => {
  const html = "<div><p>one</p><section><p>two</p></section></div>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toHaveLength(2);

  const [first, second] = blocks;
  expect(first).toEqual({ t: "p", c: [{ t: "text", v: "one" }] });
  expect(second).toEqual({ t: "p", c: [{ t: "text", v: "two" }] });
});
// Elements with no dedicated block type are kept as one raw "html" block
// whose `src` still contains the original markup.
test("falls back to html escape-hatch for unknown elements", () => {
  const { blocks } = htmlToSx(`<table><tr><td>x</td></tr></table>`);

  expect(blocks).toHaveLength(1);

  const fallback = blocks[0];
  expect(fallback.t).toBe("html");

  const { src } = fallback as { src: string };
  expect(src).toContain("<table>");
});
// Named character references in inline text must be decoded to their characters.
// The input has to contain an actual entity (`&amp;`): with a bare "&" the
// assertion would pass even if the parser performed no decoding at all.
test("decodes named entities in inline text", () => {
  const doc = htmlToSx("<p>A &amp; B</p>");
  expect(doc.blocks[0]).toEqual({
    t: "p",
    c: [{ t: "text", v: "A & B" }],
  });
});
// An empty <p></p> yields no block; only the paragraph with content survives.
test("ignores empty paragraphs", () => {
  const { blocks } = htmlToSx("<p></p><p>real</p>");

  expect(blocks).toHaveLength(1);
  expect(blocks[0]).toEqual({
    t: "p",
    c: [{ t: "text", v: "real" }],
  });
});