// syntaxai/tdd.md · main · src / a31_sxdoc_parse.test.ts

// a31_sxdoc_parse.test.ts 235 lines · 7277 bytes raw
import { test, expect } from "bun:test";
import { htmlToSx } from "./a31_sxdoc_parse.ts";
import { SX_DOC_VERSION } from "./a31_sxdoc.ts";

// An empty string yields a document stamped with the current version and no blocks.
test("returns an empty document for empty input", () => {
  const { v, blocks } = htmlToSx("");

  expect(v).toBe(SX_DOC_VERSION);
  expect(blocks).toEqual([]);
});

// A lone <p> becomes exactly one "p" block holding a single unmarked text run.
test("parses a simple paragraph", () => {
  const html = "<p>Hello world</p>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toHaveLength(1);
  expect(blocks[0]).toEqual({ t: "p", c: [{ t: "text", v: "Hello world" }] });
});

// Every heading tag from h1 to h6 must produce one "h" block whose `level`
// matches the tag number and whose content is the heading text.
test("parses headings with correct level for h1-h6", () => {
  const levels = [1, 2, 3, 4, 5, 6] as const;

  levels.forEach((level) => {
    const tag = `h${level}`;
    const { blocks } = htmlToSx(`<${tag}>Title ${level}</${tag}>`);

    expect(blocks).toHaveLength(1);
    expect(blocks[0]).toEqual({
      t: "h",
      level,
      c: [{ t: "text", v: `Title ${level}` }],
    });
  });
});

// Each <li> body is wrapped in its own "p" block; `items` holds one block
// array per list item.
test("parses unordered list with items wrapped as paragraphs", () => {
  const html = "<ul><li>one</li><li>two</li></ul>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toHaveLength(1);
  expect(blocks[0]).toEqual({
    t: "ul",
    items: [
      [{ t: "p", c: [{ t: "text", v: "one" }] }],
      [{ t: "p", c: [{ t: "text", v: "two" }] }],
    ],
  });
});

// An <ol> yields an "ol" block; its single item is wrapped in a paragraph.
// Only the tag and `items` are checked, not the block as a whole.
test("parses ordered list", () => {
  const { blocks } = htmlToSx("<ol><li>first</li></ol>");
  const list = blocks[0];

  expect(list.t).toBe("ol");

  const { items } = list as { items: unknown };
  expect(items).toEqual([
    [{ t: "p", c: [{ t: "text", v: "first" }] }],
  ]);
});

// A list nested inside an <li> appears as a second block of that item,
// right after the paragraph holding the item's own text.
test("parses nested lists inside a list item", () => {
  const html = "<ul><li>outer<ul><li>inner</li></ul></li></ul>";
  const { blocks } = htmlToSx(html);
  const outer = blocks[0] as { t: "ul"; items: unknown[][] };

  expect(outer.t).toBe("ul");

  const firstItem = outer.items[0];
  expect(firstItem).toHaveLength(2);

  const [lead, nested] = firstItem;
  expect(lead).toEqual({ t: "p", c: [{ t: "text", v: "outer" }] });
  expect(nested).toEqual({
    t: "ul",
    items: [[{ t: "p", c: [{ t: "text", v: "inner" }] }]],
  });
});

// A <blockquote> maps to a "quote" block whose children are the inner blocks.
test("parses blockquote with paragraph inside", () => {
  const html = "<blockquote><p>quoted</p></blockquote>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toEqual([
    {
      t: "quote",
      c: [{ t: "p", c: [{ t: "text", v: "quoted" }] }],
    },
  ]);
});

// Bare text directly inside a <blockquote> is wrapped in a paragraph block.
test("parses blockquote with loose text wraps it in a paragraph", () => {
  const html = "<blockquote>loose</blockquote>";
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "quote",
    c: [{ t: "p", c: [{ t: "text", v: "loose" }] }],
  });
});

// The `language-ts` class on <code> is surfaced as `lang: "ts"` on the code block.
test("parses pre>code with language hint", () => {
  const html = `<pre><code class="language-ts">const x = 1;</code></pre>`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "code",
    lang: "ts",
    src: "const x = 1;",
  });
});

// A bare <pre> still becomes a code block, with an empty language.
test("parses pre without inner code element", () => {
  const html = "<pre>raw text</pre>";
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "code",
    lang: "",
    src: "raw text",
  });
});

// Entities inside <pre><code> come back decoded in `src` (`&lt;p&gt;` -> `<p>`):
// the characters survive as literal source text.
test("preserves encoded entities in code blocks", () => {
  const html = `<pre><code>&lt;p&gt;</code></pre>`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "code",
    lang: "",
    src: "<p>",
  });
});

// `src` and `alt` attributes are copied onto the "img" block.
test("parses img with src and alt", () => {
  const html = `<img src="/x.png" alt="x icon">`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "img",
    src: "/x.png",
    alt: "x icon",
  });
});

// `width`/`height` attributes arrive as the numeric fields `w` and `h`.
test("parses img with width and height attributes", () => {
  const html = `<img src="/a.jpg" width="200" height="100">`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "img",
    src: "/a.jpg",
    w: 200,
    h: 100,
  });
});

// An <img> whose `src` is empty produces no block at all.
test("skips img with empty src", () => {
  const html = `<img src="">`;
  const { blocks } = htmlToSx(html);

  expect(blocks).toEqual([]);
});

// A <figure> collapses into its image, with the <figcaption> text as `caption`.
test("parses figure with figcaption", () => {
  const html = `<figure><img src="/y.png"><figcaption>nice y</figcaption></figure>`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "img",
    src: "/y.png",
    caption: "nice y",
  });
});

// <hr> maps to a bare "hr" block with no other fields.
test("parses hr", () => {
  const { blocks } = htmlToSx("<hr>");

  expect(blocks[0]).toEqual({ t: "hr" });
});

// <strong> and <em> become "b" and "i" marks on their text runs; the text
// between them stays an unmarked run.
test("parses inline bold and italic marks", () => {
  const html = "<p><strong>bold</strong> and <em>ital</em></p>";
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "p",
    c: [
      { t: "text", v: "bold", m: ["b"] },
      { t: "text", v: " and " },
      { t: "text", v: "ital", m: ["i"] },
    ],
  });
});

// Nested mark elements fold into one run whose mark list follows nesting
// order (outer "b" first, then inner "i").
test("composes nested marks into a single mark array", () => {
  const html = "<p><strong><em>both</em></strong></p>";
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "p",
    c: [{ t: "text", v: "both", m: ["b", "i"] }],
  });
});

// <b> wrapping <strong> both mean bold, so the run carries a single "b" mark.
test("dedupes repeated marks across nested wrappers", () => {
  const html = "<p><b><strong>x</strong></b></p>";
  const { blocks } = htmlToSx(html);
  const { c: runs } = blocks[0] as { c: Array<{ m?: string[] }> };

  expect(runs[0].m).toEqual(["b"]);
});

// <br> is emitted as its own text run containing "\n".
// NOTE(review): the title mentions marks, but this input has no marked text,
// so only the unmarked newline run is covered here.
test("treats <br> as a newline text run carrying marks", () => {
  const html = "<p>a<br>b</p>";
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "p",
    c: [
      { t: "text", v: "a" },
      { t: "text", v: "\n" },
      { t: "text", v: "b" },
    ],
  });
});

// An <a> becomes an inline "a" node carrying `href` and its own child runs.
test("parses anchor links with href", () => {
  const html = `<p><a href="/x">click</a></p>`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "p",
    c: [
      {
        t: "a",
        href: "/x",
        c: [{ t: "text", v: "click" }],
      },
    ],
  });
});

// The <span> wrapper is dropped, but its text stays as a separate unmarked
// run between the surrounding text runs.
test("strips unknown inline wrappers like span and keeps content", () => {
  const html = `<p>before <span class="x">middle</span> after</p>`;
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "p",
    c: [
      { t: "text", v: "before " },
      { t: "text", v: "middle" },
      { t: "text", v: " after" },
    ],
  });
});

// A paragraph containing only a shortcode is replaced by the shortcode block.
test("parses a standalone shortcode out of plain text", () => {
  const html = "<p>[[sx:event-count]]</p>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toEqual([
    {
      t: "shortcode",
      name: "event-count",
      args: {},
    },
  ]);
});

// Quoted and bare argument values both end up as strings in `args`
// (`limit=5` -> "5").
test("parses a shortcode with quoted and bare args", () => {
  const html = `<p>[[sx:list tag="blog" limit=5]]</p>`;
  const { blocks } = htmlToSx(html);

  expect(blocks).toEqual([
    {
      t: "shortcode",
      name: "list",
      args: { tag: "blog", limit: "5" },
    },
  ]);
});

// A shortcode embedded in paragraph text splits the paragraph in three:
// the text before it, the shortcode block, and the text after it.
test("lifts a shortcode out of a mixed paragraph", () => {
  const html = "<p>before [[sx:x]] after</p>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toEqual([
    { t: "p", c: [{ t: "text", v: "before " }] },
    { t: "shortcode", name: "x", args: {} },
    { t: "p", c: [{ t: "text", v: " after" }] },
  ]);
});

// Container elements add no block of their own; their paragraphs come out
// flattened in document order. Only <div> and <section> are exercised here;
// <article> is named in the title but not present in the input.
test("recurses into div/section/article containers", () => {
  const html = "<div><p>one</p><section><p>two</p></section></div>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toHaveLength(2);
  expect(blocks[0]).toEqual({ t: "p", c: [{ t: "text", v: "one" }] });
  expect(blocks[1]).toEqual({ t: "p", c: [{ t: "text", v: "two" }] });
});

// An element with no dedicated mapping (here <table>) is kept as a single
// "html" block whose `src` contains the original markup.
test("falls back to html escape-hatch for unknown elements", () => {
  const html = `<table><tr><td>x</td></tr></table>`;
  const { blocks } = htmlToSx(html);

  expect(blocks).toHaveLength(1);

  const fallback = blocks[0];
  expect(fallback.t).toBe("html");
  expect((fallback as { src: string }).src).toContain("<table>");
});

// Named entities in inline text are decoded (`&amp;` -> `&`).
test("decodes named entities in inline text", () => {
  const html = "<p>A &amp; B</p>";
  const { blocks } = htmlToSx(html);

  expect(blocks[0]).toEqual({
    t: "p",
    c: [{ t: "text", v: "A & B" }],
  });
});

// A <p> with no content is dropped; only the non-empty paragraph remains.
test("ignores empty paragraphs", () => {
  const html = "<p></p><p>real</p>";
  const { blocks } = htmlToSx(html);

  expect(blocks).toHaveLength(1);
  expect(blocks[0]).toEqual({
    t: "p",
    c: [{ t: "text", v: "real" }],
  });
});