syntaxai/tdd.md · main · src / c14_sama_profile.ts

c14_sama_profile.ts 286 lines · 9756 bytes raw
// c14 — adapter: loads + parses sama.profile.toml (the SAMA v2 profile
// declaration at the repo root) and walks the source tree to feed the
// v2 verifier. Layer 2 in SAMA v2 terms: this is the boundary where
// external input (the TOML file on disk + the contents of src/) is
// parsed into the typed SamaV2Input shape that the pure verifier in
// c32_sama_v2_verify consumes.
//
// The TOML parser handles the subset our profile uses (string values,
// string arrays, and arrays of inline tables) — not full TOML. The
// alternative is depending on an external parser; the subset is small
// enough that an inline implementation keeps the verifier dependency-
// free and easy to inspect.

import { readdirSync, readFileSync } from "node:fs";
import { resolve } from "node:path";
import {
  PROFILE_ATOMIC_EXEMPTION_VALUES,
  PROFILE_LAYOUT_VALUES,
  PROFILE_TESTS_VALUES,
  type LayerNumber,
  type LayerSpec,
  type ProfileAtomicExemption,
  type ProfileLayout,
  type ProfileSpec,
  type ProfileTests,
  type SamaV2Input,
  type Sublayer,
} from "./a31_sama_v2.ts";

// — TOML subset parser ----------------------------------------------

// Removes a trailing `# comment` from one physical line.
//
// A '#' only starts a comment when it sits outside a quoted string, so
// values such as "a#b" survive intact. Both '"' and "'" open a string
// that runs until the same character appears again; backslash escapes
// are not interpreted. If a quote is never closed, the rest of the line
// is treated as string content and nothing is stripped.
//
// Returns the text before the comment (trailing whitespace is kept), or
// the line unchanged when it carries no comment.
const stripComment = (line: string): string => {
  // Empty string means "not inside a quoted string".
  let openQuote = "";
  for (let i = 0; i < line.length; i++) {
    const ch = line.charAt(i);
    if (openQuote !== "") {
      if (ch === openQuote) openQuote = "";
    } else if (ch === '"' || ch === "'") {
      openQuote = ch;
    } else if (ch === "#") {
      return line.slice(0, i);
    }
  }
  return line;
};

// Parses a quoted TOML string value and returns the text between the
// quotes. Surrounding whitespace is ignored; the value must start and
// end with the same quote character ('"' or "'"). Escape sequences are
// not interpreted — the inner text is returned verbatim.
//
// Throws Error when the value is not a properly quoted string. That
// includes a lone quote character, which would otherwise count as both
// the opening and the closing delimiter and yield an empty string.
const parseStringValue = (raw: string): string => {
  const t = raw.trim();
  const quote = t.charAt(0);
  // length >= 2 guarantees the opening and closing quotes are distinct
  // characters of the input.
  if (t.length >= 2 && (quote === '"' || quote === "'") && t.endsWith(quote)) {
    return t.slice(1, -1);
  }
  throw new Error(`expected quoted string, got: ${raw}`);
};

// Parses a `[ "a", "b", ... ]` array of quoted strings held on one
// (logical) line and returns the unquoted elements in order.
//
// A single trailing comma after the last element is accepted, as TOML
// permits. Elements are split on every ',' — a comma inside a quoted
// element is not supported by this subset parser.
//
// Throws Error when the value is not wrapped in `[` `]`, or when any
// element is not a quoted string (via parseStringValue).
const parseStringArray = (raw: string): string[] => {
  const t = raw.trim();
  if (!t.startsWith("[") || !t.endsWith("]")) {
    throw new Error(`expected [..] array, got: ${raw}`);
  }
  const inner = t.slice(1, -1).trim();
  if (inner === "") return [];
  const items = inner.split(",").map((s) => s.trim());
  // Drop the empty piece produced by a trailing comma. A lone "," still
  // leaves one empty item behind, which parseStringValue rejects.
  if (items.length > 1 && items[items.length - 1] === "") items.pop();
  return items.map((s) => parseStringValue(s));
};

// Parses a single-line inline table, `{ key = "value", key2 = "value2" }`,
// into a plain object of string values.
//
// Entries are separated on every ',' and each entry is divided at its
// first '='; commas inside quoted values are not supported by this
// subset. Values must be quoted strings (see parseStringValue).
//
// Throws Error when the braces are missing, when an entry has no '=',
// or when a value is not a quoted string.
const parseInlineTable = (raw: string): Record<string, string> => {
  const trimmed = raw.trim();
  const isBraced = trimmed.startsWith("{") && trimmed.endsWith("}");
  if (!isBraced) {
    throw new Error(`expected inline table, got: ${raw}`);
  }

  const table: Record<string, string> = {};
  const body = trimmed.slice(1, -1).trim();
  if (body.length === 0) return table;

  body.split(",").forEach((entry) => {
    const sep = entry.indexOf("=");
    if (sep < 0) throw new Error(`malformed inline-table entry: ${entry}`);
    const name = entry.slice(0, sep).trim();
    const rhs = entry.slice(sep + 1).trim();
    table[name] = parseStringValue(rhs);
  });
  return table;
};

// Scratch state built up while parsing the profile text.
interface ParseState {
  // Section name -> (key -> parsed value). Keys that appear before any
  // `[section]` header are stored under the "__top__" entry. Values are
  // kept as `unknown` and narrowed when the ProfileSpec is assembled.
  sections: Map<string, Map<string, unknown>>;
}

// Parses the text of sama.profile.toml into a ProfileSpec.
//
// Works in three passes:
//   1. Strip comments and join physical lines into logical lines, so a
//      value whose `[` / `{` is still open continues on the next line.
//   2. Read each logical line as either a `[section]` header or a
//      `key = value` pair, where the value is a quoted string, a string
//      array, or an array of inline tables. Keys that appear before any
//      header are stored under "__top__".
//   3. Validate the collected values and assemble the ProfileSpec.
//
// Throws Error when a line cannot be parsed, when `sama_version` or
// `profile` is missing at the top level, when an optional dialect flag
// has a value outside its allowed set, when a `[layers.N]` section
// (N = 0..3) is missing, or when a layer's `sublayers` / `prefixes`
// value does not have the expected shape.
export const parseProfileToml = (text: string): ProfileSpec => {
  const state: ParseState = { sections: new Map() };
  const top = new Map<string, unknown>();
  state.sections.set("__top__", top);

  // Pre-process: join continuation lines for multi-line arrays of
  // inline tables. Walk by char-level bracket tracking — when '[' is
  // open in a value, keep accumulating until the matching ']' arrives.
  // Brackets are counted wherever they appear on the line, including
  // inside quoted strings.
  const physLines = text.split("\n");
  const logical: string[] = [];
  let buf = "";
  let depth = 0;
  for (const raw of physLines) {
    const line = stripComment(raw);
    if (depth === 0) {
      if (buf === "") buf = line; else buf += " " + line;
    } else {
      buf += " " + line;
    }
    for (const c of line) {
      if (c === "[" || c === "{") depth++;
      else if (c === "]" || c === "}") depth--;
    }
    if (depth <= 0) {
      // A stray closing bracket must not leave the depth negative for
      // the lines that follow.
      depth = 0;
      logical.push(buf);
      buf = "";
    }
  }
  // An unclosed bracket leaves a partial logical line; hand it to the
  // parser below so it is reported instead of silently dropped.
  if (buf.trim() !== "") logical.push(buf);

  // The map that receives `key = value` pairs; switches on each header.
  let current = top;
  for (const raw of logical) {
    const line = raw.trim();
    if (line === "") continue;
    if (line.startsWith("[") && line.endsWith("]")) {
      const sectionName = line.slice(1, -1).trim();
      let section = state.sections.get(sectionName);
      if (section === undefined) {
        section = new Map();
        state.sections.set(sectionName, section);
      }
      current = section;
      continue;
    }
    const eq = line.indexOf("=");
    if (eq === -1) throw new Error(`unparseable line: ${line}`);
    const key = line.slice(0, eq).trim();
    const valueRaw = line.slice(eq + 1).trim();
    let value: unknown;
    if (valueRaw.startsWith("[") && valueRaw.endsWith("]")) {
      // Array — string array or array of inline tables. Peek at the
      // first non-bracket char inside.
      const inner = valueRaw.slice(1, -1).trim();
      if (inner.startsWith("{")) {
        // Array of inline tables. Split on commas at depth 0.
        const tables: Array<Record<string, string>> = [];
        let cur = "";
        let d = 0;
        for (const c of inner) {
          if (c === "{") d++;
          if (c === "}") d--;
          if (c === "," && d === 0) {
            tables.push(parseInlineTable(cur));
            cur = "";
          } else {
            cur += c;
          }
        }
        // Whatever follows the last separator; blank after a trailing comma.
        if (cur.trim() !== "") tables.push(parseInlineTable(cur));
        value = tables;
      } else {
        value = parseStringArray(valueRaw);
      }
    } else {
      value = parseStringValue(valueRaw);
    }
    current.set(key, value);
  }

  // Now assemble ProfileSpec.
  const samaVersion = top.get("sama_version");
  const profile = top.get("profile");
  if (typeof samaVersion !== "string" || typeof profile !== "string") {
    throw new Error("profile must declare `sama_version` and `profile` at the top level");
  }

  // v2.1 optional dialect flags — see /sama/v2 §6.1–6.3 and the
  // ProfileSpec comment in a31_sama_v2.ts. Absent ≡ v2.0 defaults.
  const validateEnum = <T extends string>(
    fieldName: string,
    raw: unknown,
    allowed: readonly T[],
  ): T | undefined => {
    if (raw === undefined) return undefined;
    if (typeof raw !== "string") {
      throw new Error(
        `profile field \`${fieldName}\` must be a string, got: ${typeof raw}`,
      );
    }
    if (!(allowed as readonly string[]).includes(raw)) {
      const allowedQuoted = allowed.map((v) => JSON.stringify(v)).join(", ");
      throw new Error(
        `profile field \`${fieldName}\` has invalid value ${JSON.stringify(raw)} ` +
          `(expected one of: ${allowedQuoted}). ` +
          `See /sama/v2 §6 for the v2.1 dialect set.`,
      );
    }
    return raw as T;
  };

  const layout = validateEnum<ProfileLayout>(
    "layout",
    top.get("layout"),
    PROFILE_LAYOUT_VALUES,
  );
  const tests = validateEnum<ProfileTests>(
    "tests",
    top.get("tests"),
    PROFILE_TESTS_VALUES,
  );
  const atomicExemption = validateEnum<ProfileAtomicExemption>(
    "atomic_exemption",
    top.get("atomic_exemption"),
    PROFILE_ATOMIC_EXEMPTION_VALUES,
  );

  // Returns a layer field as an array, treating an absent key as empty.
  // The section maps hold `unknown`, so a profile that writes e.g.
  // `prefixes = "c"` must be rejected here rather than crash later.
  const expectArray = (k: LayerNumber, field: string, raw: unknown): unknown[] => {
    if (raw === undefined) return [];
    if (!Array.isArray(raw)) {
      throw new Error(
        `[layers.${k}] \`${field}\` must be an array, got: ${typeof raw}`,
      );
    }
    return raw;
  };

  // Builds the LayerSpec for layer `k` from its `[layers.k]` section.
  // A non-empty `sublayers` list wins over `prefixes`; `prefixes` is
  // only inspected when no sublayers are declared.
  const buildLayer = (k: LayerNumber): LayerSpec => {
    const sec = state.sections.get(`layers.${k}`);
    if (!sec) {
      throw new Error(`profile is missing required section [layers.${k}]`);
    }
    const subs: Sublayer[] = [];
    const sublayerRows = expectArray(k, "sublayers", sec.get("sublayers"));
    if (sublayerRows.length > 0) {
      sublayerRows.forEach((row, index) => {
        // A row that is not an inline table has neither field.
        const fields: Record<string, unknown> =
          typeof row === "object" && row !== null
            ? (row as Record<string, unknown>)
            : {};
        const name = fields.name;
        const prefix = fields.prefix;
        if (
          typeof name !== "string" || name === "" ||
          typeof prefix !== "string" || prefix === ""
        ) {
          throw new Error(`[layers.${k}] sublayer ${index} missing name/prefix`);
        }
        subs.push({ name, prefix, index });
      });
      return { sublayers: subs };
    }
    // No sublayers: each bare prefix becomes a sublayer named "default".
    // With no prefixes either, the layer stays empty, which is permitted
    // (spec §2.1: "Leave a canonical layer empty"). The verifier just
    // won't assign any file to it.
    const prefixes = expectArray(k, "prefixes", sec.get("prefixes"));
    prefixes.forEach((prefix, index) => {
      if (typeof prefix !== "string") {
        throw new Error(`[layers.${k}] prefix ${index} must be a string`);
      }
      subs.push({ name: "default", prefix, index });
    });
    return { sublayers: subs };
  };

  return {
    samaVersion,
    profile,
    ...(layout !== undefined ? { layout } : {}),
    ...(tests !== undefined ? { tests } : {}),
    ...(atomicExemption !== undefined ? { atomicExemption } : {}),
    layers: {
      0: buildLayer(0),
      1: buildLayer(1),
      2: buildLayer(2),
      3: buildLayer(3),
    },
  };
};

// — Filesystem I/O --------------------------------------------------

// Default repo root for the loaders in this file: the process's current
// working directory, captured once when this module is evaluated.
const REPO_ROOT_GUESS = process.cwd();

// Reads `<repoRoot>/sama.profile.toml` through Bun's file API and
// parses it into a ProfileSpec. The returned promise rejects if the
// file cannot be read or if parseProfileToml throws on its contents.
export const loadProfile = async (
  repoRoot: string = REPO_ROOT_GUESS,
): Promise<ProfileSpec> => {
  const profilePath = resolve(repoRoot, "sama.profile.toml");
  const contents = await Bun.file(profilePath).text();
  return parseProfileToml(contents);
};

// Reads every `.ts` file that sits directly in `<repoRoot>/src`
// (sources and test siblings alike; subdirectories are not descended
// into) and returns the file contents keyed by repo-relative path
// ("src/cXX_*.ts"). Entries keep the order readdirSync reports them in.
export const loadRepoFiles = (
  repoRoot: string = REPO_ROOT_GUESS,
): Map<string, string> => {
  const srcDir = resolve(repoRoot, "src");
  const tsFiles = readdirSync(srcDir, { withFileTypes: true }).filter(
    (entry) => entry.isFile() && entry.name.endsWith(".ts"),
  );
  return new Map(
    tsFiles.map((entry): [string, string] => [
      `src/${entry.name}`,
      readFileSync(resolve(srcDir, entry.name), "utf8"),
    ]),
  );
};

// Convenience wrapper: loads the profile, then the src/ file map, and
// bundles both into the SamaV2Input the verifier consumes. Handler
// code calls this and passes the result straight to verifySamaV2.
export const buildSamaV2Input = async (
  repoRoot: string = REPO_ROOT_GUESS,
): Promise<SamaV2Input> => {
  // The profile is awaited first; files are only read once it parsed.
  const profile = await loadProfile(repoRoot);
  const files = loadRepoFiles(repoRoot);
  return { profile, files };
};