syntaxai/tdd.md · main · src / c14_rust_graph_depth.ts

c14_rust_graph_depth.ts 391 lines · 13309 bytes raw
// c14 — adapter: builds a workspace-crate dependency DAG for a Cargo
// workspace rooted at a given path, then computes graphDepth via the
// pure helper in b32_graph_depth_polyglot.ts.
//
// Module-granularity per /sama/v2 §5 (operational) — see the comment
// at the top of b32_graph_depth_polyglot.ts. The TS metric works at
// file level; Go's natural unit is the package directory; Rust's
// natural unit is the CRATE (Cargo workspace member). graphDepth
// here = longest path through the workspace-internal crate
// dependency graph.
//
// Algorithm:
//   1. Read <root>/Cargo.toml.
//   2. Identify workspace members:
//      - From [workspace] members = [...]  — explicit list.
//      - If the root also has [package], the root itself is a
//        workspace member (a "regular workspace with root crate",
//        as ripgrep is — vs a "virtual workspace" where the root
//        has only [workspace]).
//   3. For each workspace member, read its own Cargo.toml. Get its
//      crate name from [package] name = "...".
//   4. Parse the member's [dependencies] only. [dev-dependencies]
//      are deliberately skipped: graphDepth is about production
//      deps, and dev-deps are not part of the runtime DAG. For each dep:
//        - If it has `path = "../foo"` or `path = "crates/foo"` → it
//          is internal when the dep names a workspace member's crate
//          (the path itself is not resolved to a directory).
//        - If `workspace = true` → look it up in the root's
//          [workspace.dependencies] map; if THAT has `path = "..."`,
//          it's a workspace-internal dep.
//        - Otherwise it's an external crate (crates.io) and excluded.
//   5. Edges = (importing-crate-name → imported-crate-name).
//   6. Pass to computeGraphDepth.
//
// The TOML subset parsed here is the same shape c14_sama_profile.ts
// handles for sama.profile.toml: string values, string arrays, and
// the dotted-section + inline-table forms Cargo manifests use. This
// adapter has its own scoped parser to avoid coupling the SAMA
// profile parser to Cargo's idioms.

import { readdirSync, readFileSync, statSync } from "node:fs";
import { basename, dirname, resolve } from "node:path";
import {
  computeGraphDepth,
  type GraphDepthResult,
} from "./b32_graph_depth_polyglot.ts";

// — Tiny TOML parser sufficient for Cargo.toml structure ----------

// A value as stored by parseCargoToml:
//   - string                  a quoted string with its quotes removed,
//                             or any other scalar (bool, number,
//                             unknown) kept as its raw source text;
//   - string[]                an array literal, keeping only its
//                             quoted-string elements;
//   - Record<string, string>  an inline table (`{ k = "v", ... }`) as
//                             returned by parseInlineTableLoose.
type TomlValue = string | string[] | Record<string, string>;

// Parsed manifest: section name → (key → value).
// Keys that appear before any header live under "__top__"; each
// [[array-of-tables]] entry gets its own "__arrtable__<name>_<i>"
// section. Dotted headers such as [workspace.dependencies] are stored
// under their full dotted name.
interface TomlDoc {
  sections: Map<string, Map<string, TomlValue>>;
}

/**
 * Removes a trailing `#` comment from one physical TOML line.
 *
 * A `#` only starts a comment when it sits outside a quoted string, so
 * values such as `"c#"` or `"https://host/#readme"` survive intact
 * (truncating them would also leave brackets unbalanced for the
 * multi-line stitching done by the parser). Basic strings (`"..."`)
 * honour backslash escapes; literal strings (`'...'`) do not.
 * Multi-line (triple-quoted) strings are not tracked across lines.
 *
 * @param line one line of manifest text, without its newline
 * @returns the line up to (not including) the first unquoted `#`
 */
const stripComment = (line: string): string => {
  let quote = "";
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (quote !== "") {
      // Inside a string: skip the character after a backslash in a
      // basic string so an escaped quote does not close it.
      if (ch === "\\" && quote === '"') i++;
      else if (ch === quote) quote = "";
      continue;
    }
    if (ch === '"' || ch === "'") quote = ch;
    else if (ch === "#") return line.slice(0, i);
  }
  return line;
};

/**
 * Loosely parses a TOML inline table such as
 * `{ version = "0.4", path = "crates/x", workspace = true }`.
 *
 * Entries are split on commas that are outside quoted strings AND
 * outside nested `[...]` / `{...}` values, so
 * `features = ["a", "b"]` stays one entry instead of being cut at its
 * inner comma. Quoted values lose their surrounding quotes; every
 * other value (bool, number, nested array/table) is kept as its raw
 * source text.
 *
 * @param raw the value text, expected to start with `{` and end with `}`
 * @returns key → string value; an empty object when `raw` is not a
 *          braced table or the table is empty
 */
const parseInlineTableLoose = (raw: string): Record<string, string> => {
  const out: Record<string, string> = {};
  const trimmed = raw.trim();
  if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return out;
  const inner = trimmed.slice(1, -1);

  // Split into `key = value` entries at top-level commas only.
  const entries: string[] = [];
  let cur = "";
  let quote = "";
  let escaped = false;
  let depth = 0;
  for (const ch of inner) {
    if (quote !== "") {
      cur += ch;
      if (escaped) escaped = false;
      else if (ch === "\\" && quote === '"') escaped = true; // basic strings only
      else if (ch === quote) quote = "";
      continue;
    }
    if (ch === '"' || ch === "'") quote = ch;
    else if (ch === "[" || ch === "{") depth++;
    else if (ch === "]" || ch === "}") depth--;
    else if (ch === "," && depth <= 0) {
      entries.push(cur);
      cur = "";
      continue;
    }
    cur += ch;
  }
  if (cur.trim() !== "") entries.push(cur);

  for (const entry of entries) {
    const eq = entry.indexOf("=");
    if (eq === -1) continue;
    const key = entry.slice(0, eq).trim();
    const value = entry.slice(eq + 1).trim();
    const quoted =
      (value.startsWith('"') && value.endsWith('"')) ||
      (value.startsWith("'") && value.endsWith("'"));
    // Unquoted values (true/false, numbers, nested arrays) keep their raw text.
    out[key] = quoted ? value.slice(1, -1) : value;
  }
  return out;
};

// True when `s` is wrapped in one matching pair of double or single quotes.
const isQuotedTomlString = (s: string): boolean =>
  (s.startsWith('"') && s.endsWith('"')) || (s.startsWith("'") && s.endsWith("'"));

// Converts the text to the right of `key =` into a TomlValue.
const parseCargoValue = (rawVal: string): TomlValue => {
  if (rawVal.startsWith("[") && rawVal.endsWith("]")) {
    // Array. Cargo's [workspace] members = ["crates/x", "crates/y"]
    // form is what we need; non-string elements are dropped.
    const inner = rawVal.slice(1, -1).trim();
    const strings: string[] = [];
    let cur = "";
    let depth = 0;
    let quote = "";
    let escaped = false;
    const flush = (): void => {
      const t = cur.trim();
      if (isQuotedTomlString(t)) strings.push(t.slice(1, -1));
      cur = "";
    };
    for (const ch of inner) {
      if (quote !== "") {
        cur += ch;
        if (escaped) escaped = false;
        else if (ch === "\\" && quote === '"') escaped = true;
        else if (ch === quote) quote = "";
        continue;
      }
      if (ch === '"' || ch === "'") {
        quote = ch;
        cur += ch;
        continue;
      }
      if (ch === "[" || ch === "{") depth++;
      else if (ch === "]" || ch === "}") depth--;
      // Only commas at nesting depth 0 separate elements.
      if (ch === "," && depth === 0) {
        flush();
        continue;
      }
      cur += ch;
    }
    flush();
    return strings;
  }
  if (rawVal.startsWith("{")) return parseInlineTableLoose(rawVal);
  // Quoted string → contents; bool / number / unknown → raw text.
  return isQuotedTomlString(rawVal) ? rawVal.slice(1, -1) : rawVal;
};

/**
 * Parses the subset of TOML that Cargo manifests need here.
 *
 * Handles `[table]` headers (dotted names are kept verbatim as the
 * section name), `[[array-of-tables]]` headers (each entry is routed
 * to its own `__arrtable__<name>_<i>` scratch section), and
 * `key = value` lines whose value is a string, an array of strings
 * (possibly spanning several lines), an inline table, or a raw scalar.
 * Keys that appear before any header are stored under `__top__`.
 *
 * Brackets and braces inside quoted strings are ignored when deciding
 * whether a value continues on the next line, so a string such as
 * `"see [docs"` no longer glues the rest of the file into one line.
 * Multi-line (triple-quoted) strings are not supported.
 *
 * @param text full manifest source (LF or CRLF line endings)
 * @returns the parsed sections
 */
export const parseCargoToml = (text: string): TomlDoc => {
  const sections = new Map<string, Map<string, TomlValue>>();
  let current = new Map<string, TomlValue>();
  sections.set("__top__", current);

  // Stitch multi-line array values (`members = [\n  "a",\n  "b",\n]`)
  // into single logical lines by tracking open brackets/braces.
  const logical: string[] = [];
  let buf = "";
  let arrayDepth = 0;
  let inlineDepth = 0;
  for (const raw of text.split(/\r?\n/)) {
    const line = stripComment(raw);
    buf = buf === "" ? line : buf + " " + line;
    // String state is per physical line: ordinary TOML strings cannot
    // span lines.
    let quote = "";
    let escaped = false;
    for (const c of line) {
      if (quote !== "") {
        if (escaped) escaped = false;
        else if (c === "\\" && quote === '"') escaped = true;
        else if (c === quote) quote = "";
        continue;
      }
      if (c === '"' || c === "'") quote = c;
      else if (c === "[") arrayDepth++;
      else if (c === "]") arrayDepth--;
      else if (c === "{") inlineDepth++;
      else if (c === "}") inlineDepth--;
    }
    // Everything is closed: the logical line is complete. Negative
    // depths (stray closers) are clamped back to zero.
    if (arrayDepth <= 0 && inlineDepth <= 0) {
      arrayDepth = 0;
      inlineDepth = 0;
      logical.push(buf);
      buf = "";
    }
  }
  if (buf.trim() !== "") logical.push(buf);

  const headerRe = /^\s*\[\s*([^\[\]]+)\s*\]\s*$/;          // [table]
  const arrayHeaderRe = /^\s*\[\[\s*([^\[\]]+)\s*\]\]\s*$/; // [[array-of-tables]]
  for (const rawLogical of logical) {
    const line = rawLogical.trim();
    if (line === "") continue;

    const ah = arrayHeaderRe.exec(line);
    if (ah) {
      // Array-of-tables (e.g. [[bin]], [[test]]). Entries are not
      // merged — each goes to a unique scratch section so its
      // key=value lines don't pollute the previous [table]
      // (notably [package]).
      const base = (ah[1] ?? "").trim();
      let i = 0;
      while (sections.has(`__arrtable__${base}_${i}`)) i++;
      current = new Map<string, TomlValue>();
      sections.set(`__arrtable__${base}_${i}`, current);
      continue;
    }

    const hm = headerRe.exec(line);
    if (hm) {
      const name = (hm[1] ?? "").trim();
      const existing = sections.get(name);
      if (existing) {
        current = existing;
      } else {
        current = new Map<string, TomlValue>();
        sections.set(name, current);
      }
      continue;
    }

    const eq = line.indexOf("=");
    if (eq === -1) continue;
    const key = line.slice(0, eq).trim();
    current.set(key, parseCargoValue(line.slice(eq + 1).trim()));
  }
  return { sections };
};

// — Adapter logic --------------------------------------------------

// One crate that belongs to the workspace, together with its parsed
// manifest. Built by collectWorkspaceMembers; the root crate (when the
// root manifest has [package]) uses dir ".".
interface WorkspaceMember {
  name: string;        // crate name from its own [package] name
  dir: string;         // repo-relative directory of its Cargo.toml
  toml: TomlDoc;       // parsed contents of that Cargo.toml
}

// Type guard: `v` is an array and none of its elements is a non-string.
const isStringArray = (v: TomlValue | undefined): v is string[] => {
  if (!Array.isArray(v)) return false;
  const hasNonString = v.some((item) => typeof item !== "string");
  return !hasNonString;
};

// Type guard: `v` is a non-null, non-array object, i.e. the shape
// parseInlineTableLoose returns.
const isInlineTable = (v: TomlValue | undefined): v is Record<string, string> => {
  if (v === null || typeof v !== "object") return false;
  return !Array.isArray(v);
};

// Expands one [workspace] members entry into repo-relative directories.
// Entries without `*` / `?` are returned unchanged. Wildcards are
// matched per path segment against the directory listing; `[...]`
// character classes are not supported and are treated literally.
const expandMemberGlob = (root: string, pattern: string): string[] => {
  if (!/[*?]/.test(pattern)) return [pattern];
  let dirs: string[] = [""];
  for (const seg of pattern.split("/")) {
    if (seg === "" || seg === ".") continue;
    const next: string[] = [];
    if (!/[*?]/.test(seg)) {
      for (const d of dirs) next.push(d === "" ? seg : `${d}/${seg}`);
    } else {
      const escaped = seg.replace(/[.+^${}()|[\]\\]/g, "\\$&");
      const re = new RegExp(`^${escaped.replace(/\*/g, ".*").replace(/\?/g, ".")}$`);
      for (const d of dirs) {
        let names: string[];
        try {
          names = readdirSync(resolve(root, d));
        } catch {
          continue; // parent is missing or not a directory
        }
        // Sorted so member order does not depend on the filesystem.
        for (const n of names.sort()) {
          if (re.test(n)) next.push(d === "" ? n : `${d}/${n}`);
        }
      }
    }
    dirs = next;
  }
  return dirs.filter((d) => d !== "");
};

/**
 * Lists the crates that make up the workspace rooted at `root`.
 *
 * Sources, in order:
 *   1. every entry of `[workspace] members`, with `*` / `?` glob
 *      entries (e.g. `"crates/*"`) expanded against the filesystem.
 *      Glob matches listed in `[workspace] exclude` are dropped.
 *   2. the root itself, when the root manifest has `[package]`
 *      (regular workspace with root crate — ripgrep's shape).
 *
 * A candidate is skipped, not reported, when its Cargo.toml cannot be
 * read or has no string `[package] name` — this is what filters out
 * glob matches that are not crates. Each directory is returned at
 * most once, so listing "." alongside a root [package] does not
 * duplicate the root crate.
 *
 * @param root absolute workspace root directory
 * @param rootToml parsed root Cargo.toml
 * @returns workspace members with their parsed manifests
 */
const collectWorkspaceMembers = (
  root: string,
  rootToml: TomlDoc,
): WorkspaceMember[] => {
  const out: WorkspaceMember[] = [];
  const claimed = new Set<string>(); // absolute dirs already emitted

  const ws = rootToml.sections.get("workspace");
  const members = ws?.get("members");
  const exclude = ws?.get("exclude");

  const excluded = new Set<string>();
  if (isStringArray(exclude)) {
    for (const e of exclude) excluded.add(resolve(root, e));
  }

  if (isStringArray(members)) {
    for (const pattern of members) {
      const fromGlob = /[*?]/.test(pattern);
      for (const md of expandMemberGlob(root, pattern)) {
        const abs = resolve(root, md);
        if (claimed.has(abs)) continue;
        // `exclude` only prunes glob matches; explicit entries win.
        if (fromGlob && excluded.has(abs)) continue;
        let text: string;
        try {
          text = readFileSync(resolve(abs, "Cargo.toml"), "utf8");
        } catch {
          continue; // not a crate directory (or unreadable)
        }
        const parsed = parseCargoToml(text);
        const name = parsed.sections.get("package")?.get("name");
        if (typeof name !== "string") continue;
        claimed.add(abs);
        out.push({ name, dir: md, toml: parsed });
      }
    }
  }

  // If the root itself has [package], the root is also a workspace
  // member — unless it was already picked up via an explicit ".".
  const rootName = rootToml.sections.get("package")?.get("name");
  if (typeof rootName === "string" && !claimed.has(resolve(root))) {
    out.push({ name: rootName, dir: ".", toml: rootToml });
  }
  return out;
};

// Reads the root manifest's [workspace.dependencies] table into a
// dep-name → spec map. Inline-table entries are stored as-is; plain
// string entries become `{ version: <string> }`; anything else
// (e.g. arrays) is ignored. Members that declare `workspace = true`
// are resolved against this map to see whether the shared entry
// carries a `path = "..."`.
const collectWorkspaceDependencies = (
  rootToml: TomlDoc,
): Map<string, Record<string, string>> => {
  const specs = new Map<string, Record<string, string>>();
  const table = rootToml.sections.get("workspace.dependencies");
  if (table === undefined) return specs;

  table.forEach((spec, depName) => {
    if (isInlineTable(spec)) {
      specs.set(depName, spec);
      return;
    }
    if (typeof spec === "string") {
      specs.set(depName, { version: spec });
    }
  });
  return specs;
};

/**
 * Decides whether one dependency entry points at another workspace
 * member and, if so, returns that member's crate name.
 *
 * The `path` value is never resolved on disk; a dep counts as internal
 * when it carries a path (directly, or through the root's
 * [workspace.dependencies] entry when `workspace = true`) AND the
 * crate it names is a known member. The crate named is the spec's
 * `package = "..."` when present — Cargo's dependency-rename syntax,
 * where the table key is only a local alias — otherwise the key.
 *
 * @param member the importing member (currently unused)
 * @param depName the key of the entry in the dependency table
 * @param depSpec the entry's parsed value
 * @param byName workspace members indexed by crate name
 * @param workspaceDeps the root's [workspace.dependencies] specs
 * @returns the target member's crate name, or null for external deps
 */
const memberHasInternalDep = (
  member: WorkspaceMember,
  depName: string,
  depSpec: TomlValue,
  byName: Map<string, WorkspaceMember>,
  workspaceDeps: Map<string, Record<string, string>>,
): string | null => {
  // A bare version string (or anything that is not a table) is an
  // external crate.
  if (!isInlineTable(depSpec)) return null;

  // Case A: the entry itself has path = "...".
  if (depSpec.path) {
    const crate = depSpec.package || depName;
    if (byName.has(crate)) return crate;
  }

  // Case B: `workspace = true` — inherit from the root manifest.
  // The lookup key is the table key; the shared entry may carry the
  // rename.
  if (depSpec.workspace === "true") {
    const inherited = workspaceDeps.get(depName);
    if (inherited && inherited.path) {
      const crate = inherited.package || depName;
      if (byName.has(crate)) return crate;
    }
  }
  return null;
};

// Result of computeRustGraphDepth: the generic graph-depth result
// tagged with the language and a display name for the workspace.
export interface RustGraphDepthResult extends GraphDepthResult {
  language: "rust";
  // Root [package] name, or the root directory's name when the root
  // manifest has no [package] name (virtual workspace).
  workspaceName: string;
}

/**
 * Computes graphDepth for the Cargo workspace rooted at `repoRoot`.
 *
 * Nodes are the workspace member crates; an edge A → B is added when
 * A's production dependencies include member B. Dependencies are read
 * from both spellings Cargo accepts:
 *   - entries of the `[dependencies]` table, and
 *   - `[dependencies.<name>]` sub-tables (the table form of the same
 *     entry).
 * Target-specific, build- and dev-dependency tables are not scanned.
 * Self-edges and duplicate edges are dropped.
 *
 * @param repoRoot path to the workspace root directory
 * @returns the graph-depth result plus language and workspace name
 * @throws if `repoRoot` does not exist, is not a directory, or has no
 *         readable Cargo.toml
 */
export const computeRustGraphDepth = (repoRoot: string): RustGraphDepthResult => {
  const root = resolve(repoRoot);
  if (!statSync(root).isDirectory()) {
    throw new Error(`expected a directory, got: ${repoRoot}`);
  }
  const rootToml = parseCargoToml(readFileSync(resolve(root, "Cargo.toml"), "utf8"));

  // Virtual workspace (no root [package]) — fall back to the directory
  // name. basename() respects the platform's path separator.
  const rootName = rootToml.sections.get("package")?.get("name");
  const workspaceName =
    typeof rootName === "string" ? rootName : basename(root) || "workspace";

  const members = collectWorkspaceMembers(root, rootToml);
  const byName = new Map<string, WorkspaceMember>();
  for (const m of members) byName.set(m.name, m);

  const workspaceDeps = collectWorkspaceDependencies(rootToml);

  const nodes = members.map((m) => m.name);
  const edges: Array<[string, string]> = [];
  const seen = new Set<string>();

  // Records from → target once, ignoring external deps and self-edges.
  const link = (from: string, target: string | null): void => {
    if (target === null || target === from) return;
    const key = `${from} ${target}`;
    if (seen.has(key)) return;
    seen.add(key);
    edges.push([from, target]);
  };

  const TABLE_PREFIX = "dependencies.";
  for (const m of members) {
    for (const [section, entries] of m.toml.sections) {
      if (section === "dependencies") {
        for (const [depName, depSpec] of entries) {
          link(m.name, memberHasInternalDep(m, depName, depSpec, byName, workspaceDeps));
        }
      } else if (section.startsWith(TABLE_PREFIX)) {
        // [dependencies.foo] with key = value lines: rebuild the
        // equivalent inline-table spec from its string-valued keys.
        const depName = section.slice(TABLE_PREFIX.length).replace(/^["']|["']$/g, "");
        const spec: Record<string, string> = {};
        for (const [k, v] of entries) {
          if (typeof v === "string") spec[k] = v;
        }
        link(m.name, memberHasInternalDep(m, depName, spec, byName, workspaceDeps));
      }
    }
  }

  const result = computeGraphDepth({ nodes, edges });
  return {
    ...result,
    language: "rust",
    workspaceName,
  };
};