ddd862b1e2a436f9e4ba6e8d6609638a87e60f1b diff --git a/content/blog/sama-v2-go-project-dive.md b/content/blog/sama-v2-go-project-dive.md index fbd77a172d1e9bdd7e8f9787a01e0aa7f42d979a..c1606c03b106b652e7cd533fa8a0bee99b4a522c 100644 --- a/content/blog/sama-v2-go-project-dive.md +++ b/content/blog/sama-v2-go-project-dive.md @@ -146,7 +146,7 @@ Derives from Law. No file's declared layer is contradicted by what it imports. | metric | `dive` (Go) | WP plugin (PHP, estimated) | tdd.md (TS, measured) | |---|---|---|---| | §4 checks passing | ~5 / 7 (estimated) | 0 / 7 | 7 / 7 | -| graphDepth | ~5 (estimated; cmd → command → ui → dive → filetree → internal/utils) | ~3 | 7 | +| **graphDepth** | **12 (measured, [dive@d6c69194](https://github.com/wagoodman/dive/commit/d6c691947f8fda635c952a17ee3b7555379d58f0))** — originally estimated ~5 | ~3 | 7 | | boundaryRatio | ~85% (estimated; one borderline case in `options/ci.go`) | <10% | 100% | | **workingSetFit (50–500 LOC)** | **52.17% (measured, [dive@d6c69194](https://github.com/wagoodman/dive/commit/d6c691947f8fda635c952a17ee3b7555379d58f0))** — originally estimated ~80% | ~47% | 80% (measured) | | violationCounts (sum) | ~30 (estimated; mostly Modeled-tests gaps) | 17+ | 0 | @@ -159,6 +159,16 @@ The distribution explains it: **44 files under 50 LOC** (mostly small type-only The original ~80% estimate was wrong, and wrong in a direction casual eyeballing wouldn't catch — counting visible-on-the-screen files isn't the same as counting them and applying a band filter. That 28-point miss between estimate and measurement is itself the empirical case for the metric existing at all: the metric surfaces a property the human estimate missed. 
+### graphDepth, measured: 12 (originally estimated ~5) + +The polyglot graphDepth emitter at [`scripts/measure-graph-depth.ts`](/GIT/syntaxai/tdd.md/blob/main/scripts/measure-graph-depth.ts) walks `dive`'s [`go.mod`](https://github.com/wagoodman/dive/blob/d6c691947f8fda635c952a17ee3b7555379d58f0/go.mod), collects every `.go` file's imports, filters to intra-module imports (those starting with `github.com/wagoodman/dive/`), aggregates them per-package-directory, and computes the longest path. The result for `dive@d6c69194`: **27 package directories, 80 internal edges, longest dependency chain of depth 12**. + +A 12-deep import chain is more than twice the audit's eyeball estimate of ~5. The estimate was wrong because I was thinking in *top-level package categories* (`cmd`, `command`, `ui`, `dive`, `filetree`, `internal/utils` — six things), but the actual Go package graph treats each subdirectory as its own package. `cmd/dive/cli/internal/ui/v1/viewmodel` is a different package from `cmd/dive/cli/internal/ui/v1/view`, even though they read like one category to a human; the import graph sees them as distinct hops. The 12-deep chain weaves through subdirectories the human-readable description folded into one bullet. + +This is the same shape of finding as the workingSetFit one above: the *metric* sees the structure; the *eye* sees the categories. Both are useful, but only the metric is mechanically comparable across repos. + +Module-granularity note: the polyglot graphDepth metric counts at the Go package-directory level — multiple `.go` files in one directory share their package and therefore their imports. This is the natural Go analog to the TS file-level metric (TS one module ≈ one file; Go one package ≈ one directory). The semantic is documented in [`src/b32_graph_depth_polyglot.ts`](/GIT/syntaxai/tdd.md/blob/main/src/b32_graph_depth_polyglot.ts). 
+ ## What `dive` would look like at 7/7 — the last 30% Far less work than the WordPress refactor sketch from earlier. Three concrete changes get from ~5/7 to 7/7: diff --git a/content/blog/sama-v2-rust-project-ripgrep.md b/content/blog/sama-v2-rust-project-ripgrep.md index 62c0dd00aa75b5a0cda0dc0218e4cb8b655673bf..189a58165c47a0afc6a8dd9b12834ebf8b352792 100644 --- a/content/blog/sama-v2-rust-project-ripgrep.md +++ b/content/blog/sama-v2-rust-project-ripgrep.md @@ -150,7 +150,7 @@ Derives from Law on the same edge set. | metric | ripgrep | dive (Go) | tdd.md (TS, measured) | WP plugin (PHP) | |---|---|---|---|---| | §4 checks passing | ~3/7 strict, ~5/7 under v2.1 dialects (estimated) | ~5/7 (estimated) | 7/7 ✓ | 0/7 | -| graphDepth | ~5 estimated (matcher → engine → searcher → printer → core) | ~5 (estimated) | 7 | ~3 | +| **graphDepth** | **5 (measured, [ripgrep@4519153e](https://github.com/BurntSushi/ripgrep/commit/4519153e5e461527f4bca45b042fff45c4ec6fb9))** — originally estimated ~5, confirmed exactly | **12 (measured, [dive@d6c69194](https://github.com/wagoodman/dive/commit/d6c691947f8fda635c952a17ee3b7555379d58f0))** — originally estimated ~5 | 7 | ~3 | | boundaryRatio | ~95% (estimated) | ~85% (estimated) | 100% | <10% | | **workingSetFit (50–500 LOC)** | **54.00% (measured, [ripgrep@4519153e](https://github.com/BurntSushi/ripgrep/commit/4519153e5e461527f4bca45b042fff45c4ec6fb9))** — originally estimated ~60% | **52.17% (measured, [dive@d6c69194](https://github.com/wagoodman/dive/commit/d6c691947f8fda635c952a17ee3b7555379d58f0))** — originally estimated ~80% | 80% | ~47% | | violationCounts (sum) | ~50 estimated (Atomic + Modeled-tests under sibling-rule) | ~30 (estimated) | 0 | 17+ | @@ -163,6 +163,33 @@ The cross-repo comparison the measurement makes possible is more interesting tha This is exactly the §5 intent. The metric surfaces a property; whether that property is good or bad depends on what the file content *should be*. 
Compliance scores conflate the two; metrics keep them separate. +### graphDepth, measured: 5 (originally estimated ~5 — confirmed exactly) + +The polyglot graphDepth emitter at [`scripts/measure-graph-depth.ts`](/GIT/syntaxai/tdd.md/blob/main/scripts/measure-graph-depth.ts) reads `ripgrep`'s root [`Cargo.toml`](https://github.com/BurntSushi/ripgrep/blob/4519153e5e461527f4bca45b042fff45c4ec6fb9/Cargo.toml), identifies workspace members + the root crate, parses each member's `[dependencies]` section (production deps only — `[dev-dependencies]` excluded from the runtime DAG), filters to workspace-internal deps (`path = "../foo"` or `workspace = true` cross-referenced against `[workspace.dependencies]`), and computes the longest crate-level chain. The result for `ripgrep@4519153e`: **10 workspace crates, 15 internal edges, longest dependency chain of depth 5**. + +**Hand-trace** (auditable per [/sama/v2 §0](/sama/v2)). The 10 workspace crates and their internal edges, extracted from `crates/*/Cargo.toml`: + +| crate | internal deps | +|---|---| +| `ripgrep` (root, binary `rg`) | `grep`, `ignore` | +| `grep` (meta-crate) | `grep-cli`, `grep-matcher`, `grep-pcre2`, `grep-printer`, `grep-regex`, `grep-searcher` | +| `grep-cli` | `globset` | +| `grep-matcher` | *(none — pure trait crate, the abstraction at the bottom)* | +| `grep-pcre2` | `grep-matcher` | +| `grep-regex` | `grep-matcher` | +| `grep-searcher` | `grep-matcher` | +| `grep-printer` | `grep-matcher`, `grep-searcher` | +| `ignore` | `globset` | +| `globset` | *(none — leaf crate)* | + +**15 edges total** (count: 2 + 6 + 1 + 0 + 1 + 1 + 1 + 2 + 1 + 0 = 15 ✓). + +The longest path: **`ripgrep → grep → grep-printer → grep-searcher → grep-matcher`** — five crates, depth 5. It is the only path that reaches depth 5; every alternative is shorter (e.g. `ripgrep → grep → grep-pcre2 → grep-matcher` is only depth 4; `ripgrep → grep → grep-searcher → grep-matcher` is depth 4; the printer-via-searcher chain is what wins). 
The audit's original estimate "(matcher → engine → searcher → printer → core)" turns out to describe the same chain reading bottom-up: `matcher ← searcher ← printer ← grep ← ripgrep`. Same five nodes, same depth, confirmed by measurement. + +Module-granularity note: the polyglot graphDepth metric counts at the Rust crate level — each Cargo workspace member is one node. This is the natural Rust analog to the TS file-level metric (TS one module ≈ one file; Rust one module ≈ one crate). Semantic documented in [`src/b32_graph_depth_polyglot.ts`](/GIT/syntaxai/tdd.md/blob/main/src/b32_graph_depth_polyglot.ts). + +The contrast with `dive`'s measured depth 12 is itself interesting: ripgrep's crate-level graph is *flatter* than dive's package-directory graph, even though both are mature CLI codebases. Some of that is genuine — ripgrep's workspace is 10 crates organized as a clean DAG; dive's 27 package directories include many subdirectory hops that drive the chain longer. Some is granularity: a Rust crate often contains what a Go developer would split into multiple package directories. The two depths aren't directly comparable for "which codebase is deeper"; they ARE directly comparable as "graphDepth at each language's natural module unit," which is the spec's intent. + ## What a rebuilt ripgrep would look like — the small version **For the full parallel-architecture sketch — every layer, every file move, predicted §5 metrics, the rebuilt `sama.profile.toml`, and concrete Rust code samples for the two file splits — see the companion post: [`ripgrep`, rebuilt under SAMA v2](/blog/sama-v2-rust-project-ripgrep-rebuilt).** diff --git a/scripts/measure-graph-depth.ts b/scripts/measure-graph-depth.ts new file mode 100644 index 0000000000000000000000000000000000000000..fbb4a1622793cec0242c919686045400897820ad --- /dev/null +++ b/scripts/measure-graph-depth.ts @@ -0,0 +1,63 @@ +#!/usr/bin/env bun +// measure-graph-depth — CLI for the §5 polyglot graphDepth metric. 
+// Given a path to a checked-out Go module or Rust Cargo workspace, +// emit the measured longest dependency chain as JSON to stdout. +// +// Usage: +// bun scripts/measure-graph-depth.ts REPO_PATH --lang go +// bun scripts/measure-graph-depth.ts REPO_PATH --lang rust +// +// Module-granularity per language: Go = package directory (multiple +// .go files in one directory share imports); Rust = crate (Cargo +// workspace member). See /sama/v2 §5 (operational) and the source +// comment at the top of src/b32_graph_depth_polyglot.ts. + +import { computeGoGraphDepth } from "../src/c14_go_graph_depth.ts"; +import { computeRustGraphDepth } from "../src/c14_rust_graph_depth.ts"; + +const args = process.argv.slice(2); + +const usage = (): never => { + console.error( + "Usage: bun scripts/measure-graph-depth.ts --lang go|rust", + ); + process.exit(2); +}; + +if (args.length < 3) usage(); + +const repoPath = args[0]!; +let lang: "go" | "rust" | null = null; + +for (let i = 1; i < args.length; i++) { + const a = args[i]; + if (a === "--lang") { + const v = args[++i]; + if (v !== "go" && v !== "rust") { + console.error(`--lang must be "go" or "rust", got: ${v}`); + process.exit(2); + } + lang = v; + } else { + console.error(`unknown argument: ${a}`); + usage(); + } +} + +if (lang === null) usage(); + +const result = + lang === "go" ? computeGoGraphDepth(repoPath) : computeRustGraphDepth(repoPath); + +const output: Record = { + language: result.language, + repoPath, + ...(lang === "go" + ? 
{ modulePath: (result as { modulePath: string }).modulePath } + : { workspaceName: (result as { workspaceName: string }).workspaceName }), + nodeCount: result.nodeCount, + edgeCount: result.edgeCount, + depth: result.depth, +}; + +console.log(JSON.stringify(output, null, 2)); diff --git a/src/b32_graph_depth_polyglot.test.ts b/src/b32_graph_depth_polyglot.test.ts new file mode 100644 index 0000000000000000000000000000000000000000..1879793f1397ca2ac7ced16a62631d501e9a3382 --- /dev/null +++ b/src/b32_graph_depth_polyglot.test.ts @@ -0,0 +1,137 @@ +import { describe, expect, test } from "bun:test"; +import { computeGraphDepth, type Graph } from "./b32_graph_depth_polyglot.ts"; + +// Mirror b32_sama_v2_metrics.test.ts graphDepth cases. Same algorithm +// (longest path in import DAG with bounded cycles), same edge-cases +// (empty, single node, linear chain, cycle, branching). The polyglot +// helper is allowed to be language-agnostic but the formula and +// cycle-handling must match the TS reference. 
+ +describe("computeGraphDepth — empty + trivial", () => { + test("empty graph → 0 (matches TS metric on an empty file map)", () => { + const r = computeGraphDepth({ nodes: [], edges: [] }); + expect(r.depth).toBe(0); + expect(r.nodeCount).toBe(0); + expect(r.edgeCount).toBe(0); + }); + + test("single node, no edges → 1", () => { + const r = computeGraphDepth({ nodes: ["a"], edges: [] }); + expect(r.depth).toBe(1); + expect(r.nodeCount).toBe(1); + expect(r.edgeCount).toBe(0); + }); +}); + +describe("computeGraphDepth — linear chains", () => { + test("chain a → b → c → 3", () => { + const r = computeGraphDepth({ + nodes: ["a", "b", "c"], + edges: [["a", "b"], ["b", "c"]], + }); + expect(r.depth).toBe(3); + expect(r.edgeCount).toBe(2); + }); + + test("chain of 5 → 5 (matches the TS chain p3 → p2 → p1 → p0 case)", () => { + const r = computeGraphDepth({ + nodes: ["a", "b", "c", "d", "e"], + edges: [["a", "b"], ["b", "c"], ["c", "d"], ["d", "e"]], + }); + expect(r.depth).toBe(5); + }); +}); + +describe("computeGraphDepth — cycles are bounded", () => { + test("cycle of 2 (a → b → a) terminates with finite depth", () => { + const r = computeGraphDepth({ + nodes: ["a", "b"], + edges: [["a", "b"], ["b", "a"]], + }); + expect(Number.isFinite(r.depth)).toBe(true); + expect(r.depth).toBeGreaterThanOrEqual(1); + }); + + test("cycle of 3 (a → b → c → a) terminates with finite depth", () => { + const r = computeGraphDepth({ + nodes: ["a", "b", "c"], + edges: [["a", "b"], ["b", "c"], ["c", "a"]], + }); + expect(Number.isFinite(r.depth)).toBe(true); + expect(r.depth).toBeGreaterThanOrEqual(1); + }); + + test("self-loop is also bounded", () => { + const r = computeGraphDepth({ + nodes: ["a"], + edges: [["a", "a"]], + }); + expect(Number.isFinite(r.depth)).toBe(true); + }); +}); + +describe("computeGraphDepth — branching → longest path, not sum", () => { + test("a → {b, c} → d (diamond) → 3 (not 4)", () => { + const r = computeGraphDepth({ + nodes: ["a", "b", "c", "d"], + edges: 
[["a", "b"], ["a", "c"], ["b", "d"], ["c", "d"]], + }); + expect(r.depth).toBe(3); + }); + + test("two disjoint chains; longest wins", () => { + const r = computeGraphDepth({ + nodes: ["a", "b", "x", "y", "z"], + edges: [["a", "b"], ["x", "y"], ["y", "z"]], + }); + // chain a→b has length 2; chain x→y→z has length 3. + expect(r.depth).toBe(3); + }); + + test("two paths different lengths, max picks the longer", () => { + // a → b → c → d (4) and a → e (2). Longest path = 4. + const r = computeGraphDepth({ + nodes: ["a", "b", "c", "d", "e"], + edges: [["a", "b"], ["b", "c"], ["c", "d"], ["a", "e"]], + }); + expect(r.depth).toBe(4); + }); +}); + +describe("computeGraphDepth — edge filtering", () => { + test("edges referencing non-declared nodes are silently ignored", () => { + const r = computeGraphDepth({ + nodes: ["a", "b"], + edges: [["a", "b"], ["a", "external"], ["external", "a"]], + }); + // Only the a → b edge is between declared nodes. Depth = 2. + expect(r.depth).toBe(2); + expect(r.edgeCount).toBe(1); + }); +}); + +describe("computeGraphDepth — reproducibility", () => { + test("same input → identical output across two runs (deep-equal)", () => { + const g: Graph = { + nodes: ["a", "b", "c", "d"], + edges: [["a", "b"], ["b", "c"], ["c", "d"]], + }; + const r1 = computeGraphDepth(g); + const r2 = computeGraphDepth(g); + expect(r1).toEqual(r2); + }); + + test("edge order independence — same edges in different order → same depth", () => { + const r1 = computeGraphDepth({ + nodes: ["a", "b", "c"], + edges: [["a", "b"], ["b", "c"]], + }); + const r2 = computeGraphDepth({ + nodes: ["c", "a", "b"], + edges: [["b", "c"], ["a", "b"]], + }); + expect(r1.depth).toBe(r2.depth); + expect(r1.nodeCount).toBe(r2.nodeCount); + expect(r1.edgeCount).toBe(r2.edgeCount); + }); +}); diff --git a/src/b32_graph_depth_polyglot.ts b/src/b32_graph_depth_polyglot.ts new file mode 100644 index 0000000000000000000000000000000000000000..a57693a70a2005765ec64ce4dc0d599fa3a38d14 --- 
/dev/null +++ b/src/b32_graph_depth_polyglot.ts @@ -0,0 +1,89 @@ +// b32 — logic: §5 graphDepth metric for polyglot dependency graphs. +// Pure function, no I/O. Given a directed graph as nodes + edges, +// returns the longest path length using memoised DFS. Cycles are +// bounded (back-edge target treated as terminal of depth 1) so the +// function always terminates with a finite number, mirroring +// b32_sama_v2_metrics.ts's computeGraphDepth. +// +// Module-granularity note (per /sama/v2 §5 operational and the v2.1 +// dialects at /sama/v2#6a-v21-dialects-provisional): the TS metric +// works at FILE level because in TS one module ≈ one file. The +// natural cross-language analog is per-language: Go's unit is the +// PACKAGE DIRECTORY (multiple .go files in one directory all live in +// the same package and share imports); Rust's unit is the CRATE +// (Cargo workspace member). The depth measured here is the longest +// chain of dependency relationships at each language's natural unit. +// This semantic is documented in the adapter source comments +// (c14_go_graph_depth.ts, c14_rust_graph_depth.ts) and surfaced in +// the audit page hand-traces. +// +// Consumed by the two adapters which feed it a pre-built {nodes, +// edges} pair, keeping this module pure and unit-testable. + +export interface Graph { + // List of node identifiers (deduplicated by the adapter before + // calling). For Go: package-directory repo-relative paths. For + // Rust: workspace-crate names. The helper does not care which. + nodes: ReadonlyArray; + // Directed edges as [from, to]. The helper is forgiving: edges + // referencing nodes not in `nodes` are silently ignored (they + // can't extend a path through nodes the caller did not declare). 
+ edges: ReadonlyArray; +} + +export interface GraphDepthResult { + nodeCount: number; + edgeCount: number; + depth: number; +} + +export const computeGraphDepth = (graph: Graph): GraphDepthResult => { + const nodeSet = new Set(graph.nodes); + if (nodeSet.size === 0) { + return { nodeCount: 0, edgeCount: 0, depth: 0 }; + } + + // Adjacency (only edges that connect declared nodes). + const adj = new Map(); + for (const n of nodeSet) adj.set(n, []); + let edgeCount = 0; + for (const [from, to] of graph.edges) { + if (!nodeSet.has(from) || !nodeSet.has(to)) continue; + adj.get(from)!.push(to); + edgeCount++; + } + + // Memoised DFS for longest path. Cycle handling matches + // b32_sama_v2_metrics.ts: a re-entered node returns depth 1 so the + // recursion terminates with a finite value (the Law check would + // flag a cycle separately; the metric still has to emit a number). + const memo = new Map(); + const visiting = new Set(); + + const depthFrom = (node: string): number => { + const cached = memo.get(node); + if (cached !== undefined) return cached; + if (visiting.has(node)) return 1; + visiting.add(node); + let best = 1; + for (const next of adj.get(node) ?? 
[]) { + const d = depthFrom(next) + 1; + if (d > best) best = d; + } + visiting.delete(node); + memo.set(node, best); + return best; + }; + + let max = 0; + for (const n of nodeSet) { + const d = depthFrom(n); + if (d > max) max = d; + } + + return { + nodeCount: nodeSet.size, + edgeCount, + depth: max, + }; +}; diff --git a/src/c14_go_graph_depth.test.ts b/src/c14_go_graph_depth.test.ts new file mode 100644 index 0000000000000000000000000000000000000000..c111f7a1498b76b7920c8c2607dbda003dd04493 --- /dev/null +++ b/src/c14_go_graph_depth.test.ts @@ -0,0 +1,172 @@ +import { afterAll, beforeAll, describe, expect, test } from "bun:test"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { resolve } from "node:path"; +import { + collectGoImports, + computeGoGraphDepth, + parseGoModulePath, +} from "./c14_go_graph_depth.ts"; + +const FIXTURE = mkdtempSync(resolve(tmpdir(), "tdd-md-go-graph-")); + +const writeFile = (rel: string, content: string): void => { + const abs = resolve(FIXTURE, rel); + mkdirSync(abs.split("/").slice(0, -1).join("/"), { recursive: true }); + writeFileSync(abs, content); +}; + +beforeAll(() => { + writeFile( + "go.mod", + `module github.com/example/fixture + +go 1.22 +`, + ); + // Three packages forming a chain entry → middle → leaf, plus + // some external imports we should NOT count. + writeFile( + "cmd/entry/main.go", + `package main + +import ( + "fmt" + "github.com/example/fixture/internal/middle" + "github.com/example/external/library" +) + +func main() { + fmt.Println(middle.X) + _ = library.Y +} +`, + ); + writeFile( + "internal/middle/middle.go", + `package middle + +import ( + "github.com/example/fixture/internal/leaf" +) + +var X = leaf.Z +`, + ); + writeFile( + "internal/leaf/leaf.go", + `package leaf + +var Z = 1 +`, + ); + // A test file that should be excluded. 
+ writeFile( + "internal/leaf/leaf_test.go", + `package leaf + +import ( + "testing" + "github.com/example/fixture/internal/middle" +) + +func TestZ(t *testing.T) { _ = middle.X } +`, + ); + // A vendored file that should also be skipped. + writeFile( + "vendor/some/lib.go", + `package some + +import "github.com/example/fixture/cmd/entry" + +var _ = entry.X +`, + ); +}); + +afterAll(() => { + rmSync(FIXTURE, { recursive: true, force: true }); +}); + +describe("parseGoModulePath", () => { + test("extracts the module path from a typical go.mod", () => { + expect(parseGoModulePath('module github.com/x/y\n\ngo 1.22\n')) + .toBe('github.com/x/y'); + }); + + test("handles quoted module paths", () => { + expect(parseGoModulePath('module "github.com/x/y"\n')) + .toBe('github.com/x/y'); + }); + + test("throws when the go.mod has no module directive", () => { + expect(() => parseGoModulePath('go 1.22\n')).toThrow(/module/); + }); +}); + +describe("collectGoImports", () => { + test("single-line import", () => { + expect(collectGoImports('package x\n\nimport "fmt"\n')).toEqual(['fmt']); + }); + + test("block import", () => { + const imports = collectGoImports(`package x + +import ( + "fmt" + "strings" + "github.com/x/y" +) +`); + expect(imports).toEqual(['fmt', 'strings', 'github.com/x/y']); + }); + + test("aliased imports", () => { + const imports = collectGoImports(`package x + +import ( + myfmt "fmt" + _ "side-effect/pkg" +) +`); + expect(imports).toEqual(['fmt', 'side-effect/pkg']); + }); + + test("ignores commented-out imports", () => { + const imports = collectGoImports(`package x + +// import "ignored" +import "fmt" +`); + expect(imports).toEqual(['fmt']); + }); +}); + +describe("computeGoGraphDepth — end-to-end on fixture", () => { + test("entry → middle → leaf chain produces depth 3", () => { + const r = computeGoGraphDepth(FIXTURE); + expect(r.language).toBe('go'); + expect(r.modulePath).toBe('github.com/example/fixture'); + // Three intra-module package directories: 
cmd/entry, + // internal/middle, internal/leaf. (vendor/some excluded.) + expect(r.nodeCount).toBe(3); + // Two intra-module edges: cmd/entry → internal/middle, + // internal/middle → internal/leaf. (External and vendored + // edges excluded; the _test.go edge to middle excluded because + // _test.go files are skipped.) + expect(r.edgeCount).toBe(2); + expect(r.depth).toBe(3); + }); + + test("result echoes the modulePath so callers can audit", () => { + const r = computeGoGraphDepth(FIXTURE); + expect(r.modulePath).toBe('github.com/example/fixture'); + }); + + test("re-running on the same tree produces identical numbers", () => { + const a = computeGoGraphDepth(FIXTURE); + const b = computeGoGraphDepth(FIXTURE); + expect(a).toEqual(b); + }); +}); diff --git a/src/c14_go_graph_depth.ts b/src/c14_go_graph_depth.ts new file mode 100644 index 0000000000000000000000000000000000000000..73fc9661e8f8dfcbd551bd6518ef8623cc3efab5 Binary files /dev/null and b/src/c14_go_graph_depth.ts differ diff --git a/src/c14_rust_graph_depth.test.ts b/src/c14_rust_graph_depth.test.ts new file mode 100644 index 0000000000000000000000000000000000000000..b070505ab9294453d1c2dc2d73ffd9004f1684a9 --- /dev/null +++ b/src/c14_rust_graph_depth.test.ts @@ -0,0 +1,199 @@ +import { afterAll, beforeAll, describe, expect, test } from "bun:test"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { resolve } from "node:path"; +import { + computeRustGraphDepth, + parseCargoToml, +} from "./c14_rust_graph_depth.ts"; + +const FIXTURE = mkdtempSync(resolve(tmpdir(), "tdd-md-rust-graph-")); + +const writeFile = (rel: string, content: string): void => { + const abs = resolve(FIXTURE, rel); + mkdirSync(abs.split("/").slice(0, -1).join("/"), { recursive: true }); + writeFileSync(abs, content); +}; + +beforeAll(() => { + // Fixture: a workspace with a root crate + three member crates. 
+ // Dependency chain: root → middle → leaf, with `core` standalone. + // root (top) + // └─→ middle (path) + // └─→ leaf (workspace = true) + // core (no internal deps) + writeFile( + "Cargo.toml", + `[package] +name = "rootcrate" +version = "0.1.0" +edition = "2021" + +[workspace] +members = [ + "crates/middle", + "crates/leaf", + "crates/core", +] + +[workspace.dependencies] +leaf = { version = "0.1", path = "crates/leaf" } + +[dependencies] +middle = { version = "0.1", path = "crates/middle" } +serde = "1.0" +`, + ); + writeFile( + "crates/middle/Cargo.toml", + `[package] +name = "middle" +version = "0.1.0" +edition = "2021" + +[dependencies] +leaf = { workspace = true } +anyhow = "1.0" +`, + ); + writeFile( + "crates/leaf/Cargo.toml", + `[package] +name = "leaf" +version = "0.1.0" +edition = "2021" + +[dependencies] +log = "0.4" +`, + ); + writeFile( + "crates/core/Cargo.toml", + `[package] +name = "core" +version = "0.1.0" +edition = "2021" + +[dependencies] +log = "0.4" +`, + ); +}); + +afterAll(() => { + rmSync(FIXTURE, { recursive: true, force: true }); +}); + +describe("parseCargoToml", () => { + test("extracts a simple [package] name", () => { + const doc = parseCargoToml(`[package]\nname = "myseg"\nversion = "0.1.0"\n`); + const pkg = doc.sections.get("package"); + expect(pkg?.get("name")).toBe("myseg"); + }); + + test("extracts a multi-line workspace.members array", () => { + const doc = parseCargoToml(`[workspace] +members = [ + "crates/a", + "crates/b", +] +`); + const ws = doc.sections.get("workspace"); + expect(ws?.get("members")).toEqual(["crates/a", "crates/b"]); + }); + + test("parses an inline-table dependency spec", () => { + const doc = parseCargoToml(`[dependencies] +mydep = { version = "0.1", path = "crates/mydep" } +`); + const deps = doc.sections.get("dependencies"); + const spec = deps?.get("mydep") as Record; + expect(spec.version).toBe("0.1"); + expect(spec.path).toBe("crates/mydep"); + }); + + test("parses workspace = true dep style", 
() => { + const doc = parseCargoToml(`[dependencies] +foo = { workspace = true } +`); + const deps = doc.sections.get("dependencies"); + const spec = deps?.get("foo") as Record; + expect(spec.workspace).toBe("true"); + }); +}); + +describe("computeRustGraphDepth — end-to-end on fixture", () => { + test("root → middle → leaf chain produces depth 3", () => { + const r = computeRustGraphDepth(FIXTURE); + expect(r.language).toBe("rust"); + expect(r.workspaceName).toBe("rootcrate"); + // 4 crates: rootcrate, middle, leaf, core. + expect(r.nodeCount).toBe(4); + // 2 internal edges: rootcrate → middle, middle → leaf. + // (core has no internal deps; serde/anyhow/log are external.) + expect(r.edgeCount).toBe(2); + expect(r.depth).toBe(3); + }); + + test("standalone core crate doesn't contribute to longest path", () => { + const r = computeRustGraphDepth(FIXTURE); + // core is included as a node (depth-1 leaf) but does not extend + // the longest chain. Longest is still rootcrate → middle → leaf. 
+ expect(r.depth).toBe(3); + }); + + test("re-running on the same workspace produces identical numbers", () => { + const a = computeRustGraphDepth(FIXTURE); + const b = computeRustGraphDepth(FIXTURE); + expect(a).toEqual(b); + }); +}); + +describe("computeRustGraphDepth — virtual workspace (no root [package])", () => { + const VW = mkdtempSync(resolve(tmpdir(), "tdd-md-rust-virtual-")); + + const writeVW = (rel: string, content: string): void => { + const abs = resolve(VW, rel); + mkdirSync(abs.split("/").slice(0, -1).join("/"), { recursive: true }); + writeFileSync(abs, content); + }; + + beforeAll(() => { + writeVW( + "Cargo.toml", + `[workspace] +members = ["crates/a", "crates/b"] +`, + ); + writeVW( + "crates/a/Cargo.toml", + `[package] +name = "a" +version = "0.1.0" +edition = "2021" + +[dependencies] +b = { path = "../b" } +`, + ); + writeVW( + "crates/b/Cargo.toml", + `[package] +name = "b" +version = "0.1.0" +edition = "2021" +`, + ); + }); + + afterAll(() => { + rmSync(VW, { recursive: true, force: true }); + }); + + test("virtual workspace: 2 crates, 1 edge, depth 2", () => { + const r = computeRustGraphDepth(VW); + expect(r.nodeCount).toBe(2); + expect(r.edgeCount).toBe(1); + expect(r.depth).toBe(2); + }); +}); diff --git a/src/c14_rust_graph_depth.ts b/src/c14_rust_graph_depth.ts new file mode 100644 index 0000000000000000000000000000000000000000..5864cdefa0fe9c244354db4d7b38e1138ca18921 --- /dev/null +++ b/src/c14_rust_graph_depth.ts @@ -0,0 +1,390 @@ +// c14 — adapter: builds a workspace-crate dependency DAG for a Cargo +// workspace rooted at a given path, then computes graphDepth via the +// pure helper in b32_graph_depth_polyglot.ts. +// +// Module-granularity per /sama/v2 §5 (operational) — see the comment +// at the top of b32_graph_depth_polyglot.ts. The TS metric works at +// file level; Go's natural unit is the package directory; Rust's +// natural unit is the CRATE (Cargo workspace member). 
graphDepth +// here = longest path through the workspace-internal crate +// dependency graph. +// +// Algorithm: +// 1. Read the workspace root's Cargo.toml. +// 2. Identify workspace members: +// - From [workspace] members = [...] — explicit list. +// - If the root also has [package], the root itself is a +// workspace member (a "regular workspace with root crate", +// as ripgrep is — vs a "virtual workspace" where the root +// has only [workspace]). +// 3. For each workspace member, read its own Cargo.toml. Get its +// crate name from [package] name = "...". +// 4. Parse the member's [dependencies] (and [dev-dependencies]? +// — no: graphDepth is about production deps, dev-deps are not +// part of the runtime DAG). For each dep: +// - If `path = "../foo"` or `path = "crates/foo"` → resolve +// to a directory and match it to a workspace-member dir. +// - If `workspace = true` → look it up in the root's +// [workspace.dependencies] map; if THAT has `path = "..."`, +// it's a workspace-internal dep. +// - Otherwise it's an external crate (crates.io) and excluded. +// 5. Edges = (importing-crate-name → imported-crate-name). +// 6. Pass to computeGraphDepth. +// +// The TOML subset parsed here is the same shape c14_sama_profile.ts +// handles for sama.profile.toml: string values, string arrays, and +// the dotted-section + inline-table forms Cargo manifests use. This +// adapter has its own scoped parser to avoid coupling the SAMA +// profile parser to Cargo's idioms. + +import { readFileSync, statSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import { + computeGraphDepth, + type GraphDepthResult, +} from "./b32_graph_depth_polyglot.ts"; + +// — Tiny TOML parser sufficient for Cargo.toml structure ---------- + +type TomlValue = string | string[] | Record; + +interface TomlDoc { + sections: Map>; +} + +const stripComment = (line: string): string => { + // Cargo manifests don't put '#' inside strings used here. + const idx = line.indexOf("#"); + return idx === -1 ? 
line : line.slice(0, idx); +}; + +const parseInlineTableLoose = (raw: string): Record => { + // `{ version = "0.4", path = "crates/x", workspace = true }` + const t = raw.trim(); + if (!t.startsWith("{") || !t.endsWith("}")) return {}; + const inner = t.slice(1, -1).trim(); + const out: Record = {}; + if (inner === "") return out; + // Split on commas not inside quotes. + const parts: string[] = []; + let cur = ""; + let inStr = false; + let quote = ""; + for (const ch of inner) { + if (inStr) { + cur += ch; + if (ch === quote) inStr = false; + continue; + } + if (ch === '"' || ch === "'") { + inStr = true; + quote = ch; + cur += ch; + continue; + } + if (ch === ",") { + parts.push(cur); + cur = ""; + continue; + } + cur += ch; + } + if (cur.trim() !== "") parts.push(cur); + + for (const p of parts) { + const eq = p.indexOf("="); + if (eq === -1) continue; + const key = p.slice(0, eq).trim(); + const rawVal = p.slice(eq + 1).trim(); + if ((rawVal.startsWith('"') && rawVal.endsWith('"')) || (rawVal.startsWith("'") && rawVal.endsWith("'"))) { + out[key] = rawVal.slice(1, -1); + } else if (rawVal === "true" || rawVal === "false") { + out[key] = rawVal; + } else { + // numbers, etc — store raw stringified + out[key] = rawVal; + } + } + return out; +}; + +export const parseCargoToml = (text: string): TomlDoc => { + const sections = new Map>(); + sections.set("__top__", new Map()); + + // Stitch multi-line array values (`members = [\n "a",\n "b",\n]`). + const physLines = text.split("\n"); + const logical: string[] = []; + let buf = ""; + let arrayDepth = 0; + let inlineDepth = 0; + for (const raw of physLines) { + const line = stripComment(raw); + buf = buf === "" ? 
line : buf + " " + line; + for (const c of line) { + if (c === "[") arrayDepth++; + else if (c === "]") arrayDepth--; + else if (c === "{") inlineDepth++; + else if (c === "}") inlineDepth--; + } + // A line that starts with `[` and ends with `]` and has 0 depth + // is a section header — but only if the whole bracketed string + // is the line, otherwise it's an array literal mid-line. + if (arrayDepth <= 0 && inlineDepth <= 0) { + arrayDepth = 0; + inlineDepth = 0; + logical.push(buf); + buf = ""; + } + } + if (buf.trim() !== "") logical.push(buf); + + let currentSection = "__top__"; + const headerRe = /^\s*\[\s*([^\[\]]+)\s*\]\s*$/; // [table] + const arrayHeaderRe = /^\s*\[\[\s*([^\[\]]+)\s*\]\]\s*$/; // [[array-of-tables]] + for (const rawLogical of logical) { + const line = rawLogical.trim(); + if (line === "") continue; + const ah = arrayHeaderRe.exec(line); + if (ah) { + // Array-of-tables (e.g. [[bin]], [[test]]). We don't merge + // multiple entries — we just route them to a unique scratch + // section so their key=value lines don't pollute the + // previous [table] (notably [package]). + const base = ah[1]!.trim(); + let i = 0; + let key = `__arrtable__${base}_${i}`; + while (sections.has(key)) { i++; key = `__arrtable__${base}_${i}`; } + currentSection = key; + sections.set(currentSection, new Map()); + continue; + } + const hm = headerRe.exec(line); + if (hm) { + currentSection = hm[1]!.trim(); + if (!sections.has(currentSection)) { + sections.set(currentSection, new Map()); + } + continue; + } + const eq = line.indexOf("="); + if (eq === -1) continue; + const key = line.slice(0, eq).trim(); + const rawVal = line.slice(eq + 1).trim(); + let value: TomlValue; + if (rawVal.startsWith("[") && rawVal.endsWith("]")) { + // Array. Cargo's [workspace] members = ["crates/x", "crates/y"] + // form is what we need; other array shapes are skipped. + const inner = rawVal.slice(1, -1).trim(); + if (inner === "") value = []; + else { + // Split commas at depth 0. 
+ const parts: string[] = []; + let cur = ""; + let depth = 0; + let inStr = false; + let quote = ""; + for (const ch of inner) { + if (inStr) { + cur += ch; + if (ch === quote) inStr = false; + continue; + } + if (ch === '"' || ch === "'") { + inStr = true; + quote = ch; + cur += ch; + continue; + } + if (ch === "[" || ch === "{") depth++; + else if (ch === "]" || ch === "}") depth--; + if (ch === "," && depth === 0) { + parts.push(cur); + cur = ""; + continue; + } + cur += ch; + } + if (cur.trim() !== "") parts.push(cur); + const strings: string[] = []; + for (const p of parts) { + const t = p.trim(); + if ((t.startsWith('"') && t.endsWith('"')) || (t.startsWith("'") && t.endsWith("'"))) { + strings.push(t.slice(1, -1)); + } + } + value = strings; + } + } else if (rawVal.startsWith("{")) { + value = parseInlineTableLoose(rawVal); + } else if ((rawVal.startsWith('"') && rawVal.endsWith('"')) || (rawVal.startsWith("'") && rawVal.endsWith("'"))) { + value = rawVal.slice(1, -1); + } else { + // bool / number / unknown — store raw + value = rawVal; + } + sections.get(currentSection)!.set(key, value); + } + return { sections }; +}; + +// — Adapter logic -------------------------------------------------- + +interface WorkspaceMember { + name: string; // crate name from its own [package] name + dir: string; // repo-relative directory of its Cargo.toml + toml: TomlDoc; +} + +const isStringArray = (v: TomlValue | undefined): v is string[] => + Array.isArray(v) && v.every((x) => typeof x === "string"); + +const isInlineTable = (v: TomlValue | undefined): v is Record => + typeof v === "object" && !Array.isArray(v) && v !== null; + +const collectWorkspaceMembers = ( + root: string, + rootToml: TomlDoc, +): WorkspaceMember[] => { + const out: WorkspaceMember[] = []; + + // Explicit workspace members. 
+ const ws = rootToml.sections.get("workspace"); + const memberDirs: string[] = []; + if (ws) { + const members = ws.get("members"); + if (isStringArray(members)) { + for (const m of members) memberDirs.push(m); + } + } + + for (const md of memberDirs) { + const memberToml = resolve(root, md, "Cargo.toml"); + let text: string; + try { + text = readFileSync(memberToml, "utf8"); + } catch { + continue; + } + const parsed = parseCargoToml(text); + const pkg = parsed.sections.get("package"); + if (!pkg) continue; + const name = pkg.get("name"); + if (typeof name !== "string") continue; + out.push({ name, dir: md, toml: parsed }); + } + + // If the root itself has [package], the root is also a workspace + // member (regular workspace with root crate — ripgrep's shape). + const rootPkg = rootToml.sections.get("package"); + if (rootPkg) { + const name = rootPkg.get("name"); + if (typeof name === "string") { + out.push({ name, dir: ".", toml: rootToml }); + } + } + return out; +}; + +const collectWorkspaceDependencies = ( + rootToml: TomlDoc, +): Map> => { + // [workspace.dependencies] section: maps dep-name → inline-table + // or string-version. When `workspace = true` is used in a member, + // we look here to see if that name maps to a workspace-internal + // crate (i.e. has a `path = "..."`). + const out = new Map>(); + const sec = rootToml.sections.get("workspace.dependencies"); + if (!sec) return out; + for (const [k, v] of sec) { + if (isInlineTable(v)) out.set(k, v); + else if (typeof v === "string") out.set(k, { version: v }); + } + return out; +}; + +const memberHasInternalDep = ( + member: WorkspaceMember, + depName: string, + depSpec: TomlValue, + byName: Map, + workspaceDeps: Map>, +): string | null => { + // Returns the workspace-member name this dep resolves to, or null. + + // Case A: inline table with path = "..." + if (isInlineTable(depSpec)) { + if (depSpec.path) { + // Path resolves relative to the importing member's dir. 
+ // We don't need the absolute resolution — just need to + // identify which workspace member it points at. Match by + // dep NAME (since path-style internal deps in Cargo usually + // name the dep the same as its crate name). + if (byName.has(depName)) return depName; + } + if (depSpec.workspace === "true") { + const ws = workspaceDeps.get(depName); + if (ws && ws.path) { + if (byName.has(depName)) return depName; + } + } + } + // Case B: string version-only (external crate) → not internal. + // Case C: `dep = { workspace = true }` already handled above. + return null; +}; + +export interface RustGraphDepthResult extends GraphDepthResult { + language: "rust"; + workspaceName: string; +} + +export const computeRustGraphDepth = (repoRoot: string): RustGraphDepthResult => { + const root = resolve(repoRoot); + const rootStat = statSync(root); + if (!rootStat.isDirectory()) { + throw new Error(`expected a directory, got: ${repoRoot}`); + } + const rootCargo = readFileSync(resolve(root, "Cargo.toml"), "utf8"); + const rootToml = parseCargoToml(rootCargo); + const rootPkg = rootToml.sections.get("package"); + const workspaceName = (rootPkg && typeof rootPkg.get("name") === "string" + ? (rootPkg.get("name") as string) + : (() => { + // virtual workspace — use the directory name. + const segs = root.split("/"); + return segs[segs.length - 1] ?? "workspace"; + })()); + + const members = collectWorkspaceMembers(root, rootToml); + const byName = new Map(); + for (const m of members) byName.set(m.name, m); + + const workspaceDeps = collectWorkspaceDependencies(rootToml); + + // Build edges: for each member, scan its [dependencies] entries. 
+ const nodes = members.map((m) => m.name); + const edges: Array<[string, string]> = []; + const seen = new Set(); + + for (const m of members) { + const deps = m.toml.sections.get("dependencies"); + if (!deps) continue; + for (const [depName, depSpec] of deps) { + const target = memberHasInternalDep(m, depName, depSpec, byName, workspaceDeps); + if (target === null) continue; + if (target === m.name) continue; + const key = `${m.name} ${target}`; + if (seen.has(key)) continue; + seen.add(key); + edges.push([m.name, target]); + } + } + + const result = computeGraphDepth({ nodes, edges }); + return { + ...result, + language: "rust", + workspaceName, + }; +};