33dcc9f48dd621b10a167e6a7d83113f2578020e diff --git a/content/blog/sama-v2-go-project-dive.md b/content/blog/sama-v2-go-project-dive.md index 91821d797afab62abd28ddff0e5c9dc3ddc11450..fbd77a172d1e9bdd7e8f9787a01e0aa7f42d979a 100644 --- a/content/blog/sama-v2-go-project-dive.md +++ b/content/blog/sama-v2-go-project-dive.md @@ -141,17 +141,23 @@ Derives from Law. No file's declared layer is contradicted by what it imports. **Estimated tally: 5 of 7 pass under the directory-based dialect, with 2 named failures (Sorted, Modeled-tests).** That's a real result, not "0/7 because no one tried." -## The §5 metrics — estimated for `dive` +## The §5 metrics — mixed measurement and estimate for `dive` -| metric | `dive` (Go, estimated) | WP plugin (PHP, estimated) | tdd.md (TS, measured) | +| metric | `dive` (Go) | WP plugin (PHP, estimated) | tdd.md (TS, measured) | |---|---|---|---| -| §4 checks passing | ~5 / 7 | 0 / 7 | 7 / 7 | -| graphDepth | ~5 (cmd → command → ui → dive → filetree → internal/utils) | ~3 | 7 | -| boundaryRatio | ~85% (one borderline case in `options/ci.go`) | <10% | 100% | -| workingSetFit (50–500 LOC) | ~80% | ~47% | 80% | -| violationCounts (sum) | ~30 (mostly Modeled-tests gaps) | 17+ | 0 | +| §4 checks passing | ~5 / 7 (estimated) | 0 / 7 | 7 / 7 | +| graphDepth | ~5 (estimated; cmd → command → ui → dive → filetree → internal/utils) | ~3 | 7 | +| boundaryRatio | ~85% (estimated; one borderline case in `options/ci.go`) | <10% | 100% | +| **workingSetFit (50–500 LOC)** | **52.17% (measured, [dive@d6c69194](https://github.com/wagoodman/dive/commit/d6c691947f8fda635c952a17ee3b7555379d58f0))** — originally estimated ~80% | ~47% | 80% (measured) | +| violationCounts (sum) | ~30 (estimated; mostly Modeled-tests gaps) | 17+ | 0 | -The `workingSetFit` is essentially **identical** between `dive` and this site (80%). 
Two unrelated projects, two different languages, two different scopes, written by different teams under different conventions — landing at the same fit ratio is a useful data point: 80% might just be what "reasonably engineered" looks like on this axis. +**The `workingSetFit` is the metric I most expected to land near tdd.md's 80%** — two engineered codebases, both with linters and conventions. The measurement says otherwise. + +**Hand-trace** (auditable per [/sama/v2 §0](/sama/v2)): running `find /tmp/dive -name '*.go' -not -name '*_test.go' -not -path '*/.git/*' -not -path '*/vendor/*' | wc -l` returns **92 source .go files**. Of those, **48** fall in [50, 500] LOC inclusive (matching `WORKING_SET_MIN_LOC` and `WORKING_SET_MAX_LOC` in [`src/a31_sama_v2.ts`](/GIT/syntaxai/tdd.md/blob/main/src/a31_sama_v2.ts)). 48 ÷ 92 = 0.5217 ≈ 52.17%. The polyglot §5 emitter at [`scripts/measure-working-set.ts`](/GIT/syntaxai/tdd.md/blob/main/scripts/measure-working-set.ts) produces the same number from the same source tree. + +The distribution explains it: **44 files under 50 LOC** (mostly small type-only modules, single-helper files, and platform-shim stubs like `dive/image/docker/docker_host_windows.go` at 6 LOC), **48 in band**, and — strikingly — **0 over 500 LOC**. `dive`'s working-set miss is not god-classes (the §4.5 Atomic check passes outright); it's the *opposite* failure mode: many files small enough to fall below the substantive-module threshold. + +The original ~80% estimate was wrong, and wrong in a direction casual eyeballing wouldn't catch — counting visible-on-the-screen files isn't the same as counting them and applying a band filter. That 28-point miss between estimate and measurement is itself the empirical case for the metric existing at all: the metric surfaces a property the human estimate missed. 
## What `dive` would look like at 7/7 — the last 30% diff --git a/content/blog/sama-v2-rust-project-ripgrep.md b/content/blog/sama-v2-rust-project-ripgrep.md index 4fdd534a5a324e154b05934b51fdd6c0f785ce9c..62c0dd00aa75b5a0cda0dc0218e4cb8b655673bf 100644 --- a/content/blog/sama-v2-rust-project-ripgrep.md +++ b/content/blog/sama-v2-rust-project-ripgrep.md @@ -145,17 +145,21 @@ Derives from Law on the same edge set. *(Update: all three dialects have since been drafted into [/sama/v2 §6.A](/sama/v2#6a-v21-dialects-provisional) as v2.1-draft extensions, with the same five-part operational structure — what they relax, what property they preserve, and the falsifiable cross-repo experiment that would invalidate each.)* -## §5 metric estimates +## §5 metrics — measured workingSetFit, estimated the rest -| metric | ripgrep (estimated) | dive (Go) | tdd.md (TS, measured) | WP plugin (PHP) | +| metric | ripgrep | dive (Go) | tdd.md (TS, measured) | WP plugin (PHP) | |---|---|---|---|---| -| §4 checks passing | ~3/7 strict, ~5/7 under v2.1 dialects | ~5/7 | 7/7 ✓ | 0/7 | -| graphDepth | ~5 (matcher → engine → searcher → printer → core) | ~5 | 7 | ~3 | -| boundaryRatio | ~95% | ~85% | 100% | <10% | -| workingSetFit (50–500 LOC) | ~60% (those 19 big files drag it down) | ~80% | 80% | ~47% | -| violationCounts (sum) | ~50 (19 Atomic + ~30 Modeled-tests under sibling-rule) | ~30 | 0 | 17+ | +| §4 checks passing | ~3/7 strict, ~5/7 under v2.1 dialects (estimated) | ~5/7 (estimated) | 7/7 ✓ | 0/7 | +| graphDepth | ~5 estimated (matcher → engine → searcher → printer → core) | ~5 (estimated) | 7 | ~3 | +| boundaryRatio | ~95% (estimated) | ~85% (estimated) | 100% | <10% | +| **workingSetFit (50–500 LOC)** | **54.00% (measured, [ripgrep@4519153e](https://github.com/BurntSushi/ripgrep/commit/4519153e5e461527f4bca45b042fff45c4ec6fb9))** — originally estimated ~60% | **52.17% (measured, [dive@d6c69194](https://github.com/wagoodman/dive/commit/d6c691947f8fda635c952a17ee3b7555379d58f0))** — 
originally estimated ~80% | 80% | ~47% | +| violationCounts (sum) | ~50 estimated (Atomic + Modeled-tests under sibling-rule) | ~30 (estimated) | 0 | 17+ | -ripgrep's `workingSetFit` is the metric that surprises here: ~60%, lower than dive *and* lower than this site. That's the 19 big files pulling the distribution down. **And yet most of those files are appropriate to their content.** It's a useful signal: workingSetFit is not by itself a quality measure — a project full of declaration catalogs will score lower than a project full of small handlers without being architecturally worse. +ripgrep's `workingSetFit` measures 54.00% (from the polyglot §5 emitter at [`scripts/measure-working-set.ts`](/GIT/syntaxai/tdd.md/blob/main/scripts/measure-working-set.ts), inclusive bounds [50, 500] LOC). The distribution: **100 .rs files** total, **16 under 50 LOC**, **54 in band**, **30 over 500 LOC** — appreciably more than the "19 big files" I eyeballed in the original audit. The over-cap list ranges from the textbook declarative-exempt catalog (`crates/core/flags/defs.rs` at 7,780 LOC) down to genuinely borderline files at 500–800 LOC like `crates/pcre2/src/matcher.rs` (506) and `crates/cli/src/decompress.rs` (533). + +**And yet most of those files are appropriate to their content.** workingSetFit by itself doesn't say which side of the line each file falls on — that's what the [declarative-exemption dialect](/sama/v2#63-declarative-exemption-dialect) is for. The metric surfaces the property; the policy decides what to do with it. + +The cross-repo comparison the measurement makes possible is more interesting than the single number. **ripgrep (54%) and dive (52%) measure within two percentage points of each other** — two unrelated codebases in two different languages, written by different teams under different conventions, landing in the same working-set band when measured against the same bounds. That's the kind of cross-repo signal §6 says it wants. 
The eyeballed estimates (~60% and ~80%) said the two projects were 20 points apart; the measurement says they're 2 points apart. The metric, not the eye, was right. This is exactly the §5 intent. The metric surfaces a property; whether that property is good or bad depends on what the file content *should be*. Compliance scores conflate the two; metrics keep them separate. diff --git a/content/home.md b/content/home.md index b070ccf05fba96d3e49dcfc1d478b36226170fe2..e83005853f554c6ffecbec74ddc34944431b05b6 100644 --- a/content/home.md +++ b/content/home.md @@ -56,17 +56,18 @@ SAMA bundles those findings into four constraints a CI job can enforce. *Sorted* **The load-bearing property isn't that LLMs have small context windows — modern models have 200k+ tokens.** The load-bearing property is **mechanical enforceability**: the verifier fails the build when a file crosses the line cap or an import points the wrong way. Discipline that lives only in code review quietly slips under agent pressure; discipline that lives in a CI gate keeps its shape across an arbitrary number of agent commits. The context-window research above explains the *why*; the verifier explains the *how*. -## Three datapoints on the same axes +## Datapoints on the same axes -Empirical baseline so far (the §5 metrics, [computed live](/sama/v2/verify) for this site and hand-traced for the two audits): +Empirical baseline so far. The §4 score for this site is [computed live](/sama/v2/verify); the §4 scores for the other repos are hand-estimated. The **workingSetFit** column is now measured for three of the four repos by the polyglot §5 emitter at [`scripts/measure-working-set.ts`](/GIT/syntaxai/tdd.md/blob/main/scripts/measure-working-set.ts); the remaining columns are still hand-estimated where flagged. 
| project | language | §4 score | workingSetFit | boundaryRatio | graphDepth | |---|---|---|---|---|---| -| **tdd.md** (this site) | TypeScript | **7 / 7 ✓** (measured) | 80% | 100% | 7 | -| [**wagoodman/dive**](/blog/sama-v2-go-project-dive) | Go | ~5 / 7 (estimated) | ~80% | ~85% | ~5 | -| [**Open Graph plugin**](/blog/sama-v2-wordpress-plugin-audit) | PHP / WordPress | 0 / 7 (estimated) | ~47% | <10% | ~3 | +| **tdd.md** (this site) | TypeScript | **7 / 7 ✓** (measured) | 80% (measured) | 100% (measured) | 7 (measured) | +| [**wagoodman/dive**](/blog/sama-v2-go-project-dive) | Go | ~5 / 7 (estimated) | **52.17%** (measured, [@d6c69194](https://github.com/wagoodman/dive/commit/d6c691947f8fda635c952a17ee3b7555379d58f0)) | ~85% (estimated) | ~5 (estimated) | +| [**BurntSushi/ripgrep**](/blog/sama-v2-rust-project-ripgrep) | Rust | ~3-5 / 7 (estimated, depends on v2.1 dialect uptake) | **54.00%** (measured, [@4519153e](https://github.com/BurntSushi/ripgrep/commit/4519153e5e461527f4bca45b042fff45c4ec6fb9)) | ~95% (estimated) | ~5 (estimated) | +| [**Open Graph plugin**](/blog/sama-v2-wordpress-plugin-audit) | PHP / WordPress | 0 / 7 (estimated) | ~47% (estimated) | <10% (estimated) | ~3 (estimated) | -Three points is not yet a "v2 is worth following" claim. §6 of the spec is explicit that promotion to official requires cross-repo deltas, not a single dogfood. But the same five numbers are now defined, computable, and published — which is the prerequisite the spec sets before any later claim becomes testable. +Four points is not yet a "v2 is worth following" claim. §6 of the spec is explicit that promotion to official requires cross-repo *deltas*, not a single dogfood. But three workingSetFit rows are now *measured* against the same bounds the spec defines — a quiet but load-bearing step from "we have numbers" to "we have *the same* numbers across repos." 
The cross-repo signal that emerges: ripgrep (54.00%) and dive (52.17%) land within two percentage points of each other, suggesting workingSetFit in the 50–55% range may be characteristic of mature compiled-language CLI tools — a hypothesis that needs more datapoints to confirm but is now *testable* in a way it was not when the numbers were all eyeballed. ## See it in practice diff --git a/scripts/measure-working-set.ts b/scripts/measure-working-set.ts new file mode 100644 index 0000000000000000000000000000000000000000..7e8b2d008f9e8079b1086d12147044f9fe9e6d7a --- /dev/null +++ b/scripts/measure-working-set.ts @@ -0,0 +1,76 @@ +#!/usr/bin/env bun +// measure-working-set — CLI for the §5 polyglot workingSetFit metric. +// Given a path to a checked-out Go or Rust source tree, emit the +// measured ratio as JSON to stdout. +// +// Usage: +// bun scripts/measure-working-set.ts <repo-path> --lang go +// bun scripts/measure-working-set.ts <repo-path> --lang rust +// +// The number it emits is reproducible: given the same checked-out +// source tree, every run prints the same ratio to full float precision. +// Pair the output with the repo's commit SHA when reporting; see +// /sama/v2 §5 (operational) for the bounds reasoning. 

import { measureWorkingSetForRepo } from "../src/c14_working_set_walker.ts";
import type { PolyglotLanguage } from "../src/b32_working_set_polyglot.ts";

/**
 * Print the usage line to stderr and exit with status 2 (bad invocation).
 *
 * Written as a function declaration with an explicit `never` return type
 * so TypeScript narrows after `if (cond) usage();` — that is what lets
 * the code below drop its non-null assertions.
 */
function usage(): never {
  console.error(
    "Usage: bun scripts/measure-working-set.ts <repo-path> --lang go|rust [--verbose]",
  );
  process.exit(2);
}

const args = process.argv.slice(2);

// Smallest valid invocation is `<repo-path> --lang <go|rust>`: three tokens.
if (args.length < 3) usage();

// The repo path is positional and comes first. Reject a flag in that slot
// so `--verbose --lang go` fails with the usage line rather than trying to
// measure a directory literally named "--verbose".
const [repoPath, ...flags] = args;
if (repoPath === undefined || repoPath.startsWith("--")) usage();

let lang: PolyglotLanguage | null = null;
let verbose = false;

for (let i = 0; i < flags.length; i++) {
  const flag = flags[i];
  if (flag === "--lang") {
    // `--lang` consumes the following token as its value.
    const value = flags[++i];
    if (value !== "go" && value !== "rust") {
      console.error(`--lang must be "go" or "rust", got: ${value}`);
      process.exit(2);
    }
    lang = value;
  } else if (flag === "--verbose") {
    verbose = true;
  } else {
    console.error(`unknown argument: ${flag}`);
    usage();
  }
}

if (lang === null) usage();
// Copy into a const so the narrowed type survives inside the closure below.
const language: PolyglotLanguage = lang;

const result = measureWorkingSetForRepo(repoPath, language);

const output: Record<string, unknown> = {
  language: result.language,
  repoPath,
  minLoc: result.minLoc,
  maxLoc: result.maxLoc,
  total: result.total,
  included: result.included,
  ratio: result.ratio,
  // Human-friendly percentage rounded to two decimals; `ratio` above keeps
  // full float precision.
  ratioPercent: Number((result.ratio * 100).toFixed(2)),
};

if (verbose) {
  // Per-file breakdown. A file is reported in band only when the metric
  // counts it (Go `_test.go` files are excluded from the tally) and its
  // line count lies inside [minLoc, maxLoc], inclusive on both ends.
  output.files = result.files.map((f) => {
    const excludedAsTest = language === "go" && f.path.endsWith("_test.go");
    const withinBounds =
      f.locCount >= result.minLoc && f.locCount <= result.maxLoc;
    return {
      path: f.path,
      locCount: f.locCount,
      inBand: withinBounds && !excludedAsTest,
    };
  });
}

console.log(JSON.stringify(output, null, 2));
diff --git a/src/b32_working_set_polyglot.test.ts b/src/b32_working_set_polyglot.test.ts new file mode 100644 index 0000000000000000000000000000000000000000..1eef13f5cfa163499dd7ee3ed42d14d362fd1324 --- /dev/null +++ b/src/b32_working_set_polyglot.test.ts @@ -0,0 +1,164 @@
import { describe, expect, test } from "bun:test";
import {
  WORKING_SET_MAX_LOC,
  WORKING_SET_MIN_LOC,
} from "./a31_sama_v2.ts";
import {
  computeWorkingSetFitPolyglot,
  type PolyglotLanguage,
  type WorkingSetFile,
} from "./b32_working_set_polyglot.ts";

// Mirror the inclusive-bound assertions in b32_sama_v2_metrics.test.ts.
// Same algorithm, same constants, same edge behaviour — the polyglot
// helper is allowed to compute a different SET of files (Go/Rust source
// trees rather than src/*.ts), but the RATIO formula must match the
// TS metric byte-for-byte. These tests pin that.

// Fixture builder: one pre-counted file summary entry.
const file = (path: string, locCount: number): WorkingSetFile => ({ path, locCount });

describe("computeWorkingSetFitPolyglot — empty input", () => {
  test("empty list → 1.0 vacuous (matches the TS metric on an empty file map)", () => {
    const r = computeWorkingSetFitPolyglot([], "go");
    expect(r.ratio).toBe(1.0);
    expect(r.included).toBe(0);
    expect(r.total).toBe(0);
  });

  test("empty list also vacuous under Rust", () => {
    const r = computeWorkingSetFitPolyglot([], "rust");
    expect(r.ratio).toBe(1.0);
  });
});

describe("computeWorkingSetFitPolyglot — single-file extremes", () => {
  test("a single 100-line Go file → 1.0", () => {
    const r = computeWorkingSetFitPolyglot([file("pkg/x.go", 100)], "go");
    expect(r.ratio).toBe(1.0);
    expect(r.included).toBe(1);
    expect(r.total).toBe(1);
  });

  test("a single 10-line file falls below the min → 0.0", () => {
    const r = computeWorkingSetFitPolyglot([file("pkg/x.go", 10)], "go");
    expect(r.ratio).toBe(0.0);
    expect(r.included).toBe(0);
    expect(r.total).toBe(1);
  });

  test("a single 600-line file exceeds the max → 0.0", () => {
    const r = computeWorkingSetFitPolyglot([file("pkg/x.go", 600)], "go");
    expect(r.ratio).toBe(0.0);
    expect(r.included).toBe(0);
    expect(r.total).toBe(1);
  });
});

describe("computeWorkingSetFitPolyglot — bound-edge inclusivity", () => {
  // The TS metric uses `lines >= MIN && lines <= MAX`. These tests
  // mirror b32_sama_v2_metrics.test.ts's "exact bounds are inclusive".
  test("LOC = 49 → out of band", () => {
    const r = computeWorkingSetFitPolyglot([file("pkg/x.go", WORKING_SET_MIN_LOC - 1)], "go");
    expect(r.included).toBe(0);
  });

  test("LOC = 50 → in band", () => {
    const r = computeWorkingSetFitPolyglot([file("pkg/x.go", WORKING_SET_MIN_LOC)], "go");
    expect(r.included).toBe(1);
  });

  test("LOC = 500 → in band", () => {
    const r = computeWorkingSetFitPolyglot([file("pkg/x.go", WORKING_SET_MAX_LOC)], "go");
    expect(r.included).toBe(1);
  });

  test("LOC = 501 → out of band", () => {
    const r = computeWorkingSetFitPolyglot([file("pkg/x.go", WORKING_SET_MAX_LOC + 1)], "go");
    expect(r.included).toBe(0);
  });
});

describe("computeWorkingSetFitPolyglot — mixed inputs", () => {
  test("half in / half out → 0.5", () => {
    const r = computeWorkingSetFitPolyglot([
      file("pkg/a.go", 100),
      file("pkg/b.go", 10),
    ], "go");
    expect(r.ratio).toBe(0.5);
  });

  test("two in / two out → 0.5", () => {
    const r = computeWorkingSetFitPolyglot([
      file("pkg/a.go", 100),
      file("pkg/b.go", 300),
      file("pkg/c.go", 10),
      file("pkg/d.go", 800),
    ], "go");
    expect(r.ratio).toBe(0.5);
  });
});

describe("computeWorkingSetFitPolyglot — Go test-file exclusion", () => {
  test("*_test.go files do NOT count toward total or included", () => {
    const r = computeWorkingSetFitPolyglot([
      file("pkg/x.go", 100),
      file("pkg/x_test.go", 200),
      file("pkg/y_test.go", 50),
    ], "go");
    // Only x.go counts; both _test.go files dropped before tallying.
    expect(r.total).toBe(1);
    expect(r.included).toBe(1);
    expect(r.ratio).toBe(1.0);
  });

  test("a 100-line source + a 1-line _test.go sibling → 1.0 (mirrors the TS metric)", () => {
    const r = computeWorkingSetFitPolyglot([
      file("pkg/x.go", 100),
      file("pkg/x_test.go", 1),
    ], "go");
    expect(r.ratio).toBe(1.0);
  });
});

describe("computeWorkingSetFitPolyglot — Rust inline-tests asymmetry", () => {
  test("Rust includes ALL .rs files (no path-based test exclusion)", () => {
    // Rust convention: tests live inside source files under
    // #[cfg(test)] mod tests. The polyglot helper preserves that —
    // it does NOT exclude any .rs path. The asymmetry is documented
    // in the b32_working_set_polyglot.ts source comment.
    const r = computeWorkingSetFitPolyglot([
      file("src/lib.rs", 100),
      file("src/tests.rs", 100),
      file("src/something_test.rs", 100),
    ], "rust");
    expect(r.total).toBe(3);
    expect(r.included).toBe(3);
    expect(r.ratio).toBe(1.0);
  });
});

describe("computeWorkingSetFitPolyglot — reproducibility", () => {
  test("same input → identical output across runs (deep-equal)", () => {
    const input: WorkingSetFile[] = [
      file("a.go", 100),
      file("b.go", 60),
      file("c.go", 480),
      file("d.go", 20),
      file("e_test.go", 999),
    ];
    const langs: PolyglotLanguage[] = ["go", "rust"];
    for (const l of langs) {
      const a = computeWorkingSetFitPolyglot(input, l);
      const b = computeWorkingSetFitPolyglot(input, l);
      expect(a).toEqual(b);
    }
  });
});

describe("computeWorkingSetFitPolyglot — bounds echo", () => {
  test("result echoes minLoc / maxLoc from a31_sama_v2.ts (auditable)", () => {
    const r = computeWorkingSetFitPolyglot([], "go");
    expect(r.minLoc).toBe(WORKING_SET_MIN_LOC);
    expect(r.maxLoc).toBe(WORKING_SET_MAX_LOC);
  });
});
diff --git a/src/b32_working_set_polyglot.ts b/src/b32_working_set_polyglot.ts new file mode 100644 index 0000000000000000000000000000000000000000..ff0439a49b64490a4187e9cfcea50a521a58fb55 ---
/dev/null +++ b/src/b32_working_set_polyglot.ts @@ -0,0 +1,82 @@
// b32 — logic: §5 workingSetFit metric for polyglot source trees
// (Go, Rust). Pure function, no I/O. Mirrors the formula in
// src/b32_sama_v2_metrics.ts byte-for-byte:
//
//   workingSetFit = files-in-band ÷ total-source-files
//
// where in-band means WORKING_SET_MIN_LOC ≤ LOC ≤ WORKING_SET_MAX_LOC,
// inclusive on both ends. Bounds are imported from a31_sama_v2.ts so
// the cross-language number is computed against the same band as this
// site's own metric — the single-source-of-truth determinism property
// from /sama/v2 §0.
//
// Used by scripts/measure-working-set.ts (the polyglot CLI) and the
// c14_working_set_walker.ts adapter, which feed it a pre-counted file
// summary so this module stays pure and unit-testable.

import {
  WORKING_SET_MAX_LOC,
  WORKING_SET_MIN_LOC,
} from "./a31_sama_v2.ts";

// Language tag governs the test-file exclusion rule below.
export type PolyglotLanguage = "go" | "rust";

export interface WorkingSetFile {
  // Repo-relative path (e.g. "crates/printer/src/standard.rs").
  path: string;
  // File length in lines, matching the TS metric's `content.split("\n").length`.
  locCount: number;
}

export interface WorkingSetResult {
  language: PolyglotLanguage;
  included: number; // files inside [MIN, MAX] LOC, inclusive
  total: number; // total source files (after test-file exclusion)
  ratio: number; // included / total; empty-input → 1.0 vacuous
  minLoc: number; // echoed back from a31 so callers can audit
  maxLoc: number;
}

// Test-file exclusion. The asymmetry is honest, not arbitrary:
//
//   Go:   tests live in `*_test.go` files. The TS metric excludes
//         `*.test.ts` for the same structural reason — they aren't
//         working modules in their own right, they verify one.
//
//   Rust: tests live INSIDE source files under `#[cfg(test)] mod tests`.
//         Excluding files at file-granularity would either lose every
//         tested file or accidentally include all of them. The
//         inline-tests dialect drafted at /sama/v2#62-inline-tests-dialect
//         is what makes this asymmetry coherent: where the test attaches
//         is a language-level choice; the working-set property the metric
//         measures is unaffected.
//
// Returns true only for Go paths ending in `_test.go`; every Rust path
// is treated as a source file.
const isPolyglotTestFile = (path: string, lang: PolyglotLanguage): boolean => {
  if (lang === "go") return path.endsWith("_test.go");
  return false;
};

/**
 * Compute the §5 workingSetFit ratio over a pre-counted file summary.
 *
 * @param files - `{ path, locCount }` entries; not mutated. Entries that
 *   are test files for `lang` are dropped before anything is tallied, so
 *   they count toward neither `total` nor `included`.
 * @param lang - Selects the test-file exclusion rule.
 * @returns The tally, the ratio `included / total` (1.0 when no source
 *   files remain), and the bounds that were applied.
 */
export const computeWorkingSetFitPolyglot = (
  files: ReadonlyArray<WorkingSetFile>,
  lang: PolyglotLanguage,
): WorkingSetResult => {
  let included = 0;
  let total = 0;
  for (const f of files) {
    if (isPolyglotTestFile(f.path, lang)) continue;
    total++;
    // Inclusive on both ends: exactly MIN or exactly MAX lines is in band.
    if (f.locCount >= WORKING_SET_MIN_LOC && f.locCount <= WORKING_SET_MAX_LOC) {
      included++;
    }
  }
  // Match the TS metric: empty input → 1.0 (vacuously satisfied). This
  // also avoids the 0 / 0 → NaN division.
  const ratio = total === 0 ? 1.0 : included / total;
  return {
    language: lang,
    included,
    total,
    ratio,
    minLoc: WORKING_SET_MIN_LOC,
    maxLoc: WORKING_SET_MAX_LOC,
  };
};
diff --git a/src/c14_working_set_walker.test.ts b/src/c14_working_set_walker.test.ts new file mode 100644 index 0000000000000000000000000000000000000000..6906972978bea4c6684b200ffe8ff540575e519e --- /dev/null +++ b/src/c14_working_set_walker.test.ts @@ -0,0 +1,117 @@
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { resolve } from "node:path";
import {
  collectPolyglotFiles,
  measureWorkingSetForRepo,
} from "./c14_working_set_walker.ts";

// Hermetic fixture: build a tiny fake repo in a tmpdir, walk it,
// assert what comes back. The CLI script's real-world use against
// /tmp/dive and /tmp/ripgrep is exercised via the measurement step
// in this PR, not via unit tests; this file pins the algorithm.

// Fresh temp directory per test run; removed again in afterAll.
const FIXTURE_ROOT = mkdtempSync(resolve(tmpdir(), "tdd-md-wswalker-"));

// Write a fixture file of exactly `lineCount` lines under FIXTURE_ROOT,
// creating parent directories as needed. Lines are joined with "\n" and
// carry no trailing newline, so `content.split("\n").length === lineCount`.
const writeFile = (relPath: string, lineCount: number): void => {
  const abs = resolve(FIXTURE_ROOT, relPath);
  // Parent directory via path resolution rather than splitting on "/":
  // `resolve` returns platform-native separators, so a "/" split would
  // produce an empty directory name wherever the separator is "\".
  mkdirSync(resolve(abs, ".."), { recursive: true });
  const lines = Array.from({ length: lineCount }, (_, i) => `// line ${i}`);
  writeFileSync(abs, lines.join("\n"));
};

beforeAll(() => {
  // Top-level Go sources (one in-band, one out-of-band, one test file).
  writeFile("a.go", 100); // in band
  writeFile("b.go", 600); // out (over)
  writeFile("c_test.go", 200); // excluded for Go
  // Nested.
  writeFile("pkg/inner.go", 60); // in band, inside subdir
  writeFile("pkg/tiny.go", 10); // out (under)
  // Rust sources (separate sub-tree).
  writeFile("rs/src/lib.rs", 120); // in band
  writeFile("rs/src/big.rs", 700); // out (over)
  writeFile("rs/src/tests.rs", 75); // included (Rust has no path test rule)
  // Skip directories that should NOT be walked.
  writeFile(".git/HEAD.go", 100); // .git is skipped
  writeFile("target/build.rs", 100); // target/ is skipped
  writeFile("vendor/pkg.go", 100); // vendor/ is skipped
  writeFile("node_modules/dep.go", 100); // node_modules/ skipped
});

afterAll(() => {
  rmSync(FIXTURE_ROOT, { recursive: true, force: true });
});

describe("collectPolyglotFiles — Go", () => {
  test("walks recursively and finds the right .go files", () => {
    const files = collectPolyglotFiles(FIXTURE_ROOT, "go");
    const paths = files.map((f) => f.path);
    // Excluded: .git/*, target/*, vendor/*, node_modules/*.
    // Included: a.go, b.go, c_test.go (the helper RETURNS it; the
    // metric helper drops it during the count — separation of concerns).
    expect(paths).toContain("a.go");
    expect(paths).toContain("b.go");
    expect(paths).toContain("c_test.go");
    expect(paths).toContain("pkg/inner.go");
    expect(paths).toContain("pkg/tiny.go");
    expect(paths).not.toContain(".git/HEAD.go");
    expect(paths).not.toContain("vendor/pkg.go");
    expect(paths).not.toContain("node_modules/dep.go");
  });

  test("LOC counts match content.split('\\n').length", () => {
    const files = collectPolyglotFiles(FIXTURE_ROOT, "go");
    const a = files.find((f) => f.path === "a.go");
    // We wrote 100 lines joined by "\n" → split("\n").length === 100.
    expect(a?.locCount).toBe(100);
  });

  test("returns files in deterministic sorted order", () => {
    const a = collectPolyglotFiles(FIXTURE_ROOT, "go").map((f) => f.path);
    const b = collectPolyglotFiles(FIXTURE_ROOT, "go").map((f) => f.path);
    expect(a).toEqual(b);
    const sorted = [...a].sort((x, y) => x.localeCompare(y));
    expect(a).toEqual(sorted);
  });
});

describe("collectPolyglotFiles — Rust", () => {
  test("finds only .rs files; ignores .go", () => {
    const files = collectPolyglotFiles(FIXTURE_ROOT, "rust");
    const paths = files.map((f) => f.path);
    expect(paths).toContain("rs/src/lib.rs");
    expect(paths).toContain("rs/src/big.rs");
    expect(paths).toContain("rs/src/tests.rs");
    expect(paths.every((p) => p.endsWith(".rs"))).toBe(true);
  });

  test("target/build.rs is excluded (skipped dir)", () => {
    const files = collectPolyglotFiles(FIXTURE_ROOT, "rust");
    const paths = files.map((f) => f.path);
    expect(paths).not.toContain("target/build.rs");
  });
});

describe("measureWorkingSetForRepo — end-to-end", () => {
  test("Go fixture: 2 in band (a.go=100, pkg/inner.go=60) of 4 source files (excluding c_test.go) = 0.5", () => {
    const r = measureWorkingSetForRepo(FIXTURE_ROOT, "go");
    expect(r.total).toBe(4); // a, b, pkg/inner, pkg/tiny (c_test excluded)
    expect(r.included).toBe(2); // a, pkg/inner
    expect(r.ratio).toBe(0.5);
  });

  test("Rust fixture: 2 in band (lib.rs=120, tests.rs=75) of 3 .rs files = 2/3", () => {
    const r = measureWorkingSetForRepo(FIXTURE_ROOT, "rust");
    expect(r.total).toBe(3);
    expect(r.included).toBe(2);
    expect(r.ratio).toBeCloseTo(2 / 3, 6);
  });

  test("echoes the bounds back so callers can audit which numbers produced the ratio", () => {
    const r = measureWorkingSetForRepo(FIXTURE_ROOT, "go");
    expect(r.minLoc).toBe(50);
    expect(r.maxLoc).toBe(500);
  });
});
diff --git a/src/c14_working_set_walker.ts b/src/c14_working_set_walker.ts new file mode 100644 index 0000000000000000000000000000000000000000..d1e8fc7b32d3c499bb774f247234807092154887 --- /dev/null +++ b/src/c14_working_set_walker.ts @@ -0,0 +1,105 @@
// c14 — adapter: filesystem walker that produces a polyglot
// WorkingSetFile summary for an external source tree (Go or Rust).
// Recursive directory walk; counts lines of each .go / .rs file using
// the same `content.split("\n").length` rule as b32_sama_v2_metrics so
// the cross-language metric matches the TS metric byte-for-byte.
//
// Skipped directories are the conventional non-source trees that
// would otherwise inflate the denominator with vendored / generated
// / build artefacts: .git, target/ (Rust build output), vendor/ (Go
// vendored deps), node_modules/ (incidental, defensive).
//
// The walker is hermetic — given a path that is a directory it
// resolves the file set deterministically. Calls into the pure helper
// in b32_working_set_polyglot.ts for the ratio.

import { type Dirent, readdirSync, readFileSync, statSync } from "node:fs";
import { resolve } from "node:path";
import {
  computeWorkingSetFitPolyglot,
  type PolyglotLanguage,
  type WorkingSetFile,
  type WorkingSetResult,
} from "./b32_working_set_polyglot.ts";

// Non-hidden directory names that are never descended into. `.git` is
// listed for documentation value; it is also covered by the blanket
// hidden-directory skip inside the walk.
const SKIPPED_DIRS: ReadonlySet<string> = new Set([
  ".git",
  "target",
  "vendor",
  "node_modules",
]);

// Source-file extension matched for each supported language.
const EXTENSION_FOR: Record<PolyglotLanguage, string> = {
  go: ".go",
  rust: ".rs",
};

/**
 * Walk a directory tree and return a `{ path, locCount }` entry for every
 * regular file whose name ends with the target language's extension.
 *
 * Paths are relative to `repoRoot` and joined with "/", so they're stable
 * across machines. Go `_test.go` files ARE returned here; dropping them
 * is the metric helper's job.
 *
 * @param repoRoot - Directory to walk (resolved to an absolute path).
 * @param lang - Selects the file extension to collect.
 * @returns Entries sorted by path.
 * @throws If `repoRoot` does not exist, is not a directory, or cannot be
 *   listed; also if a matching file cannot be read.
 */
export const collectPolyglotFiles = (
  repoRoot: string,
  lang: PolyglotLanguage,
): WorkingSetFile[] => {
  const ext = EXTENSION_FOR[lang];
  const out: WorkingSetFile[] = [];

  const walk = (absDir: string, relDir: string): void => {
    let entries: Dirent[];
    try {
      entries = readdirSync(absDir, { withFileTypes: true });
    } catch (err) {
      // An unreadable ROOT must not be swallowed: the walk would return
      // no files and the metric would then report a vacuous ratio of 1.0
      // for a tree that was never read.
      if (relDir === "") throw err;
      // Deeper down, letting one bad subtree halt the whole measurement
      // would be worse than reporting the partial set, so skip it.
      return;
    }
    for (const e of entries) {
      const relPath = relDir === "" ? e.name : `${relDir}/${e.name}`;
      if (e.isDirectory()) {
        // Skip every hidden directory (.git, .github, .vscode, ...) and
        // the named non-source trees. Hidden FILES are not skipped: a
        // dot-prefixed file with a matching extension is still collected.
        if (e.name.startsWith(".") || SKIPPED_DIRS.has(e.name)) continue;
        walk(resolve(absDir, e.name), relPath);
        continue;
      }
      // Only entries that report isFile() are read; anything else is ignored.
      if (!e.isFile() || !e.name.endsWith(ext)) continue;
      const content = readFileSync(resolve(absDir, e.name), "utf8");
      // Match b32_sama_v2_metrics.ts: lines = content.split("\n").length.
      // A file ending in a trailing newline therefore counts one more
      // than `wc -l` would report.
      out.push({ path: relPath, locCount: content.split("\n").length });
    }
  };

  const root = resolve(repoRoot);
  // statSync throws if the path does not exist.
  if (!statSync(root).isDirectory()) {
    throw new Error(`expected a directory, got: ${repoRoot}`);
  }
  walk(root, "");
  // Sort for deterministic output (readdirSync is platform-dependent).
  // NOTE(review): localeCompare uses the runtime's default locale — confirm
  // that is acceptable for cross-machine reproducibility of the file list.
  out.sort((a, b) => a.path.localeCompare(b.path));
  return out;
};

/**
 * Convenience: walk + compute in one call. Used by the CLI script.
 *
 * @returns The metric result plus the full collected file list (test
 *   files included) so callers can print a per-file breakdown.
 */
export const measureWorkingSetForRepo = (
  repoRoot: string,
  lang: PolyglotLanguage,
): WorkingSetResult & { files: WorkingSetFile[] } => {
  const files = collectPolyglotFiles(repoRoot, lang);
  const result = computeWorkingSetFitPolyglot(files, lang);
  return { ...result, files };
};