// syntaxai/tdd.md · main · src/b32_working_set_polyglot.ts
// b32_working_set_polyglot.ts · 83 lines · 3035 bytes (raw)

// b32 — logic: §5 workingSetFit metric for polyglot source trees
// (Go, Rust). Pure function, no I/O. Mirrors the formula in
// src/b32_sama_v2_metrics.ts byte-for-byte:
//
//     workingSetFit = files-in-band ÷ total-source-files
//
// where in-band means WORKING_SET_MIN_LOC ≤ LOC ≤ WORKING_SET_MAX_LOC,
// inclusive on both ends. Bounds are imported from a31_sama_v2.ts so
// the cross-language number is computed against the same band as this
// site's own metric — the single-source-of-truth determinism property
// from /sama/v2 §0.
//
// Used by scripts/measure-working-set.ts (the polyglot CLI) and the
// c14_working_set_walker.ts adapter, which feed it a pre-counted file
// summary so this module stays pure and unit-testable.

import {
  WORKING_SET_MAX_LOC,
  WORKING_SET_MIN_LOC,
} from "./a31_sama_v2.ts";

/**
 * Languages supported by the polyglot working-set metric. The tag also
 * governs which test-file exclusion rule is applied before files are counted.
 */
export type PolyglotLanguage = "go" | "rust";

/**
 * One pre-counted source file handed to the metric. Callers do the file
 * reading and line counting; this shape only carries the result.
 */
export interface WorkingSetFile {
  /** Repo-relative path (e.g. "crates/printer/src/standard.rs"). */
  path: string;
  /**
   * File length in lines, matching the TS metric's
   * `content.split("\n").length`.
   */
  locCount: number;
}

/**
 * Outcome of a working-set-fit computation for one language.
 */
export interface WorkingSetResult {
  /** Language tag the computation was run with. */
  language: PolyglotLanguage;
  /** Number of files inside [MIN, MAX] LOC, inclusive on both ends. */
  included: number;
  /** Total source files counted (after test-file exclusion). */
  total: number;
  /** `included / total`; empty input yields 1.0 (vacuously satisfied). */
  ratio: number;
  /** Lower LOC bound, echoed back from a31 so callers can audit it. */
  minLoc: number;
  /** Upper LOC bound, echoed back from a31 so callers can audit it. */
  maxLoc: number;
}

/**
 * Reports whether `path` is a dedicated test file that the metric should skip.
 *
 * The per-language asymmetry is deliberate:
 *
 * - Go keeps tests in `*_test.go` files. The TS metric excludes `*.test.ts`
 *   for the same structural reason: such files are not working modules in
 *   their own right, they verify one.
 * - Rust keeps tests INSIDE source files under `#[cfg(test)] mod tests`.
 *   Excluding at file granularity would either lose every tested file or
 *   accidentally include all of them, so no Rust file is excluded. The
 *   inline-tests dialect drafted at /sama/v2#62-inline-tests-dialect is what
 *   makes this coherent: where the test attaches is a language-level choice,
 *   and the working-set property the metric measures is unaffected.
 *
 * @param path Repo-relative file path; only its suffix is inspected.
 * @param lang Language tag selecting the exclusion rule.
 * @returns `true` only for Go paths ending in `_test.go`; `false` otherwise.
 */
const isPolyglotTestFile = (path: string, lang: PolyglotLanguage): boolean => {
  switch (lang) {
    case "go":
      return path.endsWith("_test.go");
    default:
      // Rust (and anything else): no file-level test exclusion.
      return false;
  }
};

/**
 * Computes the workingSetFit metric for a pre-counted list of source files.
 *
 * Test files (as decided by `isPolyglotTestFile` for `lang`) are dropped
 * first. Of the remaining files, those whose `locCount` lies within
 * [WORKING_SET_MIN_LOC, WORKING_SET_MAX_LOC], inclusive on both ends, count
 * as in-band. The ratio is in-band ÷ remaining.
 *
 * @param files Pre-counted file summaries; the array is not mutated.
 * @param lang Language tag; selects the test-file exclusion rule and is
 *   echoed back in the result.
 * @returns Counts, the ratio, and the LOC bounds that were applied. When no
 *   files remain after exclusion the ratio is 1.0 (vacuously satisfied),
 *   matching the TS metric.
 */
export const computeWorkingSetFitPolyglot = (
  files: ReadonlyArray<WorkingSetFile>,
  lang: PolyglotLanguage,
): WorkingSetResult => {
  // Stage 1: discard dedicated test files for this language.
  const sourceFiles = files.filter(
    (file) => !isPolyglotTestFile(file.path, lang),
  );

  // Stage 2: keep only the files whose length sits inside the inclusive band.
  const inBand = sourceFiles.filter(
    ({ locCount }) =>
      locCount >= WORKING_SET_MIN_LOC && locCount <= WORKING_SET_MAX_LOC,
  );

  const total = sourceFiles.length;
  const included = inBand.length;

  // An empty population is treated as fully fitting rather than dividing by 0.
  let ratio = 1.0;
  if (total !== 0) {
    ratio = included / total;
  }

  return {
    language: lang,
    included,
    total,
    ratio,
    minLoc: WORKING_SET_MIN_LOC,
    maxLoc: WORKING_SET_MAX_LOC,
  };
};