// c14 — adapter: filesystem walker that produces a polyglot
// WorkingSetFile summary for an external source tree (Go or Rust).
// Recursive directory walk; counts lines of each .go / .rs file using
// the same `content.split("\n").length` rule as b32_sama_v2_metrics so
// the cross-language metric matches the TS metric byte-for-byte.
//
// Skipped directories are the conventional non-source trees that
// would otherwise inflate the denominator with vendored / generated
// / build artefacts: .git, target/ (Rust build output), vendor/ (Go
// vendored deps), node_modules/ (incidental, defensive). Every other
// directory whose name starts with "." is skipped as well.
//
// The walker is hermetic — given a path that is a directory it
// resolves the file set deterministically. Calls into the pure helper
// in b32_working_set_polyglot.ts for the ratio.

import { readdirSync, readFileSync, statSync, type Dirent } from "node:fs";
import { resolve } from "node:path";
import {
  computeWorkingSetFitPolyglot,
  type PolyglotLanguage,
  type WorkingSetFile,
  type WorkingSetResult,
} from "./b32_working_set_polyglot.ts";

// Directory names that are never descended into, at any depth.
const SKIPPED_DIRS: ReadonlySet<string> = new Set([
  ".git",
  "target",
  "vendor",
  "node_modules",
]);

// File extension (including the leading dot) that identifies a source
// file of each supported language.
const EXTENSION_FOR: Record<PolyglotLanguage, string> = {
  go: ".go",
  rust: ".rs",
};

// List one directory. An unreadable directory (permission error,
// removed mid-walk, ...) yields an empty listing: letting one bad
// subtree halt the whole measurement would be worse than reporting the
// partial set. The root is validated separately with statSync before
// the walk starts.
const readEntries = (absDir: string): Dirent[] => {
  try {
    return readdirSync(absDir, { withFileTypes: true });
  } catch {
    return [];
  }
};

// Plain UTF-16 code-unit ordering. Unlike String.prototype.localeCompare
// this does not consult the host locale / ICU data, so the resulting
// order is identical on every machine.
const compareByPath = (a: WorkingSetFile, b: WorkingSetFile): number => {
  if (a.path < b.path) return -1;
  if (a.path > b.path) return 1;
  return 0;
};

/**
 * Walk a directory and return every {path, locCount} pair for files
 * whose extension matches the target language.
 *
 * Paths are returned relative to `repoRoot`, always joined with "/",
 * so they are stable across machines. The result is sorted by path in
 * code-unit order because the order produced by readdirSync is
 * platform-dependent.
 *
 * Directories listed in SKIPPED_DIRS and all directories whose name
 * starts with "." are not descended into. Only entries that the
 * directory listing reports as regular files are counted. Files are
 * not filtered by a leading dot: any regular file whose name ends with
 * the language extension is included.
 *
 * @param repoRoot - Directory to walk; resolved against the current
 *   working directory when relative.
 * @param lang - Language whose source files should be collected.
 * @returns One entry per matching file, sorted by relative path.
 * @throws If `repoRoot` cannot be stat'ed, is not a directory, or a
 *   matching file cannot be read.
 */
export const collectPolyglotFiles = (
  repoRoot: string,
  lang: PolyglotLanguage,
): WorkingSetFile[] => {
  const ext = EXTENSION_FOR[lang];
  const out: WorkingSetFile[] = [];

  const walk = (absDir: string, relDir: string): void => {
    for (const e of readEntries(absDir)) {
      const relPath = relDir === "" ? e.name : `${relDir}/${e.name}`;

      if (e.isDirectory()) {
        // Hidden directories (.git, .github, .vscode, ...) and the
        // conventional vendored / build-output trees are not source.
        if (e.name.startsWith(".") || SKIPPED_DIRS.has(e.name)) continue;
        walk(resolve(absDir, e.name), relPath);
        continue;
      }

      if (!e.isFile()) continue;
      if (!e.name.endsWith(ext)) continue;

      const content = readFileSync(resolve(absDir, e.name), "utf8");
      // Match b32_sama_v2_metrics.ts: lines = content.split("\n").length.
      const locCount = content.split("\n").length;
      out.push({ path: relPath, locCount });
    }
  };

  const root = resolve(repoRoot);
  const rootStat = statSync(root);
  if (!rootStat.isDirectory()) {
    throw new Error(`expected a directory, got: ${repoRoot}`);
  }

  walk(root, "");

  // Sort for deterministic output (readdirSync is platform-dependent).
  out.sort(compareByPath);
  return out;
};

/**
 * Convenience: walk + compute in one call. Used by the CLI script.
 *
 * @param repoRoot - Directory to walk.
 * @param lang - Language whose source files should be measured.
 * @returns The result of computeWorkingSetFitPolyglot for the collected
 *   files, plus the collected file list under `files`.
 * @throws Whatever collectPolyglotFiles throws for an invalid root or
 *   an unreadable matching file.
 */
export const measureWorkingSetForRepo = (
  repoRoot: string,
  lang: PolyglotLanguage,
): WorkingSetResult & { files: WorkingSetFile[] } => {
  const files = collectPolyglotFiles(repoRoot, lang);
  const result = computeWorkingSetFitPolyglot(files, lang);
  return { ...result, files };
};