// syntaxai/tdd.md · main · src/c14_working_set_walker.ts
// c14 — adapter: filesystem walker that produces a polyglot
// WorkingSetFile summary for an external source tree (Go or Rust).
// Recursive directory walk; counts lines of each .go / .rs file using
// the same `content.split("\n").length` rule as b32_sama_v2_metrics so
// the cross-language metric matches the TS metric byte-for-byte.
//
// Skipped directories are the conventional non-source trees that
// would otherwise inflate the denominator with vendored / generated
// / build artefacts: .git, target/ (Rust build output), vendor/ (Go
// vendored deps), node_modules/ (incidental, defensive). Every other
// dot-directory (.github, .vscode, ...) is skipped as well.
//
// The walker is hermetic — given a path that is a directory it
// resolves the file set deterministically. Calls into the pure helper
// in b32_working_set_polyglot.ts for the ratio.
import { readdirSync, readFileSync, statSync } from "node:fs";
import { resolve } from "node:path";
import {
computeWorkingSetFitPolyglot,
type PolyglotLanguage,
type WorkingSetFile,
type WorkingSetResult,
} from "./b32_working_set_polyglot.ts";
/**
 * Directory names that are never descended into, matched against the bare
 * entry name at any depth: VCS metadata, Rust build output, Go vendored
 * dependencies and (defensively) node_modules.
 */
const SKIPPED_DIRS: ReadonlySet<string> = new Set([
  ".git",
  "target",
  "vendor",
  "node_modules",
]);

/** File-name suffix that marks a source file of each supported language. */
const EXTENSION_FOR: Record<PolyglotLanguage, string> = {
  go: ".go",
  rust: ".rs",
};

/**
 * Lists the entries of `absDir` as Dirents, or returns `undefined` when the
 * directory cannot be read (permission error, removed mid-walk, ...).
 *
 * The return type is deliberately inferred from the call so it always tracks
 * the `withFileTypes: true` overload of `readdirSync`.
 */
const readDirEntries = (absDir: string) => {
  try {
    return readdirSync(absDir, { withFileTypes: true });
  } catch {
    return undefined;
  }
};

/**
 * Recursively walks `repoRoot` and returns one `{ path, locCount }` record
 * for every regular file whose name ends with the extension of `lang`.
 *
 * - `path` is relative to `repoRoot` and always uses `/` as separator, so it
 *   is stable across machines.
 * - `locCount` is `content.split("\n").length`, the same rule used by
 *   b32_sama_v2_metrics (a trailing newline therefore counts as one extra
 *   line).
 * - Directories whose name starts with "." or is listed in `SKIPPED_DIRS`
 *   are not descended into. Hidden *files* are not filtered: a dotfile that
 *   carries the target extension is still counted.
 * - An unreadable sub-directory is skipped silently so that one bad subtree
 *   does not abort the whole measurement; the result is then a partial set.
 * - The result is sorted by `path` in ordinal (UTF-16 code unit) order,
 *   which does not depend on the host locale.
 *
 * @param repoRoot Directory to walk; resolved against the current working
 *   directory when relative.
 * @param lang Language whose source files are collected.
 * @returns The matching files, sorted by path.
 * @throws Error when `repoRoot` exists but is not a directory. Errors from
 *   `statSync` on the root and from `readFileSync` on a matching file
 *   propagate unchanged.
 */
export const collectPolyglotFiles = (
  repoRoot: string,
  lang: PolyglotLanguage,
): WorkingSetFile[] => {
  const ext = EXTENSION_FOR[lang];
  const out: WorkingSetFile[] = [];

  const walk = (absDir: string, relDir: string): void => {
    const entries = readDirEntries(absDir);
    if (entries === undefined) return;

    for (const entry of entries) {
      const relPath = relDir === "" ? entry.name : `${relDir}/${entry.name}`;

      if (entry.isDirectory()) {
        // Hidden directories (.git, .github, .vscode, ...) and the named
        // build / vendor trees never contain first-party sources.
        if (entry.name.startsWith(".") || SKIPPED_DIRS.has(entry.name)) {
          continue;
        }
        walk(resolve(absDir, entry.name), relPath);
        continue;
      }

      // Anything that is not a regular file with the target extension is
      // ignored.
      if (!entry.isFile() || !entry.name.endsWith(ext)) continue;

      const content = readFileSync(resolve(absDir, entry.name), "utf8");
      // Match b32_sama_v2_metrics.ts: lines = content.split("\n").length.
      out.push({ path: relPath, locCount: content.split("\n").length });
    }
  };

  const root = resolve(repoRoot);
  if (!statSync(root).isDirectory()) {
    throw new Error(`expected a directory, got: ${repoRoot}`);
  }
  walk(root, "");

  // readdirSync order is platform-dependent, so sort explicitly. Compare by
  // code unit rather than localeCompare: collation varies with the host
  // locale / ICU data, which would make the order machine-dependent.
  out.sort((a, b) => (a.path < b.path ? -1 : a.path > b.path ? 1 : 0));
  return out;
};
/**
 * One-call convenience used by the CLI script: collects the source files of
 * `lang` under `repoRoot`, feeds them to `computeWorkingSetFitPolyglot`, and
 * returns that result together with the file list it was computed from.
 *
 * @param repoRoot Directory to walk.
 * @param lang Language whose source files are measured.
 * @returns The fields of the computed result plus a `files` property holding
 *   the collected files.
 */
export const measureWorkingSetForRepo = (
  repoRoot: string,
  lang: PolyglotLanguage,
): WorkingSetResult & { files: WorkingSetFile[] } => {
  const files = collectPolyglotFiles(repoRoot, lang);
  // `files` is spread last so it always holds the collected list.
  return { ...computeWorkingSetFitPolyglot(files, lang), files };
};