5ebbd0c33454fcb0a34c35476fe25a3cff7d5de0 diff --git a/content/sama/v2.md b/content/sama/v2.md index b1d47e9a5ec21d5f176b8b8295bd85462fd304a1..ec2009eb8e9859ba75cba627789d36eeba2d1946 100644 --- a/content/sama/v2.md +++ b/content/sama/v2.md @@ -141,6 +141,46 @@ Report the **delta** between SAMA-on and SAMA-off runs on these metrics — not --- +## 5 (operational) — Core metrics definitions + +This subsection pins how the §5 metrics are computed by the verifier at [/sama/v2/verify](/sama/v2/verify). The values are functions of `(sama.profile.toml, src/**.ts)` alone: same source tree + same profile → identical numbers across runs. + +- **graphDepth** = length of the longest path in the import DAG. Nodes are SAMA source files (`src/*.ts` non-test, matching a profile prefix); edges are static relative-path imports (`from "./...ts"`) between them. A file with no imports has depth 1. Empty graph = 0. Cycles (which the Law check would flag separately) are bounded so the metric still terminates. + +- **fanByLayer** = for each canonical layer L ∈ {0,1,2,3}, two distribution summaries: **fanIn** (count of edges arriving at files in L) and **fanOut** (count of edges leaving files in L). Each summary reports `{mean, p50, p95, max}` (nearest-rank percentile) over the files in L. Empty layers report all-zero summaries. + +- **boundaryRatio** = (parse-boundary call sites in Layer 2 files) ÷ (parse-boundary call sites anywhere in the source tree). The set of "parse-boundary call sites" is defined by the shared detector that also powers the §4.4 Modeled-boundary check — currently `JSON.parse(...)` and `new URL(...)` outside string literals and comments. Both consumers share the helper in `src/a31_sama_v2.ts`, so they cannot diverge. When no parse boundaries exist anywhere, `boundaryRatio = 1.0` (vacuously satisfied). + +- **workingSetFit** = (count of source files with `WORKING_SET_MIN_LOC ≤ LOC ≤ WORKING_SET_MAX_LOC`) ÷ (total source files). The bounds are *intentional defaults documented before the numbers, not retrofitted to flatter this repo*: + - **Upper 500** — comfortably below the §4.5 Atomic 700-LOC cap, leaving headroom before a file approaches "split soon" territory. + - **Lower 50** — below this, a file is too small to be a substantive module; it is usually a type-only file, a stub, or a single helper that would read better inlined into a sibling. Type-only files (Layer 0 model shards) and minimal test fixtures fall here by design. They are acceptable but counted as "not in the working-set sweet spot" because they are not load-bearing modules. + + Bounds are hard-coded constants `WORKING_SET_MIN_LOC = 50` and `WORKING_SET_MAX_LOC = 500` in [`src/a31_sama_v2.ts`](/GIT/syntaxai/tdd.md/blob/main/src/a31_sama_v2.ts) for v1 of the metrics emitter. Making them profile-configurable is a deliberate later step (requires extending the TOML subset parser to handle integer values). + +- **violationCounts** = a record keyed by the seven §4 checks (`sorted`, `architecture`, `modeledTests`, `modeledBoundary`, `atomic`, `law`, `consistency`), each holding the integer count of violations that check produced on this run. Reported even when a check passes (value = 0) — this is §5's "trailing signal: which rules agents *almost* break." The verifier enumerates **all** violations per check (no short-circuit on first failure within a check), so the count is meaningful — not "1 if failed, 0 if passed". + +### Worked example — boundaryRatio for this repo (hand-traced) + +The §0 contract ("deterministic program; no LLM judgment") is auditable only if the metric output matches a hand trace. Walking `boundaryRatio` for this repo's `src/` against the live verifier: + +A raw grep across non-test `src/*.ts` finds seven hits matching `JSON.parse(` and four hits matching `new URL(`. The shared detector strips comments and string literals first, which removes the explanatory mentions inside `// ...` lines and inside docstring literals. After stripping, the surviving real call sites are: + +| call site | layer (prefix → L) | +|---|---| +| `src/c13_database.ts:133` `JSON.parse(row.verdict_json)` | `c13_` → L2 | +| `src/c13_database.ts:159` `JSON.parse(r.tracked_branches)` | `c13_` → L2 | +| `src/c13_database.ts:273` `JSON.parse(r.doc_json)` | `c13_` → L2 | +| `src/c13_database.ts:373` `JSON.parse(r.verdict_json)` | `c13_` → L2 | +| `src/c14_request_parse.ts:28` `JSON.parse(text)` | `c14_` → L2 | +| `src/c14_request_parse.ts:20` `new URL(text)` | `c14_` → L2 | +| `src/c14_client_bundle.ts:72` `new URL(import.meta.url)` | `c14_` → L2 | + +Total: 7 parse-boundary call sites; all 7 fall under prefixes the profile maps to Layer 2. + +`boundaryRatio = 7 / 7 = 1.0 = 100.0%` — which is exactly what [/sama/v2/verify](/sama/v2/verify) reports under §5 Core metrics. The hand count and the verifier's count match by construction: both consume `findParseBoundaryCallSites` in [`src/a31_sama_v2.ts`](/GIT/syntaxai/tdd.md/blob/main/src/a31_sama_v2.ts), and the Modeled-boundary check (#4) uses the same source of truth — so it cannot diverge. + +--- + ## 6. Evolution policy (how the standard stays alive without rotting) - **The core (§1) is frozen.** Changing the four layers or the Law requires a major version and an extraordinarily high evidentiary bar: cross-repo data showing the current core measurably harms agent performance. diff --git a/src/a31_sama_v2.ts b/src/a31_sama_v2.ts index 6031a8065d4707c3a28c610c3e6abb5de4cecfdd..8edaa768e19106716874a1b22c04d849a9147d16 100644 --- a/src/a31_sama_v2.ts +++ b/src/a31_sama_v2.ts @@ -1,9 +1,10 @@ -// c31 — model: types for the SAMA v2 verifier pipeline. Pure data -// shapes: the parsed profile (ProfileSpec), the verifier's input -// (SamaV2Input), and its output (SamaV2Report). No I/O lives here; -// c14_sama_profile parses the .toml into ProfileSpec, c32_sama_v2_verify -// applies the seven §4 checks against (ProfileSpec, files), and -// c21_handlers_sama renders the SamaV2Report. +// a31 — model: types, constants, and pure helpers for the SAMA v2 +// verifier + §5 core metrics emitter. No I/O lives here. c14_sama_profile +// parses .toml into ProfileSpec; b32_sama_v2_verify applies the seven §4 +// checks; b32_sama_v2_metrics computes the §5 metrics. The verifier and +// metrics emitter share the helpers below — particularly the parse- +// boundary detector — so the Modeled-boundary check (#4) and +// boundaryRatio metric move in lockstep. export type LayerNumber = 0 | 1 | 2 | 3; @@ -37,7 +38,7 @@ export interface ProfileSpec { export interface SamaV2Input { profile: ProfileSpec; - // Map keyed by repo-relative path (e.g. "src/c11_server.ts") to + // Map keyed by repo-relative path (e.g. "src/d11_server.ts") to // file contents. The verifier never reads files itself; the loader // populates this map. files: Map; @@ -79,9 +80,71 @@ export interface SamaV2Report { overallPassed: boolean; } -// Helper used in the verifier and re-exported here so call sites can -// type-narrow against the same source: returns the layer number a -// file's basename declares, or null if no profile prefix matches. +// — §5 core metrics: shape ---------------------------------------- +// +// Operational definitions are pinned on /sama/v2 §5 (operational). +// The metric VALUES are computed in b32_sama_v2_metrics; this file +// just declares the shape so callers (and the renderer) can type-narrow. + +export interface FanSummary { + // {mean, p50, p95, max} over a per-file fan-in or fan-out series. + // Empty series → all zeros. + mean: number; + p50: number; + p95: number; + max: number; +} + +export interface FanByLayer { + 0: { fanIn: FanSummary; fanOut: FanSummary }; + 1: { fanIn: FanSummary; fanOut: FanSummary }; + 2: { fanIn: FanSummary; fanOut: FanSummary }; + 3: { fanIn: FanSummary; fanOut: FanSummary }; +} + +export interface SamaV2ViolationCounts { + // Counts of violations per §4 check, reported even when a check + // passes (value = 0). This is §5's "trailing signal: which rules + // agents *almost* break." + sorted: number; + architecture: number; + modeledTests: number; + modeledBoundary: number; + atomic: number; + law: number; + consistency: number; +} + +export interface SamaV2Metrics { + graphDepth: number; + fanByLayer: FanByLayer; + boundaryRatio: number; + workingSetFit: number; + violationCounts: SamaV2ViolationCounts; +} + +// — Working-set bounds (per /sama/v2 §5 documented reasoning) ----- +// +// Upper 500: comfortably below the §4.5 Atomic 700-LOC cap, leaving +// headroom before a file approaches "split soon" territory. +// Lower 50: below this, a file is too small to be a substantive +// module — usually a type-only file, a stub, or a single helper that +// would read better inlined into a sibling. Type-only files (Layer 0 +// model shards) and minimal test fixtures fall here by design; they +// are acceptable but counted as "not in the working-set sweet spot" +// because they are not load-bearing modules. +// +// Hard-coded for v1 of the metrics emitter. Making them profile- +// configurable is a deliberate later step (requires extending the +// TOML subset parser to handle integer values). +export const WORKING_SET_MIN_LOC = 50; +export const WORKING_SET_MAX_LOC = 500; + +// — Layer assignment helper -------------------------------------- +// +// Returns the canonical layer a file's basename declares via prefix, +// or null if no profile prefix matches. The verifier and metrics +// emitter both call this for every file they examine. export const declaredLayer = ( path: string, profile: ProfileSpec, @@ -95,3 +158,140 @@ export const declaredLayer = ( } return null; }; + +// — File classifiers --------------------------------------------- + +// A SAMA file is one we expect to obey the layer rules: any *.ts +// under src/ that isn't a *.test.ts. Tests live next to source as +// siblings; they're examined for the Modeled check but don't carry +// their own layer. +export const isSamaFile = (path: string): boolean => + path.startsWith("src/") && path.endsWith(".ts") && !path.endsWith(".test.ts"); + +export const isTestFile = (path: string): boolean => + path.startsWith("src/") && path.endsWith(".test.ts"); + +// — Source-mask helpers ------------------------------------------ + +// Strip JS/TS string literals and comments to whitespace so a regex +// that walks the source doesn't trip on test fixtures that contain +// the very patterns we're scanning for. Preserves newline positions +// so line numbers stay aligned. +export const stripStringsAndComments = (src: string): string => { + let out = ""; + let i = 0; + while (i < src.length) { + const c = src[i]; + const n = src[i + 1]; + if (c === "/" && n === "/") { + out += " "; + i += 2; + while (i < src.length && src[i] !== "\n") { out += " "; i++; } + } else if (c === "/" && n === "*") { + out += " "; + i += 2; + while (i < src.length - 1 && !(src[i] === "*" && src[i + 1] === "/")) { + out += src[i] === "\n" ? "\n" : " "; + i++; + } + out += " "; + i += 2; + } else if (c === '"' || c === "'" || c === "`") { + const quote = c; + out += " "; + i++; + while (i < src.length && src[i] !== quote) { + if (src[i] === "\\" && i + 1 < src.length) { out += " "; i += 2; continue; } + out += src[i] === "\n" ? "\n" : " "; + i++; + } + out += " "; + i++; + } else { + out += c; + i++; + } + } + return out; +}; + +// Collect every relative ".ts" import edge in a file. Scans raw +// source: a stripped copy would erase the quoted import paths along +// with all other string literals, so the regex must run over the +// original. To avoid picking up import-like strings inside test +// fixtures, we cross-check each match position against the stripped +// mask — if the keyword `from` lands on whitespace in the mask, it +// was inside a string literal and we skip it. +export const collectRelativeImports = (content: string): string[] => { + const mask = stripStringsAndComments(content); + const re = /\bfrom\s+["'](\.\/[A-Za-z0-9_./-]+\.ts)["']/g; + const out: string[] = []; + let m: RegExpExecArray | null; + while ((m = re.exec(content)) !== null) { + // If the `from` keyword position is whitespace in the mask, the + // entire match was inside a string literal (e.g. a test fixture). + if (mask[m.index] === " " || mask[m.index] === "\n") continue; + if (m[1]) out.push(m[1]); + } + return out; +}; + +// Resolve a relative import like "./c14_git.ts" from the importing +// file's directory to the repo-relative path used as the input map's +// key (e.g. "src/c14_git.ts"). +export const resolveImport = (fromPath: string, importPath: string): string => { + const dir = fromPath.split("/").slice(0, -1).join("/"); + const rel = importPath.replace(/^\.\//, ""); + return dir + "/" + rel; +}; + +// — Parse-boundary call-site detector ----------------------------- +// +// Source of truth for what counts as "external input parsed at the +// boundary" under SAMA v2 §4.4. Consumed by: +// - b32_sama_v2_verify.checkModeledBoundary (#4) — flags Layer 1/3 +// files where any pattern occurs; emits one violation per +// (file, pattern) pair preserving PARSE_BOUNDARY_PATTERNS order. +// - b32_sama_v2_metrics.boundaryRatio — counts every individual +// call site and reports the Layer-2 share. +// If you change the patterns, both check and metric move in lockstep. + +export const PARSE_BOUNDARY_PATTERNS: ReadonlyArray<{ + name: "JSON.parse" | "new URL"; + source: string; +}> = [ + { name: "JSON.parse", source: "\\bJSON\\.parse\\s*\\(" }, + { name: "new URL", source: "\\bnew\\s+URL\\s*\\(" }, +]; + +export interface ParseBoundaryCallSite { + file: string; + pattern: "JSON.parse" | "new URL"; + // Position in the stripped source. Useful for line-number lookup; + // the verifier currently only needs (file, pattern). + index: number; +} + +// Walk every SAMA file (src/*.ts non-test) and return every parse- +// boundary call site. Operates on the stripped source so string- +// literal fixtures don't false-positive. Iteration order: files in +// input map order (Map preserves insertion order), patterns in +// PARSE_BOUNDARY_PATTERNS order, occurrences in source order. +export const findParseBoundaryCallSites = ( + files: Map, +): ParseBoundaryCallSite[] => { + const out: ParseBoundaryCallSite[] = []; + for (const [path, content] of files) { + if (!isSamaFile(path)) continue; + const stripped = stripStringsAndComments(content); + for (const pat of PARSE_BOUNDARY_PATTERNS) { + // Fresh regex per file so lastIndex never bleeds. + const re = new RegExp(pat.source, "g"); + let m: RegExpExecArray | null; + while ((m = re.exec(stripped)) !== null) { + out.push({ file: path, pattern: pat.name, index: m.index }); + } + } + } + return out; +}; diff --git a/src/b32_sama_v2_metrics.test.ts b/src/b32_sama_v2_metrics.test.ts new file mode 100644 index 0000000000000000000000000000000000000000..310aab53b30f35792ff85df1ba19b4a338b669f3 --- /dev/null +++ b/src/b32_sama_v2_metrics.test.ts @@ -0,0 +1,253 @@ +import { describe, expect, test } from "bun:test"; +import { computeCoreMetrics } from "./b32_sama_v2_metrics.ts"; +import { + WORKING_SET_MAX_LOC, + WORKING_SET_MIN_LOC, + type ProfileSpec, + type SamaV2Input, +} from "./a31_sama_v2.ts"; + +// Flat fixture profile (one prefix per layer) so the metric tests +// don't depend on the live profile. The Law-check sublayer ordering +// isn't relevant here — these tests target the metrics computation, +// not the conformance verdict. +const FIXTURE_PROFILE: ProfileSpec = { + samaVersion: "2.0", + profile: "metrics-test", + layers: { + 0: { sublayers: [{ name: "default", prefix: "p0_", index: 0 }] }, + 1: { sublayers: [{ name: "default", prefix: "p1_", index: 0 }] }, + 2: { sublayers: [{ name: "default", prefix: "p2_", index: 0 }] }, + 3: { sublayers: [{ name: "default", prefix: "p3_", index: 0 }] }, + }, +}; + +const mk = (entries: Array<[string, string]>): SamaV2Input => ({ + profile: FIXTURE_PROFILE, + files: new Map(entries), +}); + +// Helper: produce a file with `n` lines of harmless code (so +// split("\n").length === n). +const linesOf = (n: number): string => + Array.from({ length: n }, (_, i) => `const x${i} = ${i};`).join("\n"); + +// Helper: a minimal sibling test body for Layer-1/2 fixtures. +const TEST_BODY = 'import { test, expect } from "bun:test"; test("ok", () => { expect(1).toBe(1); });\n'; + +describe("computeCoreMetrics — graphDepth", () => { + test("empty repo → 0", () => { + const m = computeCoreMetrics(mk([])); + expect(m.graphDepth).toBe(0); + }); + + test("single file with no imports → 1", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", "export const x = 1;\n"], + ])); + expect(m.graphDepth).toBe(1); + }); + + test("chain p3 → p2 → p1 → p0 → 4", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", "export const x = 1;\n"], + ["src/p1_a.ts", `import { x } from "./p0_a.ts";\nexport const y = x;\n`], + ["src/p1_a.test.ts", TEST_BODY], + ["src/p2_a.ts", `import { y } from "./p1_a.ts";\nexport const z = y;\n`], + ["src/p2_a.test.ts", TEST_BODY], + ["src/p3_a.ts", `import { z } from "./p2_a.ts";\nexport const w = z;\n`], + ])); + expect(m.graphDepth).toBe(4); + }); + + test("a cycle is bounded (does not infinite-loop)", () => { + // p1_a ↔ p1_b cycle (same-layer; Law would flag it, but graphDepth + // must still terminate with a finite number). + const m = computeCoreMetrics(mk([ + ["src/p1_a.ts", `import { y } from "./p1_b.ts";\nexport const x = y;\n`], + ["src/p1_a.test.ts", TEST_BODY], + ["src/p1_b.ts", `import { x } from "./p1_a.ts";\nexport const y = x;\n`], + ["src/p1_b.test.ts", TEST_BODY], + ])); + expect(Number.isFinite(m.graphDepth)).toBe(true); + expect(m.graphDepth).toBeGreaterThanOrEqual(1); + }); +}); + +describe("computeCoreMetrics — fanByLayer", () => { + test("empty repo → all-zero summaries", () => { + const m = computeCoreMetrics(mk([])); + for (const L of [0, 1, 2, 3] as const) { + expect(m.fanByLayer[L].fanIn).toEqual({ mean: 0, p50: 0, p95: 0, max: 0 }); + expect(m.fanByLayer[L].fanOut).toEqual({ mean: 0, p50: 0, p95: 0, max: 0 }); + } + }); + + test("single Layer-0 file with no edges → all zeros at L0", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", "export const x = 1;\n"], + ])); + expect(m.fanByLayer[0].fanIn.max).toBe(0); + expect(m.fanByLayer[0].fanOut.max).toBe(0); + }); + + test("two Layer-1 files importing same Layer-0 → L0.fanIn.max = 2, L1.fanOut.max = 1", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", "export const x = 1;\n"], + ["src/p1_a.ts", `import { x } from "./p0_a.ts";\nexport const y = x;\n`], + ["src/p1_a.test.ts", TEST_BODY], + ["src/p1_b.ts", `import { x } from "./p0_a.ts";\nexport const z = x;\n`], + ["src/p1_b.test.ts", TEST_BODY], + ])); + expect(m.fanByLayer[0].fanIn.max).toBe(2); + expect(m.fanByLayer[1].fanOut.max).toBe(1); + expect(m.fanByLayer[1].fanIn.max).toBe(0); + }); +}); + +describe("computeCoreMetrics — boundaryRatio", () => { + test("no parse boundaries anywhere → 1.0 (vacuously)", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", "export const x = 1;\n"], + ])); + expect(m.boundaryRatio).toBe(1.0); + }); + + test("JSON.parse only in Layer 2 → 1.0", () => { + const m = computeCoreMetrics(mk([ + ["src/p2_a.ts", "export const f = (s: string) => JSON.parse(s);\n"], + ["src/p2_a.test.ts", TEST_BODY], + ])); + expect(m.boundaryRatio).toBe(1.0); + }); + + test("JSON.parse in Layer 1 and Layer 2 → 0.5", () => { + const m = computeCoreMetrics(mk([ + ["src/p1_a.ts", "export const f = (s: string) => JSON.parse(s);\n"], + ["src/p1_a.test.ts", TEST_BODY], + ["src/p2_a.ts", "export const g = (s: string) => JSON.parse(s);\n"], + ["src/p2_a.test.ts", TEST_BODY], + ])); + expect(m.boundaryRatio).toBe(0.5); + }); + + test("string literal containing JSON.parse doesn't false-positive", () => { + const m = computeCoreMetrics(mk([ + ["src/p1_a.ts", `const explainer = "call JSON.parse here";\nexport const x = explainer.length;\n`], + ["src/p1_a.test.ts", TEST_BODY], + ])); + expect(m.boundaryRatio).toBe(1.0); + }); + + test("counts every call site, not just every file", () => { + // Two JSON.parse in one Layer-2 file, one in Layer-1 → ratio = 2/3 + const m = computeCoreMetrics(mk([ + ["src/p1_a.ts", "export const f = (s: string) => JSON.parse(s);\n"], + ["src/p1_a.test.ts", TEST_BODY], + ["src/p2_a.ts", "export const g = (s: string) => JSON.parse(s);\nexport const h = (s: string) => JSON.parse(s);\n"], + ["src/p2_a.test.ts", TEST_BODY], + ])); + expect(m.boundaryRatio).toBeCloseTo(2 / 3, 6); + }); +}); + +describe("computeCoreMetrics — workingSetFit", () => { + test("empty repo → 1.0", () => { + const m = computeCoreMetrics(mk([])); + expect(m.workingSetFit).toBe(1.0); + }); + + test("a single 100-line file → 1.0", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", linesOf(100)], + ])); + expect(m.workingSetFit).toBe(1.0); + }); + + test("a 10-line file falls below the min → 0.0", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", linesOf(10)], + ])); + expect(m.workingSetFit).toBe(0.0); + }); + + test("a 600-line file exceeds the max → 0.0", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", linesOf(600)], + ])); + expect(m.workingSetFit).toBe(0.0); + }); + + test("two files: one 100-line (in), one 10-line (out) → 0.5", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", linesOf(100)], + ["src/p0_b.ts", linesOf(10)], + ])); + expect(m.workingSetFit).toBe(0.5); + }); + + test("exact bounds are inclusive (50 and 500 count as in the sweet spot)", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_min.ts", linesOf(WORKING_SET_MIN_LOC)], + ["src/p0_max.ts", linesOf(WORKING_SET_MAX_LOC)], + ])); + expect(m.workingSetFit).toBe(1.0); + }); + + test("test files don't count toward the metric (only SAMA source files)", () => { + // One 100-line Layer-1 source + a tiny sibling test. Sibling test + // is 1 line, far below the min, but it's excluded. + const m = computeCoreMetrics(mk([ + ["src/p1_a.ts", linesOf(100)], + ["src/p1_a.test.ts", TEST_BODY], + ])); + expect(m.workingSetFit).toBe(1.0); + }); +}); + +describe("computeCoreMetrics — violationCounts", () => { + test("conforming fixture → all counts = 0", () => { + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", "export const x = 1;\n"], + ])); + expect(m.violationCounts).toEqual({ + sorted: 0, architecture: 0, modeledTests: 0, modeledBoundary: 0, + atomic: 0, law: 0, consistency: 0, + }); + }); + + test("Layer-1 file without sibling test → modeledTests = 1", () => { + const m = computeCoreMetrics(mk([ + ["src/p1_a.ts", "export const y = 1;\n"], + ])); + expect(m.violationCounts.modeledTests).toBe(1); + }); + + test("counts are populated even when overall verdict is conforming (trailing signal shape)", () => { + // Single Layer-0 file → all checks pass → all counts are 0 (not + // missing). This is the §5 contract: keys exist regardless. + const m = computeCoreMetrics(mk([ + ["src/p0_a.ts", "export const x = 1;\n"], + ])); + const keys = Object.keys(m.violationCounts).sort(); + expect(keys).toEqual([ + "architecture", "atomic", "consistency", "law", + "modeledBoundary", "modeledTests", "sorted", + ]); + }); +}); + +describe("computeCoreMetrics — reproducibility", () => { + test("same input → identical output across two runs (deep-equal)", () => { + const input = mk([ + ["src/p0_a.ts", "export const x = 1;\n"], + ["src/p1_a.ts", `import { x } from "./p0_a.ts";\nexport const y = x;\n`], + ["src/p1_a.test.ts", TEST_BODY], + ["src/p2_a.ts", `import { y } from "./p1_a.ts";\nexport const f = (s: string) => JSON.parse(s);\n`], + ["src/p2_a.test.ts", TEST_BODY], + ]); + const m1 = computeCoreMetrics(input); + const m2 = computeCoreMetrics(input); + expect(m1).toEqual(m2); + }); +}); diff --git a/src/b32_sama_v2_metrics.ts b/src/b32_sama_v2_metrics.ts new file mode 100644 index 0000000000000000000000000000000000000000..814186da13b74434619b01f4f32a9733b5df2427 --- /dev/null +++ b/src/b32_sama_v2_metrics.ts @@ -0,0 +1,220 @@ +// b32 — logic: SAMA v2 §5 core metrics emitter. Pure function over +// SamaV2Input that returns the five §5 metrics (graphDepth, fanByLayer, +// boundaryRatio, workingSetFit, violationCounts). No I/O, no clock, +// no filesystem; same source tree + same profile → identical numbers. +// +// The empirical artefact §6 of /sama/v2 requires before any later +// claim (skeleton, agent experiment, external repo audit) can be +// measured as a delta. Operational definitions live on /sama/v2 §5. +// +// Shared helpers (declaredLayer, isSamaFile, collectRelativeImports, +// resolveImport, findParseBoundaryCallSites) come from a31_sama_v2 so +// this module and b32_sama_v2_verify agree by construction — the +// Modeled-boundary check (#4) and boundaryRatio metric consume the +// same detector and cannot diverge. + +import { + WORKING_SET_MAX_LOC, + WORKING_SET_MIN_LOC, + collectRelativeImports, + declaredLayer, + findParseBoundaryCallSites, + isSamaFile, + resolveImport, + type FanByLayer, + type FanSummary, + type LayerNumber, + type SamaV2Input, + type SamaV2Metrics, + type SamaV2ViolationCounts, +} from "./a31_sama_v2.ts"; +import { verifySamaV2 } from "./b32_sama_v2_verify.ts"; + +// — graphDepth ---------------------------------------------------- +// +// Longest path in the import DAG. Nodes = SAMA source files (src/*.ts +// non-test); edges = static relative-path imports between them. A +// file with no imports has depth 1. Empty graph = 0. +// +// Memoised DFS. If a cycle is encountered (the Law check would flag +// it separately), we treat the back-edge target as a terminal of +// depth 1 so the metric still terminates with a finite number. +const computeGraphDepth = (files: Map): number => { + const samaPaths = [...files.keys()].filter(isSamaFile); + if (samaPaths.length === 0) return 0; + + // Build adjacency (only edges that land on known SAMA files). + const adj = new Map(); + for (const path of samaPaths) { + const content = files.get(path) ?? ""; + const out: string[] = []; + for (const imp of collectRelativeImports(content)) { + const resolved = resolveImport(path, imp); + if (files.has(resolved) && isSamaFile(resolved)) out.push(resolved); + } + adj.set(path, out); + } + + const memo = new Map(); + const visiting = new Set(); + + const depth = (node: string): number => { + const cached = memo.get(node); + if (cached !== undefined) return cached; + if (visiting.has(node)) return 1; // cycle: treat as terminal + visiting.add(node); + let best = 1; + for (const next of adj.get(node) ?? []) { + const d = depth(next) + 1; + if (d > best) best = d; + } + visiting.delete(node); + memo.set(node, best); + return best; + }; + + let max = 0; + for (const p of samaPaths) { + const d = depth(p); + if (d > max) max = d; + } + return max; +}; + +// — fanByLayer ---------------------------------------------------- +// +// Per canonical layer L ∈ {0,1,2,3}: fan-in (count of edges arriving +// at files in L) and fan-out (count of edges leaving files in L). +// Each summary = {mean, p50, p95, max} computed over the per-file +// series within L. Empty layer = all-zero summary. + +const summarize = (values: number[]): FanSummary => { + if (values.length === 0) return { mean: 0, p50: 0, p95: 0, max: 0 }; + const sorted = [...values].sort((a, b) => a - b); + const sum = sorted.reduce((s, v) => s + v, 0); + const mean = sum / sorted.length; + const percentile = (frac: number): number => { + // Nearest-rank percentile: index = ceil(frac * N) - 1, clamped. + const idx = Math.min(sorted.length - 1, Math.max(0, Math.ceil(frac * sorted.length) - 1)); + return sorted[idx]!; + }; + return { + mean, + p50: percentile(0.5), + p95: percentile(0.95), + max: sorted[sorted.length - 1]!, + }; +}; + +const computeFanByLayer = (input: SamaV2Input): FanByLayer => { + const samaPaths = [...input.files.keys()].filter(isSamaFile); + const fanOut = new Map(); + const fanIn = new Map(); + for (const p of samaPaths) { + fanOut.set(p, 0); + fanIn.set(p, 0); + } + for (const path of samaPaths) { + const content = input.files.get(path) ?? ""; + for (const imp of collectRelativeImports(content)) { + const resolved = resolveImport(path, imp); + if (!fanOut.has(resolved)) continue; + fanOut.set(path, (fanOut.get(path) ?? 0) + 1); + fanIn.set(resolved, (fanIn.get(resolved) ?? 0) + 1); + } + } + + const buckets: Record = { + 0: { in: [], out: [] }, + 1: { in: [], out: [] }, + 2: { in: [], out: [] }, + 3: { in: [], out: [] }, + }; + for (const path of samaPaths) { + const decl = declaredLayer(path, input.profile); + if (!decl) continue; + buckets[decl.layer].in.push(fanIn.get(path) ?? 0); + buckets[decl.layer].out.push(fanOut.get(path) ?? 0); + } + + return { + 0: { fanIn: summarize(buckets[0].in), fanOut: summarize(buckets[0].out) }, + 1: { fanIn: summarize(buckets[1].in), fanOut: summarize(buckets[1].out) }, + 2: { fanIn: summarize(buckets[2].in), fanOut: summarize(buckets[2].out) }, + 3: { fanIn: summarize(buckets[3].in), fanOut: summarize(buckets[3].out) }, + }; +}; + +// — boundaryRatio ------------------------------------------------- +// +// (parse-boundary call sites in Layer 2 files) ÷ (parse-boundary +// call sites anywhere). Uses the SAME detector as the §4.4 check. +// No boundaries anywhere → 1.0 (vacuously satisfied: there is no +// out-of-Layer-2 leak because there is no boundary at all). +// +// "Layer 2" here means the file's declaredLayer is 2. Unprefixed +// files (declaredLayer = null) count toward the denominator but +// not the numerator — that is the truthful reading of the §5 +// definition. +const computeBoundaryRatio = (input: SamaV2Input): number => { + const sites = findParseBoundaryCallSites(input.files); + if (sites.length === 0) return 1.0; + let inLayer2 = 0; + for (const site of sites) { + const decl = declaredLayer(site.file, input.profile); + if (decl !== null && decl.layer === 2) inLayer2++; + } + return inLayer2 / sites.length; +}; + +// — workingSetFit ------------------------------------------------- +// +// (source files with WORKING_SET_MIN_LOC ≤ LOC ≤ WORKING_SET_MAX_LOC) +// ÷ (total source files). Empty repo → 1.0. Test files don't count; +// the metric is about working modules, not their sibling tests. +// +// Bounds are hard-coded constants in a31_sama_v2.ts. The reasoning +// (Atomic 700-LOC headroom; sub-50 = type-only/stub) lives on +// /sama/v2 §5 — preceding the numbers, not retrofitted. +const computeWorkingSetFit = (input: SamaV2Input): number => { + const samaPaths = [...input.files.keys()].filter(isSamaFile); + if (samaPaths.length === 0) return 1.0; + let inSweetSpot = 0; + for (const p of samaPaths) { + const lines = (input.files.get(p) ?? "").split("\n").length; + if (lines >= WORKING_SET_MIN_LOC && lines <= WORKING_SET_MAX_LOC) inSweetSpot++; + } + return inSweetSpot / samaPaths.length; +}; + +// — violationCounts ---------------------------------------------- +// +// Per-check violation count from a fresh verifier run on the same +// input. Reported even when a check passes (value = 0) — §5's +// "trailing signal: which rules agents *almost* break." The verifier +// enumerates ALL violations per check (no short-circuit), so this +// count is meaningful — not "1 if failed, 0 if passed". +const computeViolationCounts = (input: SamaV2Input): SamaV2ViolationCounts => { + const report = verifySamaV2(input); + const byId = new Map(); + for (const c of report.checks) byId.set(c.id, c.violations.length); + return { + sorted: byId.get(1) ?? 0, + architecture: byId.get(2) ?? 0, + modeledTests: byId.get(3) ?? 0, + modeledBoundary: byId.get(4) ?? 0, + atomic: byId.get(5) ?? 0, + law: byId.get(6) ?? 0, + consistency: byId.get(7) ?? 0, + }; +}; + +// — Orchestrator -------------------------------------------------- + +export const computeCoreMetrics = (input: SamaV2Input): SamaV2Metrics => ({ + graphDepth: computeGraphDepth(input.files), + fanByLayer: computeFanByLayer(input), + boundaryRatio: computeBoundaryRatio(input), + workingSetFit: computeWorkingSetFit(input), + violationCounts: computeViolationCounts(input), +}); diff --git a/src/b32_sama_v2_verify.ts b/src/b32_sama_v2_verify.ts index a6d584412e46a1dcf251604daeb4a0a76812cc7e..f30f188339a9553303b8c073274b3e5af610701f 100644 --- a/src/b32_sama_v2_verify.ts +++ b/src/b32_sama_v2_verify.ts @@ -1,105 +1,27 @@ -// c32 — logic: the SAMA v2 verifier. Implements the seven §4 +// b32 — logic: the SAMA v2 verifier. Implements the seven §4 // conformance checks (Sorted, Architecture, Modeled-tests, // Modeled-boundary, Atomic, the Law §1.2, Consistency §3) as pure // functions over an in-memory (profile, files) input. Never reads -// the filesystem — the loader (c14_sama_profile + c21 handler) -// populates the input map. No mocks, no stubs: every check is a -// real grep/string-op on the supplied content. +// the filesystem — the loader (c14_sama_profile + d21 handler) +// populates the input map. The shared pure helpers and the parse- +// boundary detector live in a31_sama_v2 so this verifier and the +// §5 metrics emitter agree by construction. import { + PARSE_BOUNDARY_PATTERNS, + collectRelativeImports, declaredLayer, + findParseBoundaryCallSites, + isSamaFile, + isTestFile, + resolveImport, + stripStringsAndComments, type SamaV2Check, type SamaV2Input, type SamaV2Report, type SamaV2Violation, } from "./a31_sama_v2.ts"; -// — shared utilities ------------------------------------------------- - -// A SAMA file is one we expect to obey the layer rules: any *.ts -// under src/ that isn't a *.test.ts. Tests live next to source as -// siblings; they're examined for the Modeled check but don't carry -// their own layer. -const isSamaFile = (path: string): boolean => - path.startsWith("src/") && path.endsWith(".ts") && !path.endsWith(".test.ts"); - -const isTestFile = (path: string): boolean => - path.startsWith("src/") && path.endsWith(".test.ts"); - -// Strip JS/TS string literals and comments to whitespace so a regex -// that walks the source doesn't trip on test fixtures that contain -// the very patterns we're scanning for. Same shape as the helper in -// c32_sama_verify; duplicated here to keep c32_sama_v2_verify a -// stand-alone module the loader can pull in without dragging the v1 -// verifier with it. -const stripStringsAndComments = (src: string): string => { - let out = ""; - let i = 0; - while (i < src.length) { - const c = src[i]; - const n = src[i + 1]; - if (c === "/" && n === "/") { - out += " "; - i += 2; - while (i < src.length && src[i] !== "\n") { out += " "; i++; } - } else if (c === "/" && n === "*") { - out += " "; - i += 2; - while (i < src.length - 1 && !(src[i] === "*" && src[i + 1] === "/")) { - out += src[i] === "\n" ? "\n" : " "; - i++; - } - out += " "; - i += 2; - } else if (c === '"' || c === "'" || c === "`") { - const quote = c; - out += " "; - i++; - while (i < src.length && src[i] !== quote) { - if (src[i] === "\\" && i + 1 < src.length) { out += " "; i += 2; continue; } - out += src[i] === "\n" ? "\n" : " "; - i++; - } - out += " "; - i++; - } else { - out += c; - i++; - } - } - return out; -}; - -// Collect every relative ".ts" import edge in a file. Scans raw -// source: a stripped copy would erase the quoted import paths along -// with all other string literals, so the regex must run over the -// original. To avoid picking up import-like strings inside test -// fixtures, we cross-check each match position against the stripped -// mask — if the keyword `from` lands on whitespace in the mask, it -// was inside a string literal and we skip it. -const collectRelativeImports = (content: string): string[] => { - const mask = stripStringsAndComments(content); - const re = /\bfrom\s+["'](\.\/[A-Za-z0-9_./-]+\.ts)["']/g; - const out: string[] = []; - let m: RegExpExecArray | null; - while ((m = re.exec(content)) !== null) { - // If the `from` keyword position is whitespace in the mask, the - // entire match was inside a string literal (e.g. a test fixture). - if (mask[m.index] === " " || mask[m.index] === "\n") continue; - if (m[1]) out.push(m[1]); - } - return out; -}; - -// Resolve a relative import like "./c14_git.ts" from the importing -// file's directory to the repo-relative path used as the input map's -// key (e.g. "src/c14_git.ts"). -const resolveImport = (fromPath: string, importPath: string): string => { - const dir = fromPath.split("/").slice(0, -1).join("/"); - const rel = importPath.replace(/^\.\//, ""); - return dir + "/" + rel; -}; - // — Check 1: Sorted ------------------------------------------------- // // "Every file carries a profile-recognised prefix; lexicographic @@ -221,22 +143,38 @@ const checkModeledTests = (input: SamaV2Input): SamaV2Check => { // params) are treated as delegation to the platform's own Layer 2, // not parsing performed in our Layer 3. The verifier reports any // raw JSON.parse / new URL calls landing outside Layer 2. -const BOUNDARY_PATTERNS = [ - { name: "JSON.parse", re: /\bJSON\.parse\s*\(/ }, - { name: "new URL", re: /\bnew\s+URL\s*\(/ }, -]; +// +// The call-site detector lives in a31_sama_v2 (findParseBoundary- +// CallSites). This check consumes its output and groups by +// (file, pattern) so the violation list stays at file-pattern +// granularity — the same shape pre-refactor. The §5 boundaryRatio +// metric consumes the same detector and counts individual call +// sites, but does not change this check's verdict. const checkModeledBoundary = (input: SamaV2Input): SamaV2Check => { const violations: SamaV2Violation[] = []; let examined = 0; - for (const [path, content] of input.files.entries()) { + + // Bucket call sites by file → set of patterns observed. + const patternsByFile = new Map>(); + for (const site of findParseBoundaryCallSites(input.files)) { + let s = patternsByFile.get(site.file); + if (!s) { s = new Set(); patternsByFile.set(site.file, s); } + s.add(site.pattern); + } + + // Iterate files in input order; emit one violation per (file, + // pattern) for files outside Layer 2, preserving PARSE_BOUNDARY_- + // PATTERNS order. This matches the pre-refactor verdict bit-for-bit. + for (const path of input.files.keys()) { if (!isSamaFile(path)) continue; const decl = declaredLayer(path, input.profile); if (!decl) continue; examined++; if (decl.layer === 2) continue; // Layer 2 is the legitimate site. - const stripped = stripStringsAndComments(content); - for (const pat of BOUNDARY_PATTERNS) { - if (pat.re.test(stripped)) { + const observed = patternsByFile.get(path); + if (!observed) continue; + for (const pat of PARSE_BOUNDARY_PATTERNS) { + if (observed.has(pat.name)) { violations.push({ file: path, detail: `boundary pattern \`${pat.name}\` found in Layer ${decl.layer} — parsing belongs in Layer 2`, diff --git a/src/d21_handlers_sama.ts b/src/d21_handlers_sama.ts index 83baf46cfc62ee4d40e49218ca165e64a1308891..6ea652fcc5aafc07784044f7b033e8bffdbfbdb4 100644 --- a/src/d21_handlers_sama.ts +++ b/src/d21_handlers_sama.ts @@ -67,9 +67,49 @@ export const samaSkillHandler = async (): Promise => { import { buildSamaV2Input } from "./c14_sama_profile.ts"; import { verifySamaV2 } from "./b32_sama_v2_verify.ts"; -import type { SamaV2Report } from "./a31_sama_v2.ts"; +import { computeCoreMetrics } from "./b32_sama_v2_metrics.ts"; +import type { FanSummary, SamaV2Metrics, SamaV2Report } from "./a31_sama_v2.ts"; -const renderV2Report = (report: SamaV2Report): string => { +// Render §5 metrics block beneath the existing 7-check verdict. +// Numbers come straight from computeCoreMetrics on the same input +// the verifier consumed — operational definitions on /sama/v2 §5. +const fmtFan = (s: FanSummary): string => + `${s.mean.toFixed(2)} / ${s.p50} / ${s.p95} / ${s.max}`; +const fmtPct = (n: number): string => `${(n * 100).toFixed(1)}%`; + +const renderMetricsBlock = (m: SamaV2Metrics): string => `## §5 Core metrics + +> *Snapshot of this run. Operational definitions at [/sama/v2 §5](/sama/v2#5-operational--core-metrics-definitions). The baseline these numbers anchor is what later claims (skeleton scaffolds, agent A/B experiments, external-repo audits) will be measured against as a delta.* + +| metric | value | +|---|---| +| **graphDepth** | ${m.graphDepth} | +| **boundaryRatio** | ${fmtPct(m.boundaryRatio)} | +| **workingSetFit** | ${fmtPct(m.workingSetFit)} | + +### fan distribution per layer + +| layer | fan-in (mean / p50 / p95 / max) | fan-out (mean / p50 / p95 / max) | +|---|---|---| +| 0 — Pure | ${fmtFan(m.fanByLayer[0].fanIn)} | ${fmtFan(m.fanByLayer[0].fanOut)} | +| 1 — Core | ${fmtFan(m.fanByLayer[1].fanIn)} | ${fmtFan(m.fanByLayer[1].fanOut)} | +| 2 — Adapter | ${fmtFan(m.fanByLayer[2].fanIn)} | ${fmtFan(m.fanByLayer[2].fanOut)} | +| 3 — Entry | ${fmtFan(m.fanByLayer[3].fanIn)} | ${fmtFan(m.fanByLayer[3].fanOut)} | + +### violation counts (trailing signal — emitted even when checks pass) + +| check | count | +|---|---| +| #1 Sorted | ${m.violationCounts.sorted} | +| #2 Architecture | ${m.violationCounts.architecture} | +| #3 Modeled (tests) | ${m.violationCounts.modeledTests} | +| #4 Modeled (boundary) | ${m.violationCounts.modeledBoundary} | +| #5 Atomic | ${m.violationCounts.atomic} | +| #6 Law (§1.2) | ${m.violationCounts.law} | +| #7 Consistency (§3) | ${m.violationCounts.consistency} | +`; + +const renderV2Report = (report: SamaV2Report, metrics: SamaV2Metrics): string => { const summary = report.overallPassed ? `✓ conforms · profile \`${report.profile}\` · ${report.examined} files examined · ${report.checks.length}/${report.checks.length} checks pass` : `${report.checks.filter((c) => c.passed).length}/${report.checks.length} checks pass · profile \`${report.profile}\` · ${report.examined} files examined`; @@ -94,13 +134,14 @@ const renderV2Report = (report: SamaV2Report): string => { > ${summary} -The verifier in [\`src/c32_sama_v2_verify.ts\`](/GIT/syntaxai/tdd.md/blob/main/src/c32_sama_v2_verify.ts) ingests [\`sama.profile.toml\`](/GIT/syntaxai/tdd.md/blob/main/sama.profile.toml) and runs the seven §4 conformance checks against the current source tree on this server. No clone, no token; the server reads its own \`src/\` and the committed profile, runs the same logic the sibling unit tests cover, and renders the verdict below. +The verifier in [\`src/b32_sama_v2_verify.ts\`](/GIT/syntaxai/tdd.md/blob/main/src/b32_sama_v2_verify.ts) ingests [\`sama.profile.toml\`](/GIT/syntaxai/tdd.md/blob/main/sama.profile.toml) and runs the seven §4 conformance checks against the current source tree on this server. No clone, no token; the server reads its own \`src/\` and the committed profile, runs the same logic the sibling unit tests cover, and renders the verdict below. The §5 core metrics emitter ([\`src/b32_sama_v2_metrics.ts\`](/GIT/syntaxai/tdd.md/blob/main/src/b32_sama_v2_metrics.ts)) runs on the same input and shares the parse-boundary detector with the Modeled-boundary check. | check | verdict | examined | |---|---|---| ${rows} -${details ? `## Open violations\n\n${details}` : ""} +${details ? `## Open violations\n\n${details}\n` : ""} +${renderMetricsBlock(metrics)} [← /sama/v2](/sama/v2) · [← /sama](/sama) · [the v1 dogfood](/sama/verify?repo=syntaxai/tdd.md) `; @@ -111,7 +152,8 @@ export const samaV2VerifyHandler = async (): Promise => { try { const input = await buildSamaV2Input(); const report = verifySamaV2(input); - body = renderV2Report(report); + const metrics = computeCoreMetrics(input); + body = renderV2Report(report, metrics); } catch (err) { body = `# SAMA v2 verify — error\n\nThe verifier failed before producing a verdict:\n\n\`\`\`\n${(err as Error).message}\n\`\`\`\n\n[← /sama/v2](/sama/v2)`; }