// syntaxai/tdd.md · main · src / a31_diff_parse.ts
// c31 — model: pure parser for unified-diff output. Takes the raw text
// emitted by `git diff` / Forgejo's `.diff` endpoint and produces the
// structured shape c51_render_commit consumes. No I/O, no I/O assumptions
// — handed a string, returns a tree.
// Classification of a single hunk body line: "context" lines exist on
// both sides of the diff, "added" lines only on the new side, "removed"
// lines only on the old side.
export type DiffLineKind = "context" | "added" | "removed";
// One line of a hunk body.
export interface DiffLine {
// Which side(s) of the diff this line belongs to.
kind: DiffLineKind;
// Line content with the leading one-character diff marker removed.
text: string;
// 1-based line numbers in the old / new file. Null for the side
// that doesn't have this line (e.g. additions have oldNum:null,
// removals have newNum:null; context lines carry both).
oldNum: number | null;
newNum: number | null;
}
// One "@@ -oldStart,oldLength +newStart,newLength @@" section of a file's
// diff, together with its body lines.
export interface DiffHunk {
// First line number and line count on the old side, as written in the
// hunk header. The count is 1 when the header omits it.
oldStart: number;
oldLength: number;
// First line number and line count on the new side, same conventions.
newStart: number;
newLength: number;
// The "@@ ... @@" suffix Forgejo/git puts after the second @@. Often
// the surrounding function/section name. Free text with surrounding
// whitespace trimmed; may be empty.
heading: string;
// Body lines in the order they appear in the diff.
lines: DiffLine[];
}
// Everything the parser extracted for one "diff --git" section.
export interface DiffFile {
// Path on the new side. For deletes this is the old path mirrored
// here so one field is enough to render a row.
path: string;
// Path on the old side. Always populated by the parser: equal to
// `path` for straightforward edits, different for renames.
oldPath: string;
// "modified" unless the section carries a "new file mode" ("added"),
// "deleted file mode" ("removed") or "rename from"/"rename to"
// ("renamed") header line.
status: "added" | "removed" | "modified" | "renamed";
// Hunks in the order they appear; empty when the section has none.
hunks: DiffHunk[];
// Totals of added / removed body lines across all hunks of this file.
added: number;
removed: number;
}
// Result of parsing a whole diff.
export interface ParsedDiff {
// One entry per "diff --git" section, in input order.
files: DiffFile[];
}
// Recognises a hunk header of the form
//   @@ -oldStart[,oldLength] +newStart[,newLength] @@ heading
// Capture groups: 1 = oldStart, 2 = oldLength (optional), 3 = newStart,
// 4 = newLength (optional), 5 = everything after the closing "@@".
const HUNK_HEADER = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)$/;
// Turn one hunk header line into its numeric ranges plus the trimmed
// heading text. Yields null for any line that is not a well-formed
// header. Unified diff allows a range's length to be left out, in which
// case it means 1.
const parseHunkHeader = (line: string): Omit<DiffHunk, "lines"> | null => {
  const match = HUNK_HEADER.exec(line);
  if (match === null) {
    return null;
  }
  const [, oldStartDigits, oldLengthDigits, newStartDigits, newLengthDigits, trailing] = match;
  // An absent ",length" part stands for a single line.
  const lengthOrOne = (digits: string | undefined): number =>
    digits === undefined ? 1 : parseInt(digits, 10);
  return {
    oldStart: parseInt(oldStartDigits ?? "", 10),
    oldLength: lengthOrOne(oldLengthDigits),
    newStart: parseInt(newStartDigits ?? "", 10),
    newLength: lengthOrOne(newLengthDigits),
    heading: (trailing ?? "").trim(),
  };
};
// Prefix that opens every per-file section in git-style diff output.
const GIT_HEADER_PREFIX = "diff --git ";
// Matches "rename from X" / "rename to X". Paths that git wrapped in
// double quotes (its escaping for special characters) are deliberately
// not matched: quoted paths are unsupported throughout this parser.
const RENAME_LINE = /^rename (from|to) ([^"].*)$/;
// Extract the old/new paths from a "diff --git a/X b/Y" line.
// When both sides name the same path the line is exactly "a/P b/P", so
// the split point follows from the length alone; that keeps paths that
// themselves contain " b/" intact. Otherwise fall back to a greedy
// pattern match. Unparseable (e.g. quoted) headers yield empty strings.
const splitGitHeaderPaths = (line: string): { oldPath: string; path: string } => {
  const rest = line.slice(GIT_HEADER_PREFIX.length);
  // "a/" + P + " b/" + P has length 2 * |P| + 5.
  const half = (rest.length - 5) / 2;
  if (Number.isInteger(half) && half > 0) {
    const candidate = rest.slice(2, 2 + half);
    if (rest === `a/${candidate} b/${candidate}`) {
      return { oldPath: candidate, path: candidate };
    }
  }
  const m = /^a\/(.+) b\/(.+)$/.exec(rest);
  return { oldPath: m?.[1] ?? "", path: m?.[2] ?? "" };
};
// Parse git-style unified diff text into per-file hunks.
//
// raw: the complete diff text. Only sections introduced by a
//   "diff --git" line are recognised; anything before the first one is
//   ignored.
// Returns one DiffFile per section, in input order. Never throws:
//   unrecognised lines are skipped.
//
// Hunk bodies are delimited by the line counts in the "@@" header, not by
// guessing from line prefixes. That means:
//   - a removed line whose content starts with "-- " (rendered "--- …") or
//     an added line starting with "++ " is kept as a body line instead of
//     being mistaken for a file header;
//   - an empty line inside a hunk (a blank context line whose leading
//     space was stripped in transit) counts as context;
//   - anything after the last expected body line (trailing newline,
//     mail signature, …) is not attributed to the hunk.
export const parseUnifiedDiff = (raw: string): ParsedDiff => {
  const files: DiffFile[] = [];
  let currentFile: DiffFile | null = null;
  let currentHunk: DiffHunk | null = null;
  // Line numbers the next old-side / new-side body line will receive.
  let oldLineNo = 0;
  let newLineNo = 0;
  // Body lines the current hunk still expects on each side.
  let oldRemaining = 0;
  let newRemaining = 0;
  const lines = raw.split("\n");
  const lastIndex = lines.length - 1;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";
    // A body line always starts with " ", "+", "-" or "\", so this prefix
    // is a file boundary even if the previous hunk was cut short.
    if (line.startsWith(GIT_HEADER_PREFIX)) {
      const { oldPath, path } = splitGitHeaderPaths(line);
      currentFile = {
        path,
        oldPath,
        status: "modified",
        hunks: [],
        added: 0,
        removed: 0,
      };
      currentHunk = null;
      oldRemaining = 0;
      newRemaining = 0;
      files.push(currentFile);
      continue;
    }
    if (currentFile === null) continue; // preamble, skip

    if (currentHunk !== null && (oldRemaining > 0 || newRemaining > 0)) {
      // The final "" produced by splitting on a trailing "\n" is an
      // artifact of the split, never a real line.
      if (line === "" && i === lastIndex) continue;
      // First char is the marker; an empty line is a blank context line.
      const marker = line[0] ?? " ";
      const text = line.slice(1);
      if (marker === "+") {
        currentHunk.lines.push({ kind: "added", text, oldNum: null, newNum: newLineNo });
        newLineNo++;
        newRemaining--;
        currentFile.added++;
        continue;
      }
      if (marker === "-") {
        currentHunk.lines.push({ kind: "removed", text, oldNum: oldLineNo, newNum: null });
        oldLineNo++;
        oldRemaining--;
        currentFile.removed++;
        continue;
      }
      if (marker === " ") {
        currentHunk.lines.push({ kind: "context", text, oldNum: oldLineNo, newNum: newLineNo });
        oldLineNo++;
        newLineNo++;
        oldRemaining--;
        newRemaining--;
        continue;
      }
      // "\ No newline at end of file" — informational, skip.
      if (marker === "\\") continue;
      // Anything else means the hunk ended early. Stop expecting body
      // lines and let the header handling below look at this line.
      oldRemaining = 0;
      newRemaining = 0;
    }

    if (line.startsWith("new file mode")) {
      currentFile.status = "added";
      continue;
    }
    if (line.startsWith("deleted file mode")) {
      currentFile.status = "removed";
      continue;
    }
    if (line.startsWith("rename from ") || line.startsWith("rename to ")) {
      currentFile.status = "renamed";
      // Each rename line carries a single path, so unlike the
      // "diff --git" line it cannot be split at the wrong place.
      const rename = RENAME_LINE.exec(line);
      const renamePath = rename?.[2];
      if (renamePath !== undefined) {
        if (rename?.[1] === "from") {
          currentFile.oldPath = renamePath;
        } else {
          currentFile.path = renamePath;
        }
      }
      continue;
    }
    if (line.startsWith("@@")) {
      const header = parseHunkHeader(line);
      if (!header) continue;
      currentHunk = { ...header, lines: [] };
      currentFile.hunks.push(currentHunk);
      oldLineNo = header.oldStart;
      newLineNo = header.newStart;
      oldRemaining = header.oldLength;
      newRemaining = header.newLength;
      continue;
    }
    // Everything else outside a hunk body (index, ---/+++, similarity
    // index, "Binary files …", trailing text) carries nothing we need.
  }
  return { files };
};