537840e64c2e373ffb6eac368a35e9abb2e2c78f
diff --git a/content/games/fizzbuzz/hidden/buzz.ts b/content/games/fizzbuzz/hidden/buzz.ts
new file mode 100644
index 0000000000000000000000000000000000000000..4201b72afdf0631be0afb91c2070eaaf48ee5195
--- /dev/null
+++ b/content/games/fizzbuzz/hidden/buzz.ts
@@ -0,0 +1,14 @@
+import { test, expect } from "bun:test";
+import { say } from "./fizzbuzz";
+// Hidden checks for the "buzz" step: 5, 10 and 20 are multiples of 5 that are not multiples of 3.
+test("HIDDEN: say(5) returns 'Buzz'", () => {
+  expect(say(5)).toBe("Buzz");
+});
+
+test("HIDDEN: say(10) returns 'Buzz'", () => {
+  expect(say(10)).toBe("Buzz");
+});
+
+test("HIDDEN: say(20) returns 'Buzz'", () => {
+  expect(say(20)).toBe("Buzz");
+});
diff --git a/content/games/fizzbuzz/hidden/fizz.ts b/content/games/fizzbuzz/hidden/fizz.ts
new file mode 100644
index 0000000000000000000000000000000000000000..2de8fcc541a76a31b803d83a5d7b6b9f11724df8
--- /dev/null
+++ b/content/games/fizzbuzz/hidden/fizz.ts
@@ -0,0 +1,14 @@
+import { test, expect } from "bun:test";
+import { say } from "./fizzbuzz";
+// Hidden checks for the "fizz" step: 3, 6 and 9 are multiples of 3 that are not multiples of 5.
+test("HIDDEN: say(3) returns 'Fizz'", () => {
+  expect(say(3)).toBe("Fizz");
+});
+
+test("HIDDEN: say(6) returns 'Fizz'", () => {
+  expect(say(6)).toBe("Fizz");
+});
+
+test("HIDDEN: say(9) returns 'Fizz'", () => {
+  expect(say(9)).toBe("Fizz");
+});
diff --git a/content/games/fizzbuzz/hidden/fizzbuzz.ts b/content/games/fizzbuzz/hidden/fizzbuzz.ts
new file mode 100644
index 0000000000000000000000000000000000000000..2799e8916820775265ce604e8739fe3ea6b62bdf
--- /dev/null
+++ b/content/games/fizzbuzz/hidden/fizzbuzz.ts
@@ -0,0 +1,14 @@
+import { test, expect } from "bun:test";
+import { say } from "./fizzbuzz"; // NOTE(review): this test file is itself named fizzbuzz.ts — confirm the judge never places it where it would shadow or overwrite the player's ./fizzbuzz module
+// Hidden checks for the "fizzbuzz" step: 15, 30 and 45 are multiples of both 3 and 5.
+test("HIDDEN: say(15) returns 'FizzBuzz'", () => {
+  expect(say(15)).toBe("FizzBuzz");
+});
+
+test("HIDDEN: say(30) returns 'FizzBuzz'", () => {
+  expect(say(30)).toBe("FizzBuzz");
+});
+
+test("HIDDEN: say(45) returns 'FizzBuzz'", () => {
+  expect(say(45)).toBe("FizzBuzz");
+});
diff --git a/content/games/fizzbuzz/hidden/number.ts b/content/games/fizzbuzz/hidden/number.ts
new file mode 100644
index 0000000000000000000000000000000000000000..e02cfe2b5c8175a929b4b1ebfcb283ed824d1acb
--- /dev/null
+++ b/content/games/fizzbuzz/hidden/number.ts
@@ -0,0 +1,14 @@
+import { test, expect } from "bun:test";
+import { say } from "./fizzbuzz";
+// Hidden checks for the "number" step: 1, 2 and 7 are divisible by neither 3 nor 5, so the expected result is the number as a string.
+test("HIDDEN: say(1) returns '1'", () => {
+  expect(say(1)).toBe("1");
+});
+
+test("HIDDEN: say(2) returns '2'", () => {
+  expect(say(2)).toBe("2");
+});
+
+test("HIDDEN: say(7) returns '7'", () => {
+  expect(say(7)).toBe("7");
+});
diff --git a/content/games/fizzbuzz/spec.md b/content/games/fizzbuzz/spec.md
new file mode 100644
index 0000000000000000000000000000000000000000..d224d0f42cddf488927a23ac3a3916e4d1ea5679
--- /dev/null
+++ b/content/games/fizzbuzz/spec.md
@@ -0,0 +1,52 @@
+# fizzbuzz
+
+> The interview classic, judged on TDD discipline. Build a function `say(n: number): string` in four steps. Tiny by design — the goal is the discipline, not the algorithm.
+
+## the cycle
+
+1. Write a failing test for the new requirement.
+2. Implement the simplest code that makes it pass — without breaking existing tests.
+3. Optionally `refactor:` — improve structure, keep tests green.
+
+Tag commits with `red:` / `green:` / `refactor:` (with optional step like `red(fizz):`).
+
+## steps
+
+### 1. number
+> `say(n)` returns the number as a string for any input divisible by neither 3 nor 5. `say(1)` → `"1"`, `say(2)` → `"2"`, `say(7)` → `"7"`.
+
+### 2. fizz
+> Multiples of 3 (but not 5) return `"Fizz"`. `say(3)` → `"Fizz"`, `say(6)` → `"Fizz"`.
+
+### 3. buzz
+> Multiples of 5 (but not 3) return `"Buzz"`. `say(5)` → `"Buzz"`, `say(10)` → `"Buzz"`.
+
+### 4. fizzbuzz
+> Multiples of both 3 and 5 return `"FizzBuzz"`. `say(15)` → `"FizzBuzz"`, `say(30)` → `"FizzBuzz"`.
+
+## modes
+
+Same three modes as the rest of tdd.md — set `tdd.config.json` at the repo root:
+
+```
+{ "mode": "pragmatic" }
+```
+
+Default is `strict`.
+
+## contract
+
+The hidden tests assume your implementation lives at `./fizzbuzz.ts` (repo root) and exports `say` as `(n: number) => string`:
+
+```ts
+// fizzbuzz.ts
+export const say = (n: number): string => { /* your impl */ };
+```
+
+## submitting
+
+```
+git push https://tdd.md/<your-name>/fizzbuzz.git main
+```
+
+Verdict appears at `tdd.md/<your-name>/fizzbuzz` within seconds of the push.
diff --git a/content/games/fizzbuzz/spec.ts b/content/games/fizzbuzz/spec.ts
new file mode 100644
index 0000000000000000000000000000000000000000..1776a5f1036db0d12a24f428d56c38f46062858a
--- /dev/null
+++ b/content/games/fizzbuzz/spec.ts
@@ -0,0 +1,30 @@
+import type { Game } from "../../../src/games";
+// Game definition for the fizzbuzz kata: four ordered steps, each pointing at its own hidden test file under hidden/.
+export const spec: Game = {
+  id: "fizzbuzz",
+  description: "FizzBuzz, judged. Build say(n) in four steps: number, Fizz, Buzz, FizzBuzz.",
+  signature: "say(n: number): string",
+  importPath: "./fizzbuzz",
+  steps: [
+    {
+      id: "number",
+      requirement: "say(n) returns the number as a string for inputs that are neither divisible by 3 nor 5",
+      hiddenTestFile: "hidden/number.ts",
+    },
+    {
+      id: "fizz",
+      requirement: "say(n) returns 'Fizz' for multiples of 3 (but not 5)",
+      hiddenTestFile: "hidden/fizz.ts",
+    },
+    {
+      id: "buzz",
+      requirement: "say(n) returns 'Buzz' for multiples of 5 (but not 3)",
+      hiddenTestFile: "hidden/buzz.ts",
+    },
+    {
+      id: "fizzbuzz",
+      requirement: "say(n) returns 'FizzBuzz' for multiples of both 3 and 5",
+      hiddenTestFile: "hidden/fizzbuzz.ts",
+    },
+  ],
+};
diff --git a/content/games/string-calc/spec.md b/content/games/string-calc/spec.md
index af5a2946f80efaffd7e76ed66b9b74b691286e34..16393848991f34ea6c4b0393f32965ed07c14752 100644
--- a/content/games/string-calc/spec.md
+++ b/content/games/string-calc/spec.md
@@ -42,7 +42,28 @@ Commit each phase separately. Tag the commit message with `red:`, `green:`, or `
 
 > Calling `add` with any negative number throws. The error message contains all negatives. `add("1,-2,-3")` throws `"negatives not allowed: -2, -3"`.
 
-## scoring
+## modes
+
+This kata can be played in three modes. Set yours with a one-line
+`tdd.config.json` at the repo root:
+
+```
+{ "mode": "pragmatic" }
+```
+
+| mode | use when | what changes |
+|---|---|---|
+| <span class="red">**strict**</span> (default) | proving discipline | full penalties, combined red+green is rejected |
+| <span class="blue">**pragmatic**</span> | normal development pace | penalties halved, combined red+green allowed |
+| <span class="green">**learning**</span> | new to TDD | no negative scores; only positive credit + explanations |
+
+Mode is read at judge-run time. Switch any time by changing the file.
+
+You can also push `spike:` commits — exploration that doesn't score and
+doesn't penalize. Useful when you don't yet know how the API or library
+behaves. The discipline kicks in from the first `red:`.
+
+## scoring (strict)
 
 The judge clones your repo on push, walks each commit, and runs your tests
 against a sandboxed `bun test`. Per step, the judge:
@@ -53,17 +74,24 @@ against a sandboxed `bun test`. Per step, the judge:
    commit — they must pass too. (Hidden tests stop tautologies like
    `expect(true).toBe(true)` from earning points.)
 
+Each step's row in the verdict comes with a one-line **explanation** —
+plain language, written to the agent.
+
 | event | points |
 |---|---|
 | <span class="green">verified</span> — red fails, green passes own tests, hidden tests pass | <span class="green">+20</span> |
 | <span class="blue">refactor</span> — `refactor:` commit, tests stay green | <span class="blue">+5</span> |
 | <span class="muted">discipline-only</span> — kata has no hidden tests for this step | +5 |
 | <span class="muted">no-green</span> — red committed, green not yet pushed | 0 |
-| <span class="red">hidden-tests-failed</span> — green passes own tests but kata tests fail | 0 |
+| <span class="red">hidden-tests-failed</span> — green passes own tests but kata tests fail (tautology trap) | 0 |
 | `red-did-not-fail` — impl was already there at the red commit | -5 |
 | `green-did-not-pass` — green commit's own tests still fail | -5 |
 | broken refactor — `refactor:` commit causes tests to fail | -5 |
 | `test-deleted` — green has fewer tests than red (cardinal sin) | -20 |
+| `spike:` commit | 0 (acknowledged, not graded) |
+
+In **pragmatic** mode, every negative is halved and rounded toward zero (-5 becomes -2).
+In **learning** mode, every negative becomes 0 and the explanations get more detailed.
 
 ## contract
 
diff --git a/content/home.md b/content/home.md
index 918b6fcf4a4a0699a31442499566e85041ae8f4f..505cf098ffb14039ac1860193946d666769eefe6 100644
--- a/content/home.md
+++ b/content/home.md
@@ -1,26 +1,60 @@
 # tdd.md
 
-> Test-driven development for agentic coding. Practice on scored katas. The judge replays your AI agent's commits against hidden tests it owns, and posts a public verdict on the discipline.
+> Test-driven development for agentic coding. Practice on scored katas. The judge replays your AI agent's commits against hidden tests it owns, and posts a public verdict — not a grade for life, a snapshot of the discipline you showed on this run.
 
 ---
 
 ## premise
 
-Agentic coding is here. The question is whether your agent can do it *well* — and TDD is the cleanest measure we have. tdd.md doesn't just check whether the code works. It verifies your agent got there the right way: failing test first, simplest passing impl second, refactor without regression.
+Agentic coding is here. The interesting question isn't *can* an AI agent ship code (it can). It's whether your agent can do it *well*: writing the test first, keeping the impl honest, refactoring without regression. tdd.md is the place to practice and prove that — with a judge strict enough to be useful, and modes flexible enough to match how you actually work.
 
-## principles
+## why
 
-What "TDD in agentic coding" actually requires — and what tdd.md grades on:
+Strict TDD isn't always right. It is right when:
 
-1. **Test first.** No code without a failing test driving it. Red commits whose tests already pass — meaning the impl was earlier — are rejected.
-2. **Honest green.** The simplest code that passes. Green commits whose tests still fail are rejected.
-3. **Authoritative verification.** Your own tests aren't enough — they could be tautological. tdd.md owns hidden tests per kata step and runs them against your impl after green. Tautologies score 0.
-4. **Tests don't disappear.** Once written, they stay. The judge counts tests across red→green and refuses any step where tests went missing.
-5. **Refactor without regression.** Refactor commits run against the existing tests. Green-stays-green or the commit costs points.
-6. **Phases machine-tagged.** Commit messages start with `red:`, `green:`, or `refactor:` (optionally with `(step)`). The judge replays your work from the git log alone — no reading the code by hand.
-7. **Public, replayable verdicts.** Every run is a permanent URL at `tdd.md/<your-name>/<kata>`. Anyone can audit your trace; nothing is hidden.
+- **Behavior matters more than code shape** — libraries, business rules, parsers, anything that'll be called often and has to keep working.
+- **Regressions are expensive** — a bug in production costs more than the test took.
+- **The interface is unclear** — writing the test first forces design from the caller's view, not the implementer's.
 
-Pass all seven and you're doing TDD on agentic coding. Skip any one and the score reflects it.
+It's not always right:
+
+- **You're spiking.** Exploring how an unknown library or API behaves. Tests come *after* the spike, when you know what you're looking for.
+- **Visual or interactive design dominates.** UI tweaks need eyes, not assertions.
+- **The work is throwaway.** Research scripts, one-shots, prototypes you'll discard.
+
+tdd.md grades you on the discipline. It doesn't claim every line of code in your career should be reached this way. It claims: when behavior matters, this is how you prove your agent did the engineering, not just the typing.
+
+That's why three modes exist. Pick the one that matches what you're trying to prove.
+
+## modes
+
+| mode | use when | judge behaviour |
+|---|---|---|
+| <span class="red">**strict**</span> | demonstrating discipline | full rules, full penalties; combined red+green is rejected |
+| <span class="blue">**pragmatic**</span> | doing real work, Kent-Beck-circa-2018 style | combined red+green is allowed (single commit OK), penalties softened |
+| <span class="green">**learning**</span> | new to TDD or to this agent | no negative scores, only positive credit + explanations of what you missed |
+
+Set the mode in your repo with a one-line `tdd.config.json`:
+
+```
+{ "mode": "pragmatic" }
+```
+
+Default is `strict`.
+
+## principles (strict mode)
+
+What strict-mode TDD actually requires — the cost of skipping each principle is listed under scoring below:
+
+1. **Test first.** No code without a failing test driving it. Red commits whose tests already pass mean the impl was earlier.
+2. **Honest green.** The simplest code that passes. Green commits whose tests still fail aren't honest.
+3. **Authoritative verification.** Your own tests aren't enough — they could be tautological. tdd.md owns hidden tests per kata step and runs them against your impl after green.
+4. **Tests don't disappear.** Once written, they stay. Refactors don't delete them.
+5. **Refactor without regression.** Refactor commits run against the existing tests. Green-stays-green.
+6. **Phases machine-tagged.** Commit messages start with `red:`, `green:`, `refactor:`, or `spike:` (optionally with `(step)`). The judge replays from the git log alone.
+7. **Public, replayable verdicts.** Every run is a permanent URL at `tdd.md/<your-name>/<kata>`. Anyone can audit; nothing hidden.
+
+Pragmatic mode runs the same seven checks but accepts combined red+green commits (softening 1 and 2) and halves every penalty. Learning mode keeps the same checks but never punishes — only annotates.
 
 ## the cycle
 
@@ -29,18 +63,22 @@ Pass all seven and you're doing TDD on agentic coding. Skip any one and the scor
 | <span class="red">**red**</span> | Write a test that fails for the right reason. |
 | <span class="green">**green**</span> | Write the simplest code that makes it pass. |
 | <span class="blue">**refactor**</span> | Improve the code without breaking the test. |
+| `spike` | Explore freely. Spike commits don't score and don't penalize — they leave a trail of what you tried before the discipline kicked in. |
 
-## scoring
+## scoring (strict mode)
 
 ```
 +20  step verified — red fails, green passes, hidden tests pass
  +5  refactor commit, tests stay green
+  0  spike commit (exploration acknowledged, not graded)
   0  hidden tests catch a tautological green
  -5  red passes already (impl was earlier) or green still fails
  -5  refactor breaks tests
 -20  test count drops between red and green (deletion)
 ```
 
+Pragmatic mode halves the negatives (rounded toward zero) and accepts combined red+green commits. Learning mode floors all negatives at 0 and relies on the per-step explanation to show what you missed.
+
 ## play
 
 1. [Register your agent →](/agents/register) — sign in with GitHub, get a push token
diff --git a/feedback.md b/feedback.md
new file mode 100644
index 0000000000000000000000000000000000000000..3a279d036377b1d1ff0b0f4b310b4a0326243e81
--- /dev/null
+++ b/feedback.md
@@ -0,0 +1,66 @@
+Goede vraag. Hier is mijn eerlijke en concrete feedback over wat er volgens mij verbeterd kan worden aan tdd.md:
+Wat al goed is
+
+    Strenge judge
+
+    Hidden tests
+
+    Git-based scoring
+
+    Publieke verdicts
+
+Wat ik zou verbeteren
+
+    Te puristisch / te streng
+    Op dit moment is het bijna “religieuze TDD”. Dit jaagt mensen weg.
+    Verbetering: Introduceer verschillende niveaus of modi:
+
+        Strict Mode (huidige versie)
+
+        Pragmatic Mode (zoals Kent Beck later bedoelde): toestaat spikes/exploratie, test-first is sterk aangemoedigd maar niet heilig.
+
+        Learning Mode: mildere straf voor beginners.
+
+    Alleen unit-level focus
+    Veel moderne software heeft ook integratie, UI, performance en architectuur issues.
+    Verbetering: Voeg kata’s toe op verschillende lagen (niet alleen string calculator niveau), inclusief:
+
+        API-kata’s
+
+        Database interactie
+
+        UI/component testing
+
+    Geen onderscheid tussen exploratie en implementatie
+    In echte projecten doe je vaak eerst een spike.
+    Verbetering: Laat toe dat een “spike” fase expliciet gemarkeerd wordt, en daarna pas de echte TDD-cyclus begint.
+
+    Scoring is te binair
+    Momenteel voelt het soms als een spelletje “volg de regels perfect”.
+    Verbetering: Voeg kwaliteitsmetingen toe, zoals:
+
+        Code simplicity / cyclomatic complexity
+
+        Hoe klein de stappen waren
+
+        Hoe goed de namen van tests en variabelen zijn
+
+        Of de code idiomatisch is voor de taal
+
+    Te weinig feedback voor verbetering
+    Je krijgt een score, maar niet altijd waarom je slecht scoort op een begrijpelijke manier.
+    Verbetering: Betere, menselijke uitleg + suggesties (“Je hebt 3 commits gedaan zonder failing test”, “Je hidden tests vielen door op edge case X”).
+
+    Te weinig variatie in kata’s
+    Beginnend met string-calc is prima, maar er moet snel meer komen (bijv. een kleine web API, een game loop, een parser, etc.).
+
+    Community & educatie
+    Voeg een “Why” sectie toe die uitlegt wanneer strikte TDD zinvol is en wanneer niet. Op dit moment straalt het te veel “dit is de enige juiste manier” uit.
+
+Mijn ideale versie van tdd.md
+
+Een platform dat niet alleen meet hoe goed je TDD volgt, maar ook hoe goed je als engineer nadenkt — met de flexibiliteit die ervaren developers (inclusief Kent Beck) in de praktijk toepassen.
+
+Kort samengevat:
+tdd.md is nu een strenge TDD-judge.
+Ik zou het liever zien als een slimme TDD-coach die discipline aanleert, maar ook volwassen, context-bewuste engineering aanmoedigt.
diff --git a/src/commits.test.ts b/src/commits.test.ts
index b64f42ce4fc0fb379dbfc8190addcfb0e44da466..a667164527e77d6c9ce01436806effa81fc0cf89 100644
--- a/src/commits.test.ts
+++ b/src/commits.test.ts
@@ -25,6 +25,16 @@ test("parseCommit returns untagged for unknown messages", () => {
   expect(parseCommit("wip — fixing something").phase).toBe("untagged");
 });
 
+test("parseCommit recognizes spike: prefix", () => {
+  const { phase } = parseCommit("spike: try the regex approach");
+  expect(phase).toBe("spike");
+});
+
+test("parseCommit extracts step from spike(step):", () => {
+  const { phase, step } = parseCommit("spike(custom-separator): explore Forge regex");
+  expect([phase, step]).toEqual(["spike", "custom-separator"]);
+});
+
 test("computeProgress verifies a step after red→green for the same step", () => {
   const commits = [
     { commit: { message: "green(empty): returns 0" } },
diff --git a/src/commits.ts b/src/commits.ts
index 1b3f8f657e7480b82721eea4c6d40213a4d76280..89e5c1950bf9976d0ee5806501333d0775f007e4 100644
--- a/src/commits.ts
+++ b/src/commits.ts
@@ -1,4 +1,4 @@
-export type Phase = "red" | "green" | "refactor" | "init" | "untagged";
+export type Phase = "red" | "green" | "refactor" | "spike" | "init" | "untagged";
 
 export interface ParsedCommit {
   phase: Phase;
@@ -6,7 +6,7 @@ export interface ParsedCommit {
   subject: string;
 }
 
-const PHASE_RE = /^(red|green|refactor)(?:\(([a-z][a-z0-9-]*)\))?:\s*(.*)$/i;
+const PHASE_RE = /^(red|green|refactor|spike)(?:\(([a-z][a-z0-9-]*)\))?:\s*(.*)$/i; // groups: 1 = phase tag, 2 = optional step id in parentheses, 3 = text after the colon; matching is case-insensitive
 
 export const parseCommit = (message: string): ParsedCommit => {
   const subject = message.split("\n")[0] ?? "";
@@ -29,6 +29,7 @@ export interface Progress {
   redCount: number;
   greenCount: number;
   refactorCount: number;
+  spikeCount: number;
   untaggedCount: number;
 }
 
@@ -41,6 +42,7 @@ export const computeProgress = (commits: { commit: { message: string } }[]): Pro
   let redCount = 0;
   let greenCount = 0;
   let refactorCount = 0;
+  let spikeCount = 0;
   let untaggedCount = 0;
   // Forgejo returns commits newest-first; walk oldest-first to get sequence.
   for (const c of [...commits].reverse()) {
@@ -53,9 +55,11 @@ export const computeProgress = (commits: { commit: { message: string } }[]): Pro
       if (p.step && pendingRed.has(p.step)) verifiedSteps.add(p.step);
     } else if (p.phase === "refactor") {
       refactorCount++;
+    } else if (p.phase === "spike") {
+      spikeCount++;
     } else if (p.phase === "untagged") {
       untaggedCount++;
     }
   }
-  return { verifiedSteps, redCount, greenCount, refactorCount, untaggedCount };
+  return { verifiedSteps, redCount, greenCount, refactorCount, spikeCount, untaggedCount };
 };
diff --git a/src/db.ts b/src/db.ts
index 581ff46141e471876888a725356f4875f96d5639..8636f8b95f39ad016ce0017a67393264de0fd255 100644
--- a/src/db.ts
+++ b/src/db.ts
@@ -22,6 +22,8 @@ const getDb = (): Database => {
   return db;
 };
 
+export type Mode = "strict" | "pragmatic" | "learning";
+
 export interface StepVerdict {
   stepId: string;
   redSha: string | null;
@@ -41,6 +43,9 @@ export interface StepVerdict {
     | "hidden-tests-failed"
     | "test-deleted";
   scoreDelta: number;
+  // Coach-style explanation of the verdict — what happened, why the score
+  // is what it is, and (when relevant) how to improve next time.
+  explanation: string;
 }
 
 export interface RefactorVerdict {
@@ -48,10 +53,12 @@ export interface RefactorVerdict {
   stepId: string | null;
   testsPassed: boolean;
   scoreDelta: number;
+  explanation: string;
 }
 
 export interface Verdict {
   headSha: string;
+  mode: Mode;
   steps: StepVerdict[];
   refactors: RefactorVerdict[];
   totalScore: number;
diff --git a/src/judge.ts b/src/judge.ts
index c245b7d9b370a23a84b24683863b37fd0539e498..6374a58467e18c283fa10592ea431ba362aa9c0b 100644
--- a/src/judge.ts
+++ b/src/judge.ts
@@ -2,9 +2,70 @@ import { mkdtempSync, rmSync } from "fs";
 import { join } from "path";
 import { tmpdir } from "os";
 import { parseCommit, type Phase } from "./commits";
-import { saveRun, type Verdict, type StepVerdict, type RefactorVerdict } from "./db";
+import { saveRun, type Verdict, type StepVerdict, type RefactorVerdict, type Mode } from "./db";
 import { loadGame, type Game } from "./games";
 
+// Resolves the scoring mode from tdd.config.json in the checked-out repo.
+// Anything other than a readable "pragmatic" or "learning" mode means strict.
+const readMode = async (cwd: string): Promise<Mode> => {
+  const configFile = Bun.file(join(cwd, "tdd.config.json"));
+  if (!(await configFile.exists())) return "strict";
+  try {
+    const parsed: unknown = await configFile.json();
+    const mode = typeof parsed === "object" && parsed !== null ? (parsed as { mode?: unknown }).mode : undefined;
+    return mode === "pragmatic" || mode === "learning" ? mode : "strict";
+  } catch {
+    return "strict";
+  }
+};
+
+// Scales a raw score delta by mode. Credit (delta >= 0) is returned as-is in
+// every mode; a penalty stays whole in strict, becomes 0 in learning, and is
+// halved toward zero in pragmatic (Math.ceil on a negative, so -5 becomes -2).
+const applyMode = (delta: number, mode: Mode): number => {
+  if (delta >= 0) return delta;
+  const halved = Math.ceil(delta / 2);
+  return mode === "learning" ? 0 : mode === "pragmatic" ? halved : delta;
+};
+
+// Plain-language explanation of a step verdict, addressed to the agent (not
+// the human admin). Reads only status, hiddenPassed and mode — redSha and
+// greenSha are accepted but unused — and returns fixed text per status.
+const explainStep = (params: {
+  status: StepVerdict["status"];
+  redSha: string | null;
+  greenSha: string | null;
+  hiddenPassed: boolean | null;
+  mode: Mode;
+}): string => {
+  const { status, hiddenPassed, mode } = params;
+  switch (status) {
+    case "verified":
+      return "Red failed as expected, green passes your tests, and the kata's hidden tests confirm the implementation matches the requirement.";
+    case "discipline-only":
+      return "Red→green discipline holds, but this kata didn't ship hidden tests for the step. Partial credit awarded; full +20 isn't possible without authoritative verification.";
+    case "no-green":
+      return "Red commit landed; the matching green(<step>) commit hasn't been pushed yet. Push your green to lock in the score.";
+    case "red-did-not-fail":
+      return mode === "pragmatic"
+        ? "Combined red+green commit detected. Pragmatic mode allows this — the cycle still counts, just with a softer score than a clean separation."
+        : "Red commit's tests already passed when the step was first introduced — meaning the implementation was added before the test, or the test is tautological. Switch to pragmatic mode if you commit red+green together intentionally.";
+    case "green-did-not-pass":
+      return "Green commit's own tests still fail. The implementation doesn't yet satisfy the test you wrote — fix the impl, or reconsider whether the test reflects the requirement.";
+    case "hidden-tests-failed":
+      return hiddenPassed === false
+        ? "Your tests pass, but the kata's hidden tests don't — this is the classic tautology trap. Tighten your test to mirror the requirement (e.g., assert the actual return value, not just that it runs)."
+        : "Your tests pass, but hidden verification was inconclusive. Re-push to retry."; // fallback for a hiddenPassed of true or null paired with this status
+    case "test-deleted":
+      return "Test count dropped between red and green for this step. Once a test exists it must keep existing — refactor it, don't delete it. If the test was wrong, replace it in a separate commit before resuming the cycle.";
+  }
+};
+
+const explainRefactor = (passed: boolean): string => {
+  if (!passed) return "Refactor commit broke at least one test. Either revert the refactor or write a new red→green to capture the changed behavior.";
+  return "Tests stayed green through the refactor — structural change without behavior change, the canonical refactor.";
+};
+
 const FORGEJO_INTERNAL = process.env.FORGEJO_URL ?? "https://git.tdd.md";
 const TEST_TIMEOUT_MS = 8000;
 
@@ -139,6 +200,10 @@ export const judge = async (owner: string, repo: string): Promise<Verdict> => {
       }
     }
 
+    // Read the agent's mode preference (defaults to strict). Mode
+    // affects penalties only — verified credits are mode-invariant.
+    const mode = await readMode(cwd);
+
     // Load the kata's authoritative spec — used to fetch hidden tests
     // per step. Repos that don't match a known kata get scored on red→green
     // discipline only (no hidden-test verification).
@@ -170,31 +235,31 @@ export const judge = async (owner: string, repo: string): Promise<Verdict> => {
       }
 
       let status: StepVerdict["status"];
-      let scoreDelta = 0;
+      let baseDelta = 0;
       if (greenSha === null) {
         status = "no-green";
       } else if (testsDeleted) {
-        // The kata spec calls this -∞. Stiff penalty: the entire step's
-        // potential gain (+20) is wiped and then some.
         status = "test-deleted";
-        scoreDelta = -20;
+        baseDelta = -20;
       } else if (!redFailed) {
         status = "red-did-not-fail";
-        scoreDelta = -5;
+        baseDelta = -5;
       } else if (greenPassed === false) {
         status = "green-did-not-pass";
-        scoreDelta = -5;
+        baseDelta = -5;
       } else if (hiddenPassed === false) {
         status = "hidden-tests-failed";
-        scoreDelta = 0;
+        baseDelta = 0;
       } else if (hiddenPassed === true) {
         status = "verified";
-        scoreDelta = 20;
+        baseDelta = 20;
       } else {
         status = "discipline-only";
-        scoreDelta = 5;
+        baseDelta = 5;
       }
-      steps.push({ stepId, redSha, greenSha, redFailed, greenPassed, hiddenPassed, status, scoreDelta });
+      const scoreDelta = applyMode(baseDelta, mode);
+      const explanation = explainStep({ status, redSha, greenSha, hiddenPassed, mode });
+      steps.push({ stepId, redSha, greenSha, redFailed, greenPassed, hiddenPassed, status, scoreDelta, explanation });
     }
 
     // Refactor commits aren't tied to red→green pairs: the spec rewards
@@ -206,18 +271,20 @@ export const judge = async (owner: string, repo: string): Promise<Verdict> => {
       if (c.phase !== "refactor") continue;
       await runProc(["git", "checkout", "--quiet", c.sha], cwd, 5000);
       const passed = await runTests(cwd);
+      const baseDelta = passed ? 5 : -5;
       refactors.push({
         sha: c.sha,
         stepId: c.step,
         testsPassed: passed,
-        scoreDelta: passed ? 5 : -5,
+        scoreDelta: applyMode(baseDelta, mode),
+        explanation: explainRefactor(passed),
       });
     }
 
     const totalScore =
       steps.reduce((a, s) => a + s.scoreDelta, 0) +
       refactors.reduce((a, r) => a + r.scoreDelta, 0);
-    const verdict: Verdict = { headSha, steps, refactors, totalScore, judgedAt: Date.now() };
+    const verdict: Verdict = { headSha, mode, steps, refactors, totalScore, judgedAt: Date.now() };
     saveRun(owner, repo, verdict);
     return verdict;
   } finally {
diff --git a/src/server.ts b/src/server.ts
index 13e7957a6c702c6234f2f64cd3e06808896d771b..c8e863cd5fec1f6b5bff57deff1edd541ba71910 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -432,9 +432,13 @@ const renderRepoView = async (owner: string, repo: string): Promise<Response> =>
       if (status === "no-green") return "muted";
       return "red";
     };
+    const modeLabel = (m: string): string => {
+      const cls = m === "strict" ? "red" : m === "pragmatic" ? "blue" : "green";
+      return `<span class="${cls}">${m}</span>`;
+    };
     const rows = verdict.steps.length === 0
       ? "_No red→green pairs found yet._"
-      : `| step | red | green | hidden | status | points |\n|---|---|---|---|---|---|\n` +
+      : `| step | red | green | hidden | status | points | explanation |\n|---|---|---|---|---|---|---|\n` +
         verdict.steps.map((s) => {
           const cls = statusClass(s.status);
           const sign = s.scoreDelta >= 0 ? "+" : "";
@@ -442,18 +446,21 @@ const renderRepoView = async (owner: string, repo: string): Promise<Response> =>
             s.hiddenPassed === true ? `<span class="green">pass</span>` :
             s.hiddenPassed === false ? `<span class="red">fail</span>` :
             `<span class="muted">—</span>`;
-          return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | <span class="${cls}">${s.status}</span> | ${sign}${s.scoreDelta} |`;
+          const explanation = (s.explanation ?? "").replace(/\|/g, "\\|");
+          return `| \`${s.stepId}\` | \`${s.redSha?.slice(0, 7) ?? "—"}\` | \`${s.greenSha?.slice(0, 7) ?? "—"}\` | ${hiddenCell} | <span class="${cls}">${s.status}</span> | ${sign}${s.scoreDelta} | ${explanation} |`;
         }).join("\n");
     const refactorRows = (verdict.refactors ?? []).length === 0
       ? ""
-      : `\n\n### refactors\n\n| sha | step | tests | points |\n|---|---|---|---|\n` +
+      : `\n\n### refactors\n\n| sha | step | tests | points | explanation |\n|---|---|---|---|---|\n` +
         verdict.refactors.map((r) => {
           const sign = r.scoreDelta >= 0 ? "+" : "";
           const cls = r.testsPassed ? "green" : "red";
-          const verdict = r.testsPassed ? "green" : "broke tests";
-          return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | <span class="${cls}">${verdict}</span> | ${sign}${r.scoreDelta} |`;
+          const verb = r.testsPassed ? "green" : "broke tests";
+          const explanation = (r.explanation ?? "").replace(/\|/g, "\\|");
+          return `| \`${r.sha.slice(0, 7)}\` | ${r.stepId ? `\`${r.stepId}\`` : "—"} | <span class="${cls}">${verb}</span> | ${sign}${r.scoreDelta} | ${explanation} |`;
         }).join("\n");
-    scoreSection = `**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`;
+    const modeLine = verdict.mode ? `**mode: ${modeLabel(verdict.mode)}** · ` : "";
+    scoreSection = `${modeLine}**total: ${sign}${verdict.totalScore}** · judged ${relativeTime(new Date(verdict.judgedAt).toISOString())}${stale}\n\n${rows}${refactorRows}`;
   }
 
   const body = `# ${owner} · playing ${kataLink}