labelapp updates v2

2026-04-05 00:55:53 -04:00 · 2026-04-05 00:55:53 -04:00 · 6f4d6c57a4
commit 6f4d6c57a4
parent 160adc42ab
34 changed files with 996 additions and 2250 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -41,7 +41,6 @@ All commands run from repo root via `bun run <script>`. No need to cd into subpa
 | `la:db:migrate` | Apply Drizzle migrations |
 | `la:db:studio` | Drizzle Studio (DB browser) |
 | `la:seed` | Seed paragraphs + annotations |
 | `la:sample` | Run paragraph sampling |
 | `la:assign` | Generate annotator assignments |
 | `la:export` | Export labels |
 | `la:docker` | Build + push Docker image |
--- a/docs/STATUS.md
+++ b/docs/STATUS.md
@ -79,10 +79,16 @@
 - **Estimated cost:** ~$96
 ### 7. Labelapp Update
- [ ] Update quiz questions for v2 codebook
+- [x] Update quiz questions for v2 codebook (v2 specificity rules, fixed impossible qv-3, all 4 levels as options)
- [ ] Update warmup paragraphs with v2 examples
+- [x] Update warmup paragraphs with v2 explanations
- [ ] Load new holdout paragraphs into labelapp DB
+- [x] Update onboarding content for v2 (Domain-Adapted, 1+ QV, domain terminology lists)
- [ ] Generate new BIBD assignments (3 of 6 annotators per paragraph)
+- [x] Update codebook reference page for v2
 - [x] DB migration to clear old 72k data (0002_v2-reset.sql)
 - [x] Seed script updated for 1,200 holdout paragraphs only
 - [x] Nuke admin account, joey is admin
 - [x] Quiz is one-time (at onboarding), warmup resets each login session
 - [ ] Run migration + seed (`la:db:migrate` then `la:seed`)
 - [ ] Generate new BIBD assignments (3 of 5 annotators per paragraph)
 ### 8. Parallel Labeling
 - [ ] Humans: annotators label v2 holdout (~600 per annotator, 2-3 days)
--- a/labelapp/.sampled-ids.json
+++ b/labelapp/.sampled-ids.json
--- a/labelapp/Dockerfile
+++ b/labelapp/Dockerfile
@ -47,9 +47,10 @@ COPY --from=builder /app/labelapp/db/ ./labelapp/db/
 COPY --from=builder /app/labelapp/scripts/ ./labelapp/scripts/
 COPY --from=builder /app/labelapp/lib/ ./labelapp/lib/
-# Seed data (paragraphs + stage1 annotations)
+# Seed data (paragraphs + stage1 annotations + v2 holdout IDs)
 COPY data/paragraphs/paragraphs-clean.jsonl /app/data/paragraphs-clean.jsonl
 COPY data/annotations/stage1.jsonl /app/data/stage1.jsonl
 COPY data/gold/v2-holdout-ids.json /app/data/v2-holdout-ids.json
 # Entrypoint
 COPY labelapp/entrypoint.sh /app/entrypoint.sh
--- a/labelapp/app/admin/page.tsx
+++ b/labelapp/app/admin/page.tsx
@ -44,9 +44,9 @@ const CATEGORIES = [
 const SPECIFICITIES = [
  { value: 1, label: "1 - Generic/Boilerplate" },
-  { value: 2, label: "2 - Somewhat Specific" },
+  { value: 2, label: "2 - Domain-Adapted" },
-  { value: 3, label: "3 - Specific" },
+  { value: 3, label: "3 - Firm-Specific" },
-  { value: 4, label: "4 - Highly Specific" },
+  { value: 4, label: "4 - Quantified-Verifiable" },
 ] as const;
 interface QueueLabel {
--- a/labelapp/app/api/adjudicate/test/adjudicate.test.ts
+++ b/labelapp/app/api/adjudicate/test/adjudicate.test.ts
@ -31,8 +31,8 @@ const { createSession } = await import("@/lib/auth");
 const { GET, POST } = await import("../route");
 const ADMIN_USER = {
-  id: "admin",
+  id: "joey",
-  displayName: "Admin",
+  displayName: "Joey",
  password: "adminpass",
 };
@ -288,7 +288,7 @@ describe("POST /api/adjudicate", () => {
    expect(adj.method).toBe("discussion");
    expect(adj.finalCategory).toBe("Third-Party Risk");
    expect(adj.finalSpecificity).toBe(2);
-    expect(adj.adjudicatorId).toBe("admin");
+    expect(adj.adjudicatorId).toBe("joey");
    expect(adj.notes).toBe("Resolved via discussion");
  });
--- a/labelapp/app/api/adjudicate/route.ts
+++ b/labelapp/app/api/adjudicate/route.ts
@ -75,7 +75,7 @@ export async function GET() {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }
-  if (session.annotatorId !== "admin") {
+  if (session.annotatorId !== "joey") {
    return NextResponse.json({ error: "Admin access required" }, { status: 403 });
  }
@ -188,7 +188,7 @@ export async function POST(request: Request) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }
-  if (session.annotatorId !== "admin") {
+  if (session.annotatorId !== "joey") {
    return NextResponse.json({ error: "Admin access required" }, { status: 403 });
  }
--- a/labelapp/app/api/label/route.ts
+++ b/labelapp/app/api/label/route.ts
@ -22,7 +22,7 @@ const VALID_CATEGORIES = [
 const VALID_SPECIFICITY = [1, 2, 3, 4] as const;
-async function checkWarmupComplete(annotatorId: string): Promise<boolean> {
+async function checkWarmupComplete(annotatorId: string, sessionCreatedAt: number): Promise<boolean> {
  const [quizSession] = await db
    .select()
    .from(quizSessions)
@ -39,11 +39,11 @@ async function checkWarmupComplete(annotatorId: string): Promise<boolean> {
  try {
    const parsed = JSON.parse(quizSession.answers);
-    const warmupCompleted =
+    if (typeof parsed === "object" && !Array.isArray(parsed) && parsed.warmupBySession) {
-      typeof parsed === "object" && !Array.isArray(parsed)
+      const sessionKey = String(sessionCreatedAt);
-        ? parsed.warmupCompleted ?? 0
+      return (parsed.warmupBySession[sessionKey] ?? 0) >= WARMUP_PARAGRAPHS.length;
-        : 0;
+    }
-    return warmupCompleted >= WARMUP_PARAGRAPHS.length;
+    return false;
  } catch {
    return false;
  }
@ -55,7 +55,7 @@ export async function GET() {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }
-  const warmupDone = await checkWarmupComplete(session.annotatorId);
+  const warmupDone = await checkWarmupComplete(session.annotatorId, session.createdAt);
  if (!warmupDone) {
    return NextResponse.json({ redirectToWarmup: true });
  }
--- a/labelapp/app/api/metrics/test/metrics.test.ts
+++ b/labelapp/app/api/metrics/test/metrics.test.ts
@ -31,8 +31,8 @@ const { createSession } = await import("@/lib/auth");
 const { GET } = await import("../route");
 const ADMIN_USER = {
-  id: "admin",
+  id: "joey",
-  displayName: "Admin",
+  displayName: "Joey",
  password: "adminpass",
 };
--- a/labelapp/app/api/metrics/route.ts
+++ b/labelapp/app/api/metrics/route.ts
@ -33,7 +33,7 @@ export async function GET() {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }
-  if (session.annotatorId !== "admin") {
+  if (session.annotatorId !== "joey") {
    return NextResponse.json({ error: "Admin access required" }, { status: 403 });
  }
@ -100,7 +100,7 @@ export async function GET() {
  // Filter to non-admin annotators for per-annotator stats
  const perAnnotator = allAnnotators
-    .filter((a) => a.id !== "admin")
+    .filter((a) => a.id !== "joey")
    .map((a) => ({
      id: a.id,
      displayName: a.displayName,
@ -132,7 +132,7 @@ export async function GET() {
  // Collect all annotator IDs that have labels (excluding admin)
  const annotatorIds = [
    ...new Set(allLabels.map((l) => l.annotatorId)),
-  ].filter((id) => id !== "admin");
+  ].filter((id) => id !== "joey");
  annotatorIds.sort();
  // For each annotator pair, collect shared paragraph ratings
--- a/labelapp/app/api/quiz/route.ts
+++ b/labelapp/app/api/quiz/route.ts
@ -1,7 +1,7 @@
 import { NextResponse } from "next/server";
 import { db } from "@/db";
 import { quizSessions } from "@/db/schema";
-import { eq, and, gte } from "drizzle-orm";
+import { eq, and } from "drizzle-orm";
 import { getSession } from "@/lib/auth";
 import {
  drawQuizQuestions,
@ -9,7 +9,7 @@ import {
  type QuizQuestion,
 } from "@/lib/quiz-questions";
-const QUIZ_EXPIRY_MS = 2 * 60 * 60 * 1000; // 2 hours
+const QUIZ_SESSION_EXPIRY_MS = 2 * 60 * 60 * 1000; // 2 hours (for in-progress quiz attempts only)
 interface StoredAnswer {
  questionId: string;
@ -23,8 +23,7 @@ export async function GET() {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }
-  const cutoff = new Date(Date.now() - QUIZ_EXPIRY_MS);
+  // Quiz only needs to be passed once — no time-based expiry
  const [passedQuiz] = await db
    .select()
    .from(quizSessions)
@ -32,7 +31,6 @@ export async function GET() {
      and(
        eq(quizSessions.annotatorId, session.annotatorId),
        eq(quizSessions.passed, true),
        gte(quizSessions.startedAt, cutoff),
      ),
    )
    .orderBy(quizSessions.startedAt)
@ -132,7 +130,7 @@ async function handleAnswer(
  }
  const elapsed = Date.now() - quizSession.startedAt.getTime();
-  if (elapsed > QUIZ_EXPIRY_MS) {
+  if (elapsed > QUIZ_SESSION_EXPIRY_MS) {
    return NextResponse.json({ error: "Quiz session expired" }, { status: 400 });
  }
--- a/labelapp/app/api/warmup/route.ts
+++ b/labelapp/app/api/warmup/route.ts
@ -5,20 +5,32 @@ import { eq, and, desc } from "drizzle-orm";
 import { getSession } from "@/lib/auth";
 import { WARMUP_PARAGRAPHS } from "@/lib/warmup-paragraphs";
-interface WarmupProgress {
+/**
-  warmupCompleted: number;
+ * Warmup progress is keyed by the auth session's createdAt timestamp.
 * This means warmup resets every time the user logs in (new session),
 * while the quiz pass is permanent.
 */
 interface WarmupProgressMap {
  [sessionKey: string]: number;
 }
-function parseWarmupProgress(answersJson: string): WarmupProgress {
+function getWarmupCompleted(
  answersJson: string,
  sessionKey: string,
 ): number {
  try {
    const parsed = JSON.parse(answersJson);
-    if (parsed && typeof parsed === "object" && "warmupCompleted" in parsed) {
+    if (parsed && typeof parsed === "object" && "warmupBySession" in parsed) {
-      return { warmupCompleted: parsed.warmupCompleted ?? 0 };
+      return (parsed.warmupBySession as WarmupProgressMap)[sessionKey] ?? 0;
    }
-    // Legacy format: answers is an array, no warmup tracking yet
+    // Legacy format: single warmupCompleted
-    return { warmupCompleted: 0 };
+    if (parsed && typeof parsed === "object" && "warmupCompleted" in parsed) {
      return 0; // Reset legacy progress — they need to redo warmup
    }
    return 0;
  } catch {
-    return { warmupCompleted: 0 };
+    return 0;
  }
 }
@ -51,20 +63,21 @@ export async function GET() {
    );
  }
-  const progress = parseWarmupProgress(quizSession.answers);
+  const sessionKey = String(session.createdAt);
  const warmupCompleted = getWarmupCompleted(quizSession.answers, sessionKey);
-  if (progress.warmupCompleted >= WARMUP_PARAGRAPHS.length) {
+  if (warmupCompleted >= WARMUP_PARAGRAPHS.length) {
-    return NextResponse.json({ done: true, warmupCompleted: progress.warmupCompleted });
+    return NextResponse.json({ done: true, warmupCompleted });
  }
-  const next = WARMUP_PARAGRAPHS[progress.warmupCompleted];
+  const next = WARMUP_PARAGRAPHS[warmupCompleted];
  return NextResponse.json({
    done: false,
-    warmupCompleted: progress.warmupCompleted,
+    warmupCompleted,
    paragraph: {
      id: next.id,
      text: next.text,
-      index: progress.warmupCompleted,
+      index: warmupCompleted,
      total: WARMUP_PARAGRAPHS.length,
    },
  });
@ -98,9 +111,10 @@ export async function POST(request: Request) {
    );
  }
-  const progress = parseWarmupProgress(quizSession.answers);
+  const sessionKey = String(session.createdAt);
  const warmupCompleted = getWarmupCompleted(quizSession.answers, sessionKey);
-  if (warmupIndex !== progress.warmupCompleted) {
+  if (warmupIndex !== warmupCompleted) {
    return NextResponse.json(
      { error: "Warmup index mismatch" },
      { status: 400 },
@ -119,8 +133,7 @@ export async function POST(request: Request) {
  const categoryCorrect = category === gold.goldCategory;
  const specificityCorrect = specificity === gold.goldSpecificity;
-  // Store warmup progress in the quiz session answers field
+  // Store warmup progress keyed by auth session
  // Preserve existing quiz answers array if present, add warmup tracking
  let existingData: Record<string, unknown>;
  try {
    const parsed = JSON.parse(quizSession.answers);
@ -133,12 +146,15 @@ export async function POST(request: Request) {
    existingData = {};
  }
  const warmupBySession = (existingData.warmupBySession as WarmupProgressMap) ?? {};
  warmupBySession[sessionKey] = newCompleted;
  await db
    .update(quizSessions)
    .set({
      answers: JSON.stringify({
        ...existingData,
-        warmupCompleted: newCompleted,
+        warmupBySession,
      }),
    })
    .where(eq(quizSessions.id, quizSession.id));
--- a/labelapp/app/codebook/page.tsx
+++ b/labelapp/app/codebook/page.tsx
@ -807,7 +807,7 @@ export default function CodebookPage() {
              <li>
                <strong>Count hard verifiable facts ONLY</strong> (specific
                dates, dollar amounts, headcounts/percentages, named third-party
-                firms, named products/tools, named certifications). TWO or more?
+                firms, named products/tools, named certifications). At least ONE?
                &rarr; <strong>Quantified-Verifiable (4)</strong>
              </li>
              <li>
@ -815,9 +815,10 @@ export default function CodebookPage() {
                below?</strong> &rarr; <strong>Firm-Specific (3)</strong>
              </li>
              <li>
-                <strong>Does it name a recognized standard</strong> (NIST, ISO
+                <strong>Does it use any cybersecurity domain terminology?</strong>{" "}
-                27001, SOC 2, CIS, GDPR, PCI DSS, HIPAA)? &rarr;{" "}
+                (penetration testing, vulnerability scanning, SIEM, SOC, EDR,
-                <strong>Sector-Adapted (2)</strong>
+                NIST CSF, ISO 27001, zero trust, etc.) &rarr;{" "}
                <strong>Domain-Adapted (2)</strong>
              </li>
              <li>
                <strong>None of the above?</strong> &rarr;{" "}
@ -853,12 +854,11 @@ export default function CodebookPage() {
                </TableRow>
                <TableRow>
                  <TableCell>2</TableCell>
-                  <TableCell>Sector-Adapted</TableCell>
+                  <TableCell>Domain-Adapted</TableCell>
                  <TableCell>
-                    Names a specific recognized standard (NIST, ISO 27001, SOC 2,
+                    Uses cybersecurity domain terminology (penetration testing,
-                    etc.) but contains nothing unique to THIS company. General
+                    vulnerability scanning, SIEM, SOC, EDR, NIST CSF, ISO 27001,
-                    practices (pen testing, vulnerability scanning, tabletop
+                    zero trust, etc.) but contains nothing unique to THIS company.
                    exercises) do NOT qualify — only named standards.
                  </TableCell>
                </TableRow>
                <TableRow>
@ -873,8 +873,8 @@ export default function CodebookPage() {
                  <TableCell>4</TableCell>
                  <TableCell>Quantified-Verifiable</TableCell>
                  <TableCell>
-                    Contains TWO or more hard verifiable facts (see QV-eligible
+                    Contains at least one hard verifiable fact that an external
-                    list). One fact = Firm-Specific, not QV.
+                    party could independently verify (see QV-eligible list).
                  </TableCell>
                </TableRow>
              </TableBody>
@ -887,16 +887,12 @@ export default function CodebookPage() {
              IS a Specific Fact (any ONE &rarr; at least Firm-Specific)
            </SectionHeading>
            <ul className="list-disc list-inside space-y-1 text-sm">
-              <ISItem><strong>Cybersecurity-specific titles:</strong> CISO, CTO, CIO, VP of IT/Security, Information Security Officer, Director of IT Security, HSE Director overseeing cybersecurity, Chief Digital Officer (when overseeing cyber), Cybersecurity Director</ISItem>
+              <ISItem><strong>Cybersecurity-specific titles:</strong> CISO, CTO, CIO, VP of IT/Security, Information Security Officer, Director of IT Security, Cybersecurity Director, Chief Digital Officer (when overseeing cyber)</ISItem>
-              <ISItem><strong>Named non-generic committees:</strong> Technology Committee, Cybersecurity Committee, Risk Committee, ERM Committee (NOT &ldquo;Audit Committee&rdquo; — that exists at every public company)</ISItem>
+              <ISItem><strong>Named non-generic committees:</strong> Technology Committee, Cybersecurity Committee, Cybersecurity Steering Committee, Risk Committee (NOT &ldquo;Audit Committee&rdquo; — that exists at every public company)</ISItem>
              <ISItem><strong>Specific team/department compositions:</strong> &ldquo;Legal, Compliance, and Finance&rdquo; (but NOT just &ldquo;a cross-functional team&rdquo;)</ISItem>
              <ISItem><strong>Specific dates:</strong> &ldquo;In December 2023&rdquo;, &ldquo;On May 6, 2024&rdquo;, &ldquo;fiscal 2025&rdquo;</ISItem>
              <ISItem><strong>Named internal programs with unique identifiers:</strong> &ldquo;Cyber Incident Response Plan (CIRP)&rdquo; (must have a distinguishing name/abbreviation — generic &ldquo;incident response plan&rdquo; does not qualify)</ISItem>
-              <ISItem><strong>Named products, systems, tools:</strong> Splunk, CrowdStrike Falcon, Azure Sentinel, ServiceNow</ISItem>
+              <ISItem><strong>Named individuals</strong> in a cybersecurity role context</ISItem>
-              <ISItem><strong>Named third-party firms:</strong> Mandiant, Deloitte, CrowdStrike, PwC</ISItem>
+              <ISItem><strong>Specific organizational claims:</strong> &ldquo;24/7 security operations&rdquo; (implies specific organizational investment beyond generic monitoring)</ISItem>
              <ISItem><strong>Specific numbers:</strong> headcounts, dollar amounts, percentages, exact durations (&ldquo;17 years&rdquo;, &ldquo;12 professionals&rdquo;)</ISItem>
              <ISItem><strong>Certification claims:</strong> &ldquo;We maintain ISO 27001 certification&rdquo; (holding a certification is more than naming a standard)</ISItem>
              <ISItem><strong>Named universities in credential context:</strong> &ldquo;Ph.D. from Princeton University&rdquo; (independently verifiable)</ISItem>
            </ul>
          </div>
@ -914,7 +910,7 @@ export default function CodebookPage() {
              <NOTItem><strong>Boilerplate phrases:</strong> &ldquo;cybersecurity risks&rdquo;, &ldquo;material adverse effect&rdquo;, &ldquo;business operations&rdquo;, &ldquo;financial condition&rdquo;</NOTItem>
              <NOTItem><strong>Standard incident language:</strong> &ldquo;forensic investigation&rdquo;, &ldquo;law enforcement&rdquo;, &ldquo;regulatory obligations&rdquo;, &ldquo;incident response protocols&rdquo;</NOTItem>
              <NOTItem><strong>Vague quantifiers:</strong> &ldquo;certain systems&rdquo;, &ldquo;some employees&rdquo;, &ldquo;a number of&rdquo;, &ldquo;a portion of&rdquo;</NOTItem>
-              <NOTItem><strong>Common practices:</strong> &ldquo;penetration testing&rdquo;, &ldquo;vulnerability scanning&rdquo;, &ldquo;tabletop exercises&rdquo;, &ldquo;phishing simulations&rdquo;, &ldquo;security awareness training&rdquo;</NOTItem>
+              <NOTItem><strong>Generic ERM terms:</strong> &ldquo;risk assessment&rdquo;, &ldquo;incident response plan&rdquo;, &ldquo;business continuity&rdquo;, &ldquo;tabletop exercises&rdquo; (without cybersecurity qualifier), &ldquo;internal controls&rdquo;, &ldquo;compliance&rdquo;</NOTItem>
              <NOTItem><strong>Generic program names:</strong> &ldquo;incident response plan&rdquo;, &ldquo;business continuity plan&rdquo;, &ldquo;cybersecurity program&rdquo;, &ldquo;Third-Party Risk Management Program&rdquo;, &ldquo;Company-wide training&rdquo; — no unique identifier or distinguishing abbreviation</NOTItem>
              <NOTItem><strong>Company self-references:</strong> the company&rsquo;s own name, &ldquo;the Company&rdquo;, &ldquo;the Bank&rdquo;, subsidiary names, filing form types</NOTItem>
              <NOTItem><strong>Company milestones:</strong> &ldquo;since our IPO&rdquo;, &ldquo;since inception&rdquo; — not cybersecurity facts</NOTItem>
@ -924,7 +920,7 @@ export default function CodebookPage() {
          {/* QV-Eligible Facts */}
          <div className="space-y-3">
            <SectionHeading id="qv-eligible" level={3}>
-              QV-Eligible Facts (count toward the 2-fact threshold for Quantified-Verifiable)
+              QV-Eligible Facts (any one triggers Quantified-Verifiable)
            </SectionHeading>
            <ul className="list-disc list-inside space-y-1 text-sm text-green-700 dark:text-green-400">
              <li>Specific dates (month+year or exact date)</li>
@ -940,12 +936,12 @@ export default function CodebookPage() {
          {/* Do NOT count toward QV */}
          <div className="space-y-3">
            <SectionHeading id="not-qv" level={3}>
-              Do NOT Count Toward QV (these trigger Firm-Specific but not QV)
+              NOT QV-Eligible (these trigger Firm-Specific or Domain-Adapted, not QV)
            </SectionHeading>
            <ul className="list-disc list-inside space-y-1 text-sm text-red-700 dark:text-red-400">
              <li>Named roles (CISO, CIO)</li>
              <li>Named committees</li>
-              <li>Named frameworks (NIST, ISO 27001) — these trigger Sector-Adapted</li>
+              <li>Named frameworks followed (NIST, ISO 27001) — these trigger Domain-Adapted</li>
              <li>Team compositions, reporting structures</li>
              <li>Named internal programs</li>
              <li>Generic degrees without named university (&ldquo;BS in Management&rdquo;)</li>
@ -960,7 +956,7 @@ export default function CodebookPage() {
            <p>
              Before finalizing specificity, review the extracted facts. Remove
              any that appear on the NOT list. If no facts remain after filtering
-              &rarr; Generic Boilerplate (or Sector-Adapted if a named standard
+              &rarr; Generic Boilerplate (or Domain-Adapted if domain terminology
              is present). Do not let NOT-list items inflate the specificity
              rating.
            </p>
@ -1050,7 +1046,8 @@ export default function CodebookPage() {
              Board (Audit Committee oversees) + Management (CISO qualifications,
              reporting). The opening clause sets the frame: this is about the
              Audit Committee&rsquo;s oversight, and the CISO detail is
-              subordinate. &rarr; <strong>Board Governance, Specificity 3.</strong>
+              subordinate. &rarr; <strong>Board Governance, Specificity 4</strong>{" "}
              (CISSP is a QV-eligible certification — verifiable via ISC2).
            </p>
          </div>
--- a/labelapp/app/dashboard/page.tsx
+++ b/labelapp/app/dashboard/page.tsx
@ -2,8 +2,8 @@ import { redirect } from "next/navigation";
 import Link from "next/link";
 import { getSession } from "@/lib/auth";
 import { db } from "@/db";
-import { annotators } from "@/db/schema";
+import { annotators, quizSessions } from "@/db/schema";
-import { eq } from "drizzle-orm";
+import { eq, and } from "drizzle-orm";
 import {
  Card,
  CardHeader,
@ -31,6 +31,34 @@ export default async function DashboardPage() {
  const isOnboarded = !!annotator.onboardedAt;
  // Check if user has ever passed the quiz (one-time requirement)
  const [passedQuiz] = await db
    .select({ id: quizSessions.id })
    .from(quizSessions)
    .where(
      and(
        eq(quizSessions.annotatorId, session.annotatorId),
        eq(quizSessions.passed, true),
      ),
    )
    .limit(1);
  const hasPassedQuiz = !!passedQuiz;
  // Determine the primary action link
  let primaryHref: string;
  let primaryLabel: string;
  if (!isOnboarded) {
    primaryHref = "/onboarding";
    primaryLabel = "Complete Training";
  } else if (!hasPassedQuiz) {
    primaryHref = "/quiz";
    primaryLabel = "Take Quiz";
  } else {
    primaryHref = "/label";
    primaryLabel = "Start Labeling Session";
  }
  return (
    <div className="flex flex-1 items-center justify-center p-4">
      <Card className="w-full max-w-sm">
@ -43,21 +71,15 @@ export default async function DashboardPage() {
          </CardDescription>
        </CardHeader>
        <CardContent className="flex flex-col gap-4">
-          {isOnboarded ? (
+          <Link href={primaryHref} className="block">
-            <Link href="/quiz" className="block">
+            <Button className="w-full">{primaryLabel}</Button>
-              <Button className="w-full">Start Labeling Session</Button>
+          </Link>
            </Link>
          ) : (
            <Link href="/onboarding" className="block">
              <Button className="w-full">Complete Training</Button>
            </Link>
          )}
          <Link href="/codebook" className="block">
            <Button variant="outline" className="w-full">
              Codebook Reference
            </Button>
          </Link>
-          {session.annotatorId === "admin" && (
+          {session.annotatorId === "joey" && (
            <Link href="/admin" className="block">
              <Button variant="outline" className="w-full">Admin Panel</Button>
            </Link>
--- a/labelapp/app/label/page.tsx
+++ b/labelapp/app/label/page.tsx
@ -47,7 +47,7 @@ const CATEGORIES = [
 const SPECIFICITY_LABELS = [
  "Generic Boilerplate",
-  "Sector-Adapted",
+  "Domain-Adapted",
  "Firm-Specific",
  "Quantified-Verifiable",
 ] as const;
@ -475,18 +475,18 @@ function CodebookSidebar() {
                />
                <SpecDef
                  level={2}
-                  name="Sector-Adapted"
+                  name="Domain-Adapted"
-                  desc="Names a recognized standard or sector practice (e.g., NIST, SOC 2, PCI DSS) but nothing firm-specific."
+                  desc="Uses cybersecurity domain terminology (e.g., penetration testing, NIST CSF, SIEM, SOC) but nothing unique to THIS company."
                />
                <SpecDef
                  level={3}
                  name="Firm-Specific"
-                  desc="Contains at least one fact from the IS list unique to this company: cybersecurity-specific titles (CISO, CTO), named tools/vendors, specific dates, named committees."
+                  desc="Contains at least one fact unique to THIS company: cybersecurity-specific titles (CISO, CTO, CIO), named non-generic committees, named individuals, 24/7 security operations."
                />
                <SpecDef
                  level={4}
                  name="Quantified-Verifiable"
-                  desc="Contains 2+ hard verifiable facts: specific dates, dollar amounts, percentages, headcounts, named third parties with specifics."
+                  desc="Contains 1+ QV-eligible facts: specific numbers, dates, named external entities, named tools/products, verifiable certifications."
                />
              </div>
            </section>
--- a/labelapp/app/label/warmup/page.tsx
+++ b/labelapp/app/label/warmup/page.tsx
@ -27,7 +27,7 @@ const CATEGORIES = [
 const SPECIFICITY_LABELS = [
  "Generic Boilerplate",
-  "Sector-Adapted",
+  "Domain-Adapted",
  "Firm-Specific",
  "Quantified-Verifiable",
 ] as const;
--- a/labelapp/app/quiz/page.tsx
+++ b/labelapp/app/quiz/page.tsx
@ -54,7 +54,7 @@ type QuizPhase = "loading" | "ready" | "active" | "feedback" | "results";
 const TYPE_LABELS: Record<string, string> = {
  "person-vs-function": "Person vs. Function",
  "materiality-disclaimer": "Materiality Disclaimer",
-  "qv-counting": "QV Fact Counting",
+  "specificity": "Specificity Level",
  "spac-exception": "SPAC Exception",
 };
--- a/labelapp/drizzle/0002_v2-reset.sql
+++ b/labelapp/drizzle/0002_v2-reset.sql
@ -0,0 +1,8 @@
 -- v2 reset: empty the six tables below (annotator accounts included) ahead of a fresh v2 holdout seed
 -- Referencing tables come first (adjudications/assignments point at paragraphs); CASCADE also empties any table referencing a listed one, so the order is a convention, not a requirement
 TRUNCATE TABLE adjudications CASCADE;
 TRUNCATE TABLE human_labels CASCADE;
 TRUNCATE TABLE quiz_sessions CASCADE;
 TRUNCATE TABLE assignments CASCADE;
 TRUNCATE TABLE paragraphs CASCADE;
 TRUNCATE TABLE annotators CASCADE;
--- a/labelapp/drizzle/meta/0002_snapshot.json
+++ b/labelapp/drizzle/meta/0002_snapshot.json
@ -0,0 +1,510 @@
 {
  "id": "392c9bd4-1b68-4e32-86b0-be7abc632b44",
  "prevId": "00000000-0000-0000-0000-000000000000",
  "version": "7",
  "dialect": "postgresql",
  "tables": {
    "public.adjudications": {
      "name": "adjudications",
      "schema": "",
      "columns": {
        "paragraph_id": {
          "name": "paragraph_id",
          "type": "text",
          "primaryKey": true,
          "notNull": true
        },
        "final_category": {
          "name": "final_category",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "final_specificity": {
          "name": "final_specificity",
          "type": "integer",
          "primaryKey": false,
          "notNull": true
        },
        "method": {
          "name": "method",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "adjudicator_id": {
          "name": "adjudicator_id",
          "type": "text",
          "primaryKey": false,
          "notNull": false
        },
        "notes": {
          "name": "notes",
          "type": "text",
          "primaryKey": false,
          "notNull": false
        },
        "resolved_at": {
          "name": "resolved_at",
          "type": "timestamp",
          "primaryKey": false,
          "notNull": true,
          "default": "now()"
        }
      },
      "indexes": {},
      "foreignKeys": {
        "adjudications_paragraph_id_paragraphs_id_fk": {
          "name": "adjudications_paragraph_id_paragraphs_id_fk",
          "tableFrom": "adjudications",
          "tableTo": "paragraphs",
          "columnsFrom": [
            "paragraph_id"
          ],
          "columnsTo": [
            "id"
          ],
          "onDelete": "no action",
          "onUpdate": "no action"
        }
      },
      "compositePrimaryKeys": {},
      "uniqueConstraints": {},
      "policies": {},
      "checkConstraints": {},
      "isRLSEnabled": false
    },
    "public.annotators": {
      "name": "annotators",
      "schema": "",
      "columns": {
        "id": {
          "name": "id",
          "type": "text",
          "primaryKey": true,
          "notNull": true
        },
        "display_name": {
          "name": "display_name",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "password": {
          "name": "password",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "onboarded_at": {
          "name": "onboarded_at",
          "type": "timestamp",
          "primaryKey": false,
          "notNull": false
        }
      },
      "indexes": {},
      "foreignKeys": {},
      "compositePrimaryKeys": {},
      "uniqueConstraints": {},
      "policies": {},
      "checkConstraints": {},
      "isRLSEnabled": false
    },
    "public.assignments": {
      "name": "assignments",
      "schema": "",
      "columns": {
        "paragraph_id": {
          "name": "paragraph_id",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "annotator_id": {
          "name": "annotator_id",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "assigned_at": {
          "name": "assigned_at",
          "type": "timestamp",
          "primaryKey": false,
          "notNull": true,
          "default": "now()"
        },
        "is_warmup": {
          "name": "is_warmup",
          "type": "boolean",
          "primaryKey": false,
          "notNull": true,
          "default": false
        }
      },
      "indexes": {},
      "foreignKeys": {
        "assignments_paragraph_id_paragraphs_id_fk": {
          "name": "assignments_paragraph_id_paragraphs_id_fk",
          "tableFrom": "assignments",
          "tableTo": "paragraphs",
          "columnsFrom": [
            "paragraph_id"
          ],
          "columnsTo": [
            "id"
          ],
          "onDelete": "no action",
          "onUpdate": "no action"
        },
        "assignments_annotator_id_annotators_id_fk": {
          "name": "assignments_annotator_id_annotators_id_fk",
          "tableFrom": "assignments",
          "tableTo": "annotators",
          "columnsFrom": [
            "annotator_id"
          ],
          "columnsTo": [
            "id"
          ],
          "onDelete": "no action",
          "onUpdate": "no action"
        }
      },
      "compositePrimaryKeys": {},
      "uniqueConstraints": {
        "assignments_paragraph_id_annotator_id_unique": {
          "name": "assignments_paragraph_id_annotator_id_unique",
          "nullsNotDistinct": false,
          "columns": [
            "paragraph_id",
            "annotator_id"
          ]
        }
      },
      "policies": {},
      "checkConstraints": {},
      "isRLSEnabled": false
    },
    "public.human_labels": {
      "name": "human_labels",
      "schema": "",
      "columns": {
        "id": {
          "name": "id",
          "type": "integer",
          "primaryKey": true,
          "notNull": true,
          "identity": {
            "type": "always",
            "name": "human_labels_id_seq",
            "schema": "public",
            "increment": "1",
            "startWith": "1",
            "minValue": "1",
            "maxValue": "2147483647",
            "cache": "1",
            "cycle": false
          }
        },
        "paragraph_id": {
          "name": "paragraph_id",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "annotator_id": {
          "name": "annotator_id",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "content_category": {
          "name": "content_category",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "specificity_level": {
          "name": "specificity_level",
          "type": "integer",
          "primaryKey": false,
          "notNull": true
        },
        "notes": {
          "name": "notes",
          "type": "text",
          "primaryKey": false,
          "notNull": false
        },
        "labeled_at": {
          "name": "labeled_at",
          "type": "timestamp",
          "primaryKey": false,
          "notNull": true,
          "default": "now()"
        },
        "session_id": {
          "name": "session_id",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "duration_ms": {
          "name": "duration_ms",
          "type": "integer",
          "primaryKey": false,
          "notNull": false
        },
        "active_ms": {
          "name": "active_ms",
          "type": "integer",
          "primaryKey": false,
          "notNull": false
        }
      },
      "indexes": {},
      "foreignKeys": {
        "human_labels_paragraph_id_paragraphs_id_fk": {
          "name": "human_labels_paragraph_id_paragraphs_id_fk",
          "tableFrom": "human_labels",
          "tableTo": "paragraphs",
          "columnsFrom": [
            "paragraph_id"
          ],
          "columnsTo": [
            "id"
          ],
          "onDelete": "no action",
          "onUpdate": "no action"
        },
        "human_labels_annotator_id_annotators_id_fk": {
          "name": "human_labels_annotator_id_annotators_id_fk",
          "tableFrom": "human_labels",
          "tableTo": "annotators",
          "columnsFrom": [
            "annotator_id"
          ],
          "columnsTo": [
            "id"
          ],
          "onDelete": "no action",
          "onUpdate": "no action"
        }
      },
      "compositePrimaryKeys": {},
      "uniqueConstraints": {
        "human_labels_paragraph_id_annotator_id_unique": {
          "name": "human_labels_paragraph_id_annotator_id_unique",
          "nullsNotDistinct": false,
          "columns": [
            "paragraph_id",
            "annotator_id"
          ]
        }
      },
      "policies": {},
      "checkConstraints": {},
      "isRLSEnabled": false
    },
    "public.paragraphs": {
      "name": "paragraphs",
      "schema": "",
      "columns": {
        "id": {
          "name": "id",
          "type": "text",
          "primaryKey": true,
          "notNull": true
        },
        "text": {
          "name": "text",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "word_count": {
          "name": "word_count",
          "type": "integer",
          "primaryKey": false,
          "notNull": true
        },
        "paragraph_index": {
          "name": "paragraph_index",
          "type": "integer",
          "primaryKey": false,
          "notNull": true
        },
        "company_name": {
          "name": "company_name",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "cik": {
          "name": "cik",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "ticker": {
          "name": "ticker",
          "type": "text",
          "primaryKey": false,
          "notNull": false
        },
        "filing_type": {
          "name": "filing_type",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "filing_date": {
          "name": "filing_date",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "fiscal_year": {
          "name": "fiscal_year",
          "type": "integer",
          "primaryKey": false,
          "notNull": true
        },
        "accession_number": {
          "name": "accession_number",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "sec_item": {
          "name": "sec_item",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "stage1_category": {
          "name": "stage1_category",
          "type": "text",
          "primaryKey": false,
          "notNull": false
        },
        "stage1_specificity": {
          "name": "stage1_specificity",
          "type": "integer",
          "primaryKey": false,
          "notNull": false
        },
        "stage1_method": {
          "name": "stage1_method",
          "type": "text",
          "primaryKey": false,
          "notNull": false
        },
        "stage1_confidence": {
          "name": "stage1_confidence",
          "type": "real",
          "primaryKey": false,
          "notNull": false
        }
      },
      "indexes": {},
      "foreignKeys": {},
      "compositePrimaryKeys": {},
      "uniqueConstraints": {},
      "policies": {},
      "checkConstraints": {},
      "isRLSEnabled": false
    },
    "public.quiz_sessions": {
      "name": "quiz_sessions",
      "schema": "",
      "columns": {
        "id": {
          "name": "id",
          "type": "text",
          "primaryKey": true,
          "notNull": true
        },
        "annotator_id": {
          "name": "annotator_id",
          "type": "text",
          "primaryKey": false,
          "notNull": true
        },
        "started_at": {
          "name": "started_at",
          "type": "timestamp",
          "primaryKey": false,
          "notNull": true,
          "default": "now()"
        },
        "completed_at": {
          "name": "completed_at",
          "type": "timestamp",
          "primaryKey": false,
          "notNull": false
        },
        "passed": {
          "name": "passed",
          "type": "boolean",
          "primaryKey": false,
          "notNull": true,
          "default": false
        },
        "score": {
          "name": "score",
          "type": "integer",
          "primaryKey": false,
          "notNull": true,
          "default": 0
        },
        "total_questions": {
          "name": "total_questions",
          "type": "integer",
          "primaryKey": false,
          "notNull": true
        },
        "answers": {
          "name": "answers",
          "type": "text",
          "primaryKey": false,
          "notNull": true,
          "default": "'[]'"
        }
      },
      "indexes": {},
      "foreignKeys": {
        "quiz_sessions_annotator_id_annotators_id_fk": {
          "name": "quiz_sessions_annotator_id_annotators_id_fk",
          "tableFrom": "quiz_sessions",
          "tableTo": "annotators",
          "columnsFrom": [
            "annotator_id"
          ],
          "columnsTo": [
            "id"
          ],
          "onDelete": "no action",
          "onUpdate": "no action"
        }
      },
      "compositePrimaryKeys": {},
      "uniqueConstraints": {},
      "policies": {},
      "checkConstraints": {},
      "isRLSEnabled": false
    }
  },
  "enums": {},
  "schemas": {},
  "sequences": {},
  "roles": {},
  "policies": {},
  "views": {},
  "_meta": {
    "columns": {},
    "schemas": {},
    "tables": {}
  }
 }
--- a/labelapp/drizzle/meta/_journal.json
+++ b/labelapp/drizzle/meta/_journal.json
@ -15,6 +15,13 @@
      "when": 1774822800000,
      "tag": "0001_add-active-ms",
      "breakpoints": true
    },
    {
      "idx": 2,
      "version": "7",
      "when": 1775437200000,
      "tag": "0002_v2-reset",
      "breakpoints": true
    }
  ]
 }
--- a/labelapp/entrypoint.sh
+++ b/labelapp/entrypoint.sh
@ -21,15 +21,12 @@ await sql.end();
 if [ "$ROW_COUNT" = "0" ]; then
  export SEED_PARAGRAPHS_PATH=/app/data/paragraphs-clean.jsonl
  export SEED_ANNOTATIONS_PATH=/app/data/stage1.jsonl
-  export SAMPLED_IDS_PATH=/app/labelapp/.sampled-ids.json
+  export SEED_HOLDOUT_IDS_PATH=/app/data/v2-holdout-ids.json
  echo "==> Database is empty, seeding..."
  bun run scripts/seed.ts
-  echo "==> Running sampling..."
+  echo "==> Generating assignments..."
  bun run scripts/sample.ts
  echo "==> Running assignment generation..."
  bun run scripts/assign.ts
  echo "==> Seeding complete."
--- a/labelapp/lib/auth.ts
+++ b/labelapp/lib/auth.ts
@ -57,7 +57,7 @@ export async function createSession(annotatorId: string): Promise<void> {
  });
 }
-export async function getSession(): Promise<{ annotatorId: string } | null> {
+export async function getSession(): Promise<{ annotatorId: string; createdAt: number } | null> {
  const cookieStore = await cookies();
  const raw = cookieStore.get(SESSION_COOKIE)?.value;
  if (!raw) return null;
@ -65,7 +65,7 @@ export async function getSession(): Promise<{ annotatorId: string } | null> {
  const payload = verifyAndDecode(raw);
  if (!payload) return null;
-  return { annotatorId: payload.annotatorId };
+  return { annotatorId: payload.annotatorId, createdAt: payload.createdAt };
 }
 export async function destroySession(): Promise<void> {
--- a/labelapp/lib/onboarding-content.ts
+++ b/labelapp/lib/onboarding-content.ts
@ -16,551 +16,262 @@ export interface OnboardingStep {
 }
 export const ONBOARDING_STEPS: OnboardingStep[] = [
-  // ── Step 1: What You'll Be Doing ──────────────────────────────────────
+  // ── Step 1: Welcome Back ─────────────────────────────────────────────
  {
    id: 1,
-    title: "What You'll Be Doing",
+    title: "Welcome Back — What's New in v2",
-    subtitle: "A quick overview of the labeling task",
+    subtitle: "Same task, cleaner rules, faster labeling",
    content: [
-      "You're going to read short paragraphs from SEC filings about cybersecurity and label them. No prior knowledge of SEC filings or cybersecurity is needed — we'll teach you everything right here.",
+      "You're labeling SEC cybersecurity disclosure paragraphs again — same 7 categories, same 4 specificity levels, same two questions per paragraph. But the codebook has been overhauled based on what we learned from v1.",
-      "Since 2023, the SEC requires every public company to disclose how they handle cybersecurity risk (in their annual 10-K filings, Item 1C) and any cybersecurity incidents (in 8-K filings). These disclosures are what you'll be reading.",
+      "The good news: v2 is designed to match your intuition. Most of the time, your gut feeling about a paragraph will be correct. The rules are there for the edge cases, not the obvious ones.",
-      "Your job is simple: read each paragraph and answer two questions about it. That's it.",
+      "Here's what changed and why:",
-      "This tool is building a gold-standard dataset for training an AI classifier. There are 6 annotators total, with 3 annotators labeling each paragraph. You'll label roughly 600 paragraphs out of 1,200 total.",
+      "Category assignment is now driven by one question: \"What question does this paragraph primarily answer?\" — not mechanical tests or keyword matching. The person-removal test still exists as a confirmation tool for the BG/MR/RMP boundary, but it's no longer the primary rule.",
      "Management Role is broader: it now covers how management is ORGANIZED to handle cybersecurity — role allocation, committee structure, reporting lines — not just \"who a specific person is.\" Paragraphs about management structure without named individuals can be MR.",
      "Specificity Level 2 is broader: renamed from \"Sector-Adapted\" to \"Domain-Adapted.\" Cybersecurity terms like penetration testing, vulnerability scanning, SIEM, and SOC now trigger Level 2. In v1, these were incorrectly classified as Level 1.",
      "Level 4 requires just 1 QV fact (was 2+). No more counting. If an external party could verify even one claim in the paragraph — a dollar amount, a named tool, a specific date — it's Level 4.",
      "You'll be labeling 1,200 holdout paragraphs total. There are 5 annotators, with 3 labeling each paragraph. You'll see roughly 720.",
    ],
    keyPoints: [
-      "Each paragraph gets two labels: a Content Category and a Specificity Level.",
+      "Same 7 categories, same 4 specificity levels — the framework is unchanged.",
-      "You don't need any background in finance, law, or cybersecurity.",
+      "Rules now follow human intuition: \"what question does this paragraph answer?\"",
-      "Your labels are the ground truth that an AI model will learn from — accuracy matters.",
+      "Level 2 is broader (domain terminology), Level 4 is easier to reach (1 QV fact).",
      "Your labels are building the gold standard for the final model. Accuracy matters.",
    ],
  },
-  // ── Step 2: The Two Questions ─────────────────────────────────────────
+  // ── Step 2: The Two Questions ────────────────────────────────────────
  {
    id: 2,
    title: "The Two Questions",
-    subtitle: "Every paragraph gets exactly two labels",
+    subtitle: "Same as before — one category, one specificity",
    content: [
-      'Question 1: "What is this paragraph about?" — this is the Content Category. You pick one of 7 options.',
+      "For every paragraph, you answer two questions:",
-      'Question 2: "How specific is this paragraph?" — this is the Specificity Level. You pick one of 4 levels.',
+      "Question 1 — Content Category: \"What is this paragraph about?\" Pick the best of 7 options.",
-      "Every paragraph gets exactly one answer for each question. This is single-label classification — pick the BEST fit, not multiple labels.",
+      "Question 2 — Specificity Level: \"How company-specific is this paragraph?\" Pick a level from 1 to 4.",
      "These are independent dimensions. A materiality disclaimer can be Strategy Integration (category) at Level 1 (generic boilerplate). An incident report can be Incident Disclosure at Level 4 (specific dates and firms).",
    ],
    keyPoints: [
-      "Content Category: one of 7 mutually exclusive options.",
+      "One content category (of 7) — pick the dominant one.",
-      "Specificity Level: one of 4 levels from vague to very specific.",
+      "One specificity level (1–4) — determined by the most specific fact present.",
-      "Always pick the single best answer for each dimension.",
+      "Category and specificity are independent — don't let one influence the other.",
    ],
  },
-  // ── Step 3: Content Categories Overview ───────────────────────────────
+  // ── Step 3: Content Categories ───────────────────────────────────────
  {
    id: 3,
-    title: "Content Categories Overview",
+    title: "Content Categories",
-    subtitle: "The 7 categories at a glance",
+    subtitle: "Ask: \"What question does this paragraph answer?\"",
    content: [
-      "There are 7 mutually exclusive content categories. Here's a plain-English way to think about each one:",
+      "For every paragraph, ask yourself which question it primarily answers:",
-      "Board Governance — Who's in charge at the board level?",
+      "\"How does the board oversee cybersecurity?\" → Board Governance — Board or committee is the subject overseeing, receiving reports, delegating.",
-      "Management Role — Who's the person running cybersecurity?",
+      "\"How is management organized to handle cybersecurity?\" → Management Role — Who holds responsibilities, their qualifications, how roles are divided, reporting lines between management.",
-      "Risk Management Process — What does the company's cyber program actually do?",
+      "\"What does the cybersecurity program do?\" → Risk Management Process — Activities, tools, frameworks, processes — regardless of who is mentioned as responsible.",
-      "Third-Party Risk — How do they handle vendor/supplier risk?",
+      "\"How are third-party cyber risks managed?\" → Third-Party Risk — Requirements imposed on vendors, assessment of vendor security. NOT hiring a firm to test your OWN systems (that's RMP).",
-      "Incident Disclosure — Did something bad actually happen?",
+      "\"What happened in a cybersecurity incident?\" → Incident Disclosure — Actual events that occurred. NOT hypothetical \"we may experience\" language.",
-      "Strategy Integration — What does cyber risk mean for the business/money?",
+      "\"How does cybersecurity affect the business/finances?\" → Strategy Integration — Budget, insurance, materiality assessments. Key rule: any statement concluding that cyber risks have or haven't \"materially affected\" the business → always SI.",
-      "None/Other — None of the above.",
+      "None of the above? → None/Other — Section headers, cross-references, SPACs with no program. Always gets Specificity 1.",
-      "If a paragraph touches multiple categories, pick the dominant one — the category that takes up most of the paragraph's text.",
+      "If a paragraph touches multiple categories, pick the one whose question it most directly answers. When genuinely split, the category that takes up the most text wins.",
    ],
    keyPoints: [
      "7 categories, mutually exclusive — always pick exactly one.",
      "When in doubt, pick the category that dominates the paragraph.",
    ],
  },
  // ── Step 4: Board Governance ──────────────────────────────────────────
  {
    id: 4,
    title: "Board Governance",
    subtitle: "Board or committee oversight of cybersecurity risk",
    content: [
      "Definition: Board or committee oversight of cybersecurity risk. The board or a board committee is the grammatical subject performing the primary action.",
      'Look for language like: "Board of Directors oversees," "Audit Committee," "quarterly briefings," "board-level expertise."',
      "IS Board Governance: Board receives reports, Audit Committee oversees cyber risk, directors review cybersecurity matters.",
      "NOT Board Governance: CISO reports TO the board (that's Management Role — the CISO is the subject), board mentioned only in passing.",
    ],
    examples: [
      {
-        text: "The Board of Directors oversees the Company's management of cybersecurity risks.",
+        text: "The Board of Directors oversees the Company's management of cybersecurity risks. The Board has delegated oversight to the Audit Committee, which receives quarterly reports from the CISO.",
        category: "Board Governance",
        explanation:
-          "The board is the subject doing the overseeing. Classic Board Governance.",
+          "Answers \"how does the board oversee?\" The CISO is mentioned as the reporting mechanism, but the paragraph is about the board's oversight structure.",
      },
      {
-        text: "The Audit Committee receives quarterly reports from the CISO and conducts an annual deep-dive review of the Company's cybersecurity program, threat landscape, and incident response readiness.",
+        text: "Our CISO, who holds CISSP certification and has 20 years of experience, reports to the CIO and leads a team of 12 security professionals.",
        category: "Board Governance",
        explanation:
          "Even though the CISO is mentioned, the Audit Committee is the one performing the actions (receiving, conducting). The committee is the grammatical subject.",
      },
      {
        text: "Our Board of Directors recognizes the critical importance of maintaining the trust and confidence of our customers and stakeholders, and cybersecurity risk is an area of increasing focus for our Board.",
        category: "Board Governance",
        explanation:
          "Generic statement about board awareness — still Board Governance because the board is the subject performing the action (recognizing).",
      },
    ],
    tip: "The key test is always: who is the grammatical subject? If the board or a board committee is doing the action, it's Board Governance.",
  },
  // ── Step 5: Management Role ───────────────────────────────────────────
  {
    id: 5,
    title: "Management Role",
    subtitle: "Named officers or management teams responsible for cybersecurity",
    content: [
      "Definition: Named officers or management teams responsible for cybersecurity. A specific person or management function is the grammatical subject.",
      "This category is about WHO THE PERSON IS — their background, credentials, experience, reporting structure.",
      "IS Management Role: CISO's qualifications described, VP of Security's background, management committee structure.",
      "NOT Management Role: CISO mentioned once and then the paragraph describes the program (that's Risk Management Process).",
    ],
    examples: [
      {
        text: "Our Vice President of Information Security, who holds CISSP and CISM certifications and has over 20 years of experience in cybersecurity, reports directly to our Chief Information Officer.",
        category: "Management Role",
        explanation:
-          "It's about the person — their certifications, experience, and reporting line.",
+          "Answers \"how is management organized?\" — the person's credentials, reporting line, and team. Remove the person and nothing remains.",
      },
      {
-        text: "Our CISO, Sarah Chen, leads a dedicated cybersecurity team of 35 professionals. Ms. Chen joined the Company in 2019 after serving as Deputy CISO at a Fortune 100 financial services firm.",
+        text: "Our CISO oversees a cybersecurity program that includes penetration testing, vulnerability scanning, and incident response planning aligned with NIST CSF.",
        category: "Management Role",
        explanation:
          "The paragraph tells you about Sarah Chen as a person — name, team, tenure, prior role.",
      },
      {
        text: "Management is responsible for assessing and managing cybersecurity risks within the organization.",
        category: "Management Role",
        explanation:
          "Generic, but still about who is responsible (management as the subject), not what the program does.",
      },
    ],
    tip: "Ask yourself: is this paragraph telling me about a person (or role), or about what the cybersecurity program does? If it's about the person, it's Management Role.",
  },
  // ── Step 6: Board vs Management — The Key Test ────────────────────────
  {
    id: 6,
    title: "Board vs Management — The Key Test",
    subtitle: "The #1 source of confusion between annotators",
    content: [
      "This is the single most common mistake. The test is simple: WHO is the grammatical subject performing the action?",
      "Board or committee is the subject → Board Governance.",
      "Named officer or management team is the subject → Management Role.",
      'The Person-vs-Function test: If you removed the person\'s name, title, and credentials, does the paragraph still describe cybersecurity activities? If YES → it\'s about the function (Risk Management Process), not the person (Management Role). Naming a cybersecurity title like "CISO" or "CIO" does NOT automatically make it Management Role. The title is often just attribution before describing the program.',
    ],
    examples: [
      {
        text: "The Board has delegated oversight of cybersecurity matters to the Audit Committee, which meets quarterly with the CISO.",
        category: "Board Governance",
        explanation:
          "Board is the subject. The CISO is mentioned incidentally.",
      },
      {
        text: "Our CISO reports quarterly to the Board on cybersecurity threats and program performance.",
        category: "Management Role",
        explanation:
          "The CISO is the subject performing the action (reporting).",
      },
      {
        text: "Our CISO oversees the Company's cybersecurity program, which includes risk assessments, vulnerability scanning, penetration testing, and incident response planning aligned with the NIST CSF framework.",
        category: "Risk Management Process",
        explanation:
-          'NOT Management Role! The CISO is mentioned once as attribution, but the paragraph is about what the program does. Remove "Our CISO oversees" and it still makes complete sense as a description of the program.',
+          "Answers \"what does the program do?\" The CISO is just attribution. Remove \"Our CISO oversees\" and you still have a complete program description.",
      },
    ],
    keyPoints: [
      "Who is the grammatical subject? Board → Board Governance. Officer → Management Role.",
      "Person-vs-Function test: remove the name/title — does the paragraph still describe activities? If yes, it's Risk Management Process.",
      "A CISO mention does NOT automatically mean Management Role.",
    ],
  },
  // ── Step 7: Risk Management Process ───────────────────────────────────
  {
    id: 7,
    title: "Risk Management Process",
    subtitle: "Internal cybersecurity program mechanics",
    content: [
      "Definition: Internal cybersecurity program mechanics — frameworks, assessments, controls, training, monitoring.",
      'This is the "what do they actually do" category.',
      "IS Risk Management Process: NIST framework adoption, penetration testing, employee training, SOC operations, vulnerability scanning.",
      "NOT Risk Management Process: Vendor assessments (that's Third-Party Risk), incident response actions during a real incident (that's Incident Disclosure).",
    ],
    examples: [
      {
        text: "We maintain a cybersecurity risk management program that is integrated into our overall enterprise risk management framework.",
        category: "Risk Management Process",
        explanation:
          "Describing the program's existence and its integration into enterprise risk management.",
      },
      {
        text: "Our cybersecurity program is aligned with the NIST Cybersecurity Framework and incorporates elements of ISO 27001. We conduct regular risk assessments and penetration testing.",
        category: "Risk Management Process",
        explanation:
          "Framework adoption and specific program activities. All about the internal program.",
      },
      {
        text: "We operate a 24/7 Security Operations Center that uses Splunk SIEM and CrowdStrike Falcon endpoint detection. Our incident response team conducts quarterly tabletop exercises simulating ransomware and supply chain compromise scenarios.",
        category: "Risk Management Process",
        explanation:
          "Specific tools, team operations, and exercises. Very detailed, but still about the internal program — no real incident happened.",
      },
    ],
    tip: "If the paragraph describes what the cybersecurity program does day-to-day, it's almost always Risk Management Process.",
  },
  // ── Step 8: Third-Party Risk ──────────────────────────────────────────
  {
    id: 8,
    title: "Third-Party Risk",
    subtitle: "Oversight of external parties' cybersecurity",
    content: [
      "Definition: Oversight of external parties' cybersecurity — vendors, suppliers, service providers.",
      "The key question: is the CENTRAL topic about overseeing someone outside the company?",
      "IS Third-Party Risk: Vendor assessments, third-party audits, supply chain monitoring, SOC 2 requirements for vendors.",
      "NOT Third-Party Risk: Internal program that happens to use third-party tools (Risk Management Process), hiring a firm to test YOUR systems (Risk Management Process).",
    ],
    examples: [
      {
        text: "We face cybersecurity risks associated with our use of third-party service providers who may have access to our systems and data.",
        category: "Third-Party Risk",
        explanation: "About vendor risk exposure — the central topic is external parties.",
      },
      {
        text: "Our vendor risk management program requires all third-party service providers with access to sensitive data to meet minimum security standards, including SOC 2 Type II certification.",
        category: "Third-Party Risk",
        explanation: "Requirements imposed on vendors. The focus is on what the company demands from external parties.",
      },
      {
        text: "We assessed 312 vendors in fiscal 2024. All Tier 1 vendors are required to provide annual SOC 2 Type II reports. 14 vendors were placed on remediation plans and 3 vendor relationships were terminated.",
        category: "Third-Party Risk",
        explanation:
          "Very specific vendor oversight with hard numbers. Still Third-Party Risk because the central topic is vendor management.",
      },
    ],
    tip: 'Watch out for the "hired a firm" trap. If a company says "we engaged Mandiant to conduct penetration testing," that\'s Risk Management Process — Mandiant is testing the company\'s own systems, not being overseen as a vendor.',
  },
  // ── Step 9: Incident Disclosure ───────────────────────────────────────
  {
    id: 9,
    title: "Incident Disclosure",
    subtitle: "Description of an actual cybersecurity incident",
    content: [
      "Definition: Description of an actual cybersecurity incident — what happened, the timeline, the impact, the response.",
      "Key word: ACTUAL. Something really happened. Not hypothetical.",
      'IS Incident Disclosure: "We detected unauthorized access," specific breach details, forensic investigation results.',
      'NOT Incident Disclosure: Generic "we may experience incidents" (that\'s hypothetical risk language), incident response PLANS (that\'s Risk Management Process).',
    ],
    examples: [
      {
        text: "On January 15, 2024, we detected unauthorized access to our customer support portal. The threat actor exploited a known vulnerability in a third-party software component.",
        category: "Incident Disclosure",
        explanation:
          "A real event with a real date and real details. Something actually happened.",
      },
      {
        text: "In December 2023, the Company experienced a cybersecurity incident involving unauthorized access to certain internal systems. The Company promptly took steps to contain and remediate the incident.",
        category: "Incident Disclosure",
        explanation:
          "Real event, though vaguer than the previous example. Still describes something that actually occurred.",
      },
      {
        text: "We have experienced, and may in the future experience, cybersecurity incidents that could have a material adverse effect on our business.",
        category: "None/Other",
        explanation:
          "NOT Incident Disclosure. This is hypothetical risk language — no actual incident is described. Depending on context, this would be Strategy Integration or None/Other.",
      },
    ],
    keyPoints: [
      "The incident must be REAL — something that actually happened.",
      "Hypothetical risk language (\"we may experience\") is never Incident Disclosure.",
      "Incident response plans and tabletop exercises are Risk Management Process, not Incident Disclosure.",
    ],
  },
  // ── Step 10: Strategy Integration ─────────────────────────────────────
  {
    id: 10,
    title: "Strategy Integration",
    subtitle: "Business and financial consequences of cyber risk",
    content: [
      "Definition: Business/financial consequences of cyber risk — budget, insurance, M&A impact, competitive impact, materiality assessments.",
      'This is the "what does it mean for the business" category.',
      'IMPORTANT RULE: Any paragraph that says cybersecurity risks have or could "materially affect" the business → Strategy Integration, even if it\'s boilerplate language.',
      'IS Strategy Integration: Cyber budget amounts, insurance coverage, "not materially affected" statements, cost of incidents.',
      "NOT Strategy Integration: Technical program costs mentioned in passing (that's Risk Management Process).",
    ],
    examples: [
      {
        text: "We increased our cybersecurity budget by 32% to $45M in fiscal 2024. We maintain cyber liability insurance with $100M in aggregate coverage through AIG and Chubb.",
        category: "Strategy Integration",
        explanation:
          "Dollar amounts and business decisions about cyber spending. This is about what cyber risk means for the business financially.",
      },
      {
        text: "Cybersecurity risks have not materially affected, and are not reasonably likely to materially affect, our business strategy, results of operations, or financial condition.",
        category: "Strategy Integration",
        explanation:
-          "This is boilerplate that appears in thousands of filings, but it IS a materiality assessment — the company is making a strategic judgment about impact. Always Strategy Integration.",
+          "A materiality assessment — the company is stating a conclusion about business impact. Always SI, even though it's boilerplate.",
      },
      {
        text: "We have not identified any cybersecurity incidents that have materially affected us. For more information, see Item 1A, Risk Factors.",
        category: "Strategy Integration",
        explanation:
          "The materiality assessment is the key content. The cross-reference at the end is just noise.",
      },
    ],
-    tip: 'The "materially affect" rule is one of the most important. Whenever you see the word "materially" in the context of business impact, think Strategy Integration.',
+    tip: "Most paragraphs will be obvious. Trust your read. The \"what question?\" test is there for when you hesitate.",
  },
-  // ── Step 11: None/Other ───────────────────────────────────────────────
+  // ── Step 4: The Tricky Boundaries ────────────────────────────────────
  {
-    id: 11,
+    id: 4,
-    title: "None/Other",
+    title: "The Tricky Boundaries",
-    subtitle: "Doesn't fit any of the above categories",
+    subtitle: "Where 80% of real disagreements live",
    content: [
-      "Definition: Doesn't fit any of the other 6 categories. Generic corporate language, section headers, cross-references, non-cyber content.",
+      "Most categories are intuitive. These three boundaries are where annotators actually disagree:",
      "Specificity is always 1 (Generic Boilerplate) for None/Other paragraphs — there's no cyber content to rate.",
      'SPAC rule: Companies that say "we have no operations" or "we have not adopted any cybersecurity program" → None/Other, even if they mention the board.',
      'Cross-reference vs materiality: "See Item 1A, Risk Factors" alone = None/Other. But if it also includes "have not materially affected our business" = Strategy Integration.',
    ],
    examples: [
      {
-        text: "Item 1C. Cybersecurity",
+        text: "Board vs Management vs RMP — The Governance Chain",
-        category: "None/Other",
+        explanation:
-        explanation: "Just a section header. No disclosure content.",
+          "Many paragraphs chain Board → Committee → Officer → Program. The \"what question?\" test cuts through: if the paragraph explains how OVERSIGHT works (board receives reports, committee delegates) → BG. If it explains how management is ORGANIZED (role allocation, who reports to whom in management, qualifications) → MR. If it describes what the PROGRAM DOES (tools, processes, frameworks) → RMP. Confirmation tool: remove all person-specific content. If a program description remains → RMP. If the paragraph collapses → MR.",
      },
      {
-        text: "For additional information about risks related to our information technology systems, see Part I, Item 1A, 'Risk Factors.'",
+        text: "Materiality → Always Strategy Integration",
        category: "Strategy Integration",
        explanation:
-          "Pure cross-reference with no disclosure content of its own.",
+          "Any paragraph that STATES A CONCLUSION about whether cyber risks materially affect the business → SI. \"Have not materially affected\" → SI. \"Are reasonably likely to materially affect\" → SI. But bare \"could have a material adverse effect\" is speculation, not a conclusion → N/O. And \"for risks that may materially affect us, see Item 1A\" is a cross-reference, not an assessment → N/O. The test: is the company making a judgment, or just pointing elsewhere / speculating?",
      },
      {
-        text: "We are a special purpose acquisition company with no business operations. We have not adopted any cybersecurity risk management program. Our Board of Directors is generally responsible for oversight of cybersecurity risks, if any.",
+        text: "SPACs and No-Program Companies → None/Other",
        category: "None/Other",
        explanation:
-          "Despite mentioning the Board, this company has no program — the board mention is perfunctory. SPACs with no operations get None/Other.",
+          "Companies that say \"we have no operations\" or \"we have not adopted any cybersecurity program\" get N/O — even if they mention the board. The absence of a program is not a disclosure. Board mentions in this context are perfunctory (\"generally responsible... if any\").",
      },
    ],
    keyPoints: [
-      "None/Other always gets Specificity 1.",
+      "BG/MR/RMP: what question does it answer? Oversight → BG. Organization → MR. Activities → RMP.",
-      "SPACs with no operations → None/Other, even if they mention the board.",
+      "Person-removal test confirms MR vs RMP: remove the people — does a program remain?",
-      "Pure cross-references → None/Other. Cross-references with materiality language → Strategy Integration.",
+      "Materiality CONCLUSION = SI. Materiality SPECULATION or CROSS-REFERENCE = N/O.",
      "No program = no disclosure = N/O, regardless of incidental mentions.",
    ],
  },
-  // ── Step 12: Decision Rules Recap ─────────────────────────────────────
+  // ── Step 5: Specificity — The 4 Levels ──────────────────────────────
  {
-    id: 12,
+    id: 5,
-    title: "Decision Rules Recap",
+    title: "Specificity — The 4 Levels",
-    subtitle: "Quick-reference rules for tricky cases",
+    subtitle: "How company-specific is this paragraph?",
    content: [
-      "Here are the 6 decision rules that handle the most common edge cases:",
+      "Specificity measures how much this paragraph tells you about THIS specific company versus generic filler any company could use.",
-      "Rule 1 — Dominant Category: If a paragraph spans multiple categories, assign the one that takes up the most text.",
+      "Think of it as a waterfall — check from the top and stop at the first yes:",
-      "Rule 2 — Board vs Management: Who is the grammatical subject? Board/committee → Board Governance. Named officer/team → Management Role.",
+      "Level 4 — Quantified-Verifiable: Can an external party verify at least one claim? (a specific number, date, named tool/firm, verifiable certification) → Level 4.",
-      "Rule 2b — Person vs Function: Is the paragraph about the person or what the program does? Remove the name/title — if the paragraph still describes activities, it's Risk Management Process.",
+      "Level 3 — Firm-Specific: Does it contain at least one fact unique to THIS company? (CISO title, named non-generic committee, named individual, 24/7 SOC) → Level 3.",
-      "Rule 3 — Risk Management vs Third-Party: Is the central topic internal processes or vendor oversight?",
+      "Level 2 — Domain-Adapted: Does it use cybersecurity domain terminology? (penetration testing, SIEM, NIST CSF, vulnerability scanning, zero trust) → Level 2.",
-      "Rule 4 — Incident vs Strategy: What happened (Incident Disclosure) vs what it means for the business (Strategy Integration).",
+      "Level 1 — Generic Boilerplate: None of the above. Could paste into any filing unchanged.",
-      "Rule 5 — None/Other Threshold: Only assign None/Other when there's no substantive cyber disclosure.",
+      "None/Other paragraphs always get Level 1.",
-      "Rule 6 — Materiality Disclaimers: Any paragraph with a materiality assessment always goes to Strategy Integration.",
+      "v2 change: Level 2 is broader (domain terms, not just named standards) and Level 4 needs only 1 QV fact (not 2+). This makes the waterfall simpler — less counting, more recognizing.",
    ],
    keyPoints: [
      "Rule 1: Dominant category wins when a paragraph spans multiple topics.",
      "Rule 2: Grammatical subject determines Board Governance vs Management Role.",
      "Rule 2b: Person-vs-Function test — remove the name and see if the paragraph still works.",
      "Rule 3: Internal processes → Risk Management Process. Vendor oversight → Third-Party Risk.",
      "Rule 4: Real event → Incident Disclosure. Business impact → Strategy Integration.",
      "Rule 5: None/Other only when there's no substantive disclosure.",
      "Rule 6: Materiality disclaimers → always Strategy Integration.",
    ],
  },
  // ── Step 13: Specificity — What It Measures ───────────────────────────
  {
    id: 13,
    title: "Specificity — What It Measures",
    subtitle: "The second dimension: how company-specific is the disclosure?",
    content: [
      "Now for the second question you'll answer for every paragraph: Specificity Level.",
      'Think of it this way: "Could you paste this paragraph into any company\'s filing and it would still make sense?"',
      "If yes → low specificity (it's generic boilerplate).",
      "If no, because it mentions this specific company's people, tools, numbers, or dates → high specificity.",
      "There are 4 levels, from vague to very specific:",
      "Level 1 — Generic Boilerplate: Could appear in any filing unchanged.",
      "Level 2 — Sector-Adapted: Names a recognized standard but nothing unique to this company.",
      "Level 3 — Firm-Specific: Contains at least one fact unique to this company.",
      "Level 4 — Quantified-Verifiable: Contains two or more hard verifiable facts.",
      "None/Other paragraphs always get Specificity 1.",
    ],
    keyPoints: [
      "Specificity measures how unique the disclosure is to this specific company.",
      "4 levels: Generic Boilerplate (1) → Sector-Adapted (2) → Firm-Specific (3) → Quantified-Verifiable (4).",
      "None/Other paragraphs are always Specificity 1.",
    ],
  },
  // ── Step 14: Generic Boilerplate & Sector-Adapted (Levels 1-2) ───────
  {
    id: 14,
    title: "Generic Boilerplate & Sector-Adapted (Levels 1-2)",
    subtitle: "The lower specificity levels",
    content: [
      "Level 1 — Generic Boilerplate: Could appear in any company's filing unchanged. No named frameworks, tools, people, dates, or quantities.",
      "Level 2 — Sector-Adapted: Names a recognized standard (NIST, ISO 27001, SOC 2, PCI DSS, HIPAA, etc.) but nothing unique to THIS company.",
      "The jump from Level 1 to Level 2: does the paragraph name a specific standard or framework? If yes, and there are no other company-specific facts, it's Level 2.",
    ],
    examples: [
      {
        text: "We maintain a cybersecurity risk management program designed to identify, assess, and manage material cybersecurity risks.",
        specificity: "Level 1 — Generic Boilerplate",
        explanation:
-          "Could be any company. No named frameworks, tools, people, or details of any kind.",
+          "Pure business language. \"Identify, assess, and manage\" is generic ERM phrasing — no cybersecurity domain terms, nothing unique.",
      },
      {
-        text: "Management is responsible for assessing and managing cybersecurity risks within the organization.",
+        text: "We conduct regular penetration testing and vulnerability scanning as part of our continuous monitoring approach.",
-        specificity: "Level 1 — Generic Boilerplate",
+        specificity: "Level 2 — Domain-Adapted",
        explanation:
-          "No named roles, frameworks, or details. Completely interchangeable between companies.",
+          "\"Penetration testing\" and \"vulnerability scanning\" are cybersecurity domain terms — they wouldn't appear in a generic ERM document. But nothing here is unique to THIS company.",
      },
      {
        text: "Our cybersecurity program is aligned with the NIST Cybersecurity Framework and incorporates elements of ISO 27001.",
        specificity: "Level 2 — Sector-Adapted",
        explanation:
          "Names NIST and ISO 27001, but nothing unique to this company — many companies say the exact same thing.",
      },
      {
        text: "We conduct regular risk assessments, vulnerability scanning, and penetration testing as part of our continuous monitoring approach.",
        specificity: "Level 1 — Generic Boilerplate",
        explanation:
          "NOT Sector-Adapted. These are common practices, but they don't name a specific standard. Activities alone don't bump you to Level 2.",
      },
    ],
    tip: "Common practices like penetration testing and vulnerability scanning do NOT trigger Level 2. You need a named standard or framework (NIST, ISO, SOC 2, etc.) to reach Level 2.",
  },
  // ── Step 15: Firm-Specific & Quantified-Verifiable (Levels 3-4) ──────
  {
    id: 15,
    title: "Firm-Specific & Quantified-Verifiable (Levels 3-4)",
    subtitle: "The higher specificity levels",
    content: [
      "Level 3 — Firm-Specific: Contains at least one fact unique to THIS company.",
      "Level 4 — Quantified-Verifiable: Contains TWO or more hard verifiable facts.",
      "What counts as a specific fact (triggers Level 3): cybersecurity-specific titles (CISO, CTO, CIO, VP of Security), named non-generic committees (Technology Committee, Cybersecurity Committee — NOT Audit Committee since every company has one), specific dates, named tools (Splunk, CrowdStrike, Azure Sentinel), named third-party firms (Mandiant, Deloitte), specific numbers (headcounts, dollar amounts, percentages).",
      "What does NOT count as a specific fact: generic governance terms (the Board, Audit Committee, management), generic C-suite titles (CEO, CFO, COO — not cybersecurity-specific), unnamed entities (third-party experts, external consultants), generic cadences (quarterly, annual without exact dates), common practices (penetration testing, vulnerability scanning).",
    ],
    examples: [
      {
        text: "Our CISO oversees a team of 12 security professionals.",
        specificity: "Level 3 — Firm-Specific",
        explanation:
          "CISO (cybersecurity-specific title) is one specific fact. But just one fact, so Level 3, not Level 4.",
      },
      {
        text: "Our CISO, Sarah Chen, holds CISSP and CISM certifications and has over 20 years of experience. She joined the Company in 2019 after serving as Deputy CISO at a Fortune 100 firm.",
        specificity: "Level 4 — Quantified-Verifiable",
        explanation:
          "Named person + named certifications + years of experience + specific year = 4+ verifiable facts. Easily clears the 2-fact threshold for Level 4.",
      },
      {
        text: "The Audit Committee oversees cybersecurity risk.",
        specificity: "Level 1 — Generic Boilerplate",
        explanation:
          "\"Audit Committee\" is NOT a specific fact — every public company has one. No other specifics present.",
      },
    ],
    keyPoints: [
      "Cybersecurity-specific titles (CISO, CIO, CTO) count as specific facts. Generic titles (CEO, CFO) do not.",
      "Audit Committee is NOT a specific fact — it's generic governance.",
      "Level 4 requires two or more HARD verifiable facts: dates, dollars, headcounts, named firms, named tools, named certifications, years of experience.",
      "Named roles and committees trigger Level 3 but do NOT count toward the Level 4 threshold.",
    ],
  },
  // ── Step 16: The Specificity Decision Test ────────────────────────────
  {
    id: 16,
    title: "The Specificity Decision Test",
    subtitle: "A step-by-step waterfall to determine specificity",
    content: [
      "Apply this waterfall — stop at the first yes:",
      "Step A: Count ONLY hard verifiable facts: specific dates (month+year or exact date), dollar amounts, headcounts, percentages, named third-party firms, named products/tools, named certifications held by individuals, years of experience as a specific number. Two or more? → Level 4 (Quantified-Verifiable).",
      "Step B: At least one fact from the specific-facts list (cybersecurity titles, named committees, named tools, named firms, specific dates, specific numbers)? → Level 3 (Firm-Specific).",
      "Step C: Names a recognized standard (NIST, ISO, SOC 2, PCI DSS, HIPAA, etc.)? → Level 2 (Sector-Adapted).",
      "Step D: None of the above? → Level 1 (Generic Boilerplate).",
      "Important: named roles (CISO, CIO) and named committees trigger Firm-Specific (Level 3) but do NOT count toward the 2-fact threshold for Quantified-Verifiable (Level 4). Named frameworks (NIST, ISO) also do not count toward Level 4.",
    ],
    examples: [
      {
        text: "We operate a 24/7 Security Operations Center that uses Splunk SIEM and CrowdStrike Falcon endpoint detection. Our incident response team conducts quarterly tabletop exercises.",
        specificity: "Level 4 — Quantified-Verifiable",
        explanation:
          "Count QV facts: Splunk (named tool) + CrowdStrike Falcon (named tool) = 2 hard verifiable facts. Meets the threshold → Level 4.",
      },
      {
        text: "Our CISO oversees the cybersecurity program aligned with NIST CSF.",
        specificity: "Level 3 — Firm-Specific",
        explanation:
-          "Count QV facts: none (CISO is a role, NIST is a framework — neither counts toward QV). Specific-facts list: CISO (yes, cybersecurity-specific title). → Level 3.",
+          "CISO is a cybersecurity-specific title (firm-specific fact). NIST CSF is domain terminology (Level 2). The CISO pushes it to Level 3. But no QV facts — CISO is a role, not a verifiable claim.",
      },
      {
-        text: "Our cybersecurity program incorporates elements of ISO 27001.",
+        text: "We engaged Deloitte to assess our cybersecurity program in fiscal 2024, resulting in 12 recommendations.",
-        specificity: "Level 2 — Sector-Adapted",
+        specificity: "Level 4 — Quantified-Verifiable",
        explanation:
-          "Count QV facts: none. Specific-facts list: none (ISO is a standard, not a firm-specific fact). Named standard: ISO 27001 (yes). → Level 2.",
+          "Deloitte (named firm), fiscal 2024 (date tied to a cyber fact), 12 recommendations (specific number). All three are QV-eligible, and any one of them alone is enough for Level 4.",
      },
    ],
-    tip: "When in doubt, count the verifiable facts on your fingers. If you can point to two things that an outside observer could independently confirm, it's Level 4.",
+    tip: "The intuition: Level 1 = \"any company could have written this.\" Level 2 = \"a security person wrote this but it could be any company.\" Level 3 = \"I know something about THIS company.\" Level 4 = \"I could fact-check this.\"",
  },
-  // ── Step 17: Putting It All Together ──────────────────────────────────
+  // ── Step 6: What Counts (and What Doesn't) ──────────────────────────
  {
-    id: 17,
+    id: 6,
-    title: "Putting It All Together",
+    title: "What Counts (and What Doesn't)",
-    subtitle: "Borderline cases that exercise both dimensions",
+    subtitle: "The lines between levels 1–4",
    content: [
-      "Let's work through some tricky examples that require you to assign BOTH a content category and a specificity level. These are the kinds of paragraphs that trip people up.",
+      "The specificity waterfall has three boundary questions. Here's what falls on each side:",
      "DOMAIN TERMINOLOGY (triggers Level 2): penetration testing, vulnerability scanning, SIEM, SOC, EDR, network segmentation, NIST CSF, ISO 27001, SOC 2, zero trust, phishing simulations, threat intelligence, MFA, encryption (as security control), ransomware, DDoS.",
      "NOT domain terminology (stays Level 1): risk assessment, incident response plan, business continuity, tabletop exercises (without cyber qualifier), enterprise risk management, internal controls, compliance, \"processes to identify and manage risks,\" \"dedicated cybersecurity team.\"",
      "FIRM-SPECIFIC FACTS (triggers Level 3): cybersecurity-specific titles (CISO, CTO, CIO, VP of Security), named non-generic committees (Cybersecurity Committee — NOT Audit Committee), named individuals in cyber roles, 24/7 security operations.",
      "NOT firm-specific: Board, Audit Committee, management, CEO/CFO/COO (generic titles), unnamed \"third-party experts,\" generic cadences (quarterly, annual), generic program names (\"incident response plan\").",
      "QV-ELIGIBLE FACTS (triggers Level 4): specific numbers (dollars, headcounts, percentages, years of experience), specific dates (month+year or exact), named external entities (Mandiant, Deloitte), named products/tools (Splunk, CrowdStrike Falcon), certifications held (CISSP, \"we maintain ISO 27001 certification\"), named universities.",
      "NOT QV-eligible: named roles (Level 3 only — CISO isn't a verifiable claim), named standards FOLLOWED (\"aligned with NIST\" = Level 2), generic cadences, fiscal year without a tied cyber fact.",
      "Key distinction: \"aligned with ISO 27001\" → Level 2. \"Working toward ISO 27001 certification\" → Level 3. \"We maintain ISO 27001 certification\" → Level 4.",
    ],
    keyPoints: [
      "Level 2: would a non-security person use this term? If no → domain terminology.",
      "Level 3: does this fact identify something unique to THIS company? Audit Committee doesn't (every company has one). CISO does.",
      "Level 4: could an outsider fact-check this? Named tools, specific numbers, verifiable certifications.",
      "Named roles (CISO) get you to Level 3 but NOT Level 4. The role identifies; it doesn't quantify.",
    ],
  },
  // ── Step 7: Putting It All Together ──────────────────────────────────
  {
    id: 7,
    title: "Putting It All Together",
    subtitle: "Category + specificity on real examples",
    content: [
      "Let's work through integrated examples. For each, assign both a category and specificity.",
    ],
    examples: [
      {
-        text: "The Audit Committee, which includes two members with significant technology expertise, receives quarterly reports from the CISO and conducts an annual deep-dive review of the cybersecurity program.",
+        text: "The Audit Committee receives quarterly reports from the CISO and conducts an annual deep-dive review of the cybersecurity program.",
        category: "Board Governance",
        specificity: "Level 3 — Firm-Specific",
        explanation:
-          "Board Governance because the Audit Committee is the grammatical subject doing the actions (receiving reports, conducting reviews). Specificity: CISO is a cybersecurity-specific title — that's one specific fact, so Level 3. The Audit Committee itself doesn't count as a specific fact (every company has one).",
+          "BG because the Audit Committee is the subject (oversight). CISO is a firm-specific fact → Level 3. No QV facts (no numbers, dates, named firms).",
      },
      {
-        text: "Our CISO oversees the Company's cybersecurity program, which includes risk assessments, vulnerability scanning, penetration testing, and incident response planning aligned with the NIST CSF framework.",
+        text: "Under the leadership of our CISO, we have implemented network segmentation, endpoint detection and response, data loss prevention, and SIEM. Our team monitors critical systems continuously and conducts quarterly tabletop exercises.",
        category: "Risk Management Process",
        specificity: "Level 3 — Firm-Specific",
        explanation:
-          "Risk Management Process, NOT Management Role — the paragraph is about what the program does, and the CISO is just attribution. Apply the Person-vs-Function test: remove \"Our CISO oversees\" and the paragraph still describes the program perfectly. For specificity, the CISO mention still counts as a firm-specific fact even when it's just attribution, so Level 3.",
+          "RMP — the paragraph describes what the program does. The CISO is attribution only. Network segmentation, EDR, DLP, SIEM are all domain terminology (Level 2), but CISO is firm-specific → Level 3. No QV facts.",
      },
      {
        text: "We increased our cybersecurity budget by 28% to $38M in fiscal 2024. We maintain cyber liability insurance with $75M in aggregate coverage.",
        category: "Strategy Integration",
        specificity: "Level 4 — Quantified-Verifiable",
        explanation:
          "SI — financial resource allocation for cyber risk. Multiple QV facts: 28%, $38M, fiscal 2024, $75M. Any one is enough for Level 4.",
      },
      {
        text: "Cybersecurity risks have not materially affected our business strategy, results of operations, or financial condition. For more information, see Item 1A, Risk Factors.",
        category: "Strategy Integration",
        specificity: "Level 1 — Generic Boilerplate",
        explanation:
-          "Strategy Integration because the materiality assessment is the key content — the cross-reference is just noise. Generic Boilerplate because there are no specific facts, no named frameworks — this is pure boilerplate language that appears in thousands of filings.",
+          "SI because the materiality assessment is the key content — the cross-reference is noise. Level 1 because it's boilerplate language with no domain terms, no firm-specific facts, no QV facts.",
      },
      {
-        text: "We are a blank check company formed for the purpose of effecting a merger. We have not adopted any cybersecurity risk management program or formal processes for assessing cybersecurity risk.",
+        text: "We are a blank check company with no operations. We have not adopted any cybersecurity risk management program.",
        category: "None/Other",
        specificity: "Level 1 — Generic Boilerplate",
        explanation:
-          "None/Other because there's no substantive disclosure — this is a SPAC with no cybersecurity program. Specificity is always Level 1 for None/Other.",
+          "N/O — no substantive disclosure. No program = no disclosure. Always Level 1.",
      },
    ],
    keyPoints: [
-      "Always assign both a content category AND a specificity level.",
+      "Category and specificity are independent. Don't let one influence the other.",
-      "The Person-vs-Function test and the Specificity Decision Test work together — use both.",
+      "The person-removal test and specificity waterfall work together — use both.",
-      "You're ready for the quiz! You'll answer 8 questions testing these concepts. You need 7/8 to pass.",
+      "When in doubt on category: which question does the paragraph answer?",
      "When in doubt on specificity: check the waterfall top-down (QV → Firm-Specific → Domain → Generic).",
    ],
  },
  // ── Step 8: You're Ready ─────────────────────────────────────────────
  {
    id: 8,
    title: "You're Ready",
    subtitle: "Quiz time — 8 questions, 7/8 to pass",
    content: [
      "That's it. The v2 codebook is designed to match how you naturally read these paragraphs. Trust your instincts, and use the rules for the genuinely ambiguous cases.",
      "The quiz tests four areas: person-vs-function (BG/MR/RMP boundaries), materiality disclaimers (SI vs N/O), specificity levels (the waterfall), and SPAC exceptions.",
      "You need 7 out of 8 correct. You only have to pass once — it won't make you retake it every session.",
      "After the quiz, you'll do 5 warmup paragraphs with immediate feedback before starting real labeling. The warmup happens every session to recalibrate.",
      "The full codebook is always available as an in-app reference while you label. Use it for the edge cases.",
    ],
    keyPoints: [
      "8 questions, 7/8 to pass. One-time only.",
      "5 warmup paragraphs with gold feedback before each labeling session.",
      "Codebook reference available while labeling.",
      "When in doubt: \"What question does this paragraph answer?\" + check the specificity waterfall.",
    ],
  },
 ];
--- a/labelapp/lib/quiz-questions.ts
+++ b/labelapp/lib/quiz-questions.ts
@ -3,7 +3,7 @@ export interface QuizQuestion {
  type:
    | "person-vs-function"
    | "materiality-disclaimer"
-    | "qv-counting"
+    | "specificity"
    | "spac-exception";
  paragraphText: string;
  question: string;
@ -22,8 +22,9 @@ const MATERIALITY_OPTIONS = [
  { value: "None/Other", label: "None/Other" },
 ];
-const QV_OPTIONS = [
+const SPECIFICITY_OPTIONS = [
-  { value: "2", label: "Specificity 2 — Sector-Adapted" },
+  { value: "1", label: "Specificity 1 — Generic Boilerplate" },
  { value: "2", label: "Specificity 2 — Domain-Adapted" },
  { value: "3", label: "Specificity 3 — Firm-Specific" },
  { value: "4", label: "Specificity 4 — Quantified-Verifiable" },
 ];
@ -39,7 +40,7 @@ const PERSON_VS_FUNCTION_QUESTION =
  "What content category best describes this paragraph?";
 const MATERIALITY_QUESTION =
  "What content category best describes this paragraph?";
-const QV_QUESTION = "What specificity level best describes this paragraph?";
+const SPECIFICITY_QUESTION = "What specificity level best describes this paragraph?";
 const SPAC_QUESTION = "What content category best describes this paragraph?";
 export const QUIZ_QUESTIONS: QuizQuestion[] = [
@ -55,7 +56,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: PERSON_VS_FUNCTION_OPTIONS,
    correctAnswer: "Management Role",
    explanation:
-      'This paragraph is about the PERSON: their certifications (CISSP, CISM), experience (20 years), and reporting line (to the CIO). The person-vs-function test: if you remove the credentials and reporting line, there is no remaining content about cybersecurity processes or activities. The paragraph tells you WHO the person is, not WHAT the program does.',
+      'This paragraph is about the PERSON: their certifications (CISSP, CISM), experience (20 years), and reporting line (to the CIO). The person-removal test: if you remove the credentials and reporting line, there is no remaining content about cybersecurity processes or activities. The paragraph tells you WHO the person is, not WHAT the program does.',
  },
  {
    id: "pvf-2",
@ -66,7 +67,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: PERSON_VS_FUNCTION_OPTIONS,
    correctAnswer: "Risk Management Process",
    explanation:
-      'The CISO is mentioned once as attribution ("Our CISO oversees"), but the paragraph\'s substantive content describes the program: risk assessments, vulnerability scanning, penetration testing, incident response planning, NIST CSF alignment. Remove "Our CISO oversees" and the paragraph still describes a complete cybersecurity program. The person-vs-function test clearly points to Risk Management Process.',
+      'The CISO is mentioned once as attribution ("Our CISO oversees"), but the paragraph\'s substantive content describes the program: risk assessments, vulnerability scanning, penetration testing, incident response planning, NIST CSF alignment. Person-removal test: remove "Our CISO oversees" and the paragraph still describes a complete cybersecurity program. Risk Management Process.',
  },
  {
    id: "pvf-3",
@ -88,7 +89,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: PERSON_VS_FUNCTION_OPTIONS,
    correctAnswer: "Risk Management Process",
    explanation:
-      'The CISO appears as brief attribution ("led by our CISO"), but the paragraph describes program activities: vulnerability assessments, penetration testing, 24/7 monitoring, and the SOC. Remove the CISO reference and you still have a complete description of cybersecurity operations. The person-vs-function test clearly points to Risk Management Process.',
+      'The CISO appears as brief attribution ("led by our CISO"), but the paragraph describes program activities: vulnerability assessments, penetration testing, 24/7 monitoring, and the SOC. Person-removal test: remove the CISO reference and you still have a complete description of cybersecurity operations. Risk Management Process.',
  },
  {
    id: "pvf-5",
@ -110,7 +111,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: PERSON_VS_FUNCTION_OPTIONS,
    correctAnswer: "Risk Management Process",
    explanation:
-      'The CISO is mentioned only as brief attribution ("Under the leadership of our CISO"). The paragraph\'s content describes program elements: network segmentation, EDR, DLP, SIEM, continuous monitoring, and tabletop exercises. Remove the CISO attribution and the paragraph is entirely about what the cybersecurity program does. This is Risk Management Process.',
+      'The CISO is mentioned only as brief attribution ("Under the leadership of our CISO"). The paragraph\'s content describes program elements: network segmentation, EDR, DLP, SIEM, continuous monitoring, and tabletop exercises. Person-removal test: remove the CISO attribution and the paragraph is entirely about what the cybersecurity program does. Risk Management Process.',
  },
  {
    id: "pvf-7",
@ -121,7 +122,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: PERSON_VS_FUNCTION_OPTIONS,
    correctAnswer: "Risk Management Process",
    explanation:
-      "While this paragraph names the VP of Cybersecurity and their reporting line, the dominant content describes the function: day-to-day management of the cybersecurity risk management program, and a team responsible for identifying, assessing, and mitigating threats. The person-vs-function test: remove the title and reporting line, and the paragraph still describes a cybersecurity program. The brief reporting structure is subordinate to the process description.",
+      "While this paragraph names the VP of Cybersecurity and their reporting line, the dominant content describes the function: day-to-day management of the cybersecurity risk management program, and a team responsible for identifying, assessing, and mitigating threats. Person-removal test: remove the title and reporting line, and the paragraph still describes a cybersecurity program. The brief reporting structure is subordinate to the process description.",
  },
  {
    id: "pvf-8",
@ -143,7 +144,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: PERSON_VS_FUNCTION_OPTIONS,
    correctAnswer: "Risk Management Process",
    explanation:
-      'The CISO is mentioned alongside "dedicated cybersecurity team" as attribution, but the content describes the incident response plan and its elements: detection, containment, eradication, recovery protocols, annual testing, and cross-functional participation. The person-vs-function test: remove the CISO reference and the paragraph fully describes a cybersecurity process. This is Risk Management Process.',
+      'The CISO is mentioned alongside "dedicated cybersecurity team" as attribution, but the content describes the incident response plan and its elements: detection, containment, eradication, recovery protocols, annual testing, and cross-functional participation. Person-removal test: remove the CISO reference and the paragraph fully describes a cybersecurity process. Risk Management Process.',
  },
  {
    id: "pvf-10",
@ -169,7 +170,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: MATERIALITY_OPTIONS,
    correctAnswer: "Strategy Integration",
    explanation:
-      'This is an explicit materiality assessment: the company states that cybersecurity risks have not "materially affected" its business. Per the codebook, any paragraph that explicitly assesses whether cybersecurity risks have or could materially affect the company is Strategy Integration, even when the language is boilerplate.',
+      'This is an explicit materiality assessment: the company states that cybersecurity risks have not "materially affected" its business. Per the codebook, any paragraph that states a conclusion about whether cybersecurity risks have or could materially affect the company is Strategy Integration, even when the language is boilerplate.',
  },
  {
    id: "mat-2",
@ -191,7 +192,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: MATERIALITY_OPTIONS,
    correctAnswer: "Strategy Integration",
    explanation:
-      'This paragraph contains both a materiality assessment ("have not... materially affected us") and a cross-reference. Per the codebook, the materiality assessment is the substantive content and the cross-reference is noise. A cross-reference appended to a materiality assessment does not change the classification. This is Strategy Integration.',
+      'This paragraph contains both a materiality assessment ("have not... materially affected us") and a cross-reference. The materiality assessment is the substantive content and the cross-reference is noise. A cross-reference appended to a materiality assessment does not change the classification. Strategy Integration.',
  },
  {
    id: "mat-4",
@ -202,7 +203,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: MATERIALITY_OPTIONS,
    correctAnswer: "Strategy Integration",
    explanation:
-      'Despite the generic threat mention ("we have experienced threats") and the cross-reference, this paragraph contains an explicit materiality assessment: risks "have not materially affected, and are not reasonably likely to materially affect" the company\'s business. Per the codebook, the materiality assessment governs the classification. The cross-reference and generic threat language are noise.',
+      'Despite the generic threat mention ("we have experienced threats") and the cross-reference, this paragraph contains an explicit materiality assessment: risks "have not materially affected, and are not reasonably likely to materially affect" the company\'s business. The materiality assessment governs the classification. The cross-reference and generic threat language are noise.',
  },
  {
    id: "mat-5",
@ -213,7 +214,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: MATERIALITY_OPTIONS,
    correctAnswer: "None/Other",
    explanation:
-      'This is a cross-reference, not a materiality assessment. It mentions "materially affect" as part of a description of what is in another section, but the paragraph itself makes no substantive claim about whether cybersecurity risks have or could materially affect the business. The test: does this paragraph make a judgment about cyber risk impact? No — it only tells you where to find that discussion. This is None/Other.',
+      'This is a cross-reference, not a materiality assessment. It mentions "materially affect" as part of a description of what is in another section, but the paragraph itself makes no substantive conclusion about whether cybersecurity risks have or could materially affect the business. None/Other.',
  },
  {
    id: "mat-6",
@ -235,7 +236,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: MATERIALITY_OPTIONS,
    correctAnswer: "None/Other",
    explanation:
-      "This is a pure cross-reference pointing to two other sections of the filing. There is no materiality assessment, no substantive disclosure about cybersecurity risks or their business impact. Per the codebook, a pure cross-reference with no materiality conclusion is None/Other.",
+      "This is a pure cross-reference pointing to two other sections of the filing. There is no materiality assessment, no substantive disclosure about cybersecurity risks or their business impact. None/Other.",
  },
  {
    id: "mat-8",
@ -246,99 +247,99 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: MATERIALITY_OPTIONS,
    correctAnswer: "Strategy Integration",
    explanation:
-      'This paragraph makes an explicit materiality assessment: past incidents "have [not] materially affected" the company. The acknowledgment of past incidents does not change the classification — the paragraph\'s purpose is to assess materiality, which is the hallmark of Strategy Integration per the codebook.',
+      'This paragraph makes an explicit materiality assessment: past incidents "have [not] materially affected" the company. The acknowledgment of past incidents does not change the classification — the paragraph\'s purpose is to assess materiality, which is the hallmark of Strategy Integration.',
  },
  // ============================================================
-  // QV FACT COUNTING (8 questions)
+  // SPECIFICITY (8 questions) — updated for v2 codebook
  // ============================================================
  {
-    id: "qv-1",
+    id: "spec-1",
-    type: "qv-counting",
+    type: "specificity",
    paragraphText:
      "Our CISO oversees a dedicated cybersecurity team responsible for managing cyber risk across the enterprise.",
-    question: QV_QUESTION,
+    question: SPECIFICITY_QUESTION,
-    options: QV_OPTIONS,
+    options: SPECIFICITY_OPTIONS,
    correctAnswer: "3",
    explanation:
-      '"CISO" is a cybersecurity-specific title on the codebook\'s IS list — that\'s one firm-specific fact. "Dedicated cybersecurity team" is a generic team reference (NOT list). "Managing cyber risk across the enterprise" is generic. One IS-list fact, no named standards, no QV-eligible facts = Specificity 3 (Firm-Specific).',
+      '"CISO" is a cybersecurity-specific title on the codebook\'s IS list — that\'s one firm-specific fact. "Dedicated cybersecurity team" is a generic organizational term (NOT on the IS list). "Managing cyber risk across the enterprise" is generic. One IS-list fact, no QV-eligible facts = Specificity 3 (Firm-Specific).',
  },
  {
-    id: "qv-2",
+    id: "spec-2",
-    type: "qv-counting",
+    type: "specificity",
    paragraphText:
      "We maintain cyber liability insurance with $100M aggregate coverage through AIG.",
-    question: QV_QUESTION,
+    question: SPECIFICITY_QUESTION,
-    options: QV_OPTIONS,
+    options: SPECIFICITY_OPTIONS,
    correctAnswer: "4",
    explanation:
-      "This paragraph contains multiple verifiable facts: a specific dollar amount ($100M aggregate coverage) and a named insurer (AIG). Two or more hard verifiable facts = Specificity 4 (Quantified-Verifiable) per the codebook's QV counting rules.",
+      "This paragraph contains multiple QV-eligible facts: a specific dollar amount ($100M aggregate coverage) and a named external entity (AIG). One or more QV-eligible facts = Specificity 4 (Quantified-Verifiable).",
  },
  {
-    id: "qv-3",
+    id: "spec-3",
-    type: "qv-counting",
+    type: "specificity",
    paragraphText:
-      "Our incident response team conducts quarterly tabletop exercises.",
+      "We maintain a cybersecurity risk management program designed to identify, assess, and manage material cybersecurity risks to our business.",
-    question: QV_QUESTION,
+    question: SPECIFICITY_QUESTION,
-    options: QV_OPTIONS,
+    options: SPECIFICITY_OPTIONS,
    correctAnswer: "1",
    explanation:
-      'Apply the codebook\'s validation step: "quarterly" is a generic cadence (NOT list), "tabletop exercises" is a common practice (NOT list), and "incident response team" is a generic team reference (NOT list). After filtering, no IS-list facts remain. No named standards either. This is Specificity 1 (Generic Boilerplate) — it could appear unchanged in any company\'s filing.',
+      'This paragraph uses only general business language: "risk management program," "identify, assess, and manage," "material cybersecurity risks." None of these are cybersecurity domain terminology — they belong to generic enterprise risk management. No domain terms, no firm-specific facts, no QV facts. Specificity 1 (Generic Boilerplate).',
  },
  {
-    id: "qv-4",
+    id: "spec-4",
-    type: "qv-counting",
+    type: "specificity",
    paragraphText:
      "Our cybersecurity program is aligned with the NIST Cybersecurity Framework and incorporates elements of ISO 27001. We conduct regular risk assessments and vulnerability scanning as part of our continuous monitoring approach.",
-    question: QV_QUESTION,
+    question: SPECIFICITY_QUESTION,
-    options: QV_OPTIONS,
+    options: SPECIFICITY_OPTIONS,
    correctAnswer: "2",
    explanation:
-      'This paragraph names two recognized standards (NIST CSF and ISO 27001), which places it at Specificity 2. However, naming standards is NOT a firm-specific fact per the codebook — it only makes a paragraph Sector-Adapted. The activities described (risk assessments, vulnerability scanning, continuous monitoring) are generic practices. There are no firm-specific facts (no named tools, no named personnel, no dates, no dollar amounts). Specificity 2 (Sector-Adapted).',
+      'This paragraph names two frameworks (NIST CSF, ISO 27001) and uses domain terminology (vulnerability scanning). These are cybersecurity-specific terms that wouldn\'t appear in a generic enterprise risk document. However, nothing is unique to THIS company — many companies say this verbatim. No firm-specific facts, no QV facts. Specificity 2 (Domain-Adapted).',
  },
  {
-    id: "qv-5",
+    id: "spec-5",
-    type: "qv-counting",
+    type: "specificity",
    paragraphText:
      "We operate a 24/7 Security Operations Center staffed by a team of 18 cybersecurity professionals. Our SOC uses CrowdStrike Falcon for endpoint detection and response and Splunk Enterprise Security as our SIEM platform. In fiscal 2024, our SOC processed over 2.3 billion security events and investigated 847 potential incidents.",
-    question: QV_QUESTION,
+    question: SPECIFICITY_QUESTION,
-    options: QV_OPTIONS,
+    options: SPECIFICITY_OPTIONS,
    correctAnswer: "4",
    explanation:
-      "This paragraph is rich in verifiable facts: team size (18 professionals), named tools (CrowdStrike Falcon, Splunk Enterprise Security), specific time period (fiscal 2024), event volume (2.3 billion), and incident count (847). With far more than two hard verifiable facts, this is clearly Specificity 4 (Quantified-Verifiable).",
+      "This paragraph is rich in QV-eligible facts: team size (18 professionals), named tools (CrowdStrike Falcon, Splunk Enterprise Security), specific time period tied to cybersecurity facts (fiscal 2024), event volume (2.3 billion), and incident count (847). With multiple QV-eligible facts, this is clearly Specificity 4 (Quantified-Verifiable).",
  },
  {
-    id: "qv-6",
+    id: "spec-6",
-    type: "qv-counting",
+    type: "specificity",
    paragraphText:
      "Our CISO leads the Company's cybersecurity program, which includes risk assessments, vulnerability management, and incident response planning.",
-    question: QV_QUESTION,
+    question: SPECIFICITY_QUESTION,
-    options: QV_OPTIONS,
+    options: SPECIFICITY_OPTIONS,
    correctAnswer: "3",
    explanation:
-      'The CISO title is a cybersecurity-specific role per the codebook\'s IS list, making this at least Firm-Specific. However, there is only one firm-specific fact (the CISO title). The activities listed (risk assessments, vulnerability management, incident response planning) are generic and do not count as verifiable facts. One firm-specific fact = Specificity 3 (Firm-Specific), not QV.',
+      'The CISO title is a cybersecurity-specific role on the codebook\'s IS list, making this Firm-Specific. However, the activities listed (risk assessments, vulnerability management, incident response planning) are generic practices. While "vulnerability management" is domain terminology (Level 2), the CISO title pushes it to Level 3. No QV-eligible facts. Specificity 3 (Firm-Specific).',
  },
  {
-    id: "qv-7",
+    id: "spec-7",
-    type: "qv-counting",
+    type: "specificity",
    paragraphText:
      "We engaged Deloitte to conduct an independent assessment of our cybersecurity program in fiscal 2024. The assessment identified no critical vulnerabilities and resulted in 12 recommendations for improvement, all of which have been addressed or are being remediated.",
-    question: QV_QUESTION,
+    question: SPECIFICITY_QUESTION,
-    options: QV_OPTIONS,
+    options: SPECIFICITY_OPTIONS,
    correctAnswer: "4",
    explanation:
-      "Multiple verifiable facts: named third-party firm (Deloitte), specific time period (fiscal 2024), specific finding count (12 recommendations). Three or more hard verifiable facts easily qualifies for Specificity 4 (Quantified-Verifiable).",
+      "Multiple QV-eligible facts: named third-party firm (Deloitte), specific time period with cybersecurity fact (fiscal 2024), and specific finding count (12 recommendations). Any one of these would be sufficient for Level 4. Specificity 4 (Quantified-Verifiable).",
  },
  {
-    id: "qv-8",
+    id: "spec-8",
-    type: "qv-counting",
+    type: "specificity",
    paragraphText:
-      "Our cybersecurity team conducts regular penetration testing and vulnerability assessments of our information technology infrastructure. We also engage external cybersecurity consultants to periodically evaluate our security posture.",
+      "Our incident response team conducts regular penetration testing and vulnerability assessments of our information technology infrastructure.",
-    question: QV_QUESTION,
+    question: SPECIFICITY_QUESTION,
-    options: QV_OPTIONS,
+    options: SPECIFICITY_OPTIONS,
-    correctAnswer: "3",
+    correctAnswer: "2",
    explanation:
-      'The mention of a "cybersecurity team" is a firm-specific fact (this company has a dedicated team), but there is only one such fact. The "external cybersecurity consultants" are unnamed and therefore do not count per the codebook\'s NOT list. "Regular" and "periodically" are generic cadences. One firm-specific fact = Specificity 3 (Firm-Specific).',
+      '"Penetration testing" and "vulnerability assessments" are cybersecurity domain terminology — these terms originate from the cybersecurity domain and wouldn\'t appear in a generic enterprise risk document. However, no firm-specific facts are present: "incident response team" is generic (NOT on the IS list), "regular" is a generic cadence, and "information technology infrastructure" is general IT language. Domain terms present but nothing firm-specific = Specificity 2 (Domain-Adapted).',
  },
  // ============================================================
@ -386,7 +387,7 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
    options: SPAC_OPTIONS,
    correctAnswer: "None/Other",
    explanation:
-      "Despite naming the CEO and CFO as responsible for cybersecurity risks, the company explicitly states it has no formal program and no specific policies or procedures. Per the codebook, CEO and CFO are generic C-suite titles (NOT cybersecurity-specific), and the mention of them is perfunctory. The company has limited operations and no substantive cybersecurity disclosure. This is the SPAC/shell company exception: None/Other.",
+      "Despite naming the CEO and CFO as responsible for cybersecurity risks, the company explicitly states it has no formal program and no specific policies or procedures. Per the codebook, CEO and CFO are generic C-suite titles (NOT cybersecurity-specific), and the mention of them is perfunctory. The company has limited operations and no substantive cybersecurity disclosure. None/Other.",
  },
 ];
@ -398,7 +399,7 @@ export function drawQuizQuestions(count: number): QuizQuestion[] {
  const types = [
    "person-vs-function",
    "materiality-disclaimer",
-    "qv-counting",
+    "specificity",
    "spac-exception",
  ] as const;
--- a/labelapp/lib/sampling.ts
+++ b/labelapp/lib/sampling.ts
@ -1,277 +0,0 @@
 /**
  * A paragraph as seen by the sampling routines: its id, its stage-1 labels,
  * and the raw per-annotation votes used to detect split decisions.
  */
 export interface ParagraphWithVotes {
   /** Paragraph identifier; sampling results are returned as lists of these ids. */
   id: string;
   /** Stage-1 category label; may be null. */
   stage1Category: string | null;
   /** Stage-1 specificity level; may be null. */
   stage1Specificity: number | null;
   /** Raw category votes from stage1 annotations */
   categoryVotes: string[];
   /** Raw specificity votes from stage1 annotations */
   specificityVotes: number[];
 }
 /**
  * One named stratum of the sample: a predicate choosing eligible paragraphs
  * plus the number of paragraphs to draw from them.
  */
 export interface StratumConfig {
   /** Human-readable label, printed in the per-stratum log line. */
   name: string;
   /** Number of paragraphs wanted; fewer are taken when not enough are eligible. */
   count: number;
   /** Returns true for paragraphs that belong to this stratum. */
   filter: (p: ParagraphWithVotes) => boolean;
 }
 /** Full sampling plan: an overall target size plus the named strata to draw first. */
 export interface SamplingConfig {
   /** Target number of selected paragraphs; the final proportional fill tops the selection up to this size. */
   total: number;
   /** Strata are processed in array order; a paragraph picked by an earlier stratum is excluded from later ones. */
   strata: StratumConfig[];
 }
 /**
  * Fisher-Yates shuffle.
  *
  * Permutes `arr` in place (walking from the last index down to 1 and swapping
  * each slot with a uniformly chosen slot at or below it) and returns the very
  * same array instance.
  */
 function shuffle<T>(arr: T[]): T[] {
   let i = arr.length - 1;
   while (i > 0) {
     const j = Math.floor(Math.random() * (i + 1));
     const held = arr[i];
     arr[i] = arr[j];
     arr[j] = held;
     i--;
   }
   return arr;
 }
 /**
  * Tell whether the paragraph's category votes are split between two categories.
  *
  * Returns true only when `catA` and `catB` each appear at least once among
  * the paragraph's raw category votes.
  */
 function hasCategorySplit(
   p: ParagraphWithVotes,
   catA: string,
   catB: string,
 ): boolean {
   const votes = p.categoryVotes;
   if (!votes.includes(catA)) {
     return false;
   }
   return votes.includes(catB);
 }
 /**
  * Tell whether the paragraph's specificity votes are split between two levels.
  *
  * Returns true only when `specA` and `specB` each appear at least once among
  * the paragraph's raw specificity votes.
  */
 function hasSpecificitySplit(
   p: ParagraphWithVotes,
   specA: number,
   specB: number,
 ): boolean {
   const votes = p.specificityVotes;
   if (!votes.includes(specA)) {
     return false;
   }
   return votes.includes(specB);
 }
 /**
  * Proportional stratified random sample over category x specificity cells.
  *
  * Paragraphs are bucketed by (stage-1 category, stage-1 specificity), with
  * null values mapped to "unknown" / 0. Each cell receives the floor of its
  * proportional share of `count`; the slots lost to rounding are then handed
  * out one apiece to the cells with the largest fractional remainders. Each
  * cell is shuffled and its first `quota` members are taken, capped at the
  * cell size.
  *
  * @param eligible Paragraphs to sample from; the array itself is not mutated.
  * @param count Number of ids wanted in total.
  * @returns Ids of the sampled paragraphs, grouped cell by cell.
  */
 function proportionalSample(
   eligible: ParagraphWithVotes[],
   count: number,
 ): string[] {
   // Bucket the population into category x specificity cells.
   const cells = new Map<string, ParagraphWithVotes[]>();
   for (const para of eligible) {
     const cellKey = `${para.stage1Category ?? "unknown"}|${para.stage1Specificity ?? 0}`;
     let bucket = cells.get(cellKey);
     if (bucket === undefined) {
       bucket = [];
       cells.set(cellKey, bucket);
     }
     bucket.push(para);
   }

   // Floor of each cell's proportional share, remembering what rounding dropped.
   const populationSize = eligible.length;
   const quotas = [...cells.values()].map((members) => {
     const exactShare = (members.length / populationSize) * count;
     const base = Math.floor(exactShare);
     return { members, quota: base, remainder: exactShare - base };
   });

   // Largest-remainder method: leftover slots go to the biggest fractions first.
   let leftover = count - quotas.reduce((sum, q) => sum + q.quota, 0);
   quotas.sort((a, b) => b.remainder - a.remainder);
   for (const q of quotas) {
     if (leftover <= 0) break;
     q.quota++;
     leftover--;
   }

   // Draw from every cell (each cell is shuffled even when its quota is zero).
   const picked: string[] = [];
   for (const { members, quota } of quotas) {
     shuffle(members);
     for (const member of members.slice(0, Math.max(0, quota))) {
       picked.push(member.id);
     }
   }
   return picked;
 }
 /**
  * Default sampling plan: a 1,200-paragraph target with four split-vote
  * strata (three category splits and one specificity split) drawn first.
  */
 export function defaultSamplingConfig(): SamplingConfig {
   // Stratum of paragraphs whose category votes include both given categories.
   const categorySplit = (
     name: string,
     count: number,
     catA: string,
     catB: string,
   ): StratumConfig => ({
     name,
     count,
     filter: (p) => hasCategorySplit(p, catA, catB),
   });

   const strata: StratumConfig[] = [
     categorySplit(
       "Mgmt↔RMP split votes",
       120,
       "Management Role",
       "Risk Management Process",
     ),
     categorySplit(
       "None/Other↔Strategy splits",
       80,
       "None/Other",
       "Strategy Integration",
     ),
     {
       name: "Spec [3,4] splits",
       count: 80,
       filter: (p) => hasSpecificitySplit(p, 3, 4),
     },
     categorySplit(
       "Board↔Mgmt splits",
       80,
       "Board Governance",
       "Management Role",
     ),
   ];

   return { total: 1200, strata };
 }
 /**
  * Run stratified sampling and return the selected paragraph ids.
  *
  * Phases, in order:
  * 1. Named strata: for each stratum in `config.strata`, shuffle the
  *    not-yet-selected paragraphs matching its filter and take up to
  *    `stratum.count` of them. Earlier strata therefore get first pick.
  * 2. Rare category guarantee (budget of 120 additions): categories holding
  *    fewer than 15 selected paragraphs are topped up, least-represented
  *    first; whatever budget is left goes to "Incident Disclosure".
  * 3. Proportional fill: if fewer than `config.total` ids are selected, the
  *    gap is filled from category x specificity cells of the remaining
  *    paragraphs that have a stage-1 category.
  *
  * `config.total` is a target, not a cap: phases 1-2 are never trimmed, and
  * phase 3 can fall short when too few eligible paragraphs remain.
  * Selection is random (via `shuffle`) and progress is logged to the console.
  *
  * @param paragraphs Candidate paragraphs; the array is not reordered.
  * @param config Target size and named strata.
  * @returns Selected paragraph ids in order of selection.
  */
 export function stratifiedSample(
   paragraphs: ParagraphWithVotes[],
   config: SamplingConfig,
 ): string[] {
   const selected = new Set<string>();

   // Phase 1: Named strata (split-vote strata)
   for (const stratum of config.strata) {
     const eligible = paragraphs.filter(
       (p) => !selected.has(p.id) && stratum.filter(p),
     );
     shuffle(eligible);
     const take = Math.min(stratum.count, eligible.length);
     for (let i = 0; i < take; i++) {
       selected.add(eligible[i].id);
     }
     console.log(
       `  Stratum "${stratum.name}": wanted ${stratum.count}, eligible ${eligible.length}, selected ${take}`,
     );
   }

   // Phase 2: Rare category guarantee (120 slots, >= 15 per category)
   const RARE_GUARANTEE_TOTAL = 120;
   const MIN_PER_CATEGORY = 15;
   const rareStartSize = selected.size;

   // Not-yet-selected paragraphs, bucketed by stage-1 category.
   const unselectedByCat = new Map<string, ParagraphWithVotes[]>();
   for (const p of paragraphs) {
     if (selected.has(p.id) || !p.stage1Category) continue;
     const cat = p.stage1Category;
     const bucket = unselectedByCat.get(cat);
     if (bucket) {
       bucket.push(p);
     } else {
       unselectedByCat.set(cat, [p]);
     }
   }

   // Count how many of each category are already selected. One pass over the
   // input replaces a linear `find` per selected id; the seen-set keeps the
   // "first paragraph carrying that id decides the category" semantics should
   // ids ever repeat.
   const selectedByCat = new Map<string, number>();
   const counted = new Set<string>();
   for (const p of paragraphs) {
     if (!selected.has(p.id) || counted.has(p.id)) continue;
     counted.add(p.id);
     if (p.stage1Category) {
       selectedByCat.set(
         p.stage1Category,
         (selectedByCat.get(p.stage1Category) ?? 0) + 1,
       );
     }
   }

   // Top up categories that have fewer than MIN_PER_CATEGORY
   let rareAdded = 0;
   const allCategories = new Set<string>();
   for (const p of paragraphs) {
     if (p.stage1Category) allCategories.add(p.stage1Category);
   }

   // Sort categories by current count ascending so rarest get filled first
   const sortedCats = [...allCategories].sort(
     (a, b) =>
       (selectedByCat.get(a) ?? 0) - (selectedByCat.get(b) ?? 0),
   );

   for (const cat of sortedCats) {
     if (rareAdded >= RARE_GUARANTEE_TOTAL) break;
     const current = selectedByCat.get(cat) ?? 0;
     if (current >= MIN_PER_CATEGORY) continue;

     const need = MIN_PER_CATEGORY - current;
     // Re-filter: earlier iterations may have selected ids since bucketing.
     const eligible = (unselectedByCat.get(cat) ?? []).filter(
       (p) => !selected.has(p.id),
     );
     shuffle(eligible);
     const take = Math.min(need, eligible.length, RARE_GUARANTEE_TOTAL - rareAdded);
     for (let i = 0; i < take; i++) {
       selected.add(eligible[i].id);
       rareAdded++;
     }
   }

   // Give extra slots to "Incident Disclosure" if budget remains
   if (rareAdded < RARE_GUARANTEE_TOTAL) {
     const incidentEligible = (
       unselectedByCat.get("Incident Disclosure") ?? []
     ).filter((p) => !selected.has(p.id));
     shuffle(incidentEligible);
     const take = Math.min(
       RARE_GUARANTEE_TOTAL - rareAdded,
       incidentEligible.length,
     );
     for (let i = 0; i < take; i++) {
       selected.add(incidentEligible[i].id);
       rareAdded++;
     }
   }

   console.log(
     `  Rare category guarantee: added ${selected.size - rareStartSize} (budget ${RARE_GUARANTEE_TOTAL})`,
   );

   // Phase 3: Proportional stratified random fill
   const remaining = config.total - selected.size;
   if (remaining > 0) {
     const eligible = paragraphs.filter(
       (p) => !selected.has(p.id) && p.stage1Category != null,
     );
     const filled = proportionalSample(eligible, remaining);
     for (const id of filled) {
       selected.add(id);
     }
     console.log(
       `  Proportional fill: added ${filled.length} (target ${remaining})`,
     );
   }

   console.log(`  Total selected: ${selected.size}`);
   return [...selected];
 }
--- a/labelapp/lib/warmup-paragraphs.ts
+++ b/labelapp/lib/warmup-paragraphs.ts
@ -13,7 +13,7 @@ export const WARMUP_PARAGRAPHS: WarmupParagraph[] = [
    goldCategory: "Board Governance",
    goldSpecificity: 3,
    explanation:
-      "Board Governance because the Board of Directors and Audit Committee are the grammatical subjects performing the primary actions (overseeing, delegating, receiving reports). Specificity 3 (Firm-Specific) because the paragraph describes a specific delegation structure (to the Audit Committee) with a defined briefing cadence. Note: while 'Audit Committee' alone is generic (per the NOT list), the delegation of cybersecurity oversight to it and the described briefing structure constitute firm-specific organizational choices.",
+      "Board Governance because the Board of Directors and Audit Committee are the grammatical subjects performing the primary actions (overseeing, delegating, receiving reports). Specificity 3 (Firm-Specific) because the paragraph describes a specific delegation structure (to the Audit Committee) with a defined briefing cadence. While 'Audit Committee' alone is generic (NOT list), the delegation of cybersecurity oversight to it is a firm-specific organizational choice. No QV-eligible facts present (no specific numbers, dates, named entities, or tools).",
  },
  {
    id: "warmup-2",
@ -21,7 +21,7 @@ export const WARMUP_PARAGRAPHS: WarmupParagraph[] = [
    goldCategory: "Incident Disclosure",
    goldSpecificity: 4,
    explanation:
-      "Incident Disclosure because the paragraph describes what happened in a cybersecurity incident: the timeline, attack vector, response actions, and scope. Specificity 4 (Quantified-Verifiable) because it contains multiple hard verifiable facts: a specific date (January 15, 2024), a specific containment time (four hours), a named forensic firm (Mandiant), and a quantified impact (12,000 customer records). Four verifiable facts far exceeds the two-fact threshold for QV.",
+      "Incident Disclosure because the paragraph describes what happened in a cybersecurity incident: the timeline, attack vector, response actions, and scope. Specificity 4 (Quantified-Verifiable) because it contains QV-eligible facts: a specific date (January 15, 2024), a specific containment time (four hours), a named forensic firm (Mandiant), and a quantified impact (12,000 customer records). Any one of these would be sufficient for Level 4.",
  },
  {
    id: "warmup-3",
@ -29,7 +29,7 @@ export const WARMUP_PARAGRAPHS: WarmupParagraph[] = [
    goldCategory: "Risk Management Process",
    goldSpecificity: 1,
    explanation:
-      "Risk Management Process because the paragraph describes the company's internal cybersecurity program and its purpose (identify, assess, manage risks). Specificity 1 (Generic Boilerplate) because this language could appear in any company's filing unchanged — it names no specific frameworks (just 'recognized industry frameworks'), no named tools, no named personnel, no dates, no quantities. Every phrase is generic boilerplate.",
+      "Risk Management Process because the paragraph describes the company's internal cybersecurity program and its purpose (identify, assess, manage risks). Specificity 1 (Generic Boilerplate) because this language could appear in any company's filing unchanged — 'identify, assess, and manage' is generic ERM language, 'recognized industry frameworks' names no specific standard, and 'best practices' is boilerplate. No cybersecurity domain terminology, no firm-specific facts, no QV-eligible facts.",
  },
  {
    id: "warmup-4",
@ -37,7 +37,7 @@ export const WARMUP_PARAGRAPHS: WarmupParagraph[] = [
    goldCategory: "Strategy Integration",
    goldSpecificity: 4,
    explanation:
-      "Strategy Integration because the paragraph discusses financial resource allocation (budget increase, insurance) and strategic judgment about cybersecurity investment — business/financial consequences of cyber risk. Specificity 4 (Quantified-Verifiable) because it contains multiple hard verifiable facts: budget percentage (28%), dollar amount ($38M), revenue percentage (0.6%), insurance coverage ($75M), and time period (fiscal 2024). Well above the two-fact QV threshold.",
+      "Strategy Integration because the paragraph discusses financial resource allocation (budget increase, insurance) and strategic judgment about cybersecurity investment — business/financial consequences of cyber risk. Specificity 4 (Quantified-Verifiable) because it contains multiple QV-eligible facts: budget percentage (28%), dollar amount ($38M), revenue percentage (0.6%), insurance coverage ($75M), and time period with cybersecurity fact (fiscal 2024). Any one would suffice for Level 4.",
  },
  {
    id: "warmup-5",
@ -45,6 +45,6 @@ export const WARMUP_PARAGRAPHS: WarmupParagraph[] = [
    goldCategory: "Third-Party Risk",
    goldSpecificity: 2,
    explanation:
-      "Third-Party Risk because the central topic is oversight of external parties' cybersecurity: vendor requirements, security assessments, and ongoing monitoring of third-party relationships. Specificity 2 (Sector-Adapted) because it names a recognized standard (SOC 2 Type II) but contains no firm-specific details — no specific vendor counts, no named vendors, no dollar amounts. The assessment cadences ('initial' and 'annual') are generic. The SOC 2 mention elevates it above Specificity 1 but there are no firm-specific facts to reach Specificity 3.",
+      "Third-Party Risk because the central topic is oversight of external parties' cybersecurity: vendor requirements, security assessments, and ongoing monitoring of third-party relationships. Specificity 2 (Domain-Adapted) because it names a recognized standard (SOC 2 Type II) — cybersecurity domain terminology that wouldn't appear in a generic enterprise risk document. However, no firm-specific facts are present: no specific vendor counts, no named vendors, no dollar amounts. The assessment cadences ('initial' and 'annual') are generic. Domain terminology present but nothing firm-specific = Level 2.",
  },
 ];
--- a/labelapp/package.json
+++ b/labelapp/package.json
@ -13,7 +13,6 @@
    "db:push": "drizzle-kit push",
    "db:studio": "drizzle-kit studio",
    "seed": "bun run scripts/seed.ts",
    "sample": "bun run scripts/sample.ts",
    "assign": "bun run scripts/assign.ts",
    "export": "bun run scripts/export.ts",
    "dump": "bun run scripts/dump-all.ts",
--- a/labelapp/scripts/assign.ts
+++ b/labelapp/scripts/assign.ts
@ -1,29 +1,26 @@
 process.env.DATABASE_URL ??=
  "postgresql://sec_cybert:sec_cybert@localhost:5432/sec_cybert";
 import { readFile } from "node:fs/promises";
 import { ne } from "drizzle-orm";
 import { db } from "../db";
 import * as schema from "../db/schema";
 import { generateAssignments, printAssignmentStats } from "../lib/assignment";
 const SAMPLED_IDS_PATH =
  process.env.SAMPLED_IDS_PATH ??
  "/home/joey/Documents/sec-cyBERT/labelapp/.sampled-ids.json";
 async function main() {
-  // 1. Read sampled paragraph IDs
+  // 1. Read all paragraph IDs from DB (the holdout IS the full set)
-  console.log("Reading sampled paragraph IDs...");
+  console.log("Loading paragraph IDs from DB...");
-  const raw = await readFile(SAMPLED_IDS_PATH, "utf-8");
+  const rows = await db
-  const paragraphIds: string[] = JSON.parse(raw);
+    .select({ id: schema.paragraphs.id })
-  console.log(`  ${paragraphIds.length} paragraph IDs loaded`);
+    .from(schema.paragraphs);
  const paragraphIds = rows.map((r) => r.id);
  console.log(`  ${paragraphIds.length} paragraphs`);
-  // 2. Read annotator IDs from DB (exclude admin)
+  // 2. Read annotator IDs from DB (exclude joey — admin)
  console.log("Loading annotators...");
  const annotators = await db
    .select({ id: schema.annotators.id })
    .from(schema.annotators)
-    .where(ne(schema.annotators.id, "admin"));
+    .where(ne(schema.annotators.id, "joey"));
  const annotatorIds = annotators.map((a) => a.id).sort();
  console.log(`  ${annotatorIds.length} annotators: ${annotatorIds.join(", ")}`);
--- a/labelapp/scripts/dump-all.ts
+++ b/labelapp/scripts/dump-all.ts
@ -53,12 +53,12 @@ async function main() {
      db.select().from(schema.adjudications),
    ]);
-  const nonAdminAnnotators = allAnnotators.filter((a) => a.id !== "admin");
+  const nonAdminAnnotators = allAnnotators.filter((a) => a.id !== "joey");
  const annotatorIds = nonAdminAnnotators.map((a) => a.id).sort();
  const annotatorNames = new Map(allAnnotators.map((a) => [a.id, a.displayName]));
  // Filter to non-admin labels only
-  const labels = allLabels.filter((l) => l.annotatorId !== "admin");
+  const labels = allLabels.filter((l) => l.annotatorId !== "joey");
  console.log(`  ${labels.length} human labels (non-admin)`);
  console.log(`  ${allParagraphs.length} paragraphs`);
--- a/labelapp/scripts/sample.ts
+++ b/labelapp/scripts/sample.ts
@ -1,89 +0,0 @@
 process.env.DATABASE_URL ??=
  "postgresql://sec_cybert:sec_cybert@localhost:5432/sec_cybert";
 import { readFile, writeFile } from "node:fs/promises";
 import { db } from "../db";
 import * as schema from "../db/schema";
 import {
  type ParagraphWithVotes,
  defaultSamplingConfig,
  stratifiedSample,
 } from "../lib/sampling";
 /**
  * Read a JSON Lines file into an array of parsed records.
  *
  * Lines that are empty or contain only whitespace are ignored. Every other
  * line is handed to `JSON.parse` and cast to `T`; no runtime validation of
  * the parsed shape is performed.
  *
  * @param path - Location of the `.jsonl` file, read as UTF-8.
  * @returns One parsed value per non-blank line, in file order.
  */
 async function readJsonl<T = unknown>(path: string): Promise<T[]> {
  const contents = await readFile(path, "utf-8");
  const records: T[] = [];
  for (const line of contents.split("\n")) {
    // Skip blank / whitespace-only lines (e.g. the trailing newline).
    if (line.trim().length === 0) continue;
    records.push(JSON.parse(line) as T);
  }
  return records;
 }
 /** Shape of one parsed line of the annotations JSONL file, as read by this script. */
 interface AnnotationRow {
  /** ID of the paragraph this annotation refers to; votes are grouped by this value. */
  paragraphId: string;
  /** The annotation's label; both fields are collected as per-paragraph votes. */
  label: {
    /** Content category assigned by this annotation. */
    content_category: string;
    /** Specificity level assigned by this annotation. */
    specificity_level: number;
  };
 }
 // Where the sampled paragraph IDs are written. The SAMPLED_IDS_PATH
 // environment variable takes precedence over the hard-coded default.
 const OUTPUT_PATH =
  process.env.SAMPLED_IDS_PATH ??
  "/home/joey/Documents/sec-cyBERT/labelapp/.sampled-ids.json";
 // JSONL file of annotations used for vote analysis. The SEED_ANNOTATIONS_PATH
 // environment variable takes precedence over the hard-coded default.
 const ANNOTATIONS_PATH =
  process.env.SEED_ANNOTATIONS_PATH ??
  "/home/joey/Documents/sec-cyBERT/data/annotations/stage1.jsonl";
 /**
  * Script entry point.
  *
  * Loads every paragraph row from the database, reads the annotations JSONL
  * file, groups each annotation's category and specificity votes by paragraph,
  * passes the combined records to `stratifiedSample` with the default sampling
  * config, and writes the result to `OUTPUT_PATH` as pretty-printed JSON.
  * Terminates the process with exit code 0 when done.
  */
 async function main() {
  // 1. Load all paragraphs from DB
  console.log("Loading paragraphs from DB...");
  const storedParagraphs = await db.select().from(schema.paragraphs);
  console.log(`  ${storedParagraphs.length} paragraphs loaded`);
  // 2. Load raw annotations for split-vote detection
  console.log("Loading annotations for vote analysis...");
  const annotationRows = await readJsonl<AnnotationRow>(ANNOTATIONS_PATH);
  console.log(`  ${annotationRows.length} annotations loaded`);
  // Collect every annotation's votes under its paragraph ID
  const voteTally = new Map<
    string,
    { categories: string[]; specificities: number[] }
  >();
  for (const row of annotationRows) {
    const bucket = voteTally.get(row.paragraphId) ?? {
      categories: [],
      specificities: [],
    };
    // Register the bucket the first time this paragraph is seen.
    if (!voteTally.has(row.paragraphId)) {
      voteTally.set(row.paragraphId, bucket);
    }
    bucket.categories.push(row.label.content_category);
    bucket.specificities.push(row.label.specificity_level);
  }
  // 3. Build ParagraphWithVotes array (paragraphs without annotations get empty vote lists)
  const paragraphsWithVotes: ParagraphWithVotes[] = [];
  for (const paragraph of storedParagraphs) {
    const bucket = voteTally.get(paragraph.id);
    paragraphsWithVotes.push({
      id: paragraph.id,
      stage1Category: paragraph.stage1Category,
      stage1Specificity: paragraph.stage1Specificity,
      categoryVotes: bucket?.categories ?? [],
      specificityVotes: bucket?.specificities ?? [],
    });
  }
  // 4. Run stratified sampling
  console.log("Running stratified sampling...");
  const sampledIds = stratifiedSample(
    paragraphsWithVotes,
    defaultSamplingConfig(),
  );
  // 5. Write output
  await writeFile(OUTPUT_PATH, JSON.stringify(sampledIds, null, 2));
  console.log(`\nWrote ${sampledIds.length} sampled IDs to ${OUTPUT_PATH}`);
  process.exit(0);
 }
 // Run the script. If main() rejects, log the error and exit with status 1.
 main().catch((err) => {
  console.error("Sampling failed:", err);
  process.exit(1);
 });
--- a/labelapp/scripts/seed.ts
+++ b/labelapp/scripts/seed.ts
@ -13,6 +13,11 @@ async function readJsonl<T = unknown>(path: string): Promise<T[]> {
    .map((l) => JSON.parse(l) as T);
 }
 /**
  * Read a file and parse its whole contents as a single JSON document.
  *
  * The parsed value is cast to `T` without runtime validation.
  *
  * @param path - Location of the JSON file, read as UTF-8.
  * @returns The parsed JSON value.
  */
 async function readJson<T = unknown>(path: string): Promise<T> {
  return JSON.parse(await readFile(path, "utf-8")) as T;
 }
 interface ParagraphRow {
  id: string;
  text: string;
@ -84,7 +89,6 @@ function computeConsensus(annotations: AnnotationRow[]): {
  const allAgreeSpecificity = maxSpecCount === total;
  const method =
    allAgreeCategory && allAgreeSpecificity ? "unanimous" : "majority";
  // Confidence = fraction of annotators that agreed with majority on both
  const agreedOnBoth = annotations.filter(
    (a) =>
      a.label.content_category === majorityCategory &&
@ -101,20 +105,30 @@ function computeConsensus(annotations: AnnotationRow[]): {
 }
 async function main() {
  const ROOT = "/home/joey/Documents/sec-cyBERT";
  const PARAGRAPHS_PATH =
    process.env.SEED_PARAGRAPHS_PATH ??
-    "/home/joey/Documents/sec-cyBERT/data/paragraphs/paragraphs-clean.jsonl";
+    `${ROOT}/data/paragraphs/paragraphs-clean.jsonl`;
  const ANNOTATIONS_PATH =
    process.env.SEED_ANNOTATIONS_PATH ??
-    "/home/joey/Documents/sec-cyBERT/data/annotations/stage1.jsonl";
+    `${ROOT}/data/annotations/stage1.jsonl`;
  const HOLDOUT_IDS_PATH =
    process.env.SEED_HOLDOUT_IDS_PATH ??
    `${ROOT}/data/gold/v2-holdout-ids.json`;
-  // 1. Read annotations and compute consensus per paragraph
+  // 1. Load holdout IDs (the 1,200 v2 paragraphs)
  console.log("Loading v2 holdout IDs...");
  const holdoutIds = new Set(await readJson<string[]>(HOLDOUT_IDS_PATH));
  console.log(`  ${holdoutIds.size} holdout IDs`);
  // 2. Read annotations and compute consensus (only for holdout paragraphs)
  console.log("Reading annotations...");
  const annotations = await readJsonl<AnnotationRow>(ANNOTATIONS_PATH);
-  console.log(`  ${annotations.length} annotations loaded`);
+  console.log(`  ${annotations.length} total annotations loaded`);
  const annotationsByParagraph = new Map<string, AnnotationRow[]>();
  for (const a of annotations) {
    if (!holdoutIds.has(a.paragraphId)) continue;
    const group = annotationsByParagraph.get(a.paragraphId);
    if (group) {
      group.push(a);
@ -123,7 +137,7 @@ async function main() {
    }
  }
  console.log(
-    `  ${annotationsByParagraph.size} paragraphs have annotations`,
+    `  ${annotationsByParagraph.size} holdout paragraphs have annotations`,
  );
  const consensusMap = new Map<
@ -134,12 +148,21 @@ async function main() {
    consensusMap.set(pid, computeConsensus(anns));
  }
-  // 2. Read paragraphs and insert in batches
+  // 3. Read paragraphs, filter to holdout only, and insert
  console.log("Reading paragraphs...");
-  const paragraphs = await readJsonl<ParagraphRow>(PARAGRAPHS_PATH);
+  const allParagraphs = await readJsonl<ParagraphRow>(PARAGRAPHS_PATH);
-  console.log(`  ${paragraphs.length} paragraphs loaded`);
+  const paragraphs = allParagraphs.filter((p) => holdoutIds.has(p.id));
  console.log(
    `  ${allParagraphs.length} total → ${paragraphs.length} holdout paragraphs`,
  );
-  const BATCH_SIZE = 1000;
+  if (paragraphs.length !== holdoutIds.size) {
    console.warn(
      `  WARNING: expected ${holdoutIds.size} holdout paragraphs but found ${paragraphs.length} in paragraphs file`,
    );
  }
  const BATCH_SIZE = 500;
  for (let i = 0; i < paragraphs.length; i += BATCH_SIZE) {
    const batch = paragraphs.slice(i, i + BATCH_SIZE);
    const rows = batch.map((p) => {
@ -173,7 +196,7 @@ async function main() {
    console.log(`  Inserted ${progress}/${paragraphs.length} paragraphs`);
  }
-  // 3. Create annotator accounts
+  // 4. Create annotator accounts (joey is admin, no separate admin account)
  console.log("Creating annotator accounts...");
  const annotatorAccounts = [
    { id: "aaryan", displayName: "Aaryan", password: "sec-cybert" },
@ -182,7 +205,6 @@ async function main() {
    { id: "xander", displayName: "Xander", password: "sec-cybert" },
    { id: "elisabeth", displayName: "Elisabeth", password: "sec-cybert" },
    { id: "joey", displayName: "Joey", password: "sec-cybert" },
    { id: "admin", displayName: "Admin", password: "sec-cybert" },
  ];
  await db
--- a/package.json
+++ b/package.json
@ -13,7 +13,6 @@
    "la:db:migrate": "bun run --filter labelapp db:migrate",
    "la:db:studio": "bun run --filter labelapp db:studio",
    "la:seed": "bun run --filter labelapp seed",
    "la:sample": "bun run --filter labelapp sample",
    "la:assign": "bun run --filter labelapp assign",
    "la:export": "bun run --filter labelapp export",
    "la:dump": "bun run --filter labelapp dump",
--- a/ts/src/cli.ts
+++ b/ts/src/cli.ts
@ -1,7 +1,7 @@
 import { readJsonl } from "./lib/jsonl.ts";
 import { Paragraph } from "@sec-cybert/schemas/paragraph.ts";
 import { Annotation } from "@sec-cybert/schemas/annotation.ts";
-import { STAGE1_MODELS, BENCHMARK_MODELS } from "./lib/openrouter.ts";
+import { STAGE1_MODEL, STAGE1_RUNS, BENCHMARK_MODELS } from "./lib/openrouter.ts";
 import { runBatch } from "./label/batch.ts";
 import { runGoldenBatch } from "./label/golden.ts";
 import { computeConsensus } from "./label/consensus.ts";
@ -22,9 +22,9 @@ Commands:
  extract:reparse     Re-parse cached 10-K HTML files with current parser (no network)
  extract:reparse-8k  Re-parse cached 8-K HTML files with current parser (no network)
  extract:merge       Merge 10-K + 8-K, remove truncated filings, dedup → training.jsonl
-  label:annotate   --model <id> [--limit N] [--concurrency N]
+  label:annotate   [--model <id>] [--run N] [--output-dir DIR] [--paragraphs PATH] [--limit N] [--concurrency N]
-  label:annotate-all [--limit N] [--concurrency N]
+  label:annotate-all [--output-dir DIR] [--paragraphs PATH] [--limit N] [--concurrency N]  (Grok ×3 self-consistency)
-  label:consensus
+  label:consensus  [--input-dir DIR]
  label:judge [--concurrency N]
  label:golden [--paragraphs <path>] [--limit N] [--delay N] [--concurrency N]  (Opus via Agent SDK)
  label:bench-holdout --model <id> [--concurrency N] [--limit N]   (benchmark model on holdout)
@ -66,38 +66,55 @@ async function loadParagraphs(): Promise<Paragraph[]> {
 }
 async function cmdAnnotate(): Promise<void> {
-  const modelId = flag("model");
+  const modelId = flag("model") ?? STAGE1_MODEL;
-  if (!modelId) {
+  const paragraphsPath = flag("paragraphs") ?? `${DATA}/paragraphs/paragraphs-clean.patched.jsonl`;
-    console.error("--model is required");
+  const { records: paragraphs, skipped } = await readJsonl(paragraphsPath, Paragraph);
  if (skipped > 0) process.stderr.write(`  ⚠ Skipped ${skipped} invalid paragraph lines\n`);
  if (paragraphs.length === 0) {
    process.stderr.write(`  ✖ No paragraphs found at ${paragraphsPath}\n`);
    process.exit(1);
  }
-  const paragraphs = await loadParagraphs();
+  process.stderr.write(`  Loaded ${paragraphs.length} paragraphs from ${paragraphsPath}\n`);
  const modelShort = modelId.split("/")[1]!;
  const outputDir = flag("output-dir") ?? "v2-stage1";
  const runSuffix = flag("run") ? `.run${flag("run")}` : "";
  await runBatch(paragraphs, {
    modelId,
    stage: "stage1",
-    outputPath: `${DATA}/annotations/stage1/${modelShort}.jsonl`,
+    outputPath: `${DATA}/annotations/${outputDir}/${modelShort}${runSuffix}.jsonl`,
-    errorsPath: `${DATA}/annotations/stage1/${modelShort}-errors.jsonl`,
+    errorsPath: `${DATA}/annotations/${outputDir}/${modelShort}${runSuffix}-errors.jsonl`,
    sessionsPath: SESSIONS_PATH,
-    concurrency: flagInt("concurrency", 12),
+    concurrency: flagInt("concurrency", 60),
    limit: flag("limit") !== undefined ? flagInt("limit", 50) : undefined,
  });
 }
 async function cmdAnnotateAll(): Promise<void> {
-  const paragraphs = await loadParagraphs();
+  const paragraphsPath = flag("paragraphs") ?? `${DATA}/paragraphs/paragraphs-clean.patched.jsonl`;
-  const concurrency = flagInt("concurrency", 12);
+  const { records: allParagraphs, skipped } = await readJsonl(paragraphsPath, Paragraph);
-  const limit = flag("limit") !== undefined ? flagInt("limit", 50) : undefined;
+  if (skipped > 0) process.stderr.write(`  ⚠ Skipped ${skipped} invalid paragraph lines\n`);
  if (allParagraphs.length === 0) {
    process.stderr.write(`  ✖ No paragraphs found at ${paragraphsPath}\n`);
    process.exit(1);
  }
  process.stderr.write(`  Loaded ${allParagraphs.length} paragraphs from ${paragraphsPath}\n`);
-  for (const modelId of STAGE1_MODELS) {
+  const concurrency = flagInt("concurrency", 60);
-    const modelShort = modelId.split("/")[1]!;
+  const limit = flag("limit") !== undefined ? flagInt("limit", 50) : undefined;
-    process.stderr.write(`\n  ═══ ${modelId} ═══\n`);
+  const outputDir = flag("output-dir") ?? "v2-stage1";
-    await runBatch(paragraphs, {
+  const modelShort = STAGE1_MODEL.split("/")[1]!;
-      modelId,
+
  process.stderr.write(`  Stage 1: ${STAGE1_MODEL} ×${STAGE1_RUNS} self-consistency → ${outputDir}/\n`);
  for (let run = 1; run <= STAGE1_RUNS; run++) {
    process.stderr.write(`\n  ═══ ${STAGE1_MODEL} run ${run}/${STAGE1_RUNS} ═══\n`);
    await runBatch(allParagraphs, {
      modelId: STAGE1_MODEL,
      stage: "stage1",
-      outputPath: `${DATA}/annotations/stage1/${modelShort}.jsonl`,
+      outputPath: `${DATA}/annotations/${outputDir}/${modelShort}.run${run}.jsonl`,
-      errorsPath: `${DATA}/annotations/stage1/${modelShort}-errors.jsonl`,
+      errorsPath: `${DATA}/annotations/${outputDir}/${modelShort}.run${run}-errors.jsonl`,
      sessionsPath: SESSIONS_PATH,
      concurrency,
      limit,
@ -106,14 +123,15 @@ async function cmdAnnotateAll(): Promise<void> {
 }
 async function cmdConsensus(): Promise<void> {
-  // Load all Stage 1 annotations
+  // Load all Stage 1 annotations (3 self-consistency runs)
  const allAnnotations: Map<string, Annotation[]> = new Map();
  const inputDir = flag("input-dir") ?? "v2-stage1";
  const modelShort = STAGE1_MODEL.split("/")[1]!;
-  for (const modelId of STAGE1_MODELS) {
+  for (let run = 1; run <= STAGE1_RUNS; run++) {
-    const modelShort = modelId.split("/")[1]!;
+    const path = `${DATA}/annotations/${inputDir}/${modelShort}.run${run}.jsonl`;
    const path = `${DATA}/annotations/stage1/${modelShort}.jsonl`;
    const { records } = await readJsonl(path, Annotation);
-    process.stderr.write(`  Loaded ${records.length} annotations from ${modelShort}\n`);
+    process.stderr.write(`  Loaded ${records.length} annotations from run ${run}\n`);
    for (const ann of records) {
      const existing = allAnnotations.get(ann.paragraphId) ?? [];
      existing.push(ann);
@ -151,12 +169,14 @@ async function cmdJudge(): Promise<void> {
  const consensusPath = `${DATA}/annotations/consensus.jsonl`;
  const { records: rawConsensus } = await readJsonlRaw(consensusPath);
-  // Load all stage 1 annotations for lookup
+  // Load all stage 1 annotations for lookup (3 self-consistency runs)
  const stage1Map: Map<string, Annotation[]> = new Map();
-  for (const modelId of STAGE1_MODELS) {
+  const judgeInputDir = flag("input-dir") ?? "v2-stage1";
-    const modelShort = modelId.split("/")[1]!;
+  const judgeModelShort = STAGE1_MODEL.split("/")[1]!;
  for (let run = 1; run <= STAGE1_RUNS; run++) {
    const { records } = await readJsonl(
-      `${DATA}/annotations/stage1/${modelShort}.jsonl`,
+      `${DATA}/annotations/${judgeInputDir}/${judgeModelShort}.run${run}.jsonl`,
      Annotation,
    );
    for (const ann of records) {
@ -472,17 +492,22 @@ async function cmdCost(): Promise<void> {
  const modelCosts: Record<string, { cost: number; count: number }> = {};
  const stageCosts: Record<string, { cost: number; count: number }> = {};
-  // Stage 1
+  // Stage 1 (Grok ×3 self-consistency runs)
-  for (const modelId of STAGE1_MODELS) {
+  const costModelShort = STAGE1_MODEL.split("/")[1]!;
-    const modelShort = modelId.split("/")[1]!;
+  for (let run = 1; run <= STAGE1_RUNS; run++) {
-    const path = `${DATA}/annotations/stage1/${modelShort}.jsonl`;
+    const path = `${DATA}/annotations/v2-stage1/${costModelShort}.run${run}.jsonl`;
-    const { records } = await readJsonl(path, Annotation);
+    try {
-    const cost = records.reduce((sum, a) => sum + a.provenance.costUsd, 0);
+      const { records } = await readJsonl(path, Annotation);
-    modelCosts[modelId] = { cost, count: records.length };
+      const cost = records.reduce((sum, a) => sum + a.provenance.costUsd, 0);
-    const stage = stageCosts["stage1"] ?? { cost: 0, count: 0 };
+      const key = `${STAGE1_MODEL} (run ${run})`;
-    stage.cost += cost;
+      modelCosts[key] = { cost, count: records.length };
-    stage.count += records.length;
+      const stage = stageCosts["stage1"] ?? { cost: 0, count: 0 };
-    stageCosts["stage1"] = stage;
+      stage.cost += cost;
      stage.count += records.length;
      stageCosts["stage1"] = stage;
    } catch {
      // Run file may not exist yet
    }
  }
  // Stage 2
--- a/ts/src/lib/openrouter.ts
+++ b/ts/src/lib/openrouter.ts
@ -3,19 +3,18 @@ import { createOpenRouter } from "@openrouter/ai-sdk-provider";
 /** Singleton OpenRouter client. Uses OPENROUTER_API_KEY from env. */
 export const openrouter = createOpenRouter();
-/** Stage 1 annotators — cheap reasoning models, low effort. */
+/** Stage 1 annotator — Grok 4.1 Fast ×3 self-consistency (Wang et al. 2022). */
-export const STAGE1_MODELS = [
+export const STAGE1_MODEL = "x-ai/grok-4.1-fast" as const;
-  "google/gemini-3.1-flash-lite-preview",
+export const STAGE1_RUNS = 3;
  "xiaomi/mimo-v2-flash",
  "x-ai/grok-4.1-fast",
 ] as const;
 /** Stage 2 judge — medium reasoning. */
 export const STAGE2_JUDGE = "anthropic/claude-sonnet-4.6" as const;
 /** Full benchmark panel — 9 models from 8 providers. */
 export const BENCHMARK_MODELS = [
-  ...STAGE1_MODELS,
+  "google/gemini-3.1-flash-lite-preview",
  "xiaomi/mimo-v2-flash",
  "x-ai/grok-4.1-fast",
  "openai/gpt-5.4",
  "moonshotai/kimi-k2.5",
  "google/gemini-3.1-pro-preview",
@ -24,7 +23,7 @@ export const BENCHMARK_MODELS = [
  "xiaomi/mimo-v2-pro:exacto",
 ] as const;
-export type Stage1Model = (typeof STAGE1_MODELS)[number];
+export type Stage1Model = typeof STAGE1_MODEL;
 export type BenchmarkModel = (typeof BENCHMARK_MODELS)[number];
 /** Extract the provider name from an OpenRouter model ID. */