SEC-cyBERT/scripts/examine-v35-errors.py
2026-04-03 14:43:53 -04:00

765 lines
30 KiB
Python

"""Examine specific paragraphs where v3.5 performed WORSE than v3.0 against human labels.
Focus on BG↔MR and MR↔RMP confusion axes.
"""
import json
import textwrap
from collections import Counter, defaultdict
from pathlib import Path
# ── Paths ──────────────────────────────────────────────────────────────────────
# Repo root, assuming this script lives in <root>/scripts/.
ROOT = Path(__file__).resolve().parent.parent
# Golden (reference) annotations for the two prompt versions under comparison.
V30_GOLDEN = ROOT / "data/annotations/golden/opus.jsonl"
V35_GOLDEN = ROOT / "data/annotations/golden-v35/opus.jsonl"
# Per-model benchmark annotations on the holdout set, one directory per version.
V30_BENCH = ROOT / "data/annotations/bench-holdout"
V35_BENCH = ROOT / "data/annotations/bench-holdout-v35"
# Raw human annotator votes and holdout metadata (confusion-axis tags).
HUMAN_LABELS = ROOT / "data/gold/human-labels-raw.jsonl"
HOLDOUT_META = ROOT / "data/gold/holdout-rerun-v35.jsonl"
PARAGRAPHS = ROOT / "data/gold/paragraphs-holdout.jsonl"
# MODEL_FILES and MODEL_NAMES are parallel lists: index i in one corresponds to
# index i in the other. Keep them in sync when adding/removing models.
MODEL_FILES = [
    "opus.jsonl",
    "gpt-5.4.jsonl",
    "gemini-3.1-pro-preview.jsonl",
    "glm-5:exacto.jsonl",
    "kimi-k2.5.jsonl",
    "mimo-v2-pro:exacto.jsonl",
    "minimax-m2.7:exacto.jsonl",
]
MODEL_NAMES = [
    "Opus",
    "GPT-5.4",
    "Gemini",
    "GLM-5",
    "Kimi",
    "Mimo",
    "MiniMax",
]
# Models to EXCLUDE from majority calculation
EXCLUDED_FROM_MAJORITY = {"MiniMax"}
# Short code → full content-category name.
CAT_ABBREV = {
    "BG": "Board Governance",
    "MR": "Management Role",
    "RMP": "Risk Management Process",
    "SI": "Strategy Integration",
    "NO": "None/Other",
    "ID": "Incident Disclosure",
    "TPR": "Third-Party Risk",
}
# Reverse lookup: full category name → short code.
ABBREV_CAT = {full_name: code for code, full_name in CAT_ABBREV.items()}


def abbrev(cat: str) -> str:
    """Return the short code for *cat*; pass through values that are already
    short codes (or otherwise unknown)."""
    try:
        return ABBREV_CAT[cat]
    except KeyError:
        return cat
def load_jsonl(path: Path) -> list[dict]:
    """Parse a JSON-Lines file, skipping blank lines.

    Args:
        path: File containing one JSON object per line.

    Returns:
        Decoded objects, in file order.
    """
    # Explicit encoding: the data files are UTF-8; without it, decoding would
    # depend on the platform's locale default and can break on Windows.
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
def load_annotations(base_dir: Path, filename: str) -> dict[str, str]:
    """Load paragraphId → content_category mapping."""
    mapping: dict[str, str] = {}
    for record in load_jsonl(base_dir / filename):
        mapping[record["paragraphId"]] = record["label"]["content_category"]
    return mapping
def load_golden(path: Path) -> dict[str, str]:
    """Load paragraphId → content_category mapping from a golden file."""
    return {
        record["paragraphId"]: record["label"]["content_category"]
        for record in load_jsonl(path)
    }
# ── Load all data ─────────────────────────────────────────────────────────────
print("Loading data...")
# Confusion axis metadata
meta_records = load_jsonl(HOLDOUT_META)
# paragraphId → list of confusion-axis tags (e.g. "BG_MR", "MR_RMP").
pid_axes: dict[str, list[str]] = {r["paragraphId"]: r["axes"] for r in meta_records}
# Universe of paragraph ids this script analyzes.
all_pids = set(pid_axes.keys())
# Human labels: paragraphId → list of (annotator, category)
# Only paragraphs that are in the holdout metadata are kept.
human_raw = load_jsonl(HUMAN_LABELS)
human_labels: dict[str, list[tuple[str, str]]] = defaultdict(list)
for r in human_raw:
    if r["paragraphId"] in all_pids:
        human_labels[r["paragraphId"]].append(
            (r["annotatorName"], r["contentCategory"])
        )
def human_majority(pid: str) -> str | None:
    """Return majority category from human annotators, or None if no data.

    Ties resolve to whichever category was seen first (Counter insertion order).
    """
    votes = human_labels.get(pid)
    if not votes:
        return None
    tally = Counter(cat for _, cat in votes)
    (winner, _), = tally.most_common(1)
    return winner
# Paragraph text
para_records = load_jsonl(PARAGRAPHS)
# paragraphId → raw paragraph text, for display and keyword analysis.
para_text: dict[str, str] = {r["id"]: r["text"] for r in para_records}


def _load_signal_set(golden_path: Path, bench_dir: Path) -> list[dict[str, str]]:
    """Load one pid→category mapping per model, in MODEL_FILES order.

    Opus labels come from the golden file; every other model comes from the
    benchmark directory. (Previously this loop was duplicated for v3.0/v3.5.)
    """
    signals: list[dict[str, str]] = []
    for fname in MODEL_FILES:
        if fname == "opus.jsonl":
            signals.append(load_golden(golden_path))
        else:
            signals.append(load_annotations(bench_dir, fname))
    return signals


# v3.0 signals: model_idx → {pid: category}
v30_signals: list[dict[str, str]] = _load_signal_set(V30_GOLDEN, V30_BENCH)
# v3.5 signals
v35_signals: list[dict[str, str]] = _load_signal_set(V35_GOLDEN, V35_BENCH)
def get_signals(signals: list[dict[str, str]], pid: str) -> list[str | None]:
"""Get category from each model for a paragraph."""
return [s.get(pid) for s in signals]
def majority_vote(signals: list[str | None], exclude_minimax: bool = True) -> str | None:
"""Compute majority from 6 models (excluding minimax which is index 6)."""
cats = []
for i, s in enumerate(signals):
if s is None:
continue
if exclude_minimax and MODEL_NAMES[i] in EXCLUDED_FROM_MAJORITY:
continue
cats.append(s)
if not cats:
return None
counts = Counter(cats)
return counts.most_common(1)[0][0]
def unanimity_score(signals: list[str | None], exclude_minimax: bool = True) -> float:
"""Fraction of models agreeing with majority (0-1)."""
cats = []
for i, s in enumerate(signals):
if s is None:
continue
if exclude_minimax and MODEL_NAMES[i] in EXCLUDED_FROM_MAJORITY:
continue
cats.append(s)
if not cats:
return 0.0
counts = Counter(cats)
top_count = counts.most_common(1)[0][1]
return top_count / len(cats)
def format_signals(signals: list[str | None]) -> str:
    """Compact one-line display of every model's signal (?? when missing)."""
    rendered = [
        f"{name}=??" if cat is None else f"{name}={abbrev(cat)}"
        for name, cat in zip(MODEL_NAMES, signals)
    ]
    return ", ".join(rendered)
def wrap_text(text: str, width: int = 100) -> str:
    """Wrap *text* to *width* columns, indenting continuation lines by one
    space for the indented display layout."""
    lines = textwrap.wrap(text, width=width)
    return "\n ".join(lines)
def print_paragraph_analysis(
    pid: str,
    v30_sigs: list[str | None],
    v35_sigs: list[str | None],
    header: str = "",
):
    """Print detailed analysis for a single paragraph.

    Shows the paragraph text, every human vote, both prompt versions' model
    signals and majorities, which models flipped between versions, and whether
    each version's majority matches the human majority.
    """
    text = para_text.get(pid, "[TEXT NOT FOUND]")
    h_labels = human_labels.get(pid, [])
    h_maj = human_majority(pid)
    v30_maj = majority_vote(v30_sigs)
    v35_maj = majority_vote(v35_sigs)
    axes = pid_axes.get(pid, [])
    # NOTE(review): several strings here multiply an EMPTY string ('' * 110),
    # and the marker strings below are both empty — a separator glyph / check
    # mark was likely lost when this file was re-encoded. Confirm against the
    # original and restore the characters.
    if header:
        print(f"\n{'' * 110}")
        print(f" {header}")
        print(f"{'' * 110}")
    else:
        print(f"\n{'' * 110}")
    print(f" PID: {pid}")
    print(f" Axes: {', '.join(axes)}")
    print(f"\n TEXT:")
    print(f" {wrap_text(text)}")
    print(f"\n HUMAN VOTES:")
    for name, cat in h_labels:
        marker = "" if cat == h_maj else ""
        print(f" {name:12s}{abbrev(cat):5s}{marker}")
    print(f" Majority → {abbrev(h_maj) if h_maj else '??'}")
    print(f"\n v3.0 signals: {format_signals(v30_sigs)}")
    print(f" v3.0 majority (excl. MiniMax): {abbrev(v30_maj) if v30_maj else '??'}")
    print(f" v3.5 signals: {format_signals(v35_sigs)}")
    print(f" v3.5 majority (excl. MiniMax): {abbrev(v35_maj) if v35_maj else '??'}")
    # What changed between prompt versions, model by model.
    changed_models = []
    for i, (old, new) in enumerate(zip(v30_sigs, v35_sigs)):
        if old is not None and new is not None and old != new:
            changed_models.append(f"{MODEL_NAMES[i]}: {abbrev(old)}{abbrev(new)}")
    if changed_models:
        print(f"\n CHANGES: {', '.join(changed_models)}")
    # FIX: compare via abbrev() like every other comparison in this script
    # (human labels may be stored as short codes while model labels are full
    # names, so a raw == could report false "WRONG"). Also report "N/A"
    # instead of a misleading "WRONG" when either majority is unknown.
    correct_v30 = (abbrev(v30_maj) == abbrev(h_maj)) if v30_maj and h_maj else None
    correct_v35 = (abbrev(v35_maj) == abbrev(h_maj)) if v35_maj and h_maj else None

    def _verdict(flag: bool | None) -> str:
        # Three-way rendering of an optional boolean verdict.
        if flag is None:
            return "N/A"
        return "CORRECT" if flag else "WRONG"

    print(f" v3.0 {_verdict(correct_v30)} | v3.5 {_verdict(correct_v35)}")
# ══════════════════════════════════════════════════════════════════════════════
# SECTION 1: BG↔MR Regression Cases
# ══════════════════════════════════════════════════════════════════════════════
# NOTE(review): '"" * 110' prints nothing — a separator glyph was likely lost
# in a re-encoding of this file; confirm and restore.
print("\n" + "" * 110)
print(" SECTION 1: BG↔MR AXIS — REGRESSION CASES")
print(" (v3.0 matched human majority, but v3.5 does NOT)")
print("" * 110)
# All holdout paragraphs tagged with the BG↔MR confusion axis.
bg_mr_pids = [pid for pid, axes in pid_axes.items() if "BG_MR" in axes]
print(f"\nTotal BG↔MR paragraphs: {len(bg_mr_pids)}")
# Filter to those with human labels
bg_mr_pids = [pid for pid in bg_mr_pids if human_majority(pid) is not None]
print(f"With human labels: {len(bg_mr_pids)}")
# Bucket every paragraph by (v3.0 correct?, v3.5 correct?) vs human majority.
regressions_bg_mr = []
improvements_bg_mr = []
both_correct_bg_mr = []
both_wrong_bg_mr = []
for pid in bg_mr_pids:
    v30_sigs = get_signals(v30_signals, pid)
    v35_sigs = get_signals(v35_signals, pid)
    v30_maj = majority_vote(v30_sigs)
    v35_maj = majority_vote(v35_sigs)
    h_maj = human_majority(pid)
    if v30_maj is None or v35_maj is None or h_maj is None:
        continue  # need all three verdicts to classify the paragraph
    # abbrev() normalizes full names and short codes to the same space.
    v30_correct = abbrev(v30_maj) == abbrev(h_maj)
    v35_correct = abbrev(v35_maj) == abbrev(h_maj)
    if v30_correct and not v35_correct:
        regressions_bg_mr.append(pid)
    elif not v30_correct and v35_correct:
        improvements_bg_mr.append(pid)
    elif v30_correct and v35_correct:
        both_correct_bg_mr.append(pid)
    else:
        both_wrong_bg_mr.append(pid)
print(f"\nBG↔MR Summary:")
print(f" Both correct: {len(both_correct_bg_mr)}")
print(f" Both wrong: {len(both_wrong_bg_mr)}")
print(f" v3.0 correct → v3.5 WRONG (REGRESSIONS): {len(regressions_bg_mr)}")
print(f" v3.0 wrong → v3.5 correct (IMPROVEMENTS): {len(improvements_bg_mr)}")
print(f"\n{'' * 110}")
print(f" BG↔MR REGRESSIONS (showing all, up to 20)")
print(f"{'' * 110}")
# Full per-paragraph dumps for the first 20 regressions.
for i, pid in enumerate(regressions_bg_mr[:20]):
    v30_sigs = get_signals(v30_signals, pid)
    v35_sigs = get_signals(v35_signals, pid)
    print_paragraph_analysis(pid, v30_sigs, v35_sigs, f"REGRESSION #{i+1}")
# BG↔MR improvements
print(f"\n{'' * 110}")
print(f" BG↔MR IMPROVEMENTS (showing up to 5)")
print(f"{'' * 110}")
for i, pid in enumerate(improvements_bg_mr[:5]):
    v30_sigs = get_signals(v30_signals, pid)
    v35_sigs = get_signals(v35_signals, pid)
    print_paragraph_analysis(pid, v30_sigs, v35_sigs, f"IMPROVEMENT #{i+1}")
# ══════════════════════════════════════════════════════════════════════════════
# SECTION 2: MR↔RMP Non-Convergence Cases
# ══════════════════════════════════════════════════════════════════════════════
print("\n\n" + "" * 110)
print(" SECTION 2: MR↔RMP AXIS — NON-CONVERGENCE AND REGRESSIONS")
print("" * 110)
# All holdout paragraphs tagged with the MR↔RMP confusion axis, then
# restricted to those with human labels.
mr_rmp_pids = [pid for pid, axes in pid_axes.items() if "MR_RMP" in axes]
print(f"\nTotal MR↔RMP paragraphs: {len(mr_rmp_pids)}")
mr_rmp_pids = [pid for pid in mr_rmp_pids if human_majority(pid) is not None]
print(f"With human labels: {len(mr_rmp_pids)}")
# Find: less unanimous in v3.5 OR flipped away from human majority
# Each entry is (pid, v3.0 unanimity, v3.5 unanimity).
non_convergence_mr_rmp = []
regressions_mr_rmp = []
improvements_mr_rmp = []
for pid in mr_rmp_pids:
    v30_sigs = get_signals(v30_signals, pid)
    v35_sigs = get_signals(v35_signals, pid)
    v30_maj = majority_vote(v30_sigs)
    v35_maj = majority_vote(v35_sigs)
    h_maj = human_majority(pid)
    v30_unanimity = unanimity_score(v30_sigs)
    v35_unanimity = unanimity_score(v35_sigs)
    if v30_maj is None or v35_maj is None or h_maj is None:
        continue
    v30_correct = abbrev(v30_maj) == abbrev(h_maj)
    v35_correct = abbrev(v35_maj) == abbrev(h_maj)
    # Regression: was correct, now wrong
    if v30_correct and not v35_correct:
        regressions_mr_rmp.append((pid, v30_unanimity, v35_unanimity))
    # Non-convergence: less unanimous OR flipped away
    # (a regression is also counted here, so the two lists overlap).
    if v35_unanimity < v30_unanimity or (v30_correct and not v35_correct):
        non_convergence_mr_rmp.append((pid, v30_unanimity, v35_unanimity))
    if not v30_correct and v35_correct:
        improvements_mr_rmp.append((pid, v30_unanimity, v35_unanimity))
# Sort non-convergence by delta (worst first)
non_convergence_mr_rmp.sort(key=lambda x: x[1] - x[2], reverse=True)
print(f"\nMR↔RMP Summary:")
print(f" Regressions (correct→wrong): {len(regressions_mr_rmp)}")
print(f" Non-convergence (less unanimous or regressed): {len(non_convergence_mr_rmp)}")
print(f" Improvements (wrong→correct): {len(improvements_mr_rmp)}")
print(f"\n{'' * 110}")
print(f" MR↔RMP NON-CONVERGENCE / REGRESSION CASES (showing 10)")
print(f"{'' * 110}")
# Show up to 10 distinct paragraphs, worst unanimity-drop first.
shown = set()
count = 0
for pid, v30_u, v35_u in non_convergence_mr_rmp:
    if count >= 10:
        break
    if pid in shown:
        continue
    shown.add(pid)
    v30_sigs = get_signals(v30_signals, pid)
    v35_sigs = get_signals(v35_signals, pid)
    v30_maj = majority_vote(v30_sigs)
    v35_maj = majority_vote(v35_sigs)
    h_maj = human_majority(pid)
    # Distinguish true regressions from mere unanimity drops in the header.
    label = "REGRESSION" if (abbrev(v30_maj) == abbrev(h_maj) and abbrev(v35_maj) != abbrev(h_maj)) else "LESS UNANIMOUS"
    print_paragraph_analysis(
        pid, v30_sigs, v35_sigs,
        f"{label} #{count+1} (unanimity: v3.0={v30_u:.0%} → v3.5={v35_u:.0%})"
    )
    count += 1
print(f"\n{'' * 110}")
print(f" MR↔RMP IMPROVEMENTS (showing up to 5)")
print(f"{'' * 110}")
for i, (pid, v30_u, v35_u) in enumerate(improvements_mr_rmp[:5]):
    v30_sigs = get_signals(v30_signals, pid)
    v35_sigs = get_signals(v35_signals, pid)
    print_paragraph_analysis(
        pid, v30_sigs, v35_sigs,
        f"IMPROVEMENT #{i+1} (unanimity: v3.0={v30_u:.0%} → v3.5={v35_u:.0%})"
    )
# ══════════════════════════════════════════════════════════════════════════════
# SECTION 3: Error Pattern Analysis
# ══════════════════════════════════════════════════════════════════════════════
print("\n\n" + "" * 110)
print(" SECTION 3: ERROR PATTERN ANALYSIS")
print("" * 110)
# ── BG↔MR regression patterns ───────────────────────────────────────────────
print(f"\n{'' * 110}")
print(f" 3A: BG↔MR REGRESSION PATTERNS")
print(f"{'' * 110}")
if regressions_bg_mr:
    # Analyze what the human majority is and what v3.5 switched to
    regression_directions = Counter()
    regression_model_flips = Counter()
    for pid in regressions_bg_mr:
        h_maj = human_majority(pid)
        v30_sigs = get_signals(v30_signals, pid)
        v35_sigs = get_signals(v35_signals, pid)
        v30_maj = majority_vote(v30_sigs)
        v35_maj = majority_vote(v35_sigs)
        # NOTE(review): the two abbrevs are concatenated with no separator —
        # an arrow glyph was probably lost in re-encoding; confirm.
        direction = f"{abbrev(v30_maj)}{abbrev(v35_maj)} (human={abbrev(h_maj)})"
        regression_directions[direction] += 1
        # Which models flipped?
        for i, (old, new) in enumerate(zip(v30_sigs, v35_sigs)):
            if old and new and old != new:
                regression_model_flips[MODEL_NAMES[i]] += 1
    print(f"\n Regression directions (v3.0→v3.5, human ground truth):")
    for direction, count in regression_directions.most_common():
        print(f" {direction}: {count}")
    print(f"\n Models that flipped most on regressions:")
    for model, count in regression_model_flips.most_common():
        print(f" {model}: {count} flips")
    # Text pattern analysis
    print(f"\n Common textual signals in regression paragraphs:")
    # Substring markers (some are stems, e.g. "qualif" matches "qualified"/
    # "qualifications"); value = number of regression paragraphs containing it.
    signal_words = {
        "board": 0, "committee": 0, "oversee": 0, "oversight": 0,
        "report": 0, "director": 0, "officer": 0, "CISO": 0,
        "governance": 0, "responsible": 0, "qualif": 0, "experience": 0,
        "manage": 0, "program": 0, "framework": 0, "process": 0,
        "audit": 0,
    }
    for pid in regressions_bg_mr:
        text = para_text.get(pid, "").lower()
        for word in signal_words:
            if word.lower() in text:
                signal_words[word] += 1
    total_reg = len(regressions_bg_mr)
    # Most frequent markers first; skip markers that never occurred.
    for word, count in sorted(signal_words.items(), key=lambda x: -x[1]):
        if count > 0:
            print(f" '{word}': {count}/{total_reg} ({count/total_reg:.0%})")
    # Check if humans are split on these
    print(f"\n Human agreement on regressions:")
    unanimous_human = 0
    split_human = 0
    for pid in regressions_bg_mr:
        labels = human_labels.get(pid, [])
        cats = [c for _, c in labels]
        if len(set(cats)) == 1:
            unanimous_human += 1
        else:
            split_human += 1
    print(f" Unanimous human: {unanimous_human}")
    print(f" Split human (2-1): {split_human}")
    if split_human > 0:
        print(f"\n Split-human regression details:")
        for pid in regressions_bg_mr:
            labels = human_labels.get(pid, [])
            cats = [c for _, c in labels]
            if len(set(cats)) > 1:
                votes = ", ".join(f"{n}={abbrev(c)}" for n, c in labels)
                print(f" {pid[:12]}... → {votes}")
else:
    print("\n No BG↔MR regressions found.")
# ── MR↔RMP patterns ─────────────────────────────────────────────────────────
print(f"\n{'' * 110}")
print(f" 3B: MR↔RMP NON-CONVERGENCE PATTERNS")
print(f"{'' * 110}")
if non_convergence_mr_rmp:
    # Regression directions
    nc_directions = Counter()
    nc_model_flips = Counter()
    for pid, _, _ in non_convergence_mr_rmp:
        h_maj = human_majority(pid)
        v30_sigs = get_signals(v30_signals, pid)
        v35_sigs = get_signals(v35_signals, pid)
        v30_maj = majority_vote(v30_sigs)
        v35_maj = majority_vote(v35_sigs)
        # NOTE(review): separator glyph between the two abbrevs appears lost
        # in re-encoding; confirm.
        direction = f"{abbrev(v30_maj)}{abbrev(v35_maj)} (human={abbrev(h_maj)})"
        nc_directions[direction] += 1
        for i, (old, new) in enumerate(zip(v30_sigs, v35_sigs)):
            if old and new and old != new:
                nc_model_flips[MODEL_NAMES[i]] += 1
    print(f"\n Direction of non-convergent shifts:")
    for direction, count in nc_directions.most_common():
        print(f" {direction}: {count}")
    print(f"\n Models that flipped most:")
    for model, count in nc_model_flips.most_common():
        print(f" {model}: {count} flips")
    # Text pattern analysis — compare what helped vs what didn't
    print(f"\n Text signals in NON-CONVERGENT vs IMPROVED paragraphs:")
    keywords = ["CISO", "officer", "responsible", "oversee", "report",
                "program", "framework", "qualif", "experience", "certif",
                "manage", "assess", "monitor", "team", "director"]
    nc_pids_set = {pid for pid, _, _ in non_convergence_mr_rmp}
    imp_pids_set = {pid for pid, _, _ in improvements_mr_rmp}
    print(f"\n {'Keyword':<16} {'Non-conv':>10} {'Improved':>10}")
    print(f" {''*16} {''*10} {''*10}")
    for kw in keywords:
        # Case-insensitive substring match against paragraph text.
        nc_count = sum(1 for pid in nc_pids_set if kw.lower() in para_text.get(pid, "").lower())
        imp_count = sum(1 for pid in imp_pids_set if kw.lower() in para_text.get(pid, "").lower())
        nc_pct = f"{nc_count}/{len(nc_pids_set)}" if nc_pids_set else "0"
        imp_pct = f"{imp_count}/{len(imp_pids_set)}" if imp_pids_set else "0"
        print(f" {kw:<16} {nc_pct:>10} {imp_pct:>10}")
    # Person-removal test analysis
    print(f"\n Person-removal test applicability:")
    print(f" Checking if regression paragraphs have person as ONLY subject...")
    # Crude heuristic: does the text mention a person/role vs a process noun?
    for pid, _, _ in regressions_mr_rmp:
        text = para_text.get(pid, "")
        has_person_subject = any(
            marker in text.lower()
            for marker in ["ciso", "chief information", "chief technology",
                           "vice president", "director of", "officer"]
        )
        has_process_subject = any(
            marker in text.lower()
            for marker in ["program", "framework", "process", "system",
                           "controls", "policies", "procedures"]
        )
        h_maj = human_majority(pid)
        v35_maj = majority_vote(get_signals(v35_signals, pid))
        print(
            f" {pid[:12]}... person_subj={has_person_subject} "
            f"process_subj={has_process_subject} "
            f"human={abbrev(h_maj)} v3.5={abbrev(v35_maj)}"
        )
else:
    print("\n No MR↔RMP non-convergence cases found.")
# ══════════════════════════════════════════════════════════════════════════════
# SECTION 4: Ruling Recommendations
# ══════════════════════════════════════════════════════════════════════════════
print("\n\n" + "" * 110)
print(" SECTION 4: RULING RECOMMENDATIONS")
print("" * 110)
print("""
Based on the error analysis above, here are the specific ruling observations:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
4A: BG↔MR Board-Line Test
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
CURRENT RULING (Rule 2):
"When a paragraph spans layers (governance chain paragraphs): apply the
dominant-subject test — which layer occupies the most sentence-subjects?"
"Governance overview spanning board → committee → officer → program →
Board Governance if the board/committee occupies more sentence-subjects;
Management Role if the officer does; Risk Management Process if the
program does"
""")
# Analyze the specific regressions to give targeted advice
if regressions_bg_mr:
    # Count what direction the regressions went
    # bg_to_mr: v3.5 said MR where humans say BG (and v3.0 agreed with humans).
    bg_to_mr = sum(
        1 for pid in regressions_bg_mr
        if abbrev(majority_vote(get_signals(v35_signals, pid))) == "MR"
        and abbrev(human_majority(pid)) == "BG"
    )
    # mr_to_bg: the symmetric direction.
    mr_to_bg = sum(
        1 for pid in regressions_bg_mr
        if abbrev(majority_vote(get_signals(v35_signals, pid))) == "BG"
        and abbrev(human_majority(pid)) == "MR"
    )
    other_dir = len(regressions_bg_mr) - bg_to_mr - mr_to_bg
    print(f" EMPIRICAL FINDING:")
    print(f" Regressions that moved BG→MR (human says BG): {bg_to_mr}")
    print(f" Regressions that moved MR→BG (human says MR): {mr_to_bg}")
    print(f" Other directions: {other_dir}")
    # Emit a tailored recommendation depending on the dominant direction;
    # equal counts produce no recommendation at all.
    if bg_to_mr > mr_to_bg:
        print("""
DIAGNOSIS: The dominant-subject test is OVER-CORRECTING toward MR.
When a governance chain mentions a CISO or officer, models are counting that
mention as a "sentence subject" even when the paragraph's primary purpose is
describing the board/committee oversight structure.
PROPOSED FIX — add a "purpose test" before the subject count:
"Before counting sentence-subjects, ask: what is the paragraph's PRIMARY
COMMUNICATIVE PURPOSE? If it is to describe the oversight/reporting
structure (who oversees whom, what gets reported where), the paragraph
is Board Governance even if individual officers are named as intermediaries.
The dominant-subject count applies only when the paragraph's purpose is
genuinely ambiguous between describing the oversight structure and
describing the officer's role."
Alternatively, add a carve-out:
"A governance chain paragraph (board → committee → officer → program)
defaults to Board Governance unless the officer section constitutes
MORE THAN HALF the paragraph's content AND includes qualifications,
credentials, or personal background."
""")
    elif mr_to_bg > bg_to_mr:
        print("""
DIAGNOSIS: The dominant-subject test is OVER-CORRECTING toward BG.
Paragraphs that are primarily about management roles are being pulled
toward BG because they mention board oversight.
PROPOSED FIX:
"When a paragraph's primary content is about a management role (CISO,
CIO, etc.) and mentions board oversight only as context for the
reporting relationship, classify as Management Role. Board Governance
requires the board/committee to be the PRIMARY ACTOR, not merely
the recipient of reports."
""")
print("""
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
4B: MR↔RMP Three-Step Chain
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
CURRENT RULING (Rule 2b):
"Step 1 — Subject test: What is the paragraph's grammatical subject?
Step 2 — Person-removal test: Could you delete all named roles, titles,
qualifications, experience descriptions, and credentials from the
paragraph and still have a coherent cybersecurity disclosure?
Step 3 — Qualifications tiebreaker: Does the paragraph include experience
(years), certifications (CISSP, CISM), education, team size, or career
history for named individuals?"
""")
if regressions_mr_rmp:
    # Direction counts: which way did the v3.5 majority move relative to the
    # human label on the MR↔RMP regressions?
    mr_to_rmp = sum(
        1 for pid, _, _ in regressions_mr_rmp
        if abbrev(majority_vote(get_signals(v35_signals, pid))) == "RMP"
        and abbrev(human_majority(pid)) == "MR"
    )
    rmp_to_mr = sum(
        1 for pid, _, _ in regressions_mr_rmp
        if abbrev(majority_vote(get_signals(v35_signals, pid))) == "MR"
        and abbrev(human_majority(pid)) == "RMP"
    )
    print(f" EMPIRICAL FINDING:")
    print(f" Regressions that moved MR→RMP (human says MR): {mr_to_rmp}")
    print(f" Regressions that moved RMP→MR (human says RMP): {rmp_to_mr}")
    # Tailored advice by dominant direction; ties print nothing.
    if mr_to_rmp > rmp_to_mr:
        print("""
DIAGNOSIS: The person-removal test is TOO AGGRESSIVE at removing people.
When a paragraph describes a CISO's monitoring activities, the person-removal
test says "yes, the monitoring process stands alone," but the HUMANS recognize
that the paragraph is fundamentally about the management role's responsibilities.
PROPOSED FIX — tighten the person-removal test:
"Step 2 — Person-removal test: Delete all named roles AND their associated
ACTIVITIES. If the paragraph still describes a cybersecurity process or
framework, it is Risk Management Process. If deleting the roles and their
activities leaves nothing substantive, it is Management Role.
Key distinction: 'The CISO monitors threat intelligence' — removing the
CISO removes the monitoring activity, so this is Management Role.
'The company monitors threat intelligence under the direction of the CISO'
— removing the CISO leaves the monitoring intact, so this is RMP."
""")
    elif rmp_to_mr > mr_to_rmp:
        print("""
DIAGNOSIS: The three-step chain is UNDER-APPLYING the person-removal test.
Models are stopping at Step 1 (subject test) when they see a role title,
without proceeding to the person-removal test.
PROPOSED FIX:
"Step 1 should only produce a STRONG signal, not a decisive result.
Always proceed to Step 2 unless the paragraph is ENTIRELY about
a person's credentials with no process content whatsoever."
""")
if not regressions_mr_rmp:
    print("""
No MR↔RMP regressions found. The three-step chain may be working correctly,
or the non-convergence is increasing uncertainty without changing majority votes.
Focus on whether the increased model disagreement reflects genuine ambiguity
or whether the step instructions need to be more prescriptive.
""")
# ── Final summary stats ──────────────────────────────────────────────────────
print("\n" + "" * 110)
print(" FINAL SUMMARY")
print("" * 110)
# Overall accuracy comparison
# Count, over every confusion-axis paragraph with both human and model
# majorities, how often each prompt version matches the human majority.
total_with_human = 0
v30_correct_total = 0
v35_correct_total = 0
for pid in all_pids:
    h_maj = human_majority(pid)
    if h_maj is None:
        continue
    v30_sigs = get_signals(v30_signals, pid)
    v35_sigs = get_signals(v35_signals, pid)
    v30_maj = majority_vote(v30_sigs)
    v35_maj = majority_vote(v35_sigs)
    if v30_maj is None or v35_maj is None:
        continue
    total_with_human += 1
    if abbrev(v30_maj) == abbrev(h_maj):
        v30_correct_total += 1
    if abbrev(v35_maj) == abbrev(h_maj):
        v35_correct_total += 1
# NOTE(review): divides by total_with_human — raises ZeroDivisionError if no
# paragraph has all three majorities; acceptable for a one-off analysis script.
print(f"\n Overall accuracy on {total_with_human} confusion-axis paragraphs:")
print(f" v3.0: {v30_correct_total}/{total_with_human} ({v30_correct_total/total_with_human:.1%})")
print(f" v3.5: {v35_correct_total}/{total_with_human} ({v35_correct_total/total_with_human:.1%})")
print(f" Delta: {v35_correct_total - v30_correct_total:+d}")
# Per-axis breakdown
for axis_name in ["BG_MR", "MR_RMP", "BG_RMP", "SI_NO"]:
    axis_pids = [pid for pid, axes in pid_axes.items() if axis_name in axes]
    v30_c = 0
    v35_c = 0
    n = 0
    for pid in axis_pids:
        h_maj = human_majority(pid)
        if h_maj is None:
            continue
        v30_sigs = get_signals(v30_signals, pid)
        v35_sigs = get_signals(v35_signals, pid)
        v30_maj = majority_vote(v30_sigs)
        v35_maj = majority_vote(v35_sigs)
        if v30_maj is None or v35_maj is None:
            continue
        n += 1
        if abbrev(v30_maj) == abbrev(h_maj):
            v30_c += 1
        if abbrev(v35_maj) == abbrev(h_maj):
            v35_c += 1
    if n > 0:  # skip axes with no scorable paragraphs (also avoids 0-division)
        print(f"\n {axis_name} ({n} paragraphs):")
        print(f" v3.0: {v30_c}/{n} ({v30_c/n:.1%})")
        print(f" v3.5: {v35_c}/{n} ({v35_c/n:.1%})")
        print(f" Delta: {v35_c - v30_c:+d}")
print()