SEC-cyBERT/ts/scripts/diff-orphan-annotations.ts
2026-03-29 20:33:39 -04:00

211 lines
9.1 KiB
TypeScript

/**
* Diff original vs re-run annotations for orphan-word paragraphs.
*
* Compares stage1.jsonl (original) against stage1-orphan-rerun.jsonl (patched text)
* to measure label changes, bias correction, and conflict resolution.
*
* Usage: bun ts/scripts/diff-orphan-annotations.ts
*/
import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";
// Locations of the input files, resolved against the `data` directory two
// levels above this script's directory.
//
// fileURLToPath is used instead of URL#pathname because `pathname` is still a
// URL path: it keeps percent-escapes (a space stays "%20") and, on Windows,
// carries a leading slash before the drive letter. Either would make
// readFileSync fail with ENOENT.
const DATA_DIR = fileURLToPath(new URL("../../data", import.meta.url));
const ORIG_PATH = `${DATA_DIR}/annotations/stage1.jsonl`;
const RERUN_PATH = `${DATA_DIR}/annotations/stage1-orphan-rerun.jsonl`;
const PATCHES_PATH = `${DATA_DIR}/paragraphs/patches/orphan-word-patches.jsonl`;
/** Label fields carried by each annotation record. */
interface AnnotationLabel {
  /** Category label; compared between the original and re-run passes. */
  content_category: string;
  /** Specificity label; compared between the original and re-run passes. */
  specificity_level: number;
  /** Present in the record shape but not read by this script. */
  category_confidence: string;
  /** Present in the record shape but not read by this script. */
  specificity_confidence: string;
}

/** Origin metadata carried by each annotation record. */
interface AnnotationProvenance {
  /** Model identifier; used to pair a re-run annotation with its original. */
  modelId: string;
}

/**
 * One parsed line of an annotation JSONL file: a single annotation of a
 * single paragraph.
 */
interface Annotation {
  /** Paragraph the annotation belongs to; annotations are grouped by this id. */
  paragraphId: string;
  label: AnnotationLabel;
  provenance: AnnotationProvenance;
}
/**
 * Read a JSONL file of annotations and group the records by paragraph id.
 *
 * Lines that are empty or whitespace-only are skipped. Every other line is
 * parsed with JSON.parse and trusted to match `Annotation` (no validation), so
 * a malformed line throws.
 *
 * @param path File to read (UTF-8).
 * @returns Map from paragraph id to that paragraph's annotations, with both
 *   keys and list entries in file order.
 */
function loadAnnotations(path: string): Map<string, Annotation[]> {
  const byParagraph = new Map<string, Annotation[]>();
  const rows = readFileSync(path, "utf-8").split("\n");

  for (const row of rows) {
    if (row.trim() === "") continue;

    const record = JSON.parse(row) as Annotation;
    const bucket = byParagraph.get(record.paragraphId);
    if (bucket === undefined) {
      byParagraph.set(record.paragraphId, [record]);
    } else {
      bucket.push(record);
    }
  }

  return byParagraph;
}
/**
 * Find the most frequent value of one label field across a set of annotations.
 *
 * Ties go to the value that appears first in `annotations`. An empty list
 * yields `{ value: "", unanimous: true, count: 0 }`.
 *
 * @param annotations Annotations to tally.
 * @param field Which label field to vote on.
 * @returns `value` is the winning label, `count` is how many annotations chose
 *   it, and `unanimous` is true when that is all of them.
 */
function majorityVote(
  annotations: Annotation[],
  field: "content_category" | "specificity_level",
): { value: string | number; unanimous: boolean; count: number } {
  const tally = new Map<string | number, number>();
  annotations.forEach(({ label }) => {
    const vote = label[field];
    tally.set(vote, (tally.get(vote) ?? 0) + 1);
  });

  // Map iterates in insertion order and the comparison is strict, so the
  // earliest-seen value keeps the lead when counts are equal.
  const [value, count] = [...tally].reduce<[string | number, number]>(
    (leader, entry) => (entry[1] > leader[1] ? entry : leader),
    ["", 0],
  );

  return { value, unanimous: count === annotations.length, count };
}
// ── Main ────────────────────────────────────────────────────────────────
// Collect the `id` of every record in the patch file; blank lines are skipped.
const patchIds = new Set<string>(
  readFileSync(PATCHES_PATH, "utf-8")
    .split("\n")
    .filter((row) => row.trim() !== "")
    .map((row) => (JSON.parse(row) as { id: string }).id),
);
// Load both annotation passes, grouped by paragraph id.
const origAll = loadAnnotations(ORIG_PATH);
const rerunAll = loadAnnotations(RERUN_PATH);

// Keep only the original annotations whose paragraph id is in the patch list.
const origFiltered = new Map<string, Annotation[]>(
  [...origAll].filter(([pid]) => patchIds.has(pid)),
);

// Progress figures go to stderr so stdout carries only the report.
console.error(`Orphan-word paragraphs: ${patchIds.size}`);
console.error(`Original annotations found: ${origFiltered.size} paragraphs`);
console.error(`Re-run annotations found: ${rerunAll.size} paragraphs`);

// A paragraph can be compared only when it was annotated in both passes.
const comparable: string[] = [];
for (const pid of rerunAll.keys()) {
  if (origFiltered.has(pid)) comparable.push(pid);
}
console.error(`Comparable paragraphs: ${comparable.length}\n`);
/** Increment the counter stored under `key`, starting from zero when absent. */
function bumpCount<K>(counter: Map<K, number>, key: K): void {
  counter.set(key, (counter.get(key) ?? 0) + 1);
}

// Consensus-level change counters (at most one increment per paragraph each)
let catChanged = 0;
let specChanged = 0;
let eitherChanged = 0;
// Per-model change counters, keyed by model id
const perModelCatChanges = new Map<string, number>();
const perModelSpecChanges = new Map<string, number>();
// Category transition matrix: original category → re-run category → count
const catTransitions = new Map<string, Map<string, number>>();
// Conflict bookkeeping
let origConflicts = 0;
let rerunConflicts = 0;
let conflictsResolved = 0;
let consensusBroken = 0;
// Distribution of consensus categories per pass
const origCatDist = new Map<string, number>();
const rerunCatDist = new Map<string, number>();
// Distribution of consensus specificity levels per pass
const origSpecDist = new Map<number, number>();
const rerunSpecDist = new Map<number, number>();

for (const pid of comparable) {
  const before = origFiltered.get(pid)!;
  const after = rerunAll.get(pid)!;

  // Per-model comparison: pair each re-run annotation with the first original
  // annotation produced by the same model.
  for (const rerunAnn of after) {
    const { modelId } = rerunAnn.provenance;
    const origAnn = before.find((candidate) => candidate.provenance.modelId === modelId);
    if (origAnn === undefined) continue; // model has no original annotation here

    const fromCat = origAnn.label.content_category;
    const toCat = rerunAnn.label.content_category;
    if (fromCat !== toCat) {
      bumpCount(perModelCatChanges, modelId);
      // Record the transition in the from → to matrix.
      let row = catTransitions.get(fromCat);
      if (row === undefined) {
        row = new Map<string, number>();
        catTransitions.set(fromCat, row);
      }
      bumpCount(row, toCat);
    }

    if (origAnn.label.specificity_level !== rerunAnn.label.specificity_level) {
      bumpCount(perModelSpecChanges, modelId);
    }
  }

  // Consensus comparison (majority vote over all annotations of the paragraph)
  const catBefore = majorityVote(before, "content_category");
  const catAfter = majorityVote(after, "content_category");
  const specBefore = majorityVote(before, "specificity_level");
  const specAfter = majorityVote(after, "specificity_level");

  bumpCount(origCatDist, catBefore.value as string);
  bumpCount(rerunCatDist, catAfter.value as string);
  bumpCount(origSpecDist, specBefore.value as number);
  bumpCount(rerunSpecDist, specAfter.value as number);

  const catMoved = catBefore.value !== catAfter.value;
  const specMoved = specBefore.value !== specAfter.value;
  if (catMoved) catChanged += 1;
  if (specMoved) specChanged += 1;
  if (catMoved || specMoved) eitherChanged += 1;

  // Conflict tracking (no majority = conflict): a paragraph is in conflict when
  // the winning value of either dimension was chosen by fewer than two
  // annotations.
  // NOTE(review): the fixed threshold of 2 looks like it assumes three
  // annotations per paragraph — confirm against the annotation pipeline.
  const conflictBefore = catBefore.count < 2 || specBefore.count < 2;
  const conflictAfter = catAfter.count < 2 || specAfter.count < 2;
  if (conflictBefore) {
    origConflicts += 1;
    if (!conflictAfter) conflictsResolved += 1;
  }
  if (conflictAfter) {
    rerunConflicts += 1;
    if (!conflictBefore) consensusBroken += 1;
  }
}
// ── Report ──────────────────────────────────────────────────────────────

/**
 * Format `part / whole` as a percentage with one decimal place (no "%" sign).
 * Returns "0.0" when `whole` is 0, so a run with nothing to compare does not
 * print "NaN%".
 */
function formatPct(part: number, whole: number): string {
  return (whole === 0 ? 0 : (part / whole) * 100).toFixed(1);
}

/** Render a delta with an explicit "+" for positive values ("+3", "-2", "0"). */
function formatDelta(delta: number): string {
  return (delta > 0 ? "+" : "") + delta;
}

/**
 * Print one "model: count (pct%)" line per model, highest count first.
 *
 * NOTE(review): the percentage is taken over all compared paragraphs, although
 * the comparison loop skips a model on paragraphs where it has no original
 * annotation — confirm this denominator is the intended one.
 */
function printPerModelChanges(changes: Map<string, number>, total: number): void {
  const ranked = [...changes.entries()].sort((a, b) => b[1] - a[1]);
  for (const [model, count] of ranked) {
    // Show the second "/"-separated segment of the model id, or the whole id
    // when it contains no "/".
    const short = model.split("/")[1] ?? model;
    console.log(` ${short}: ${count} (${formatPct(count, total)}%)`);
  }
}

const comparedCount = comparable.length;

console.log("═══ ORPHAN WORD RE-ANNOTATION DIFF REPORT ═══\n");
console.log(`Paragraphs compared: ${comparedCount}`);
console.log(` Category consensus changed: ${catChanged} (${formatPct(catChanged, comparedCount)}%)`);
console.log(` Specificity consensus changed: ${specChanged} (${formatPct(specChanged, comparedCount)}%)`);
console.log(` Either dimension changed: ${eitherChanged} (${formatPct(eitherChanged, comparedCount)}%)`);

console.log(`\n─── Per-Model Category Changes ───`);
printPerModelChanges(perModelCatChanges, comparedCount);

console.log(`\n─── Per-Model Specificity Changes ───`);
printPerModelChanges(perModelSpecChanges, comparedCount);

console.log(`\n─── Conflict Resolution ───`);
console.log(` Original conflicts: ${origConflicts}`);
console.log(` Re-run conflicts: ${rerunConflicts}`);
console.log(` Conflicts resolved (orig conflict → rerun consensus): ${conflictsResolved}`);
console.log(` Consensus broken (orig consensus → rerun conflict): ${consensusBroken}`);
// More resolved than broken means fewer conflicts overall, hence the "-" sign.
console.log(` Net conflict change: ${conflictsResolved - consensusBroken > 0 ? "-" : "+"}${Math.abs(conflictsResolved - consensusBroken)}`);

console.log(`\n─── Category Distribution (Consensus) ───`);
console.log(` ${"Category".padEnd(30)} ${"Original".padStart(8)} ${"Re-run".padStart(8)} ${"Delta".padStart(8)}`);
const allCats = new Set([...origCatDist.keys(), ...rerunCatDist.keys()]);
for (const cat of [...allCats].sort()) {
  const orig = origCatDist.get(cat) ?? 0;
  const rerun = rerunCatDist.get(cat) ?? 0;
  console.log(` ${cat.padEnd(30)} ${String(orig).padStart(8)} ${String(rerun).padStart(8)} ${formatDelta(rerun - orig).padStart(8)}`);
}

console.log(`\n─── Specificity Distribution (Consensus) ───`);
console.log(` ${"Level".padEnd(10)} ${"Original".padStart(8)} ${"Re-run".padStart(8)} ${"Delta".padStart(8)}`);
// Levels 1–4 are always listed; any other level seen in the data is listed
// too, so out-of-range values show up in the table instead of vanishing.
const specLevels = [
  ...new Set([1, 2, 3, 4, ...origSpecDist.keys(), ...rerunSpecDist.keys()]),
].sort((a, b) => a - b);
for (const level of specLevels) {
  const orig = origSpecDist.get(level) ?? 0;
  const rerun = rerunSpecDist.get(level) ?? 0;
  console.log(` ${String(level).padEnd(10)} ${String(orig).padStart(8)} ${String(rerun).padStart(8)} ${formatDelta(rerun - orig).padStart(8)}`);
}

console.log(`\n─── Top Category Transitions ───`);
const transitions: [string, string, number][] = [];
for (const [from, tos] of catTransitions) {
  for (const [to, count] of tos) {
    transitions.push([from, to, count]);
  }
}
transitions.sort((a, b) => b[2] - a[2]);
for (const [from, to, count] of transitions.slice(0, 15)) {
  // Separate the two category names; printed back-to-back they are unreadable.
  console.log(` ${from} → ${to}: ${count}`);
}