/**
 * Detailed cross-tabulations for disputed (non-unanimous) paragraphs.
 *
 * Reads the stage-1 annotations and the cleaned paragraphs, keeps paragraphs
 * that have exactly three annotations, and prints six report sections to
 * stdout (category x specificity, dispute type by category, specificity
 * boundary patterns, word-count distribution, unresolved 3-way splits and
 * combined patterns for paragraphs disputed on both axes).
 *
 * Usage: bun ts/scripts/dispute-crosstab.ts
 */
import { fileURLToPath } from "node:url";

import { readJsonlRaw, readJsonl } from "../src/lib/jsonl.ts";
import { Paragraph } from "../src/schemas/paragraph.ts";

// fileURLToPath (rather than URL.pathname) decodes percent-escapes, so the
// script still works when the checkout path contains spaces or non-ASCII
// characters, and it yields a valid path on Windows.
const ANN_PATH = fileURLToPath(new URL("../../data/annotations/stage1.jsonl", import.meta.url));
const PARA_PATH = fileURLToPath(new URL("../../data/paragraphs/paragraphs-clean.jsonl", import.meta.url));

/** Shape of one annotation record as this script reads it from the JSONL file. */
interface Ann {
  paragraphId: string;
  label: {
    content_category: string;
    specificity_level: number;
    category_confidence: string;
    specificity_confidence: string;
    reasoning: string;
  };
  provenance: {
    modelId: string;
    costUsd: number;
    inputTokens: number;
    outputTokens: number;
    reasoningTokens: number;
    latencyMs: number;
    requestedAt: string;
  };
}

type DisputeType = "none" | "cat-only" | "spec-only" | "both";

/** Per-paragraph summary of the three annotators' votes. */
interface ParaInfo {
  pid: string;
  cats: string[];
  specs: number[];
  catUnanimous: boolean;
  specUnanimous: boolean;
  /** Majority category, or null when no value has a strict majority. */
  majCat: string | null;
  /** Majority specificity, or null when no value has a strict majority. */
  majSpec: number | null;
  catDisputed: boolean;
  specDisputed: boolean;
  disputeType: DisputeType;
  /** Word count of the paragraph; 0 when the paragraph record was not found. */
  wordCount: number;
}

/** Number of annotations a paragraph must have to be included in the analysis. */
const ANNOTATORS_PER_PARAGRAPH = 3;

const TITLE_RULE = "═══════════════════════════════════════════════════════════════════";
const SECTION_RULE = "══════════════════════════════════════════════════════════════";

/** Width of the leading "Category" column shared by sections 1-3. */
const CAT_W = 28;

/** Label used when grouping paragraphs that have no majority category. */
const NO_MAJORITY = "[no majority]";

// ── Helpers ────────────────────────────────────────────────────────────

/** Formats `n / total` as a percentage with one decimal; "0.0%" when total is 0. */
function pct(n: number, total: number): string {
  if (total === 0) return "0.0%";
  return `${((n / total) * 100).toFixed(1)}%`;
}

/** Median of `arr` (mean of the two middle values for even lengths); 0 for an empty array. */
function median(arr: number[]): number {
  if (arr.length === 0) return 0;
  const sorted = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}

/**
 * The p-th percentile (0-100) of `arr`, linearly interpolated between the two
 * nearest ranks; 0 for an empty array.
 */
function percentile(arr: number[], p: number): number {
  if (arr.length === 0) return 0;
  const sorted = [...arr].sort((a, b) => a - b);
  const idx = (p / 100) * (sorted.length - 1);
  const lo = Math.floor(idx);
  const hi = Math.ceil(idx);
  return lo === hi ? sorted[lo] : sorted[lo] + (sorted[hi] - sorted[lo]) * (idx - lo);
}

/**
 * Returns the value that occurs in more than half of `arr`, or null if there
 * is none. For three votes this is the value chosen by at least two of them.
 */
function majority<T>(arr: T[]): T | null {
  const freq = new Map<T, number>();
  for (const v of arr) freq.set(v, (freq.get(v) ?? 0) + 1);
  for (const [val, count] of freq) {
    // Strict majority: at most one value can satisfy this, so the result
    // never depends on iteration order.
    if (count > arr.length / 2) return val;
  }
  return null;
}

/** Renders `arr` sorted ascending as "[a,b,c]" without mutating the input. */
function sortedVals(arr: number[]): string {
  return `[${[...arr].sort((a, b) => a - b).join(",")}]`;
}

/** Distinct values of `arr` in default (lexicographic) sort order. */
function uniqueSorted(arr: string[]): string[] {
  return [...new Set(arr)].sort();
}

/** Adds one to the counter stored under `key`, starting from 0. */
function bump<K>(counts: Map<K, number>, key: K): void {
  counts.set(key, (counts.get(key) ?? 0) + 1);
}

/** Entries of a counter map ordered by descending count (ties keep insertion order). */
function byCountDesc<K>(counts: Map<K, number>): [K, number][] {
  return [...counts.entries()].sort((a, b) => b[1] - a[1]);
}

/** Distinct specificity votes, ascending, e.g. votes 2,1,2 give "[1,2]". */
function specBoundary(specs: number[]): string {
  return sortedVals([...new Set(specs)]);
}

/** "A↔B" when exactly two distinct categories were voted, otherwise "A/B/C". */
function catDisputeLabel(cats: string[]): string {
  const uniq = uniqueSorted(cats);
  return uniq.length === 2 ? `${uniq[0]}↔${uniq[1]}` : uniq.join("/");
}

/** Prints a numbered section banner; `blankAfter` adds an empty line below it. */
function printSectionHeader(title: string, blankAfter = false): void {
  console.log(`\n\n${SECTION_RULE}`);
  console.log(title);
  console.log(blankAfter ? `${SECTION_RULE}\n` : SECTION_RULE);
}

/** Prints a table header line followed by a rule of the same length. */
function printTableHeader(header: string): void {
  console.log(` ${header}`);
  console.log(` ${"─".repeat(header.length)}`);
}

// ── Classification ─────────────────────────────────────────────────────

/**
 * Builds one ParaInfo per paragraph that has exactly
 * ANNOTATORS_PER_PARAGRAPH annotations; other paragraphs are skipped.
 *
 * @param byParagraph annotations grouped by paragraph id
 * @param wordCountOf lookup returning a paragraph's word count (0 if unknown)
 */
function classifyParagraphs(
  byParagraph: Map<string, Ann[]>,
  wordCountOf: (pid: string) => number,
): ParaInfo[] {
  const infos: ParaInfo[] = [];
  for (const [pid, panns] of byParagraph) {
    if (panns.length !== ANNOTATORS_PER_PARAGRAPH) continue;

    const cats = panns.map(a => a.label.content_category);
    const specs = panns.map(a => a.label.specificity_level);
    const catUnanimous = new Set(cats).size === 1;
    const specUnanimous = new Set(specs).size === 1;
    const catDisputed = !catUnanimous;
    const specDisputed = !specUnanimous;

    let disputeType: DisputeType;
    if (catDisputed) disputeType = specDisputed ? "both" : "cat-only";
    else disputeType = specDisputed ? "spec-only" : "none";

    infos.push({
      pid,
      cats,
      specs,
      catUnanimous,
      specUnanimous,
      majCat: majority(cats),
      majSpec: majority(specs),
      catDisputed,
      specDisputed,
      disputeType,
      wordCount: wordCountOf(pid),
    });
  }
  return infos;
}

// ── Section 1: category x specificity cross-tab ────────────────────────

/** Prints majority category (rows) against majority specificity (columns) for disputed paragraphs. */
function printCategoryBySpecificity(disputed: ParaInfo[]): void {
  printSectionHeader(" 1. CATEGORY x SPECIFICITY CROSS-TAB (disputed paragraphs)");
  console.log(" Uses majority-vote labels for both axes.\n");

  const specColumns = [
    { level: 1, label: "GenBoiler" },
    { level: 2, label: "SectorAdpt" },
    { level: 3, label: "FirmSpec" },
    { level: 4, label: "QuantVerif" },
  ];
  const colW = 12;

  // Row order: majority categories by descending number of disputed paragraphs.
  const catCounts = new Map<string, number>();
  for (const p of disputed) {
    if (p.majCat !== null) bump(catCounts, p.majCat);
  }
  const categories = byCountDesc(catCounts).map(([c]) => c);

  // Only paragraphs with a majority on BOTH axes land in a cell; the rest
  // are tallied and reported in the footnote.
  const crossTab = new Map<string, number>();
  let noMajCat = 0;
  let noMajSpec = 0;
  for (const p of disputed) {
    if (p.majCat === null) noMajCat++;
    if (p.majSpec === null) noMajSpec++;
    if (p.majCat === null || p.majSpec === null) continue;
    bump(crossTab, `${p.majCat}|${p.majSpec}`);
  }

  let header = "Category".padEnd(CAT_W);
  for (const { level, label } of specColumns) {
    header += `${level}:${label}`.padStart(colW);
  }
  header += "Total".padStart(colW);
  printTableHeader(header);

  for (const cat of categories) {
    const counts = specColumns.map(({ level }) => crossTab.get(`${cat}|${level}`) ?? 0);
    const rowTotal = counts.reduce((a, b) => a + b, 0);
    let row = cat.padEnd(CAT_W);
    for (const v of counts) {
      const rowPct = rowTotal > 0 ? ((v / rowTotal) * 100).toFixed(0) : "0";
      row += `${v} (${rowPct}%)`.padStart(colW);
    }
    row += `${rowTotal}`.padStart(colW);
    console.log(` ${row}`);
  }

  console.log(`\n (${noMajCat} paragraphs had no majority category, ${noMajSpec} had no majority specificity)`);
}

// ── Section 2: dispute type by category ────────────────────────────────

/** Prints, per majority category, how disputes split into cat-only / spec-only / both. */
function printDisputeTypeByCategory(disputed: ParaInfo[]): void {
  printSectionHeader(" 2. DISPUTE TYPE BY MAJORITY CATEGORY");
  console.log(" For each majority category, % of disputes that are cat-only, spec-only, or both.\n");

  interface Tally {
    catOnly: number;
    specOnly: number;
    both: number;
  }
  const tallyTotal = (t: Tally): number => t.catOnly + t.specOnly + t.both;

  const disputeByCat = new Map<string, Tally>();
  for (const p of disputed) {
    const cat = p.majCat ?? NO_MAJORITY;
    let entry = disputeByCat.get(cat);
    if (!entry) {
      entry = { catOnly: 0, specOnly: 0, both: 0 };
      disputeByCat.set(cat, entry);
    }
    if (p.disputeType === "cat-only") entry.catOnly++;
    else if (p.disputeType === "spec-only") entry.specOnly++;
    else if (p.disputeType === "both") entry.both++;
  }

  printTableHeader(
    "Category".padEnd(CAT_W) +
      "n".padStart(8) +
      "Cat-only".padStart(12) +
      "Spec-only".padStart(12) +
      "Both".padStart(12),
  );

  const rows = [...disputeByCat.entries()].sort((a, b) => tallyTotal(b[1]) - tallyTotal(a[1]));
  for (const [cat, d] of rows) {
    const total = tallyTotal(d);
    const row =
      cat.padEnd(CAT_W) +
      total.toString().padStart(8) +
      `${d.catOnly} (${pct(d.catOnly, total)})`.padStart(12) +
      `${d.specOnly} (${pct(d.specOnly, total)})`.padStart(12) +
      `${d.both} (${pct(d.both, total)})`.padStart(12);
    console.log(` ${row}`);
  }
}

// ── Section 3: specificity boundary disputes by category ───────────────

/** Prints, per majority category, which sets of specificity levels were in dispute (counts, then row %). */
function printSpecBoundaryByCategory(allParas: ParaInfo[]): void {
  printSectionHeader(" 3. SPECIFICITY BOUNDARY DISPUTES BY CATEGORY");
  console.log(" For spec-disputed paragraphs, the spec vote pattern by majority category.\n");

  const patW = 10;

  // category -> (boundary pattern -> paragraph count)
  const specPatternByCat = new Map<string, Map<string, number>>();
  for (const p of allParas) {
    if (!p.specDisputed) continue;
    const cat = p.majCat ?? NO_MAJORITY;
    let patternMap = specPatternByCat.get(cat);
    if (!patternMap) {
      patternMap = new Map<string, number>();
      specPatternByCat.set(cat, patternMap);
    }
    bump(patternMap, specBoundary(p.specs));
  }

  const allPatterns = new Set<string>();
  for (const pm of specPatternByCat.values()) {
    for (const pat of pm.keys()) allPatterns.add(pat);
  }
  const sortedPatterns = [...allPatterns].sort();

  // Rows by descending number of spec-disputed paragraphs.
  const rows = [...specPatternByCat.entries()]
    .map(([cat, counts]) => {
      let total = 0;
      for (const v of counts.values()) total += v;
      return { cat, counts, total };
    })
    .sort((a, b) => b.total - a.total);

  // The same table is printed twice; only the rendering of non-zero cells differs.
  const printTable = (cell: (count: number, total: number) => string): void => {
    let header = "Category".padEnd(CAT_W) + "n".padStart(6);
    for (const pat of sortedPatterns) header += pat.padStart(patW);
    printTableHeader(header);

    for (const { cat, counts, total } of rows) {
      let row = cat.padEnd(CAT_W) + total.toString().padStart(6);
      for (const pat of sortedPatterns) {
        const v = counts.get(pat) ?? 0;
        row += (v === 0 ? "-" : cell(v, total)).padStart(patW);
      }
      console.log(` ${row}`);
    }
  };

  printTable(v => `${v}`);
  console.log("\n (Row percentages:)");
  printTable((v, total) => `${((v / total) * 100).toFixed(0)}%`);
}

// ── Section 4: word count distribution by dispute type ─────────────────

/** Prints word-count statistics per dispute type; paragraphs with word count 0 are excluded. */
function printWordCountByDisputeType(groups: { label: string; paras: ParaInfo[] }[]): void {
  printSectionHeader(" 4. WORD COUNT DISTRIBUTION BY DISPUTE TYPE", true);

  printTableHeader(
    "Dispute Type".padEnd(28) +
      "n".padStart(8) +
      "Median".padStart(10) +
      "P90".padStart(10) +
      "P10".padStart(10) +
      "Mean".padStart(10),
  );

  for (const g of groups) {
    // A word count of 0 marks a paragraph whose record was not found.
    const wcs = g.paras.map(p => p.wordCount).filter(w => w > 0);
    if (wcs.length === 0) continue;
    const mean = wcs.reduce((a, b) => a + b, 0) / wcs.length;
    const row =
      g.label.padEnd(28) +
      wcs.length.toString().padStart(8) +
      median(wcs).toFixed(0).padStart(10) +
      percentile(wcs, 90).toFixed(0).padStart(10) +
      percentile(wcs, 10).toFixed(0).padStart(10) +
      mean.toFixed(0).padStart(10);
    console.log(` ${row}`);
  }
}

// ── Section 5: unresolved paragraphs (no majority category) ────────────

/** Prints statistics for paragraphs whose category vote has no majority. */
function printUnresolvedAnalysis(allParas: ParaInfo[]): void {
  printSectionHeader(" 5. UNRESOLVED PARAGRAPH ANALYSIS (3-way category splits)", true);

  const unresolved = allParas.filter(p => p.majCat === null);
  console.log(` Total unresolved paragraphs: ${unresolved.length.toLocaleString()}`);

  // Every individual vote is counted, so each paragraph contributes three.
  const catFreq = new Map<string, number>();
  const specFreq = new Map<number, number>();
  const threeWay = new Map<string, number>();
  for (const p of unresolved) {
    for (const c of p.cats) bump(catFreq, c);
    for (const s of p.specs) bump(specFreq, s);
    bump(threeWay, [...p.cats].sort().join(" / "));
  }

  console.log("\n Categories appearing in unresolved paragraphs (annotation count):");
  for (const [cat, count] of byCountDesc(catFreq)) {
    console.log(` ${count.toString().padStart(6)} ${cat}`);
  }

  console.log("\n Specificity levels in unresolved paragraphs (annotation count):");
  for (let s = 1; s <= 4; s++) {
    const count = specFreq.get(s) ?? 0;
    console.log(` ${count.toString().padStart(6)} ${s}`);
  }

  console.log("\n Most common 3-way category splits:");
  const sortedThreeWay = byCountDesc(threeWay);
  for (const [pattern, count] of sortedThreeWay.slice(0, 20)) {
    console.log(` ${count.toString().padStart(6)} ${pattern}`);
  }
  if (sortedThreeWay.length > 20) {
    console.log(` ... and ${sortedThreeWay.length - 20} more patterns`);
  }

  const specUnanimous = unresolved.filter(p => p.specUnanimous).length;
  // Counts every paragraph with a majority specificity, unanimous ones included.
  const specMaj = unresolved.filter(p => p.majSpec !== null).length;
  const specThreeWay = unresolved.length - specMaj;
  console.log(`\n Specificity agreement among unresolved:`);
  console.log(` Spec unanimous: ${specUnanimous} (${pct(specUnanimous, unresolved.length)})`);
  console.log(` Spec majority: ${specMaj} (${pct(specMaj, unresolved.length)})`);
  console.log(` Spec 3-way: ${specThreeWay} (${pct(specThreeWay, unresolved.length)})`);
}

// ── Section 6: paragraphs disputed on both axes ────────────────────────

/** Prints combined category + specificity dispute patterns for paragraphs disputed on both axes. */
function printBothDisputePatterns(bothDisputed: ParaInfo[]): void {
  printSectionHeader(" 6. 'BOTH' DISPUTES — COMBINED CATEGORY + SPECIFICITY PATTERNS", true);
  console.log(` Total paragraphs with both cat AND spec disputed: ${bothDisputed.length.toLocaleString()}\n`);

  const combinedPatterns = new Map<string, number>();
  const catPairAgg = new Map<string, number>();
  const specPatAgg = new Map<string, number>();
  for (const p of bothDisputed) {
    const catPart = catDisputeLabel(p.cats);
    const specPart = specBoundary(p.specs);
    bump(combinedPatterns, `${catPart} + ${specPart}`);
    bump(catPairAgg, catPart);
    bump(specPatAgg, specPart);
  }

  const sortedCombined = byCountDesc(combinedPatterns);
  console.log(" Top 30 combined dispute patterns:");
  for (const [pattern, count] of sortedCombined.slice(0, 30)) {
    console.log(` ${count.toString().padStart(6)} ${pattern}`);
  }
  if (sortedCombined.length > 30) {
    const remaining = sortedCombined.slice(30).reduce((acc, entry) => acc + entry[1], 0);
    console.log(`\n ... and ${sortedCombined.length - 30} more patterns (${remaining} paragraphs)`);
  }

  console.log("\n Category dispute pairs (aggregated across spec patterns):");
  for (const [pair, count] of byCountDesc(catPairAgg).slice(0, 20)) {
    console.log(` ${count.toString().padStart(6)} ${pair}`);
  }

  console.log("\n Spec boundary patterns within 'both' disputes:");
  for (const [pat, count] of byCountDesc(specPatAgg)) {
    console.log(` ${count.toString().padStart(6)} ${pat} (${pct(count, bothDisputed.length)})`);
  }
}

// ── Main ──────────────────────────────────────────────────────────────

/** Loads annotations and paragraphs, classifies each paragraph, and prints all report sections. */
async function main(): Promise<void> {
  console.log("Loading data...");
  const [
    { records: rawAnns, skipped: annSkipped },
    { records: paragraphs, skipped: paraSkipped },
  ] = await Promise.all([
    readJsonlRaw(ANN_PATH),
    readJsonl(PARA_PATH, Paragraph),
  ]);

  // NOTE(review): raw records are trusted to match `Ann`; nothing validates them here.
  const anns = rawAnns as Ann[];
  console.log(` ${anns.length.toLocaleString()} annotations (${annSkipped} skipped)`);
  console.log(` ${paragraphs.length.toLocaleString()} paragraphs (${paraSkipped} skipped)\n`);

  // Index paragraphs by id
  const paraById = new Map(paragraphs.map(p => [p.id, p]));

  // Group annotations by paragraph
  const byParagraph = new Map<string, Ann[]>();
  for (const a of anns) {
    const group = byParagraph.get(a.paragraphId);
    if (group) group.push(a);
    else byParagraph.set(a.paragraphId, [a]);
  }

  const allParas = classifyParagraphs(byParagraph, pid => paraById.get(pid)?.wordCount ?? 0);

  const unanimous = allParas.filter(p => p.disputeType === "none");
  const disputed = allParas.filter(p => p.disputeType !== "none");
  const catOnly = allParas.filter(p => p.disputeType === "cat-only");
  const specOnly = allParas.filter(p => p.disputeType === "spec-only");
  const bothDisputed = allParas.filter(p => p.disputeType === "both");

  console.log(TITLE_RULE);
  console.log(" DISPUTE CROSS-TABULATION ANALYSIS");
  console.log(TITLE_RULE);
  console.log(` Total paragraphs (3-annotator): ${allParas.length.toLocaleString()}`);
  console.log(` Disputed (not both-unanimous): ${disputed.length.toLocaleString()} (${pct(disputed.length, allParas.length)})`);
  console.log(` Cat-only: ${catOnly.length.toLocaleString()}`);
  console.log(` Spec-only: ${specOnly.length.toLocaleString()}`);
  console.log(` Both: ${bothDisputed.length.toLocaleString()}`);

  printCategoryBySpecificity(disputed);
  printDisputeTypeByCategory(disputed);
  printSpecBoundaryByCategory(allParas);
  printWordCountByDisputeType([
    { label: "Unanimous (no dispute)", paras: unanimous },
    { label: "Cat-only dispute", paras: catOnly },
    { label: "Spec-only dispute", paras: specOnly },
    { label: "Both disputed", paras: bothDisputed },
  ]);
  printUnresolvedAnalysis(allParas);
  printBothDisputePatterns(bothDisputed);

  console.log(`\n${TITLE_RULE}`);
  console.log(" ANALYSIS COMPLETE");
  console.log(TITLE_RULE);
}

main().catch(err => {
  console.error(err);
  process.exit(1);
});