502 lines
24 KiB
TypeScript
502 lines
24 KiB
TypeScript
/**
|
|
* Detailed cross-tabulations for disputed (non-unanimous) paragraphs.
|
|
*
|
|
* Usage: bun ts/scripts/dispute-crosstab.ts
|
|
*/
|
|
import { fileURLToPath } from "node:url";

import { Paragraph } from "@sec-cybert/schemas/paragraph.ts";

import { readJsonlRaw, readJsonl } from "../src/lib/jsonl.ts";
|
|
|
|
// Input files, resolved relative to this script's own location.
// fileURLToPath() is used instead of URL.pathname because pathname stays
// percent-encoded (a space in the checkout path becomes "%20") and is not a
// valid filesystem path on Windows ("/C:/..."), whereas fileURLToPath()
// returns a decoded, platform-correct absolute path.
const ANN_PATH = fileURLToPath(new URL("../../data/annotations/stage1.jsonl", import.meta.url));
const PARA_PATH = fileURLToPath(new URL("../../data/paragraphs/paragraphs-clean.jsonl", import.meta.url));
|
|
|
|
/** The label portion of one annotation record. */
interface AnnLabel {
  /** Category name assigned to the paragraph. */
  content_category: string;
  /** Numeric specificity level (the report in this script tabulates levels 1-4). */
  specificity_level: number;
  // NOTE(review): the two confidence fields are free-form strings here;
  // presumably a small fixed vocabulary — confirm against the annotation schema.
  category_confidence: string;
  specificity_confidence: string;
  reasoning: string;
}

/** Bookkeeping about the request that produced one annotation record. */
interface AnnProvenance {
  modelId: string;
  costUsd: number;
  inputTokens: number;
  outputTokens: number;
  reasoningTokens: number;
  latencyMs: number;
  // NOTE(review): presumably an ISO-8601 timestamp — confirm against the writer.
  requestedAt: string;
}

/**
 * One annotation record as read from the stage-1 annotations file:
 * a label for a single paragraph plus the provenance of that label.
 */
interface Ann {
  /** Id of the paragraph this annotation refers to. */
  paragraphId: string;
  label: AnnLabel;
  provenance: AnnProvenance;
}
|
|
|
|
// ── Helpers ────────────────────────────────────────────────────────────
|
|
/**
 * Formats `n` as a percentage of `total` with one decimal place.
 *
 * @param n - Numerator.
 * @param total - Denominator; a zero total yields "0.0%" rather than dividing.
 * @returns The percentage followed by a "%" sign, e.g. "25.0%".
 */
function pct(n: number, total: number): string {
  const share = total === 0 ? 0 : (n / total) * 100;
  return `${share.toFixed(1)}%`;
}
|
|
|
|
/**
 * Median of a list of numbers. The input array is not modified.
 *
 * @param arr - Values in any order.
 * @returns The middle value (mean of the two middle values for an even
 *          count), or 0 for an empty array.
 */
function median(arr: number[]): number {
  const n = arr.length;
  if (n === 0) return 0;

  // Sort a copy numerically; the default sort would compare as strings.
  const ordered = arr.slice().sort((x, y) => x - y);
  const upper = Math.floor(n / 2);

  if (n % 2 === 1) {
    return ordered[upper];
  }
  return (ordered[upper - 1] + ordered[upper]) / 2;
}
|
|
|
|
/**
 * p-th percentile of a list of numbers, linearly interpolating between the
 * two nearest ranks. The input array is not modified.
 *
 * @param arr - Values in any order.
 * @param p - Percentile on a 0-100 scale.
 * @returns The interpolated value, or 0 for an empty array.
 */
function percentile(arr: number[], p: number): number {
  if (!arr.length) return 0;

  const ordered = Array.from(arr).sort((x, y) => x - y);
  // Fractional position of the requested percentile within the sorted list.
  const rank = (p / 100) * (ordered.length - 1);
  const below = Math.floor(rank);
  const above = Math.ceil(rank);

  if (below === above) {
    return ordered[below];
  }
  const frac = rank - below;
  return ordered[below] + (ordered[above] - ordered[below]) * frac;
}
|
|
|
|
/**
 * Returns the majority value of a list of votes, or `null` if there is none.
 *
 * A value wins only if at least two votes agree on it AND it holds a strict
 * majority (more than half of all votes). For the three-vote inputs this
 * script uses, that is the same as "appears at least twice". The strict
 * majority check matters for longer inputs: counting ">= 2" alone would
 * return whichever value happened to reach two votes first in insertion
 * order, even when a different value had more votes.
 *
 * @param arr - The votes, compared with Map key equality (SameValueZero).
 * @returns The winning value, or `null` when no value has a majority
 *          (including empty and single-element inputs).
 */
function majority<T>(arr: T[]): T | null {
  const freq = new Map<T, number>();
  for (const v of arr) freq.set(v, (freq.get(v) ?? 0) + 1);

  for (const [val, count] of freq) {
    // At most one value can exceed half the votes, so the first hit is the answer.
    if (count >= 2 && count * 2 > arr.length) return val;
  }
  return null;
}
|
|
|
|
/**
 * Renders a list of numbers in ascending order as "[a,b,c]".
 * Duplicates are kept and the input array is not modified.
 */
function sortedVals(arr: number[]): string {
  const ascending = arr.slice().sort((x, y) => x - y);
  return "[" + ascending.join(",") + "]";
}
|
|
|
|
/**
 * Returns the distinct strings of `arr` in default (code-unit) sort order.
 * The input array is not modified.
 */
function uniqueSorted(arr: string[]): string[] {
  const distinct = Array.from(new Set(arr));
  distinct.sort();
  return distinct;
}
|
|
|
|
// ── Main ──────────────────────────────────────────────────────────────
|
|
/** Horizontal rule framing the report title and footer. */
const REPORT_RULE = "═══════════════════════════════════════════════════════════════════";

/** Horizontal rule framing each numbered section title. */
const SECTION_RULE = "══════════════════════════════════════════════════════════════";

/** Agreement summary for one paragraph that received exactly three annotations. */
interface ParaInfo {
  pid: string;
  /** Category voted by each annotation, in file order. */
  cats: string[];
  /** Specificity level voted by each annotation, in file order. */
  specs: number[];
  catUnanimous: boolean;
  specUnanimous: boolean;
  /** Majority category, or null when all three votes differ. */
  majCat: string | null;
  /** Majority specificity, or null when all three votes differ. */
  majSpec: number | null;
  catDisputed: boolean;
  specDisputed: boolean;
  disputeType: "none" | "cat-only" | "spec-only" | "both";
  /** Word count from the paragraph file; 0 when the paragraph id is not found there. */
  wordCount: number;
}

/** Increments the counter stored under `key`, starting from zero. */
function bump<K>(counts: Map<K, number>, key: K): void {
  counts.set(key, (counts.get(key) ?? 0) + 1);
}

/** Comparator ordering `[key, count]` entries by count, largest first. */
function byCountDesc(a: readonly [unknown, number], b: readonly [unknown, number]): number {
  return b[1] - a[1];
}

/** Distinct specificity votes in ascending order, rendered as "[1,2]". */
function specPattern(specs: number[]): string {
  const distinct = [...new Set(specs)].sort((a, b) => a - b);
  return `[${distinct.join(",")}]`;
}

/** Distinct category votes: "A↔B" for a two-way split, otherwise joined with "/". */
function catPattern(cats: string[]): string {
  const distinct = uniqueSorted(cats);
  return distinct.length === 2 ? `${distinct[0]}↔${distinct[1]}` : distinct.join("/");
}

/** Prints a numbered-section banner; `blankAfter` adds an empty line below it. */
function printSectionBanner(title: string, blankAfter = false): void {
  console.log(`\n\n${SECTION_RULE}`);
  console.log(title);
  console.log(blankAfter ? `${SECTION_RULE}\n` : SECTION_RULE);
}

/** Prints one "count label" line per entry, with the count right-aligned to 6 columns. */
function printCountList(entries: Iterable<readonly [string | number, number]>): void {
  for (const [label, count] of entries) {
    console.log(` ${count.toString().padStart(6)} ${label}`);
  }
}

/** Builds the agreement summary for one paragraph from its annotations. */
function classifyParagraph(pid: string, panns: Ann[], wordCount: number): ParaInfo {
  const cats = panns.map(a => a.label.content_category);
  const specs = panns.map(a => a.label.specificity_level);
  const catUnanimous = new Set(cats).size === 1;
  const specUnanimous = new Set(specs).size === 1;
  const catDisputed = !catUnanimous;
  const specDisputed = !specUnanimous;

  let disputeType: ParaInfo["disputeType"];
  if (catDisputed) disputeType = specDisputed ? "both" : "cat-only";
  else disputeType = specDisputed ? "spec-only" : "none";

  return {
    pid,
    cats,
    specs,
    catUnanimous,
    specUnanimous,
    majCat: majority(cats),
    majSpec: majority(specs),
    catDisputed,
    specDisputed,
    disputeType,
    wordCount,
  };
}

/**
 * Loads the annotation and paragraph files, keeps paragraphs that have exactly
 * three annotations, and prints six cross-tabulations of the paragraphs whose
 * category and/or specificity votes are not unanimous.
 *
 * Output goes to stdout only; nothing is written to disk.
 */
async function main(): Promise<void> {
  console.log("Loading data...");
  const [{ records: rawAnns, skipped: annSkipped }, { records: paragraphs, skipped: paraSkipped }] =
    await Promise.all([
      readJsonlRaw(ANN_PATH),
      readJsonl(PARA_PATH, Paragraph),
    ]);

  // NOTE(review): annotation records are cast, not validated — presumably
  // readJsonlRaw returns parsed JSON as-is; confirm before trusting field access.
  const anns = rawAnns as Ann[];
  console.log(` ${anns.length.toLocaleString()} annotations (${annSkipped} skipped)`);
  console.log(` ${paragraphs.length.toLocaleString()} paragraphs (${paraSkipped} skipped)\n`);

  // Index paragraphs by id
  const paraById = new Map(paragraphs.map(p => [p.id, p]));

  // Group annotations by paragraph
  const byParagraph = new Map<string, Ann[]>();
  for (const a of anns) {
    let group = byParagraph.get(a.paragraphId);
    if (!group) {
      group = [];
      byParagraph.set(a.paragraphId, group);
    }
    group.push(a);
  }

  // Classify each paragraph; anything without exactly three annotations is ignored.
  const allParas: ParaInfo[] = [];
  for (const [pid, panns] of byParagraph) {
    if (panns.length !== 3) continue;
    allParas.push(classifyParagraph(pid, panns, paraById.get(pid)?.wordCount ?? 0));
  }

  const disputed = allParas.filter(p => p.disputeType !== "none");
  const catOnly = allParas.filter(p => p.disputeType === "cat-only");
  const specOnly = allParas.filter(p => p.disputeType === "spec-only");
  const bothDisputed = allParas.filter(p => p.disputeType === "both");

  console.log(REPORT_RULE);
  console.log(" DISPUTE CROSS-TABULATION ANALYSIS");
  console.log(REPORT_RULE);
  console.log(` Total paragraphs (3-annotator): ${allParas.length.toLocaleString()}`);
  console.log(` Disputed (not both-unanimous): ${disputed.length.toLocaleString()} (${pct(disputed.length, allParas.length)})`);
  console.log(` Cat-only: ${catOnly.length.toLocaleString()}`);
  console.log(` Spec-only: ${specOnly.length.toLocaleString()}`);
  console.log(` Both: ${bothDisputed.length.toLocaleString()}`);

  // ── 1. Category x specificity cross-tab for disputed paragraphs ──────────
  printSectionBanner(" 1. CATEGORY x SPECIFICITY CROSS-TAB (disputed paragraphs)");
  console.log(" Uses majority-vote labels for both axes.\n");

  // Row order: majority categories, most frequent first.
  const catCounts = new Map<string, number>();
  for (const p of disputed) {
    if (p.majCat) bump(catCounts, p.majCat);
  }
  const categories = [...catCounts.entries()].sort(byCountDesc).map(([c]) => c);
  const specLevels = [1, 2, 3, 4];
  const specLabels = ["GenBoiler", "SectorAdpt", "FirmSpec", "QuantVerif"];

  // Build the cross-tab; paragraphs lacking a majority on either axis are only counted.
  const crossTab = new Map<string, number>();
  let noMajCat = 0;
  let noMajSpec = 0;
  for (const p of disputed) {
    if (!p.majCat || p.majSpec === null) {
      if (!p.majCat) noMajCat++;
      if (p.majSpec === null) noMajSpec++;
      continue;
    }
    bump(crossTab, `${p.majCat}|${p.majSpec}`);
  }

  // Column width is 14, not 12: the widest header labels ("2:SectorAdpt",
  // "4:QuantVerif") are exactly 12 characters, so a 12-wide column left no
  // gap and adjacent columns ran together.
  const colW = 14;
  const catW = 28;
  let header = "Category".padEnd(catW);
  specLevels.forEach((level, i) => {
    header += `${level}:${specLabels[i]}`.padStart(colW);
  });
  header += "Total".padStart(colW);
  console.log(` ${header}`);
  console.log(` ${"─".repeat(header.length)}`);

  for (const cat of categories) {
    const counts = specLevels.map(s => crossTab.get(`${cat}|${s}`) ?? 0);
    const rowTotal = counts.reduce((a, b) => a + b, 0);
    let row = cat.padEnd(catW);
    for (const v of counts) {
      const rowPct = rowTotal > 0 ? ((v / rowTotal) * 100).toFixed(0) : "0";
      row += `${v} (${rowPct}%)`.padStart(colW);
    }
    row += `${rowTotal}`.padStart(colW);
    console.log(` ${row}`);
  }
  console.log(`\n (${noMajCat} paragraphs had no majority category, ${noMajSpec} had no majority specificity)`);

  // ── 2. Dispute type by majority category ─────────────────────────────────
  printSectionBanner(" 2. DISPUTE TYPE BY MAJORITY CATEGORY");
  console.log(" For each majority category, % of disputes that are cat-only, spec-only, or both.\n");

  const disputeByCat = new Map<string, { catOnly: number; specOnly: number; both: number }>();
  for (const p of disputed) {
    const cat = p.majCat ?? "[no majority]";
    let entry = disputeByCat.get(cat);
    if (!entry) {
      entry = { catOnly: 0, specOnly: 0, both: 0 };
      disputeByCat.set(cat, entry);
    }
    if (p.disputeType === "cat-only") entry.catOnly++;
    else if (p.disputeType === "spec-only") entry.specOnly++;
    else if (p.disputeType === "both") entry.both++;
  }

  // Width 16, not 12: a cell such as "1234 (45.6%)" is already 12 characters,
  // so four-digit counts used to touch the neighbouring column.
  const shareW = 16;
  const dHeader = "Category".padEnd(catW) + "n".padStart(8) +
    "Cat-only".padStart(shareW) + "Spec-only".padStart(shareW) + "Both".padStart(shareW);
  console.log(` ${dHeader}`);
  console.log(` ${"─".repeat(dHeader.length)}`);

  const dispRows = [...disputeByCat.entries()]
    .map(([cat, d]) => ({ cat, d, total: d.catOnly + d.specOnly + d.both }))
    .sort((a, b) => b.total - a.total);
  for (const { cat, d, total } of dispRows) {
    const row = cat.padEnd(catW) +
      total.toString().padStart(8) +
      `${d.catOnly} (${pct(d.catOnly, total)})`.padStart(shareW) +
      `${d.specOnly} (${pct(d.specOnly, total)})`.padStart(shareW) +
      `${d.both} (${pct(d.both, total)})`.padStart(shareW);
    console.log(` ${row}`);
  }

  // ── 3. Specificity boundary disputes by category ─────────────────────────
  printSectionBanner(" 3. SPECIFICITY BOUNDARY DISPUTES BY CATEGORY");
  console.log(" For spec-disputed paragraphs, the spec vote pattern by majority category.\n");

  // Majority category -> (distinct-spec-vote pattern -> paragraph count).
  const specPatternByCat = new Map<string, Map<string, number>>();
  for (const p of allParas) {
    if (!p.specDisputed) continue;
    const cat = p.majCat ?? "[no majority]";
    let patternMap = specPatternByCat.get(cat);
    if (!patternMap) {
      patternMap = new Map();
      specPatternByCat.set(cat, patternMap);
    }
    bump(patternMap, specPattern(p.specs));
  }

  // Columns: every pattern seen in any category, in string sort order.
  const allPatterns = new Set<string>();
  for (const pm of specPatternByCat.values()) {
    for (const pat of pm.keys()) allPatterns.add(pat);
  }
  const sortedPatterns = [...allPatterns].sort();

  const patW = 10;
  let pHeader = "Category".padEnd(catW) + "n".padStart(6);
  for (const pat of sortedPatterns) {
    pHeader += pat.padStart(patW);
  }

  // Rows: categories ordered by number of spec-disputed paragraphs, largest first.
  const specPatRows = [...specPatternByCat.entries()]
    .map(([cat, pm]) => {
      let total = 0;
      for (const v of pm.values()) total += v;
      return { cat, pm, total };
    })
    .sort((a, b) => b.total - a.total);

  // The same table is printed twice: once with raw counts, once with row percentages.
  const printPatternTable = (cell: (count: number, rowTotal: number) => string): void => {
    console.log(` ${pHeader}`);
    console.log(` ${"─".repeat(pHeader.length)}`);
    for (const { cat, pm, total } of specPatRows) {
      let row = cat.padEnd(catW) + total.toString().padStart(6);
      for (const pat of sortedPatterns) {
        const v = pm.get(pat) ?? 0;
        row += (v === 0 ? "-" : cell(v, total)).padStart(patW);
      }
      console.log(` ${row}`);
    }
  };

  printPatternTable(count => `${count}`);
  console.log("\n (Row percentages:)");
  printPatternTable((count, rowTotal) => `${((count / rowTotal) * 100).toFixed(0)}%`);

  // ── 4. Word count distribution by dispute type ───────────────────────────
  printSectionBanner(" 4. WORD COUNT DISTRIBUTION BY DISPUTE TYPE", true);

  const groups: { label: string; paras: ParaInfo[] }[] = [
    { label: "Unanimous (no dispute)", paras: allParas.filter(p => p.disputeType === "none") },
    { label: "Cat-only dispute", paras: catOnly },
    { label: "Spec-only dispute", paras: specOnly },
    { label: "Both disputed", paras: bothDisputed },
  ];

  const wcHeader = "Dispute Type".padEnd(28) + "n".padStart(8) + "Median".padStart(10) +
    "P90".padStart(10) + "P10".padStart(10) + "Mean".padStart(10);
  console.log(` ${wcHeader}`);
  console.log(` ${"─".repeat(wcHeader.length)}`);

  for (const g of groups) {
    // A word count of 0 marks a paragraph missing from the paragraph file; leave it out.
    const wcs = g.paras.map(p => p.wordCount).filter(w => w > 0);
    if (wcs.length === 0) continue;
    const mean = wcs.reduce((a, b) => a + b, 0) / wcs.length;
    const row = g.label.padEnd(28) +
      wcs.length.toString().padStart(8) +
      median(wcs).toFixed(0).padStart(10) +
      percentile(wcs, 90).toFixed(0).padStart(10) +
      percentile(wcs, 10).toFixed(0).padStart(10) +
      mean.toFixed(0).padStart(10);
    console.log(` ${row}`);
  }

  // ── 5. Unresolved paragraphs (three-way category splits) ─────────────────
  printSectionBanner(" 5. UNRESOLVED PARAGRAPH ANALYSIS (3-way category splits)", true);

  const unresolved = allParas.filter(p => p.majCat === null);
  console.log(` Total unresolved paragraphs: ${unresolved.length.toLocaleString()}`);

  // How often each category appears among the votes on unresolved paragraphs.
  const unresolvedCatFreq = new Map<string, number>();
  for (const p of unresolved) {
    for (const c of p.cats) bump(unresolvedCatFreq, c);
  }
  console.log("\n Categories appearing in unresolved paragraphs (annotation count):");
  printCountList([...unresolvedCatFreq.entries()].sort(byCountDesc));

  // How often each specificity level appears among those same votes.
  const unresolvedSpecFreq = new Map<number, number>();
  for (const p of unresolved) {
    for (const s of p.specs) bump(unresolvedSpecFreq, s);
  }
  console.log("\n Specificity levels in unresolved paragraphs (annotation count):");
  for (let s = 1; s <= 4; s++) {
    const count = unresolvedSpecFreq.get(s) ?? 0;
    console.log(` ${count.toString().padStart(6)} ${s}`);
  }

  // Most common three-way category splits.
  const threewayPatterns = new Map<string, number>();
  for (const p of unresolved) {
    bump(threewayPatterns, [...p.cats].sort().join(" / "));
  }

  console.log("\n Most common 3-way category splits:");
  const sortedThreeWay = [...threewayPatterns.entries()].sort(byCountDesc);
  printCountList(sortedThreeWay.slice(0, 20));
  if (sortedThreeWay.length > 20) {
    console.log(` ... and ${sortedThreeWay.length - 20} more patterns`);
  }

  // Specificity agreement among unresolved ("majority" includes unanimous).
  const unresolvedSpecUnanimous = unresolved.filter(p => p.specUnanimous).length;
  const unresolvedSpecMaj = unresolved.filter(p => p.majSpec !== null).length;
  const unresolvedSpecSplit = unresolved.length - unresolvedSpecMaj;
  console.log(`\n Specificity agreement among unresolved:`);
  console.log(` Spec unanimous: ${unresolvedSpecUnanimous} (${pct(unresolvedSpecUnanimous, unresolved.length)})`);
  console.log(` Spec majority: ${unresolvedSpecMaj} (${pct(unresolvedSpecMaj, unresolved.length)})`);
  console.log(` Spec 3-way: ${unresolvedSpecSplit} (${pct(unresolvedSpecSplit, unresolved.length)})`);

  // ── 6. "Both" disputes: combined category + specificity patterns ─────────
  printSectionBanner(" 6. 'BOTH' DISPUTES — COMBINED CATEGORY + SPECIFICITY PATTERNS", true);

  console.log(` Total paragraphs with both cat AND spec disputed: ${bothDisputed.length.toLocaleString()}\n`);

  // One pass fills all three tallies: combined, category-only, and spec-only patterns.
  const combinedPatterns = new Map<string, number>();
  const catPairAgg = new Map<string, number>();
  const specPatAgg = new Map<string, number>();
  for (const p of bothDisputed) {
    const catPart = catPattern(p.cats);
    const specPart = specPattern(p.specs);
    bump(combinedPatterns, `${catPart} + ${specPart}`);
    bump(catPairAgg, catPart);
    bump(specPatAgg, specPart);
  }

  const sortedCombined = [...combinedPatterns.entries()].sort(byCountDesc);
  console.log(" Top 30 combined dispute patterns:");
  printCountList(sortedCombined.slice(0, 30));
  if (sortedCombined.length > 30) {
    const remainder = sortedCombined.slice(30).reduce((sum, entry) => sum + entry[1], 0);
    console.log(`\n ... and ${sortedCombined.length - 30} more patterns (${remainder} paragraphs)`);
  }

  // Category pairs aggregated across spec patterns.
  console.log("\n Category dispute pairs (aggregated across spec patterns):");
  printCountList([...catPairAgg.entries()].sort(byCountDesc).slice(0, 20));

  console.log("\n Spec boundary patterns within 'both' disputes:");
  for (const [pat, count] of [...specPatAgg.entries()].sort(byCountDesc)) {
    console.log(` ${count.toString().padStart(6)} ${pat} (${pct(count, bothDisputed.length)})`);
  }

  console.log(`\n${REPORT_RULE}`);
  console.log(" ANALYSIS COMPLETE");
  console.log(REPORT_RULE);
}
|
|
|
|
// Script entry point: run the analysis, and on any failure print the error
// and exit with a non-zero status.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
|