/**
 * Comprehensive data dump from the labelapp database.
 *
 * Exports:
 *   data/gold/human-labels-raw.jsonl — every individual label with timing
 *   data/gold/paragraphs-holdout.jsonl — paragraph metadata for the 1,200 holdout
 *   data/gold/annotators.json — annotator profiles + onboarding timestamps
 *   data/gold/quiz-sessions.jsonl — all quiz attempts
 *   data/gold/metrics.json — comprehensive IRR: per-dimension alpha/kappa,
 *     pairwise matrices, per-category, per-stratum
 */

// NOTE(review): this default embeds database credentials in source; consider
// requiring DATABASE_URL from the environment instead.
// NOTE(review): under native ESM, imports are evaluated before this assignment
// runs — confirm that "../db" reads DATABASE_URL lazily (or that the runtime
// preserves statement order) so the default actually takes effect.
process.env.DATABASE_URL ??= "postgresql://sec_cybert:sec_cybert@10.1.10.10:5432/sec_cybert";

import { writeFile, mkdir } from "node:fs/promises";
import { existsSync } from "node:fs";
import { db } from "../db";
import * as schema from "../db/schema";
import {
  cohensKappa,
  krippendorffsAlpha,
  agreementRate,
  perCategoryAgreement,
} from "../lib/metrics";

const OUT_DIR = "/home/joey/Documents/sec-cyBERT/data/gold";

const CATEGORIES = [
  "Board Governance",
  "Management Role",
  "Risk Management Process",
  "Third-Party Risk",
  "Incident Disclosure",
  "Strategy Integration",
  "None/Other",
];

/** One row of the pairwise-kappa report: two annotator display names, their kappa, and the overlap size. */
interface KappaPair {
  a1: string;
  a2: string;
  kappa: number;
  n: number;
}

/** Serializes records as JSON Lines: one JSON document per line, with a trailing newline. */
function toJSONL(records: object[]): string {
  return records.map((r) => JSON.stringify(r)).join("\n") + "\n";
}

/** Groups items by a string key, preserving first-seen key order and item order within each group. */
function groupBy<T>(items: readonly T[], keyOf: (item: T) => string): Map<string, T[]> {
  const groups = new Map<string, T[]>();
  for (const item of items) {
    const key = keyOf(item);
    const group = groups.get(key);
    if (group) group.push(item);
    else groups.set(key, [item]);
  }
  return groups;
}

/**
 * Computes Cohen's kappa for every pair of annotators over the paragraphs both labeled.
 *
 * @param annotatorIds   Annotator ids; their order defines the matrix rows/columns.
 * @param annotatorNames Display-name lookup used for the `pairs` report (falls back to the id).
 * @param groups         Label groups, one per paragraph.
 * @param ratingOf       Extracts the rating to compare (as a string) from a label.
 * @returns `matrix` — symmetric, diagonal 1, and 0 for pairs with fewer than two shared
 *          paragraphs; `pairs` — one entry per pair that had at least two shared paragraphs.
 */
function pairwiseKappa<T extends { annotatorId: string }>(
  annotatorIds: readonly string[],
  annotatorNames: ReadonlyMap<string, string>,
  groups: Iterable<readonly T[]>,
  ratingOf: (label: T) => string,
): { matrix: number[][]; pairs: KappaPair[] } {
  const size = annotatorIds.length;
  const matrix: number[][] = Array.from({ length: size }, () => new Array<number>(size).fill(0));
  const pairs: KappaPair[] = [];
  // Materialize once: `groups` may be a one-shot iterator (e.g. Map.prototype.values()).
  const paragraphs = [...groups];

  for (let i = 0; i < size; i++) {
    matrix[i][i] = 1;
    for (let j = i + 1; j < size; j++) {
      const a1 = annotatorIds[i];
      const a2 = annotatorIds[j];
      const shared1: string[] = [];
      const shared2: string[] = [];

      for (const lbls of paragraphs) {
        const l1 = lbls.find((l) => l.annotatorId === a1);
        const l2 = lbls.find((l) => l.annotatorId === a2);
        if (l1 && l2) {
          shared1.push(ratingOf(l1));
          shared2.push(ratingOf(l2));
        }
      }

      // Kappa is only computed when the pair shares at least two paragraphs.
      if (shared1.length >= 2) {
        const kappa = cohensKappa(shared1, shared2);
        matrix[i][j] = kappa;
        matrix[j][i] = kappa;
        pairs.push({
          a1: annotatorNames.get(a1) ?? a1,
          a2: annotatorNames.get(a2) ?? a2,
          kappa,
          n: shared1.length,
        });
      }
    }
  }

  return { matrix, pairs };
}

/** Arithmetic mean of the `kappa` values in `pairs`, or 0 when there are none. */
function meanKappa(pairs: readonly KappaPair[]): number {
  return pairs.length > 0 ? pairs.reduce((s, d) => s + d.kappa, 0) / pairs.length : 0;
}

/**
 * Loads every table, writes the five export files into OUT_DIR, prints a
 * human-readable summary, and exits the process with status 0.
 */
async function main(): Promise<void> {
  if (!existsSync(OUT_DIR)) await mkdir(OUT_DIR, { recursive: true });

  // ── Load everything ──
  console.log("Loading all data from database...");

  const [allLabels, allAnnotators, allParagraphs, allQuizSessions, allAdjudications] =
    await Promise.all([
      db.select().from(schema.humanLabels),
      db.select().from(schema.annotators),
      db.select().from(schema.paragraphs),
      db.select().from(schema.quizSessions),
      db.select().from(schema.adjudications),
    ]);

  // Row type of the humanLabels query, derived from the result so it tracks the schema.
  type HumanLabel = (typeof allLabels)[number];

  const annotatorIds = allAnnotators.map((a) => a.id).sort();
  const annotatorNames = new Map(allAnnotators.map((a) => [a.id, a.displayName]));
  const labels = allLabels;

  console.log(` ${labels.length} human labels`);
  console.log(` ${allParagraphs.length} paragraphs`);
  console.log(` ${allAnnotators.length} annotators`);
  console.log(` ${allQuizSessions.length} quiz sessions`);
  console.log(` ${allAdjudications.length} adjudications`);

  // ── 1. Raw labels JSONL ──
  console.log("\nExporting raw labels...");
  const rawLabels = labels.map((l) => ({
    paragraphId: l.paragraphId,
    annotatorId: l.annotatorId,
    annotatorName: annotatorNames.get(l.annotatorId) ?? l.annotatorId,
    contentCategory: l.contentCategory,
    specificityLevel: l.specificityLevel,
    notes: l.notes,
    labeledAt: l.labeledAt?.toISOString() ?? null,
    sessionId: l.sessionId,
    durationMs: l.durationMs,
    activeMs: l.activeMs,
  }));
  await writeFile(`${OUT_DIR}/human-labels-raw.jsonl`, toJSONL(rawLabels));
  console.log(` ${rawLabels.length} labels → human-labels-raw.jsonl`);

  // ── 2. Paragraph metadata JSONL ──
  console.log("\nExporting paragraph metadata...");
  const paragraphRecords = allParagraphs.map((p) => ({
    id: p.id,
    text: p.text,
    wordCount: p.wordCount,
    paragraphIndex: p.paragraphIndex,
    companyName: p.companyName,
    cik: p.cik,
    ticker: p.ticker,
    filingType: p.filingType,
    filingDate: p.filingDate,
    fiscalYear: p.fiscalYear,
    accessionNumber: p.accessionNumber,
    secItem: p.secItem,
    stage1Category: p.stage1Category,
    stage1Specificity: p.stage1Specificity,
    stage1Method: p.stage1Method,
    stage1Confidence: p.stage1Confidence,
  }));
  await writeFile(`${OUT_DIR}/paragraphs-holdout.jsonl`, toJSONL(paragraphRecords));
  console.log(` ${paragraphRecords.length} paragraphs → paragraphs-holdout.jsonl`);

  // ── 3. Annotators JSON ──
  console.log("\nExporting annotator profiles...");
  const annotatorProfiles = allAnnotators.map(
    (a: { id: string; displayName: string; onboardedAt: Date | null }) => ({
      id: a.id,
      displayName: a.displayName,
      onboardedAt: a.onboardedAt?.toISOString() ?? null,
    }),
  );
  await writeFile(`${OUT_DIR}/annotators.json`, JSON.stringify(annotatorProfiles, null, 2));
  console.log(` ${annotatorProfiles.length} annotators → annotators.json`);

  // ── 4. Quiz sessions JSONL ──
  console.log("\nExporting quiz sessions...");
  const quizRecords = allQuizSessions.map((q) => ({
    id: q.id,
    annotatorId: q.annotatorId,
    annotatorName: annotatorNames.get(q.annotatorId) ?? q.annotatorId,
    startedAt: q.startedAt?.toISOString() ?? null,
    completedAt: q.completedAt?.toISOString() ?? null,
    passed: q.passed,
    score: q.score,
    totalQuestions: q.totalQuestions,
    answers: q.answers,
  }));
  await writeFile(`${OUT_DIR}/quiz-sessions.jsonl`, toJSONL(quizRecords));
  console.log(` ${quizRecords.length} quiz sessions → quiz-sessions.jsonl`);

  // ── 5. Comprehensive metrics ──
  console.log("\nComputing metrics...");

  // Group labels by paragraph
  const byParagraph = groupBy<HumanLabel>(labels, (l) => l.paragraphId);

  // Only paragraphs with 3+ labels
  const fullyLabeled = new Map<string, HumanLabel[]>(
    [...byParagraph].filter(([, lbls]) => lbls.length >= 3),
  );

  // Paragraphs with 2+ labels (for pairwise)
  const multiLabeled = new Map<string, HumanLabel[]>(
    [...byParagraph].filter(([, lbls]) => lbls.length >= 2),
  );
  const multiLabeledParaIds = [...multiLabeled.keys()];

  // ─── Per-annotator stats ───
  const perAnnotatorStats = annotatorIds.map((aid) => {
    const myLabels = labels.filter((l) => l.annotatorId === aid);
    const activeTimes = myLabels
      .map((l) => l.activeMs)
      .filter((t): t is number => t !== null);
    const wallTimes = myLabels
      .map((l) => l.durationMs)
      .filter((t): t is number => t !== null);

    return {
      id: aid,
      name: annotatorNames.get(aid) ?? aid,
      labelCount: myLabels.length,
      medianActiveMs: activeTimes.length > 0 ? median(activeTimes) : null,
      meanActiveMs: activeTimes.length > 0 ? mean(activeTimes) : null,
      medianDurationMs: wallTimes.length > 0 ? median(wallTimes) : null,
      meanDurationMs: wallTimes.length > 0 ? mean(wallTimes) : null,
      totalActiveMs: activeTimes.length > 0 ? sum(activeTimes) : null,
      totalDurationMs: wallTimes.length > 0 ? sum(wallTimes) : null,
      labelsWithActiveTime: activeTimes.length,
    };
  });

  // ─── Consensus rates over fully-labeled paragraphs ───
  const fullGroups = [...fullyLabeled.values()];

  // Category consensus
  const categoryArrays: string[][] = fullGroups.map((lbls) =>
    lbls.map((l) => l.contentCategory),
  );
  const categoryConsensusRate = agreementRate(categoryArrays);

  // Specificity consensus
  const specArrays: string[][] = fullGroups.map((lbls) =>
    lbls.map((l) => String(l.specificityLevel)),
  );
  const specConsensusRate = agreementRate(specArrays);

  // Both consensus
  const bothArrays: string[][] = fullGroups.map((lbls) =>
    lbls.map((l) => `${l.contentCategory}|${l.specificityLevel}`),
  );
  const bothConsensusRate = agreementRate(bothArrays);

  // ─── Krippendorff's Alpha ───
  // Builds an annotators × paragraphs matrix; null marks "this annotator did not
  // label this paragraph" (or a rating that could not be encoded).
  const buildRatingsMatrix = (
    ratingOf: (label: HumanLabel) => number | null,
  ): (number | null)[][] =>
    annotatorIds.map((annotatorId) =>
      multiLabeledParaIds.map((paraId) => {
        const label = multiLabeled
          .get(paraId)
          ?.find((l) => l.annotatorId === annotatorId);
        return label ? ratingOf(label) : null;
      }),
    );
  const canComputeAlpha = annotatorIds.length >= 2 && multiLabeledParaIds.length > 0;

  // Category: categories are encoded as integers (1-based index into CATEGORIES).
  // Note: using ordinal distance on nominal data — this is conservative;
  // nominal distance would give higher alpha.
  const catIndex = new Map<string, number>(CATEGORIES.map((c, i) => [c, i + 1]));
  const categoryRatingsMatrix = buildRatingsMatrix(
    (label) => catIndex.get(label.contentCategory) ?? null,
  );
  const categoryAlpha = canComputeAlpha ? krippendorffsAlpha(categoryRatingsMatrix) : 0;

  // Specificity (ordinal)
  const specRatingsMatrix = buildRatingsMatrix((label) => label.specificityLevel ?? null);
  const specAlpha = canComputeAlpha ? krippendorffsAlpha(specRatingsMatrix) : 0;

  // ─── Pairwise Cohen's Kappa — category ───
  const { matrix: kappaCategory, pairs: kappaCatDetails } = pairwiseKappa<HumanLabel>(
    annotatorIds,
    annotatorNames,
    multiLabeled.values(),
    (l) => l.contentCategory,
  );

  // ─── Pairwise Cohen's Kappa — specificity ───
  const { matrix: kappaSpec, pairs: kappaSpecDetails } = pairwiseKappa<HumanLabel>(
    annotatorIds,
    annotatorNames,
    multiLabeled.values(),
    (l) => String(l.specificityLevel),
  );

  // ─── Per-category agreement ───
  const perCategory = perCategoryAgreement(
    labels.map((l) => ({
      category: l.contentCategory,
      annotatorId: l.annotatorId,
      paragraphId: l.paragraphId,
    })),
    CATEGORIES,
  );

  // ─── Per-stratum agreement (using stage1 data to identify strata) ───
  const paragraphMeta = new Map(allParagraphs.map((p) => [p.id, p]));

  // Classify each paragraph's stratum based on stage1 data
  function classifyStratum(pid: string): string {
    const para = paragraphMeta.get(pid);
    if (!para) return "unknown";

    const method = para.stage1Method;
    const cat = para.stage1Category;
    const spec = para.stage1Specificity;

    // Check if it was a disputed paragraph based on method
    if (method === "unresolved") return "unresolved";
    if (method === "majority") {
      // Try to identify the dispute type from the category
      if (cat === "Management Role" || cat === "Risk Management Process") return "mgmt_rmp_split";
      if (cat === "None/Other" || cat === "Strategy Integration") return "noneother_strategy_split";
      if (cat === "Board Governance") return "board_mgmt_split";
      if (spec === 3 || spec === 4) return "spec_34_split";
      return "majority_other";
    }
    if (method === "unanimous") return "unanimous";
    return "proportional_random";
  }

  const strataAgreement: Record<string, { total: number; agreed: number }> = {};
  for (const [pid, lbls] of fullyLabeled) {
    const stratum = classifyStratum(pid);
    const bucket = (strataAgreement[stratum] ??= { total: 0, agreed: 0 });
    bucket.total++;

    // A paragraph counts as "agreed" only when category AND specificity are unanimous.
    const first = lbls[0];
    const allSameCat = lbls.every((l) => l.contentCategory === first.contentCategory);
    const allSameSpec = lbls.every((l) => l.specificityLevel === first.specificityLevel);
    if (allSameCat && allSameSpec) bucket.agreed++;
  }

  const strataRates: Record<string, { total: number; agreed: number; rate: number }> = {};
  for (const [stratum, data] of Object.entries(strataAgreement)) {
    strataRates[stratum] = {
      ...data,
      rate: data.total > 0 ? data.agreed / data.total : 0,
    };
  }

  // ─── Timing summary ───
  const allActiveTimes = labels
    .map((l) => l.activeMs)
    .filter((t): t is number => t !== null);
  const allWallTimes = labels
    .map((l) => l.durationMs)
    .filter((t): t is number => t !== null);

  // ─── Category distribution ───
  const categoryDist: Record<string, number> = {};
  for (const cat of CATEGORIES) categoryDist[cat] = 0;
  for (const l of labels) {
    categoryDist[l.contentCategory] = (categoryDist[l.contentCategory] ?? 0) + 1;
  }

  // ─── Specificity distribution ───
  const specDist: Record<string, number> = { "1": 0, "2": 0, "3": 0, "4": 0 };
  for (const l of labels) {
    const key = String(l.specificityLevel);
    specDist[key] = (specDist[key] ?? 0) + 1;
  }

  // ─── Majority label distribution (for fully-labeled paragraphs) ───
  const majorityCategories: Record<string, number> = {};
  for (const cat of CATEGORIES) majorityCategories[cat] = 0;
  for (const lbls of fullyLabeled.values()) {
    const catCounts = new Map<string, number>();
    for (const l of lbls) {
      catCounts.set(l.contentCategory, (catCounts.get(l.contentCategory) ?? 0) + 1);
    }
    // Most frequent category wins; on a tie the first one encountered is kept.
    let maxCount = 0;
    let majorCat = "";
    for (const [cat, count] of catCounts) {
      if (count > maxCount) {
        maxCount = count;
        majorCat = cat;
      }
    }
    if (majorCat) majorityCategories[majorCat]++;
  }

  const annotatorDisplayNames = annotatorIds.map((id) => annotatorNames.get(id) ?? id);

  const metrics = {
    summary: {
      totalLabels: labels.length,
      totalParagraphs: allParagraphs.length,
      fullyLabeledParagraphs: fullyLabeled.size,
      adjudicatedParagraphs: allAdjudications.length,
      annotatorCount: annotatorIds.length,
    },
    consensus: {
      categoryOnly: round(categoryConsensusRate, 4),
      specificityOnly: round(specConsensusRate, 4),
      both: round(bothConsensusRate, 4),
    },
    krippendorffsAlpha: {
      category: round(categoryAlpha, 4),
      specificity: round(specAlpha, 4),
      note: "Category alpha uses ordinal distance on nominal data (conservative). Specificity alpha uses ordinal distance.",
    },
    pairwiseKappa: {
      category: {
        annotators: annotatorDisplayNames,
        matrix: kappaCategory.map((row) => row.map((v) => round(v, 4))),
        pairs: kappaCatDetails.map((d) => ({
          ...d,
          kappa: round(d.kappa, 4),
        })),
        mean: round(meanKappa(kappaCatDetails), 4),
      },
      specificity: {
        annotators: annotatorDisplayNames,
        matrix: kappaSpec.map((row) => row.map((v) => round(v, 4))),
        pairs: kappaSpecDetails.map((d) => ({
          ...d,
          kappa: round(d.kappa, 4),
        })),
        mean: round(meanKappa(kappaSpecDetails), 4),
      },
    },
    perCategoryAgreement: Object.fromEntries(
      Object.entries(perCategory).map(([k, v]) => [k, round(v, 4)]),
    ),
    perStratumAgreement: strataRates,
    distributions: {
      categoryLabels: categoryDist,
      specificityLabels: specDist,
      majorityCategories,
    },
    timing: {
      overallMedianActiveMs: allActiveTimes.length > 0 ? median(allActiveTimes) : null,
      overallMeanActiveMs: allActiveTimes.length > 0 ? round(mean(allActiveTimes), 0) : null,
      overallMedianDurationMs: allWallTimes.length > 0 ? median(allWallTimes) : null,
      overallMeanDurationMs: allWallTimes.length > 0 ? round(mean(allWallTimes), 0) : null,
      totalActiveHours:
        allActiveTimes.length > 0 ? round(sum(allActiveTimes) / 3_600_000, 2) : null,
      totalWallHours: allWallTimes.length > 0 ? round(sum(allWallTimes) / 3_600_000, 2) : null,
      labelsWithActiveTime: allActiveTimes.length,
      labelsWithoutActiveTime: labels.length - allActiveTimes.length,
    },
    perAnnotator: perAnnotatorStats,
  };

  await writeFile(`${OUT_DIR}/metrics.json`, JSON.stringify(metrics, null, 2));
  console.log(` metrics → metrics.json`);

  // ── Print summary to console ──
  console.log("\n" + "=".repeat(60));
  console.log("HUMAN LABELING SUMMARY");
  console.log("=".repeat(60));
  console.log(`\nParagraphs: ${fullyLabeled.size} fully labeled / ${allParagraphs.length} total`);
  console.log(`Labels: ${labels.length} total`);

  console.log(`\n── Consensus Rates (3/3 agree) ──`);
  console.log(` Category only: ${(categoryConsensusRate * 100).toFixed(1)}%`);
  console.log(` Specificity only: ${(specConsensusRate * 100).toFixed(1)}%`);
  console.log(` Both: ${(bothConsensusRate * 100).toFixed(1)}%`);

  console.log(`\n── Krippendorff's Alpha ──`);
  console.log(` Category: ${categoryAlpha.toFixed(4)}`);
  console.log(` Specificity: ${specAlpha.toFixed(4)}`);

  console.log(`\n── Pairwise Kappa (category) ──`);
  console.log(` Mean: ${metrics.pairwiseKappa.category.mean}`);
  for (const pair of kappaCatDetails) {
    console.log(` ${pair.a1} × ${pair.a2}: ${pair.kappa.toFixed(4)} (n=${pair.n})`);
  }

  console.log(`\n── Pairwise Kappa (specificity) ──`);
  console.log(` Mean: ${metrics.pairwiseKappa.specificity.mean}`);
  for (const pair of kappaSpecDetails) {
    console.log(` ${pair.a1} × ${pair.a2}: ${pair.kappa.toFixed(4)} (n=${pair.n})`);
  }

  console.log(`\n── Per-Category Agreement ──`);
  for (const [cat, rate] of Object.entries(perCategory)) {
    console.log(` ${cat}: ${(rate * 100).toFixed(1)}%`);
  }

  console.log(`\n── Per-Stratum Agreement ──`);
  for (const [stratum, data] of Object.entries(strataRates)) {
    console.log(
      ` ${stratum}: ${(data.rate * 100).toFixed(1)}% (${data.agreed}/${data.total})`,
    );
  }

  console.log(`\n── Timing ──`);
  if (allActiveTimes.length > 0) {
    console.log(` Median active time: ${(median(allActiveTimes) / 1000).toFixed(1)}s`);
    console.log(` Mean active time: ${(mean(allActiveTimes) / 1000).toFixed(1)}s`);
    console.log(` Total active hours: ${(sum(allActiveTimes) / 3_600_000).toFixed(2)}h`);
  }
  // Guarded by the wall-time data itself rather than by the active-time data.
  if (allWallTimes.length > 0) {
    console.log(` Total wall hours: ${(sum(allWallTimes) / 3_600_000).toFixed(2)}h`);
  }
  console.log(` Labels with active timer: ${allActiveTimes.length}/${labels.length}`);

  console.log(`\n── Per-Annotator ──`);
  for (const a of perAnnotatorStats) {
    // Compare against null explicitly: a legitimate 0 must not be reported as "N/A".
    const activeH =
      a.totalActiveMs !== null ? (a.totalActiveMs / 3_600_000).toFixed(2) : "N/A";
    const medSec = a.medianActiveMs !== null ? (a.medianActiveMs / 1000).toFixed(1) : "N/A";
    console.log(
      ` ${a.name}: ${a.labelCount} labels, median ${medSec}s active, ${activeH}h total`,
    );
  }

  console.log(`\n${"=".repeat(60)}`);
  console.log(`All data exported to ${OUT_DIR}/`);
  console.log("=".repeat(60));

  process.exit(0);
}

/** Median of `arr` (mean of the two middle values for even lengths). Does not mutate `arr`; yields NaN for an empty array. */
function median(arr: number[]): number {
  const sorted = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 !== 0 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}

/** Arithmetic mean of `arr`; yields NaN for an empty array, so callers guard on length. */
function mean(arr: number[]): number {
  return arr.reduce((s, v) => s + v, 0) / arr.length;
}

/** Sum of `arr`; 0 for an empty array. */
function sum(arr: number[]): number {
  return arr.reduce((s, v) => s + v, 0);
}

/** Rounds `n` to `decimals` decimal places using Math.round. */
function round(n: number, decimals: number): number {
  const factor = 10 ** decimals;
  return Math.round(n * factor) / factor;
}

main().catch((err: unknown) => {
  console.error("Dump failed:", err);
  process.exit(1);
});