/**
 * Generate all C(n, k) combinations of elements from `arr`.
 *
 * Combinations are emitted in lexicographic order of element index, and each
 * combination keeps its elements in their original relative order.
 *
 * @param arr - Source elements; not mutated.
 * @param k - Number of elements per combination.
 * @returns Every k-element combination; `[[]]` when `k` is 0 and `[]` when
 *   `k` exceeds `arr.length`.
 * @throws RangeError if `k` is negative or not an integer (such values would
 *   otherwise never reach the `k === 0` base case and recurse without bound).
 */
function combinations<T>(arr: readonly T[], k: number): T[][] {
  if (!Number.isInteger(k) || k < 0) {
    throw new RangeError(
      `combinations: k must be a non-negative integer, got ${k}`,
    );
  }
  if (k > arr.length) return [];

  const results: T[][] = [];
  const current: T[] = [];

  // Backtrack over start indices instead of slicing the array per level.
  const extend = (start: number): void => {
    if (current.length === k) {
      results.push([...current]);
      return;
    }
    // Stop early enough that the remaining elements can still fill the combo.
    const needed = k - current.length;
    for (let i = start; i <= arr.length - needed; i++) {
      current.push(arr[i]);
      extend(i + 1);
      current.pop();
    }
  };

  extend(0);
  return results;
}

/**
 * Shuffle an array in place using Fisher-Yates.
 *
 * Randomness comes from `Math.random`, so the result is not reproducible.
 *
 * @param arr - Array to shuffle; it is mutated.
 * @returns The same array instance, for chaining.
 */
function shuffle<T>(arr: T[]): T[] {
  for (let i = arr.length - 1; i > 0; i--) {
    // Pick a swap partner uniformly from the not-yet-fixed prefix [0, i].
    const j = Math.floor(Math.random() * (i + 1));
    [arr[i], arr[j]] = [arr[j], arr[i]];
  }
  return arr;
}

/** One (paragraph, annotator) pairing produced by `generateAssignments`. */
export interface Assignment {
  paragraphId: string;
  annotatorId: string;
}

/**
 * Generate BIBD assignments: each paragraph gets exactly `perParagraph`
 * annotators, with paragraphs distributed evenly across all
 * C(n, perParagraph) annotator groups.
 *
 * With 6 annotators, perParagraph=3 and 1200 paragraphs:
 *  - C(6,3) = 20 unique triples
 *  - Each triple gets floor(1200/20) = 60 paragraphs
 *  - Each annotator appears in C(5,2) = 10 triples -> 600 paragraphs each
 *
 * When the paragraph count is not a multiple of the number of groups, the
 * leftover paragraphs go one each to the first groups in enumeration order.
 * Paragraphs are shuffled (on a copy) before being dealt out, so which
 * paragraph lands in which group differs between calls.
 *
 * @param paragraphIds - Paragraphs to assign; not mutated.
 * @param annotatorIds - Available annotators; assumed to be distinct.
 * @param perParagraph - Number of annotators per paragraph.
 * @returns One row per (paragraph, annotator) pairing, grouped by paragraph.
 * @throws RangeError if `perParagraph` is negative or not an integer, or if
 *   there are paragraphs to assign but fewer than `perParagraph` annotators
 *   (previously this silently returned no assignments at all).
 */
export function generateAssignments(
  paragraphIds: readonly string[],
  annotatorIds: readonly string[],
  perParagraph: number,
): Assignment[] {
  if (!Number.isInteger(perParagraph) || perParagraph < 0) {
    throw new RangeError(
      `generateAssignments: perParagraph must be a non-negative integer, got ${perParagraph}`,
    );
  }
  if (paragraphIds.length === 0) return [];
  if (perParagraph > annotatorIds.length) {
    throw new RangeError(
      `generateAssignments: cannot pick ${perParagraph} annotators per paragraph ` +
        `from only ${annotatorIds.length} annotators`,
    );
  }

  // Non-empty from here on: 0 <= perParagraph <= annotatorIds.length.
  const groups = combinations(annotatorIds, perParagraph);
  const shuffled = shuffle([...paragraphIds]);
  const perGroup = Math.floor(shuffled.length / groups.length);
  const remainder = shuffled.length % groups.length;

  const assignments: Assignment[] = [];
  let offset = 0;
  groups.forEach((group, g) => {
    // Distribute remainder paragraphs to the first `remainder` groups.
    const count = perGroup + (g < remainder ? 1 : 0);
    for (const paragraphId of shuffled.slice(offset, offset + count)) {
      for (const annotatorId of group) {
        assignments.push({ paragraphId, annotatorId });
      }
    }
    offset += count;
  });
  return assignments;
}

/**
 * Print summary statistics for assignments.
 *
 * Logs, via `console.log`: the number of rows per annotator (in the order of
 * `annotatorIds`, 0 for annotators with no rows), the number of paragraphs
 * shared by each pair of annotators that co-occur on at least one paragraph,
 * the number of unique paragraphs, and the total row count.
 *
 * @param assignments - Rows to summarise.
 * @param annotatorIds - Annotators to list in the per-annotator section.
 */
export function printAssignmentStats(
  assignments: readonly Assignment[],
  annotatorIds: readonly string[],
): void {
  // Per-annotator counts
  const perAnnotator = new Map<string, number>();
  for (const a of assignments) {
    perAnnotator.set(a.annotatorId, (perAnnotator.get(a.annotatorId) ?? 0) + 1);
  }

  console.log("\nPer-annotator assignment counts:");
  for (const id of annotatorIds) {
    console.log(` ${id}: ${perAnnotator.get(id) ?? 0}`);
  }

  // Pairwise overlap: how many paragraphs each pair shares
  const paragraphAnnotators = new Map<string, Set<string>>();
  for (const a of assignments) {
    const seen = paragraphAnnotators.get(a.paragraphId);
    if (seen) {
      seen.add(a.annotatorId);
    } else {
      paragraphAnnotators.set(a.paragraphId, new Set([a.annotatorId]));
    }
  }

  type PairTally = { first: string; second: string; count: number };
  const pairTallies = new Map<string, PairTally>();
  for (const annotators of paragraphAnnotators.values()) {
    const members = [...annotators];
    for (let i = 0; i < members.length; i++) {
      for (let j = i + 1; j < members.length; j++) {
        const [first, second] = [members[i], members[j]].sort();
        // JSON-encode the pair so IDs containing "|" cannot collide or be
        // mis-split; the names themselves are kept in the tally for printing.
        const key = JSON.stringify([first, second]);
        const tally = pairTallies.get(key);
        if (tally) {
          tally.count += 1;
        } else {
          pairTallies.set(key, { first, second, count: 1 });
        }
      }
    }
  }

  console.log("\nPairwise overlap (paragraphs shared):");
  // Order by the "first|second" label, as a plain joined key would sort.
  const pairs = [...pairTallies.values()].sort((x, y) =>
    `${x.first}|${x.second}`.localeCompare(`${y.first}|${y.second}`),
  );
  for (const { first, second, count } of pairs) {
    console.log(` ${first} & ${second}: ${count}`);
  }

  // Unique paragraphs
  console.log(`\nTotal unique paragraphs: ${paragraphAnnotators.size}`);
  console.log(`Total assignment rows: ${assignments.length}`);
}