// File-viewer metadata: 126 lines, 3.6 KiB, TypeScript.
/**
 * Generate all C(n, k) combinations of elements from `arr`.
 *
 * Combinations are emitted in lexicographic order of the chosen indices, and
 * every combination keeps its elements in their original `arr` order.
 *
 * @param arr - Source elements; the array is not modified.
 * @param k - Number of elements per combination.
 * @returns Every k-element combination; `[[]]` when `k` is 0 and `[]` when
 *   `k` exceeds `arr.length`.
 * @throws RangeError if `k` is negative or not an integer. Such values can
 *   never reach the `k === 0` base case and would otherwise recurse forever.
 */
function combinations<T>(arr: readonly T[], k: number): T[][] {
  if (!Number.isInteger(k) || k < 0) {
    throw new RangeError(`k must be a non-negative integer; got ${k}`);
  }

  const results: T[][] = [];
  if (k > arr.length) return results;

  // Shared scratch buffer: grown and shrunk while walking the index tree, so
  // no intermediate sub-arrays have to be copied at each recursion level.
  const current: T[] = [];

  const extend = (start: number): void => {
    if (current.length === k) {
      results.push([...current]);
      return;
    }
    // Stop early enough that the remaining slots can still be filled.
    const stillNeeded = k - current.length;
    for (let i = start; i <= arr.length - stillNeeded; i++) {
      current.push(arr[i]);
      extend(i + 1);
      current.pop();
    }
  };

  extend(0);
  return results;
}
|
|
|
|
/**
 * Randomly permute `arr` in place (Fisher-Yates) and hand the same array back.
 *
 * Walks from the last slot down to index 1, swapping each slot with a
 * `Math.random()`-chosen slot at or below it.
 */
function shuffle<T>(arr: T[]): T[] {
  let last = arr.length - 1;
  while (last > 0) {
    const pick = Math.floor(Math.random() * (last + 1));
    const held = arr[last];
    arr[last] = arr[pick];
    arr[pick] = held;
    last -= 1;
  }
  return arr;
}
|
|
|
|
/** One (paragraph, annotator) pairing: a single row of the assignment table. */
export interface Assignment {
  /** Identifier of the paragraph in this pairing. */
  paragraphId: string;
  /** Identifier of the annotator in this pairing. */
  annotatorId: string;
}
|
|
|
|
/**
 * Generate BIBD-style assignments: each paragraph gets exactly `perParagraph`
 * annotators, and paragraphs are distributed as evenly as possible across all
 * C(n, perParagraph) annotator groups (n = `annotatorIds.length`).
 *
 * With 6 annotators, perParagraph=3 and 1200 paragraphs:
 *   - C(6,3) = 20 unique triples
 *   - Each triple gets floor(1200/20) = 60 paragraphs
 *   - Each annotator appears in C(5,2) = 10 triples -> 600 paragraphs each
 *
 * When the paragraph count is not a multiple of the group count, the leftover
 * paragraphs go one each to randomly chosen groups. Paragraph order is also
 * randomized, so repeated calls yield different assignments.
 *
 * `annotatorIds` and `paragraphIds` are assumed to contain no duplicates;
 * this is not checked.
 *
 * @param paragraphIds - Paragraphs to assign; the array is not modified.
 * @param annotatorIds - Pool of annotators to draw groups from.
 * @param perParagraph - Number of annotators each paragraph receives.
 * @returns One row per (paragraph, annotator) pairing, i.e.
 *   `paragraphIds.length * perParagraph` rows.
 * @throws RangeError if `perParagraph` is not an integer in
 *   `[0, annotatorIds.length]`. Outside that range there are no valid groups,
 *   which previously produced a silent empty result or unbounded recursion.
 */
export function generateAssignments(
  paragraphIds: string[],
  annotatorIds: string[],
  perParagraph: number,
): Assignment[] {
  if (
    !Number.isInteger(perParagraph) ||
    perParagraph < 0 ||
    perParagraph > annotatorIds.length
  ) {
    throw new RangeError(
      `perParagraph must be an integer between 0 and ${annotatorIds.length} ` +
        `(the number of annotators); got ${perParagraph}`,
    );
  }

  // Shuffle the group order so the leftover paragraphs below are not always
  // handed to the lexicographically first groups (all of which contain the
  // first annotator while the remainder is small), which would overload them.
  const groups = shuffle(combinations(annotatorIds, perParagraph));
  const shuffled = shuffle([...paragraphIds]);

  // groups.length >= 1 is guaranteed by the validation above, so these
  // divisions are safe.
  const perGroup = Math.floor(shuffled.length / groups.length);
  const remainder = shuffled.length % groups.length;

  const assignments: Assignment[] = [];
  let offset = 0;

  groups.forEach((group, g) => {
    // The first `remainder` groups (in shuffled order) take one extra paragraph.
    const count = perGroup + (g < remainder ? 1 : 0);

    for (const paragraphId of shuffled.slice(offset, offset + count)) {
      for (const annotatorId of group) {
        assignments.push({ paragraphId, annotatorId });
      }
    }

    offset += count;
  });

  return assignments;
}
|
|
|
|
/**
 * Print summary statistics for assignments to the console.
 *
 * Three sections are logged:
 *   1. the number of assignment rows per annotator, listed in `annotatorIds`
 *      order (annotators absent from `assignments` show 0);
 *   2. for every pair of annotators sharing at least one paragraph, the number
 *      of paragraphs they share, sorted by pair;
 *   3. the number of distinct paragraphs and the total number of rows.
 *
 * @param assignments - Assignment rows to summarize.
 * @param annotatorIds - Annotators to list in the per-annotator section.
 */
export function printAssignmentStats(
  assignments: Assignment[],
  annotatorIds: string[],
): void {
  // Per-annotator counts
  const perAnnotator = new Map<string, number>();
  for (const row of assignments) {
    perAnnotator.set(
      row.annotatorId,
      (perAnnotator.get(row.annotatorId) ?? 0) + 1,
    );
  }

  console.log("\nPer-annotator assignment counts:");
  for (const id of annotatorIds) {
    console.log(` ${id}: ${perAnnotator.get(id) ?? 0}`);
  }

  // Pairwise overlap: how many paragraphs each pair shares
  const paragraphAnnotators = new Map<string, Set<string>>();
  for (const row of assignments) {
    const seen = paragraphAnnotators.get(row.paragraphId);
    if (seen) {
      seen.add(row.annotatorId);
    } else {
      paragraphAnnotators.set(row.paragraphId, new Set([row.annotatorId]));
    }
  }

  // The pair members are stored next to the count rather than being parsed
  // back out of a "|"-joined key, so IDs that themselves contain "|" are
  // reported correctly. JSON.stringify gives a collision-free map key.
  const pairCounts = new Map<
    string,
    { first: string; second: string; count: number }
  >();
  for (const annotators of paragraphAnnotators.values()) {
    const ids = [...annotators];
    ids.forEach((left, i) => {
      for (const right of ids.slice(i + 1)) {
        // Order each pair canonically so (x, y) and (y, x) share one entry.
        const [first, second] = left < right ? [left, right] : [right, left];
        const key = JSON.stringify([first, second]);
        const entry = pairCounts.get(key);
        if (entry) {
          entry.count += 1;
        } else {
          pairCounts.set(key, { first, second, count: 1 });
        }
      }
    });
  }

  console.log("\nPairwise overlap (paragraphs shared):");
  const pairs = [...pairCounts.values()].sort((x, y) =>
    `${x.first}|${x.second}`.localeCompare(`${y.first}|${y.second}`),
  );
  for (const { first, second, count } of pairs) {
    console.log(` ${first} & ${second}: ${count}`);
  }

  // Unique paragraphs
  console.log(`\nTotal unique paragraphs: ${paragraphAnnotators.size}`);
  console.log(`Total assignment rows: ${assignments.length}`);
}
|