// Fall back to the local development database when DATABASE_URL is unset.
// This must execute before the project modules are loaded. Static `import`
// declarations are hoisted and evaluated before any statement in the module
// body, so the modules are loaded with dynamic `import()` inside main().
// NOTE(review): presumably "../db" reads DATABASE_URL when it is first
// evaluated — confirm against that module.
process.env.DATABASE_URL ??=
  "postgresql://sec_cybert:sec_cybert@localhost:5432/sec_cybert";

/**
 * Generates assignments for every paragraph/annotator in the database and
 * inserts them into the assignments table in batches.
 *
 * Steps: load all paragraph IDs, load all annotator IDs (sorted), call
 * `generateAssignments`, print stats via `printAssignmentStats`, then insert
 * the result in batches of 1000 rows, skipping rows that conflict with
 * existing ones. Exits the process with code 0 on success.
 *
 * @returns {Promise<void>} Rejects if any query, import, or insert fails.
 */
async function main() {
  // Loaded here (not via static imports) so the DATABASE_URL default above is
  // already in place when these modules are evaluated. Awaited one at a time
  // to keep the same module evaluation order as the original import list.
  const { db } = await import("../db");
  const schema = await import("../db/schema");
  const { generateAssignments, printAssignmentStats } = await import(
    "../lib/assignment"
  );

  // 1. Read all paragraph IDs from DB (the holdout IS the full set)
  console.log("Loading paragraph IDs from DB...");
  const rows = await db
    .select({ id: schema.paragraphs.id })
    .from(schema.paragraphs);
  const paragraphIds = rows.map((r) => r.id);
  console.log(` ${paragraphIds.length} paragraphs`);

  // 2. Read annotator IDs from DB (all annotators, including joey)
  console.log("Loading annotators...");
  const annotators = await db
    .select({ id: schema.annotators.id })
    .from(schema.annotators);
  // Sorted so the annotator order passed to the generator does not depend on
  // the row order returned by the query.
  const annotatorIds = annotators.map((a) => a.id).sort();
  console.log(` ${annotatorIds.length} annotators: ${annotatorIds.join(", ")}`);

  // 3. Generate BIBD assignments
  // NOTE(review): the third argument (3) is presumably the number of
  // annotators per paragraph — confirm against generateAssignments.
  console.log("Generating BIBD assignments...");
  const assignments = generateAssignments(paragraphIds, annotatorIds, 3);

  // 4. Print stats before inserting
  printAssignmentStats(assignments, annotatorIds);

  // 5. Insert into DB in batches
  console.log("\nInserting assignments into DB...");
  const BATCH_SIZE = 1000;
  for (let i = 0; i < assignments.length; i += BATCH_SIZE) {
    const batch = assignments.slice(i, i + BATCH_SIZE);
    await db
      .insert(schema.assignments)
      .values(batch)
      .onConflictDoNothing();
    // Clamp so the final (possibly short) batch does not over-report.
    const progress = Math.min(i + BATCH_SIZE, assignments.length);
    console.log(` Inserted ${progress}/${assignments.length} assignments`);
  }

  console.log("Assignment complete.");
  // Exit explicitly rather than waiting for the event loop to drain.
  process.exit(0);
}

main().catch((err) => {
  console.error("Assignment failed:", err);
  process.exit(1);
});