/**
 * One-off script: generate paragraph-to-annotator assignments and store them.
 *
 * Steps:
 *   1. Read the sampled paragraph IDs from a JSON file.
 *   2. Load every annotator ID from the database except "admin".
 *   3. Build assignments with `generateAssignments`.
 *   4. Print assignment statistics.
 *   5. Insert the assignments in batches, skipping rows that conflict with
 *      existing ones.
 */

// Fallback connection string for local development; an already-set
// DATABASE_URL is left untouched.
// NOTE(review): presumably "../db" reads DATABASE_URL when it is loaded. Under
// native ESM, imports are evaluated before this statement runs — confirm the
// runner in use preserves statement order.
process.env.DATABASE_URL ??= "postgresql://sec_cybert:sec_cybert@localhost:5432/sec_cybert";

import { readFile } from "node:fs/promises";
import { ne } from "drizzle-orm";
import { db } from "../db";
import * as schema from "../db/schema";
import { generateAssignments, printAssignmentStats } from "../lib/assignment";

// Location of the sampled paragraph ID list. Can be overridden through the
// SAMPLED_IDS_PATH environment variable; the default is the original path.
const SAMPLED_IDS_PATH =
  process.env.SAMPLED_IDS_PATH ??
  "/home/joey/Documents/sec-cyBERT/labelapp/.sampled-ids.json";

/**
 * Parses the contents of the sampled-IDs file and checks its shape.
 *
 * @param raw - Text content of the file.
 * @param source - Path the text was read from; used only in the error message.
 * @returns The paragraph IDs (may be empty).
 * @throws SyntaxError if `raw` is not valid JSON.
 * @throws Error if the JSON value is not an array consisting only of strings.
 */
function parseParagraphIds(raw: string, source: string): string[] {
  const parsed: unknown = JSON.parse(raw);
  if (
    !Array.isArray(parsed) ||
    !parsed.every((id): id is string => typeof id === "string")
  ) {
    throw new Error(`${source} must contain a JSON array of paragraph ID strings`);
  }
  return parsed;
}

/**
 * Runs the assignment pipeline end to end and exits the process with code 0
 * on success. Any thrown error propagates to the caller.
 */
async function main() {
  // 1. Read sampled paragraph IDs
  console.log("Reading sampled paragraph IDs...");
  const raw = await readFile(SAMPLED_IDS_PATH, "utf-8");
  const paragraphIds = parseParagraphIds(raw, SAMPLED_IDS_PATH);
  console.log(` ${paragraphIds.length} paragraph IDs loaded`);

  // 2. Read annotator IDs from DB (exclude admin)
  console.log("Loading annotators...");
  const annotators = await db
    .select({ id: schema.annotators.id })
    .from(schema.annotators)
    .where(ne(schema.annotators.id, "admin"));
  // The query has no ORDER BY, so sort here to hand generateAssignments the
  // IDs in an order that does not depend on how the DB returned the rows.
  const annotatorIds = annotators.map((a) => a.id).sort();
  console.log(` ${annotatorIds.length} annotators: ${annotatorIds.join(", ")}`);

  // 3. Generate BIBD assignments
  console.log("Generating BIBD assignments...");
  // NOTE(review): the third argument is presumably the number of annotators
  // per paragraph — confirm against generateAssignments.
  const assignments = generateAssignments(paragraphIds, annotatorIds, 3);

  // 4. Print stats before inserting
  printAssignmentStats(assignments, annotatorIds);

  // 5. Insert into DB in batches
  console.log("\nInserting assignments into DB...");
  const BATCH_SIZE = 1000;
  for (let i = 0; i < assignments.length; i += BATCH_SIZE) {
    const batch = assignments.slice(i, i + BATCH_SIZE);
    // Conflicting rows are skipped rather than raising, so the progress count
    // below is the number of rows attempted, not necessarily rows written.
    await db
      .insert(schema.assignments)
      .values(batch)
      .onConflictDoNothing();
    const progress = Math.min(i + BATCH_SIZE, assignments.length);
    console.log(` Inserted ${progress}/${assignments.length} assignments`);
  }

  console.log("Assignment complete.");
  // Exit explicitly instead of waiting for the event loop to drain.
  // NOTE(review): presumably needed because the DB client keeps connections open.
  process.exit(0);
}

// Entry point: report any failure and exit with a non-zero status.
main().catch((err) => {
  console.error("Assignment failed:", err);
  process.exit(1);
});