2026-04-05 15:37:50 -04:00

54 lines
1.8 KiB
TypeScript

// Fall back to the local development database when no connection string has
// been provided through the environment.
// NOTE(review): with native ES modules, static imports are evaluated before
// this statement runs — confirm that ../db reads DATABASE_URL lazily (or that
// the runtime/transpiler preserves statement order) so the default takes effect.
if (process.env.DATABASE_URL == null) {
  process.env.DATABASE_URL =
    "postgresql://sec_cybert:sec_cybert@localhost:5432/sec_cybert";
}
import { db } from "../db";
import * as schema from "../db/schema";
import { generateAssignments, printAssignmentStats } from "../lib/assignment";
/**
 * Generates annotation assignments for every paragraph in the database and
 * stores them in the assignments table.
 *
 * Flow: load all paragraph IDs, load all annotator IDs, pass both to
 * `generateAssignments`, print a summary with `printAssignmentStats`, then
 * insert the generated rows in fixed-size chunks. Inserts use
 * `onConflictDoNothing()`, so conflicting rows do not abort the run.
 * On success the process is terminated with exit code 0.
 *
 * @returns {Promise<void>} Does not resolve in practice, since the process
 *   exits at the end of a successful run; rejects if any DB call throws.
 */
async function main() {
  // Paragraphs: the whole table is used (the holdout IS the full set).
  console.log("Loading paragraph IDs from DB...");
  const paragraphRows = await db
    .select({ id: schema.paragraphs.id })
    .from(schema.paragraphs);
  const allParagraphIds = paragraphRows.map(({ id }) => id);
  console.log(` ${allParagraphIds.length} paragraphs`);

  // Annotators: every annotator in the table, including joey.
  console.log("Loading annotators...");
  const annotatorRows = await db
    .select({ id: schema.annotators.id })
    .from(schema.annotators);
  const sortedAnnotatorIds = annotatorRows.map(({ id }) => id);
  // Default sort compares elements as strings.
  // NOTE(review): assumes annotator IDs are strings — confirm against the schema.
  sortedAnnotatorIds.sort();
  console.log(
    ` ${sortedAnnotatorIds.length} annotators: ${sortedAnnotatorIds.join(", ")}`,
  );

  // Build the BIBD assignments.
  // NOTE(review): the literal 3 is presumably the number of annotators per
  // paragraph — confirm against generateAssignments.
  console.log("Generating BIBD assignments...");
  const assignments = generateAssignments(
    allParagraphIds,
    sortedAnnotatorIds,
    3,
  );

  // Show the distribution before anything is written.
  printAssignmentStats(assignments, sortedAnnotatorIds);

  // Persist in chunks, one awaited insert at a time, logging running progress.
  console.log("\nInserting assignments into DB...");
  const chunkSize = 1000;
  const total = assignments.length;
  let offset = 0;
  while (offset < total) {
    const chunk = assignments.slice(offset, offset + chunkSize);
    await db.insert(schema.assignments).values(chunk).onConflictDoNothing();
    offset += chunkSize;
    // The final chunk may be short, so cap the reported count at the total.
    console.log(` Inserted ${Math.min(offset, total)}/${total} assignments`);
  }

  console.log("Assignment complete.");
  // Exit explicitly rather than waiting for the event loop to drain.
  process.exit(0);
}
/**
 * Logs an error that escaped `main` and terminates the process with a
 * non-zero exit code so callers (shell, CI) can detect the failure.
 */
function handleFatalError(error) {
  console.error("Assignment failed:", error);
  process.exit(1);
}

// Script entry point: run the assignment job and route any rejection to the
// fatal-error handler.
main().catch(handleFatalError);