54 lines
1.8 KiB
TypeScript
54 lines
1.8 KiB
TypeScript
// Supply a localhost Postgres connection string only when the environment
// does not already define DATABASE_URL; an existing value is never overwritten.
// NOTE(review): under native ES module semantics, static imports are evaluated
// before this statement runs. If "../db" reads DATABASE_URL at import time,
// this default would be applied too late — confirm how this script is
// compiled/executed.
if (process.env.DATABASE_URL == null) {
  process.env.DATABASE_URL =
    "postgresql://sec_cybert:sec_cybert@localhost:5432/sec_cybert";
}
|
|
|
|
import { db } from "../db";
|
|
import * as schema from "../db/schema";
|
|
import { generateAssignments, printAssignmentStats } from "../lib/assignment";
|
|
|
|
/**
 * Generates assignments for every paragraph/annotator in the database and
 * stores them in the assignments table.
 *
 * Steps:
 *   1. Load all paragraph IDs.
 *   2. Load all annotator IDs (sorted, so the input order passed to the
 *      generator does not depend on the row order returned by the query).
 *   3. Call `generateAssignments` with both ID lists.
 *   4. Print assignment statistics.
 *   5. Insert the generated rows in batches of 1000, skipping rows that
 *      conflict with existing ones.
 *
 * All batches are written inside a single transaction so that a failure in
 * any batch rolls back the earlier ones instead of leaving a partially
 * populated assignments table.
 *
 * On success the process exits with code 0. Any error rejects the returned
 * promise; the caller is responsible for reporting it.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // 1. Read all paragraph IDs from DB (the holdout IS the full set)
  console.log("Loading paragraph IDs from DB...");
  const rows = await db
    .select({ id: schema.paragraphs.id })
    .from(schema.paragraphs);
  const paragraphIds = rows.map((r) => r.id);
  console.log(` ${paragraphIds.length} paragraphs`);

  // 2. Read annotator IDs from DB (all annotators, including joey)
  console.log("Loading annotators...");
  const annotators = await db
    .select({ id: schema.annotators.id })
    .from(schema.annotators);
  const annotatorIds = annotators.map((a) => a.id).sort();
  console.log(` ${annotatorIds.length} annotators: ${annotatorIds.join(", ")}`);

  // 3. Generate BIBD assignments
  // NOTE(review): the third argument (3) is presumably the number of
  // annotators per paragraph — confirm against ../lib/assignment.
  console.log("Generating BIBD assignments...");
  const assignments = generateAssignments(paragraphIds, annotatorIds, 3);

  // 4. Print stats before inserting
  printAssignmentStats(assignments, annotatorIds);

  // 5. Insert into DB in batches, atomically
  console.log("\nInserting assignments into DB...");
  const BATCH_SIZE = 1000;

  // NOTE(review): assumes `db` is a Drizzle instance whose driver supports
  // `transaction` — confirm against ../db.
  await db.transaction(async (tx) => {
    for (let i = 0; i < assignments.length; i += BATCH_SIZE) {
      const batch = assignments.slice(i, i + BATCH_SIZE);
      await tx
        .insert(schema.assignments)
        .values(batch)
        .onConflictDoNothing();

      // Rows are only durable once the transaction commits; this count
      // reports rows submitted so far (conflicting rows are skipped silently).
      const progress = Math.min(i + BATCH_SIZE, assignments.length);
      console.log(` Inserted ${progress}/${assignments.length} assignments`);
    }
  });

  console.log("Assignment complete.");
  process.exit(0);
}
|
|
|
|
// Script entry point: run main() and, if it rejects, log the error and
// terminate with a non-zero exit code.
main().catch((failure) => {
  console.error("Assignment failed:", failure);
  process.exit(1);
});
|