// SEC-cyBERT/labelapp/db/schema.ts
// 2026-03-28 23:44:37 -04:00
//
// 96 lines
// 3.1 KiB
// TypeScript

import {
pgTable,
text,
integer,
real,
timestamp,
boolean,
unique,
} from "drizzle-orm/pg-core";
/**
 * Drizzle definition of the `paragraphs` table.
 *
 * Rows are keyed by a text `id`. Every column is required except `ticker`
 * and the four `stage1_*` columns, which are nullable.
 */
export const paragraphs = pgTable("paragraphs", {
  id: text("id").primaryKey(),

  // Paragraph body plus its word count and index.
  text: text("text").notNull(),
  wordCount: integer("word_count").notNull(),
  paragraphIndex: integer("paragraph_index").notNull(),

  // Company / filing metadata. Note that `filing_date` is stored as text,
  // not as a date or timestamp column.
  companyName: text("company_name").notNull(),
  cik: text("cik").notNull(),
  ticker: text("ticker"),
  filingType: text("filing_type").notNull(),
  filingDate: text("filing_date").notNull(),
  fiscalYear: integer("fiscal_year").notNull(),
  accessionNumber: text("accession_number").notNull(),
  secItem: text("sec_item").notNull(),

  // Stage 1 consensus (used for stratification; not shown to annotators
  // during labeling). All four columns are nullable.
  stage1Category: text("stage1_category"),
  stage1Specificity: integer("stage1_specificity"),
  stage1Method: text("stage1_method"),
  stage1Confidence: real("stage1_confidence"),
});
/**
 * Drizzle definition of the `annotators` table: a text primary key, a
 * required display name, and a required `password` column.
 */
export const annotators = pgTable("annotators", {
  id: text("id").primaryKey(),
  displayName: text("display_name").notNull(),
  // NOTE(review): plain `text` column. Whether the stored value is hashed is
  // decided by the code that writes it, which is not visible here — confirm.
  password: text("password").notNull(),
});
/**
 * Drizzle definition of the `assignments` table, linking a paragraph to an
 * annotator.
 *
 * Both link columns are required foreign keys, and the pair
 * (`paragraph_id`, `annotator_id`) is declared unique, so a given paragraph
 * can appear at most once per annotator. The table has no separate primary
 * key column.
 */
export const assignments = pgTable(
  "assignments",
  {
    paragraphId: text("paragraph_id").notNull().references(() => paragraphs.id),
    annotatorId: text("annotator_id").notNull().references(() => annotators.id),
    // Filled in by the database default when the insert omits it.
    assignedAt: timestamp("assigned_at").notNull().defaultNow(),
    isWarmup: boolean("is_warmup").notNull().default(false),
  },
  (table) => [unique().on(table.paragraphId, table.annotatorId)],
);
/**
 * Drizzle definition of the `human_labels` table: one label record for a
 * paragraph from an annotator.
 *
 * The integer `id` is an always-generated identity column. The pair
 * (`paragraph_id`, `annotator_id`) is declared unique, so each annotator can
 * have at most one row per paragraph.
 */
export const humanLabels = pgTable(
  "human_labels",
  {
    id: integer("id").primaryKey().generatedAlwaysAsIdentity(),

    // Required foreign keys to the labeled paragraph and the labeling annotator.
    paragraphId: text("paragraph_id").notNull().references(() => paragraphs.id),
    annotatorId: text("annotator_id").notNull().references(() => annotators.id),

    // The label itself; `notes` is optional.
    contentCategory: text("content_category").notNull(),
    specificityLevel: integer("specificity_level").notNull(),
    notes: text("notes"),

    // Bookkeeping: `labeled_at` defaults to the insert time, `duration_ms`
    // is nullable.
    labeledAt: timestamp("labeled_at").notNull().defaultNow(),
    sessionId: text("session_id").notNull(),
    durationMs: integer("duration_ms"),
  },
  (table) => [unique().on(table.paragraphId, table.annotatorId)],
);
/**
 * Drizzle definition of the `quiz_sessions` table.
 *
 * Each row has a text primary key and a required foreign key to the
 * annotator. `completed_at` is the only nullable column; `total_questions`
 * is the only required column without a default besides the keys.
 */
export const quizSessions = pgTable("quiz_sessions", {
  id: text("id").primaryKey(),
  annotatorId: text("annotator_id").notNull().references(() => annotators.id),

  // `started_at` defaults to the insert time; `completed_at` starts out null.
  startedAt: timestamp("started_at").notNull().defaultNow(),
  completedAt: timestamp("completed_at"),

  // A new row defaults to not passed with a score of 0.
  passed: boolean("passed").notNull().default(false),
  score: integer("score").notNull().default(0),
  totalQuestions: integer("total_questions").notNull(),

  // Stored as text; presumably a JSON-encoded array given the "[]" default —
  // confirm against the code that reads and writes it.
  answers: text("answers").notNull().default("[]"),
});
/**
 * Drizzle definition of the `adjudications` table.
 *
 * `paragraph_id` is both the primary key and a foreign key to `paragraphs`,
 * so there is at most one adjudication row per paragraph.
 */
export const adjudications = pgTable("adjudications", {
  paragraphId: text("paragraph_id").primaryKey().references(() => paragraphs.id),

  // The resolved label and the method that produced it.
  finalCategory: text("final_category").notNull(),
  finalSpecificity: integer("final_specificity").notNull(),
  method: text("method").notNull(),

  // NOTE(review): nullable, with no foreign key declared — unlike the
  // `annotator_id` columns in the other tables. Confirm whether this is
  // intended to reference `annotators.id`.
  adjudicatorId: text("adjudicator_id"),
  notes: text("notes"),

  // Defaults to the insert time.
  resolvedAt: timestamp("resolved_at").notNull().defaultNow(),
});