diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..1fa5fa9
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,27 @@
+# Ignore everything by default; later patterns re-include or re-exclude paths
+# (for each file, the last matching pattern wins).
+*
+
+# Allow only what the labelapp Dockerfile needs
+!package.json
+!bun.lock
+!packages/schemas/
+# Keep host-installed modules out so they cannot overwrite the deps-stage copy
+packages/schemas/node_modules/
+
+!ts/package.json
+!labelapp/
+# Local installs, build output, env files and test artifacts stay out of the image
+labelapp/node_modules/
+labelapp/.next/
+labelapp/.env*
+labelapp/playwright-report/
+labelapp/test-results/
+
+# Seed data (only the two JSONL files we need)
+!data/paragraphs/paragraphs-clean.jsonl
+!data/annotations/stage1.jsonl
+
+# Git/IDE
+.git
+**/.DS_Store
diff --git a/labelapp/Dockerfile b/labelapp/Dockerfile
new file mode 100644
index 0000000..ba8edce
--- /dev/null
+++ b/labelapp/Dockerfile
@@ -0,0 +1,60 @@
+# Build context: monorepo root (run: docker build -f labelapp/Dockerfile .)
+FROM oven/bun:1 AS base
+
+# -- deps: install workspace dependencies from the manifests and lockfile only --
+FROM base AS deps
+WORKDIR /app
+COPY package.json bun.lock ./
+COPY packages/schemas/package.json packages/schemas/
+COPY ts/package.json ts/
+COPY labelapp/package.json labelapp/
+RUN bun install --frozen-lockfile
+
+# -- builder: run labelapp's build script against the installed dependencies --
+FROM base AS builder
+WORKDIR /app
+COPY --from=deps /app/node_modules ./node_modules
+COPY --from=deps /app/packages/schemas/node_modules ./packages/schemas/node_modules
+COPY --from=deps /app/labelapp/node_modules ./labelapp/node_modules
+COPY package.json bun.lock ./
+COPY packages/schemas/ packages/schemas/
+COPY labelapp/ labelapp/
+ENV NEXT_TELEMETRY_DISABLED=1
+RUN cd labelapp && bun run build
+
+# -- runner: production image (server, migration tooling, seed scripts and data) --
+FROM base AS runner
+WORKDIR /app
+ENV NODE_ENV=production
+ENV NEXT_TELEMETRY_DISABLED=1
+
+# Standalone tree is copied to /app; static assets and public/ go under /app/labelapp
+COPY --from=builder /app/labelapp/.next/standalone ./
+COPY --from=builder /app/labelapp/.next/static ./labelapp/.next/static
+COPY --from=builder /app/labelapp/public ./labelapp/public
+
+# Drizzle migration tooling (drizzle-kit push, run by the entrypoint, needs these)
+COPY --from=deps /app/node_modules ./node_modules
+COPY --from=deps /app/labelapp/node_modules ./labelapp/node_modules
+COPY --from=builder /app/labelapp/drizzle.config.ts ./labelapp/
+COPY --from=builder /app/labelapp/db/ ./labelapp/db/
+COPY --from=builder /app/packages/schemas/ ./packages/schemas/
+COPY --from=builder /app/package.json ./
+
+# Seed/sample/assign scripts run by the entrypoint, plus labelapp/lib (presumably imported by them)
+COPY --from=builder /app/labelapp/scripts/ ./labelapp/scripts/
+COPY --from=builder /app/labelapp/lib/ ./labelapp/lib/
+
+# Seed data (paragraphs + stage1 annotations), flattened into /app/data for the entrypoint
+COPY data/paragraphs/paragraphs-clean.jsonl /app/data/paragraphs-clean.jsonl
+COPY data/annotations/stage1.jsonl /app/data/stage1.jsonl
+
+# Entrypoint: pushes the schema, seeds an empty database, then starts the server
+COPY labelapp/entrypoint.sh /app/entrypoint.sh
+RUN chmod +x /app/entrypoint.sh
+
+EXPOSE 3000
+ENV PORT=3000
+ENV HOSTNAME=0.0.0.0
+
+ENTRYPOINT ["/app/entrypoint.sh"]
diff --git a/labelapp/app/label/page.tsx b/labelapp/app/label/page.tsx
index b0b9585..08985ae 100644
--- a/labelapp/app/label/page.tsx
+++ b/labelapp/app/label/page.tsx
@@ -458,7 +458,7 @@ function CodebookSidebar() {
- "Our CISO, Jane Smith" = named person (Firm-Specific). "Our
- CISO" alone = function reference (could be Generic).
+ Is the paragraph about the person (credentials, background,
+ reporting lines) or the function (program activities, tools)?
+ Person → Management Role. Function → Risk Management Process.
Need 2+ independently verifiable facts (dates, dollar amounts,
@@ -495,7 +496,7 @@ function CodebookSidebar() {
Choose the category whose content occupies the majority of the
- paragraph. If truly 50/50, prefer the more specific category.
+ paragraph — the primary communicative purpose.
diff --git a/labelapp/entrypoint.sh b/labelapp/entrypoint.sh
new file mode 100644
index 0000000..e6241c0
--- /dev/null
+++ b/labelapp/entrypoint.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Container entrypoint for the labelapp image:
+#   1. push the Drizzle schema to the database,
+#   2. seed, sample and assign once if the paragraphs table is empty,
+#   3. replace this shell with the Next.js standalone server.
+set -euo pipefail
+
+cd /app/labelapp
+
+echo "==> Running Drizzle migrations..."
+bunx drizzle-kit push --force
+
+echo "==> Checking if database needs seeding..."
+# Best effort: if the count query fails, fall back to "0" so seeding is still
+# attempted. stderr is left attached so the failure shows up in container logs.
+ROW_COUNT=$(bun --eval "
+import postgres from 'postgres';
+const sql = postgres(process.env.DATABASE_URL);
+const [{count}] = await sql\`SELECT count(*)::int as count FROM paragraphs\`;
+console.log(count);
+await sql.end();
+" || echo "0")
+
+if [ "$ROW_COUNT" = "0" ]; then
+  # Seed inputs live where the Dockerfile copies them; exported for the scripts
+  # below (presumably read via process.env — confirm in scripts/).
+  export SEED_PARAGRAPHS_PATH=/app/data/paragraphs-clean.jsonl
+  export SEED_ANNOTATIONS_PATH=/app/data/stage1.jsonl
+  export SAMPLED_IDS_PATH=/app/labelapp/.sampled-ids.json
+
+  echo "==> Database is empty, seeding..."
+  bun run scripts/seed.ts
+
+  echo "==> Running sampling..."
+  bun run scripts/sample.ts
+
+  echo "==> Running assignment generation..."
+  bun run scripts/assign.ts
+
+  echo "==> Seeding complete."
+else
+  echo "==> Database already seeded ($ROW_COUNT paragraphs). Skipping."
+fi
+
+echo "==> Starting Next.js server..."
+# The image copies .next/standalone into /app, so server.js sits in this
+# directory (/app/labelapp), not under .next/standalone/.
+exec bun run ./server.js
diff --git a/labelapp/lib/quiz-questions.ts b/labelapp/lib/quiz-questions.ts
index 0c51c8f..3e74174 100644
--- a/labelapp/lib/quiz-questions.ts
+++ b/labelapp/lib/quiz-questions.ts
@@ -256,12 +256,12 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
id: "qv-1",
type: "qv-counting",
paragraphText:
- "We maintain cyber liability insurance coverage.",
+ "Our CISO oversees a dedicated cybersecurity team responsible for managing cyber risk across the enterprise.",
question: QV_QUESTION,
options: QV_OPTIONS,
correctAnswer: "3",
explanation:
- 'This mentions insurance but provides no verifiable details (no dollar amount, no named insurer). "Cyber liability insurance" is a firm-specific fact — it tells you this particular company holds this type of coverage — but there is only one such fact. One firm-specific fact without a named standard = Specificity 3 (Firm-Specific).',
+ '"CISO" is a cybersecurity-specific title on the codebook\'s IS list — that\'s one firm-specific fact. "Dedicated cybersecurity team" is a generic team reference (NOT list). "Managing cyber risk across the enterprise" is generic. One IS-list fact, no named standards, no QV-eligible facts = Specificity 3 (Firm-Specific).',
},
{
id: "qv-2",
@@ -281,9 +281,9 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
"Our incident response team conducts quarterly tabletop exercises.",
question: QV_QUESTION,
options: QV_OPTIONS,
- correctAnswer: "3",
+ correctAnswer: "1",
explanation:
- 'Per the codebook, "quarterly" is a generic cadence and does NOT count as a specific fact for QV purposes. However, the mention of an "incident response team" and "tabletop exercises" indicates firm-specific activities. This has one firm-specific element but no hard verifiable facts (no named vendors, no dollar amounts, no exact dates). Specificity 3 (Firm-Specific).',
+ 'Apply the codebook\'s validation step: "quarterly" is a generic cadence (NOT list), "tabletop exercises" is a common practice (NOT list), and "incident response team" is a generic team reference (NOT list). After filtering, no IS-list facts remain. No named standards either. This is Specificity 1 (Generic Boilerplate) — it could appear unchanged in any company\'s filing.',
},
{
id: "qv-4",
diff --git a/labelapp/next.config.ts b/labelapp/next.config.ts
index e9ffa30..6ebc36f 100644
--- a/labelapp/next.config.ts
+++ b/labelapp/next.config.ts
@@ -1,7 +1,9 @@
import type { NextConfig } from "next";
+import path from "node:path";
const nextConfig: NextConfig = {
- /* config options here */
+ output: "standalone", // build a self-contained server under .next/standalone (what the Dockerfile copies)
+ outputFileTracingRoot: path.join(import.meta.dirname, "../"), // trace from the parent directory (the monorepo root) so files outside labelapp/ are included
};
export default nextConfig;
diff --git a/labelapp/package.json b/labelapp/package.json
index 8cbfe3a..faf607a 100644
--- a/labelapp/package.json
+++ b/labelapp/package.json
@@ -16,7 +16,8 @@
"test": "bun test app/ lib/ && playwright test",
"test:api": "bun test app/ lib/",
"test:e2e": "playwright test",
- "test:e2e:ui": "playwright test --ui"
+ "test:e2e:ui": "playwright test --ui",
+ "deploy": "docker build -f Dockerfile -t registry.claiborne.soy/labelapp:latest .. --push"
},
"dependencies": {
"@base-ui/react": "^1.3.0",
@@ -54,4 +55,4 @@
"sharp",
"unrs-resolver"
]
-}
+}
\ No newline at end of file
diff --git a/labelapp/playwright-report/index.html b/labelapp/playwright-report/index.html
index 577ba01..20b2ca2 100644
--- a/labelapp/playwright-report/index.html
+++ b/labelapp/playwright-report/index.html
@@ -82,4 +82,4 @@ Error generating stack: `+a.message+`