deployment and minor tweaks
This commit is contained in:
parent
8c496ededa
commit
8e773d5335
23
.dockerignore
Normal file
23
.dockerignore
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Ignore everything by default
|
||||||
|
*
|
||||||
|
|
||||||
|
# Allow only what the labelapp Dockerfile needs
|
||||||
|
!package.json
|
||||||
|
!bun.lock
|
||||||
|
!packages/schemas/
|
||||||
|
|
||||||
|
!ts/package.json
|
||||||
|
!labelapp/
|
||||||
|
labelapp/node_modules/
|
||||||
|
labelapp/.next/
|
||||||
|
labelapp/.env*
|
||||||
|
labelapp/playwright-report/
|
||||||
|
labelapp/test-results/
|
||||||
|
|
||||||
|
# Seed data (only the two JSONL files we need)
|
||||||
|
!data/paragraphs/paragraphs-clean.jsonl
|
||||||
|
!data/annotations/stage1.jsonl
|
||||||
|
|
||||||
|
# Git/IDE
|
||||||
|
.git
|
||||||
|
**/.DS_Store
|
||||||
60
labelapp/Dockerfile
Normal file
60
labelapp/Dockerfile
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# Build context: monorepo root (run: docker build -f labelapp/Dockerfile .)
|
||||||
|
FROM oven/bun:1 AS base
|
||||||
|
|
||||||
|
# -- Install dependencies --
|
||||||
|
FROM base AS deps
|
||||||
|
WORKDIR /app
|
||||||
|
COPY package.json bun.lock ./
|
||||||
|
COPY packages/schemas/package.json packages/schemas/
|
||||||
|
COPY ts/package.json ts/
|
||||||
|
COPY labelapp/package.json labelapp/
|
||||||
|
RUN bun install --frozen-lockfile
|
||||||
|
|
||||||
|
# -- Build Next.js --
|
||||||
|
FROM base AS builder
|
||||||
|
WORKDIR /app
|
||||||
|
COPY --from=deps /app/node_modules ./node_modules
|
||||||
|
COPY --from=deps /app/packages/schemas/node_modules ./packages/schemas/node_modules
|
||||||
|
COPY --from=deps /app/labelapp/node_modules ./labelapp/node_modules
|
||||||
|
COPY package.json bun.lock ./
|
||||||
|
COPY packages/schemas/ packages/schemas/
|
||||||
|
COPY labelapp/ labelapp/
|
||||||
|
ENV NEXT_TELEMETRY_DISABLED=1
|
||||||
|
RUN cd labelapp && bun run build
|
||||||
|
|
||||||
|
# -- Production image --
|
||||||
|
FROM base AS runner
|
||||||
|
WORKDIR /app
|
||||||
|
ENV NODE_ENV=production
|
||||||
|
ENV NEXT_TELEMETRY_DISABLED=1
|
||||||
|
|
||||||
|
# Standalone server + static assets
|
||||||
|
COPY --from=builder /app/labelapp/.next/standalone ./
|
||||||
|
COPY --from=builder /app/labelapp/.next/static ./labelapp/.next/static
|
||||||
|
COPY --from=builder /app/labelapp/public ./labelapp/public
|
||||||
|
|
||||||
|
# Drizzle migration tooling (drizzle-kit push needs these)
|
||||||
|
COPY --from=deps /app/node_modules ./node_modules
|
||||||
|
COPY --from=deps /app/labelapp/node_modules ./labelapp/node_modules
|
||||||
|
COPY --from=builder /app/labelapp/drizzle.config.ts ./labelapp/
|
||||||
|
COPY --from=builder /app/labelapp/db/ ./labelapp/db/
|
||||||
|
COPY --from=builder /app/packages/schemas/ ./packages/schemas/
|
||||||
|
COPY --from=builder /app/package.json ./
|
||||||
|
|
||||||
|
# Seed/sample/assign scripts
|
||||||
|
COPY --from=builder /app/labelapp/scripts/ ./labelapp/scripts/
|
||||||
|
COPY --from=builder /app/labelapp/lib/ ./labelapp/lib/
|
||||||
|
|
||||||
|
# Seed data (paragraphs + stage1 annotations)
|
||||||
|
COPY data/paragraphs/paragraphs-clean.jsonl /app/data/paragraphs-clean.jsonl
|
||||||
|
COPY data/annotations/stage1.jsonl /app/data/stage1.jsonl
|
||||||
|
|
||||||
|
# Entrypoint
|
||||||
|
COPY labelapp/entrypoint.sh /app/entrypoint.sh
|
||||||
|
RUN chmod +x /app/entrypoint.sh
|
||||||
|
|
||||||
|
EXPOSE 3000
|
||||||
|
ENV PORT=3000
|
||||||
|
ENV HOSTNAME=0.0.0.0
|
||||||
|
|
||||||
|
ENTRYPOINT ["/app/entrypoint.sh"]
|
||||||
@ -458,7 +458,7 @@ function CodebookSidebar() {
|
|||||||
<SpecDef
|
<SpecDef
|
||||||
level={3}
|
level={3}
|
||||||
name="Firm-Specific"
|
name="Firm-Specific"
|
||||||
desc="Contains details unique to this company: named personnel, specific org structure, named tools/vendors, described processes."
|
desc="Contains at least one fact from the IS list unique to this company: cybersecurity-specific titles (CISO, CTO), named tools/vendors, specific dates, named committees."
|
||||||
/>
|
/>
|
||||||
<SpecDef
|
<SpecDef
|
||||||
level={4}
|
level={4}
|
||||||
@ -481,8 +481,9 @@ function CodebookSidebar() {
|
|||||||
Governance. Named officer/team = Management Role.
|
Governance. Named officer/team = Management Role.
|
||||||
</Rule>
|
</Rule>
|
||||||
<Rule title="Person vs Function">
|
<Rule title="Person vs Function">
|
||||||
"Our CISO, Jane Smith" = named person (Firm-Specific). "Our
|
Is the paragraph about the person (credentials, background,
|
||||||
CISO" alone = function reference (could be Generic).
|
reporting lines) or the function (program activities, tools)?
|
||||||
|
Person → Management Role. Function → Risk Management Process.
|
||||||
</Rule>
|
</Rule>
|
||||||
<Rule title="QV threshold">
|
<Rule title="QV threshold">
|
||||||
Need 2+ independently verifiable facts (dates, dollar amounts,
|
Need 2+ independently verifiable facts (dates, dollar amounts,
|
||||||
@ -495,7 +496,7 @@ function CodebookSidebar() {
|
|||||||
</Rule>
|
</Rule>
|
||||||
<Rule title="Dual-topic paragraphs">
|
<Rule title="Dual-topic paragraphs">
|
||||||
Choose the category whose content occupies the majority of the
|
Choose the category whose content occupies the majority of the
|
||||||
paragraph. If truly 50/50, prefer the more specific category.
|
paragraph — the primary communicative purpose.
|
||||||
</Rule>
|
</Rule>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|||||||
38
labelapp/entrypoint.sh
Normal file
38
labelapp/entrypoint.sh
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
cd /app/labelapp
|
||||||
|
|
||||||
|
echo "==> Running Drizzle migrations..."
|
||||||
|
bunx drizzle-kit push --force
|
||||||
|
|
||||||
|
echo "==> Checking if database needs seeding..."
|
||||||
|
ROW_COUNT=$(bun --eval "
|
||||||
|
import postgres from 'postgres';
|
||||||
|
const sql = postgres(process.env.DATABASE_URL);
|
||||||
|
const [{count}] = await sql\`SELECT count(*)::int as count FROM paragraphs\`;
|
||||||
|
console.log(count);
|
||||||
|
await sql.end();
|
||||||
|
" 2>/dev/null || echo "0")
|
||||||
|
|
||||||
|
if [ "$ROW_COUNT" = "0" ]; then
|
||||||
|
export SEED_PARAGRAPHS_PATH=/app/data/paragraphs-clean.jsonl
|
||||||
|
export SEED_ANNOTATIONS_PATH=/app/data/stage1.jsonl
|
||||||
|
export SAMPLED_IDS_PATH=/app/labelapp/.sampled-ids.json
|
||||||
|
|
||||||
|
echo "==> Database is empty, seeding..."
|
||||||
|
bun run scripts/seed.ts
|
||||||
|
|
||||||
|
echo "==> Running sampling..."
|
||||||
|
bun run scripts/sample.ts
|
||||||
|
|
||||||
|
echo "==> Running assignment generation..."
|
||||||
|
bun run scripts/assign.ts
|
||||||
|
|
||||||
|
echo "==> Seeding complete."
|
||||||
|
else
|
||||||
|
echo "==> Database already seeded ($ROW_COUNT paragraphs). Skipping."
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> Starting Next.js server..."
|
||||||
|
# The image copies .next/standalone into /app, so with the monorepo tracing
# root the server entry is /app/labelapp/server.js — i.e. ./server.js from the
# current directory (/app/labelapp). exec replaces the shell so signals reach it.
exec bun run server.js
|
||||||
@ -256,12 +256,12 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
|
|||||||
id: "qv-1",
|
id: "qv-1",
|
||||||
type: "qv-counting",
|
type: "qv-counting",
|
||||||
paragraphText:
|
paragraphText:
|
||||||
"We maintain cyber liability insurance coverage.",
|
"Our CISO oversees a dedicated cybersecurity team responsible for managing cyber risk across the enterprise.",
|
||||||
question: QV_QUESTION,
|
question: QV_QUESTION,
|
||||||
options: QV_OPTIONS,
|
options: QV_OPTIONS,
|
||||||
correctAnswer: "3",
|
correctAnswer: "3",
|
||||||
explanation:
|
explanation:
|
||||||
'This mentions insurance but provides no verifiable details (no dollar amount, no named insurer). "Cyber liability insurance" is a firm-specific fact — it tells you this particular company holds this type of coverage — but there is only one such fact. One firm-specific fact without a named standard = Specificity 3 (Firm-Specific).',
|
'"CISO" is a cybersecurity-specific title on the codebook\'s IS list — that\'s one firm-specific fact. "Dedicated cybersecurity team" is a generic team reference (NOT list). "Managing cyber risk across the enterprise" is generic. One IS-list fact, no named standards, no QV-eligible facts = Specificity 3 (Firm-Specific).',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: "qv-2",
|
id: "qv-2",
|
||||||
@ -281,9 +281,9 @@ export const QUIZ_QUESTIONS: QuizQuestion[] = [
|
|||||||
"Our incident response team conducts quarterly tabletop exercises.",
|
"Our incident response team conducts quarterly tabletop exercises.",
|
||||||
question: QV_QUESTION,
|
question: QV_QUESTION,
|
||||||
options: QV_OPTIONS,
|
options: QV_OPTIONS,
|
||||||
correctAnswer: "3",
|
correctAnswer: "1",
|
||||||
explanation:
|
explanation:
|
||||||
'Per the codebook, "quarterly" is a generic cadence and does NOT count as a specific fact for QV purposes. However, the mention of an "incident response team" and "tabletop exercises" indicates firm-specific activities. This has one firm-specific element but no hard verifiable facts (no named vendors, no dollar amounts, no exact dates). Specificity 3 (Firm-Specific).',
|
'Apply the codebook\'s validation step: "quarterly" is a generic cadence (NOT list), "tabletop exercises" is a common practice (NOT list), and "incident response team" is a generic team reference (NOT list). After filtering, no IS-list facts remain. No named standards either. This is Specificity 1 (Generic Boilerplate) — it could appear unchanged in any company\'s filing.',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: "qv-4",
|
id: "qv-4",
|
||||||
|
|||||||
@ -1,7 +1,9 @@
|
|||||||
import type { NextConfig } from "next";
|
import type { NextConfig } from "next";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
const nextConfig: NextConfig = {
|
const nextConfig: NextConfig = {
|
||||||
/* config options here */
|
output: "standalone",
|
||||||
|
outputFileTracingRoot: path.join(import.meta.dirname, "../"),
|
||||||
};
|
};
|
||||||
|
|
||||||
export default nextConfig;
|
export default nextConfig;
|
||||||
|
|||||||
@ -16,7 +16,8 @@
|
|||||||
"test": "bun test app/ lib/ && playwright test",
|
"test": "bun test app/ lib/ && playwright test",
|
||||||
"test:api": "bun test app/ lib/",
|
"test:api": "bun test app/ lib/",
|
||||||
"test:e2e": "playwright test",
|
"test:e2e": "playwright test",
|
||||||
"test:e2e:ui": "playwright test --ui"
|
"test:e2e:ui": "playwright test --ui",
|
||||||
|
"deploy": "docker build -f Dockerfile -t registry.claiborne.soy/labelapp:latest .. --push"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@base-ui/react": "^1.3.0",
|
"@base-ui/react": "^1.3.0",
|
||||||
@ -54,4 +55,4 @@
|
|||||||
"sharp",
|
"sharp",
|
||||||
"unrs-resolver"
|
"unrs-resolver"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
File diff suppressed because one or more lines are too long
@ -8,6 +8,7 @@ import * as schema from "../db/schema";
|
|||||||
import { generateAssignments, printAssignmentStats } from "../lib/assignment";
|
import { generateAssignments, printAssignmentStats } from "../lib/assignment";
|
||||||
|
|
||||||
const SAMPLED_IDS_PATH =
|
const SAMPLED_IDS_PATH =
|
||||||
|
process.env.SAMPLED_IDS_PATH ??
|
||||||
"/home/joey/Documents/sec-cyBERT/labelapp/.sampled-ids.json";
|
"/home/joey/Documents/sec-cyBERT/labelapp/.sampled-ids.json";
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
|
|||||||
@ -27,8 +27,10 @@ interface AnnotationRow {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const OUTPUT_PATH =
|
const OUTPUT_PATH =
|
||||||
|
process.env.SAMPLED_IDS_PATH ??
|
||||||
"/home/joey/Documents/sec-cyBERT/labelapp/.sampled-ids.json";
|
"/home/joey/Documents/sec-cyBERT/labelapp/.sampled-ids.json";
|
||||||
const ANNOTATIONS_PATH =
|
const ANNOTATIONS_PATH =
|
||||||
|
process.env.SEED_ANNOTATIONS_PATH ??
|
||||||
"/home/joey/Documents/sec-cyBERT/data/annotations/stage1.jsonl";
|
"/home/joey/Documents/sec-cyBERT/data/annotations/stage1.jsonl";
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
|
|||||||
@ -102,8 +102,10 @@ function computeConsensus(annotations: AnnotationRow[]): {
|
|||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
const PARAGRAPHS_PATH =
|
const PARAGRAPHS_PATH =
|
||||||
|
process.env.SEED_PARAGRAPHS_PATH ??
|
||||||
"/home/joey/Documents/sec-cyBERT/data/paragraphs/paragraphs-clean.jsonl";
|
"/home/joey/Documents/sec-cyBERT/data/paragraphs/paragraphs-clean.jsonl";
|
||||||
const ANNOTATIONS_PATH =
|
const ANNOTATIONS_PATH =
|
||||||
|
process.env.SEED_ANNOTATIONS_PATH ??
|
||||||
"/home/joey/Documents/sec-cyBERT/data/annotations/stage1.jsonl";
|
"/home/joey/Documents/sec-cyBERT/data/annotations/stage1.jsonl";
|
||||||
|
|
||||||
// 1. Read annotations and compute consensus per paragraph
|
// 1. Read annotations and compute consensus per paragraph
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user