SEC-cyBERT/labelapp/app/api/metrics/route.ts

import { NextResponse } from "next/server";
import { db } from "@/db";
import {
  adjudications,
  annotators,
  assignments,
  humanLabels,
  paragraphs,
} from "@/db/schema";
import { sql } from "drizzle-orm";
import { getSession } from "@/lib/auth";
import {
  cohensKappa,
  krippendorffsAlpha,
  confusionMatrix as buildConfusionMatrix,
  agreementRate,
  perCategoryAgreement,
} from "@/lib/metrics";

/**
 * Content categories reported by this endpoint. The list is passed to the
 * per-category agreement helper and defines the row/column order of the
 * confusion matrix returned in the response.
 */
const CATEGORIES: string[] = [
  "Board Governance",
  "Management Role",
  "Risk Management Process",
  "Third-Party Risk",
  "Incident Disclosure",
  "Strategy Integration",
  "None/Other",
];

/**
 * Annotator ID treated as the administrator. Only this account may call the
 * endpoint, and it is left out of per-annotator progress and pairwise
 * agreement statistics.
 */
const ADMIN_ANNOTATOR_ID = "joey";

/**
 * GET handler for the labeling metrics dashboard.
 *
 * Responds with 401 when there is no session and 403 when the session does
 * not belong to the admin annotator. Otherwise returns a JSON payload with:
 *
 * - `progress`: paragraph total, number of paragraphs with 3+ labels,
 *   adjudication count, and completed/assigned counts per non-admin annotator.
 * - `agreement`: consensus rate over paragraphs with 3+ labels, pairwise
 *   kappa matrix and its average over pairs sharing at least two paragraphs,
 *   Krippendorff's alpha on specificity, and per-category agreement.
 * - `confusionMatrix`: category-by-category counts aggregated over every
 *   annotator pair (lower-sorted annotator ID on the "actual" axis).
 */
export async function GET() {
  const session = await getSession();
  if (!session) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  if (session.annotatorId !== ADMIN_ANNOTATOR_ID) {
    return NextResponse.json({ error: "Admin access required" }, { status: 403 });
  }

  // All five queries are independent, so issue them together.
  const [
    allLabels,
    allAnnotators,
    allAdjudications,
    paragraphCount,
    assignmentCounts,
  ] = await Promise.all([
    db.select().from(humanLabels),
    db.select().from(annotators),
    db.select().from(adjudications),
    db.select({ count: sql<number>`count(*)` }).from(paragraphs),
    db
      .select({
        annotatorId: assignments.annotatorId,
        total: sql<number>`count(*)`,
      })
      .from(assignments)
      .groupBy(assignments.annotatorId),
  ]);

  // Counts are passed through Number() because the driver's runtime type for
  // count(*) is not visible here.
  const totalParagraphs = Number(paragraphCount[0]?.count ?? 0);

  const assignmentMap = new Map(
    assignmentCounts.map((a) => [a.annotatorId, Number(a.total)]),
  );

  type ParagraphLabel = {
    annotatorId: string;
    contentCategory: string;
    specificityLevel: number;
  };

  // Single pass over the labels: group them by paragraph and count how many
  // labels each annotator has submitted.
  const byParagraph = new Map<string, ParagraphLabel[]>();
  const annotatorCompletedMap = new Map<string, number>();
  for (const label of allLabels) {
    let group = byParagraph.get(label.paragraphId);
    if (!group) {
      group = [];
      byParagraph.set(label.paragraphId, group);
    }
    group.push({
      annotatorId: label.annotatorId,
      contentCategory: label.contentCategory,
      specificityLevel: label.specificityLevel,
    });

    annotatorCompletedMap.set(
      label.annotatorId,
      (annotatorCompletedMap.get(label.annotatorId) ?? 0) + 1,
    );
  }

  // Single pass over the paragraphs:
  //  - 3+ labels: counts as fully labeled and feeds the consensus rate
  //  - 2+ labels: eligible for pairwise analysis; index its labels by
  //    annotator so pair lookups are O(1). The first label an annotator
  //    gave for a paragraph wins if there happen to be several.
  // Note that admin labels are included in these per-paragraph counts; only
  // the pairwise statistics below exclude the admin.
  let fullyLabeled = 0;
  const categoryArrays: string[][] = [];
  const multiLabeled: Map<string, ParagraphLabel>[] = [];
  for (const labels of byParagraph.values()) {
    if (labels.length >= 3) {
      fullyLabeled++;
      categoryArrays.push(labels.map((l) => l.contentCategory));
    }
    if (labels.length >= 2) {
      const firstByAnnotator = new Map<string, ParagraphLabel>();
      for (const l of labels) {
        if (!firstByAnnotator.has(l.annotatorId)) {
          firstByAnnotator.set(l.annotatorId, l);
        }
      }
      multiLabeled.push(firstByAnnotator);
    }
  }

  const annotatorMap = new Map(
    allAnnotators.map((a) => [a.id, a.displayName]),
  );

  // Per-annotator progress, admin excluded.
  const perAnnotator = allAnnotators
    .filter((a) => a.id !== ADMIN_ANNOTATOR_ID)
    .map((a) => ({
      id: a.id,
      displayName: a.displayName,
      completed: annotatorCompletedMap.get(a.id) ?? 0,
      total: assignmentMap.get(a.id) ?? 0,
    }));

  // === Agreement metrics ===

  // Annotators that have at least one label (admin excluded), in sorted order
  // so matrix rows/columns are stable between requests.
  const annotatorIds = [...annotatorCompletedMap.keys()]
    .filter((id) => id !== ADMIN_ANNOTATOR_ID)
    .sort();

  // Kappa matrix starts as identity: self-agreement is perfect, and pairs
  // without enough shared paragraphs stay at 0.
  const kappaValues: number[][] = annotatorIds.map((_, row) =>
    annotatorIds.map((_, col) => (row === col ? 1 : 0)),
  );

  let kappaSum = 0;
  let kappaPairCount = 0;

  // Category pairs pooled across all annotator pairs for the confusion matrix.
  const allActual: string[] = [];
  const allPredicted: string[] = [];

  for (let i = 0; i < annotatorIds.length; i++) {
    for (let j = i + 1; j < annotatorIds.length; j++) {
      const a1 = annotatorIds[i];
      const a2 = annotatorIds[j];

      const shared1: string[] = [];
      const shared2: string[] = [];

      for (const firstByAnnotator of multiLabeled) {
        const l1 = firstByAnnotator.get(a1);
        const l2 = firstByAnnotator.get(a2);
        if (l1 && l2) {
          shared1.push(l1.contentCategory);
          shared2.push(l2.contentCategory);
          allActual.push(l1.contentCategory);
          allPredicted.push(l2.contentCategory);
        }
      }

      // Kappa is only computed when the pair shares at least two paragraphs.
      if (shared1.length >= 2) {
        const kappa = cohensKappa(shared1, shared2);
        kappaValues[i][j] = kappa;
        kappaValues[j][i] = kappa;
        kappaSum += kappa;
        kappaPairCount++;
      }
    }
  }

  const avgKappa = kappaPairCount > 0 ? kappaSum / kappaPairCount : 0;

  // Consensus rate over paragraphs with 3+ labels.
  const consensusRate = agreementRate(categoryArrays);

  // Krippendorff's alpha on specificity: raters x items matrix over the
  // multi-labeled paragraphs, null where an annotator did not rate the item.
  const ratingsMatrix: (number | null)[][] = annotatorIds.map((annotatorId) =>
    multiLabeled.map(
      (firstByAnnotator) =>
        firstByAnnotator.get(annotatorId)?.specificityLevel ?? null,
    ),
  );

  let alpha = 0;
  if (annotatorIds.length >= 2 && multiLabeled.length > 0) {
    alpha = krippendorffsAlpha(ratingsMatrix);
  }

  // Per-category agreement over every label.
  const perCategory = perCategoryAgreement(
    allLabels.map((l) => ({
      category: l.contentCategory,
      annotatorId: l.annotatorId,
      paragraphId: l.paragraphId,
    })),
    CATEGORIES,
  );

  // Fall back to an all-zero matrix when no annotator pair shares a paragraph.
  const confMatrix =
    allActual.length > 0
      ? buildConfusionMatrix(allActual, allPredicted, CATEGORIES)
      : CATEGORIES.map(() => new Array(CATEGORIES.length).fill(0));

  return NextResponse.json({
    progress: {
      totalParagraphs,
      fullyLabeled,
      adjudicated: allAdjudications.length,
      perAnnotator,
    },
    agreement: {
      consensusRate,
      avgKappa,
      kappaMatrix: {
        // Show display names where known, falling back to the raw ID.
        annotators: annotatorIds.map((id) => annotatorMap.get(id) ?? id),
        values: kappaValues,
      },
      krippendorffsAlpha: alpha,
      perCategory,
    },
    confusionMatrix: {
      labels: CATEGORIES,
      matrix: confMatrix,
    },
  });
}