SEC-cyBERT/ts/scripts/judge-diag.ts
2026-03-28 23:44:37 -04:00

71 lines
2.8 KiB
TypeScript

/**
* Diagnostic: call GLM-5 on a failing paragraph, log raw response + headers.
* Usage: bun ts/scripts/judge-diag.ts <paragraph-id> [model-id]
*/
import { fileURLToPath } from "node:url";
import { generateText, Output } from "ai";
import { openrouter } from "../src/lib/openrouter.ts";
import { readJsonl, readJsonlRaw } from "../src/lib/jsonl.ts";
import { Paragraph } from "@sec-cybert/schemas/paragraph.ts";
import { LabelOutputRaw } from "@sec-cybert/schemas/label.ts";
import { SYSTEM_PROMPT, buildJudgePrompt } from "../src/label/prompts.ts";
/** Paragraph id to diagnose: required first CLI argument. */
const PID = process.argv[2];
/** Model id to call: optional second CLI argument, defaulting to "z-ai/glm-5". */
const MODEL = process.argv[3] ?? "z-ai/glm-5";
if (!PID) {
  console.error("Usage: bun ts/scripts/judge-diag.ts <paragraph-id> [model-id]");
  process.exit(1);
}

// Resolve data files relative to this script. fileURLToPath is used instead of
// URL#pathname because pathname stays percent-encoded (a checkout under
// "My Projects" would become "My%20Projects") and is not a valid path on
// Windows; fileURLToPath decodes it into a real filesystem path.
const STAGE1_PATH = fileURLToPath(new URL("../../data/annotations/stage1.jsonl", import.meta.url));
const PARAGRAPHS_PATH = fileURLToPath(new URL("../../data/paragraphs/training.jsonl", import.meta.url));
/** Label fields carried by one stage-1 annotation. */
interface S1AnnLabel {
  content_category: string;
  specificity_level: number;
  reasoning: string;
}

/** Identifies which model produced a stage-1 annotation. */
interface S1AnnProvenance {
  modelId: string;
}

/** Shape this script assumes for one record of the stage-1 annotations file. */
interface S1Ann {
  paragraphId: string;
  label: S1AnnLabel;
  provenance: S1AnnProvenance;
}
/**
 * Runtime check that a raw stage-1 record carries every field this script
 * reads when building the judge prompt (paragraphId plus the three label
 * fields). readJsonlRaw is called without a schema here, so a bare cast would
 * let a malformed line surface later as an opaque TypeError.
 */
function hasJudgeInputFields(rec: unknown): rec is Pick<S1Ann, "paragraphId" | "label"> {
  if (typeof rec !== "object" || rec === null) return false;
  const { paragraphId, label } = rec as { paragraphId?: unknown; label?: unknown };
  if (typeof paragraphId !== "string") return false;
  if (typeof label !== "object" || label === null) return false;
  const fields = label as {
    content_category?: unknown;
    specificity_level?: unknown;
    reasoning?: unknown;
  };
  return (
    typeof fields.content_category === "string" &&
    typeof fields.specificity_level === "number" &&
    typeof fields.reasoning === "string"
  );
}

console.error(`Loading data for ${PID}...`);
// The two files are independent of each other, so read them concurrently.
const [{ records: allAnns }, { records: allParagraphs }] = await Promise.all([
  readJsonlRaw(STAGE1_PATH),
  readJsonl(PARAGRAPHS_PATH, Paragraph),
]);

const paragraph = allParagraphs.find(p => p.id === PID);
if (!paragraph) { console.error("Paragraph not found"); process.exit(1); }

// Select this paragraph's records first, then validate only those, so the
// warning below is about the paragraph under diagnosis and not the whole file.
const annsForPid = (allAnns as readonly unknown[]).filter(
  rec =>
    typeof rec === "object" &&
    rec !== null &&
    (rec as { paragraphId?: unknown }).paragraphId === PID,
);
const s1anns = annsForPid.filter(hasJudgeInputFields);

const malformedCount = annsForPid.length - s1anns.length;
if (malformedCount > 0) {
  console.error(`WARNING: skipped ${malformedCount} malformed stage-1 annotation(s) for ${PID}`);
}
if (s1anns.length === 0) {
  console.error(`WARNING: no usable stage-1 annotations for ${PID}; prior annotation list is empty`);
}

// Only the label fields are handed to the prompt builder.
const priorAnns = s1anns.map(a => ({
  content_category: a.label.content_category,
  specificity_level: a.label.specificity_level,
  reasoning: a.label.reasoning,
}));

const judgePrompt = buildJudgePrompt(paragraph, priorAnns);
console.error(`\n=== JUDGE PROMPT (${judgePrompt.length} chars) ===\n`);

// Print a short preview; add the ellipsis only when text was actually cut off.
const PROMPT_PREVIEW_CHARS = 500;
const promptPreview =
  judgePrompt.length > PROMPT_PREVIEW_CHARS
    ? `${judgePrompt.slice(0, PROMPT_PREVIEW_CHARS)}...`
    : judgePrompt;
console.error(`${promptPreview}\n`);
// ── Attempt 1: request structured output, the same way the bench script does ──

/** Render a caught value as a short string suitable for a one-line log. */
const summarizeStructuredFailure = (err: unknown): string => {
  if (err instanceof Error) {
    return err.message.slice(0, 500);
  }
  return String(err);
};

console.error("=== ATTEMPT WITH STRUCTURED OUTPUT ===");
try {
  // Abort the request if it has not completed within 120 seconds.
  const timeoutMs = 120_000;

  const judged = await generateText({
    model: openrouter(MODEL),
    output: Output.object({ schema: LabelOutputRaw }),
    system: SYSTEM_PROMPT,
    prompt: judgePrompt,
    temperature: 0,
    providerOptions: {
      openrouter: {
        reasoning: { effort: "medium" },
        usage: { include: true },
      },
    },
    abortSignal: AbortSignal.timeout(timeoutMs),
  });

  // Fields are read in this order on purpose: the success banner goes out
  // before any property of the result is touched.
  console.log("SUCCESS (structured):");

  const outputJson = JSON.stringify(judged.output, null, 2);
  console.log(" Output:", outputJson);

  console.log(" Response ID:", judged.response?.id);
  console.log(" Model ID:", judged.response?.modelId);

  // Fall back to an empty object when the response carries no header map.
  const rawHeaders = judged.response?.headers;
  const printableHeaders = typeof rawHeaders === "object" && rawHeaders !== null ? rawHeaders : {};
  console.log(" Headers:", JSON.stringify(printableHeaders, null, 2));

  const metadataJson = JSON.stringify(judged.providerMetadata, null, 2);
  console.log(" Provider metadata:", metadataJson);
} catch (err) {
  console.error("FAILED (structured):", summarizeStructuredFailure(err));
}