/**
 * Diagnostic: call GLM-5 on a failing paragraph, log raw response + headers.
 * Usage: bun ts/scripts/judge-diag.ts <paragraph-id> [model-id]
 */

import { fileURLToPath } from "node:url";

import { generateText, Output } from "ai";
import { openrouter } from "../src/lib/openrouter.ts";
import { readJsonl, readJsonlRaw } from "../src/lib/jsonl.ts";
import { Paragraph } from "@sec-cybert/schemas/paragraph.ts";
import { LabelOutputRaw } from "@sec-cybert/schemas/label.ts";
import { SYSTEM_PROMPT, buildJudgePrompt } from "../src/label/prompts.ts";

// Command-line arguments: the first is the paragraph id to diagnose (required),
// the second an optional model id that overrides the default.
const PID = process.argv[2];
const MODEL = process.argv[3] ?? "z-ai/glm-5";

// Without a paragraph id there is nothing to look up: print usage and stop.
if (!PID) {
  console.error("Usage: bun ts/scripts/judge-diag.ts <paragraph-id> [model-id]");
  process.exit(1);
}

// Input files, resolved relative to this script's own location rather than the
// current working directory. fileURLToPath (instead of URL#pathname) decodes
// percent-escapes such as "%20" and produces a native path on Windows.
const STAGE1_PATH = fileURLToPath(new URL("../../data/annotations/stage1.jsonl", import.meta.url));
const PARAGRAPHS_PATH = fileURLToPath(new URL("../../data/paragraphs/training.jsonl", import.meta.url));

/** Shape this script expects of each record in the stage-1 annotations file. */
interface S1Ann {
  /** Id of the paragraph the annotation belongs to; matched against the CLI argument. */
  paragraphId: string;
  /** The annotation's label fields; these are forwarded to the judge prompt. */
  label: { content_category: string; specificity_level: number; reasoning: string };
  /** Identifies the model behind the annotation (not read by this script). */
  provenance: { modelId: string };
}

console.error(`Loading data for ${PID}...`);

// Stage-1 records are read raw and cast (not validated) to the shape this
// script assumes, then narrowed to the requested paragraph.
const { records: allAnns } = await readJsonlRaw(STAGE1_PATH);
const s1anns = (allAnns as S1Ann[]).filter((a) => a.paragraphId === PID);
if (s1anns.length === 0) {
  // Not fatal, but worth flagging: the judge prompt is built from these annotations.
  console.error(`Warning: no stage-1 annotations found for ${PID}; judge prompt will have no prior annotations.`);
}

// Paragraphs are parsed against the Paragraph schema; the target must exist.
const { records: allParagraphs } = await readJsonl(PARAGRAPHS_PATH, Paragraph);
const paragraph = allParagraphs.find((p) => p.id === PID);
if (!paragraph) {
  console.error("Paragraph not found");
  process.exit(1);
}

// Only the label fields of each prior annotation are handed to the prompt builder.
const priorAnns = s1anns.map((a) => ({
  content_category: a.label.content_category,
  specificity_level: a.label.specificity_level,
  reasoning: a.label.reasoning,
}));

const judgePrompt = buildJudgePrompt(paragraph, priorAnns);
console.error(`\n=== JUDGE PROMPT (${judgePrompt.length} chars) ===\n`);

// Preview the first 500 characters; the ellipsis marks an actual truncation only.
const PREVIEW_CHARS = 500;
const promptPreview =
  judgePrompt.length > PREVIEW_CHARS ? judgePrompt.slice(0, PREVIEW_CHARS) + "..." : judgePrompt;
console.error(promptPreview + "\n");

/**
 * Builds a JSON-friendly summary of a thrown value for diagnostic logging.
 *
 * Besides `name` and `message`, it copies a fixed set of fields when they are
 * present on the error (response body/headers, status code, raw text, ...).
 * NOTE(review): the field names follow what AI SDK errors are expected to
 * expose — confirm against the installed SDK version. Absent fields are skipped.
 *
 * @param err   The caught value; non-Error values are wrapped as `{ value }`.
 * @param depth Current nesting level; nested `cause` / `lastError` values are
 *              followed up to three levels deep to avoid unbounded recursion.
 * @returns A plain object describing the error.
 */
function describeError(err: unknown, depth = 0): Record<string, unknown> {
  if (!(err instanceof Error)) return { value: err };

  const details: Record<string, unknown> = { name: err.name, message: err.message };
  const detailFields = [
    "statusCode",
    "url",
    "responseHeaders",
    "responseBody",
    "text",
    "finishReason",
    "response",
    "usage",
  ];
  for (const field of detailFields) {
    const value: unknown = Reflect.get(err, field);
    if (value !== undefined) details[field] = value;
  }

  if (depth < 3) {
    for (const link of ["cause", "lastError"]) {
      const nested: unknown = Reflect.get(err, link);
      if (nested !== undefined) details[link] = describeError(nested, depth + 1);
    }
  }
  return details;
}

// ── Attempt 1: with structured output (like bench script) ──
console.error("=== ATTEMPT WITH STRUCTURED OUTPUT ===");
try {
  const result = await generateText({
    model: openrouter(MODEL),
    output: Output.object({ schema: LabelOutputRaw }),
    system: SYSTEM_PROMPT,
    prompt: judgePrompt,
    temperature: 0,
    providerOptions: {
      openrouter: {
        reasoning: { effort: "medium" },
        usage: { include: true },
      },
    },
    // Abort the request if it has not completed within two minutes.
    abortSignal: AbortSignal.timeout(120_000),
  });

  console.log("SUCCESS (structured):");
  console.log("  Output:", JSON.stringify(result.output, null, 2));
  console.log("  Response ID:", result.response?.id);
  console.log("  Model ID:", result.response?.modelId);
  const hdrs = result.response?.headers;
  console.log("  Headers:", JSON.stringify(hdrs && typeof hdrs === "object" ? hdrs : {}, null, 2));
  console.log("  Provider metadata:", JSON.stringify(result.providerMetadata, null, 2));
} catch (err) {
  console.error("FAILED (structured):", err instanceof Error ? err.message.slice(0, 500) : String(err));

  // The message alone rarely explains a failure; also dump whatever raw
  // response data the error carries, since that is what this script is for.
  let rendered: string;
  try {
    rendered = JSON.stringify(describeError(err), null, 2);
  } catch {
    rendered = "(error details could not be serialized)";
  }
  console.error("  Error details:", rendered);
}