84 lines
2.6 KiB
TypeScript
84 lines
2.6 KiB
TypeScript
/**
 * Raw test: call mimo-v2-flash on 20 paragraphs without Output.object,
 * then validate each response against the schema to find failure patterns.
 */
|
|
import { fileURLToPath } from "node:url";

import { generateText } from "ai";

import { openrouter } from "../src/lib/openrouter.ts";
import { readJsonl } from "../src/lib/jsonl.ts";
import { Paragraph } from "../src/schemas/paragraph.ts";
import { SYSTEM_PROMPT, buildUserPrompt } from "../src/label/prompts.ts";
import { LabelOutputRaw } from "../src/schemas/label.ts";
|
|
|
|
/**
 * Filesystem path of the JSONL paragraph file, resolved relative to this
 * module. `fileURLToPath` is used instead of `URL.pathname` because `pathname`
 * stays percent-encoded (a space becomes `%20`) and is not a valid path on
 * Windows.
 */
const INPUT = fileURLToPath(new URL("../../data/paragraphs/training.jsonl", import.meta.url));

/** Model identifier handed to the `openrouter` provider factory. */
const MODEL = "xiaomi/mimo-v2-flash";

/** Upper bound on how many paragraphs (taken from the start of the file) are sent to the model. */
const N = 20;
|
|
|
|
/**
 * Try to recover a JSON value from free-form model output.
 *
 * First parses the whole text; if that fails, parses the span from the first
 * `{` to the last `}` (covers JSON wrapped in markdown fences or commentary).
 *
 * @param text Raw text returned by the model.
 * @returns `{ value }` holding the parsed JSON, or `undefined` when neither
 *   attempt yields valid JSON. Never throws.
 */
function extractJson(text: string): { value: unknown } | undefined {
  try {
    return { value: JSON.parse(text) };
  } catch {
    // Not bare JSON; fall through to the embedded-object attempt.
  }

  const match = text.match(/\{[\s\S]*\}/);
  if (!match) return undefined;

  try {
    return { value: JSON.parse(match[0]) };
  } catch {
    // The brace-delimited span is not valid JSON either.
    return undefined;
  }
}

/**
 * Run the raw schema-compliance check.
 *
 * Reads paragraphs from `INPUT`, sends the first `N` of them to `MODEL` one at
 * a time via `generateText`, and validates each response against
 * `LabelOutputRaw`. Prints one progress character per paragraph
 * (`.` valid, `✖` unparseable or schema-invalid response, `E` call threw),
 * then a summary and the details of every failure.
 *
 * @returns Resolves once all paragraphs are processed and the report is printed.
 * @throws Propagates any error raised while reading `INPUT`; per-paragraph
 *   errors are recorded as failures instead of being thrown.
 */
async function main(): Promise<void> {
  const { records: paragraphs } = await readJsonl(INPUT, Paragraph);
  const sample = paragraphs.slice(0, N);

  let pass = 0;
  let fail = 0;
  const failures: { id: string; issues: unknown; raw: unknown }[] = [];

  for (const p of sample) {
    try {
      const result = await generateText({
        model: openrouter(MODEL),
        system: SYSTEM_PROMPT,
        prompt: buildUserPrompt(p),
        temperature: 0,
        providerOptions: {
          openrouter: {
            reasoning: { effort: "low" },
            usage: { include: true },
          },
        },
        // Give up on a single call after 120 seconds.
        abortSignal: AbortSignal.timeout(120_000),
      });

      const extracted = extractJson(result.text);
      if (extracted === undefined) {
        // Keep the head of the raw text so the failure pattern can be inspected.
        fail++;
        failures.push({ id: p.id, issues: "Not JSON", raw: result.text.slice(0, 500) });
        process.stdout.write("✖");
        continue;
      }

      const validation = LabelOutputRaw.safeParse(extracted.value);
      if (validation.success) {
        pass++;
        process.stdout.write(".");
      } else {
        fail++;
        failures.push({ id: p.id, issues: validation.error.issues, raw: extracted.value });
        process.stdout.write("✖");
      }
    } catch (err) {
      // The model call itself failed (network error, timeout, provider error).
      fail++;
      failures.push({ id: p.id, issues: err instanceof Error ? err.message : String(err), raw: null });
      process.stdout.write("E");
    }
  }

  // Report against the number actually processed: the file may hold fewer than N paragraphs.
  console.log(`\n\n${pass}/${sample.length} passed, ${fail} failed\n`);

  if (failures.length > 0) {
    console.log("=== FAILURES ===\n");
    for (const f of failures) {
      console.log(`--- ${f.id} ---`);
      console.log("Issues:", JSON.stringify(f.issues, null, 2));
      console.log("Raw:", JSON.stringify(f.raw, null, 2)?.slice(0, 1000));
      console.log();
    }
  }
}
|
|
|
|
// Script entry point: log any unhandled failure and exit with a non-zero status.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
|