// ktx/packages/context/scripts/relationship-benchmark-report.mjs

import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import {
2026-05-10 23:51:24 +02:00
KTX_RELATIONSHIP_BENCHMARK_MODES,
buildKtxRelationshipBenchmarkReport,
currentKtxRelationshipBenchmarkDetector,
formatKtxRelationshipBenchmarkReportMarkdown,
ktxRelationshipBenchmarkDetectorWithLlm,
loadKtxRelationshipBenchmarkFixtures,
runKtxRelationshipBenchmarkSuite,
2026-05-10 23:12:26 +02:00
} from '../dist/scan/index.js';
// Paths are anchored on this module's own location rather than on the
// process working directory.
const scriptPath = fileURLToPath(import.meta.url);
const scriptDir = dirname(scriptPath);

// Parent of the directory that contains this script.
const packageRoot = resolve(scriptDir, '..');

// Directory handed to loadKtxRelationshipBenchmarkFixtures.
const fixtureRoot = join(packageRoot, 'test/fixtures/relationship-benchmarks');
/**
 * Build the relationship detector for this benchmark run, selected through
 * environment variables.
 *
 * - `KTX_BENCHMARK_LLM_BACKEND` unset, empty, or `none`: returns the result of
 *   `currentKtxRelationshipBenchmarkDetector()`.
 * - `KTX_BENCHMARK_LLM_BACKEND=vertex`: creates an LLM provider from
 *   `KTX_BENCHMARK_VERTEX_PROJECT`, `KTX_BENCHMARK_VERTEX_LOCATION` and
 *   `KTX_BENCHMARK_LLM_MODEL` (default `claude-sonnet-4-6`) and returns the
 *   result of `ktxRelationshipBenchmarkDetectorWithLlm(provider)`.
 *
 * @returns {Promise<unknown>} The detector to pass to the benchmark suite.
 * @throws {Error} If the backend is not supported, or if the vertex backend
 *   is selected without both project and location set.
 */
async function buildDetector() {
  const backend = process.env.KTX_BENCHMARK_LLM_BACKEND;
  if (!backend || backend === 'none') {
    return currentKtxRelationshipBenchmarkDetector();
  }
  if (backend !== 'vertex') {
    throw new Error(`Unsupported KTX_BENCHMARK_LLM_BACKEND: ${backend}`);
  }

  const project = process.env.KTX_BENCHMARK_VERTEX_PROJECT;
  const location = process.env.KTX_BENCHMARK_VERTEX_LOCATION;
  if (!project || !location) {
    throw new Error('KTX_BENCHMARK_VERTEX_PROJECT and KTX_BENCHMARK_VERTEX_LOCATION are required for vertex backend');
  }

  // `||` rather than `??`: an environment variable that is set but empty is
  // treated as unset, matching the truthiness checks on the variables above.
  const model = process.env.KTX_BENCHMARK_LLM_MODEL || 'claude-sonnet-4-6';

  // Loaded lazily so that runs without an LLM backend never import the package.
  const { createKtxLlmProvider } = await import('@ktx/llm');
  const provider = createKtxLlmProvider({
    backend: 'vertex',
    vertex: { project, location },
    modelSlots: { default: model },
  });
  return ktxRelationshipBenchmarkDetectorWithLlm(provider);
}
// Script body: load the benchmark fixtures, run the suite with the detector
// chosen by buildDetector(), build the report for every mode in
// KTX_RELATIONSHIP_BENCHMARK_MODES, and write the formatted report to stdout.
// Any rejection propagates out of the top-level await and fails the process.
const fixtures = await loadKtxRelationshipBenchmarkFixtures(fixtureRoot);
const detector = await buildDetector();

const suite = await runKtxRelationshipBenchmarkSuite({
  fixtures,
  detector,
});

const report = buildKtxRelationshipBenchmarkReport({
  fixtures,
  suite,
  modes: KTX_RELATIONSHIP_BENCHMARK_MODES,
});

process.stdout.write(formatKtxRelationshipBenchmarkReportMarkdown(report));