// ktx/scripts/relationship-benchmark-report.mjs
//
// Runs the KTX relationship benchmark suite over the fixtures checked in under
// packages/cli and writes the formatted report to stdout.

import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
// Benchmark helpers are loaded from the CLI package's `dist` directory
// (presumably build output, so the package must be built first — confirm).
import {
  KTX_RELATIONSHIP_BENCHMARK_MODES,
  buildKtxRelationshipBenchmarkReport,
  currentKtxRelationshipBenchmarkDetector,
  formatKtxRelationshipBenchmarkReportMarkdown,
  ktxRelationshipBenchmarkDetectorWithLlm,
  loadKtxRelationshipBenchmarkFixtures,
  runKtxRelationshipBenchmarkSuite,
} from '../packages/cli/dist/context/scan/index.js';

// Absolute path of the directory that contains this script.
const scriptDir = dirname(fileURLToPath(import.meta.url));
// Parent of the script directory; used as the base for the fixture path below.
const ktxRoot = resolve(scriptDir, '..');
// Directory passed to the fixture loader for the relationship benchmarks.
const fixtureRoot = join(ktxRoot, 'packages/cli/src/test/fixtures/relationship-benchmarks');

/**
 * Builds the relationship detector for this benchmark run, selected through
 * environment variables.
 *
 * - `KTX_BENCHMARK_LLM_BACKEND` unset, empty, or `none`: returns the result of
 *   `currentKtxRelationshipBenchmarkDetector()`.
 * - `KTX_BENCHMARK_LLM_BACKEND=vertex`: creates an LLM provider from
 *   `KTX_BENCHMARK_VERTEX_PROJECT`, `KTX_BENCHMARK_VERTEX_LOCATION` and
 *   `KTX_BENCHMARK_LLM_MODEL` (default `claude-sonnet-4-6`) and returns the
 *   result of `ktxRelationshipBenchmarkDetectorWithLlm(provider)`.
 *
 * @returns {Promise<*>} Whatever the selected detector factory returns.
 * @throws {Error} If the backend is neither `none` nor `vertex`, or if the
 *   vertex backend is selected without both project and location set.
 */
async function buildDetector() {
  const backend = process.env.KTX_BENCHMARK_LLM_BACKEND;
  if (!backend || backend === 'none') {
    return currentKtxRelationshipBenchmarkDetector();
  }
  if (backend !== 'vertex') {
    throw new Error(`Unsupported KTX_BENCHMARK_LLM_BACKEND: ${backend}`);
  }

  const project = process.env.KTX_BENCHMARK_VERTEX_PROJECT;
  const location = process.env.KTX_BENCHMARK_VERTEX_LOCATION;
  // `||` rather than `??`: an empty KTX_BENCHMARK_LLM_MODEL is treated as
  // unset, matching how an empty backend value is handled above.
  const model = process.env.KTX_BENCHMARK_LLM_MODEL || 'claude-sonnet-4-6';
  if (!project || !location) {
    throw new Error('KTX_BENCHMARK_VERTEX_PROJECT and KTX_BENCHMARK_VERTEX_LOCATION are required for vertex backend');
  }

  // Loaded lazily so the LLM module is only imported when the vertex backend
  // is actually requested.
  const { createKtxLlmProvider } = await import('../packages/cli/dist/llm/index.js');
  const provider = createKtxLlmProvider({
    backend: 'vertex',
    vertex: { project, location },
    modelSlots: { default: model },
  });
  return ktxRelationshipBenchmarkDetectorWithLlm(provider);
}

// Script body: load the fixtures, build the detector chosen via environment
// variables, run the suite, and write the formatted report to stdout.
const fixtures = await loadKtxRelationshipBenchmarkFixtures(fixtureRoot);
const detector = await buildDetector();
const suite = await runKtxRelationshipBenchmarkSuite({
  fixtures,
  detector,
});
const report = buildKtxRelationshipBenchmarkReport({
  fixtures,
  suite,
  modes: KTX_RELATIONSHIP_BENCHMARK_MODES,
});
process.stdout.write(formatKtxRelationshipBenchmarkReportMarkdown(report));