feat(cli): improve search ranking output

This commit is contained in:
Andrey Avtomonov 2026-05-17 01:59:21 +02:00
parent de72a10ffb
commit 855c0644ff
12 changed files with 267 additions and 35 deletions

View file

@@ -22,6 +22,25 @@ class FakeEmbeddingPort {
}
}
/**
 * Deterministic embedding stub that maps text onto a two-dimensional space
 * using keyword checks instead of a real model.
 *
 * - `[1, 0]`: text that is exactly "annual recurring revenue" (ignoring case
 *   and surrounding whitespace) or that contains "arr" or "contract-first".
 * - `[0, 1]`: text that contains "net revenue", "gross" or "refund".
 * - `[0.5, 0.5]`: everything else.
 *
 * The first group takes precedence when a text matches both groups.
 */
class ArrSynonymEmbeddingPort {
  readonly maxBatchSize = 16;

  /**
   * Returns the stub vector for a single text.
   *
   * @param text - Input to classify; matching is case-insensitive.
   * @returns A freshly allocated two-element vector.
   */
  async computeEmbedding(text: string): Promise<number[]> {
    const normalized = text.toLowerCase();

    // The full phrase must match exactly (after trimming); the short markers
    // only need to appear somewhere in the text.
    const isArrTopic =
      normalized.trim() === 'annual recurring revenue' ||
      ['arr', 'contract-first'].some((marker) => normalized.includes(marker));
    if (isArrTopic) {
      return [1, 0];
    }

    const isNetRevenueTopic = ['net revenue', 'gross', 'refund'].some((marker) => normalized.includes(marker));
    return isNetRevenueTopic ? [0, 1] : [0.5, 0.5];
  }

  /**
   * Embeds every text with {@link computeEmbedding}, preserving input order.
   *
   * @param texts - Inputs to embed.
   * @returns One vector per input, in the same order.
   */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const pending: Promise<number[]>[] = [];
    for (const entry of texts) {
      pending.push(this.computeEmbedding(entry));
    }
    return Promise.all(pending);
  }
}
describe('local knowledge helpers', () => {
let tempDir: string;
let project: KtxLocalProject;
@@ -131,6 +150,37 @@ describe('local knowledge helpers', () => {
});
});
it('ranks ARR synonym queries by semantic page embeddings over stronger lexical revenue matches', async () => {
  // Page the stub embedding places next to the query, although it shares none
  // of the query's literal words.
  await writeLocalKnowledgePage(project, {
    key: 'arr-definition',
    scope: 'GLOBAL',
    summary: 'ARR is calculated contract-first for active customer contracts.',
    content: 'Contract-first active contract value takes precedence over subscription values.',
    tags: ['arr', 'contracts', 'finance'],
  });

  // Page that repeats "annual" and "revenue" from the query but that the stub
  // embedding places on the other axis.
  await writeLocalKnowledgePage(project, {
    key: 'net-revenue-definition',
    scope: 'GLOBAL',
    summary: 'Net revenue definition',
    content: 'Annual revenue is gross invoice revenue minus credits and refunds.',
    tags: ['revenue', 'finance'],
  });

  const embeddingService = new ArrSynonymEmbeddingPort();
  const results = await searchLocalKnowledgePages(project, {
    query: 'annual recurring revenue',
    userId: 'local',
    limit: 2,
    embeddingService,
  });

  // The semantic match must outrank the lexical one.
  const rankedKeys = results.map(({ key }) => key);
  expect(rankedKeys).toEqual(['arr-definition', 'net-revenue-definition']);

  // The top hit must be attributed to the semantic lane, and that lane must
  // report itself as available.
  const semanticLaneAvailable = expect.objectContaining({ lane: 'semantic', status: 'available' });
  expect(results[0]).toMatchObject({
    key: 'arr-definition',
    matchReasons: expect.arrayContaining(['semantic']),
    lanes: expect.arrayContaining([semanticLaneAvailable]),
  });
});
it('reports semantic lane as skipped when wiki embeddings are not configured', async () => {
await writeLocalKnowledgePage(project, {
key: 'metrics-revenue',

View file

@@ -309,6 +309,7 @@ async function searchLocalKnowledgePagesWithSqlite(
},
{
lane: 'semantic',
weight: 3,
async generate(args) {
if (!embeddingService) {
return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' };
@@ -320,7 +321,9 @@ async function searchLocalKnowledgePagesWithSqlite(
limit: args.laneCandidatePoolLimit,
});
return {
candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })),
candidates: rows
.filter((row) => row.rawScore > 0)
.map((row, index) => ({ id: row.id, rank: index + 1, rawScore: row.rawScore })),
};
} catch (error) {
return {