mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-28 08:49:38 +02:00
feat(cli): improve search ranking output
This commit is contained in:
parent
de72a10ffb
commit
855c0644ff
12 changed files with 267 additions and 35 deletions
|
|
@ -22,6 +22,25 @@ class FakeEmbeddingPort {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Deterministic embedding test double that maps text onto a two-dimensional
 * "topic" space so ranking tests can exercise the semantic lane without a
 * real embedding model:
 *
 * - `[1, 0]`     — ARR-related text (the exact phrase "annual recurring
 *                  revenue", the standalone acronym "ARR"/"ARRs", or
 *                  "contract-first").
 * - `[0, 1]`     — net-revenue-related text ("net revenue", "gross", "refund").
 * - `[0.5, 0.5]` — anything else.
 *
 * The ARR checks run first, so text that mentions both topics is classified
 * as ARR.
 */
class ArrSynonymEmbeddingPort {
  /**
   * Matches the ARR acronym only as a whole word (optionally pluralised).
   * A plain substring test would also fire on unrelated words such as
   * "array", "carried" or "warranty". The pattern has no `g` flag, so
   * `test()` is stateless and the instance can be shared safely.
   */
  private static readonly arrAcronym = /\barrs?\b/;

  /**
   * Batch size advertised by this port. Not consulted by this class's own
   * methods — presumably read by the caller when chunking bulk requests.
   */
  readonly maxBatchSize = 16;

  /**
   * Classifies `text` case-insensitively into one of the three fixed vectors.
   *
   * @param text Raw query or page text to embed.
   * @returns The two-element vector for the detected topic.
   */
  async computeEmbedding(text: string): Promise<number[]> {
    const lower = text.toLowerCase();

    // The full phrase is compared after trimming: it only counts as the ARR
    // synonym when it is the entire input (typically the search query).
    if (
      lower.trim() === 'annual recurring revenue' ||
      ArrSynonymEmbeddingPort.arrAcronym.test(lower) ||
      lower.includes('contract-first')
    ) {
      return [1, 0];
    }

    if (lower.includes('net revenue') || lower.includes('gross') || lower.includes('refund')) {
      return [0, 1];
    }

    return [0.5, 0.5];
  }

  /**
   * Embeds every entry of `texts` via {@link computeEmbedding}.
   *
   * @param texts Texts to embed; an empty array yields an empty result.
   * @returns One vector per input, in input order.
   */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    return Promise.all(texts.map((text) => this.computeEmbedding(text)));
  }
}
|
||||
|
||||
describe('local knowledge helpers', () => {
|
||||
let tempDir: string;
|
||||
let project: KtxLocalProject;
|
||||
|
|
@ -131,6 +150,37 @@ describe('local knowledge helpers', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// Two pages are written: one that is about ARR and one whose wording mentions
// "annual revenue". The query is the spelled-out ARR phrase, embedded with
// ArrSynonymEmbeddingPort. The ARR page must rank first and the top hit must
// report the semantic lane as an available match reason.
it('ranks ARR synonym queries by semantic page embeddings over stronger lexical revenue matches', async () => {
  await writeLocalKnowledgePage(project, {
    key: 'arr-definition',
    scope: 'GLOBAL',
    summary: 'ARR is calculated contract-first for active customer contracts.',
    content: 'Contract-first active contract value takes precedence over subscription values.',
    tags: ['arr', 'contracts', 'finance'],
  });
  await writeLocalKnowledgePage(project, {
    key: 'net-revenue-definition',
    scope: 'GLOBAL',
    summary: 'Net revenue definition',
    content: 'Annual revenue is gross invoice revenue minus credits and refunds.',
    tags: ['revenue', 'finance'],
  });

  const embeddingService = new ArrSynonymEmbeddingPort();
  const ranked = await searchLocalKnowledgePages(project, {
    query: 'annual recurring revenue',
    userId: 'local',
    limit: 2,
    embeddingService,
  });

  // Ordering: the ARR page first, the net revenue page second.
  const rankedKeys = ranked.map(({ key }) => key);
  expect(rankedKeys).toEqual(['arr-definition', 'net-revenue-definition']);

  // The top hit must list 'semantic' and carry an available semantic lane entry.
  const semanticLaneAvailable = expect.objectContaining({ lane: 'semantic', status: 'available' });
  expect(ranked[0]).toMatchObject({
    key: 'arr-definition',
    matchReasons: expect.arrayContaining(['semantic']),
    lanes: expect.arrayContaining([semanticLaneAvailable]),
  });
});
|
||||
|
||||
it('reports semantic lane as skipped when wiki embeddings are not configured', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics-revenue',
|
||||
|
|
|
|||
|
|
@ -309,6 +309,7 @@ async function searchLocalKnowledgePagesWithSqlite(
|
|||
},
|
||||
{
|
||||
lane: 'semantic',
|
||||
weight: 3,
|
||||
async generate(args) {
|
||||
if (!embeddingService) {
|
||||
return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' };
|
||||
|
|
@ -320,7 +321,9 @@ async function searchLocalKnowledgePagesWithSqlite(
|
|||
limit: args.laneCandidatePoolLimit,
|
||||
});
|
||||
return {
|
||||
candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })),
|
||||
candidates: rows
|
||||
.filter((row) => row.rawScore > 0)
|
||||
.map((row, index) => ({ id: row.id, rank: index + 1, rawScore: row.rawScore })),
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue