feat(cli): improve search ranking output (#123)

This commit is contained in:
Andrey Avtomonov 2026-05-17 02:32:41 +02:00 committed by GitHub
parent d3d58a279b
commit 74be832aea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 267 additions and 35 deletions

View file

@ -141,6 +141,9 @@ Semantic-layer list and search commands return human-readable output by
default. Use `--json` on `list` or `search` when an agent needs structured default. Use `--json` on `list` or `search` when an agent needs structured
output. Use `--format sql` on `query` to inspect generated SQL before output. Use `--format sql` on `query` to inspect generated SQL before
execution, or leave `--format json` for the compiled query and optional rows. execution, or leave `--format json` for the compiled query and optional rows.
Pretty `sl search` output labels the displayed results with rank badges (`#1`,
`#2`, and so on). Plain and JSON output keep the raw `score` value, which is a
ranking score rather than a percentage.
```json ```json
{ {

View file

@ -43,6 +43,12 @@ need to add or update wiki knowledge.
| `--output <mode>` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` | | `--output <mode>` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` |
| `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` | | `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` |
`wiki search` uses hybrid search when `storage.search` is `sqlite-fts5`. KTX
combines lexical SQLite FTS5 matches, token matches, and semantic matches from
wiki page embeddings stored in `.ktx/db.sqlite`. If embeddings are not
configured or the embedding backend is unavailable, KTX skips the semantic lane
and keeps lexical and token results.
## Examples ## Examples
```bash ```bash
@ -60,14 +66,21 @@ ktx wiki search "monthly recurring revenue" --json --limit 10
# Print search results as TSV # Print search results as TSV
ktx wiki search "monthly recurring revenue" --output plain ktx wiki search "monthly recurring revenue" --output plain
# Inspect which search lanes were used
ktx --debug wiki search "monthly recurring revenue" --json
``` ```
## Output ## Output
Wiki commands print clack-style pretty output in a TTY and TSV-style plain Wiki commands print clack-style pretty output in a TTY and TSV-style plain
output when requested. JSON output wraps the items with a command metadata output when requested. JSON output wraps the items with a command metadata
envelope. Open the matching Markdown files directly when you need the full page envelope. Search results include `matchReasons` and `lanes` metadata so you can
contents. see whether lexical, token, or semantic search contributed to the ranking. Open
the matching Markdown files directly when you need the full page contents.
Pretty search output labels the displayed results with rank badges (`#1`, `#2`,
and so on). Plain and JSON output keep the raw `score` value, which is a ranking
score rather than a percentage.
```json ```json
{ {
@ -77,16 +90,49 @@ contents.
{ {
"key": "revenue-definitions", "key": "revenue-definitions",
"summary": "Canonical revenue metric definitions", "summary": "Canonical revenue metric definitions",
"score": 0.92 "score": 0.92,
"matchReasons": ["lexical", "semantic"],
"lanes": [
{
"lane": "lexical",
"status": "available",
"requestedCandidatePoolLimit": 25,
"effectiveCandidatePoolLimit": 25,
"returnedCandidateCount": 3,
"weight": 1.5
},
{
"lane": "semantic",
"status": "available",
"requestedCandidatePoolLimit": 25,
"effectiveCandidatePoolLimit": 25,
"returnedCandidateCount": 8,
"weight": 3
}
]
} }
] ]
},
"meta": {
"command": "wiki search"
} }
} }
``` ```
When you pass the global `--debug` flag, KTX writes search diagnostics to
stderr and leaves stdout unchanged. This is useful with `--json` because stdout
stays machine-readable:
```text
[debug] wiki search mode=sqlite-fts5 embedding=configured results=2
[debug] wiki search lane=lexical status=available returned=1 weight=1.5
[debug] wiki search lane=token status=available returned=1 weight=0.75
[debug] wiki search lane=semantic status=available returned=2 weight=3
```
## Common errors ## Common errors
| Error | Cause | Recovery | | Error | Cause | Recovery |
|-------|-------|----------| |-------|-------|----------|
| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms, then create a page if the knowledge is missing | | Search returns no results | The query terms do not match summaries, tags, or content, and the semantic lane is unavailable or has no positive matches | Run with `--debug`, check the semantic lane status, retry with business synonyms, then create a page if the knowledge is missing |
| A page is missing | No Markdown file exists for that business context | Add a file under `wiki/` or run `ktx ingest <connectionId>` | | A page is missing | No Markdown file exists for that business context | Add a file under `wiki/` or run `ktx ingest <connectionId>` |

View file

@ -1,5 +1,6 @@
import { type Command, Option } from '@commander-js/extra-typings'; import { type Command, Option } from '@commander-js/extra-typings';
import { import {
type CommandWithGlobalOptions,
type KtxCliCommandContext, type KtxCliCommandContext,
parsePositiveIntegerOption, parsePositiveIntegerOption,
resolveCommandProjectDir, resolveCommandProjectDir,
@ -14,6 +15,11 @@ async function runKnowledgeArgs(context: KtxCliCommandContext, args: KtxKnowledg
context.setExitCode(await runner(args, context.io)); context.setExitCode(await runner(args, context.io));
} }
/**
 * Reports whether the `debug` option resolved for `command` is exactly `true`.
 *
 * Options are read through `optsWithGlobals()` when the command exposes it and
 * through `opts()` otherwise.
 */
function isDebugEnabled(command: CommandWithGlobalOptions): boolean {
  let resolved: { debug?: unknown };
  if (command.optsWithGlobals) {
    resolved = command.optsWithGlobals() as { debug?: unknown };
  } else {
    resolved = command.opts() as { debug?: unknown };
  }
  // Strict comparison: a truthy non-boolean value (for example the string "true") does not enable debug.
  const { debug } = resolved;
  return debug === true;
}
export function registerWikiCommands(program: Command, context: KtxCliCommandContext): void { export function registerWikiCommands(program: Command, context: KtxCliCommandContext): void {
const wiki = program const wiki = program
.command('wiki') .command('wiki')
@ -83,6 +89,7 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon
userId: options.userId, userId: options.userId,
output: options.output, output: options.output,
json: options.json, json: options.json,
...(isDebugEnabled(command) ? { debug: true } : {}),
...(options.limit !== undefined ? { limit: options.limit } : {}), ...(options.limit !== undefined ? { limit: options.limit } : {}),
}); });
}, },

View file

@ -171,6 +171,22 @@ describe('runKtxCli', () => {
}, },
searchIo.io, searchIo.io,
); );
const debugSearchIo = makeIo();
await expect(
runKtxCli(['--project-dir', tempDir, '--debug', 'wiki', 'search', 'revenue'], debugSearchIo.io, { knowledge }),
).resolves.toBe(0);
expect(knowledge).toHaveBeenLastCalledWith(
{
command: 'search',
projectDir: tempDir,
query: 'revenue',
userId: 'local',
json: false,
debug: true,
},
debugSearchIo.io,
);
}); });
it('rejects removed public wiki read and write commands', async () => { it('rejects removed public wiki read and write commands', async () => {

View file

@ -1,6 +1,6 @@
import { describe, expect, it } from 'vitest'; import { describe, expect, it } from 'vitest';
import type { KtxCliIo } from '../cli-runtime.js'; import type { KtxCliIo } from '../cli-runtime.js';
import { printList, type PrintListColumn } from './print-list.js'; import { createRankBadgeFormatter, printList, type PrintListColumn } from './print-list.js';
import { SYMBOLS } from './symbols.js'; import { SYMBOLS } from './symbols.js';
function recorder(): { io: KtxCliIo; out: () => string; err: () => string } { function recorder(): { io: KtxCliIo; out: () => string; err: () => string } {
@ -239,26 +239,26 @@ describe('printList — pretty mode', () => {
expect(out).toContain('2 pages'); expect(out).toContain('2 pages');
}); });
it('renders a leading badge column with prettyFormat in pretty mode', () => { it('renders a leading rank badge column in pretty mode', () => {
const r = recorder(); const r = recorder();
interface SearchRow { score: number; scope: string; key: string; summary: string } interface SearchRow { score: number; scope: string; key: string; summary: string }
const rows: SearchRow[] = [
{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' },
{ score: 0.04, scope: 'GLOBAL', key: 'beta', summary: 'second' },
];
const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [ const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [
{ {
key: 'score', key: 'score',
label: 'SCORE', label: 'SCORE',
plain: 'score=', plain: 'score=',
role: 'badge', role: 'badge',
prettyFormat: (v) => `${Math.round(Number(v) * 100)}%`, prettyFormat: createRankBadgeFormatter(rows),
dim: true, dim: true,
}, },
{ key: 'scope', label: 'SCOPE', plain: '' }, { key: 'scope', label: 'SCOPE', plain: '' },
{ key: 'key', label: 'KEY', plain: '' }, { key: 'key', label: 'KEY', plain: '' },
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true }, { key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
]; ];
const rows: SearchRow[] = [
{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' },
{ score: 0.04, scope: 'GLOBAL', key: 'beta', summary: 'second' },
];
printList<SearchRow>({ printList<SearchRow>({
rows, rows,
columns: SEARCH_COLUMNS, columns: SEARCH_COLUMNS,
@ -270,20 +270,22 @@ describe('printList — pretty mode', () => {
io: r.io, io: r.io,
}); });
const out = stripAnsi(r.out()); const out = stripAnsi(r.out());
expect(out).toMatch(/87%\s+alpha\s+/); expect(out).toMatch(/#1\s+alpha\s+/);
expect(out).toMatch(/4%\s+beta\s+/); expect(out).toMatch(/#2\s+beta\s+/);
expect(out).not.toContain('%');
}); });
it('emits the badge column in plain mode using its plain prefix', () => { it('emits the badge column in plain mode using its plain prefix', () => {
const r = recorder(); const r = recorder();
interface SearchRow { score: number; scope: string; key: string; summary: string } interface SearchRow { score: number; scope: string; key: string; summary: string }
const rows: SearchRow[] = [{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' }];
const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [ const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [
{ {
key: 'score', key: 'score',
label: 'SCORE', label: 'SCORE',
plain: 'score=', plain: 'score=',
role: 'badge', role: 'badge',
prettyFormat: (v) => `${Math.round(Number(v) * 100)}%`, prettyFormat: createRankBadgeFormatter(rows),
dim: true, dim: true,
}, },
{ key: 'scope', label: 'SCOPE', plain: '' }, { key: 'scope', label: 'SCOPE', plain: '' },
@ -291,7 +293,7 @@ describe('printList — pretty mode', () => {
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true }, { key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
]; ];
printList<SearchRow>({ printList<SearchRow>({
rows: [{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' }], rows,
columns: SEARCH_COLUMNS, columns: SEARCH_COLUMNS,
groupBy: 'scope', groupBy: 'scope',
mode: 'plain', mode: 'plain',

View file

@ -24,7 +24,7 @@ export interface PrintListColumn<Row> {
* - `'suffix'` trailing em-dash optional value. Default: any column with `optional: true`. * - `'suffix'` trailing em-dash optional value. Default: any column with `optional: true`.
*/ */
role?: 'name' | 'metric' | 'badge' | 'suffix'; role?: 'name' | 'metric' | 'badge' | 'suffix';
/** Custom pretty-mode value formatter (e.g. score → "87%"). Plain/JSON unaffected. */ /** Custom pretty-mode value formatter (for example, score -> "#1"). Plain/JSON unaffected. */
prettyFormat?: (value: Row[keyof Row & string], row: Row) => string; prettyFormat?: (value: Row[keyof Row & string], row: Row) => string;
} }
@ -67,6 +67,16 @@ export function printList<Row extends object>(args: PrintListArgs<Row>): void {
} }
} }
/**
 * Builds a `prettyFormat` callback that renders a row's 1-based position in `rows` as `#<rank>`.
 *
 * Ranks are keyed by row object identity and captured when the formatter is created.
 * The cell value passed to the callback is ignored.
 *
 * @param rows Rows in ranking order; index 0 becomes `#1`.
 * @returns A formatter that maps a row to its rank badge.
 */
export function createRankBadgeFormatter<Row extends object>(
  rows: ReadonlyArray<Row>,
): (_value: Row[keyof Row & string], row: Row) => string {
  // If the same object occurs more than once, the later index overwrites the earlier one.
  const rankByRow = rows.reduce(
    (lookup, entry, position) => lookup.set(entry, position + 1),
    new WeakMap<Row, number>(),
  );

  return function formatRankBadge(_value, row) {
    const captured = rankByRow.get(row);
    // Rows missing from the snapshot are looked up in the live array; a row that is
    // not in `rows` at all yields indexOf() === -1 and therefore renders as "#0".
    const rank = captured === undefined ? rows.indexOf(row) + 1 : captured;
    return `#${rank}`;
  };
}
function isEmpty(value: unknown): boolean { function isEmpty(value: unknown): boolean {
return value === undefined || value === null || value === ''; return value === undefined || value === null || value === '';
} }

View file

@ -1,6 +1,7 @@
import { mkdtemp, rm } from 'node:fs/promises'; import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os'; import { tmpdir } from 'node:os';
import { join } from 'node:path'; import { join } from 'node:path';
import { stripVTControlCharacters } from 'node:util';
import { initKtxProject, loadKtxProject } from '@ktx/context/project'; import { initKtxProject, loadKtxProject } from '@ktx/context/project';
import type { KtxEmbeddingPort } from '@ktx/context'; import type { KtxEmbeddingPort } from '@ktx/context';
import { writeLocalKnowledgePage } from '@ktx/context/wiki'; import { writeLocalKnowledgePage } from '@ktx/context/wiki';
@ -90,6 +91,24 @@ describe('runKtxKnowledge', () => {
expect(searchIo.stdout()).toContain('metrics-revenue'); expect(searchIo.stdout()).toContain('metrics-revenue');
}); });
// Pretty-mode `wiki search` must label hits with rank badges ("#1") and must not print any percentage.
it('prints wiki search rank badges in pretty output', async () => {
const projectDir = join(tempDir, 'rank-project');
await initKtxProject({ projectDir });
await seedWikiPage(projectDir);
const searchIo = makeIo();
// Force pretty output explicitly via the `output` argument.
await expect(
runKtxKnowledge(
{ command: 'search', projectDir, query: 'paid order', userId: 'local', output: 'pretty' },
searchIo.io,
),
).resolves.toBe(0);
// Remove ANSI/VT escape sequences so the assertions see only the visible text.
const stdout = stripVTControlCharacters(searchIo.stdout());
expect(stdout).toMatch(/#1\s+metrics-revenue/);
// No '%' anywhere in the output, i.e. no percentage-style badge.
expect(stdout).not.toContain('%');
});
it('prints wiki list and search as public JSON envelopes', async () => { it('prints wiki list and search as public JSON envelopes', async () => {
const projectDir = join(tempDir, 'project'); const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir }); await initKtxProject({ projectDir });
@ -156,4 +175,29 @@ describe('runKtxKnowledge', () => {
expect(searchIo.stdout()).toContain('active-contract-arr-open-tickets'); expect(searchIo.stdout()).toContain('active-contract-arr-open-tickets');
expect(searchIo.stderr()).toBe(''); expect(searchIo.stderr()).toBe('');
}); });
// With `debug: true`, diagnostics must go to stderr while stdout stays a parseable JSON envelope.
it('writes wiki search lane diagnostics to stderr when debug is enabled', async () => {
const projectDir = join(tempDir, 'debug-project');
await initKtxProject({ projectDir });
await seedWikiPage(projectDir);
const searchIo = makeIo();
// Inject a fake embedding service so the semantic lane can report as available.
await expect(
runKtxKnowledge(
{ command: 'search', projectDir, query: 'paid order', userId: 'local', json: true, debug: true },
searchIo.io,
{ embeddingService: new FakeEmbeddingPort() },
),
).resolves.toBe(0);
// JSON.parse throws if any debug text leaked into stdout.
expect(JSON.parse(searchIo.stdout())).toMatchObject({
kind: 'list',
data: { items: [expect.objectContaining({ key: 'metrics-revenue' })] },
meta: { command: 'wiki search' },
});
// Summary line fields, then one line per lane, are expected on stderr.
expect(searchIo.stderr()).toContain('[debug] wiki search mode=sqlite-fts5');
expect(searchIo.stderr()).toContain('embedding=configured');
expect(searchIo.stderr()).toContain('lane=lexical status=available');
expect(searchIo.stderr()).toContain('lane=semantic status=available');
});
}); });

View file

@ -11,7 +11,7 @@ import {
searchLocalKnowledgePages, searchLocalKnowledgePages,
} from '@ktx/context/wiki'; } from '@ktx/context/wiki';
import { resolveOutputMode } from './io/mode.js'; import { resolveOutputMode } from './io/mode.js';
import { printList, type PrintListColumn } from './io/print-list.js'; import { createRankBadgeFormatter, printList, type PrintListColumn } from './io/print-list.js';
export type KtxKnowledgeArgs = export type KtxKnowledgeArgs =
| { command: 'list'; projectDir: string; userId: string; output?: string; json?: boolean } | { command: 'list'; projectDir: string; userId: string; output?: string; json?: boolean }
@ -23,6 +23,7 @@ export type KtxKnowledgeArgs =
output?: string; output?: string;
json?: boolean; json?: boolean;
limit?: number; limit?: number;
debug?: boolean;
}; };
type KtxKnowledgeIo = import('./cli-runtime.js').KtxCliIo; type KtxKnowledgeIo = import('./cli-runtime.js').KtxCliIo;
@ -33,19 +34,23 @@ const WIKI_LIST_COLUMNS: ReadonlyArray<PrintListColumn<LocalKnowledgeSummary>> =
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true }, { key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
]; ];
const WIKI_SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<LocalKnowledgeSearchResult>> = [ function wikiSearchColumns(
{ rows: ReadonlyArray<LocalKnowledgeSearchResult>,
key: 'score', ): ReadonlyArray<PrintListColumn<LocalKnowledgeSearchResult>> {
label: 'SCORE', return [
plain: 'score=', {
role: 'badge', key: 'score',
prettyFormat: (value) => `${Math.round(Number(value) * 100)}%`, label: 'SCORE',
dim: true, plain: 'score=',
}, role: 'badge',
{ key: 'scope', label: 'SCOPE', plain: '' }, prettyFormat: createRankBadgeFormatter(rows),
{ key: 'key', label: 'KEY', plain: '' }, dim: true,
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true }, },
]; { key: 'scope', label: 'SCOPE', plain: '' },
{ key: 'key', label: 'KEY', plain: '' },
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
];
}
interface KtxKnowledgeDeps { interface KtxKnowledgeDeps {
embeddingService?: KtxEmbeddingPort | null; embeddingService?: KtxEmbeddingPort | null;
@ -65,6 +70,26 @@ function wikiSearchEmbeddingService(
return provider ? new KtxIngestEmbeddingPortAdapter(provider) : null; return provider ? new KtxIngestEmbeddingPortAdapter(provider) : null;
} }
/**
 * Writes wiki search diagnostics to `io.stderr`.
 *
 * Emits one summary line (search mode, whether an embedding service is configured,
 * result count) followed by one line per lane entry found on the first result.
 *
 * NOTE(review): lane lines are read from `results[0]` only, so an empty result set
 * produces the summary line and no lane lines.
 *
 * @param io CLI I/O whose `stderr` receives the diagnostics.
 * @param input Search mode, embedding availability, and the search results to describe.
 */
function writeWikiSearchDebug(
  io: KtxKnowledgeIo,
  input: {
    mode: string;
    embeddingConfigured: boolean;
    results: LocalKnowledgeSearchResult[];
  },
): void {
  const { mode, embeddingConfigured, results } = input;
  const embeddingLabel = embeddingConfigured ? 'configured' : 'unconfigured';
  io.stderr.write(`[debug] wiki search mode=${mode} embedding=${embeddingLabel} results=${results.length}\n`);

  const [firstResult] = results;
  const laneReports = firstResult?.lanes ?? [];
  for (const { lane, status, returnedCandidateCount, weight, reason } of laneReports) {
    // The reason is appended only when the lane reports a non-empty one.
    const reasonSuffix = reason ? ` reason=${reason}` : '';
    io.stderr.write(
      `[debug] wiki search lane=${lane} status=${status} returned=${returnedCandidateCount} weight=${weight}${reasonSuffix}\n`,
    );
  }
}
export async function runKtxKnowledge( export async function runKtxKnowledge(
args: KtxKnowledgeArgs, args: KtxKnowledgeArgs,
io: KtxKnowledgeIo = process, io: KtxKnowledgeIo = process,
@ -89,12 +114,20 @@ export async function runKtxKnowledge(
return 0; return 0;
} }
if (args.command === 'search') { if (args.command === 'search') {
const embeddingService = wikiSearchEmbeddingService(project, deps);
const results = await searchLocalKnowledgePages(project, { const results = await searchLocalKnowledgePages(project, {
query: args.query, query: args.query,
userId: args.userId, userId: args.userId,
embeddingService: wikiSearchEmbeddingService(project, deps), embeddingService,
limit: args.limit, limit: args.limit,
}); });
if (args.debug) {
writeWikiSearchDebug(io, {
mode: project.config.storage.search,
embeddingConfigured: embeddingService !== null,
results,
});
}
const mode = resolveOutputMode({ explicit: args.output, json: args.json, io }); const mode = resolveOutputMode({ explicit: args.output, json: args.json, io });
let emptyMessage = `No local wiki pages matched "${args.query}"`; let emptyMessage = `No local wiki pages matched "${args.query}"`;
let emptyHint = 'Run `ktx wiki list` to inspect available pages.'; let emptyHint = 'Run `ktx wiki list` to inspect available pages.';
@ -107,7 +140,7 @@ export async function runKtxKnowledge(
} }
printList<LocalKnowledgeSearchResult>({ printList<LocalKnowledgeSearchResult>({
rows: results, rows: results,
columns: WIKI_SEARCH_COLUMNS, columns: wikiSearchColumns(results),
groupBy: 'scope', groupBy: 'scope',
emptyMessage, emptyMessage,
emptyHint, emptyHint,

View file

@ -1,6 +1,7 @@
import { mkdtemp, rm, writeFile } from 'node:fs/promises'; import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os'; import { tmpdir } from 'node:os';
import { join } from 'node:path'; import { join } from 'node:path';
import { stripVTControlCharacters } from 'node:util';
import Database from 'better-sqlite3'; import Database from 'better-sqlite3';
import { initKtxProject } from '@ktx/context/project'; import { initKtxProject } from '@ktx/context/project';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
@ -98,6 +99,23 @@ describe('runKtxSl', () => {
}); });
}); });
// Pretty-mode `sl search` must label hits with rank badges ("#1") and must not print any percentage.
it('prints semantic-layer search rank badges in pretty output', async () => {
const projectDir = join(tempDir, 'rank-project');
await seedSlSource({ projectDir });
const searchIo = makeIo();
// Force pretty output explicitly via the `output` argument.
await expect(
runKtxSl(
{ command: 'search', projectDir, connectionId: 'warehouse', query: 'order', output: 'pretty' },
searchIo.io,
),
).resolves.toBe(0);
// Remove ANSI/VT escape sequences so the assertions see only the visible text.
const stdout = stripVTControlCharacters(searchIo.stdout());
expect(stdout).toMatch(/#1\s+orders/);
// No '%' anywhere in the output, i.e. no percentage-style badge.
expect(stdout).not.toContain('%');
});
it('prints semantic-layer list and search as public JSON envelopes', async () => { it('prints semantic-layer list and search as public JSON envelopes', async () => {
const projectDir = join(tempDir, 'project'); const projectDir = join(tempDir, 'project');
await seedSlSource({ await seedSlSource({

View file

@ -109,7 +109,7 @@ async function printSlSources(input: {
emptyHint?: string; emptyHint?: string;
}): Promise<void> { }): Promise<void> {
const { resolveOutputMode } = await import('./io/mode.js'); const { resolveOutputMode } = await import('./io/mode.js');
const { printList } = await import('./io/print-list.js'); const { createRankBadgeFormatter, printList } = await import('./io/print-list.js');
const mode = resolveOutputMode({ explicit: input.output, json: input.json, io: input.io }); const mode = resolveOutputMode({ explicit: input.output, json: input.json, io: input.io });
if (input.command === 'sl search') { if (input.command === 'sl search') {
@ -119,7 +119,7 @@ async function printSlSources(input: {
label: 'SCORE', label: 'SCORE',
plain: 'score=', plain: 'score=',
role: 'badge', role: 'badge',
prettyFormat: (value) => `${Math.round(Number(value) * 100)}%`, prettyFormat: createRankBadgeFormatter(input.rows as ReadonlyArray<LocalSlSourceSearchResult>),
dim: true, dim: true,
}, },
{ key: 'connectionId', label: 'CONNECTION', plain: '' }, { key: 'connectionId', label: 'CONNECTION', plain: '' },

View file

@ -22,6 +22,25 @@ class FakeEmbeddingPort {
} }
} }
/**
 * Deterministic embedding test double that maps text onto one of three 2-D vectors.
 *
 * - `[1, 0]`: the text is exactly "annual recurring revenue" (ignoring case and
 *   surrounding whitespace) or contains "arr" or "contract-first".
 * - `[0, 1]`: otherwise, the text contains "net revenue", "gross", or "refund".
 * - `[0.5, 0.5]`: everything else.
 *
 * All substring checks are case-insensitive.
 */
class ArrSynonymEmbeddingPort {
  readonly maxBatchSize = 16;

  /** Returns the fixed vector for a single text according to the rules on the class. */
  async computeEmbedding(text: string): Promise<number[]> {
    const normalized = text.toLowerCase();
    const mentions = (needle: string): boolean => normalized.includes(needle);

    // The ARR rule is checked first, so it wins over the net-revenue rule when both match.
    const isArrLike =
      normalized.trim() === 'annual recurring revenue' || mentions('arr') || mentions('contract-first');
    if (isArrLike) {
      return [1, 0];
    }

    const isNetRevenueLike = mentions('net revenue') || mentions('gross') || mentions('refund');
    return isNetRevenueLike ? [0, 1] : [0.5, 0.5];
  }

  /** Embeds every text with `computeEmbedding`, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const pending = texts.map((text) => this.computeEmbedding(text));
    return Promise.all(pending);
  }
}
describe('local knowledge helpers', () => { describe('local knowledge helpers', () => {
let tempDir: string; let tempDir: string;
let project: KtxLocalProject; let project: KtxLocalProject;
@ -131,6 +150,37 @@ describe('local knowledge helpers', () => {
}); });
}); });
// The query never contains the literal "arr", yet the ARR page is expected to rank first,
// with the semantic lane available and listed among its match reasons.
it('ranks ARR synonym queries by semantic page embeddings over stronger lexical revenue matches', async () => {
// Target page: describes ARR without using the phrase "annual recurring revenue".
await writeLocalKnowledgePage(project, {
key: 'arr-definition',
scope: 'GLOBAL',
summary: 'ARR is calculated contract-first for active customer contracts.',
content: 'Contract-first active contract value takes precedence over subscription values.',
tags: ['arr', 'contracts', 'finance'],
});
// Distractor page: shares the literal words "annual" and "revenue" with the query.
await writeLocalKnowledgePage(project, {
key: 'net-revenue-definition',
scope: 'GLOBAL',
summary: 'Net revenue definition',
content: 'Annual revenue is gross invoice revenue minus credits and refunds.',
tags: ['revenue', 'finance'],
});
// ArrSynonymEmbeddingPort returns [1, 0] for this exact query and for ARR-like text.
// NOTE(review): which page fields get embedded is decided by the search implementation — confirm.
const search = await searchLocalKnowledgePages(project, {
query: 'annual recurring revenue',
userId: 'local',
limit: 2,
embeddingService: new ArrSynonymEmbeddingPort(),
});
expect(search.map((result) => result.key)).toEqual(['arr-definition', 'net-revenue-definition']);
expect(search[0]).toMatchObject({
key: 'arr-definition',
matchReasons: expect.arrayContaining(['semantic']),
lanes: expect.arrayContaining([expect.objectContaining({ lane: 'semantic', status: 'available' })]),
});
});
it('reports semantic lane as skipped when wiki embeddings are not configured', async () => { it('reports semantic lane as skipped when wiki embeddings are not configured', async () => {
await writeLocalKnowledgePage(project, { await writeLocalKnowledgePage(project, {
key: 'metrics-revenue', key: 'metrics-revenue',

View file

@ -309,6 +309,7 @@ async function searchLocalKnowledgePagesWithSqlite(
}, },
{ {
lane: 'semantic', lane: 'semantic',
weight: 3,
async generate(args) { async generate(args) {
if (!embeddingService) { if (!embeddingService) {
return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' }; return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' };
@ -320,7 +321,9 @@ async function searchLocalKnowledgePagesWithSqlite(
limit: args.laneCandidatePoolLimit, limit: args.laneCandidatePoolLimit,
}); });
return { return {
candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })), candidates: rows
.filter((row) => row.rawScore > 0)
.map((row, index) => ({ id: row.id, rank: index + 1, rawScore: row.rawScore })),
}; };
} catch (error) { } catch (error) {
return { return {