mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat(cli): improve search ranking output (#123)
This commit is contained in:
parent
d3d58a279b
commit
74be832aea
12 changed files with 267 additions and 35 deletions
|
|
@ -141,6 +141,9 @@ Semantic-layer list and search commands return human-readable output by
|
|||
default. Use `--json` on `list` or `search` when an agent needs structured
|
||||
output. Use `--format sql` on `query` to inspect generated SQL before
|
||||
execution, or leave `--format json` for the compiled query and optional rows.
|
||||
Pretty `sl search` output shows rank badges (`#1`, `#2`, and so on) for the
|
||||
displayed results. Plain and JSON output keep the raw `score` value, which is a
|
||||
ranking score rather than a percentage.
|
||||
|
||||
```json
|
||||
{
|
||||
|
|
|
|||
|
|
@ -43,6 +43,12 @@ need to add or update wiki knowledge.
|
|||
| `--output <mode>` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` |
|
||||
| `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` |
|
||||
|
||||
`wiki search` uses hybrid search when `storage.search` is `sqlite-fts5`. KTX
|
||||
combines lexical SQLite FTS5 matches, token matches, and semantic matches from
|
||||
wiki page embeddings stored in `.ktx/db.sqlite`. If embeddings are not
|
||||
configured or the embedding backend is unavailable, KTX skips the semantic lane
|
||||
and keeps lexical and token results.
|
||||
|
||||
## Examples
|
||||
|
||||
```bash
|
||||
|
|
@ -60,14 +66,21 @@ ktx wiki search "monthly recurring revenue" --json --limit 10
|
|||
|
||||
# Print search results as TSV
|
||||
ktx wiki search "monthly recurring revenue" --output plain
|
||||
|
||||
# Inspect which search lanes were used
|
||||
ktx --debug wiki search "monthly recurring revenue" --json
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
Wiki commands print clack-style pretty output in a TTY and TSV-style plain
|
||||
output when requested. JSON output wraps the items with a command metadata
|
||||
envelope. Open the matching Markdown files directly when you need the full page
|
||||
contents.
|
||||
envelope. Search results include `matchReasons` and `lanes` metadata so you can
|
||||
see whether lexical, token, or semantic search contributed to the ranking. Open
|
||||
the matching Markdown files directly when you need the full page contents.
|
||||
Pretty search output shows rank badges (`#1`, `#2`, and so on) for the displayed
|
||||
results. Plain and JSON output keep the raw `score` value, which is a ranking
|
||||
score rather than a percentage.
|
||||
|
||||
```json
|
||||
{
|
||||
|
|
@ -77,16 +90,49 @@ contents.
|
|||
{
|
||||
"key": "revenue-definitions",
|
||||
"summary": "Canonical revenue metric definitions",
|
||||
"score": 0.92
|
||||
"score": 0.92,
|
||||
"matchReasons": ["lexical", "semantic"],
|
||||
"lanes": [
|
||||
{
|
||||
"lane": "lexical",
|
||||
"status": "available",
|
||||
"requestedCandidatePoolLimit": 25,
|
||||
"effectiveCandidatePoolLimit": 25,
|
||||
"returnedCandidateCount": 3,
|
||||
"weight": 1.5
|
||||
},
|
||||
{
|
||||
"lane": "semantic",
|
||||
"status": "available",
|
||||
"requestedCandidatePoolLimit": 25,
|
||||
"effectiveCandidatePoolLimit": 25,
|
||||
"returnedCandidateCount": 8,
|
||||
"weight": 3
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"meta": {
|
||||
"command": "wiki search"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When you pass the global `--debug` flag, KTX writes search diagnostics to
|
||||
stderr and leaves stdout unchanged. This is useful with `--json` because stdout
|
||||
stays machine-readable:
|
||||
|
||||
```text
|
||||
[debug] wiki search mode=sqlite-fts5 embedding=configured results=2
|
||||
[debug] wiki search lane=lexical status=available returned=1 weight=1.5
|
||||
[debug] wiki search lane=token status=available returned=1 weight=0.75
|
||||
[debug] wiki search lane=semantic status=available returned=2 weight=3
|
||||
```
|
||||
|
||||
## Common errors
|
||||
|
||||
| Error | Cause | Recovery |
|
||||
|-------|-------|----------|
|
||||
| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms, then create a page if the knowledge is missing |
|
||||
| Search returns no results | The query terms do not match summaries, tags, or content, and the semantic lane is unavailable or has no positive matches | Run with `--debug`, check the semantic lane status, retry with business synonyms, then create a page if the knowledge is missing |
|
||||
| A page is missing | No Markdown file exists for that business context | Add a file under `wiki/` or run `ktx ingest <connectionId>` |
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import { type Command, Option } from '@commander-js/extra-typings';
|
||||
import {
|
||||
type CommandWithGlobalOptions,
|
||||
type KtxCliCommandContext,
|
||||
parsePositiveIntegerOption,
|
||||
resolveCommandProjectDir,
|
||||
|
|
@ -14,6 +15,11 @@ async function runKnowledgeArgs(context: KtxCliCommandContext, args: KtxKnowledg
|
|||
context.setExitCode(await runner(args, context.io));
|
||||
}
|
||||
|
||||
/**
 * Reports whether the `debug` option is switched on for this command.
 *
 * Options are read through `optsWithGlobals()` when the command exposes it and
 * through `opts()` otherwise. Only a strict boolean `true` counts as enabled;
 * any other value (including truthy strings) is treated as disabled.
 */
function isDebugEnabled(command: CommandWithGlobalOptions): boolean {
  const { debug } = (command.optsWithGlobals ? command.optsWithGlobals() : command.opts()) as {
    debug?: unknown;
  };
  return debug === true;
}
|
||||
|
||||
export function registerWikiCommands(program: Command, context: KtxCliCommandContext): void {
|
||||
const wiki = program
|
||||
.command('wiki')
|
||||
|
|
@ -83,6 +89,7 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon
|
|||
userId: options.userId,
|
||||
output: options.output,
|
||||
json: options.json,
|
||||
...(isDebugEnabled(command) ? { debug: true } : {}),
|
||||
...(options.limit !== undefined ? { limit: options.limit } : {}),
|
||||
});
|
||||
},
|
||||
|
|
|
|||
|
|
@ -171,6 +171,22 @@ describe('runKtxCli', () => {
|
|||
},
|
||||
searchIo.io,
|
||||
);
|
||||
|
||||
const debugSearchIo = makeIo();
|
||||
await expect(
|
||||
runKtxCli(['--project-dir', tempDir, '--debug', 'wiki', 'search', 'revenue'], debugSearchIo.io, { knowledge }),
|
||||
).resolves.toBe(0);
|
||||
expect(knowledge).toHaveBeenLastCalledWith(
|
||||
{
|
||||
command: 'search',
|
||||
projectDir: tempDir,
|
||||
query: 'revenue',
|
||||
userId: 'local',
|
||||
json: false,
|
||||
debug: true,
|
||||
},
|
||||
debugSearchIo.io,
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects removed public wiki read and write commands', async () => {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { KtxCliIo } from '../cli-runtime.js';
|
||||
import { printList, type PrintListColumn } from './print-list.js';
|
||||
import { createRankBadgeFormatter, printList, type PrintListColumn } from './print-list.js';
|
||||
import { SYMBOLS } from './symbols.js';
|
||||
|
||||
function recorder(): { io: KtxCliIo; out: () => string; err: () => string } {
|
||||
|
|
@ -239,26 +239,26 @@ describe('printList — pretty mode', () => {
|
|||
expect(out).toContain('2 pages');
|
||||
});
|
||||
|
||||
it('renders a leading badge column with prettyFormat in pretty mode', () => {
|
||||
it('renders a leading rank badge column in pretty mode', () => {
|
||||
const r = recorder();
|
||||
interface SearchRow { score: number; scope: string; key: string; summary: string }
|
||||
const rows: SearchRow[] = [
|
||||
{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' },
|
||||
{ score: 0.04, scope: 'GLOBAL', key: 'beta', summary: 'second' },
|
||||
];
|
||||
const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [
|
||||
{
|
||||
key: 'score',
|
||||
label: 'SCORE',
|
||||
plain: 'score=',
|
||||
role: 'badge',
|
||||
prettyFormat: (v) => `${Math.round(Number(v) * 100)}%`,
|
||||
prettyFormat: createRankBadgeFormatter(rows),
|
||||
dim: true,
|
||||
},
|
||||
{ key: 'scope', label: 'SCOPE', plain: '' },
|
||||
{ key: 'key', label: 'KEY', plain: '' },
|
||||
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
|
||||
];
|
||||
const rows: SearchRow[] = [
|
||||
{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' },
|
||||
{ score: 0.04, scope: 'GLOBAL', key: 'beta', summary: 'second' },
|
||||
];
|
||||
printList<SearchRow>({
|
||||
rows,
|
||||
columns: SEARCH_COLUMNS,
|
||||
|
|
@ -270,20 +270,22 @@ describe('printList — pretty mode', () => {
|
|||
io: r.io,
|
||||
});
|
||||
const out = stripAnsi(r.out());
|
||||
expect(out).toMatch(/87%\s+alpha\s+/);
|
||||
expect(out).toMatch(/4%\s+beta\s+/);
|
||||
expect(out).toMatch(/#1\s+alpha\s+/);
|
||||
expect(out).toMatch(/#2\s+beta\s+/);
|
||||
expect(out).not.toContain('%');
|
||||
});
|
||||
|
||||
it('emits the badge column in plain mode using its plain prefix', () => {
|
||||
const r = recorder();
|
||||
interface SearchRow { score: number; scope: string; key: string; summary: string }
|
||||
const rows: SearchRow[] = [{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' }];
|
||||
const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [
|
||||
{
|
||||
key: 'score',
|
||||
label: 'SCORE',
|
||||
plain: 'score=',
|
||||
role: 'badge',
|
||||
prettyFormat: (v) => `${Math.round(Number(v) * 100)}%`,
|
||||
prettyFormat: createRankBadgeFormatter(rows),
|
||||
dim: true,
|
||||
},
|
||||
{ key: 'scope', label: 'SCOPE', plain: '' },
|
||||
|
|
@ -291,7 +293,7 @@ describe('printList — pretty mode', () => {
|
|||
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
|
||||
];
|
||||
printList<SearchRow>({
|
||||
rows: [{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' }],
|
||||
rows,
|
||||
columns: SEARCH_COLUMNS,
|
||||
groupBy: 'scope',
|
||||
mode: 'plain',
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ export interface PrintListColumn<Row> {
|
|||
* - `'suffix'` — trailing em-dash optional value. Default: any column with `optional: true`.
|
||||
*/
|
||||
role?: 'name' | 'metric' | 'badge' | 'suffix';
|
||||
/** Custom pretty-mode value formatter (e.g. score → "87%"). Plain/JSON unaffected. */
|
||||
/** Custom pretty-mode value formatter (for example, score -> "#1"). Plain/JSON unaffected. */
|
||||
prettyFormat?: (value: Row[keyof Row & string], row: Row) => string;
|
||||
}
|
||||
|
||||
|
|
@ -67,6 +67,16 @@ export function printList<Row extends object>(args: PrintListArgs<Row>): void {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds a pretty-mode formatter that renders a row's 1-based position within
 * `rows` as a `#N` badge. The cell value passed to the formatter is ignored.
 *
 * Positions are recorded once, up front, keyed by row object identity. When the
 * same object occurs more than once in `rows`, the last occurrence's position
 * is the one recorded. A row that was not recorded falls back to a live
 * `rows.indexOf(row) + 1` lookup, which yields `#0` when the row is not in
 * `rows` at all.
 *
 * @param rows - The rows in display order.
 * @returns A `prettyFormat`-compatible callback producing `#<position>`.
 */
export function createRankBadgeFormatter<Row extends object>(
  rows: ReadonlyArray<Row>,
): (_value: Row[keyof Row & string], row: Row) => string {
  // WeakMap#set returns the map itself, so it can be threaded through reduce.
  const positionByRow = rows.reduce(
    (lookup, entry, offset) => lookup.set(entry, offset + 1),
    new WeakMap<Row, number>(),
  );

  return (_value, row) => {
    const recorded = positionByRow.get(row);
    const position = recorded === undefined ? rows.indexOf(row) + 1 : recorded;
    return `#${position}`;
  };
}
|
||||
|
||||
/**
 * Returns `true` for values treated as "no value": `undefined`, `null`, or the
 * empty string. Everything else — including `0`, `false`, and whitespace-only
 * strings — is considered present.
 */
function isEmpty(value: unknown): boolean {
  // `== null` matches exactly null and undefined.
  if (value == null) {
    return true;
  }
  return value === '';
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { stripVTControlCharacters } from 'node:util';
|
||||
import { initKtxProject, loadKtxProject } from '@ktx/context/project';
|
||||
import type { KtxEmbeddingPort } from '@ktx/context';
|
||||
import { writeLocalKnowledgePage } from '@ktx/context/wiki';
|
||||
|
|
@ -90,6 +91,24 @@ describe('runKtxKnowledge', () => {
|
|||
expect(searchIo.stdout()).toContain('metrics-revenue');
|
||||
});
|
||||
|
||||
it('prints wiki search rank badges in pretty output', async () => {
|
||||
const projectDir = join(tempDir, 'rank-project');
|
||||
await initKtxProject({ projectDir });
|
||||
await seedWikiPage(projectDir);
|
||||
|
||||
const searchIo = makeIo();
|
||||
await expect(
|
||||
runKtxKnowledge(
|
||||
{ command: 'search', projectDir, query: 'paid order', userId: 'local', output: 'pretty' },
|
||||
searchIo.io,
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
const stdout = stripVTControlCharacters(searchIo.stdout());
|
||||
expect(stdout).toMatch(/#1\s+metrics-revenue/);
|
||||
expect(stdout).not.toContain('%');
|
||||
});
|
||||
|
||||
it('prints wiki list and search as public JSON envelopes', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await initKtxProject({ projectDir });
|
||||
|
|
@ -156,4 +175,29 @@ describe('runKtxKnowledge', () => {
|
|||
expect(searchIo.stdout()).toContain('active-contract-arr-open-tickets');
|
||||
expect(searchIo.stderr()).toBe('');
|
||||
});
|
||||
|
||||
it('writes wiki search lane diagnostics to stderr when debug is enabled', async () => {
|
||||
const projectDir = join(tempDir, 'debug-project');
|
||||
await initKtxProject({ projectDir });
|
||||
await seedWikiPage(projectDir);
|
||||
|
||||
const searchIo = makeIo();
|
||||
await expect(
|
||||
runKtxKnowledge(
|
||||
{ command: 'search', projectDir, query: 'paid order', userId: 'local', json: true, debug: true },
|
||||
searchIo.io,
|
||||
{ embeddingService: new FakeEmbeddingPort() },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(JSON.parse(searchIo.stdout())).toMatchObject({
|
||||
kind: 'list',
|
||||
data: { items: [expect.objectContaining({ key: 'metrics-revenue' })] },
|
||||
meta: { command: 'wiki search' },
|
||||
});
|
||||
expect(searchIo.stderr()).toContain('[debug] wiki search mode=sqlite-fts5');
|
||||
expect(searchIo.stderr()).toContain('embedding=configured');
|
||||
expect(searchIo.stderr()).toContain('lane=lexical status=available');
|
||||
expect(searchIo.stderr()).toContain('lane=semantic status=available');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import {
|
|||
searchLocalKnowledgePages,
|
||||
} from '@ktx/context/wiki';
|
||||
import { resolveOutputMode } from './io/mode.js';
|
||||
import { printList, type PrintListColumn } from './io/print-list.js';
|
||||
import { createRankBadgeFormatter, printList, type PrintListColumn } from './io/print-list.js';
|
||||
|
||||
export type KtxKnowledgeArgs =
|
||||
| { command: 'list'; projectDir: string; userId: string; output?: string; json?: boolean }
|
||||
|
|
@ -23,6 +23,7 @@ export type KtxKnowledgeArgs =
|
|||
output?: string;
|
||||
json?: boolean;
|
||||
limit?: number;
|
||||
debug?: boolean;
|
||||
};
|
||||
|
||||
type KtxKnowledgeIo = import('./cli-runtime.js').KtxCliIo;
|
||||
|
|
@ -33,19 +34,23 @@ const WIKI_LIST_COLUMNS: ReadonlyArray<PrintListColumn<LocalKnowledgeSummary>> =
|
|||
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
|
||||
];
|
||||
|
||||
const WIKI_SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<LocalKnowledgeSearchResult>> = [
|
||||
{
|
||||
key: 'score',
|
||||
label: 'SCORE',
|
||||
plain: 'score=',
|
||||
role: 'badge',
|
||||
prettyFormat: (value) => `${Math.round(Number(value) * 100)}%`,
|
||||
dim: true,
|
||||
},
|
||||
{ key: 'scope', label: 'SCOPE', plain: '' },
|
||||
{ key: 'key', label: 'KEY', plain: '' },
|
||||
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
|
||||
];
|
||||
/**
 * Produces the column layout for `wiki search` results.
 *
 * The columns are built per call because the leading `score` badge column gets
 * a pretty-mode formatter from `createRankBadgeFormatter(rows)`, which is bound
 * to the specific result list being printed.
 *
 * @param rows - The search results that will be printed.
 * @returns Column definitions: score badge, scope, key, optional summary.
 */
function wikiSearchColumns(
  rows: ReadonlyArray<LocalKnowledgeSearchResult>,
): ReadonlyArray<PrintListColumn<LocalKnowledgeSearchResult>> {
  const rankBadgeColumn: PrintListColumn<LocalKnowledgeSearchResult> = {
    key: 'score',
    label: 'SCORE',
    plain: 'score=',
    role: 'badge',
    prettyFormat: createRankBadgeFormatter(rows),
    dim: true,
  };
  const scopeColumn: PrintListColumn<LocalKnowledgeSearchResult> = { key: 'scope', label: 'SCOPE', plain: '' };
  const keyColumn: PrintListColumn<LocalKnowledgeSearchResult> = { key: 'key', label: 'KEY', plain: '' };
  const summaryColumn: PrintListColumn<LocalKnowledgeSearchResult> = {
    key: 'summary',
    label: 'SUMMARY',
    plain: '',
    optional: true,
    dim: true,
  };

  return [rankBadgeColumn, scopeColumn, keyColumn, summaryColumn];
}
|
||||
|
||||
interface KtxKnowledgeDeps {
|
||||
embeddingService?: KtxEmbeddingPort | null;
|
||||
|
|
@ -65,6 +70,26 @@ function wikiSearchEmbeddingService(
|
|||
return provider ? new KtxIngestEmbeddingPortAdapter(provider) : null;
|
||||
}
|
||||
|
||||
/**
 * Writes `[debug] wiki search …` diagnostics to `io.stderr`.
 *
 * One summary line is written first (search mode, whether an embedding service
 * is configured, result count). It is followed by one line per lane, using the
 * `lanes` array of the first result only; when there are no results, or the
 * first result has no `lanes`, no lane lines are written.
 *
 * @param io - Target whose `stderr` receives the diagnostics.
 * @param input - Search mode, embedding availability, and the search results.
 */
function writeWikiSearchDebug(
  io: KtxKnowledgeIo,
  input: {
    mode: string;
    embeddingConfigured: boolean;
    results: LocalKnowledgeSearchResult[];
  },
): void {
  const { mode, embeddingConfigured, results } = input;
  const embeddingLabel = embeddingConfigured ? 'configured' : 'unconfigured';
  io.stderr.write(`[debug] wiki search mode=${mode} embedding=${embeddingLabel} results=${results.length}\n`);

  const laneReports = results[0]?.lanes ?? [];
  laneReports.forEach((report) => {
    // The reason is appended only when the lane supplies a truthy one.
    const reasonSuffix = report.reason ? ` reason=${report.reason}` : '';
    const fields = [
      '[debug] wiki search',
      `lane=${report.lane}`,
      `status=${report.status}`,
      `returned=${report.returnedCandidateCount}`,
      `weight=${report.weight}`,
    ];
    io.stderr.write(`${fields.join(' ')}${reasonSuffix}\n`);
  });
}
|
||||
|
||||
export async function runKtxKnowledge(
|
||||
args: KtxKnowledgeArgs,
|
||||
io: KtxKnowledgeIo = process,
|
||||
|
|
@ -89,12 +114,20 @@ export async function runKtxKnowledge(
|
|||
return 0;
|
||||
}
|
||||
if (args.command === 'search') {
|
||||
const embeddingService = wikiSearchEmbeddingService(project, deps);
|
||||
const results = await searchLocalKnowledgePages(project, {
|
||||
query: args.query,
|
||||
userId: args.userId,
|
||||
embeddingService: wikiSearchEmbeddingService(project, deps),
|
||||
embeddingService,
|
||||
limit: args.limit,
|
||||
});
|
||||
if (args.debug) {
|
||||
writeWikiSearchDebug(io, {
|
||||
mode: project.config.storage.search,
|
||||
embeddingConfigured: embeddingService !== null,
|
||||
results,
|
||||
});
|
||||
}
|
||||
const mode = resolveOutputMode({ explicit: args.output, json: args.json, io });
|
||||
let emptyMessage = `No local wiki pages matched "${args.query}"`;
|
||||
let emptyHint = 'Run `ktx wiki list` to inspect available pages.';
|
||||
|
|
@ -107,7 +140,7 @@ export async function runKtxKnowledge(
|
|||
}
|
||||
printList<LocalKnowledgeSearchResult>({
|
||||
rows: results,
|
||||
columns: WIKI_SEARCH_COLUMNS,
|
||||
columns: wikiSearchColumns(results),
|
||||
groupBy: 'scope',
|
||||
emptyMessage,
|
||||
emptyHint,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { stripVTControlCharacters } from 'node:util';
|
||||
import Database from 'better-sqlite3';
|
||||
import { initKtxProject } from '@ktx/context/project';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
|
@ -98,6 +99,23 @@ describe('runKtxSl', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('prints semantic-layer search rank badges in pretty output', async () => {
|
||||
const projectDir = join(tempDir, 'rank-project');
|
||||
await seedSlSource({ projectDir });
|
||||
|
||||
const searchIo = makeIo();
|
||||
await expect(
|
||||
runKtxSl(
|
||||
{ command: 'search', projectDir, connectionId: 'warehouse', query: 'order', output: 'pretty' },
|
||||
searchIo.io,
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
const stdout = stripVTControlCharacters(searchIo.stdout());
|
||||
expect(stdout).toMatch(/#1\s+orders/);
|
||||
expect(stdout).not.toContain('%');
|
||||
});
|
||||
|
||||
it('prints semantic-layer list and search as public JSON envelopes', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await seedSlSource({
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ async function printSlSources(input: {
|
|||
emptyHint?: string;
|
||||
}): Promise<void> {
|
||||
const { resolveOutputMode } = await import('./io/mode.js');
|
||||
const { printList } = await import('./io/print-list.js');
|
||||
const { createRankBadgeFormatter, printList } = await import('./io/print-list.js');
|
||||
const mode = resolveOutputMode({ explicit: input.output, json: input.json, io: input.io });
|
||||
|
||||
if (input.command === 'sl search') {
|
||||
|
|
@ -119,7 +119,7 @@ async function printSlSources(input: {
|
|||
label: 'SCORE',
|
||||
plain: 'score=',
|
||||
role: 'badge',
|
||||
prettyFormat: (value) => `${Math.round(Number(value) * 100)}%`,
|
||||
prettyFormat: createRankBadgeFormatter(input.rows as ReadonlyArray<LocalSlSourceSearchResult>),
|
||||
dim: true,
|
||||
},
|
||||
{ key: 'connectionId', label: 'CONNECTION', plain: '' },
|
||||
|
|
|
|||
|
|
@ -22,6 +22,25 @@ class FakeEmbeddingPort {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Deterministic embedding stub that maps text to one of three fixed 2-D vectors
 * by keyword, checked case-insensitively:
 *
 * - `[1, 0]` when the trimmed text is exactly "annual recurring revenue", or
 *   the text contains "arr" or "contract-first";
 * - `[0, 1]` when it contains "net revenue", "gross", or "refund";
 * - `[0.5, 0.5]` otherwise.
 *
 * The first group takes precedence when both groups match.
 */
class ArrSynonymEmbeddingPort {
  readonly maxBatchSize = 16;

  /** Returns the fixed vector for a single text according to the class rules. */
  async computeEmbedding(text: string): Promise<number[]> {
    const normalized = text.toLowerCase();
    const mentions = (needle: string): boolean => normalized.includes(needle);

    const isArrLike =
      normalized.trim() === 'annual recurring revenue' || ['arr', 'contract-first'].some(mentions);
    if (isArrLike) {
      return [1, 0];
    }

    const isNetRevenueLike = ['net revenue', 'gross', 'refund'].some(mentions);
    return isNetRevenueLike ? [0, 1] : [0.5, 0.5];
  }

  /** Embeds every text with `computeEmbedding`, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const pending = texts.map((text) => this.computeEmbedding(text));
    return Promise.all(pending);
  }
}
|
||||
|
||||
describe('local knowledge helpers', () => {
|
||||
let tempDir: string;
|
||||
let project: KtxLocalProject;
|
||||
|
|
@ -131,6 +150,37 @@ describe('local knowledge helpers', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('ranks ARR synonym queries by semantic page embeddings over stronger lexical revenue matches', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'arr-definition',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'ARR is calculated contract-first for active customer contracts.',
|
||||
content: 'Contract-first active contract value takes precedence over subscription values.',
|
||||
tags: ['arr', 'contracts', 'finance'],
|
||||
});
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'net-revenue-definition',
|
||||
scope: 'GLOBAL',
|
||||
summary: 'Net revenue definition',
|
||||
content: 'Annual revenue is gross invoice revenue minus credits and refunds.',
|
||||
tags: ['revenue', 'finance'],
|
||||
});
|
||||
|
||||
const search = await searchLocalKnowledgePages(project, {
|
||||
query: 'annual recurring revenue',
|
||||
userId: 'local',
|
||||
limit: 2,
|
||||
embeddingService: new ArrSynonymEmbeddingPort(),
|
||||
});
|
||||
|
||||
expect(search.map((result) => result.key)).toEqual(['arr-definition', 'net-revenue-definition']);
|
||||
expect(search[0]).toMatchObject({
|
||||
key: 'arr-definition',
|
||||
matchReasons: expect.arrayContaining(['semantic']),
|
||||
lanes: expect.arrayContaining([expect.objectContaining({ lane: 'semantic', status: 'available' })]),
|
||||
});
|
||||
});
|
||||
|
||||
it('reports semantic lane as skipped when wiki embeddings are not configured', async () => {
|
||||
await writeLocalKnowledgePage(project, {
|
||||
key: 'metrics-revenue',
|
||||
|
|
|
|||
|
|
@ -309,6 +309,7 @@ async function searchLocalKnowledgePagesWithSqlite(
|
|||
},
|
||||
{
|
||||
lane: 'semantic',
|
||||
weight: 3,
|
||||
async generate(args) {
|
||||
if (!embeddingService) {
|
||||
return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' };
|
||||
|
|
@ -320,7 +321,9 @@ async function searchLocalKnowledgePagesWithSqlite(
|
|||
limit: args.laneCandidatePoolLimit,
|
||||
});
|
||||
return {
|
||||
candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })),
|
||||
candidates: rows
|
||||
.filter((row) => row.rawScore > 0)
|
||||
.map((row, index) => ({ id: row.id, rank: index + 1, rawScore: row.rawScore })),
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue