mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat: return sl search snippets
This commit is contained in:
parent
19585e67ac
commit
633f9359c2
5 changed files with 73 additions and 9 deletions
|
|
@ -49,5 +49,5 @@ export interface SlSourcesIndexPort {
|
|||
queryText: string,
|
||||
limit: number,
|
||||
minRrfScore?: number,
|
||||
): Promise<Array<{ sourceName: string; rrfScore: number }>>;
|
||||
): Promise<Array<{ sourceName: string; rrfScore: number; snippet?: string }>>;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -191,4 +191,36 @@ describe('SlSearchService', () => {
|
|||
expect(text).toContain('commonly joined to public.customers on customer_id');
|
||||
expect(text).toContain('stale since 2026-05-01T00:00:00.000Z');
|
||||
});
|
||||
|
||||
it('preserves FTS snippets returned by the source index', async () => {
|
||||
const service = new SlSearchService(
|
||||
{
|
||||
maxBatchSize: 16,
|
||||
computeEmbedding: vi.fn(async () => [1, 0]),
|
||||
computeEmbeddingsBulk: vi.fn(),
|
||||
},
|
||||
{
|
||||
upsertSources: vi.fn(),
|
||||
getExistingSearchTexts: vi.fn(),
|
||||
deleteStale: vi.fn(),
|
||||
deleteByConnection: vi.fn(),
|
||||
deleteByConnectionAndName: vi.fn(),
|
||||
search: vi.fn(async () => [
|
||||
{
|
||||
sourceName: 'orders',
|
||||
rrfScore: 0.75,
|
||||
snippet: 'usage: paid <mark>order</mark> lifecycle',
|
||||
},
|
||||
]),
|
||||
},
|
||||
);
|
||||
|
||||
await expect(service.search('warehouse', 'order lifecycle', 10)).resolves.toEqual([
|
||||
{
|
||||
sourceName: 'orders',
|
||||
score: 0.75,
|
||||
snippet: 'usage: paid <mark>order</mark> lifecycle',
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -168,7 +168,7 @@ export class SlSearchService {
|
|||
query: string,
|
||||
limit = 15,
|
||||
minRrfScore = 0,
|
||||
): Promise<Array<{ sourceName: string; score: number }>> {
|
||||
): Promise<Array<{ sourceName: string; score: number; snippet?: string }>> {
|
||||
let queryEmbedding: number[] | null = null;
|
||||
try {
|
||||
queryEmbedding = await this.embeddingService.computeEmbedding(query);
|
||||
|
|
@ -179,7 +179,11 @@ export class SlSearchService {
|
|||
}
|
||||
|
||||
const results = await this.slSourcesRepository.search(connectionId, queryEmbedding, query, limit, minRrfScore);
|
||||
return results.map((r) => ({ sourceName: r.sourceName, score: r.rrfScore }));
|
||||
return results.map((result) => ({
|
||||
sourceName: result.sourceName,
|
||||
score: result.rrfScore,
|
||||
...(result.snippet ? { snippet: result.snippet } : {}),
|
||||
}));
|
||||
}
|
||||
|
||||
buildSearchText(source: SemanticLayerSource, priority: string[] = DEFAULT_PRIORITY): string {
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ describe('SqliteSlSourcesIndex', () => {
|
|||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('creates SQLite tables and searches indexed source text', async () => {
|
||||
it('creates SQLite tables and searches indexed source text with FTS snippets', async () => {
|
||||
const index = new SqliteSlSourcesIndex({ dbPath });
|
||||
|
||||
await index.upsertSources('warehouse', [
|
||||
|
|
@ -34,10 +34,24 @@ describe('SqliteSlSourcesIndex', () => {
|
|||
]);
|
||||
|
||||
await expect(access(dbPath)).resolves.toBeUndefined();
|
||||
expect(await index.search('warehouse', null, 'gross revenue', 10)).toEqual([
|
||||
|
||||
const directResults = await index.search('warehouse', null, 'gross revenue', 10);
|
||||
expect(directResults).toEqual([
|
||||
expect.objectContaining({
|
||||
sourceName: 'orders',
|
||||
rrfScore: expect.any(Number),
|
||||
snippet: expect.stringContaining('<mark>'),
|
||||
}),
|
||||
]);
|
||||
expect(directResults[0]?.snippet).toContain('revenue');
|
||||
|
||||
const lexicalCandidates = await index.searchLexicalCandidates({ queryText: 'gross revenue', limit: 10 });
|
||||
expect(lexicalCandidates).toEqual([
|
||||
expect.objectContaining({
|
||||
id: 'warehouse/orders',
|
||||
connectionId: 'warehouse',
|
||||
sourceName: 'orders',
|
||||
snippet: expect.stringContaining('<mark>'),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ type SearchRow = {
|
|||
connection_id?: string;
|
||||
source_name: string;
|
||||
rank: number;
|
||||
snippet?: string | null;
|
||||
};
|
||||
|
||||
export interface SlSqliteLaneCandidate {
|
||||
|
|
@ -27,6 +28,7 @@ export interface SlSqliteLaneCandidate {
|
|||
sourceName: string;
|
||||
rank: number;
|
||||
rawScore: number;
|
||||
snippet?: string;
|
||||
}
|
||||
|
||||
export interface SlSqliteDictionaryCandidate extends SlSqliteLaneCandidate {
|
||||
|
|
@ -334,7 +336,11 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
const rows = this.db
|
||||
.prepare(
|
||||
`
|
||||
SELECT connection_id, source_name, bm25(local_sl_sources_fts) AS rank
|
||||
SELECT
|
||||
connection_id,
|
||||
source_name,
|
||||
bm25(local_sl_sources_fts) AS rank,
|
||||
snippet(local_sl_sources_fts, 2, '<mark>', '</mark>', '...', 12) AS snippet
|
||||
FROM local_sl_sources_fts
|
||||
WHERE local_sl_sources_fts MATCH ?
|
||||
${connectionPredicate}
|
||||
|
|
@ -350,6 +356,7 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
sourceName: row.source_name,
|
||||
rank: index + 1,
|
||||
rawScore: Number(row.rank),
|
||||
...(typeof row.snippet === 'string' && row.snippet.length > 0 ? { snippet: row.snippet } : {}),
|
||||
}));
|
||||
}
|
||||
|
||||
|
|
@ -499,7 +506,7 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
queryText: string,
|
||||
limit: number,
|
||||
minRrfScore = 0,
|
||||
): Promise<Array<{ sourceName: string; rrfScore: number }>> {
|
||||
): Promise<Array<{ sourceName: string; rrfScore: number; snippet?: string }>> {
|
||||
const ftsQuery = normalizeFtsQuery(queryText);
|
||||
if (!ftsQuery) {
|
||||
return [];
|
||||
|
|
@ -508,7 +515,10 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
const rows = this.db
|
||||
.prepare(
|
||||
`
|
||||
SELECT source_name, bm25(local_sl_sources_fts) AS rank
|
||||
SELECT
|
||||
source_name,
|
||||
bm25(local_sl_sources_fts) AS rank,
|
||||
snippet(local_sl_sources_fts, 2, '<mark>', '</mark>', '...', 12) AS snippet
|
||||
FROM local_sl_sources_fts
|
||||
WHERE connection_id = ?
|
||||
AND local_sl_sources_fts MATCH ?
|
||||
|
|
@ -519,7 +529,11 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
|
|||
.all(connectionId, ftsQuery, Math.max(1, limit)) as SearchRow[];
|
||||
|
||||
return rows
|
||||
.map((row) => ({ sourceName: row.source_name, rrfScore: scoreFromRank(row.rank) }))
|
||||
.map((row) => ({
|
||||
sourceName: row.source_name,
|
||||
rrfScore: scoreFromRank(row.rank),
|
||||
...(typeof row.snippet === 'string' && row.snippet.length > 0 ? { snippet: row.snippet } : {}),
|
||||
}))
|
||||
.filter((row) => row.rrfScore >= minRrfScore);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue