feat: return sl search snippets

This commit is contained in:
Andrey Avtomonov 2026-05-11 17:25:46 +02:00
parent 19585e67ac
commit 633f9359c2
5 changed files with 73 additions and 9 deletions

View file

@ -49,5 +49,5 @@ export interface SlSourcesIndexPort {
queryText: string,
limit: number,
minRrfScore?: number,
): Promise<Array<{ sourceName: string; rrfScore: number }>>;
): Promise<Array<{ sourceName: string; rrfScore: number; snippet?: string }>>;
}

View file

@ -191,4 +191,36 @@ describe('SlSearchService', () => {
expect(text).toContain('commonly joined to public.customers on customer_id');
expect(text).toContain('stale since 2026-05-01T00:00:00.000Z');
});
// Checks that SlSearchService.search passes the `snippet` produced by the
// source index through to its own results, next to the index's `rrfScore`
// exposed under the `score` key.
it('preserves FTS snippets returned by the source index', async () => {
  // Highlighted fragment the stubbed index reports for its single hit.
  const markedSnippet = 'usage: paid <mark>order</mark> lifecycle';

  // Embedding stub: only computeEmbedding yields a value (a fixed 2-d vector).
  const embeddingStub = {
    maxBatchSize: 16,
    computeEmbedding: vi.fn(async () => [1, 0]),
    computeEmbeddingsBulk: vi.fn(),
  };

  // Source-index stub: every method is a bare mock except `search`, which
  // resolves to one hit carrying a snippet.
  const indexStub = {
    upsertSources: vi.fn(),
    getExistingSearchTexts: vi.fn(),
    deleteStale: vi.fn(),
    deleteByConnection: vi.fn(),
    deleteByConnectionAndName: vi.fn(),
    search: vi.fn(async () => [
      {
        sourceName: 'orders',
        rrfScore: 0.75,
        snippet: markedSnippet,
      },
    ]),
  };

  const service = new SlSearchService(embeddingStub, indexStub);

  const hits = await service.search('warehouse', 'order lifecycle', 10);

  expect(hits).toEqual([
    {
      sourceName: 'orders',
      score: 0.75,
      snippet: markedSnippet,
    },
  ]);
});
});

View file

@ -168,7 +168,7 @@ export class SlSearchService {
query: string,
limit = 15,
minRrfScore = 0,
): Promise<Array<{ sourceName: string; score: number }>> {
): Promise<Array<{ sourceName: string; score: number; snippet?: string }>> {
let queryEmbedding: number[] | null = null;
try {
queryEmbedding = await this.embeddingService.computeEmbedding(query);
@ -179,7 +179,11 @@ export class SlSearchService {
}
const results = await this.slSourcesRepository.search(connectionId, queryEmbedding, query, limit, minRrfScore);
return results.map((r) => ({ sourceName: r.sourceName, score: r.rrfScore }));
return results.map((result) => ({
sourceName: result.sourceName,
score: result.rrfScore,
...(result.snippet ? { snippet: result.snippet } : {}),
}));
}
buildSearchText(source: SemanticLayerSource, priority: string[] = DEFAULT_PRIORITY): string {

View file

@ -17,7 +17,7 @@ describe('SqliteSlSourcesIndex', () => {
await rm(tempDir, { recursive: true, force: true });
});
it('creates SQLite tables and searches indexed source text', async () => {
it('creates SQLite tables and searches indexed source text with FTS snippets', async () => {
const index = new SqliteSlSourcesIndex({ dbPath });
await index.upsertSources('warehouse', [
@ -34,10 +34,24 @@ describe('SqliteSlSourcesIndex', () => {
]);
await expect(access(dbPath)).resolves.toBeUndefined();
expect(await index.search('warehouse', null, 'gross revenue', 10)).toEqual([
const directResults = await index.search('warehouse', null, 'gross revenue', 10);
expect(directResults).toEqual([
expect.objectContaining({
sourceName: 'orders',
rrfScore: expect.any(Number),
snippet: expect.stringContaining('<mark>'),
}),
]);
expect(directResults[0]?.snippet).toContain('revenue');
const lexicalCandidates = await index.searchLexicalCandidates({ queryText: 'gross revenue', limit: 10 });
expect(lexicalCandidates).toEqual([
expect.objectContaining({
id: 'warehouse/orders',
connectionId: 'warehouse',
sourceName: 'orders',
snippet: expect.stringContaining('<mark>'),
}),
]);
});

View file

@ -19,6 +19,7 @@ type SearchRow = {
connection_id?: string;
source_name: string;
rank: number;
snippet?: string | null;
};
export interface SlSqliteLaneCandidate {
@ -27,6 +28,7 @@ export interface SlSqliteLaneCandidate {
sourceName: string;
rank: number;
rawScore: number;
snippet?: string;
}
export interface SlSqliteDictionaryCandidate extends SlSqliteLaneCandidate {
@ -334,7 +336,11 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
const rows = this.db
.prepare(
`
SELECT connection_id, source_name, bm25(local_sl_sources_fts) AS rank
SELECT
connection_id,
source_name,
bm25(local_sl_sources_fts) AS rank,
snippet(local_sl_sources_fts, 2, '<mark>', '</mark>', '...', 12) AS snippet
FROM local_sl_sources_fts
WHERE local_sl_sources_fts MATCH ?
${connectionPredicate}
@ -350,6 +356,7 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
sourceName: row.source_name,
rank: index + 1,
rawScore: Number(row.rank),
...(typeof row.snippet === 'string' && row.snippet.length > 0 ? { snippet: row.snippet } : {}),
}));
}
@ -499,7 +506,7 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
queryText: string,
limit: number,
minRrfScore = 0,
): Promise<Array<{ sourceName: string; rrfScore: number }>> {
): Promise<Array<{ sourceName: string; rrfScore: number; snippet?: string }>> {
const ftsQuery = normalizeFtsQuery(queryText);
if (!ftsQuery) {
return [];
@ -508,7 +515,10 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
const rows = this.db
.prepare(
`
SELECT source_name, bm25(local_sl_sources_fts) AS rank
SELECT
source_name,
bm25(local_sl_sources_fts) AS rank,
snippet(local_sl_sources_fts, 2, '<mark>', '</mark>', '...', 12) AS snippet
FROM local_sl_sources_fts
WHERE connection_id = ?
AND local_sl_sources_fts MATCH ?
@ -519,7 +529,11 @@ export class SqliteSlSourcesIndex implements SlSourcesIndexPort {
.all(connectionId, ftsQuery, Math.max(1, limit)) as SearchRow[];
return rows
.map((row) => ({ sourceName: row.source_name, rrfScore: scoreFromRank(row.rank) }))
.map((row) => ({
sourceName: row.source_name,
rrfScore: scoreFromRank(row.rank),
...(typeof row.snippet === 'string' && row.snippet.length > 0 ? { snippet: row.snippet } : {}),
}))
.filter((row) => row.rrfScore >= minRrfScore);
}