feat: hydrate sl search usage metadata

This commit is contained in:
Andrey Avtomonov 2026-05-11 17:28:07 +02:00
parent 633f9359c2
commit cd127571fc
3 changed files with 65 additions and 0 deletions

View file

@ -187,6 +187,53 @@ describe('local semantic-layer helpers', () => {
await expect(access(join(project.projectDir, '.ktx/db.sqlite'))).resolves.toBeUndefined();
});
// Writes a schema shard whose `orders` table carries usage metadata, then
// verifies that a search scoped to the same connection surfaces that metadata:
// the frequency tier, a highlighted snippet, and the 'lexical' match reason.
it('searches historic SQL usage and returns frequency tier plus FTS snippet', async () => {
// Fixture: a single table with a usage narrative and `frequencyTier: high`.
await project.fileStore.writeFile(
'semantic-layer/warehouse/_schema/public.yaml',
`tables:
orders:
table: public.orders
usage:
narrative: Analysts inspect paid order lifecycle by customer segment.
frequencyTier: high
commonFilters:
- status
- created_at
commonGroupBys:
- customer_segment
commonJoins:
- table: public.customers
on:
- customer_id
columns:
- name: order_id
type: string
- name: status
type: string
`,
'ktx',
'ktx@example.com',
'Add usage-backed manifest shard',
);
// The query terms all appear in the usage narrative written above
// ('paid', 'lifecycle', 'customer segment').
const results = await searchLocalSlSources(project, {
connectionId: 'warehouse',
query: 'paid lifecycle customer segment',
});
// Exactly one result is expected; `objectContaining` tolerates any additional
// fields on that result beyond the ones listed here.
expect(results).toEqual([
expect.objectContaining({
connectionId: 'warehouse',
name: 'orders',
path: 'semantic-layer/warehouse/_schema/public.yaml#orders',
frequencyTier: 'high',
snippet: expect.stringContaining('<mark>'),
matchReasons: expect.arrayContaining(['lexical']),
}),
]);
// The snippet must contain the query term itself, not only highlight markup.
expect(results[0]?.snippet).toContain('lifecycle');
});
it('searches all connections with one global hybrid ranking pass', async () => {
await writeLocalSlSource(project, {
connectionId: 'warehouse',

View file

@ -26,6 +26,8 @@ export interface LocalSlSourceSummary {
export interface LocalSlSourceSearchResult extends LocalSlSourceSummary {
score: number;
frequencyTier?: NonNullable<SemanticLayerSource['usage']>['frequencyTier'];
snippet?: string;
matchReasons?: SlSearchMatchReason[];
dictionaryMatches?: SlDictionaryMatch[];
lanes?: SlSearchLaneSummary[];
@ -367,6 +369,10 @@ function candidateKey(summary: LocalSlSourceSummary): string {
return `${summary.connectionId}/${summary.name}`;
}
/**
 * Selects the usage-derived fields that should be copied onto a search result.
 *
 * @param source - Semantic-layer source whose optional `usage` block is inspected.
 * @returns `{ frequencyTier }` when the source has a truthy `usage.frequencyTier`;
 *   otherwise an empty object, so spreading the result adds no key at all
 *   (rather than an explicit `frequencyTier: undefined`).
 */
function searchResultUsageFields(source: SemanticLayerSource): Pick<LocalSlSourceSearchResult, 'frequencyTier'> {
  const tier = source.usage?.frequencyTier;
  if (!tier) {
    return {};
  }
  return { frequencyTier: tier };
}
function tokenLaneCandidates(candidates: LocalSlSearchCandidate[], terms: readonly string[]) {
if (terms.length === 0) {
return [];
@ -483,6 +489,7 @@ export async function searchLocalSlSources(
...result.candidate.summary,
score: result.score,
matchReasons: ['token'],
...searchResultUsageFields(result.candidate.source),
}))
.sort(
(left, right) =>
@ -500,6 +507,7 @@ export async function searchLocalSlSources(
const finalLimit = input.limit ?? candidates.length;
const core = new HybridSearchCore();
const dictionaryEvidence = new Map<string, SlDictionaryMatch[]>();
const lexicalSnippets = new Map<string, string>();
const generators: SearchCandidateGenerator[] = [
{
@ -510,6 +518,11 @@ export async function searchLocalSlSources(
queryText: args.queryText,
limit: args.laneCandidatePoolLimit,
});
for (const row of rows) {
if (row.snippet) {
lexicalSnippets.set(row.id, row.snippet);
}
}
return {
candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })),
};
@ -584,9 +597,12 @@ export async function searchLocalSlSources(
continue;
}
const dictionaryMatches = dictionaryEvidence.get(fused.id);
const snippet = lexicalSnippets.get(fused.id);
hydrated.push({
...candidate.summary,
score: fused.score,
...searchResultUsageFields(candidate.source),
...(snippet ? { snippet } : {}),
matchReasons: fused.matchReasons as SlSearchMatchReason[],
...(dictionaryMatches && dictionaryMatches.length > 0 ? { dictionaryMatches } : {}),
lanes: result.lanes,

View file

@ -554,9 +554,11 @@ export async function searchLocalSlSourcesWithPglitePrototype(
continue;
}
const dictionaryMatches = dictionaryEvidence.get(result.id);
const frequencyTier = candidate.source.usage?.frequencyTier;
hydrated.push({
...candidate.summary,
score: result.score,
...(frequencyTier ? { frequencyTier } : {}),
matchReasons: result.matchReasons as SlSearchMatchReason[],
...(dictionaryMatches && dictionaryMatches.length > 0 ? { dictionaryMatches } : {}),
lanes: fused.lanes,