ktx/packages/context/src/sl/local-sl.ts
Andrey Avtomonov cb8902f1e5
fix(context): merge overlay columns onto manifest columns by name (#94)
* fix(context): merge overlay columns onto manifest columns by name

composeOverlay was appending overlay columns to the manifest column list,
producing duplicate entries when dbt/metabase overlays declared a column
just to attach descriptions. The duplicates carried no `type`, so the
pydantic SourceDefinition rejected them at semantic-query time and broke
`ktx sl query` for every overlay-backed measure. Now overlay columns
match base columns by name (case-insensitive): same-name entries merge
onto the manifest (overlay fields win, type/role fall back to the base,
descriptions merge per source key) and only new names append.

* refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract

Overlay sources now have two distinct collections: `columns:` for computed
columns (requiring `expr` + `type`) and `column_overrides:` for metadata
patches to inherited manifest columns. Composing or loading an overlay that
mixes the two — or references an unknown column — fails with a typed error.

Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` /
`toResolvedWire` as the strict shape sent to the Python engine, and add a
schema contract test that diffs Zod against the Pydantic JSON schema dumped
by `python -m semantic_layer dump-schema`. `SourceDefinition` is now
`extra="forbid"` on the Python side.

`loadAllSources` surfaces per-file load errors instead of swallowing them,
so validation/query paths can report manifest shard parse failures.

* fix(context): make scan description generation resilient and quiet

A transient sampleTable failure during ingest used to take out every
table in a connection: generateTableDescription returned a hardcoded
'Table not found' string into descriptions.ai, and KtxDescriptionGenerator
was constructed without a logger, so the failure left no trail anywhere.

- sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff,
  honouring KtxScanContext.signal via a new KtxAbortedError.
- On retry exhaustion or missing capability, table generation falls back
  to a metadata-only prompt built from column name / native type / comment
  / rawDescriptions. The column path follows the same rule -- call the
  LLM when either samples or rawDescriptions are available; skip only
  when both are absent.
- Logger is now threaded from KtxScanContext into the generator. Failures
  emit structured KtxScanWarning entries (new description_fallback_used
  code, plus existing sampling_failed / enrichment_failed /
  connector_capability_missing). ktx scan groups warnings by code so a
  batch of identical failures collapses to one summary line plus sample.
- Returns null on failure instead of the 'Table not found' sentinel; the
  manifest writer's existing guard already skips empty descriptions, so
  schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS
  already strips stale 'ai' on merge, so existing YAML clears on next run.

Also suppress AI SDK v6 'system in messages' warning: pull system messages
out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages
helper and pass them top-level to generateText (preserves cacheControl
providerOptions on the SystemModelMessage). Agent-runner's local
splitSystemPromptMessages dedupes onto the shared helper.

* test(docs): align examples-docs assertions with revamped docs

PR #103 (setup/guide doc revamp) reworded several CLI examples and
connection labels; the assertions in scripts/examples-docs.test.mjs
still referenced the pre-revamp wording and were failing in CI on main.
Update the regexes to match the post-revamp content:

- drop the `--json` flag from the sl-query example expectation
- move the `Driver:` / `Status: ok` probe to the connection reference,
  which is where that output now lives (driver id is lowercase
  `postgres`, not the display name `PostgreSQL`)
- drop the obsolete `Install \`uv\`...` troubleshooting line
- accept `<connectionId>` everywhere; the docs no longer use the
  hyphenated `<connection-id>` form
- match the `warehouse` connection id used in the quickstart instead of
  the `postgres-warehouse` id only used in the README and setup ref

* fix(sl): skip TS/Python schema contract test when uv is unavailable

The TypeScript checks CI job does not install uv or Python, so the
module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw
ENOENT and failed the suite. Wrap the schema dump in a try/catch and
guard the describe block with `describe.skipIf` so the test skips in
environments without uv. Local dev and any CI job that has uv on PATH
still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00

656 lines
23 KiB
TypeScript

import { join } from 'node:path';
import YAML from 'yaml';
import { z } from 'zod';
import type { KtxEmbeddingPort, KtxFileWriteResult } from '../core/index.js';
import type { KtxLocalProject } from '../project/index.js';
import { HybridSearchCore, type SearchCandidateGenerator } from '../search/index.js';
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
import { sourceDefinitionSchema, sourceOverlaySchema } from './schemas.js';
import {
composeOverlay,
type ManifestTableEntry,
projectManifestEntry,
SemanticLayerService,
toResolvedWire,
} from './semantic-layer.service.js';
import type { PgliteSlSearchPrototypeOwnerOptions } from './pglite-sl-search-prototype.js';
import { loadLatestSlDictionaryEntries } from './sl-dictionary-profile.js';
import { buildSemanticLayerSourceSearchText, SlSearchService } from './sl-search.service.js';
import { SqliteSlSourcesIndex } from './sqlite-sl-sources-index.js';
import type { SemanticLayerSource, SlDictionaryMatch, SlSearchLaneSummary, SlSearchMatchReason } from './types.js';
/** Lightweight description of one semantic-layer source: identity, location and element counts. */
export interface LocalSlSourceSummary {
/** Connection the source belongs to. */
connectionId: string;
/** Source name (the YAML `name` field, or the file name without its `.yaml`/`.yml` extension). */
name: string;
/** Path of the backing file; sources projected from a manifest shard use `<shard path>#<table name>`. */
path: string;
/** Description resolved with the default priority; omitted when nothing resolves. */
description?: string;
/** Number of entries in the source's `columns` list. */
columnCount: number;
/** Number of entries in the source's `measures` list. */
measureCount: number;
/** Number of entries in the source's `joins` list. */
joinCount: number;
}
/** A source summary annotated with search ranking information. */
export interface LocalSlSourceSearchResult extends LocalSlSourceSummary {
/** Ranking score: 1 for empty-query listings, otherwise the token-match fraction or the fused hybrid score. */
score: number;
/** Copied from the source's `usage.frequencyTier` when it is set. */
frequencyTier?: NonNullable<SemanticLayerSource['usage']>['frequencyTier'];
/** Snippet reported by the lexical lane for this source, when one was returned. */
snippet?: string;
/** Reasons the source matched (`['token']` on the token fallback path). */
matchReasons?: SlSearchMatchReason[];
/** Matches reported by the dictionary lane; only present when non-empty. */
dictionaryMatches?: SlDictionaryMatch[];
/** Per-lane summaries of the hybrid search run that produced this result. */
lanes?: SlSearchLaneSummary[];
}
/** Input for `searchLocalSlSources`. */
export interface LocalSlSearchInput {
/** Restricts the search to one connection when provided. */
connectionId?: string;
/** Search text; it is trimmed, and an empty query lists sources instead of ranking them. */
query: string;
/** Embedding provider for the semantic lane; the lane is skipped when absent or null. */
embeddingService?: KtxEmbeddingPort | null;
/** Maximum number of results requested by the caller. */
limit?: number;
/** Selects the PGlite prototype backend; that backend requires `pglite` to be set. */
backend?: 'pglite-owner-prototype';
/** Owner-process options forwarded to the PGlite prototype backend. */
pglite?: PgliteSlSearchPrototypeOwnerOptions;
}
/** A source summary together with its YAML text. */
export interface LocalSlSource extends LocalSlSourceSummary {
/** Raw file content, or YAML re-serialized from a projected/composed source. */
yaml: string;
}
/** A loaded source: summary, YAML text and the in-memory source object. */
export interface LocalSlSourceRecord extends LocalSlSource {
/** The source as projected from a manifest entry, parsed from a standalone file, or composed from an overlay. */
source: SemanticLayerSource;
}
/** Outcome of `validateLocalSlSource`. */
export interface LocalSlValidationResult {
/** True when no validation errors were collected. */
valid: boolean;
/** Human-readable validation messages; empty when `valid` is true. */
errors: string[];
}
// Author name and email passed to the file store when this module writes a semantic-layer source.
const LOCAL_AUTHOR = 'ktx';
const LOCAL_AUTHOR_EMAIL = 'ktx@example.com';
/**
 * Rejects values that are unsafe to embed in a project-relative path.
 *
 * A value is rejected when it is blank, contains `..`, a backslash or `//`,
 * or starts with `/` or `.`.
 *
 * @param kind Label used in the error message.
 * @param value Token to check.
 * @returns The unchanged `value` when it is acceptable.
 * @throws Error `Unsafe <kind>: <value>` when the token is rejected.
 */
function assertSafePathToken(kind: string, value: string): string {
  const isBlank = value.trim().length === 0;
  const hasForbiddenSequence = ['..', '\\', '//'].some((sequence) => value.includes(sequence));
  const hasForbiddenPrefix = ['/', '.'].some((prefix) => value.startsWith(prefix));
  if (isBlank || hasForbiddenSequence || hasForbiddenPrefix) {
    throw new Error(`Unsafe ${kind}: ${value}`);
  }
  return value;
}
/**
 * Ensures a connection id starts with an ASCII letter or digit and otherwise
 * contains only ASCII letters, digits, `_` or `-`, then applies the generic
 * path-token check.
 *
 * @returns The unchanged connection id.
 * @throws Error when the id does not match the pattern or fails the path-token check.
 */
function assertSafeConnectionId(connectionId: string): string {
  const matchesIdPattern = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
  if (matchesIdPattern) {
    return assertSafePathToken('connection id', connectionId);
  }
  throw new Error(`Unsafe connection id: ${connectionId}`);
}
/**
 * Non-throwing check that a value is a string matching the connection-id pattern
 * (leading ASCII letter or digit, then letters, digits, `_` or `-`).
 */
function isSafeConnectionId(connectionId: string | undefined): connectionId is string {
  if (typeof connectionId !== 'string') {
    return false;
  }
  return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId);
}
/**
 * Ensures a source name starts with a lowercase ASCII letter or digit and
 * otherwise contains only lowercase letters, digits or `_`, then applies the
 * generic path-token check.
 *
 * @returns The unchanged source name.
 * @throws Error when the name does not match the pattern or fails the path-token check.
 */
function assertSafeSourceName(sourceName: string): string {
  const isWellFormed = /^[a-z0-9][a-z0-9_]*$/.test(sourceName);
  if (isWellFormed) {
    return assertSafePathToken('semantic-layer source name', sourceName);
  }
  throw new Error(`Unsafe semantic-layer source name: ${sourceName}`);
}
/** Type guard: true for any non-null, non-array value whose `typeof` is `'object'`. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
/**
 * Builds the project-relative path `semantic-layer/<connectionId>/<sourceName>.yaml`.
 *
 * @throws Error when the connection id or the source name is rejected by its safety check.
 */
function slPath(connectionId: string, sourceName: string): string {
  // The connection id is checked first, so its error wins when both are unsafe.
  const safeConnectionId = assertSafeConnectionId(connectionId);
  const safeSourceName = assertSafeSourceName(sourceName);
  return `semantic-layer/${safeConnectionId}/${safeSourceName}.yaml`;
}
/**
 * Derives a source name from a file path: the text after the last `/`, with a
 * trailing `.yaml` or `.yml` extension removed.
 */
function sourceNameFromPath(path: string): string {
  // lastIndexOf returns -1 when there is no slash, so the slice then starts at 0.
  const fileName = path.slice(path.lastIndexOf('/') + 1);
  return fileName.replace(/\.ya?ml$/, '');
}
/**
 * Parses YAML text and requires the top-level value to be an object.
 *
 * @throws Error when the parsed document is not a non-array object
 *   (errors thrown by the YAML parser itself also propagate).
 */
function parseYamlRecord(raw: string): Record<string, unknown> {
  const document: unknown = YAML.parse(raw);
  if (isRecord(document)) {
    return document;
  }
  throw new Error('Semantic-layer source YAML must contain an object');
}
/**
 * Collects the non-blank string descriptions of a parsed YAML object.
 *
 * Entries come from the `descriptions` mapping; a flat `description` string is
 * used as the `user` entry when the mapping did not already supply one.
 *
 * @returns The collected map, or `undefined` when nothing was collected.
 */
function descriptionMap(value: Record<string, unknown>): Record<string, string> | undefined {
  const isNonBlankString = (text: unknown): text is string => typeof text === 'string' && text.trim().length > 0;
  const collected: Record<string, string> = {};

  const keyed = value.descriptions;
  const keyedEntries = isRecord(keyed) ? Object.entries(keyed) : [];
  for (const [key, text] of keyedEntries) {
    if (isNonBlankString(text)) {
      collected[key] = text;
    }
  }

  const flat = value.description;
  if (!collected.user && isNonBlankString(flat)) {
    collected.user = flat;
  }

  if (Object.keys(collected).length === 0) {
    return undefined;
  }
  return collected;
}
/**
 * Converts a thrown value into a list of messages.
 *
 * Zod errors yield one `<dotted path>: <message>` line per issue (`<root>` when
 * the path is empty); anything else yields a single message.
 */
function validationErrors(error: unknown): string[] {
  if (!(error instanceof z.ZodError)) {
    const message = error instanceof Error ? error.message : String(error);
    return [message];
  }
  return error.issues.map((issue) => {
    const location = issue.path.join('.') || '<root>';
    return `${location}: ${issue.message}`;
  });
}
/**
 * Summarizes a source directly from its raw YAML text, without schema validation.
 *
 * The name falls back to the file name when the YAML has no non-empty string
 * `name`; counts are 0 for list fields that are missing or not arrays.
 *
 * @throws Error when the YAML does not contain an object.
 */
function summarizeSource(args: { connectionId: string; path: string; raw: string }): LocalSlSourceSummary {
  const { connectionId, path, raw } = args;
  const parsed = parseYamlRecord(raw);
  const lengthOf = (value: unknown): number => (Array.isArray(value) ? value.length : 0);

  const declaredName = parsed.name;
  const name =
    typeof declaredName === 'string' && declaredName.length > 0 ? declaredName : sourceNameFromPath(path);
  const description = resolveDescription(descriptionMap(parsed), { priority: DEFAULT_PRIORITY }) ?? undefined;
  const optionalDescription = description ? { description } : {};

  return {
    connectionId,
    name,
    path,
    ...optionalDescription,
    columnCount: lengthOf(parsed.columns),
    measureCount: lengthOf(parsed.measures),
    joinCount: lengthOf(parsed.joins),
  };
}
/** Serializes a source to YAML 1.1 text with 2-space indentation and `lineWidth: 0`. */
function sourceToYaml(source: SemanticLayerSource): string {
  const stringifyOptions = { indent: 2, lineWidth: 0, version: '1.1' } as const;
  return YAML.stringify(source, stringifyOptions);
}
/**
 * Summarizes an in-memory source: its name, the given path, the description
 * resolved with the default priority, and its column/measure/join counts.
 */
function summarizeSemanticSource(args: {
  connectionId: string;
  path: string;
  source: SemanticLayerSource;
}): LocalSlSourceSummary {
  const { connectionId, path, source } = args;
  const description = resolveDescription(source.descriptions, { priority: DEFAULT_PRIORITY }) ?? undefined;
  const optionalDescription = description ? { description } : {};
  return {
    connectionId,
    name: source.name,
    path,
    ...optionalDescription,
    columnCount: source.columns.length,
    measureCount: source.measures.length,
    joinCount: source.joins.length,
  };
}
/**
 * Returns the `tables` mapping of a parsed manifest shard, or `null` when
 * `tables` is missing or not an object. Entries are not validated here.
 */
function manifestTables(value: Record<string, unknown>): Record<string, ManifestTableEntry> | null {
  const { tables } = value;
  if (!isRecord(tables)) {
    return null;
  }
  return tables as Record<string, ManifestTableEntry>;
}
/**
 * Builds a source from the parsed YAML of a standalone file.
 *
 * All parsed fields are carried over; `name` is forced to the given value,
 * `grain` keeps only string items, and `columns`/`joins`/`measures` default to
 * empty lists when they are not arrays. The result is passed through
 * `normalizeSemanticLayerDescriptions`.
 */
function parsedStandaloneSource(parsed: Record<string, unknown>, name: string): SemanticLayerSource {
  const source = parsed as Partial<SemanticLayerSource>;
  const grain: string[] = Array.isArray(parsed.grain)
    ? parsed.grain.filter((item): item is string => typeof item === 'string')
    : [];
  const columns: SemanticLayerSource['columns'] = Array.isArray(parsed.columns) ? parsed.columns : [];
  const joins: SemanticLayerSource['joins'] = Array.isArray(parsed.joins) ? parsed.joins : [];
  const measures: SemanticLayerSource['measures'] = Array.isArray(parsed.measures) ? parsed.measures : [];
  return normalizeSemanticLayerDescriptions({ ...source, name, grain, columns, joins, measures });
}
/**
 * Loads every semantic-layer source of one connection.
 *
 * YAML files under `semantic-layer/<connectionId>/` are processed in sorted
 * path order, in two passes:
 * 1. Files under `_schema/` are treated as manifest shards; each entry of their
 *    `tables` mapping is projected into a source.
 * 2. Every other file is either a standalone source (it has `table` or `sql`),
 *    which replaces any same-named record, or an overlay, which is composed
 *    onto the same-named record. An overlay with no matching record is skipped.
 *
 * @returns The records sorted by name.
 * @throws Error when the connection id is unsafe, a file does not parse to an
 *   object, or an overlay fails to compose (the message is prefixed with the
 *   overlay's path).
 */
export async function loadLocalSlSourceRecords(
  project: KtxLocalProject,
  input: { connectionId: string },
): Promise<LocalSlSourceRecord[]> {
  const connectionId = assertSafeConnectionId(input.connectionId);
  const dir = `semantic-layer/${connectionId}`;
  const schemaPrefix = `${dir}/_schema/`;

  const listing = await project.fileStore.listFiles(dir);
  const yamlPaths = listing.files.filter((file) => file.endsWith('.yaml') || file.endsWith('.yml')).sort();
  const manifestPaths = yamlPaths.filter((file) => file.startsWith(schemaPrefix));
  const sourcePaths = yamlPaths.filter((file) => !file.startsWith(schemaPrefix));

  const recordsByName = new Map<string, LocalSlSourceRecord>();

  // Pass 1: project manifest shard entries.
  for (const manifestPath of manifestPaths) {
    const { content } = await project.fileStore.readFile(manifestPath);
    const tables = manifestTables(parseYamlRecord(content));
    if (tables === null) {
      continue;
    }
    for (const [tableName, entry] of Object.entries(tables)) {
      const projected = projectManifestEntry(tableName, entry);
      const summary = summarizeSemanticSource({
        connectionId,
        path: `${manifestPath}#${tableName}`,
        source: projected,
      });
      recordsByName.set(tableName, { ...summary, yaml: sourceToYaml(projected), source: projected });
    }
  }

  // Pass 2: standalone sources replace, overlays compose onto what is already loaded.
  for (const sourcePath of sourcePaths) {
    const { content } = await project.fileStore.readFile(sourcePath);
    const parsed = parseYamlRecord(content);
    const declaredName = parsed.name;
    const name =
      typeof declaredName === 'string' && declaredName.length > 0 ? declaredName : sourceNameFromPath(sourcePath);

    if (parsed.table || parsed.sql) {
      const standalone = parsedStandaloneSource(parsed, name);
      const summary = summarizeSource({ connectionId, path: sourcePath, raw: content });
      recordsByName.set(name, { ...summary, yaml: content, source: standalone });
      continue;
    }

    const base = recordsByName.get(name);
    if (!base) {
      continue;
    }

    let composed: SemanticLayerSource;
    try {
      composed = composeOverlay(base.source, parsed);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`${sourcePath}: ${reason}`);
    }
    const summary = summarizeSemanticSource({ connectionId, path: sourcePath, source: composed });
    recordsByName.set(name, { ...summary, yaml: sourceToYaml(composed), source: composed });
  }

  const records = [...recordsByName.values()];
  records.sort((left, right) => left.name.localeCompare(right.name));
  return records;
}
/**
 * Validates semantic-layer YAML without writing anything.
 *
 * YAML with `table` or `sql` is checked against the standalone source schema;
 * anything else is checked as an overlay. For overlays, a named `columns:`
 * entry that lacks `expr` or `type` is reported as a misplaced column patch
 * before schema parsing. When a project and connection id are supplied and the
 * parsed result has a table, physical table references are validated as well.
 * Standalone results are also run through `toResolvedWire`.
 *
 * @param rawYaml YAML text to validate.
 * @param options Optional project/connection for table-reference checks, and a
 *   source name used only to label error messages.
 * @returns `valid: false` with messages instead of throwing; any thrown error
 *   is converted into messages.
 */
export async function validateLocalSlSource(
  rawYaml: string,
  options?: { project?: KtxLocalProject; connectionId?: string; sourceName?: string },
): Promise<LocalSlValidationResult> {
  try {
    const parsed = parseYamlRecord(rawYaml);
    const schema = parsed.table || parsed.sql ? sourceDefinitionSchema : sourceOverlaySchema;

    if (schema === sourceOverlaySchema && Array.isArray(parsed.columns)) {
      const sourceName = options?.sourceName ?? (typeof parsed.name === 'string' ? parsed.name : 'source');
      const fileName = `${sourceName}.yaml`;
      const path =
        options?.connectionId && isSafeConnectionId(options.connectionId)
          ? `semantic-layer/${options.connectionId}/${fileName}`
          : fileName;

      const legacyColumnPatchErrors: string[] = [];
      for (const column of parsed.columns) {
        if (!isRecord(column) || typeof column.name !== 'string') {
          continue;
        }
        // Entries carrying both `expr` and `type` are accepted here and left to the schema.
        if (column.expr && column.type) {
          continue;
        }
        legacyColumnPatchErrors.push(
          `${path}: column '${column.name}' patches a manifest column but is in 'columns:' — move it to 'column_overrides:'`,
        );
      }
      if (legacyColumnPatchErrors.length > 0) {
        return { valid: false, errors: legacyColumnPatchErrors };
      }
    }

    const result = schema.parse(parsed);
    const errors: string[] = [];

    if (options?.project && options.connectionId && 'table' in result && result.table) {
      const service = new SemanticLayerService(options.project.fileStore, {} as never, {} as never);
      const referenceErrors = await service.validatePhysicalTableReferences(options.connectionId, [
        result as SemanticLayerSource,
      ]);
      errors.push(...referenceErrors);
    }

    if ('table' in result || 'sql' in result) {
      // Called for its throw-on-invalid effect; the converted value is discarded.
      toResolvedWire(result as SemanticLayerSource);
    }

    return { valid: errors.length === 0, errors };
  } catch (error) {
    return { valid: false, errors: validationErrors(error) };
  }
}
/**
 * Validates semantic-layer YAML and writes it to
 * `semantic-layer/<connectionId>/<sourceName>.yaml`.
 *
 * The target path is resolved before anything else so that an unsafe
 * connection id or source name is rejected before validation uses the
 * connection id for project lookups. The requested source name is forwarded to
 * validation so its error messages name the file actually being written.
 *
 * @param project Project whose file store receives the write.
 * @param input Connection id, source name and YAML text. A trailing newline is
 *   appended to the YAML when missing.
 * @returns The file store's write result.
 * @throws Error when the connection id or source name is unsafe, the YAML
 *   fails validation, or the YAML `name` differs from `input.sourceName`.
 */
export async function writeLocalSlSource(
  project: KtxLocalProject,
  input: { connectionId: string; sourceName: string; yaml: string },
): Promise<KtxFileWriteResult> {
  // slPath throws on unsafe tokens, so nothing below runs with an unchecked id.
  const path = slPath(input.connectionId, input.sourceName);

  const validation = await validateLocalSlSource(input.yaml, {
    project,
    connectionId: input.connectionId,
    sourceName: input.sourceName,
  });
  if (!validation.valid) {
    throw new Error(`Invalid semantic-layer source: ${validation.errors.join('; ')}`);
  }

  const parsed = parseYamlRecord(input.yaml);
  if (typeof parsed.name === 'string' && parsed.name !== input.sourceName) {
    throw new Error(`Semantic-layer source name "${parsed.name}" does not match requested path "${input.sourceName}"`);
  }

  const content = input.yaml.endsWith('\n') ? input.yaml : `${input.yaml}\n`;
  return project.fileStore.writeFile(
    path,
    content,
    LOCAL_AUTHOR,
    LOCAL_AUTHOR_EMAIL,
    `Write semantic-layer source: ${input.connectionId}/${input.sourceName}`,
  );
}
/**
 * Reads one semantic-layer source.
 *
 * The source's own file is tried first and summarized from its raw YAML. If
 * reading or summarizing it throws, all sources of the connection are loaded
 * and the one with a matching name is returned instead.
 *
 * @returns The source, or `null` when the fallback finds no source with that name.
 * @throws Error when the connection id or source name is unsafe, or when the
 *   fallback load fails.
 */
export async function readLocalSlSource(
  project: KtxLocalProject,
  input: { connectionId: string; sourceName: string },
): Promise<LocalSlSource | null> {
  const path = slPath(input.connectionId, input.sourceName);
  try {
    const { content } = await project.fileStore.readFile(path);
    const summary = summarizeSource({ connectionId: input.connectionId, path, raw: content });
    return { ...summary, yaml: content };
  } catch {
    // Fall through to the connection-wide lookup below.
  }

  const records = await loadLocalSlSourceRecords(project, { connectionId: input.connectionId });
  for (const record of records) {
    if (record.name === input.sourceName) {
      return { ...record };
    }
  }
  return null;
}
/**
 * Lists source summaries for one connection or, when no connection id is
 * given, for every connection found under `semantic-layer/`.
 *
 * Connections are discovered from the second path segment of the listed files;
 * segments that are not valid connection ids are ignored.
 *
 * @returns Summaries sorted by name for a single connection, or by connection
 *   id and then name across connections.
 */
export async function listLocalSlSources(
  project: KtxLocalProject,
  input: { connectionId?: string } = {},
): Promise<LocalSlSourceSummary[]> {
  const toSummary = ({ source: _source, yaml: _yaml, ...summary }: LocalSlSourceRecord): LocalSlSourceSummary =>
    summary;

  if (input.connectionId) {
    const records = await loadLocalSlSourceRecords(project, { connectionId: input.connectionId });
    return records.map(toSummary);
  }

  const listing = await project.fileStore.listFiles('semantic-layer');
  const discovered = new Set<string>();
  for (const file of listing.files) {
    const segment = file.split('/')[1];
    if (isSafeConnectionId(segment)) {
      discovered.add(segment);
    }
  }

  const summaries: LocalSlSourceSummary[] = [];
  for (const connectionId of [...discovered].sort()) {
    const records = await loadLocalSlSourceRecords(project, { connectionId });
    for (const record of records) {
      summaries.push(toSummary(record));
    }
  }

  return summaries.sort((left, right) => {
    const byConnection = left.connectionId.localeCompare(right.connectionId);
    return byConnection || left.name.localeCompare(right.name);
  });
}
/** One searchable source: its summary, the source itself and its precomputed search text. */
interface LocalSlSearchCandidate {
summary: LocalSlSourceSummary;
source: SemanticLayerSource;
/** Text produced by `buildSemanticLayerSourceSearchText` for this source. */
searchText: string;
}
/** Location of the SQLite database used for the search index: `<projectDir>/.ktx/db.sqlite`. */
function sqliteSlDbPath(project: KtxLocalProject): string {
  const { projectDir } = project;
  return join(projectDir, '.ktx', 'db.sqlite');
}
/**
 * Loads search candidates for one connection or, when no connection id is
 * given, for every connection found under `semantic-layer/`.
 *
 * @returns Candidates in name order for a single connection, or sorted by
 *   connection id and then name across connections.
 */
async function loadLocalSlSearchCandidates(
  project: KtxLocalProject,
  input: { connectionId?: string } = {},
): Promise<LocalSlSearchCandidate[]> {
  const toCandidate = (record: LocalSlSourceRecord): LocalSlSearchCandidate => ({
    summary: {
      connectionId: record.connectionId,
      name: record.name,
      path: record.path,
      ...(record.description ? { description: record.description } : {}),
      columnCount: record.columnCount,
      measureCount: record.measureCount,
      joinCount: record.joinCount,
    },
    source: record.source,
    searchText: buildSemanticLayerSourceSearchText(record.source),
  });

  if (input.connectionId) {
    const records = await loadLocalSlSourceRecords(project, { connectionId: input.connectionId });
    return records.map(toCandidate);
  }

  const listing = await project.fileStore.listFiles('semantic-layer');
  const discovered = new Set<string>();
  for (const file of listing.files) {
    const segment = file.split('/')[1];
    if (isSafeConnectionId(segment)) {
      discovered.add(segment);
    }
  }

  const candidates: LocalSlSearchCandidate[] = [];
  for (const connectionId of [...discovered].sort()) {
    // A safe connection id is never empty, so this is the single-connection load above.
    const records = await loadLocalSlSourceRecords(project, { connectionId });
    for (const record of records) {
      candidates.push(toCandidate(record));
    }
  }

  return candidates.sort((left, right) => {
    const byConnection = left.summary.connectionId.localeCompare(right.summary.connectionId);
    return byConnection || left.summary.name.localeCompare(right.summary.name);
  });
}
/** Identifier used to match search hits back to candidates: `<connectionId>/<name>`. */
function candidateKey(summary: LocalSlSourceSummary): string {
  const { connectionId, name } = summary;
  return `${connectionId}/${name}`;
}
/** Returns `{ frequencyTier }` when the source's usage has a truthy frequency tier, otherwise `{}`. */
function searchResultUsageFields(source: SemanticLayerSource): Pick<LocalSlSourceSearchResult, 'frequencyTier'> {
  const frequencyTier = source.usage?.frequencyTier;
  if (!frequencyTier) {
    return {};
  }
  return { frequencyTier };
}
/**
 * Scores candidates by the fraction of query terms contained in their
 * lower-cased search text.
 *
 * Terms are compared as given (only the search text is lower-cased).
 * Candidates matching no term are dropped.
 *
 * @returns Matches ordered by score (descending), then connection id, then
 *   name; an empty list when there are no terms.
 */
function tokenLaneCandidates(candidates: LocalSlSearchCandidate[], terms: readonly string[]) {
  if (terms.length === 0) {
    return [];
  }

  const scored: Array<{ candidate: LocalSlSearchCandidate; score: number }> = [];
  for (const candidate of candidates) {
    const haystack = candidate.searchText.toLowerCase();
    let hits = 0;
    for (const term of terms) {
      if (haystack.includes(term)) {
        hits += 1;
      }
    }
    if (hits > 0) {
      scored.push({ candidate, score: hits / terms.length });
    }
  }

  return scored.sort((left, right) => {
    const byScore = right.score - left.score;
    if (byScore !== 0) {
      return byScore;
    }
    const leftSummary = left.candidate.summary;
    const rightSummary = right.candidate.summary;
    return (
      leftSummary.connectionId.localeCompare(rightSummary.connectionId) ||
      leftSummary.name.localeCompare(rightSummary.name)
    );
  });
}
/**
 * Brings the SQLite search index in line with the given candidates.
 *
 * Candidates are grouped by connection. For each connection:
 * - with an embedding service, the sources are indexed through `SlSearchService`;
 * - without one, search text is upserted with a null embedding and
 *   `deleteStale` is then called with the current source names.
 *
 * Afterwards the latest dictionary entries are loaded for the same connections
 * and each connection's entries are replaced in the index.
 *
 * @param input.index Index to refresh.
 * @param input.project Project used to load dictionary entries.
 * @param input.candidates Candidates to index; connections with no candidates are not touched.
 * @param input.embeddingService Optional embedding provider.
 */
async function refreshHybridSlIndexes(input: {
  index: SqliteSlSourcesIndex;
  project: KtxLocalProject;
  candidates: LocalSlSearchCandidate[];
  embeddingService?: KtxEmbeddingPort | null;
}): Promise<void> {
  // Append in place: re-spreading the accumulated group for every candidate
  // would copy the whole array each time.
  const candidatesByConnection = new Map<string, LocalSlSearchCandidate[]>();
  for (const candidate of input.candidates) {
    const { connectionId } = candidate.summary;
    const group = candidatesByConnection.get(connectionId);
    if (group) {
      group.push(candidate);
    } else {
      candidatesByConnection.set(connectionId, [candidate]);
    }
  }

  for (const [connectionId, group] of candidatesByConnection) {
    if (input.embeddingService) {
      const service = new SlSearchService(input.embeddingService, input.index);
      await service.indexSources(
        connectionId,
        group.map((candidate) => candidate.source),
      );
    } else {
      await input.index.upsertSources(
        connectionId,
        group.map((candidate) => ({
          sourceName: candidate.summary.name,
          searchText: candidate.searchText,
          embedding: null,
        })),
      );
      await input.index.deleteStale(
        connectionId,
        group.map((candidate) => candidate.summary.name),
      );
    }
  }

  const dictionaryEntries = await loadLatestSlDictionaryEntries(input.project, [...candidatesByConnection.keys()]);
  for (const connectionId of candidatesByConnection.keys()) {
    await input.index.replaceDictionaryEntries(
      connectionId,
      dictionaryEntries.filter((entry) => entry.connectionId === connectionId),
    );
  }
}
/**
 * Token-overlap ranking used when the project's search storage is not `sqlite-fts5`.
 *
 * The query is lower-cased and split on whitespace once. Each candidate is
 * scored by the fraction of terms contained in its lower-cased search text;
 * candidates with no match are dropped. Results are ordered by score
 * (descending), then connection id, then path, and truncated to `limit` when
 * one is given.
 */
function rankLocalSlCandidatesByTokenOverlap(
  candidates: LocalSlSearchCandidate[],
  query: string,
  limit: number | undefined,
): LocalSlSourceSearchResult[] {
  const terms = query
    .toLowerCase()
    .split(/\s+/)
    .map((term) => term.trim())
    .filter(Boolean);
  if (terms.length === 0) {
    return [];
  }

  const ranked: LocalSlSourceSearchResult[] = [];
  for (const candidate of candidates) {
    const haystack = candidate.searchText.toLowerCase();
    const score = terms.filter((term) => haystack.includes(term)).length / terms.length;
    if (score > 0) {
      ranked.push({
        ...candidate.summary,
        score,
        matchReasons: ['token'],
        ...searchResultUsageFields(candidate.source),
      });
    }
  }

  ranked.sort(
    (left, right) =>
      right.score - left.score ||
      left.connectionId.localeCompare(right.connectionId) ||
      left.path.localeCompare(right.path),
  );
  // Clamp so a negative limit yields no results instead of trimming from the end.
  return limit === undefined ? ranked : ranked.slice(0, Math.max(0, limit));
}

/**
 * Searches semantic-layer sources.
 *
 * - An empty (after trimming) query lists the sources with a score of 1.
 * - `backend: 'pglite-owner-prototype'` delegates to the PGlite prototype and
 *   requires `input.pglite`.
 * - When the project's search storage is not `sqlite-fts5`, candidates are
 *   ranked by token overlap and truncated to `input.limit`.
 * - Otherwise the SQLite index is refreshed and a hybrid search runs over four
 *   lanes (lexical, dictionary, token, semantic). The semantic lane is skipped
 *   when no embedding service is configured or when it throws.
 *
 * @param project Project to search.
 * @param input Query, optional connection filter, limit and backend options.
 * @returns Ranked results; hybrid hits whose id matches no loaded candidate are dropped.
 * @throws Error when the PGlite backend is selected without `input.pglite`.
 */
export async function searchLocalSlSources(
  project: KtxLocalProject,
  input: LocalSlSearchInput,
): Promise<LocalSlSourceSearchResult[]> {
  const query = input.query.trim();
  if (!query) {
    return (await listLocalSlSources(project, { connectionId: input.connectionId })).map((source) => ({
      ...source,
      score: 1,
    }));
  }

  if (input.backend === 'pglite-owner-prototype') {
    if (!input.pglite) {
      throw new Error('PGlite semantic-layer search prototype requires pglite owner-process options.');
    }
    const { searchLocalSlSourcesWithPglitePrototype } = await import('./pglite-sl-search-prototype.js');
    return searchLocalSlSourcesWithPglitePrototype(project, {
      connectionId: input.connectionId,
      query,
      embeddingService: input.embeddingService ?? null,
      limit: input.limit,
      pglite: input.pglite,
    });
  }

  const candidates = await loadLocalSlSearchCandidates(project, { connectionId: input.connectionId });
  if (project.config.storage.search !== 'sqlite-fts5') {
    return rankLocalSlCandidatesByTokenOverlap(candidates, query, input.limit);
  }

  const index = new SqliteSlSourcesIndex({ dbPath: sqliteSlDbPath(project) });
  await refreshHybridSlIndexes({ index, project, candidates, embeddingService: input.embeddingService ?? null });

  const candidateById = new Map(candidates.map((candidate) => [candidateKey(candidate.summary), candidate]));
  const connectionIds = input.connectionId ? [input.connectionId] : undefined;
  const finalLimit = input.limit ?? candidates.length;
  const core = new HybridSearchCore();
  // Side channels filled by the lanes and read back when hydrating results.
  const dictionaryEvidence = new Map<string, SlDictionaryMatch[]>();
  const lexicalSnippets = new Map<string, string>();

  const generators: SearchCandidateGenerator[] = [
    {
      lane: 'lexical',
      async generate(args) {
        const rows = await index.searchLexicalCandidates({
          connectionIds,
          queryText: args.queryText,
          limit: args.laneCandidatePoolLimit,
        });
        for (const row of rows) {
          if (row.snippet) {
            lexicalSnippets.set(row.id, row.snippet);
          }
        }
        return {
          candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })),
        };
      },
    },
    {
      lane: 'dictionary',
      async generate(args) {
        const rows = await index.searchDictionaryCandidates({
          connectionIds,
          queryText: args.queryText,
          limit: args.laneCandidatePoolLimit,
        });
        for (const row of rows) {
          dictionaryEvidence.set(row.id, row.matches);
        }
        return {
          candidates: rows.map((row) => ({
            id: row.id,
            rank: row.rank,
            rawScore: row.rawScore,
            evidence: row.matches,
          })),
        };
      },
    },
    {
      lane: 'token',
      async generate(args) {
        const rows = tokenLaneCandidates(candidates, args.normalizedQuery.terms).slice(0, args.laneCandidatePoolLimit);
        return {
          // `position` rather than `index`, to avoid shadowing the SQLite index above.
          candidates: rows.map((row, position) => ({
            id: candidateKey(row.candidate.summary),
            rank: position + 1,
            rawScore: row.score,
          })),
        };
      },
    },
    {
      lane: 'semantic',
      async generate(args) {
        if (!input.embeddingService) {
          return { status: 'skipped', candidates: [], reason: 'embedding_unconfigured' };
        }
        try {
          const queryEmbedding = await input.embeddingService.computeEmbedding(args.queryText);
          const rows = await index.searchSemanticCandidates({
            connectionIds,
            queryEmbedding,
            limit: args.laneCandidatePoolLimit,
          });
          return {
            candidates: rows.map((row) => ({ id: row.id, rank: row.rank, rawScore: row.rawScore })),
          };
        } catch (error) {
          // A failing embedding backend skips this lane instead of failing the whole search.
          return {
            status: 'skipped',
            candidates: [],
            reason: `embedding_unhealthy:${error instanceof Error ? error.message : String(error)}`,
          };
        }
      },
    },
  ];

  const result = await core.search({ queryText: query, limit: finalLimit, generators });
  const hydrated: LocalSlSourceSearchResult[] = [];
  for (const fused of result.results) {
    const candidate = candidateById.get(fused.id);
    if (!candidate) {
      continue;
    }
    const dictionaryMatches = dictionaryEvidence.get(fused.id);
    const snippet = lexicalSnippets.get(fused.id);
    hydrated.push({
      ...candidate.summary,
      score: fused.score,
      ...searchResultUsageFields(candidate.source),
      ...(snippet ? { snippet } : {}),
      matchReasons: fused.matchReasons as SlSearchMatchReason[],
      ...(dictionaryMatches && dictionaryMatches.length > 0 ? { dictionaryMatches } : {}),
      lanes: result.lanes,
    });
  }
  return hydrated;
}