From e6f0d9cd51da6543198c62bb2d1c7b727c7a6398 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Thu, 21 May 2026 10:08:03 +0200 Subject: [PATCH] fix: surface silent failures in SL, wiki, and embedding wiring - require non-empty `vertex.location` in the project schema instead of defaulting to an empty string with a description that promised SDK fallback the resolver never honored - log YAML parse failures from `SemanticLayerService.loadSource` and `KnowledgeWikiService.readPage` so corrupted overlays aren't silently treated as "does not exist" by ingest/agent tools - push directory-listing errors in `loadAllSources` and `listPageKeys` into the load-error / log path instead of returning empty success - accept an `embeddingProvider` in `createLocalProjectMemoryIngest` and plumb the resolved CLI provider through `mcp-server-factory`; warn in both the memory and bundle runtimes when they fall back to `NoopEmbeddingPort` while the project config requests an active embedding backend - clarify `embeddings.dimensions` description as a placeholder valid only with `backend: none`, and tighten the sentence-transformers `base_url` description to call out that managed-daemon resolution is CLI-only --- packages/cli/src/mcp-server-factory.ts | 11 ++++-- .../src/ingest/local-bundle-runtime.ts | 9 +++++ packages/context/src/memory/local-memory.ts | 16 +++++++- packages/context/src/project/config.ts | 14 +++++-- .../context/src/sl/semantic-layer.service.ts | 22 +++++++++-- .../src/wiki/knowledge-wiki.service.ts | 38 +++++++++++++------ 6 files changed, 89 insertions(+), 21 deletions(-) diff --git a/packages/cli/src/mcp-server-factory.ts b/packages/cli/src/mcp-server-factory.ts index 792f6bb9..1528a350 100644 --- a/packages/cli/src/mcp-server-factory.ts +++ b/packages/cli/src/mcp-server-factory.ts @@ -41,10 +41,11 @@ export async function createKtxMcpServerFactory(input: { cliVersion: input.cliVersion, io, }); - const embeddingService = + const embeddingProvider = resolution.kind === 'configured' || resolution.kind === 'managed-running' || resolution.kind === 'managed-started' - ? new KtxIngestEmbeddingPortAdapter(resolution.provider) + ? resolution.provider : null; + const embeddingService = embeddingProvider ? new KtxIngestEmbeddingPortAdapter(embeddingProvider) : null; const contextTools = createLocalProjectMcpContextPorts(input.project, { semanticLayerCompute, queryExecutor, @@ -57,7 +58,11 @@ export async function createKtxMcpServerFactory(input: { let memoryIngest: ReturnType | undefined; try { - memoryIngest = createLocalProjectMemoryIngest(input.project, { semanticLayerCompute, queryExecutor }); + memoryIngest = createLocalProjectMemoryIngest(input.project, { + semanticLayerCompute, + queryExecutor, + embeddingProvider, + }); } catch (error) { io.stderr.write(`KTX MCP memory_ingest disabled: ${error instanceof Error ? error.message : String(error)}\n`); } diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index 13466e50..2749b4c7 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -671,6 +671,15 @@ export function createLocalBundleIngestRuntime( const store = new SqliteBundleIngestStore({ dbPath }); const contextStore = new SqliteContextEvidenceStore({ dbPath }); const embeddingProvider = options.embeddingProvider ?? null; + if (!embeddingProvider && options.project.config.ingest.embeddings.backend !== 'none') { + // Embedding-dependent stages (CandidateDedup clustering, ContextEvidenceIndex + // chunk indexing) silently produce zero-vector data with NoopEmbeddingPort. + // Surface that fact so the caller knows ingest will not be running its + // configured backend. + logger.warn( + `[local-bundle-runtime] embeddings backend "${options.project.config.ingest.embeddings.backend}" is configured but no embedding provider was passed; embedding-dependent stages will run against a no-op embedding port.`, + ); + } const embedding = embeddingProvider ? new KtxIngestEmbeddingPortAdapter(embeddingProvider) : new NoopEmbeddingPort(); const connections = new LocalConnectionCatalog(options.project, options.queryExecutor); const rootFileStore = options.project.fileStore; diff --git a/packages/context/src/memory/local-memory.ts b/packages/context/src/memory/local-memory.ts index c12dec74..7ac4fe90 100644 --- a/packages/context/src/memory/local-memory.ts +++ b/packages/context/src/memory/local-memory.ts @@ -5,8 +5,10 @@ import { localConnectionInfoFromConfig } from '../connections/index.js'; import type { KtxEmbeddingPort, KtxFileStorePort, KtxFileWriteResult } from '../core/index.js'; import { type KtxLogger, noopLogger, SessionWorktreeService } from '../core/index.js'; import type { KtxSemanticLayerComputePort } from '../daemon/index.js'; +import type { KtxEmbeddingProvider } from '@ktx/llm'; import { createLocalKtxLlmRuntimeFromConfig, + KtxIngestEmbeddingPortAdapter, RuntimeAgentRunner, type AgentRunnerPort, type KtxLlmRuntimePort, @@ -74,6 +76,7 @@ export interface CreateLocalProjectMemoryIngestOptions { queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise }; runIdFactory?: () => string; logger?: KtxLogger; + embeddingProvider?: KtxEmbeddingProvider | null; } export function createLocalProjectMemoryIngest( @@ -82,7 +85,18 @@ export function createLocalProjectMemoryIngest( ): MemoryIngestService { const logger = options.logger ?? noopLogger; const rootFileStore = new LocalMemoryFileStore(project.fileStore); - const embedding = new NoopEmbeddingPort(); + const embedding = options.embeddingProvider + ? new KtxIngestEmbeddingPortAdapter(options.embeddingProvider) + : new NoopEmbeddingPort(); + if (!options.embeddingProvider && project.config.ingest.embeddings.backend !== 'none') { + // Memory-agent search (SlSearch, wiki) embeds against this port. With Noop the + // configured backend is silently inert — the agent will see empty vectors and + // rank results against zeros. Surface that so the caller knows to plumb the + // resolved embedding provider through. + logger.warn( + `[memory-ingest] embeddings backend "${project.config.ingest.embeddings.backend}" is configured but no embedding provider was passed; semantic search will fall back to a no-op embedding port.`, + ); + } const knowledgeIndex = new LocalKnowledgeIndex(project); const knowledgeEvents = new NoopKnowledgeEventPort(); const knowledgeSlRefs = new NoopKnowledgeSlRefsPort(); diff --git a/packages/context/src/project/config.ts b/packages/context/src/project/config.ts index b95f0ce0..18676ef5 100644 --- a/packages/context/src/project/config.ts +++ b/packages/context/src/project/config.ts @@ -30,13 +30,13 @@ const apiCredentialsSchema = z const vertexProviderSchema = z .strictObject({ project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'), - location: z.string().default('').describe('Vertex AI region (e.g. "us-east5"). Empty string falls back to the SDK default.'), + location: z.string().min(1).describe('Vertex AI region (e.g. "us-east5"). Required whenever the vertex provider block is present.'), }) .describe('Google Vertex AI provider configuration.'); const sentenceTransformersSchema = z .strictObject({ - base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) to use the project-managed local daemon.'), + base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) when the `ktx` CLI is expected to start and manage a local daemon for this project; programmatic consumers must populate it explicitly.'), pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'), }) .describe('Sentence-transformers embedding server configuration.'); @@ -83,7 +83,15 @@ const embeddingSchema = z .default('none') .describe('Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'), model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'), - dimensions: z.int().positive().default(8).describe('Embedding vector dimensionality. Must match the chosen model when using a real provider.'), + dimensions: z + .int() + .positive() + .default(8) + .describe( + 'Embedding vector dimensionality. The default value 8 is a placeholder that is only valid alongside backend: none; ' + + 'before switching backend to openai/sentence-transformers, set this explicitly to match the chosen model ' + + '(e.g. 384 for all-MiniLM-L6-v2, 1536 for text-embedding-3-small).', + ), openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'), sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'), batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'), diff --git a/packages/context/src/sl/semantic-layer.service.ts b/packages/context/src/sl/semantic-layer.service.ts index 00149d3b..2ab96688 100644 --- a/packages/context/src/sl/semantic-layer.service.ts +++ b/packages/context/src/sl/semantic-layer.service.ts @@ -198,12 +198,25 @@ export class SemanticLayerService { } async loadSource(connectionId: string, sourceName: string): Promise { + let content: string; try { - const { content } = await this.readSourceFile(connectionId, sourceName); - return YAML.parse(content) as SemanticLayerSource; + const result = await this.readSourceFile(connectionId, sourceName); + content = result.content; } catch { return null; } + try { + return YAML.parse(content) as SemanticLayerSource; + } catch (error) { + // Distinguish a YAML parse failure from a missing file. The file exists but + // its contents are unparseable — callers that treat null as "does not exist" + // could otherwise overwrite the broken file. Surface the parse failure via + // the service logger so the broken source is at least visible. + this.logger.warn( + `[loadSource] ${connectionId}/${sourceName}.yaml: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`, + ); + return null; + } } async loadAllSources(connectionId: string): Promise { @@ -215,7 +228,10 @@ export class SemanticLayerService { try { const result = await this.configService.listFiles(dir); allFiles = result.files.filter((f) => f.endsWith('.yaml')); - } catch { + } catch (e) { + const message = `Failed to list semantic-layer files under ${dir}: ${e instanceof Error ? e.message : String(e)}`; + loadErrors.push(message); + this.logger.warn(message); return { sources: [], loadErrors }; } diff --git a/packages/context/src/wiki/knowledge-wiki.service.ts b/packages/context/src/wiki/knowledge-wiki.service.ts index 88447c14..d7a9f095 100644 --- a/packages/context/src/wiki/knowledge-wiki.service.ts +++ b/packages/context/src/wiki/knowledge-wiki.service.ts @@ -98,13 +98,25 @@ export class KnowledgeWikiService { async readPage(scope: string, scopeId: string | null | undefined, pageKey: string): Promise { const path = this.pagePath(scope, scopeId, pageKey); + let raw: string; try { const result = await this.configService.readFile(path); - const { frontmatter, content } = this.parsePage(result.content); - return { pageKey, frontmatter, content }; + raw = result.content; } catch { return null; } + try { + const { frontmatter, content } = this.parsePage(raw); + return { pageKey, frontmatter, content }; + } catch (error) { + // The file exists but parsing failed. Returning null without surfacing the + // parse error would let callers (and the memory agent) treat it as "page + // doesn't exist" and clobber the broken page on the next write. + this.logger.warn( + `[readPage] ${path}: parse failed: ${error instanceof Error ? error.message : String(error)}`, + ); + return null; + } } async deletePage( @@ -133,19 +145,23 @@ export class KnowledgeWikiService { async listPageKeys(scope: string, scopeId?: string | null): Promise { const dir = this.scopeDir(scope, scopeId); + let files: string[]; try { const result = await this.configService.listFiles(dir); - return result.files - .filter((f) => f.endsWith('.md')) - .map((f) => { - // Strip the directory prefix and .md extension - const name = f.replace(`${dir}/`, '').replace(/\.md$/, ''); - return name; - }) - .filter(isFlatWikiKey); - } catch { + files = result.files; + } catch (error) { + // listFiles returns [] for missing directories; reaching this catch means + // an IO-level failure that should at least be surfaced before we report + // "no pages" the same as a freshly-initialised store would. + this.logger.warn( + `[listPageKeys] ${dir}: ${error instanceof Error ? error.message : String(error)}`, + ); return []; } + return files + .filter((f) => f.endsWith('.md')) + .map((f) => f.replace(`${dir}/`, '').replace(/\.md$/, '')) + .filter(isFlatWikiKey); } async getPageHistory(scope: string, scopeId: string | null | undefined, pageKey: string) {