mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
fix: surface silent failures in SL, wiki, and embedding wiring
- require non-empty `vertex.location` in the project schema instead of defaulting to an empty string with a description that promised SDK fallback the resolver never honored
- log YAML parse failures from `SemanticLayerService.loadSource` and `KnowledgeWikiService.readPage` so corrupted overlays aren't silently treated as "does not exist" by ingest/agent tools
- push directory-listing errors in `loadAllSources` and `listPageKeys` into the load-error / log path instead of returning empty success
- accept an `embeddingProvider` in `createLocalProjectMemoryIngest` and plumb the resolved CLI provider through `mcp-server-factory`; warn in both the memory and bundle runtimes when they fall back to `NoopEmbeddingPort` while the project config requests an active embedding backend
- clarify `embeddings.dimensions` description as a placeholder valid only with `backend: none`, and tighten the sentence-transformers `base_url` description to call out that managed-daemon resolution is CLI-only
This commit is contained in:
parent
9fc715ac6a
commit
e6f0d9cd51
6 changed files with 89 additions and 21 deletions
|
|
@ -41,10 +41,11 @@ export async function createKtxMcpServerFactory(input: {
|
|||
cliVersion: input.cliVersion,
|
||||
io,
|
||||
});
|
||||
const embeddingService =
|
||||
const embeddingProvider =
|
||||
resolution.kind === 'configured' || resolution.kind === 'managed-running' || resolution.kind === 'managed-started'
|
||||
? new KtxIngestEmbeddingPortAdapter(resolution.provider)
|
||||
? resolution.provider
|
||||
: null;
|
||||
const embeddingService = embeddingProvider ? new KtxIngestEmbeddingPortAdapter(embeddingProvider) : null;
|
||||
const contextTools = createLocalProjectMcpContextPorts(input.project, {
|
||||
semanticLayerCompute,
|
||||
queryExecutor,
|
||||
|
|
@ -57,7 +58,11 @@ export async function createKtxMcpServerFactory(input: {
|
|||
|
||||
let memoryIngest: ReturnType<typeof createLocalProjectMemoryIngest> | undefined;
|
||||
try {
|
||||
memoryIngest = createLocalProjectMemoryIngest(input.project, { semanticLayerCompute, queryExecutor });
|
||||
memoryIngest = createLocalProjectMemoryIngest(input.project, {
|
||||
semanticLayerCompute,
|
||||
queryExecutor,
|
||||
embeddingProvider,
|
||||
});
|
||||
} catch (error) {
|
||||
io.stderr.write(`KTX MCP memory_ingest disabled: ${error instanceof Error ? error.message : String(error)}\n`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -671,6 +671,15 @@ export function createLocalBundleIngestRuntime(
|
|||
const store = new SqliteBundleIngestStore({ dbPath });
|
||||
const contextStore = new SqliteContextEvidenceStore({ dbPath });
|
||||
const embeddingProvider = options.embeddingProvider ?? null;
|
||||
if (!embeddingProvider && options.project.config.ingest.embeddings.backend !== 'none') {
|
||||
// Embedding-dependent stages (CandidateDedup clustering, ContextEvidenceIndex
|
||||
// chunk indexing) silently produce zero-vector data with NoopEmbeddingPort.
|
||||
// Surface that fact so the caller knows ingest will not be running its
|
||||
// configured backend.
|
||||
logger.warn(
|
||||
`[local-bundle-runtime] embeddings backend "${options.project.config.ingest.embeddings.backend}" is configured but no embedding provider was passed; embedding-dependent stages will run against a no-op embedding port.`,
|
||||
);
|
||||
}
|
||||
const embedding = embeddingProvider ? new KtxIngestEmbeddingPortAdapter(embeddingProvider) : new NoopEmbeddingPort();
|
||||
const connections = new LocalConnectionCatalog(options.project, options.queryExecutor);
|
||||
const rootFileStore = options.project.fileStore;
|
||||
|
|
|
|||
|
|
@ -5,8 +5,10 @@ import { localConnectionInfoFromConfig } from '../connections/index.js';
|
|||
import type { KtxEmbeddingPort, KtxFileStorePort, KtxFileWriteResult } from '../core/index.js';
|
||||
import { type KtxLogger, noopLogger, SessionWorktreeService } from '../core/index.js';
|
||||
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
|
||||
import type { KtxEmbeddingProvider } from '@ktx/llm';
|
||||
import {
|
||||
createLocalKtxLlmRuntimeFromConfig,
|
||||
KtxIngestEmbeddingPortAdapter,
|
||||
RuntimeAgentRunner,
|
||||
type AgentRunnerPort,
|
||||
type KtxLlmRuntimePort,
|
||||
|
|
@ -74,6 +76,7 @@ export interface CreateLocalProjectMemoryIngestOptions {
|
|||
queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KtxQueryResult> };
|
||||
runIdFactory?: () => string;
|
||||
logger?: KtxLogger;
|
||||
embeddingProvider?: KtxEmbeddingProvider | null;
|
||||
}
|
||||
|
||||
export function createLocalProjectMemoryIngest(
|
||||
|
|
@ -82,7 +85,18 @@ export function createLocalProjectMemoryIngest(
|
|||
): MemoryIngestService {
|
||||
const logger = options.logger ?? noopLogger;
|
||||
const rootFileStore = new LocalMemoryFileStore(project.fileStore);
|
||||
const embedding = new NoopEmbeddingPort();
|
||||
const embedding = options.embeddingProvider
|
||||
? new KtxIngestEmbeddingPortAdapter(options.embeddingProvider)
|
||||
: new NoopEmbeddingPort();
|
||||
if (!options.embeddingProvider && project.config.ingest.embeddings.backend !== 'none') {
|
||||
// Memory-agent search (SlSearch, wiki) embeds against this port. With Noop the
|
||||
// configured backend is silently inert — the agent will see empty vectors and
|
||||
// rank results against zeros. Surface that so the caller knows to plumb the
|
||||
// resolved embedding provider through.
|
||||
logger.warn(
|
||||
`[memory-ingest] embeddings backend "${project.config.ingest.embeddings.backend}" is configured but no embedding provider was passed; semantic search will fall back to a no-op embedding port.`,
|
||||
);
|
||||
}
|
||||
const knowledgeIndex = new LocalKnowledgeIndex(project);
|
||||
const knowledgeEvents = new NoopKnowledgeEventPort();
|
||||
const knowledgeSlRefs = new NoopKnowledgeSlRefsPort();
|
||||
|
|
|
|||
|
|
@ -30,13 +30,13 @@ const apiCredentialsSchema = z
|
|||
const vertexProviderSchema = z
|
||||
.strictObject({
|
||||
project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
|
||||
location: z.string().default('').describe('Vertex AI region (e.g. "us-east5"). Empty string falls back to the SDK default.'),
|
||||
location: z.string().min(1).describe('Vertex AI region (e.g. "us-east5"). Required whenever the vertex provider block is present.'),
|
||||
})
|
||||
.describe('Google Vertex AI provider configuration.');
|
||||
|
||||
const sentenceTransformersSchema = z
|
||||
.strictObject({
|
||||
base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) to use the project-managed local daemon.'),
|
||||
base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) when the `ktx` CLI is expected to start and manage a local daemon for this project; programmatic consumers must populate it explicitly.'),
|
||||
pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'),
|
||||
})
|
||||
.describe('Sentence-transformers embedding server configuration.');
|
||||
|
|
@ -83,7 +83,15 @@ const embeddingSchema = z
|
|||
.default('none')
|
||||
.describe('Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
|
||||
model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'),
|
||||
dimensions: z.int().positive().default(8).describe('Embedding vector dimensionality. Must match the chosen model when using a real provider.'),
|
||||
dimensions: z
|
||||
.int()
|
||||
.positive()
|
||||
.default(8)
|
||||
.describe(
|
||||
'Embedding vector dimensionality. The default value 8 is a placeholder that is only valid alongside backend: none; ' +
|
||||
'before switching backend to openai/sentence-transformers, set this explicitly to match the chosen model ' +
|
||||
'(e.g. 384 for all-MiniLM-L6-v2, 1536 for text-embedding-3-small).',
|
||||
),
|
||||
openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
|
||||
sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
|
||||
batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'),
|
||||
|
|
|
|||
|
|
@ -198,12 +198,25 @@ export class SemanticLayerService {
|
|||
}
|
||||
|
||||
async loadSource(connectionId: string, sourceName: string): Promise<SemanticLayerSource | null> {
|
||||
let content: string;
|
||||
try {
|
||||
const { content } = await this.readSourceFile(connectionId, sourceName);
|
||||
return YAML.parse(content) as SemanticLayerSource;
|
||||
const result = await this.readSourceFile(connectionId, sourceName);
|
||||
content = result.content;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return YAML.parse(content) as SemanticLayerSource;
|
||||
} catch (error) {
|
||||
// Distinguish a YAML parse failure from a missing file. The file exists but
|
||||
// its contents are unparseable — callers that treat null as "does not exist"
|
||||
// could otherwise overwrite the broken file. Surface the parse failure via
|
||||
// the service logger so the broken source is at least visible.
|
||||
this.logger.warn(
|
||||
`[loadSource] ${connectionId}/${sourceName}.yaml: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async loadAllSources(connectionId: string): Promise<LoadAllSourcesResult> {
|
||||
|
|
@ -215,7 +228,10 @@ export class SemanticLayerService {
|
|||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
allFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||
} catch {
|
||||
} catch (e) {
|
||||
const message = `Failed to list semantic-layer files under ${dir}: ${e instanceof Error ? e.message : String(e)}`;
|
||||
loadErrors.push(message);
|
||||
this.logger.warn(message);
|
||||
return { sources: [], loadErrors };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -98,13 +98,25 @@ export class KnowledgeWikiService {
|
|||
|
||||
async readPage(scope: string, scopeId: string | null | undefined, pageKey: string): Promise<WikiPage | null> {
|
||||
const path = this.pagePath(scope, scopeId, pageKey);
|
||||
let raw: string;
|
||||
try {
|
||||
const result = await this.configService.readFile(path);
|
||||
const { frontmatter, content } = this.parsePage(result.content);
|
||||
return { pageKey, frontmatter, content };
|
||||
raw = result.content;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
const { frontmatter, content } = this.parsePage(raw);
|
||||
return { pageKey, frontmatter, content };
|
||||
} catch (error) {
|
||||
// The file exists but parsing failed. Returning null without surfacing the
|
||||
// parse error would let callers (and the memory agent) treat it as "page
|
||||
// doesn't exist" and clobber the broken page on the next write.
|
||||
this.logger.warn(
|
||||
`[readPage] ${path}: parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async deletePage(
|
||||
|
|
@ -133,19 +145,23 @@ export class KnowledgeWikiService {
|
|||
|
||||
async listPageKeys(scope: string, scopeId?: string | null): Promise<string[]> {
|
||||
const dir = this.scopeDir(scope, scopeId);
|
||||
let files: string[];
|
||||
try {
|
||||
const result = await this.configService.listFiles(dir);
|
||||
return result.files
|
||||
.filter((f) => f.endsWith('.md'))
|
||||
.map((f) => {
|
||||
// Strip the directory prefix and .md extension
|
||||
const name = f.replace(`${dir}/`, '').replace(/\.md$/, '');
|
||||
return name;
|
||||
})
|
||||
.filter(isFlatWikiKey);
|
||||
} catch {
|
||||
files = result.files;
|
||||
} catch (error) {
|
||||
// listFiles returns [] for missing directories; reaching this catch means
|
||||
// an IO-level failure that should at least be surfaced before we report
|
||||
// "no pages" the same as a freshly-initialised store would.
|
||||
this.logger.warn(
|
||||
`[listPageKeys] ${dir}: ${error instanceof Error ? error.message : String(error)}`,
|
||||
);
|
||||
return [];
|
||||
}
|
||||
return files
|
||||
.filter((f) => f.endsWith('.md'))
|
||||
.map((f) => f.replace(`${dir}/`, '').replace(/\.md$/, ''))
|
||||
.filter(isFlatWikiKey);
|
||||
}
|
||||
|
||||
async getPageHistory(scope: string, scopeId: string | null | undefined, pageKey: string) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue