mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
fix: surface silent failures in SL, wiki, and embedding wiring (#195)
* fix: surface silent failures in SL, wiki, and embedding wiring
  - require non-empty `vertex.location` in the project schema instead of defaulting to an empty string with a description that promised SDK fallback the resolver never honored
  - log YAML parse failures from `SemanticLayerService.loadSource` and `KnowledgeWikiService.readPage` so corrupted overlays aren't silently treated as "does not exist" by ingest/agent tools
  - push directory-listing errors in `loadAllSources` and `listPageKeys` into the load-error / log path instead of returning empty success
  - accept an `embeddingProvider` in `createLocalProjectMemoryIngest` and plumb the resolved CLI provider through `mcp-server-factory`; warn in both the memory and bundle runtimes when they fall back to `NoopEmbeddingPort` while the project config requests an active embedding backend
  - clarify `embeddings.dimensions` description as a placeholder valid only with `backend: none`, and tighten the sentence-transformers `base_url` description to call out that managed-daemon resolution is CLI-only
* test: improve PR coverage
This commit is contained in:
parent
9fc715ac6a
commit
488b955024
12 changed files with 397 additions and 21 deletions
193
packages/cli/src/mcp-server-factory.test.ts
Normal file
193
packages/cli/src/mcp-server-factory.test.ts
Normal file
|
|
@ -0,0 +1,193 @@
|
||||||
|
import { createDefaultKtxMcpServer, createLocalProjectMcpContextPorts } from '@ktx/context/mcp';
|
||||||
|
import { createLocalProjectMemoryIngest } from '@ktx/context/memory';
|
||||||
|
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
|
import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
|
||||||
|
import { createKtxCliScanConnector } from './local-scan-connectors.js';
|
||||||
|
import { createKtxMcpServerFactory } from './mcp-server-factory.js';
|
||||||
|
|
||||||
|
type FakeEmbeddingProvider = {
|
||||||
|
maxBatchSize: number;
|
||||||
|
embed(text: string): Promise<number[]>;
|
||||||
|
embedMany(texts: string[]): Promise<number[][]>;
|
||||||
|
};
|
||||||
|
|
||||||
|
const mocks = vi.hoisted(() => ({
|
||||||
|
queryExecutor: { execute: vi.fn() },
|
||||||
|
semanticLayerCompute: { validateSources: vi.fn(), generateSources: vi.fn(), query: vi.fn() },
|
||||||
|
sqlAnalysis: { analyzeForFingerprint: vi.fn(), analyzeBatch: vi.fn(), validateReadOnly: vi.fn() },
|
||||||
|
memoryIngest: { ingest: vi.fn(), status: vi.fn(), waitForRun: vi.fn() },
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('@ktx/context', () => ({
|
||||||
|
KtxIngestEmbeddingPortAdapter: class {
|
||||||
|
readonly maxBatchSize: number;
|
||||||
|
|
||||||
|
constructor(private readonly provider: FakeEmbeddingProvider) {
|
||||||
|
this.maxBatchSize = provider.maxBatchSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
computeEmbedding(text: string): Promise<number[]> {
|
||||||
|
return this.provider.embed(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
|
||||||
|
return this.provider.embedMany(texts);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('@ktx/context/mcp', () => ({
|
||||||
|
createDefaultKtxMcpServer: vi.fn(() => ({ kind: 'mcp-server' })),
|
||||||
|
createLocalProjectMcpContextPorts: vi.fn(() => ({ context_tool: { name: 'context_tool' } })),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('@ktx/context/memory', () => ({
|
||||||
|
createLocalProjectMemoryIngest: vi.fn(() => mocks.memoryIngest),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('./embedding-resolution.js', () => ({
|
||||||
|
resolveProjectEmbeddingProvider: vi.fn(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('./ingest-query-executor.js', () => ({
|
||||||
|
createKtxCliIngestQueryExecutor: vi.fn(() => mocks.queryExecutor),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('./local-scan-connectors.js', () => ({
|
||||||
|
createKtxCliScanConnector: vi.fn(() => ({ source: 'fake-scan-connector' })),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('./managed-python-command.js', () => ({
|
||||||
|
createManagedPythonSemanticLayerComputePort: vi.fn(async () => mocks.semanticLayerCompute),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('./managed-python-http.js', () => ({
|
||||||
|
createManagedDaemonSqlAnalysisPort: vi.fn(() => mocks.sqlAnalysis),
|
||||||
|
}));
|
||||||
|
|
||||||
|
const project = {
|
||||||
|
projectDir: '/work/project',
|
||||||
|
configPath: '/work/project/ktx.yaml',
|
||||||
|
config: {},
|
||||||
|
coreConfig: {},
|
||||||
|
git: {},
|
||||||
|
fileStore: {},
|
||||||
|
};
|
||||||
|
|
||||||
|
const io = {
|
||||||
|
stdout: { write: vi.fn() },
|
||||||
|
stderr: { write: vi.fn() },
|
||||||
|
};
|
||||||
|
|
||||||
|
describe('createKtxMcpServerFactory', () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
vi.clearAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('passes a resolved embedding provider to MCP context ports and memory ingest', async () => {
|
||||||
|
const provider = {
|
||||||
|
maxBatchSize: 4,
|
||||||
|
embed: vi.fn(async () => [0.2, 0.4]),
|
||||||
|
embedMany: vi.fn(async () => [[0.2, 0.4]]),
|
||||||
|
};
|
||||||
|
vi.mocked(resolveProjectEmbeddingProvider).mockResolvedValue({ kind: 'configured', provider } as never);
|
||||||
|
|
||||||
|
const factory = await createKtxMcpServerFactory({
|
||||||
|
project: project as never,
|
||||||
|
projectDir: project.projectDir,
|
||||||
|
cliVersion: '0.5.0',
|
||||||
|
io,
|
||||||
|
});
|
||||||
|
|
||||||
|
const contextOptions = vi.mocked(createLocalProjectMcpContextPorts).mock.calls[0][1] as {
|
||||||
|
embeddingService: {
|
||||||
|
computeEmbedding(text: string): Promise<number[]>;
|
||||||
|
computeEmbeddingsBulk(texts: string[]): Promise<number[][]>;
|
||||||
|
};
|
||||||
|
queryExecutor: unknown;
|
||||||
|
semanticLayerCompute: unknown;
|
||||||
|
sqlAnalysis: unknown;
|
||||||
|
localScan: {
|
||||||
|
createConnector(connectionId: string): Promise<unknown>;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
await expect(contextOptions.embeddingService.computeEmbedding('gross revenue')).resolves.toEqual([0.2, 0.4]);
|
||||||
|
await expect(contextOptions.embeddingService.computeEmbeddingsBulk(['gross revenue'])).resolves.toEqual([[0.2, 0.4]]);
|
||||||
|
await expect(contextOptions.localScan.createConnector('warehouse')).resolves.toEqual({
|
||||||
|
source: 'fake-scan-connector',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(provider.embed).toHaveBeenCalledWith('gross revenue');
|
||||||
|
expect(provider.embedMany).toHaveBeenCalledWith(['gross revenue']);
|
||||||
|
expect(createKtxCliScanConnector).toHaveBeenCalledWith(project, 'warehouse');
|
||||||
|
expect(contextOptions).toMatchObject({
|
||||||
|
queryExecutor: mocks.queryExecutor,
|
||||||
|
semanticLayerCompute: mocks.semanticLayerCompute,
|
||||||
|
sqlAnalysis: mocks.sqlAnalysis,
|
||||||
|
});
|
||||||
|
expect(createLocalProjectMemoryIngest).toHaveBeenCalledWith(
|
||||||
|
project,
|
||||||
|
expect.objectContaining({
|
||||||
|
embeddingProvider: provider,
|
||||||
|
queryExecutor: mocks.queryExecutor,
|
||||||
|
semanticLayerCompute: mocks.semanticLayerCompute,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(factory()).toEqual({ kind: 'mcp-server' });
|
||||||
|
expect(createDefaultKtxMcpServer).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
contextTools: expect.objectContaining({
|
||||||
|
context_tool: { name: 'context_tool' },
|
||||||
|
memoryIngest: mocks.memoryIngest,
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('uses null embedding ports when no configured provider is available', async () => {
|
||||||
|
vi.mocked(resolveProjectEmbeddingProvider).mockResolvedValue({ kind: 'managed-unavailable' } as never);
|
||||||
|
|
||||||
|
await createKtxMcpServerFactory({
|
||||||
|
project: project as never,
|
||||||
|
projectDir: project.projectDir,
|
||||||
|
cliVersion: '0.5.0',
|
||||||
|
io,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(vi.mocked(createLocalProjectMcpContextPorts).mock.calls[0][1]).toMatchObject({
|
||||||
|
embeddingService: null,
|
||||||
|
});
|
||||||
|
expect(createLocalProjectMemoryIngest).toHaveBeenCalledWith(
|
||||||
|
project,
|
||||||
|
expect.objectContaining({
|
||||||
|
embeddingProvider: null,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('omits memory ingest and logs when memory ingest construction fails', async () => {
|
||||||
|
vi.mocked(resolveProjectEmbeddingProvider).mockResolvedValue({ kind: 'disabled' } as never);
|
||||||
|
vi.mocked(createLocalProjectMemoryIngest).mockImplementationOnce(() => {
|
||||||
|
throw new Error('missing local memory prerequisites');
|
||||||
|
});
|
||||||
|
|
||||||
|
const factory = await createKtxMcpServerFactory({
|
||||||
|
project: project as never,
|
||||||
|
projectDir: project.projectDir,
|
||||||
|
cliVersion: '0.5.0',
|
||||||
|
io,
|
||||||
|
});
|
||||||
|
|
||||||
|
factory();
|
||||||
|
|
||||||
|
expect(io.stderr.write).toHaveBeenCalledWith(
|
||||||
|
'KTX MCP memory_ingest disabled: missing local memory prerequisites\n',
|
||||||
|
);
|
||||||
|
expect(createDefaultKtxMcpServer).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
contextTools: { context_tool: { name: 'context_tool' } },
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
@ -41,10 +41,11 @@ export async function createKtxMcpServerFactory(input: {
|
||||||
cliVersion: input.cliVersion,
|
cliVersion: input.cliVersion,
|
||||||
io,
|
io,
|
||||||
});
|
});
|
||||||
const embeddingService =
|
const embeddingProvider =
|
||||||
resolution.kind === 'configured' || resolution.kind === 'managed-running' || resolution.kind === 'managed-started'
|
resolution.kind === 'configured' || resolution.kind === 'managed-running' || resolution.kind === 'managed-started'
|
||||||
? new KtxIngestEmbeddingPortAdapter(resolution.provider)
|
? resolution.provider
|
||||||
: null;
|
: null;
|
||||||
|
const embeddingService = embeddingProvider ? new KtxIngestEmbeddingPortAdapter(embeddingProvider) : null;
|
||||||
const contextTools = createLocalProjectMcpContextPorts(input.project, {
|
const contextTools = createLocalProjectMcpContextPorts(input.project, {
|
||||||
semanticLayerCompute,
|
semanticLayerCompute,
|
||||||
queryExecutor,
|
queryExecutor,
|
||||||
|
|
@ -57,7 +58,11 @@ export async function createKtxMcpServerFactory(input: {
|
||||||
|
|
||||||
let memoryIngest: ReturnType<typeof createLocalProjectMemoryIngest> | undefined;
|
let memoryIngest: ReturnType<typeof createLocalProjectMemoryIngest> | undefined;
|
||||||
try {
|
try {
|
||||||
memoryIngest = createLocalProjectMemoryIngest(input.project, { semanticLayerCompute, queryExecutor });
|
memoryIngest = createLocalProjectMemoryIngest(input.project, {
|
||||||
|
semanticLayerCompute,
|
||||||
|
queryExecutor,
|
||||||
|
embeddingProvider,
|
||||||
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
io.stderr.write(`KTX MCP memory_ingest disabled: ${error instanceof Error ? error.message : String(error)}\n`);
|
io.stderr.write(`KTX MCP memory_ingest disabled: ${error instanceof Error ? error.message : String(error)}\n`);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,26 @@ describe('createLocalBundleIngestRuntime', () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('warns when embeddings are configured but no embedding provider is supplied', () => {
|
||||||
|
const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
|
||||||
|
project.config.ingest.embeddings = {
|
||||||
|
backend: 'openai',
|
||||||
|
model: 'text-embedding-3-small',
|
||||||
|
dimensions: 1536,
|
||||||
|
};
|
||||||
|
|
||||||
|
createLocalBundleIngestRuntime({
|
||||||
|
project,
|
||||||
|
adapters: [new FakeSourceAdapter()],
|
||||||
|
agentRunner: testAgentRunner(),
|
||||||
|
logger: logger as never,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(logger.warn).toHaveBeenCalledWith(
|
||||||
|
'[local-bundle-runtime] embeddings backend "openai" is configured but no embedding provider was passed; embedding-dependent stages will run against a no-op embedding port.',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it('builds runner deps with local SQLite stores and context tools enabled', async () => {
|
it('builds runner deps with local SQLite stores and context tools enabled', async () => {
|
||||||
const agentRunner = testAgentRunner();
|
const agentRunner = testAgentRunner();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -671,6 +671,15 @@ export function createLocalBundleIngestRuntime(
|
||||||
const store = new SqliteBundleIngestStore({ dbPath });
|
const store = new SqliteBundleIngestStore({ dbPath });
|
||||||
const contextStore = new SqliteContextEvidenceStore({ dbPath });
|
const contextStore = new SqliteContextEvidenceStore({ dbPath });
|
||||||
const embeddingProvider = options.embeddingProvider ?? null;
|
const embeddingProvider = options.embeddingProvider ?? null;
|
||||||
|
if (!embeddingProvider && options.project.config.ingest.embeddings.backend !== 'none') {
|
||||||
|
// Embedding-dependent stages (CandidateDedup clustering, ContextEvidenceIndex
|
||||||
|
// chunk indexing) silently produce zero-vector data with NoopEmbeddingPort.
|
||||||
|
// Surface that fact so the caller knows ingest will not be running its
|
||||||
|
// configured backend.
|
||||||
|
logger.warn(
|
||||||
|
`[local-bundle-runtime] embeddings backend "${options.project.config.ingest.embeddings.backend}" is configured but no embedding provider was passed; embedding-dependent stages will run against a no-op embedding port.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
const embedding = embeddingProvider ? new KtxIngestEmbeddingPortAdapter(embeddingProvider) : new NoopEmbeddingPort();
|
const embedding = embeddingProvider ? new KtxIngestEmbeddingPortAdapter(embeddingProvider) : new NoopEmbeddingPort();
|
||||||
const connections = new LocalConnectionCatalog(options.project, options.queryExecutor);
|
const connections = new LocalConnectionCatalog(options.project, options.queryExecutor);
|
||||||
const rootFileStore = options.project.fileStore;
|
const rootFileStore = options.project.fileStore;
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,25 @@ describe('createLocalProjectMemoryIngest', () => {
|
||||||
await rm(tempDir, { recursive: true, force: true });
|
await rm(tempDir, { recursive: true, force: true });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('warns when embeddings are configured but memory ingest is created without an embedding provider', async () => {
|
||||||
|
const project = await initKtxProject({ projectDir: tempDir });
|
||||||
|
project.config.ingest.embeddings = {
|
||||||
|
backend: 'openai',
|
||||||
|
model: 'text-embedding-3-small',
|
||||||
|
dimensions: 1536,
|
||||||
|
};
|
||||||
|
const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
|
||||||
|
|
||||||
|
createLocalProjectMemoryIngest(project, {
|
||||||
|
agentRunner: { runLoop: vi.fn() } as never,
|
||||||
|
logger: logger as never,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(logger.warn).toHaveBeenCalledWith(
|
||||||
|
'[memory-ingest] embeddings backend "openai" is configured but no embedding provider was passed; semantic search will fall back to a no-op embedding port.',
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it('captures a wiki page through the local memory agent and persists pollable status', async () => {
|
it('captures a wiki page through the local memory agent and persists pollable status', async () => {
|
||||||
const project = await initKtxProject({ projectDir: tempDir });
|
const project = await initKtxProject({ projectDir: tempDir });
|
||||||
const agentRunner = {
|
const agentRunner = {
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,10 @@ import { localConnectionInfoFromConfig } from '../connections/index.js';
|
||||||
import type { KtxEmbeddingPort, KtxFileStorePort, KtxFileWriteResult } from '../core/index.js';
|
import type { KtxEmbeddingPort, KtxFileStorePort, KtxFileWriteResult } from '../core/index.js';
|
||||||
import { type KtxLogger, noopLogger, SessionWorktreeService } from '../core/index.js';
|
import { type KtxLogger, noopLogger, SessionWorktreeService } from '../core/index.js';
|
||||||
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
|
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
|
||||||
|
import type { KtxEmbeddingProvider } from '@ktx/llm';
|
||||||
import {
|
import {
|
||||||
createLocalKtxLlmRuntimeFromConfig,
|
createLocalKtxLlmRuntimeFromConfig,
|
||||||
|
KtxIngestEmbeddingPortAdapter,
|
||||||
RuntimeAgentRunner,
|
RuntimeAgentRunner,
|
||||||
type AgentRunnerPort,
|
type AgentRunnerPort,
|
||||||
type KtxLlmRuntimePort,
|
type KtxLlmRuntimePort,
|
||||||
|
|
@ -74,6 +76,7 @@ export interface CreateLocalProjectMemoryIngestOptions {
|
||||||
queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KtxQueryResult> };
|
queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KtxQueryResult> };
|
||||||
runIdFactory?: () => string;
|
runIdFactory?: () => string;
|
||||||
logger?: KtxLogger;
|
logger?: KtxLogger;
|
||||||
|
embeddingProvider?: KtxEmbeddingProvider | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createLocalProjectMemoryIngest(
|
export function createLocalProjectMemoryIngest(
|
||||||
|
|
@ -82,7 +85,18 @@ export function createLocalProjectMemoryIngest(
|
||||||
): MemoryIngestService {
|
): MemoryIngestService {
|
||||||
const logger = options.logger ?? noopLogger;
|
const logger = options.logger ?? noopLogger;
|
||||||
const rootFileStore = new LocalMemoryFileStore(project.fileStore);
|
const rootFileStore = new LocalMemoryFileStore(project.fileStore);
|
||||||
const embedding = new NoopEmbeddingPort();
|
const embedding = options.embeddingProvider
|
||||||
|
? new KtxIngestEmbeddingPortAdapter(options.embeddingProvider)
|
||||||
|
: new NoopEmbeddingPort();
|
||||||
|
if (!options.embeddingProvider && project.config.ingest.embeddings.backend !== 'none') {
|
||||||
|
// Memory-agent search (SlSearch, wiki) embeds against this port. With Noop the
|
||||||
|
// configured backend is silently inert — the agent will see empty vectors and
|
||||||
|
// rank results against zeros. Surface that so the caller knows to plumb the
|
||||||
|
// resolved embedding provider through.
|
||||||
|
logger.warn(
|
||||||
|
`[memory-ingest] embeddings backend "${project.config.ingest.embeddings.backend}" is configured but no embedding provider was passed; semantic search will fall back to a no-op embedding port.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
const knowledgeIndex = new LocalKnowledgeIndex(project);
|
const knowledgeIndex = new LocalKnowledgeIndex(project);
|
||||||
const knowledgeEvents = new NoopKnowledgeEventPort();
|
const knowledgeEvents = new NoopKnowledgeEventPort();
|
||||||
const knowledgeSlRefs = new NoopKnowledgeSlRefsPort();
|
const knowledgeSlRefs = new NoopKnowledgeSlRefsPort();
|
||||||
|
|
|
||||||
|
|
@ -176,6 +176,28 @@ llm:
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('requires a non-empty Vertex location when the Vertex provider block is present', () => {
|
||||||
|
const yaml = `
|
||||||
|
llm:
|
||||||
|
provider:
|
||||||
|
backend: vertex
|
||||||
|
vertex:
|
||||||
|
project: local-gcp-project
|
||||||
|
`;
|
||||||
|
|
||||||
|
expect(() => parseKtxProjectConfig(yaml)).toThrow(/llm\.provider\.vertex\.location/);
|
||||||
|
|
||||||
|
const validation = validateKtxProjectConfig(yaml);
|
||||||
|
expect(validation.ok).toBe(false);
|
||||||
|
expect(validation.issues).toEqual(
|
||||||
|
expect.arrayContaining([
|
||||||
|
expect.objectContaining({
|
||||||
|
path: 'llm.provider.vertex.location',
|
||||||
|
}),
|
||||||
|
]),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it('parses Claude Code as a first-class LLM backend', () => {
|
it('parses Claude Code as a first-class LLM backend', () => {
|
||||||
const config = parseKtxProjectConfig(`
|
const config = parseKtxProjectConfig(`
|
||||||
llm:
|
llm:
|
||||||
|
|
|
||||||
|
|
@ -30,13 +30,13 @@ const apiCredentialsSchema = z
|
||||||
const vertexProviderSchema = z
|
const vertexProviderSchema = z
|
||||||
.strictObject({
|
.strictObject({
|
||||||
project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
|
project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
|
||||||
location: z.string().default('').describe('Vertex AI region (e.g. "us-east5"). Empty string falls back to the SDK default.'),
|
location: z.string().min(1).describe('Vertex AI region (e.g. "us-east5"). Required whenever the vertex provider block is present.'),
|
||||||
})
|
})
|
||||||
.describe('Google Vertex AI provider configuration.');
|
.describe('Google Vertex AI provider configuration.');
|
||||||
|
|
||||||
const sentenceTransformersSchema = z
|
const sentenceTransformersSchema = z
|
||||||
.strictObject({
|
.strictObject({
|
||||||
base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) to use the project-managed local daemon.'),
|
base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) when the `ktx` CLI is expected to start and manage a local daemon for this project; programmatic consumers must populate it explicitly.'),
|
||||||
pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'),
|
pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'),
|
||||||
})
|
})
|
||||||
.describe('Sentence-transformers embedding server configuration.');
|
.describe('Sentence-transformers embedding server configuration.');
|
||||||
|
|
@ -83,7 +83,15 @@ const embeddingSchema = z
|
||||||
.default('none')
|
.default('none')
|
||||||
.describe('Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
|
.describe('Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
|
||||||
model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'),
|
model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'),
|
||||||
dimensions: z.int().positive().default(8).describe('Embedding vector dimensionality. Must match the chosen model when using a real provider.'),
|
dimensions: z
|
||||||
|
.int()
|
||||||
|
.positive()
|
||||||
|
.default(8)
|
||||||
|
.describe(
|
||||||
|
'Embedding vector dimensionality. The default value 8 is a placeholder that is only valid alongside backend: none; ' +
|
||||||
|
'before switching backend to openai/sentence-transformers, set this explicitly to match the chosen model ' +
|
||||||
|
'(e.g. 384 for all-MiniLM-L6-v2, 1536 for text-embedding-3-small).',
|
||||||
|
),
|
||||||
openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
|
openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
|
||||||
sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
|
sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
|
||||||
batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'),
|
batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'),
|
||||||
|
|
|
||||||
|
|
@ -67,6 +67,23 @@ describe('listConnectionIdsWithNames', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('loadSource', () => {
|
||||||
|
it('warns and returns null when an existing source file has invalid YAML', async () => {
|
||||||
|
const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
|
||||||
|
const configService = {
|
||||||
|
readFile: vi.fn().mockResolvedValue({ content: 'name: [' }),
|
||||||
|
};
|
||||||
|
const service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort, logger as never);
|
||||||
|
|
||||||
|
await expect(service.loadSource('warehouse', 'orders')).resolves.toBeNull();
|
||||||
|
|
||||||
|
expect(configService.readFile).toHaveBeenCalledWith('semantic-layer/warehouse/orders.yaml');
|
||||||
|
expect(logger.warn).toHaveBeenCalledWith(
|
||||||
|
expect.stringContaining('[loadSource] warehouse/orders.yaml: YAML parse failed:'),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('composeOverlay', () => {
|
describe('composeOverlay', () => {
|
||||||
it('carries top-level segments from overlay into the composed source', () => {
|
it('carries top-level segments from overlay into the composed source', () => {
|
||||||
const overlay = {
|
const overlay = {
|
||||||
|
|
@ -856,6 +873,22 @@ describe('loadAllSources — standalone enrichment via inherits_columns_from', (
|
||||||
expect(loadErrors.join('\n')).toContain(overlayPath);
|
expect(loadErrors.join('\n')).toContain(overlayPath);
|
||||||
expect(loadErrors.join('\n')).toContain("move it to 'column_overrides:'");
|
expect(loadErrors.join('\n')).toContain("move it to 'column_overrides:'");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('reports and logs directory listing failures instead of treating them as empty sources', async () => {
|
||||||
|
const logger = { log: vi.fn(), warn: vi.fn(), error: vi.fn() };
|
||||||
|
configService.listFiles.mockRejectedValue(new Error('permission denied'));
|
||||||
|
service = new SemanticLayerService(configService as never, connectionCatalog(), pythonPort, logger as never);
|
||||||
|
|
||||||
|
const { sources, loadErrors } = await service.loadAllSources('conn-1');
|
||||||
|
|
||||||
|
expect(sources).toEqual([]);
|
||||||
|
expect(loadErrors).toEqual([
|
||||||
|
'Failed to list semantic-layer files under semantic-layer/conn-1: permission denied',
|
||||||
|
]);
|
||||||
|
expect(logger.warn).toHaveBeenCalledWith(
|
||||||
|
'Failed to list semantic-layer files under semantic-layer/conn-1: permission denied',
|
||||||
|
);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('validateWithProposedSource', () => {
|
describe('validateWithProposedSource', () => {
|
||||||
|
|
|
||||||
|
|
@ -198,12 +198,25 @@ export class SemanticLayerService {
|
||||||
}
|
}
|
||||||
|
|
||||||
async loadSource(connectionId: string, sourceName: string): Promise<SemanticLayerSource | null> {
|
async loadSource(connectionId: string, sourceName: string): Promise<SemanticLayerSource | null> {
|
||||||
|
let content: string;
|
||||||
try {
|
try {
|
||||||
const { content } = await this.readSourceFile(connectionId, sourceName);
|
const result = await this.readSourceFile(connectionId, sourceName);
|
||||||
return YAML.parse(content) as SemanticLayerSource;
|
content = result.content;
|
||||||
} catch {
|
} catch {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
return YAML.parse(content) as SemanticLayerSource;
|
||||||
|
} catch (error) {
|
||||||
|
// Distinguish a YAML parse failure from a missing file. The file exists but
|
||||||
|
// its contents are unparseable — callers that treat null as "does not exist"
|
||||||
|
// could otherwise overwrite the broken file. Surface the parse failure via
|
||||||
|
// the service logger so the broken source is at least visible.
|
||||||
|
this.logger.warn(
|
||||||
|
`[loadSource] ${connectionId}/${sourceName}.yaml: YAML parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async loadAllSources(connectionId: string): Promise<LoadAllSourcesResult> {
|
async loadAllSources(connectionId: string): Promise<LoadAllSourcesResult> {
|
||||||
|
|
@ -215,7 +228,10 @@ export class SemanticLayerService {
|
||||||
try {
|
try {
|
||||||
const result = await this.configService.listFiles(dir);
|
const result = await this.configService.listFiles(dir);
|
||||||
allFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
allFiles = result.files.filter((f) => f.endsWith('.yaml'));
|
||||||
} catch {
|
} catch (e) {
|
||||||
|
const message = `Failed to list semantic-layer files under ${dir}: ${e instanceof Error ? e.message : String(e)}`;
|
||||||
|
loadErrors.push(message);
|
||||||
|
this.logger.warn(message);
|
||||||
return { sources: [], loadErrors };
|
return { sources: [], loadErrors };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,27 @@ function makeService() {
|
||||||
|
|
||||||
const fm: WikiFrontmatter = { summary: 'sum', usage_mode: 'auto' };
|
const fm: WikiFrontmatter = { summary: 'sum', usage_mode: 'auto' };
|
||||||
|
|
||||||
|
describe('KnowledgeWikiService file reads', () => {
|
||||||
|
it('warns and returns null when an existing page cannot be parsed', async () => {
|
||||||
|
const { service, configService, logger } = makeService();
|
||||||
|
configService.readFile.mockResolvedValue({ content: '---\nsummary: [\n---\nBody' });
|
||||||
|
|
||||||
|
await expect(service.readPage('GLOBAL', null, 'revenue')).resolves.toBeNull();
|
||||||
|
|
||||||
|
expect(configService.readFile).toHaveBeenCalledWith('wiki/global/revenue.md');
|
||||||
|
expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('[readPage] wiki/global/revenue.md: parse failed:'));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('warns and returns an empty page list when directory listing fails', async () => {
|
||||||
|
const { service, configService, logger } = makeService();
|
||||||
|
configService.listFiles.mockRejectedValue(new Error('filesystem unavailable'));
|
||||||
|
|
||||||
|
await expect(service.listPageKeys('GLOBAL', null)).resolves.toEqual([]);
|
||||||
|
|
||||||
|
expect(logger.warn).toHaveBeenCalledWith('[listPageKeys] wiki/global: filesystem unavailable');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('KnowledgeWikiService.syncIndex result stats', () => {
|
describe('KnowledgeWikiService.syncIndex result stats', () => {
|
||||||
it('reports scanned, updated, deleted, and embedding counts', async () => {
|
it('reports scanned, updated, deleted, and embedding counts', async () => {
|
||||||
const { service, pagesRepository, embeddingService, configService } = makeService();
|
const { service, pagesRepository, embeddingService, configService } = makeService();
|
||||||
|
|
|
||||||
|
|
@ -98,13 +98,25 @@ export class KnowledgeWikiService {
|
||||||
|
|
||||||
async readPage(scope: string, scopeId: string | null | undefined, pageKey: string): Promise<WikiPage | null> {
|
async readPage(scope: string, scopeId: string | null | undefined, pageKey: string): Promise<WikiPage | null> {
|
||||||
const path = this.pagePath(scope, scopeId, pageKey);
|
const path = this.pagePath(scope, scopeId, pageKey);
|
||||||
|
let raw: string;
|
||||||
try {
|
try {
|
||||||
const result = await this.configService.readFile(path);
|
const result = await this.configService.readFile(path);
|
||||||
const { frontmatter, content } = this.parsePage(result.content);
|
raw = result.content;
|
||||||
return { pageKey, frontmatter, content };
|
|
||||||
} catch {
|
} catch {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
const { frontmatter, content } = this.parsePage(raw);
|
||||||
|
return { pageKey, frontmatter, content };
|
||||||
|
} catch (error) {
|
||||||
|
// The file exists but parsing failed. Returning null without surfacing the
|
||||||
|
// parse error would let callers (and the memory agent) treat it as "page
|
||||||
|
// doesn't exist" and clobber the broken page on the next write.
|
||||||
|
this.logger.warn(
|
||||||
|
`[readPage] ${path}: parse failed: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async deletePage(
|
async deletePage(
|
||||||
|
|
@ -133,19 +145,23 @@ export class KnowledgeWikiService {
|
||||||
|
|
||||||
async listPageKeys(scope: string, scopeId?: string | null): Promise<string[]> {
|
async listPageKeys(scope: string, scopeId?: string | null): Promise<string[]> {
|
||||||
const dir = this.scopeDir(scope, scopeId);
|
const dir = this.scopeDir(scope, scopeId);
|
||||||
|
let files: string[];
|
||||||
try {
|
try {
|
||||||
const result = await this.configService.listFiles(dir);
|
const result = await this.configService.listFiles(dir);
|
||||||
return result.files
|
files = result.files;
|
||||||
.filter((f) => f.endsWith('.md'))
|
} catch (error) {
|
||||||
.map((f) => {
|
// listFiles returns [] for missing directories; reaching this catch means
|
||||||
// Strip the directory prefix and .md extension
|
// an IO-level failure that should at least be surfaced before we report
|
||||||
const name = f.replace(`${dir}/`, '').replace(/\.md$/, '');
|
// "no pages" the same as a freshly-initialised store would.
|
||||||
return name;
|
this.logger.warn(
|
||||||
})
|
`[listPageKeys] ${dir}: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
.filter(isFlatWikiKey);
|
);
|
||||||
} catch {
|
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
return files
|
||||||
|
.filter((f) => f.endsWith('.md'))
|
||||||
|
.map((f) => f.replace(`${dir}/`, '').replace(/\.md$/, ''))
|
||||||
|
.filter(isFlatWikiKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
async getPageHistory(scope: string, scopeId: string | null | undefined, pageKey: string) {
|
async getPageHistory(scope: string, scopeId: string | null | undefined, pageKey: string) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue