// File: ktx/packages/context/src/memory/memory-agent.service.ingest.test.ts
// Captured from a repository blame view (400 lines, 14 KiB, TypeScript;
// first blame timestamp 2026-05-10 23:12:26 +02:00).

import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
// Replace the 'ai' module for this whole file so `generateText` is a stub. These tests
// live apart from memory-agent.service.spec.ts so that the pure-helper tests there do
// not pick up this mock.
vi.mock('ai', () => {
  // Resolves to an empty completion with no tool calls.
  const generateTextStub = vi.fn().mockResolvedValue({ text: '', toolCalls: [] });
  // Identity stand-ins: each hands its argument straight back.
  const stepCountIs = (n: number) => n;
  const tool = (def: unknown) => def;

  return { generateText: generateTextStub, stepCountIs, tool };
});
// Imported AFTER vi.mock so the mocked module is used.
import { generateText } from 'ai';
import { SYSTEM_GIT_AUTHOR } from '../tools/index.js';
import { MemoryAgentService } from './memory-agent.service.js';
/**
 * Bag of collaborator doubles produced by `buildMocks`.
 *
 * Every member is deliberately `any`: the tests reach into individual stubs
 * (for example `mocks.gitService.squashMergeIntoMain.mockResolvedValue(...)`)
 * and swap whole entries through `Partial<BuiltMocks>` overrides, so precise
 * typing here would only force casts at every call site.
 */
interface BuiltMocks {
  appSettings: any;
  llmProvider: any;
  prompt: any;
  eventTracker: any;
  telemetry: any;
  skillsRegistry: any;
  wikiService: any;
  indexRepository: any;
  knowledgeSlRefsRepository: any;
  knowledgeRepository: any;
  embeddingService: any;
  semanticLayerService: any;
  slSearchService: any;
  dataSourcesService: any;
  configService: any;
  gitService: any;
  lockingService: any;
  slSourcesRepository: any;
  sessionWorktreeService: any;
  semanticLayerSourceReconciler: any;
  agentRunner: any;
  slValidator: any;
  toolsetFactory: any;
  logger: any;
}
/**
 * Builds the default set of collaborator stubs used by the ingest tests.
 *
 * The defaults describe a successful run: the session worktree is created, the
 * agent loop stops naturally, and the squash merge succeeds with commit
 * `cafebabe` touching `a.yaml`.
 *
 * @param overrides Top-level entries to replace. The merge is shallow, so an
 *   override swaps out the whole collaborator rather than patching single methods.
 * @returns A fresh mock bag; no stubs are shared between calls except those
 *   supplied through `overrides`.
 */
const buildMocks = (overrides: Partial<BuiltMocks> = {}): BuiltMocks => {
  // File store and git handle scoped to the session worktree (distinct from the root ones below).
  const scopedConfig = { writeFile: vi.fn(), deleteFile: vi.fn() };
  const scopedGit = { revParseHead: vi.fn().mockResolvedValue('basesha') };
  const sessionWorktree = {
    chatId: 'chat-1',
    workdir: '/tmp/wt/session-chat-1',
    branch: 'session/chat-1',
    baseSha: 'basesha',
    createdAt: new Date(),
    git: scopedGit,
    config: scopedConfig,
  };

  const defaults: BuiltMocks = {
    appSettings: {
      settings: {
        ai: {
          knowledge: { userScopedKnowledgeEnabled: false },
          slValidation: { probeRowCount: 1 },
        },
        llm: { memoryIngestionModel: 'test-model' },
      },
    },
    llmProvider: { getModel: vi.fn().mockReturnValue({}) },
    prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') },
    eventTracker: {
      trackEvent: vi.fn(),
      createTelemetryIntegration: vi.fn().mockReturnValue(undefined),
    },
    telemetry: {
      isEnabled: () => false,
      appSettingsService: { settings: { telemetry: { recordInputs: false, recordOutputs: false } } },
      systemConfigService: { config: { instance: { name: 'test-instance' } } },
    },
    skillsRegistry: {
      listSkills: vi.fn().mockResolvedValue([]),
      buildSkillsPrompt: vi.fn().mockReturnValue(''),
      getSkill: vi.fn(),
      stripFrontmatter: vi.fn(),
    },
    wikiService: {
      // `mockReturnThis` makes the worktree-scoped view the same stub object.
      forWorktree: vi.fn().mockReturnThis(),
      readPage: vi.fn(),
      syncSinglePage: vi.fn(),
      deleteFromIndex: vi.fn(),
    },
    indexRepository: { listPagesForUser: vi.fn().mockResolvedValue([]) },
    knowledgeSlRefsRepository: { syncFromWiki: vi.fn().mockResolvedValue({ inserted: 0, deleted: 0 }) },
    knowledgeRepository: {},
    embeddingService: { computeEmbedding: vi.fn() },
    semanticLayerService: {
      forWorktree: vi.fn().mockReturnThis(),
      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      readSourceFile: vi.fn(),
    },
    slSearchService: { indexSources: vi.fn(), buildSearchText: vi.fn() },
    dataSourcesService: {
      listEnabledConnections: vi.fn().mockResolvedValue([]),
      getConnectionById: vi.fn().mockResolvedValue({
        id: 'conn-1',
        name: 'Warehouse',
        connectionType: 'POSTGRESQL',
      }),
      executeQuery: vi.fn(),
    },
    configService: {
      enqueueCommitMessageJobForExternalCommit: vi.fn().mockResolvedValue(undefined),
      writeFile: vi.fn(),
      deleteFile: vi.fn(),
    },
    gitService: {
      revParseHead: vi.fn().mockResolvedValue('basesha'),
      squashMergeIntoMain: vi
        .fn()
        .mockResolvedValue({ ok: true, squashSha: 'cafebabe', touchedPaths: ['a.yaml'] }),
    },
    lockingService: {
      // Pass-through lock: runs the callback immediately without any queueing.
      withLock: vi.fn().mockImplementation((_key: string, fn: () => Promise<unknown>) => fn()),
    },
    slSourcesRepository: { deleteByConnectionAndName: vi.fn() },
    sessionWorktreeService: {
      create: vi.fn().mockResolvedValue(sessionWorktree),
      cleanup: vi.fn().mockResolvedValue(undefined),
    },
    semanticLayerSourceReconciler: { upsertRow: vi.fn() },
    agentRunner: { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' }) },
    slValidator: { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) },
    toolsetFactory: {
      createIngestWuToolset: vi.fn().mockReturnValue({
        toAiSdkTools: vi.fn().mockReturnValue({}),
        getAllTools: vi.fn().mockReturnValue([]),
      }),
      createToolset: vi.fn().mockReturnValue({
        toAiSdkTools: vi.fn().mockReturnValue({}),
        getAllTools: vi.fn().mockReturnValue([]),
      }),
    },
    logger: { log: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() },
  };

  return { ...defaults, ...overrides };
};
/**
 * Constructs a `MemoryAgentService` from a mock bag, mapping the test-side
 * names onto the dependency names the service constructor takes.
 *
 * @param mocks Collaborator doubles, typically from `buildMocks`.
 * @returns The service under test, wired exclusively to the supplied doubles.
 */
const buildService = (mocks: BuiltMocks): MemoryAgentService => {
  const { ai, llm } = mocks.appSettings.settings;
  const dataSources = mocks.dataSourcesService;

  return new MemoryAgentService({
    settings: {
      knowledge: {
        userScopedKnowledgeEnabled: ai.knowledge.userScopedKnowledgeEnabled,
      },
      slValidation: {
        probeRowCount: ai.slValidation.probeRowCount,
      },
      llm: {
        memoryIngestionModel: llm.memoryIngestionModel,
      },
    },
    promptService: mocks.prompt,
    skillsRegistry: mocks.skillsRegistry,
    wikiService: mocks.wikiService,
    knowledgeIndex: mocks.indexRepository,
    knowledgeSlRefs: mocks.knowledgeSlRefsRepository,
    semanticLayerService: mocks.semanticLayerService,
    slSearchService: mocks.slSearchService,
    connections: {
      // Honour a caller-supplied stub so overrides of `dataSourcesService` are not
      // silently ignored; fall back to an empty list when the override omits it.
      listEnabledConnections: dataSources.listEnabledConnections ?? vi.fn().mockResolvedValue([]),
      getConnectionById:
        dataSources.getConnectionById ??
        vi.fn().mockResolvedValue({
          id: 'conn-1',
          name: 'Warehouse',
          connectionType: 'POSTGRESQL',
        }),
      executeQuery: dataSources.executeQuery,
    },
    // The root (non-worktree) file store is the `configService` double.
    rootFileStore: mocks.configService,
    gitService: mocks.gitService,
    lockingService: mocks.lockingService,
    slSourcesRepository: mocks.slSourcesRepository,
    sessionWorktreeService: mocks.sessionWorktreeService,
    semanticLayerSourceReconciler: mocks.semanticLayerSourceReconciler,
    agentRunner: mocks.agentRunner,
    slValidator: mocks.slValidator,
    toolsetFactory: mocks.toolsetFactory,
    telemetry: {
      trackMemoryIngestion: mocks.eventTracker.trackEvent,
    },
    logger: mocks.logger,
  });
};
// Sized and worded (it contains a definition keyword) so that the ingest prefilter
// does not skip it.
const definitionMessage =
  'going forward exclude cancelled orders from revenue, this is the canonical definition';

/** Default `ingest()` input shared by the tests; individual cases spread and override it. */
const baseInput = { userId: 'u1', chatId: 'chat-1', userMessage: definitionMessage };
/** Typed handle on the module-mocked `generateText`, so tests can reprogram it. */
const generateTextMock = vi.mocked(generateText);

// Start every test from the same baseline: cleared call history and an empty completion.
beforeEach(function resetGenerateText() {
  generateTextMock.mockReset();
  const emptyCompletion = { text: '', toolCalls: [] };
  generateTextMock.mockResolvedValue(emptyCompletion as never);
});

// Undo any mocks or spies a test installed.
afterEach(function restoreMocks() {
  vi.restoreAllMocks();
});
// Covers the outcomes of a single ingest() run against the session-branch flow:
// success, debug logging, empty squash, merge conflict, and a crash after the loop.
describe('MemoryAgentService.ingest — session-branch orchestration', () => {
  it('happy path: creates worktree, runs LLM loop, squash-merges, enqueues note, cleans up', async () => {
    const mocks = buildMocks();
    const svc = buildService(mocks);

    const result = await svc.ingest(baseInput);

    // Phase 1: session worktree was created from main's HEAD.
    expect(mocks.sessionWorktreeService.create).toHaveBeenCalledWith('chat-1', 'basesha');
    // Phase 2: LLM loop ran with the assembled tools/system/prompt.
    expect(mocks.agentRunner.runLoop).toHaveBeenCalledOnce();
    // Phase 3: squash-merged onto main.
    expect(mocks.gitService.squashMergeIntoMain).toHaveBeenCalledWith(
      'session/chat-1',
      SYSTEM_GIT_AUTHOR.name,
      SYSTEM_GIT_AUTHOR.email,
      expect.stringContaining('[chat=chat-1]'),
    );
    // Note enqueue happened on the ROOT configService, not the scoped one. The single
    // touched path is passed as the diff scope.
    expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).toHaveBeenCalledWith(
      { commitHash: 'cafebabe' },
      expect.stringContaining('[chat=chat-1]'),
      'a.yaml',
    );
    // Cleanup ran with success.
    expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(
      expect.objectContaining({ chatId: 'chat-1' }),
      'success',
      expect.any(Object),
    );
    expect(result.commitHash).toBe('cafebabe');
  });

  it('logs prompt debug output when KTX_MEMORY_AGENT_DEBUG_PROMPTS is enabled', async () => {
    // Remember the ambient value so the finally block can put the environment back.
    const previousDebugPrompts = process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS;
    const mocks = buildMocks();
    const svc = buildService(mocks);
    try {
      process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS = '1';

      await svc.ingest(baseInput);

      expect(mocks.logger.debug).toHaveBeenCalledWith(
        expect.stringContaining('[memory-agent prompt-debug] system='),
      );
      expect(mocks.logger.debug).toHaveBeenCalledWith(
        expect.stringContaining('[memory-agent prompt-debug] user='),
      );
    } finally {
      // Assigning `undefined` to an env var stores the string 'undefined', so delete instead.
      if (previousDebugPrompts === undefined) {
        delete process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS;
      } else {
        process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS = previousDebugPrompts;
      }
    }
  });

  it('empty path: squash returns no touched paths → no enqueue, cleanup(empty), commitHash=null', async () => {
    const mocks = buildMocks();
    mocks.gitService.squashMergeIntoMain.mockResolvedValue({
      ok: true,
      squashSha: 'basesha',
      touchedPaths: [],
    });
    const svc = buildService(mocks);

    const result = await svc.ingest(baseInput);

    expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).not.toHaveBeenCalled();
    expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(
      expect.any(Object),
      'empty',
      expect.any(Object),
    );
    expect(result.commitHash).toBeNull();
  });

  it('conflict path: rolls back DB, cleanup(conflict, conflictPaths), returns commitHash=null with empty actions', async () => {
    const mocks = buildMocks();
    mocks.gitService.squashMergeIntoMain.mockResolvedValue({
      ok: false,
      conflict: true,
      conflictPaths: ['semantic-layer/conn-x/fct_intakes.yaml'],
    });
    // Have the wikiService report a still-existing page in main, so rollback re-syncs.
    mocks.wikiService.readPage.mockResolvedValue({
      pageKey: 'phantom',
      frontmatter: { summary: 'x', usage_mode: 'auto' },
      content: 'body',
    });
    const svc = buildService(mocks);

    const result = await svc.ingest(baseInput);

    expect(mocks.gitService.squashMergeIntoMain).toHaveBeenCalled();
    // Cleanup got the conflict outcome + the paths.
    expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'conflict', {
      conflictPaths: ['semantic-layer/conn-x/fct_intakes.yaml'],
    });
    expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).not.toHaveBeenCalled();
    expect(result.commitHash).toBeNull();
    expect(result.actions).toEqual([]);
  });

  it('crash path: post-loop step throws → cleanup(crash), commitHash=null', async () => {
    const mocks = buildMocks();
    // The squash merge is the post-loop step made to fail. The cross-ref reconciler
    // (`syncFromWiki`) keeps its default resolved stub; per the original author's note
    // it is not reached when the stubbed loop produces no wiki actions.
    mocks.gitService.squashMergeIntoMain.mockRejectedValue(new Error('git crashed'));
    const svc = buildService(mocks);

    const result = await svc.ingest(baseInput);

    expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(
      expect.any(Object),
      'crash',
      expect.any(Object),
    );
    expect(result.commitHash).toBeNull();
  });
});
// Guards against two concurrent ingests collapsing into a single squash commit.
describe('MemoryAgentService.ingest — concurrency regression', () => {
  it('two parallel ingest() calls produce distinct squash commits (no absorption)', async () => {
    // FIFO lock double: every acquisition waits on the promise left by the previous
    // holder and leaves a new one for its successor. The original author describes this
    // as the same shape as production withLock. With it in place, the test checks that
    // two parallel ingests get through both worktree creation and squash merge without
    // deadlocking, and end up with different commits.
    let lockTail: Promise<void> = Promise.resolve();
    const runExclusive = async (_key: string, fn: () => Promise<unknown>) => {
      const predecessor = lockTail;
      let release!: () => void;
      lockTail = new Promise<void>((resolve) => {
        release = resolve;
      });
      await predecessor;
      try {
        return await fn();
      } finally {
        release();
      }
    };
    const lockingService = { withLock: vi.fn().mockImplementation(runExclusive) };

    // Each create() call yields a worktree named after its chat id.
    let createCount = 0;
    const makeWorktree = async (chatId: string) => {
      createCount += 1;
      return {
        chatId,
        workdir: `/tmp/wt/session-${chatId}`,
        branch: `session/${chatId}`,
        baseSha: 'basesha',
        createdAt: new Date(),
        git: { revParseHead: vi.fn().mockResolvedValue('basesha') },
        config: { writeFile: vi.fn() },
      };
    };
    const sessionWorktreeService = {
      create: vi.fn().mockImplementation(makeWorktree),
      cleanup: vi.fn().mockResolvedValue(undefined),
    };

    // Each merge reports a sha and touched path derived from a running counter.
    let mergeCount = 0;
    const squashOnce = async () => {
      mergeCount += 1;
      return {
        ok: true,
        squashSha: `sha-${mergeCount}`,
        touchedPaths: [`${mergeCount}.yaml`],
      };
    };
    const gitService = {
      revParseHead: vi.fn().mockResolvedValue('basesha'),
      squashMergeIntoMain: vi.fn().mockImplementation(squashOnce),
    };

    // Both services share the lock, worktree factory and git double; everything else is fresh.
    const sharedOverrides = { lockingService, sessionWorktreeService, gitService };
    const mocksA = buildMocks(sharedOverrides);
    const mocksB = buildMocks(sharedOverrides);
    const svcA = buildService(mocksA);
    const svcB = buildService(mocksB);

    const pendingA = svcA.ingest({ ...baseInput, chatId: 'chat-A' });
    const pendingB = svcB.ingest({ ...baseInput, chatId: 'chat-B' });
    const [first, second] = await Promise.all([pendingA, pendingB]);

    expect(createCount).toBe(2);
    expect(gitService.squashMergeIntoMain).toHaveBeenCalledTimes(2);
    expect(first.commitHash).not.toBeNull();
    expect(second.commitHash).not.toBeNull();
    expect(first.commitHash).not.toBe(second.commitHash);
  });
});