Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,128 @@
import type { CaptureSignals, MemoryAgentInput, MemoryAgentSourceType } from './types.js';
/** Case-insensitive match for SQL aggregate functions, GROUP BY, JOIN, or the opening of a `WITH <name> AS (` clause. */
const SQL_AGGREGATE_PATTERN = /\b(SUM|AVG|COUNT|MIN|MAX|GROUP\s+BY|JOIN|WITH\s+\w+\s+AS\s*\()\b/i;
/** Case-insensitive keywords/phrases that, in a user message, flag a semantic-layer ("sl") capture candidate. */
const SL_DEFINITION_PATTERN = /\b(define|going forward|always (apply|exclude)|treat as|cohort|reusable)\b/i;
/** Case-insensitive keywords/phrases that, in a user message, flag a knowledge capture candidate. */
const KNOWLEDGE_DEFINITION_PATTERN =
/\b(define|going forward|alias|stands for|means|convention|is the (canonical|definition))\b/i;
/** Matches a table separator cell such as `| --- |` (pipe, three or more dashes, pipe). */
const TABLE_SEPARATOR_PATTERN = /\|\s*-{3,}\s*\|/;
/** Multiline: a line starting with `view:`, `explore:`, `model:` or `include:` followed by a word character, quote or backtick. */
const LOOKML_STRUCTURAL_PATTERN = /^\s*(view|explore|model|include)\s*:\s*[\w"`]/m;
/** Multiline: a line starting with one of the listed LookML field/property keys followed by `:`. */
const LOOKML_FIELDS_PATTERN =
/^\s*(measure|dimension|dimension_group|sql_table_name|derived_table|sql_always_where|drill_fields|join)\s*:/m;
/** Exported tuple of default skill names. NOTE(review): consumers are not visible in this file. */
export const DEFAULT_SKILL_NAMES = ['sl', 'sl_capture', 'knowledge_capture'] as const;
/**
 * Pre-scans one conversation turn for textual hints that it is worth capturing.
 *
 * Both messages are trimmed first; a missing message is treated as empty and never matches.
 *
 * @param input - The turn to inspect (`userMessage` / `assistantMessage` are read).
 * @returns Flags for knowledge and semantic-layer capture, the detected dialect (only
 *   `'lookml'` is ever set here) and the human-readable reasons, in detection order.
 */
export function detectCaptureSignals(input: MemoryAgentInput): CaptureSignals {
  const userText = input.userMessage?.trim() ?? '';
  const assistantText = input.assistantMessage?.trim() ?? '';
  const hasUserText = userText.length > 0;
  const hasAssistantText = assistantText.length > 0;

  // Semantic-layer hints.
  const sqlAggregateHit = hasAssistantText && userText.length >= 100 && SQL_AGGREGATE_PATTERN.test(assistantText);
  const slKeywordHit = hasUserText && SL_DEFINITION_PATTERN.test(userText);

  // Knowledge hints.
  const knowledgeKeywordHit = hasUserText && KNOWLEDGE_DEFINITION_PATTERN.test(userText);
  const definitionTableHit = hasAssistantText && TABLE_SEPARATOR_PATTERN.test(assistantText);

  // LookML needs both a structural line and a field line in the assistant message.
  const lookmlHit =
    hasAssistantText && LOOKML_STRUCTURAL_PATTERN.test(assistantText) && LOOKML_FIELDS_PATTERN.test(assistantText);

  // Reasons are appended in a fixed order; buildRequiredSkillsBlock picks the first match.
  const reasons: string[] = [];
  if (sqlAggregateHit) {
    reasons.push('sql aggregate in assistant message');
  }
  if (slKeywordHit) {
    reasons.push('sl-style definition keyword in user message');
  }
  if (knowledgeKeywordHit) {
    reasons.push('definition keyword in user message');
  }
  if (definitionTableHit) {
    reasons.push('definition table in assistant message');
  }
  if (lookmlHit) {
    reasons.push('lookml structure in assistant message');
  }

  const dialect: CaptureSignals['dialect'] = lookmlHit ? 'lookml' : undefined;
  return {
    knowledge: knowledgeKeywordHit || definitionTableHit,
    sl: sqlAggregateHit || slKeywordHit || lookmlHit,
    dialect,
    reasons,
  };
}
/**
 * Renders the `<required_skills>` prompt block for the skills implied by `signals`.
 *
 * One bullet is emitted per required skill, in the order knowledge_capture, sl,
 * lookml_ingest, each with the first matching entry of `signals.reasons` (or a generic
 * fallback when none matches).
 *
 * @param signals - Pre-scan result, typically from `detectCaptureSignals`.
 * @returns The block text, or an empty string when no skill is required.
 */
export function buildRequiredSkillsBlock(signals: CaptureSignals): string {
  const required: Array<{ name: string; reason: string }> = [];
  if (signals.knowledge) {
    // Anchor at the start of the reason: the sl reason 'sl-style definition keyword in
    // user message' also *contains* 'definition keyword' and is pushed earlier, so a
    // substring match would attribute the sl reason to knowledge_capture.
    const reason =
      signals.reasons.find((r) => r.startsWith('definition keyword') || r.startsWith('definition table')) ??
      'wiki signal detected';
    required.push({ name: 'knowledge_capture', reason });
  }
  if (signals.sl) {
    const reason =
      signals.reasons.find((r) => r.includes('sql aggregate') || r.includes('sl-style')) ?? 'sl signal detected';
    required.push({ name: 'sl', reason });
  }
  if (signals.dialect === 'lookml') {
    const reason = signals.reasons.find((r) => r.includes('lookml')) ?? 'lookml dialect detected';
    required.push({ name: 'lookml_ingest', reason });
  }
  if (required.length === 0) {
    return '';
  }
  const lines = required.map((r) => `- \`${r.name}\` - reason: ${r.reason}`).join('\n');
  return [
    '<required_skills>',
    'The pre-scan flagged this turn as a likely capture candidate. Before exiting, you MUST `load_skill` for each skill below and follow its workflow. Skipping a required skill means a likely capture is being missed; only skip if, after reading the skill body and the turn, you are sure no capture applies.',
    '',
    lines,
    '</required_skills>',
  ].join('\n');
}
/**
 * Cheap pre-filter deciding whether a turn can be skipped without running the agent.
 *
 * @param input - The turn under consideration.
 * @param signals - Pre-scan result; computed from `input` when omitted.
 * @returns A short skip reason, or `null` when the turn should be analyzed.
 */
export function prefilterSkipReason(input: MemoryAgentInput, signals = detectCaptureSignals(input)): string | null {
  const assistantText = input.assistantMessage ?? '';
  const userLength = (input.userMessage?.trim() ?? '').length;

  // A very short user message is only kept when the assistant text mentions a SQL aggregate.
  const mentionsSqlAggregate = /\b(SUM|AVG|COUNT|MIN|MAX|GROUP\s+BY)\b/i.test(assistantText);
  if (userLength < 6 && !mentionsSqlAggregate) {
    return 'message too short, no SQL keywords';
  }

  if (signals.dialect !== 'lookml') {
    return null;
  }

  // LookML is kept only when it has a structural key or an aggregate type other than plain count.
  const hasStructuralKey = /^\s*(derived_table|sql_always_where|join)\s*:/m.test(assistantText);
  const hasNonCountAggregate = /\btype:\s*(sum|average|avg|min|max|count_distinct|median|percentile)\b/i.test(
    assistantText,
  );
  return hasStructuralKey || hasNonCountAggregate ? null : 'no semantic signal (lookml-wrapper)';
}
/**
 * Convenience wrapper: `true` when the pre-filter finds no reason to skip the turn.
 *
 * @param input - The turn under consideration.
 */
export function isWorthAnalyzing(input: MemoryAgentInput): boolean {
  const signals = detectCaptureSignals(input);
  const skipReason = prefilterSkipReason(input, signals);
  return skipReason === null;
}
/**
 * Maps a memory-agent source type to its numeric step budget:
 * `research` → 20, `backfill` / `sql-review-migration` → 25, `external_ingest` → 30.
 *
 * @param sourceType - Source type of the run.
 */
export function stepBudgetFor(sourceType: MemoryAgentSourceType): number {
  // Kept as a switch so the compiler's exhaustiveness analysis covers the missing default.
  switch (sourceType) {
    case 'external_ingest':
      return 30;
    case 'sql-review-migration':
    case 'backfill':
      return 25;
    case 'research':
      return 20;
  }
}
/**
 * Picks the prompt name for a memory-agent run.
 *
 * `external_ingest` and the two backfill-style source types have dedicated prompts;
 * every other value falls back to the research prompt.
 *
 * @param sourceType - Source type of the run.
 */
export function promptNameFor(sourceType: MemoryAgentSourceType): string {
  if (sourceType === 'external_ingest') {
    return 'memory_agent_external_ingest';
  }
  if (sourceType === 'backfill' || sourceType === 'sql-review-migration') {
    return 'memory_agent_backfill';
  }
  return 'memory_agent_research';
}

View file

@ -0,0 +1,41 @@
export {
buildRequiredSkillsBlock,
DEFAULT_SKILL_NAMES,
detectCaptureSignals,
isWorthAnalyzing,
prefilterSkipReason,
promptNameFor,
stepBudgetFor,
} from './capture-signals.js';
export { MemoryAgentService } from './memory-agent.service.js';
export { createLocalProjectMemoryCapture, type CreateLocalProjectMemoryCaptureOptions } from './local-memory.js';
export { LocalMemoryRunStore, type LocalMemoryRunStoreOptions } from './local-memory-runs.js';
export {
MemoryCaptureService,
type MemoryCaptureServiceDeps,
type MemoryCaptureStartResult,
type MemoryCaptureStatus,
type MemoryRunRecord,
type MemoryRunStatus,
type MemoryRunStorePort,
} from './memory-runs.js';
export type {
CaptureSession,
CaptureSignals,
MemoryAction,
MemoryAgentInput,
MemoryAgentResult,
MemoryAgentServiceDeps,
MemoryAgentSettings,
MemoryAgentSourceType,
MemoryCommitMessagePort,
MemoryConnectionPort,
MemoryFileStorePort,
MemoryKnowledgeSlRefsPort,
MemoryLockPort,
MemorySlSourceReconcilerPort,
MemoryTelemetryPort,
MemoryToolSetLike,
MemoryToolsetFactoryPort,
} from './types.js';

View file

@ -0,0 +1,211 @@
import { randomUUID } from 'node:crypto';
import { mkdirSync } from 'node:fs';
import { dirname, join } from 'node:path';
import Database from 'better-sqlite3';
import type { MemoryRunRecord, MemoryRunStatus, MemoryRunStorePort } from './memory-runs.js';
import type { MemoryAgentResult } from './types.js';
/** Construction options for {@link LocalMemoryRunStore}. */
export interface LocalMemoryRunStoreOptions {
/** Project root; the SQLite file lives at `<projectDir>/.klo/db.sqlite`. */
projectDir: string;
/** Optional run-id generator; when omitted the store uses `memory-<random UUID>`. */
idFactory?: () => string;
}
/** Raw row shape selected from the `local_memory_runs` table (snake_case column names). */
type MemoryRunRow = {
id: string;
// Stored as free text; validated with isMemoryRunStatus before being exposed.
status: string;
stage: string;
input_hash: string;
chat_id: string | null;
// JSON-serialized MemoryAgentResult, or NULL when no summary has been stored.
output_summary_json: string | null;
error: string | null;
};
/** Returns the path of the project's local SQLite database: `<projectDir>/.klo/db.sqlite`. */
function localMemoryDbPath(projectDir: string): string {
  const relativeSegments = ['.klo', 'db.sqlite'];
  return join(projectDir, ...relativeSegments);
}
/**
 * Checks that a run id starts with an ASCII letter or digit and otherwise contains only
 * ASCII letters, digits, `_`, `.` and `-`.
 */
function isSafeRunId(runId: string): boolean {
  const safeRunIdPattern = /^[a-zA-Z0-9][a-zA-Z0-9_.-]*$/;
  return safeRunIdPattern.test(runId);
}
/** Type guard: `true` only for the exact strings `'running'`, `'done'` and `'error'`. */
function isMemoryRunStatus(value: unknown): value is MemoryRunStatus {
  switch (value) {
    case 'running':
    case 'done':
    case 'error':
      return true;
    default:
      return false;
  }
}
/**
 * Deserializes a stored output summary.
 *
 * @param raw - JSON text from the `output_summary_json` column.
 * @returns `null` for `null` or an empty string, otherwise the parsed value. The parsed
 *   value is cast, not validated.
 * @throws SyntaxError when `raw` is non-empty but not valid JSON.
 */
function parseOutputSummary(raw: string | null): MemoryAgentResult | null {
  return raw ? (JSON.parse(raw) as MemoryAgentResult) : null;
}
/**
 * Converts a raw database row into the camelCase run record.
 *
 * @returns `null` when the stored status is not a recognized run status; otherwise the
 *   record with its output summary parsed from JSON.
 */
function rowToRecord(row: MemoryRunRow): MemoryRunRecord | null {
  const { status } = row;
  if (!isMemoryRunStatus(status)) {
    return null;
  }
  const record: MemoryRunRecord = {
    id: row.id,
    status,
    stage: row.stage,
    inputHash: row.input_hash,
    chatId: row.chat_id,
    outputSummary: parseOutputSummary(row.output_summary_json),
    error: row.error,
  };
  return record;
}
/**
 * SQLite-backed implementation of MemoryRunStorePort.
 *
 * Run state is kept in the `local_memory_runs` table of `<projectDir>/.klo/db.sqlite`.
 * The methods are declared `async` to satisfy the port, but the database calls
 * themselves are issued synchronously.
 */
export class LocalMemoryRunStore implements MemoryRunStorePort {
private readonly db: Database.Database;
private readonly idFactory: () => string;
/**
 * Opens the database (creating the containing directory if needed), sets the WAL journal
 * mode and foreign-key pragma, and creates the runs table and its index when absent.
 */
constructor(options: LocalMemoryRunStoreOptions) {
const dbPath = localMemoryDbPath(options.projectDir);
mkdirSync(dirname(dbPath), { recursive: true });
this.db = new Database(dbPath);
this.db.pragma('journal_mode = WAL');
this.db.pragma('foreign_keys = ON');
this.db.exec(`
CREATE TABLE IF NOT EXISTS local_memory_runs (
id TEXT PRIMARY KEY,
status TEXT NOT NULL,
stage TEXT NOT NULL,
input_hash TEXT NOT NULL,
chat_id TEXT,
output_summary_json TEXT,
error TEXT,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS local_memory_runs_status_updated_idx
ON local_memory_runs (status, updated_at DESC);
`);
this.idFactory = options.idFactory ?? (() => `memory-${randomUUID()}`);
}
/**
 * Inserts a new run with status `running` and stage `queued`.
 *
 * @returns The id produced by the id factory.
 */
async createRunning(args: { inputHash: string; chatId?: string | null }): Promise<{ id: string }> {
// One ISO timestamp is used for both created_at and updated_at.
const now = new Date().toISOString();
const id = this.idFactory();
this.db
.prepare(
`
INSERT INTO local_memory_runs (
id,
status,
stage,
input_hash,
chat_id,
output_summary_json,
error,
created_at,
updated_at
)
VALUES (
@id,
'running',
'queued',
@inputHash,
@chatId,
NULL,
NULL,
@now,
@now
)
`,
)
.run({
id,
inputHash: args.inputHash,
chatId: args.chatId ?? null,
now,
});
return { id };
}
/** Sets the run to status `running` at the given stage and clears any stored summary and error. */
async markRunning(id: string, stage: string): Promise<void> {
this.updateRun(id, {
status: 'running',
stage,
outputSummaryJson: null,
error: null,
});
}
/** Sets the run to `done`, stores the JSON-serialized summary and clears any error. */
async markDone(id: string, outputSummary: MemoryAgentResult): Promise<void> {
this.updateRun(id, {
status: 'done',
stage: 'done',
outputSummaryJson: JSON.stringify(outputSummary),
error: null,
});
}
/** Sets the run to `error`, stores the error text and clears any stored summary. */
async markError(id: string, error: string): Promise<void> {
this.updateRun(id, {
status: 'error',
stage: 'error',
outputSummaryJson: null,
error,
});
}
/**
 * Looks up a run by id.
 *
 * @returns `null` when the id fails the safe-id check, when no row exists, or when the
 *   stored status is not a recognized run status.
 */
async findById(id: string): Promise<MemoryRunRecord | null> {
// Ids that fail the format check are rejected before touching the database.
if (!isSafeRunId(id)) {
return null;
}
const row = this.db
.prepare(
`
SELECT
id,
status,
stage,
input_hash,
chat_id,
output_summary_json,
error
FROM local_memory_runs
WHERE id = ?
`,
)
.get(id) as MemoryRunRow | undefined;
return row ? rowToRecord(row) : null;
}
/**
 * Shared UPDATE used by the mark* methods; also refreshes `updated_at`.
 *
 * @throws Error when no row with the given id was changed.
 */
private updateRun(
id: string,
input: {
status: MemoryRunStatus;
stage: string;
outputSummaryJson: string | null;
error: string | null;
},
): void {
const result = this.db
.prepare(
`
UPDATE local_memory_runs
SET
status = @status,
stage = @stage,
output_summary_json = @outputSummaryJson,
error = @error,
updated_at = @updatedAt
WHERE id = @id
`,
)
.run({
id,
status: input.status,
stage: input.stage,
outputSummaryJson: input.outputSummaryJson,
error: input.error,
updatedAt: new Date().toISOString(),
});
if (result.changes === 0) {
throw new Error(`Memory run not found: ${id}`);
}
}
}

View file

@ -0,0 +1,204 @@
import { access, mkdtemp, readFile, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { initKloProject } from '../project/index.js';
import { createLocalProjectMemoryCapture } from './local-memory.js';
import { LocalMemoryRunStore } from './local-memory-runs.js';
// Replace the 'ai' module with stubs for the names listed below.
vi.mock('ai', () => ({
// Resolves to an empty text response with no tool calls.
generateText: vi.fn().mockResolvedValue({ text: '', toolCalls: [] }),
// Returns its argument unchanged.
stepCountIs: (stepBudget: number) => stepBudget,
// Returns the tool definition unchanged.
tool: (definition: unknown) => definition,
}));
/** Asserts that `access(path)` rejects, i.e. the path cannot be accessed. */
async function expectPathMissing(path: string): Promise<void> {
  const probe = access(path);
  await expect(probe).rejects.toThrow();
}
// Exercises LocalMemoryRunStore against a real SQLite file in a per-test temp directory.
describe('LocalMemoryRunStore', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'klo-local-memory-runs-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('persists running, done, and reloadable memory run status in SQLite', async () => {
// A fixed id factory keeps the run id predictable for the assertions below.
const store = new LocalMemoryRunStore({
projectDir: tempDir,
idFactory: () => 'memory-run-1',
});
const created = await store.createRunning({ inputHash: 'hash-1', chatId: 'chat-1' });
expect(created).toEqual({ id: 'memory-run-1' });
await store.markRunning('memory-run-1', 'capturing');
await store.markDone('memory-run-1', {
signalDetected: true,
actions: [{ target: 'wiki', type: 'created', key: 'revenue', detail: 'Revenue definition' }],
skillsLoaded: ['knowledge_capture'],
commitHash: 'abc123',
});
// State must live in the SQLite file; no per-run JSON file is expected.
await expect(access(join(tempDir, '.klo/db.sqlite'))).resolves.toBeUndefined();
await expectPathMissing(join(tempDir, '.klo/memory-runs/memory-run-1.json'));
await expect(store.findById('memory-run-1')).resolves.toMatchObject({
id: 'memory-run-1',
status: 'done',
stage: 'done',
inputHash: 'hash-1',
chatId: 'chat-1',
error: null,
outputSummary: {
signalDetected: true,
commitHash: 'abc123',
},
});
// A second store instance on the same directory must read the same persisted row.
const reloaded = new LocalMemoryRunStore({ projectDir: tempDir });
await expect(reloaded.findById('memory-run-1')).resolves.toMatchObject({
id: 'memory-run-1',
status: 'done',
stage: 'done',
inputHash: 'hash-1',
chatId: 'chat-1',
outputSummary: {
actions: [{ target: 'wiki', type: 'created', key: 'revenue', detail: 'Revenue definition' }],
skillsLoaded: ['knowledge_capture'],
signalDetected: true,
commitHash: 'abc123',
},
});
});
});
// End-to-end tests of createLocalProjectMemoryCapture using a stubbed agent runner that
// drives the provided tool set directly instead of calling an LLM.
describe('createLocalProjectMemoryCapture', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'klo-local-memory-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('captures a wiki page through the local memory agent and persists pollable status', async () => {
const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' });
// Stub runner: loads the knowledge_capture skill, writes one wiki page, then stops.
const agentRunner = {
runLoop: async ({
toolSet,
}: {
toolSet: Record<string, { execute: (input: unknown, options?: { toolCallId?: string }) => Promise<unknown> }>;
}) => {
await toolSet.load_skill.execute({ name: 'knowledge_capture' });
await toolSet.wiki_write.execute(
{
key: 'revenue',
summary: 'Revenue definition',
content: 'Revenue means paid order value net of refunds.',
tags: ['finance'],
},
{ toolCallId: 'wiki-write' },
);
return { stopReason: 'natural' as const };
},
};
const capture = createLocalProjectMemoryCapture(project, {
agentRunner: agentRunner as never,
runIdFactory: () => 'memory-run-1',
});
await expect(
capture.capture({
userId: 'local-user',
chatId: 'chat-1',
userMessage: 'define revenue as paid order value net of refunds',
assistantMessage: 'Captured.',
sourceType: 'external_ingest',
}),
).resolves.toEqual({ runId: 'memory-run-1' });
await capture.waitForRun('memory-run-1');
// Run status is expected in SQLite, not in a per-run JSON file.
await expect(access(join(project.projectDir, '.klo/db.sqlite'))).resolves.toBeUndefined();
await expectPathMissing(join(project.projectDir, '.klo/memory-runs/memory-run-1.json'));
await expect(capture.status('memory-run-1')).resolves.toMatchObject({
runId: 'memory-run-1',
status: 'done',
done: true,
captured: { wiki: ['revenue'], sl: [], xrefs: [] },
skillsLoaded: ['knowledge_capture'],
signalDetected: true,
});
// The page written by the stub must exist under the global knowledge directory.
await expect(readFile(join(project.projectDir, 'knowledge/global/revenue.md'), 'utf-8')).resolves.toContain(
'Revenue means paid order value net of refunds.',
);
});
it('captures a semantic-layer source for a named local connection id', async () => {
const project = await initKloProject({ projectDir: tempDir, projectName: 'warehouse' });
// Register the connection the stubbed agent writes a source for.
project.config.connections.warehouse = { driver: 'postgres', readonly: true };
// Stub runner: loads the sl skill, writes one semantic-layer source, then stops.
const agentRunner = {
runLoop: async ({
toolSet,
}: {
toolSet: Record<string, { execute: (input: unknown, options?: { toolCallId?: string }) => Promise<unknown> }>;
}) => {
await toolSet.load_skill.execute({ name: 'sl' });
await toolSet.sl_write_source.execute(
{
connectionId: 'warehouse',
sourceName: 'orders',
source: {
name: 'orders',
table: 'public.orders',
grain: ['id'],
columns: [{ name: 'id', type: 'number' }],
joins: [],
measures: [{ name: 'order_count', expr: 'count(*)' }],
},
},
{ toolCallId: 'sl-write' },
);
return { stopReason: 'natural' as const };
},
};
const capture = createLocalProjectMemoryCapture(project, {
agentRunner: agentRunner as never,
runIdFactory: () => 'memory-run-2',
});
await capture.capture({
userId: 'local-user',
chatId: 'chat-2',
userMessage: 'going forward define orders count as count of public orders',
assistantMessage: 'Captured.',
connectionId: 'warehouse',
sourceType: 'external_ingest',
});
await capture.waitForRun('memory-run-2');
await expect(access(join(project.projectDir, '.klo/db.sqlite'))).resolves.toBeUndefined();
await expectPathMissing(join(project.projectDir, '.klo/memory-runs/memory-run-2.json'));
await expect(capture.status('memory-run-2')).resolves.toMatchObject({
runId: 'memory-run-2',
status: 'done',
captured: { wiki: [], sl: ['orders'], xrefs: [] },
skillsLoaded: ['sl'],
signalDetected: true,
});
// The source written by the stub must exist as YAML under the connection's directory.
await expect(readFile(join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'), 'utf-8')).resolves.toContain(
'order_count',
);
});
});

View file

@ -0,0 +1,482 @@
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import type { KloLlmProvider } from '@klo/llm';
import YAML from 'yaml';
import { AgentRunnerService } from '../agent/index.js';
import { localConnectionInfoFromConfig } from '../connections/index.js';
import type { KloEmbeddingPort, KloFileStorePort, KloFileWriteResult } from '../core/index.js';
import { type KloLogger, noopLogger, SessionWorktreeService } from '../core/index.js';
import type { KloSemanticLayerComputePort } from '../daemon/index.js';
import { createLocalKloLlmProviderFromConfig } from '../llm/index.js';
import type { KloLocalProject } from '../project/index.js';
import { PromptService } from '../prompts/index.js';
import { SkillsRegistryService } from '../skills/index.js';
import {
type KloConnectionInfo,
type KloQueryResult,
SemanticLayerService,
type SemanticLayerSource,
type SlConnectionCatalogPort,
SlDiscoverTool,
SlEditSourceTool,
type SlPythonPort,
SlReadSourceTool,
SlRollbackTool,
SlSearchService,
type SlSourcesIndexPort,
SlValidateTool,
type SlValidationDeps,
type SlValidatorPort,
SlWriteSourceTool,
SqliteSlSourcesIndex,
sourceDefinitionSchema,
sourceOverlaySchema,
} from '../sl/index.js';
import { BaseTool, type GitAuthorResolverPort, type ToolContext } from '../tools/index.js';
import {
type KnowledgeEventPort,
type KnowledgeIndexPort,
KnowledgeWikiService,
searchLocalKnowledgePages,
WikiListTagsTool,
WikiReadTool,
WikiRemoveTool,
WikiSearchTool,
WikiWriteTool,
} from '../wiki/index.js';
import { LocalMemoryRunStore } from './local-memory-runs.js';
import { MemoryAgentService } from './memory-agent.service.js';
import { MemoryCaptureService } from './memory-runs.js';
import type {
MemoryConnectionPort,
MemoryFileStorePort,
MemoryKnowledgeSlRefsPort,
MemorySlSourceReconcilerPort,
MemoryToolSetLike,
MemoryToolsetFactoryPort,
} from './types.js';
// Filesystem path of `../../prompts`, resolved relative to this module's URL.
const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url));
// Filesystem path of `../../skills`, resolved relative to this module's URL.
const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url));
// Fixed author identity returned by LocalAuthorResolver.
const LOCAL_AUTHOR = { name: 'KLO Local', email: 'local@klo.local' };
// Warning attached to validation results that only checked the YAML shape.
const LOCAL_SHAPE_WARNING = 'Local memory capture validates semantic-layer YAML shape only.';
/** Optional overrides for {@link createLocalProjectMemoryCapture}; every field may be omitted. */
export interface CreateLocalProjectMemoryCaptureOptions {
/** LLM provider to use instead of one created from the project's `llm` config. */
llmProvider?: KloLlmProvider;
/** Pre-built agent runner; when supplied, no LLM provider is required. */
agentRunner?: AgentRunnerService;
/** Presumably the model id for the memory agent — NOTE(review): confirm how this is consumed. */
memoryModel?: string;
/** Semantic-layer compute adapter; without it validation only emits a shape warning and queries return an error. */
semanticLayerCompute?: KloSemanticLayerComputePort;
/** Executor used for connection queries; without it `executeQuery` throws. */
queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KloQueryResult> };
/** Run-id generator forwarded to the local run store. */
runIdFactory?: () => string;
/** Logger; defaults to the no-op logger. */
logger?: KloLogger;
}
/**
 * Wires a {@link MemoryCaptureService} for a local project.
 *
 * Filesystem-backed or no-op adapters stand in for the ports the memory agent needs, and
 * run status is stored in the project's `.klo/db.sqlite`.
 *
 * @param project - The opened local project (file store, git, config).
 * @param options - Optional overrides; see {@link CreateLocalProjectMemoryCaptureOptions}.
 * @returns A capture service backed by the local memory agent and run store.
 * @throws Error when neither `options.agentRunner` nor a usable LLM provider is available.
 */
export function createLocalProjectMemoryCapture(
  project: KloLocalProject,
  options: CreateLocalProjectMemoryCaptureOptions = {},
): MemoryCaptureService {
  const logger = options.logger ?? noopLogger;
  const rootFileStore = new LocalMemoryFileStore(project.fileStore);
  const embedding = new NoopEmbeddingPort();
  const knowledgeIndex = new LocalKnowledgeIndex(project);
  const knowledgeEvents = new NoopKnowledgeEventPort();
  const knowledgeSlRefs = new NoopKnowledgeSlRefsPort();
  const connections = new LocalMemoryConnections(project, options.queryExecutor);
  const slPython = new LocalSlPythonPort(options.semanticLayerCompute);
  const semanticLayerService = new SemanticLayerService(rootFileStore, connections, slPython, logger);
  const slSourcesRepository = new SqliteSlSourcesIndex({ dbPath: join(project.projectDir, '.klo', 'db.sqlite') });
  const slSearchService = new SlSearchService(embedding, slSourcesRepository, logger);
  const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, project.git, logger);
  const authorResolver = new LocalAuthorResolver();
  const llmProvider = options.llmProvider ?? createLocalKloLlmProviderFromConfig(project.config.llm);
  const toolsetFactory = new LocalMemoryToolsetFactory({
    project,
    embedding,
    wikiService,
    knowledgeIndex,
    knowledgeEvents,
    semanticLayerService,
    slSearchService,
    authorResolver,
    slSourcesRepository,
    connections,
  });
  // The provider is only required (and only validated) when no runner was injected.
  const agentRunner =
    options.agentRunner ??
    new AgentRunnerService({
      llmProvider: requireLlmProvider(llmProvider),
      logger,
    });
  // Honour the caller's explicit model override first; it was previously declared on the
  // options type but never read. Falls back to the configured default, then a placeholder.
  const memoryIngestionModel = options.memoryModel ?? project.config.llm.models.default ?? 'local-memory-model';
  const memoryAgent = new MemoryAgentService({
    settings: {
      knowledge: { userScopedKnowledgeEnabled: false },
      slValidation: { probeRowCount: 0 },
      llm: { memoryIngestionModel },
    },
    promptService: new PromptService({ promptsDir, partials: [] }),
    skillsRegistry: new SkillsRegistryService({ skillsDir }),
    wikiService,
    knowledgeIndex,
    knowledgeSlRefs,
    semanticLayerService,
    slSearchService,
    connections,
    rootFileStore,
    gitService: project.git,
    lockingService: new LocalMemoryLock(),
    slSourcesRepository,
    sessionWorktreeService: new SessionWorktreeService({
      coreConfig: project.coreConfig,
      gitService: project.git,
      configService: rootFileStore,
    }),
    semanticLayerSourceReconciler: new NoopSemanticLayerSourceReconciler(),
    agentRunner,
    slValidator: new LocalShapeOnlySlValidator(),
    toolsetFactory,
    logger,
  });
  return new MemoryCaptureService({
    memoryAgent,
    runs: new LocalMemoryRunStore({ projectDir: project.projectDir, idFactory: options.runIdFactory }),
  });
}
/**
 * Returns the provider unchanged, or fails when none is available.
 *
 * @throws Error when `provider` is `null`, `undefined` or otherwise falsy.
 */
function requireLlmProvider(provider: KloLlmProvider | null | undefined): KloLlmProvider {
  if (provider) {
    return provider;
  }
  throw new Error('createLocalProjectMemoryCapture requires llm.provider.backend or an injected agentRunner');
}
/**
 * Thin adapter exposing a project file store through the memory file-store port.
 *
 * Every file operation is forwarded unchanged to the wrapped store; the commit-message
 * hook is a no-op.
 */
class LocalMemoryFileStore implements MemoryFileStorePort {
  private readonly fileStore: MemoryFileStorePort | KloFileStorePort;

  constructor(fileStore: MemoryFileStorePort | KloFileStorePort) {
    this.fileStore = fileStore;
  }

  /** Returns a new adapter wrapping the underlying store scoped to `workdir`. */
  forWorktree(workdir: string): LocalMemoryFileStore {
    const scoped = this.fileStore.forWorktree(workdir) as KloFileStorePort;
    return new LocalMemoryFileStore(scoped);
  }

  writeFile(...params: Parameters<KloFileStorePort['writeFile']>): Promise<KloFileWriteResult> {
    return this.fileStore.writeFile(...params);
  }

  readFile(...params: Parameters<KloFileStorePort['readFile']>) {
    return this.fileStore.readFile(...params);
  }

  deleteFile(...params: Parameters<KloFileStorePort['deleteFile']>) {
    return this.fileStore.deleteFile(...params);
  }

  listFiles(...params: Parameters<KloFileStorePort['listFiles']>) {
    return this.fileStore.listFiles(...params);
  }

  getFileHistory(...params: Parameters<KloFileStorePort['getFileHistory']>) {
    return this.fileStore.getFileHistory(...params);
  }

  /** Intentionally does nothing. */
  async enqueueCommitMessageJobForExternalCommit(): Promise<void> {
    return;
  }
}
/** Embedding port that produces only empty vectors. */
class NoopEmbeddingPort implements KloEmbeddingPort {
  readonly maxBatchSize = 64;

  /** Resolves to an empty vector. */
  async computeEmbedding(): Promise<number[]> {
    const empty: number[] = [];
    return empty;
  }

  /** Resolves to one empty vector per input text. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const emptyVector = (): number[] => [];
    return texts.map(emptyVector);
  }
}
/**
 * Knowledge index that reads wiki pages from the project file store on every call.
 *
 * All write-side operations are no-ops; lookups list and read the markdown files under
 * `knowledge/global` and `knowledge/user/<userId>`.
 */
class LocalKnowledgeIndex implements KnowledgeIndexPort {
  constructor(private readonly project: KloLocalProject) {}

  async upsertPage(): Promise<void> {}

  async applyDiffTransactional(): Promise<void> {}

  /** Always resolves to an empty map. */
  async getExistingSearchTexts(): Promise<Map<string, { searchText: string; hasEmbedding: boolean }>> {
    const none = new Map<string, { searchText: string; hasEmbedding: boolean }>();
    return none;
  }

  async deleteStale(): Promise<void> {}

  async deleteByScope(): Promise<void> {}

  async deleteByKey(): Promise<void> {}

  /** Resolves to `{ page_key }` when the page file can be read, `null` when reading fails. */
  async findPageByKey(scope: string, scopeId: string | null, pageKey: string) {
    const target = this.pagePath(scope, scopeId, pageKey);
    try {
      await this.project.fileStore.readFile(target);
    } catch {
      // Any read failure is reported as "page not found".
      return null;
    }
    return { page_key: pageKey };
  }

  /** Lists global pages plus the given user's pages, sorted by page key. */
  async listPagesForUser(userId: string) {
    type PageRow = { id?: string; page_key: string; summary: string; scope: string; scope_id: string | null };
    const roots: Array<{ scope: string; scopeId: string | null; dir: string }> = [
      { scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' },
      { scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` },
    ];
    const rows: PageRow[] = [];
    for (const root of roots) {
      const listing = await this.project.fileStore.listFiles(root.dir, true);
      for (const file of listing.files) {
        if (!file.endsWith('.md')) {
          continue;
        }
        const stored = await this.project.fileStore.readFile(`${root.dir}/${file}`);
        const { summary } = parseWiki(stored.content);
        rows.push({
          page_key: file.replace(/\.md$/, ''),
          summary,
          scope: root.scope,
          scope_id: root.scopeId,
        });
      }
    }
    rows.sort((left, right) => left.page_key.localeCompare(right.page_key));
    return rows;
  }

  /** Counts the pages in the user's own scope. */
  async getUserPageCount(userId: string): Promise<number> {
    const pages = await this.listPagesForUser(userId);
    let total = 0;
    for (const page of pages) {
      if (page.scope === 'USER') {
        total += 1;
      }
    }
    return total;
  }

  async incrementUsageCount(): Promise<void> {}

  /**
   * Text-only search: scores each page's key and summary against `queryText`, drops
   * non-positive scores, and returns the best `limit` hits (ties ordered by page key).
   * The embedding argument is ignored.
   */
  async searchRRF(_userId: string, _embedding: number[] | null, queryText: string, limit: number) {
    const candidates = await this.listPagesForUser(_userId);
    const scored = candidates.map((page) => ({
      pageKey: page.page_key,
      summary: page.summary,
      rrfScore: scoreText(`${page.page_key} ${page.summary}`, queryText),
    }));
    const hits = scored.filter((entry) => entry.rrfScore > 0);
    hits.sort((left, right) => right.rrfScore - left.rrfScore || left.pageKey.localeCompare(right.pageKey));
    return hits.slice(0, limit);
  }

  /** Builds the markdown path for a page; any scope other than `GLOBAL` maps to the user directory. */
  private pagePath(scope: string, scopeId: string | null, pageKey: string): string {
    if (scope === 'GLOBAL') {
      return `knowledge/global/${pageKey}.md`;
    }
    return `knowledge/user/${scopeId}/${pageKey}.md`;
  }
}
/** Knowledge event port that records nothing. */
class NoopKnowledgeEventPort implements KnowledgeEventPort {
  /** Intentionally does nothing. */
  async createEvent(): Promise<void> {
    return;
  }
}
/** Knowledge-to-semantic-layer reference port that never syncs anything. */
class NoopKnowledgeSlRefsPort implements MemoryKnowledgeSlRefsPort {
  /** Always reports zero inserted and zero deleted references. */
  async syncFromWiki(): Promise<{ inserted: number; deleted: number }> {
    const unchanged = { inserted: 0, deleted: 0 };
    return unchanged;
  }
}
/**
 * Connection catalog backed by the project's `config.connections` map, with query
 * execution delegated to an optional executor.
 */
class LocalMemoryConnections implements MemoryConnectionPort, SlConnectionCatalogPort {
  constructor(
    private readonly project: KloLocalProject,
    private readonly queryExecutor?: {
      execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KloQueryResult>;
    },
  ) {}

  /** Resolves the given ids to connection infos, dropping ids that resolve to `null`. */
  async listEnabledConnections(ids: string[]): Promise<KloConnectionInfo[]> {
    const resolved = ids.map((id) => localConnectionInfoFromConfig(id, this.project.config.connections[id]));
    return resolved.filter((info): info is KloConnectionInfo => info !== null);
  }

  /**
   * Resolves a single connection.
   *
   * @throws Error when the id does not resolve to a connection.
   */
  async getConnectionById(connectionId: string): Promise<KloConnectionInfo> {
    const config = this.project.config.connections[connectionId];
    const info = localConnectionInfoFromConfig(connectionId, config);
    if (info) {
      return info;
    }
    throw new Error(`Connection not found: ${connectionId}`);
  }

  /**
   * Runs SQL through the configured executor.
   *
   * @throws Error when no executor was supplied.
   */
  async executeQuery(connectionId: string, sql: string): Promise<KloQueryResult> {
    const executor = this.queryExecutor;
    if (!executor) {
      throw new Error('Local memory capture has no query executor configured');
    }
    return executor.execute({ connectionId, sql });
  }
}
/**
 * Adapts an optional semantic-layer compute port to the SlPythonPort shape, translating
 * between snake_case and camelCase field names.
 */
class LocalSlPythonPort implements SlPythonPort {
  constructor(private readonly compute?: KloSemanticLayerComputePort) {}

  /**
   * Validates sources through the compute port. Without one, reports no errors and a
   * single shape-only warning.
   */
  async validateSources(input: Parameters<SlPythonPort['validateSources']>[0]) {
    const compute = this.compute;
    if (!compute) {
      return {
        data: {
          errors: [],
          warnings: [LOCAL_SHAPE_WARNING],
          per_source_warnings: {},
        },
      };
    }
    const { sources, dialect, recently_touched: recentlyTouched } = input;
    const outcome = await compute.validateSources({ sources, dialect, recentlyTouched });
    return {
      data: {
        errors: outcome.errors,
        warnings: outcome.warnings,
        per_source_warnings: outcome.perSourceWarnings,
      },
    };
  }

  /** Compiles a query through the compute port. Without one, returns an `error` object instead of throwing. */
  async query(input: Parameters<SlPythonPort['query']>[0]) {
    const compute = this.compute;
    if (!compute) {
      return { error: 'Local memory capture has no semantic compute adapter configured' };
    }
    const { sources, dialect, query } = input;
    const { sql, plan } = await compute.query({ sources, dialect, query });
    return { data: { sql, plan } };
  }
}
/** Author resolver that always yields the fixed local author identity. */
class LocalAuthorResolver implements GitAuthorResolverPort {
  /** Resolves to the shared `LOCAL_AUTHOR` object. */
  async resolve() {
    const author = LOCAL_AUTHOR;
    return author;
  }
}
/** Lock stand-in that performs no locking: the callback is simply invoked. */
class LocalMemoryLock {
  /**
   * Runs `fn` immediately and resolves to its result.
   *
   * @param _key - Lock key; ignored.
   * @param fn - Work to run.
   */
  async withLock<T>(_key: 'config:repo', fn: () => Promise<T>): Promise<T> {
    const outcome = await fn();
    return outcome;
  }
}
/** Source reconciler that does nothing. */
class NoopSemanticLayerSourceReconciler implements MemorySlSourceReconcilerPort {
  /** Intentionally does nothing. */
  async upsertRow(): Promise<void> {
    return;
  }
}
/**
 * Validator that only checks the YAML shape of a semantic-layer source against the
 * definition or overlay schema.
 */
class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
  /**
   * Reads and parses one source file, then validates its shape.
   *
   * A source with neither `table` nor `sql` is validated as an overlay, anything else as a
   * full definition. Failures while reading or parsing are returned as a single error
   * entry rather than thrown.
   *
   * @returns `errors` prefixed with the source name; on success, no errors and the
   *   shape-only warning.
   */
  async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) {
    try {
      const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName);
      const parsed = YAML.parse(file.content) as SemanticLayerSource;
      const looksLikeOverlay = parsed.table == null && parsed.sql == null;
      const outcome = (looksLikeOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed);
      if (outcome.success) {
        return { errors: [], warnings: [LOCAL_SHAPE_WARNING] };
      }
      const errors = outcome.error.issues.map(
        (issue) => `${sourceName}: ${issue.path.join('.') || 'source'} ${issue.message}`,
      );
      return { errors, warnings: [] };
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      return { errors: [`${sourceName}: ${detail}`], warnings: [] };
    }
  }
}
/** Fixed list of tools that can be rendered as a name-keyed AI SDK tool map. */
class LocalMemoryToolSet implements MemoryToolSetLike {
  constructor(private readonly tools: BaseTool[]) {}

  /** Builds `{ [tool.name]: tool.toAiSdkTool(context) }` in list order. */
  toAiSdkTools(context: ToolContext) {
    const entries = this.tools.map((tool) => {
      const sdkTool = tool.toAiSdkTool(context);
      return [tool.name, sdkTool];
    });
    return Object.fromEntries(entries);
  }
}
/**
 * Builds the tool sets handed to the memory agent.
 *
 * Tools are constructed once in the constructor; each `create*` call wraps them in a
 * fresh {@link LocalMemoryToolSet}.
 */
class LocalMemoryToolsetFactory implements MemoryToolsetFactoryPort {
  private readonly wikiTools: BaseTool[];
  private readonly slTools: BaseTool[];

  constructor(deps: {
    project: KloLocalProject;
    embedding: KloEmbeddingPort;
    wikiService: KnowledgeWikiService;
    knowledgeIndex: KnowledgeIndexPort;
    knowledgeEvents: KnowledgeEventPort;
    semanticLayerService: SemanticLayerService;
    slSearchService: SlSearchService;
    authorResolver: GitAuthorResolverPort;
    slSourcesRepository: SlSourcesIndexPort;
    connections: SlConnectionCatalogPort;
  }) {
    const { wikiService, knowledgeIndex, knowledgeEvents } = deps;
    // Dependencies shared by the semantic-layer tools (all except rollback).
    const slDeps = {
      semanticLayerService: deps.semanticLayerService,
      slSearchService: deps.slSearchService,
      authorResolver: deps.authorResolver,
    };

    this.wikiTools = [
      new WikiReadTool(wikiService, knowledgeIndex),
      new WikiSearchTool({
        // Search runs against the local project's knowledge pages.
        search: async (input) => {
          const hits = await searchLocalKnowledgePages(deps.project, {
            userId: input.userId,
            query: input.query,
            limit: input.limit,
            embeddingService: deps.embedding,
          });
          // Return at most `limit` hits, but report the full hit count.
          const page = hits.slice(0, input.limit);
          return {
            results: page.map(({ key, path, summary, score, matchReasons, lanes }) => ({
              key,
              path,
              summary,
              score,
              matchReasons,
              lanes,
            })),
            totalFound: hits.length,
          };
        },
      }),
      new WikiListTagsTool(wikiService, knowledgeIndex),
      new WikiWriteTool(wikiService, knowledgeIndex, knowledgeEvents),
      new WikiRemoveTool(wikiService, knowledgeIndex, knowledgeEvents),
    ];

    this.slTools = [
      new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }),
      new SlEditSourceTool(slDeps),
      new SlReadSourceTool(slDeps),
      new SlWriteSourceTool(slDeps),
      new SlValidateTool(slDeps),
      new SlRollbackTool(deps.slSourcesRepository, deps.connections, 0),
    ];
  }

  /** Tool set containing the wiki tools followed by the semantic-layer tools. */
  createIngestWuToolset(): MemoryToolSetLike {
    const combined = this.wikiTools.concat(this.slTools);
    return new LocalMemoryToolSet(combined);
  }

  /** Tool set containing only the wiki tools. */
  createToolset(): MemoryToolSetLike {
    return new LocalMemoryToolSet(this.wikiTools);
  }
}
/**
 * Splits a wiki page into its frontmatter summary and its body.
 *
 * The page is expected to start with a `---` delimited YAML frontmatter block. Both LF and
 * CRLF line endings are accepted around the delimiters. The previous LF-only pattern
 * failed to recognize the frontmatter of CRLF files, losing the summary and leaving the
 * raw frontmatter inside the content.
 *
 * @param raw - Full text of the markdown file.
 * @returns `summary` from the frontmatter (empty when absent or not a string) and the
 *   trimmed body. When no frontmatter is found, the whole trimmed text is the content.
 * @throws Whatever `YAML.parse` throws for malformed frontmatter.
 */
function parseWiki(raw: string): { summary: string; content: string } {
  const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?([\s\S]*)$/);
  if (!match) {
    return { summary: '', content: raw.trim() };
  }
  // An empty frontmatter block parses to null; treat it as having no fields.
  const frontmatter = (YAML.parse(match[1]) ?? {}) as Record<string, unknown>;
  return {
    summary: typeof frontmatter.summary === 'string' ? frontmatter.summary : '',
    content: match[2].trim(),
  };
}
/**
 * Scores how well `text` matches `query`, case-insensitively.
 *
 * @returns `0` for a blank query, `1` when the whole trimmed query is a substring of the
 *   text, otherwise the fraction of whitespace-separated query words found in the text.
 */
function scoreText(text: string, query: string): number {
  const needle = query.toLowerCase().trim();
  if (needle === '') {
    return 0;
  }
  const haystack = text.toLowerCase();
  if (haystack.includes(needle)) {
    return 1;
  }
  const terms = needle.split(/\s+/).filter(Boolean);
  let matched = 0;
  for (const term of terms) {
    if (haystack.includes(term)) {
      matched += 1;
    }
  }
  return matched / Math.max(terms.length, 1);
}

View file

@ -0,0 +1,375 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
// Module-level mock for 'ai' so generateText is a stub. This file is separate from
// memory-agent.service.spec.ts so the existing pure-helper tests don't load the mock.
vi.mock('ai', () => ({
// Resolves to an empty text response with no tool calls.
generateText: vi.fn().mockResolvedValue({ text: '', toolCalls: [] }),
// Returns its argument unchanged.
stepCountIs: (n: number) => n,
// Returns the tool definition unchanged.
tool: (def: unknown) => def,
}));
// Imported AFTER vi.mock so the mocked module is used.
import { generateText } from 'ai';
import { SYSTEM_GIT_AUTHOR } from '../tools/index.js';
import { MemoryAgentService } from './memory-agent.service.js';
/**
 * Bag of mock collaborators produced by `buildMocks` and consumed by `buildService`.
 * Every member is deliberately `any`: the values are hand-rolled vitest stubs whose shape
 * only covers what the tests in this file exercise.
 */
interface BuiltMocks {
appSettings: any;
llmProvider: any;
prompt: any;
posthog: any;
telemetry: any;
skillsRegistry: any;
wikiService: any;
indexRepository: any;
knowledgeSlRefsRepository: any;
knowledgeRepository: any;
embeddingService: any;
semanticLayerService: any;
slSearchService: any;
dataSourcesService: any;
configService: any;
gitService: any;
lockingService: any;
slSourcesRepository: any;
sessionWorktreeService: any;
semanticLayerSourceReconciler: any;
agentRunner: any;
slValidator: any;
toolsetFactory: any;
}
/**
 * Creates the default set of mock collaborators for `MemoryAgentService` tests.
 *
 * The defaults describe a run where the session worktree is created at `basesha`, the
 * agent loop stops naturally, validation reports nothing, and the squash merge succeeds
 * with commit `cafebabe` touching `a.yaml`.
 *
 * @param overrides - Top-level entries that replace the matching default wholesale
 *   (shallow merge; nested objects are not merged).
 * @returns The defaults with `overrides` applied on top.
 */
const buildMocks = (overrides: Partial<BuiltMocks> = {}): BuiltMocks => {
  // Each call yields a toolset stub with its own spies, so the two factory methods never share state.
  const makeToolsetStub = () => ({
    toAiSdkTools: vi.fn().mockReturnValue({}),
    getAllTools: vi.fn().mockReturnValue([]),
  });

  // Worktree handed back by sessionWorktreeService.create().
  const worktree = {
    chatId: 'chat-1',
    workdir: '/tmp/wt/session-chat-1',
    branch: 'session/chat-1',
    baseSha: 'basesha',
    createdAt: new Date(),
    git: { revParseHead: vi.fn().mockResolvedValue('basesha') },
    config: { writeFile: vi.fn(), deleteFile: vi.fn() },
  };

  const appSettings = {
    settings: {
      ai: {
        knowledge: { userScopedKnowledgeEnabled: false },
        slValidation: { probeRowCount: 1 },
      },
      llm: { memoryIngestionModel: 'test-model' },
    },
  };

  const telemetry = {
    isEnabled: () => false,
    appSettingsService: { settings: { telemetry: { recordInputs: false, recordOutputs: false } } },
    systemConfigService: { config: { instance: { name: 'test-instance' } } },
  };

  const dataSourcesService = {
    listEnabledConnections: vi.fn().mockResolvedValue([]),
    getConnectionById: vi.fn().mockResolvedValue({
      id: 'conn-1',
      name: 'Warehouse',
      connectionType: 'POSTGRESQL',
    }),
    executeQuery: vi.fn(),
  };

  const gitService = {
    revParseHead: vi.fn().mockResolvedValue('basesha'),
    squashMergeIntoMain: vi.fn().mockResolvedValue({ ok: true, squashSha: 'cafebabe', touchedPaths: ['a.yaml'] }),
  };

  const defaults: BuiltMocks = {
    appSettings,
    llmProvider: { getModel: vi.fn().mockReturnValue({}) },
    prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') },
    posthog: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) },
    telemetry,
    skillsRegistry: {
      listSkills: vi.fn().mockResolvedValue([]),
      buildSkillsPrompt: vi.fn().mockReturnValue(''),
      getSkill: vi.fn(),
      stripFrontmatter: vi.fn(),
    },
    wikiService: {
      // forWorktree() hands back this same stub object.
      forWorktree: vi.fn().mockReturnThis(),
      readPage: vi.fn(),
      syncSinglePage: vi.fn(),
      deleteFromIndex: vi.fn(),
    },
    indexRepository: { listPagesForUser: vi.fn().mockResolvedValue([]) },
    knowledgeSlRefsRepository: { syncFromWiki: vi.fn().mockResolvedValue({ inserted: 0, deleted: 0 }) },
    knowledgeRepository: {},
    embeddingService: { computeEmbedding: vi.fn() },
    semanticLayerService: {
      forWorktree: vi.fn().mockReturnThis(),
      loadAllSources: vi.fn().mockResolvedValue([]),
      readSourceFile: vi.fn(),
    },
    slSearchService: { indexSources: vi.fn(), buildSearchText: vi.fn() },
    dataSourcesService,
    configService: {
      enqueueCommitMessageJobForExternalCommit: vi.fn().mockResolvedValue(undefined),
      writeFile: vi.fn(),
      deleteFile: vi.fn(),
    },
    gitService,
    lockingService: {
      // Pass-through lock: runs the callback immediately.
      withLock: vi.fn().mockImplementation((_key: string, fn: () => Promise<unknown>) => fn()),
    },
    slSourcesRepository: { deleteByConnectionAndName: vi.fn() },
    sessionWorktreeService: {
      create: vi.fn().mockResolvedValue(worktree),
      cleanup: vi.fn().mockResolvedValue(undefined),
    },
    semanticLayerSourceReconciler: { upsertRow: vi.fn() },
    agentRunner: { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' }) },
    slValidator: { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) },
    toolsetFactory: {
      createIngestWuToolset: vi.fn().mockReturnValue(makeToolsetStub()),
      createToolset: vi.fn().mockReturnValue(makeToolsetStub()),
    },
  };

  return { ...defaults, ...overrides };
};
/**
 * Constructs a `MemoryAgentService` wired to the given mocks.
 *
 * Settings are read from `mocks.appSettings`, and the `connections` port is assembled from
 * `mocks.dataSourcesService`. All three connection methods come from the mocks, so a test
 * that overrides or spies on `dataSourcesService.listEnabledConnections` observes the
 * calls the service actually makes. Missing lookup methods fall back to inert defaults.
 *
 * @param mocks - Collaborators, normally produced by `buildMocks`.
 * @returns The service under test.
 */
const buildService = (mocks: BuiltMocks): MemoryAgentService =>
  new MemoryAgentService({
    settings: {
      knowledge: {
        userScopedKnowledgeEnabled: mocks.appSettings.settings.ai.knowledge.userScopedKnowledgeEnabled,
      },
      slValidation: {
        probeRowCount: mocks.appSettings.settings.ai.slValidation.probeRowCount,
      },
      llm: {
        memoryIngestionModel: mocks.appSettings.settings.llm.memoryIngestionModel,
      },
    },
    promptService: mocks.prompt,
    skillsRegistry: mocks.skillsRegistry,
    wikiService: mocks.wikiService,
    knowledgeIndex: mocks.indexRepository,
    knowledgeSlRefs: mocks.knowledgeSlRefsRepository,
    semanticLayerService: mocks.semanticLayerService,
    slSearchService: mocks.slSearchService,
    connections: {
      // Wired from the mocks like its siblings; previously a detached stub that ignored overrides.
      listEnabledConnections:
        mocks.dataSourcesService.listEnabledConnections ?? vi.fn().mockResolvedValue([]),
      getConnectionById:
        mocks.dataSourcesService.getConnectionById ??
        vi.fn().mockResolvedValue({
          id: 'conn-1',
          name: 'Warehouse',
          connectionType: 'POSTGRESQL',
        }),
      executeQuery: mocks.dataSourcesService.executeQuery,
    },
    rootFileStore: mocks.configService,
    gitService: mocks.gitService,
    lockingService: mocks.lockingService,
    slSourcesRepository: mocks.slSourcesRepository,
    sessionWorktreeService: mocks.sessionWorktreeService,
    semanticLayerSourceReconciler: mocks.semanticLayerSourceReconciler,
    agentRunner: mocks.agentRunner,
    slValidator: mocks.slValidator,
    toolsetFactory: mocks.toolsetFactory,
    telemetry: {
      trackMemoryIngestion: mocks.posthog.trackEvent,
    },
  });
// Shared ingest input. No connectionId is set, so tests that pass it unchanged run
// ingest() without a semantic-layer connection.
const baseInput = {
userId: 'u1',
chatId: 'chat-1',
// Long enough + with a definition keyword so the prefilter doesn't skip.
userMessage: 'going forward exclude cancelled orders from revenue, this is the canonical definition',
};
// Typed handle on the generateText stub installed by vi.mock('ai').
const generateTextMock = vi.mocked(generateText);
// Reset the stub before every test and re-install its default empty response.
beforeEach(() => {
generateTextMock.mockReset();
generateTextMock.mockResolvedValue({ text: '', toolCalls: [] } as never);
});
// Restore anything spied on during the test.
afterEach(() => {
vi.restoreAllMocks();
});
/**
 * Orchestration tests for `MemoryAgentService.ingest`. Each case drives a different
 * squash-merge outcome (success, nothing touched, conflict, thrown error) and checks how
 * the session worktree is cleaned up and what `commitHash` is returned.
 */
describe('MemoryAgentService.ingest — session-branch orchestration', () => {
  it('happy path: creates worktree, runs LLM loop, squash-merges, enqueues note, cleans up', async () => {
    const mocks = buildMocks();
    const svc = buildService(mocks);
    const result = await svc.ingest(baseInput);
    // Phase 1: session worktree was created from main's HEAD.
    expect(mocks.sessionWorktreeService.create).toHaveBeenCalledWith('chat-1', 'basesha');
    // Phase 2: LLM loop ran with the assembled tools/system/prompt.
    expect(mocks.agentRunner.runLoop).toHaveBeenCalledOnce();
    // Phase 3: squash-merged onto main.
    expect(mocks.gitService.squashMergeIntoMain).toHaveBeenCalledWith(
      'session/chat-1',
      SYSTEM_GIT_AUTHOR.name,
      SYSTEM_GIT_AUTHOR.email,
      expect.stringContaining('[chat=chat-1]'),
    );
    // Note enqueue happened on the ROOT configService, not the scoped one. The single
    // touched path is passed as the diff scope.
    expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).toHaveBeenCalledWith(
      { commitHash: 'cafebabe' },
      expect.stringContaining('[chat=chat-1]'),
      'a.yaml',
    );
    // Cleanup ran with success.
    expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(
      expect.objectContaining({ chatId: 'chat-1' }),
      'success',
      expect.any(Object),
    );
    expect(result.commitHash).toBe('cafebabe');
  });

  it('empty path: squash returns no touched paths → no enqueue, cleanup(empty), commitHash=null', async () => {
    const mocks = buildMocks();
    mocks.gitService.squashMergeIntoMain.mockResolvedValue({
      ok: true,
      squashSha: 'basesha',
      touchedPaths: [],
    });
    const svc = buildService(mocks);
    const result = await svc.ingest(baseInput);
    expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).not.toHaveBeenCalled();
    expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'empty', expect.any(Object));
    expect(result.commitHash).toBeNull();
  });

  it('conflict path: rolls back DB, cleanup(conflict, conflictPaths), returns commitHash=null with empty actions', async () => {
    const mocks = buildMocks();
    mocks.gitService.squashMergeIntoMain.mockResolvedValue({
      ok: false,
      conflict: true,
      conflictPaths: ['semantic-layer/conn-x/fct_intakes.yaml'],
    });
    // Have the wikiService report a still-existing page in main, so rollback re-syncs.
    mocks.wikiService.readPage.mockResolvedValue({
      pageKey: 'phantom',
      frontmatter: { summary: 'x', usage_mode: 'auto' },
      content: 'body',
    });
    const svc = buildService(mocks);
    const result = await svc.ingest(baseInput);
    expect(mocks.gitService.squashMergeIntoMain).toHaveBeenCalled();
    // Cleanup got the conflict outcome + the paths.
    expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'conflict', {
      conflictPaths: ['semantic-layer/conn-x/fct_intakes.yaml'],
    });
    expect(mocks.configService.enqueueCommitMessageJobForExternalCommit).not.toHaveBeenCalled();
    expect(result.commitHash).toBeNull();
    expect(result.actions).toEqual([]);
  });

  it('crash path: post-loop step throws → cleanup(crash), commitHash=null', async () => {
    const mocks = buildMocks();
    // The squash merge is the post-loop step made to fail here: a rejection (as opposed to
    // a resolved conflict result) must land in the crash branch.
    mocks.gitService.squashMergeIntoMain.mockRejectedValue(new Error('git crashed'));
    const svc = buildService(mocks);
    const result = await svc.ingest(baseInput);
    expect(mocks.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'crash', expect.any(Object));
    expect(result.commitHash).toBeNull();
  });
});
// Regression test: two services share one lock, one worktree service and one git service,
// and run ingest() concurrently for different chats.
describe('MemoryAgentService.ingest — concurrency regression', () => {
it('two parallel ingest() calls produce distinct squash commits (no absorption)', async () => {
// FIFO lock: each acquisition chains onto the previous holder's release. This is the
// same shape as production withLock — the test asserts that two parallel ingests
// sequence both their phase-1 (worktree create) and phase-3 (squash merge) calls
// without deadlocking, and produce distinct commits.
let chain: Promise<void> = Promise.resolve();
const lockingService = {
withLock: vi.fn().mockImplementation(async (_key: string, fn: () => Promise<unknown>) => {
// Capture the current tail, then publish our own pending promise as the new tail
// BEFORE awaiting, so a later caller queues behind us.
const previous = chain;
let releaseMe!: () => void;
chain = new Promise<void>((resolve) => {
releaseMe = resolve;
});
await previous;
try {
return await fn();
} finally {
// Release even when fn() throws, so the next waiter is never stranded.
releaseMe();
}
}),
};
let createCount = 0;
const sessionWorktreeService = {
// Returns a worktree derived from the chatId so the two sessions get different branches.
create: vi.fn().mockImplementation((chatId: string) => {
createCount += 1;
return Promise.resolve({
chatId,
workdir: `/tmp/wt/session-${chatId}`,
branch: `session/${chatId}`,
baseSha: 'basesha',
createdAt: new Date(),
git: { revParseHead: vi.fn().mockResolvedValue('basesha') },
config: { writeFile: vi.fn() },
});
}),
cleanup: vi.fn().mockResolvedValue(undefined),
};
let mergeCount = 0;
const gitService = {
revParseHead: vi.fn().mockResolvedValue('basesha'),
// Every merge call yields a new sha and touched path, numbered by call order.
squashMergeIntoMain: vi.fn().mockImplementation(() => {
mergeCount += 1;
return Promise.resolve({
ok: true,
squashSha: `sha-${mergeCount}`,
touchedPaths: [`${mergeCount}.yaml`],
});
}),
};
// Both mock sets share the same lock, worktree service and git service instances.
const mocksA = buildMocks({ lockingService, sessionWorktreeService, gitService });
const mocksB = buildMocks({ lockingService, sessionWorktreeService, gitService });
const svcA = buildService(mocksA);
const svcB = buildService(mocksB);
const [a, b] = await Promise.all([
svcA.ingest({ ...baseInput, chatId: 'chat-A' }),
svcB.ingest({ ...baseInput, chatId: 'chat-B' }),
]);
expect(createCount).toBe(2);
expect(gitService.squashMergeIntoMain).toHaveBeenCalledTimes(2);
expect(a.commitHash).not.toBeNull();
expect(b.commitHash).not.toBeNull();
expect(a.commitHash).not.toBe(b.commitHash);
});
});

View file

@ -0,0 +1,475 @@
import { describe, expect, it, vi } from 'vitest';
import { validateSingleSource } from '../sl/index.js';
import { createTouchedSlSources, hasTouchedSlSource } from '../tools/index.js';
import { detectCaptureSignals, isWorthAnalyzing } from './capture-signals.js';
import { MemoryAgentService } from './memory-agent.service.js';
/**
 * Validator stub that forwards its three arguments straight to the real
 * `validateSingleSource`, so gate tests run the actual validation logic.
 * Cast to `never` so it can be passed wherever the service expects its validator port.
 */
const passthroughValidator = {
  validateSingleSource: (definition: unknown, conn: string, name: string) =>
    validateSingleSource(definition as never, conn, name),
} as never;
/**
 * Unit tests for `detectCaptureSignals`: which combinations of user and assistant text
 * raise the `sl` and `knowledge` signals, which reasons are recorded, and when the
 * LookML dialect is detected.
 */
describe('MemoryAgentService.detectCaptureSignals', () => {
  it('fires sl on a long user message + SQL aggregate in assistant message', () => {
    const userMessage = `${'A'.repeat(120)} show me revenue by month`;
    const result = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage,
      assistantMessage: 'SELECT SUM(amount) FROM orders GROUP BY month',
    });
    expect(result.sl).toBe(true);
    expect(result.reasons).toContain('sql aggregate in assistant message');
  });

  it('does NOT fire sl from aggregate alone when user message is short', () => {
    const result = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage: 'show revenue',
      assistantMessage: 'SELECT SUM(amount) FROM orders',
    });
    expect(result.sl).toBe(false);
  });

  it('fires sl on definition keywords in user message regardless of length', () => {
    const result = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage: 'going forward exclude cancelled orders from revenue',
    });
    expect(result.sl).toBe(true);
    expect(result.reasons).toContain('sl-style definition keyword in user message');
  });

  it('fires knowledge on a definition keyword in user message', () => {
    const result = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage: 'BYOL stands for Bring Your Own Lab',
    });
    expect(result.knowledge).toBe(true);
    expect(result.reasons).toContain('definition keyword in user message');
  });

  it('fires both sl and knowledge when both signals hit', () => {
    const result = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage: 'going forward, define revenue as sum of paid orders',
    });
    expect(result.sl).toBe(true);
    expect(result.knowledge).toBe(true);
  });

  it('fires neither for a plain ad-hoc question', () => {
    const result = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage: 'how many users signed up last week?',
      assistantMessage: '12 users.',
    });
    expect(result.sl).toBe(false);
    expect(result.knowledge).toBe(false);
    expect(result.reasons).toEqual([]);
  });

  it('fires knowledge when assistant emits a markdown definition table', () => {
    const result = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage: 'list our protocols',
      assistantMessage: '| Term | Definition |\n|---|---|\n| TRT | Testosterone Replacement Therapy |',
    });
    expect(result.knowledge).toBe(true);
    expect(result.reasons).toContain('definition table in assistant message');
  });

  it('accepts JOIN and CTE-style aggregates as sl signals', () => {
    // Long, keyword-free user message: only the assistant SQL can raise the signal.
    const userMessage = 'B'.repeat(150);
    const cteResult = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage,
      assistantMessage: 'WITH base AS (SELECT * FROM x) SELECT * FROM base',
    });
    expect(cteResult.sl).toBe(true);
    // The JOIN form named in the test title was previously not exercised.
    const joinResult = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage,
      assistantMessage: 'SELECT o.id FROM orders o JOIN users u ON u.id = o.user_id',
    });
    expect(joinResult.sl).toBe(true);
  });

  it('reasons array is empty when no signal fires', () => {
    const result = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage: 'hello',
    });
    expect(result.reasons).toEqual([]);
  });

  it('detects LookML dialect from view/measure structural keywords', () => {
    const result = detectCaptureSignals({
      userId: 'u',
      chatId: 'c',
      userMessage: 'ingest this',
      assistantMessage:
        'view: fct_labs {\n  sql_table_name: analytics.fct_labs ;;\n  measure: count_lab_orders { type: count }\n}',
    });
    expect(result.dialect).toBe('lookml');
    expect(result.sl).toBe(true);
    expect(result.reasons).toContain('lookml structure in assistant message');
  });
});
// Tests for isWorthAnalyzing on LookML payloads: a view that only wraps a table is
// skipped, while a view carrying a non-count aggregate, a derived_table, a
// sql_always_where or a join: block is kept.
describe('MemoryAgentService.isWorthAnalyzing (C1 + F1)', () => {
// Wraps a LookML snippet as the assistant message of an otherwise fixed ingest input.
const baseInput = (assistantMessage: string) => ({
userId: 'u',
chatId: 'c',
userMessage: 'Ingest the following content into memory.',
assistantMessage,
});
it('skips a pure LookML wrapper (only view + sql_table_name + dimensions + measure: count)', () => {
const wrapper = `view: timeline {
sql_table_name: analytics.timeline ;;
dimension_group: date { type: time; description: "m/d/Y" }
dimension: notes { type: string; description: "notes" }
measure: count { type: count }
}`;
expect(isWorthAnalyzing(baseInput(wrapper))).toBe(false);
});
it('keeps a LookML view with a non-count aggregate (count_distinct, sum, avg, …)', () => {
const real = `view: fct_labs {
sql_table_name: analytics.fct_labs ;;
measure: count_lab_orders { type: count }
measure: count_distinct_patients { type: count_distinct; sql: \${admin_user_id} ;; }
}`;
expect(isWorthAnalyzing(baseInput(real))).toBe(true);
});
it('keeps a LookML view with derived_table even if it has no non-count measures', () => {
const derived = `view: lab_results {
derived_table: { sql: SELECT * FROM analytics.raw WHERE status = 'final' ;; }
dimension: lab_order_id { primary_key: yes; type: string }
measure: count { type: count }
}`;
expect(isWorthAnalyzing(baseInput(derived))).toBe(true);
});
it('keeps a LookML view with sql_always_where', () => {
const enforced = `view: rpt_daily_braze_email {
sql_table_name: analytics.fct_email_sends ;;
sql_always_where: \${TABLE}.channel = 'braze' ;;
measure: count { type: count }
}`;
expect(isWorthAnalyzing(baseInput(enforced))).toBe(true);
});
it('keeps a LookML view with a join: block', () => {
const joined = `view: fct_labs {
sql_table_name: analytics.fct_labs ;;
join: dim_customers {
sql_on: \${fct_labs.admin_user_id} = \${dim_customers.admin_user_id} ;;
relationship: many_to_one
}
}`;
expect(isWorthAnalyzing(baseInput(joined))).toBe(true);
});
});
// Tests for reconcileCrossRefs: how the `sl_refs` frontmatter of touched wiki pages is
// handed to knowledgeSlRefs.syncFromWiki.
describe('MemoryAgentService.reconcileCrossRefs', () => {
type Action = { target: 'wiki' | 'sl'; type: 'created' | 'updated' | 'removed'; key: string; detail: string };
// Builds a service where only wikiService.readPage and knowledgeSlRefs.syncFromWiki are
// stubbed; every other dependency is `undefined as never`.
const buildService = (overrides: {
readPage?: ReturnType<typeof vi.fn>;
syncFromWiki?: ReturnType<typeof vi.fn>;
}) => {
const wikiService = {
readPage: overrides.readPage ?? vi.fn(),
};
const knowledgeSlRefsRepository = {
syncFromWiki: overrides.syncFromWiki ?? vi.fn().mockResolvedValue({ inserted: 0, deleted: 0 }),
};
const svc = new MemoryAgentService({
settings: {
knowledge: { userScopedKnowledgeEnabled: false },
slValidation: { probeRowCount: 1 },
llm: { memoryIngestionModel: 'test-model' },
},
promptService: undefined as never,
skillsRegistry: undefined as never,
wikiService: wikiService as never,
knowledgeIndex: undefined as never,
knowledgeSlRefs: knowledgeSlRefsRepository as never,
semanticLayerService: undefined as never,
slSearchService: undefined as never,
connections: undefined as never,
rootFileStore: undefined as never,
gitService: undefined as never,
lockingService: undefined as never,
slSourcesRepository: undefined as never,
sessionWorktreeService: undefined as never,
semanticLayerSourceReconciler: undefined as never,
agentRunner: undefined as never,
slValidator: undefined as never,
toolsetFactory: undefined as never,
});
return { svc, wikiService, knowledgeSlRefsRepository };
};
// Baseline capture session: connection 'conn-1', user-scoped knowledge disabled.
const session = {
userId: 'u',
chatId: 'c',
userMessage: 'test',
connectionId: 'conn-1',
userScopedEnabled: false,
forceGlobalScope: false,
touchedSlSources: createTouchedSlSources(),
preHead: null,
};
it('projects a wiki page.sl_refs into knowledge_sl_refs via syncFromWiki', async () => {
const { svc, knowledgeSlRefsRepository } = buildService({
readPage: vi.fn().mockResolvedValue({
pageKey: 'byol-definition',
frontmatter: { summary: 'byol', sl_refs: ['fct_labs', 'lab_results'] },
content: 'body',
}),
syncFromWiki: vi.fn().mockResolvedValue({ inserted: 2, deleted: 0 }),
});
const actions: Action[] = [{ target: 'wiki', type: 'created', key: 'byol-definition', detail: '' }];
const synced = await svc.reconcileCrossRefs(actions, session);
expect(synced).toBe(2);
// Each ref is paired with the session's connectionId.
expect(knowledgeSlRefsRepository.syncFromWiki).toHaveBeenCalledWith({
wikiPageKey: 'byol-definition',
wikiScope: 'GLOBAL',
wikiScopeId: null,
refs: [
{ connectionId: 'conn-1', sourceName: 'fct_labs' },
{ connectionId: 'conn-1', sourceName: 'lab_results' },
],
});
});
it('skips sync when the action has no connectionId in session', async () => {
const { svc, knowledgeSlRefsRepository } = buildService({
readPage: vi.fn().mockResolvedValue({
pageKey: 'byol-definition',
frontmatter: { summary: 'byol', sl_refs: ['fct_labs'] },
content: 'body',
}),
});
const actions: Action[] = [{ target: 'wiki', type: 'created', key: 'byol-definition', detail: '' }];
const synced = await svc.reconcileCrossRefs(actions, { ...session, connectionId: undefined });
expect(synced).toBe(0);
expect(knowledgeSlRefsRepository.syncFromWiki).not.toHaveBeenCalled();
});
it('syncs an empty sl_refs list — clearing any stale rows for that wiki', async () => {
const { svc, knowledgeSlRefsRepository } = buildService({
readPage: vi.fn().mockResolvedValue({
pageKey: 'byol-definition',
frontmatter: { summary: 'byol' },
content: 'body',
}),
syncFromWiki: vi.fn().mockResolvedValue({ inserted: 0, deleted: 1 }),
});
const actions: Action[] = [{ target: 'wiki', type: 'updated', key: 'byol-definition', detail: '' }];
const synced = await svc.reconcileCrossRefs(actions, session);
// The single deleted row is reflected in the returned count.
expect(synced).toBe(1);
expect(knowledgeSlRefsRepository.syncFromWiki).toHaveBeenCalledWith({
wikiPageKey: 'byol-definition',
wikiScope: 'GLOBAL',
wikiScopeId: null,
refs: [],
});
});
it('normalizes dotted sl_refs to bare source names, dedupes (H)', async () => {
const { svc, knowledgeSlRefsRepository } = buildService({
readPage: vi.fn().mockResolvedValue({
pageKey: 'fct-labs-overview',
frontmatter: {
summary: 'fct_labs',
sl_refs: ['fct_labs', 'fct_labs.count_lab_orders', 'fct_labs.count_distinct_patients', 'lab_results'],
},
content: 'body',
}),
syncFromWiki: vi.fn().mockResolvedValue({ inserted: 2, deleted: 0 }),
});
const actions: Action[] = [{ target: 'wiki', type: 'created', key: 'fct-labs-overview', detail: '' }];
await svc.reconcileCrossRefs(actions, session);
// Four refs in, two out: the dotted entries collapse onto 'fct_labs'.
expect(knowledgeSlRefsRepository.syncFromWiki).toHaveBeenCalledWith({
wikiPageKey: 'fct-labs-overview',
wikiScope: 'GLOBAL',
wikiScopeId: null,
refs: [
{ connectionId: 'conn-1', sourceName: 'fct_labs' },
{ connectionId: 'conn-1', sourceName: 'lab_results' },
],
});
});
it('ignores sl-only actions — the DB index is driven from the wiki side', async () => {
const { svc, knowledgeSlRefsRepository } = buildService({});
const actions: Action[] = [{ target: 'sl', type: 'updated', key: 'fct_labs', detail: '' }];
const synced = await svc.reconcileCrossRefs(actions, session);
expect(synced).toBe(0);
expect(knowledgeSlRefsRepository.syncFromWiki).not.toHaveBeenCalled();
});
});
// Tests for gateRevertInvalidSources: a touched SL source whose probe query reports an
// error is reverted and its action dropped; a passing source is left untouched.
describe('MemoryAgentService.gateRevertInvalidSources (J3)', () => {
type Action = { target: 'wiki' | 'sl'; type: 'created' | 'updated' | 'removed'; key: string; detail: string };
// Build a service with only the deps stubbed below: semanticLayerService
// (readSourceFile, isManifestBacked), connections (listEnabledConnections,
// getConnectionById, executeQuery), rootFileStore (writeFile/deleteFile),
// gitService (getFileAtCommit), slSourcesRepository (deleteByConnectionAndName),
// plus the pass-through slValidator. Everything else is `undefined as never`.
const buildService = (overrides: {
readSourceFile?: ReturnType<typeof vi.fn>;
executeQuery?: ReturnType<typeof vi.fn>;
writeFile?: ReturnType<typeof vi.fn>;
deleteFile?: ReturnType<typeof vi.fn>;
getFileAtCommit?: ReturnType<typeof vi.fn>;
}) => {
const semanticLayerService = {
readSourceFile: overrides.readSourceFile ?? vi.fn(),
isManifestBacked: vi.fn().mockResolvedValue(false),
};
const connections = {
listEnabledConnections: vi.fn().mockResolvedValue([]),
getConnectionById: vi.fn().mockResolvedValue({
id: 'conn-1',
name: 'Warehouse',
connectionType: 'POSTGRESQL',
}),
executeQuery: overrides.executeQuery ?? vi.fn(),
};
const configService = {
writeFile: overrides.writeFile ?? vi.fn().mockResolvedValue({}),
deleteFile: overrides.deleteFile ?? vi.fn().mockResolvedValue({}),
};
const gitService = {
// Default: the file cannot be read at the base commit.
getFileAtCommit: overrides.getFileAtCommit ?? vi.fn().mockRejectedValue(new Error('not present')),
};
const slSourcesRepository = {
deleteByConnectionAndName: vi.fn().mockResolvedValue(undefined),
};
const svc = new MemoryAgentService({
settings: {
knowledge: { userScopedKnowledgeEnabled: false },
slValidation: { probeRowCount: 1 },
llm: { memoryIngestionModel: 'test-model' },
},
promptService: undefined as never,
skillsRegistry: undefined as never,
wikiService: undefined as never,
knowledgeIndex: undefined as never,
knowledgeSlRefs: undefined as never,
semanticLayerService: semanticLayerService as never,
slSearchService: undefined as never,
connections: connections as never,
rootFileStore: configService as never,
gitService: gitService as never,
lockingService: undefined as never,
slSourcesRepository: slSourcesRepository as never,
sessionWorktreeService: undefined as never,
semanticLayerSourceReconciler: undefined as never,
agentRunner: undefined as never,
slValidator: passthroughValidator,
toolsetFactory: undefined as never,
});
return { svc, semanticLayerService, connections, configService, gitService, slSourcesRepository };
};
// Baseline session; each test spreads it and supplies its own touchedSlSources.
const session = {
userId: 'u',
chatId: 'c',
userMessage: 'test',
connectionId: 'conn-1',
userScopedEnabled: false,
forceGlobalScope: false,
touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'broken_source' }]),
preHead: null,
};
it('reverts (deletes) a source whose dry-run fails and drops its action', async () => {
const badYaml = `name: broken_source
source_type: sql
sql: |
SELECT fake_col FROM analytics.x
grain: [fake_col]
columns: [{name: fake_col, type: string}]
measures: []
joins: []
`;
const { svc, configService } = buildService({
readSourceFile: vi.fn().mockResolvedValue({ content: badYaml, path: 'x' }),
// The probe query comes back with an error string.
executeQuery: vi.fn().mockResolvedValue({
headers: [],
rows: [],
totalRows: 0,
error: 'Unrecognized name: fake_col',
}),
});
const actions: Action[] = [
{ target: 'sl', type: 'created', key: 'broken_source', detail: 'create' },
{ target: 'wiki', type: 'created', key: 'some_wiki', detail: 'wiki' },
];
const localSession = {
...session,
touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'broken_source' }]),
};
const reverted = await svc.gateRevertInvalidSources(localSession as never, actions);
expect(reverted).toEqual(['broken_source']);
expect(configService.deleteFile).toHaveBeenCalledWith(
'semantic-layer/conn-1/broken_source.yaml',
expect.any(String),
expect.any(String),
expect.any(String),
{ skipLock: true },
);
// Wiki action survives; SL action is scrubbed.
expect(actions.map((a) => `${a.target}:${a.key}`)).toEqual(['wiki:some_wiki']);
expect(hasTouchedSlSource(localSession.touchedSlSources, 'conn-1', 'broken_source')).toBe(false);
});
it('leaves a source alone when its dry-run passes', async () => {
const goodYaml = `name: good_source
source_type: sql
sql: |
SELECT id FROM analytics.x
grain: [id]
columns: [{name: id, type: string}]
measures: []
joins: []
`;
const { svc, configService } = buildService({
readSourceFile: vi.fn().mockResolvedValue({ content: goodYaml, path: 'x' }),
executeQuery: vi.fn().mockResolvedValue({ headers: ['id'], rows: [], totalRows: 0, error: null }),
});
const actions: Action[] = [{ target: 'sl', type: 'created', key: 'good_source', detail: 'create' }];
const localSession = {
...session,
touchedSlSources: createTouchedSlSources([{ connectionId: 'conn-1', sourceName: 'good_source' }]),
};
const reverted = await svc.gateRevertInvalidSources(localSession as never, actions);
expect(reverted).toEqual([]);
// Nothing is written or deleted, and the action list is unchanged.
expect(configService.writeFile).not.toHaveBeenCalled();
expect(configService.deleteFile).not.toHaveBeenCalled();
expect(actions).toHaveLength(1);
});
});

View file

@ -0,0 +1,658 @@
import { createHash } from 'node:crypto';
import { readFile } from 'node:fs/promises';
import { join } from 'node:path';
import { tool } from 'ai';
import * as YAML from 'yaml';
import { z } from 'zod';
import { type KloLogger, noopLogger } from '../core/index.js';
import {
revertSourceToPreHead,
type SemanticLayerSource,
type SlValidationDeps,
type SlValidatorPort,
} from '../sl/index.js';
import {
createTouchedSlSources,
deleteTouchedSlSource,
listTouchedSlSources,
SYSTEM_GIT_AUTHOR,
type ToolContext,
type ToolSession,
touchedSlSourceCount,
touchedSlSourceNamesForConnection,
} from '../tools/index.js';
import {
buildRequiredSkillsBlock,
DEFAULT_SKILL_NAMES,
detectCaptureSignals,
prefilterSkipReason,
promptNameFor,
stepBudgetFor,
} from './capture-signals.js';
import type {
CaptureSession,
MemoryAction,
MemoryAgentInput,
MemoryAgentResult,
MemoryAgentServiceDeps,
MemoryAgentSourceType,
} from './types.js';
/** `SlValidationDeps` extended with the `slValidator` port that consumes those same deps. */
type GateDeps = SlValidationDeps & { slValidator: SlValidatorPort<SlValidationDeps> };
export class MemoryAgentService {
private readonly logger: KloLogger;
/**
 * Stores the injected dependencies and selects the logger.
 *
 * @param deps - Collaborators used by the service; `deps.logger` is optional and
 *   `noopLogger` is used when it is null or undefined.
 */
constructor(private readonly deps: MemoryAgentServiceDeps) {
  const injectedLogger = deps.logger;
  this.logger = injectedLogger ?? noopLogger;
}
async ingest(input: MemoryAgentInput): Promise<MemoryAgentResult> {
const chatId = input.chatId;
const sourceType: MemoryAgentSourceType = input.sourceType ?? 'research';
const empty: MemoryAgentResult = { signalDetected: false, actions: [], skillsLoaded: [], commitHash: null };
const hasSL = !!input.connectionId;
const userScopedEnabled = this.deps.settings.knowledge.userScopedKnowledgeEnabled;
const forceGlobalScope = sourceType === 'external_ingest';
const signals = detectCaptureSignals(input);
const skipReason = prefilterSkipReason(input, signals);
if (skipReason) {
this.logger.debug(`[memory-agent] chat=${chatId} skipped (pre-filter: ${skipReason})`);
return empty;
}
// Phase 1 — create a per-session git worktree branched at main's HEAD. This runs under
// a brief `config:repo` lock so the baseSha snapshot is consistent with the branch
// creation, but releases before the LLM loop starts. The unlocked loop is what lets
// concurrent ingest() calls and interactive saves on main run in parallel.
const sessionWorktree = await this.deps.lockingService.withLock('config:repo', async () => {
const mainHead = await this.deps.gitService.revParseHead();
if (!mainHead) {
throw new Error('memory-agent: config repo has no HEAD');
}
return this.deps.sessionWorktreeService.create(chatId, mainHead);
});
const [wikiIndex, slIndex] = await Promise.all([
this.buildWikiIndex(input.userId, userScopedEnabled),
hasSL ? this.buildSlIndex(input.connectionId!) : Promise.resolve(''),
]);
const skillsLoaded: string[] = [];
const actions: MemoryAction[] = [];
const session: CaptureSession = {
userId: input.userId,
chatId,
userMessageId: input.userMessageId,
userMessage: input.userMessage,
connectionId: input.connectionId,
userScopedEnabled,
forceGlobalScope,
touchedSlSources: createTouchedSlSources(),
preHead: sessionWorktree.baseSha,
};
// Wire scoped services so the LLM loop's reads + writes both target the session
// worktree, not main. Scoped wiki/SL services route their internal `configService`
// to the worktree; sl-tools take an explicit `configService` and `gitService`.
const scopedWikiService = this.deps.wikiService.forWorktree(sessionWorktree.workdir);
const scopedSemanticLayerService = this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir);
const toolSession: ToolSession = {
connectionId: input.connectionId ?? null,
isWorktreeScoped: true,
preHead: sessionWorktree.baseSha,
touchedSlSources: session.touchedSlSources,
actions,
semanticLayerService: scopedSemanticLayerService,
wikiService: scopedWikiService,
configService: sessionWorktree.config,
gitService: sessionWorktree.git,
};
const toolset = hasSL
? this.deps.toolsetFactory.createIngestWuToolset(toolSession)
: this.deps.toolsetFactory.createToolset(['wiki']);
const toolContext: ToolContext = {
sourceId: 'memory-agent',
messageId: chatId,
userId: input.userId,
connectionId: input.connectionId,
session: toolSession,
};
const loadSkillTool = {
load_skill: tool({
description:
'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.',
inputSchema: z.object({
name: z.string().describe('The skill name as listed in the system prompt.'),
}),
execute: async ({ name }) => {
const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent');
if (!skill) {
const available =
(await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') || '(none)';
return `Skill "${name}" not available to the memory agent. Available: ${available}`;
}
try {
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
if (!skillsLoaded.includes(skill.name)) {
skillsLoaded.push(skill.name);
}
return {
name: skill.name,
skillDirectory: skill.path,
content: this.deps.skillsRegistry.stripFrontmatter(body),
};
} catch (e) {
return `Error loading skill "${name}": ${e instanceof Error ? e.message : String(e)}`;
}
},
}),
};
const skillNames: string[] = [...DEFAULT_SKILL_NAMES];
if (signals.dialect === 'lookml') {
skillNames.push('lookml_ingest');
}
const skills = await this.deps.skillsRegistry.listSkills(skillNames, 'memory_agent');
const skillsPrompt = this.deps.skillsRegistry.buildSkillsPrompt(skills, 'memory_agent');
const baseFraming = await this.loadBaseFraming(sourceType);
const requiredSkillsBlock = buildRequiredSkillsBlock(signals);
const systemPrompt = [baseFraming.trimEnd(), skillsPrompt, requiredSkillsBlock].filter(Boolean).join('\n');
const clipLimit = sourceType === 'external_ingest' ? 48000 : 16000;
const assistantSection = input.assistantMessage?.trim()
? `## Assistant Response\n${clip(input.assistantMessage.trim(), clipLimit)}`
: '';
const prompt = [
`# Wiki Index\n\n${wikiIndex}`,
hasSL ? `\n# Semantic Layer Sources\n\n${slIndex}` : '',
'\n---\n',
assistantSection,
`\n## User Message\n\n${input.userMessage.trim()}`,
]
.filter(Boolean)
.join('\n');
const stepBudget = stepBudgetFor(sourceType);
const modelName = this.deps.settings.llm.memoryIngestionModel;
const signalsList = [signals.knowledge && 'knowledge', signals.sl && 'sl'].filter(Boolean) as string[];
const signalsSuffix =
signalsList.length > 0 ? ` signals=[${signalsList.join(', ')}] reasons=[${signals.reasons.join('; ')}]` : '';
const dialectSuffix = signals.dialect ? ` dialect=${signals.dialect}` : '';
this.logger.log(
`[memory-agent] chat=${chatId} running (sourceType=${sourceType}, hasSL=${hasSL}, budget=${stepBudget}, model=${modelName})${signalsSuffix}${dialectSuffix}`,
);
if (process.env.MEMORY_AGENT_DEBUG_PROMPTS === '1') {
this.logger.debug(`[memory-agent prompt-debug] system=${systemPrompt}`);
this.logger.debug(`[memory-agent prompt-debug] user=${prompt}`);
}
// Phase 2 — unlocked LLM loop against the session worktree. Crashes inside generateText
// are isolated; we still try to run the cross-ref + gate steps and surface what we can.
let sessionOutcome: 'success' | 'empty' | 'conflict' | 'crash' = 'success';
let squashSha: string | null = null;
let touchedPaths: string[] = [];
let reconciledCrossRefs = 0;
let gateRevertedSources: string[] = [];
let sessionConflictPaths: string[] | undefined;
let sessionCrashed = false;
try {
const runResult = await this.deps.agentRunner.runLoop({
modelRole: 'candidateExtraction',
systemPrompt,
userPrompt: prompt,
toolSet: { ...toolset.toAiSdkTools(toolContext), ...loadSkillTool },
stepBudget,
telemetryTags: {
operationName: 'memory-agent-ingest',
userId: input.userId,
chatId,
},
});
if (runResult.stopReason === 'error' && runResult.error) {
this.logger.warn(`[memory-agent] chat=${chatId} loop failed: ${runResult.error.message}`);
}
// Cross-ref + revert gate: still scoped to the session worktree (writes via
// sl-tools' deps already use scoped services). Wiki cross-refs live in the DB,
// so they're connection-state and don't need scoping.
const gateDeps: GateDeps = {
semanticLayerService: scopedSemanticLayerService,
connections: this.deps.connections,
configService: sessionWorktree.config,
gitService: sessionWorktree.git,
slSourcesRepository: this.deps.slSourcesRepository,
slValidator: this.deps.slValidator,
probeRowCount: this.deps.settings.slValidation.probeRowCount,
};
reconciledCrossRefs = await this.reconcileCrossRefs(actions, session);
if (hasSL && touchedSlSourceCount(session.touchedSlSources) > 0) {
gateRevertedSources = await this.gateRevertInvalidSourcesWithDeps(session, actions, gateDeps);
}
if (gateRevertedSources.length > 0) {
this.logger.warn(
`[memory-agent] chat=${chatId} gate: reverted ${gateRevertedSources.length} unvalidatable SL source(s): ${gateRevertedSources.join(', ')}`,
);
}
// Phase 3 — squash-merge under a brief `config:repo` lock so interactive writes
// serialize against this short window. Empty merges (no diff vs main) skip the
// commit-message enqueue. Conflicts trigger a targeted DB rollback so eager
// session writes don't leave DB ahead of main.
const squashMessage = this.squashMessageForSession(
sourceType,
chatId,
actions,
reconciledCrossRefs,
gateRevertedSources,
);
const mergeResult = await this.deps.lockingService.withLock('config:repo', () =>
this.deps.gitService.squashMergeIntoMain(
sessionWorktree.branch,
SYSTEM_GIT_AUTHOR.name,
SYSTEM_GIT_AUTHOR.email,
squashMessage,
),
);
if (!mergeResult.ok) {
sessionOutcome = 'conflict';
sessionConflictPaths = mergeResult.conflictPaths;
await this.rollbackDbForAbortedSession(session, actions);
} else if (mergeResult.touchedPaths.length === 0) {
sessionOutcome = 'empty';
} else {
squashSha = mergeResult.squashSha;
touchedPaths = mergeResult.touchedPaths;
// Single-file commits: pass the path so the handler diff is path-scoped.
// Multi-file commits: omit path so the handler grabs the full commit diff
// (a comma-joined pathspec would match nothing).
const pathFilter = touchedPaths.length === 1 ? touchedPaths[0] : '';
await this.deps.rootFileStore.enqueueCommitMessageJobForExternalCommit(
{ commitHash: squashSha },
squashMessage,
pathFilter,
);
}
} catch (error) {
sessionCrashed = true;
sessionOutcome = 'crash';
this.logger.error(
`[memory-agent] chat=${chatId} session crashed: ${error instanceof Error ? error.message : String(error)}`,
);
} finally {
await this.deps.sessionWorktreeService.cleanup(sessionWorktree, sessionOutcome, {
conflictPaths: sessionConflictPaths,
});
}
if (sessionCrashed) {
this.logger.warn(`[memory-agent] chat=${chatId} crashed; worktree preserved for inspection`);
}
// On conflict/crash the session's git work was discarded — the action list no longer
// matches main. Drop it so callers don't think writes landed.
const finalActions = sessionOutcome === 'conflict' || sessionOutcome === 'crash' ? [] : actions;
// Reindex SL search if any SL actions actually landed on main.
if (hasSL && finalActions.some((a) => a.target === 'sl')) {
try {
const allSources = await this.deps.semanticLayerService.loadAllSources(input.connectionId!);
await this.deps.slSearchService.indexSources(input.connectionId!, allSources);
} catch (e) {
this.logger.warn(
`[memory-agent] chat=${chatId} SL index reindex failed (non-fatal): ${e instanceof Error ? e.message : String(e)}`,
);
}
}
const signalsActedOn: string[] = [];
if (signals.knowledge && skillsLoaded.includes('knowledge_capture')) {
signalsActedOn.push('knowledge');
}
if (signals.sl && skillsLoaded.includes('sl')) {
signalsActedOn.push('sl');
}
if (finalActions.length > 0) {
this.logger.log(
`[memory-agent] chat=${chatId} completed: ${finalActions.length} action(s) — ${finalActions.map((a) => `${a.target}:${a.type}:${a.key}`).join(', ')} (skills=[${skillsLoaded.join(', ')}], outcome=${sessionOutcome})`,
);
this.deps.telemetry?.trackMemoryIngestion(input.userId, {
chat_id: chatId,
source_type: sourceType,
action_count: finalActions.length,
actions: finalActions.map((a) => `${a.target}:${a.type}:${a.key}`),
skills_loaded: skillsLoaded,
signals_detected: signalsList,
signals_acted_on: signalsActedOn,
reconciled_cross_refs: reconciledCrossRefs,
session_outcome: sessionOutcome,
});
} else {
this.logger.log(
`[memory-agent] chat=${chatId} completed: 0 actions (skills=[${skillsLoaded.join(', ')}], outcome=${sessionOutcome})`,
);
if (signalsList.length > 0) {
this.deps.telemetry?.trackMemoryIngestion(input.userId, {
chat_id: chatId,
source_type: sourceType,
action_count: 0,
actions: [],
skills_loaded: skillsLoaded,
signals_detected: signalsList,
signals_acted_on: signalsActedOn,
reconciled_cross_refs: reconciledCrossRefs,
session_outcome: sessionOutcome,
});
}
}
return {
signalDetected: skillsLoaded.length > 0 || finalActions.length > 0,
actions: finalActions,
skillsLoaded,
commitHash: squashSha,
};
}
/**
 * Derives the `knowledge_sl_refs` DB index from the `sl_refs:` frontmatter of each wiki
 * page the session created or updated. The wiki frontmatter is the authored input; this
 * method only projects it, via `knowledgeSlRefs.syncFromWiki`.
 *
 * NOTE(review): earlier documentation said this is called inside the `config:repo` lock
 * window. The ingest flow visible in this file calls it before the squash-merge lock is
 * taken — confirm which is intended.
 *
 * @param actions Actions recorded during the session; only wiki `created`/`updated`
 *   entries are considered.
 * @param session Supplies the scope flags, user id, connection id and chat id.
 * @returns The number of DB rows that changed (inserts + deletes) across all pages.
 */
async reconcileCrossRefs(actions: MemoryAction[], session: CaptureSession): Promise<number> {
  const wikiScope: 'GLOBAL' | 'USER' = session.forceGlobalScope || !session.userScopedEnabled ? 'GLOBAL' : 'USER';
  const wikiScopeId = wikiScope === 'USER' ? session.userId : null;
  let changedRows = 0;

  for (const action of actions) {
    const isWikiWrite = action.target === 'wiki' && (action.type === 'created' || action.type === 'updated');
    if (!isWikiWrite) {
      continue;
    }
    const connectionId = session.connectionId;
    if (!connectionId) {
      this.logger.debug(
        `[memory-agent] reconcile: wiki=${action.key} skipped knowledge_sl_refs (no connectionId in session)`,
      );
      continue;
    }
    const page = await this.deps.wikiService.readPage(wikiScope, wikiScopeId, action.key);
    if (!page) {
      continue;
    }

    // Refs may be bare (`fct_labs`) or measure-qualified (`fct_labs.count_lab_orders`).
    // The index is source-level, so keep only the text before the first dot and collapse
    // duplicates; the Set keeps first-seen order.
    const sourceNames = new Set<string>();
    for (const ref of page.frontmatter.sl_refs ?? []) {
      const dotAt = ref.indexOf('.');
      const sourceName = dotAt === -1 ? ref : ref.slice(0, dotAt);
      if (sourceName.length > 0) {
        sourceNames.add(sourceName);
      }
    }

    const outcome = await this.deps.knowledgeSlRefs.syncFromWiki({
      wikiPageKey: action.key,
      wikiScope,
      wikiScopeId,
      refs: [...sourceNames].map((sourceName) => ({ connectionId, sourceName })),
    });
    changedRows += outcome.inserted + outcome.deleted;
  }

  if (changedRows > 0) {
    this.logger.log(`[memory-agent] chat=${session.chatId} knowledge_sl_refs_synced=${changedRows}`);
  }
  return changedRows;
}
/**
 * Pre-squash gate using this service's own (unscoped) dependencies. Delegates to
 * `gateRevertInvalidSourcesWithDeps`, which re-validates every SL source touched in the
 * session and reverts the ones that still report errors.
 *
 * NOTE(review): earlier documentation stated that this runs inside the `config:repo`
 * lock and uses `skipLock: true` on downstream writes; neither is visible in this
 * method — confirm against callers and `revertSourceToPreHead`.
 *
 * @param session Capture session whose touched SL sources are checked.
 * @param actions Action list; entries for reverted sources are removed in place.
 * @returns Names of the sources that were reverted.
 */
async gateRevertInvalidSources(session: CaptureSession, actions: MemoryAction[]): Promise<string[]> {
  const { semanticLayerService, connections, rootFileStore, gitService, slSourcesRepository, slValidator, settings } =
    this.deps;
  const unscopedDeps: GateDeps = {
    semanticLayerService,
    connections,
    configService: rootFileStore,
    gitService,
    slSourcesRepository,
    slValidator,
    probeRowCount: settings.slValidation.probeRowCount,
  };
  return this.gateRevertInvalidSourcesWithDeps(session, actions, unscopedDeps);
}
/**
 * Gate implementation with explicit dependencies, so the orchestrator can hand in
 * services scoped to the session worktree for the validation and revert calls.
 *
 * For each SL source touched on the session's connection: validate it, and if validation
 * reports errors, revert it to `session.preHead`, drop it from the touched set, and
 * remove its `sl` entries from `actions` (mutated in place). A failed revert is logged
 * and the source is left as-is.
 *
 * @param session Capture session (connection id, touched sources, pre-session head).
 * @param actions Action list to scrub in place.
 * @param deps Services used for validation and revert.
 * @returns Names of the sources that were reverted; empty when there is no connection.
 */
async gateRevertInvalidSourcesWithDeps(
  session: CaptureSession,
  actions: MemoryAction[],
  deps: GateDeps,
): Promise<string[]> {
  const revertedNames: string[] = [];
  const connectionId = session.connectionId;
  if (!connectionId) {
    return revertedNames;
  }

  for (const sourceName of touchedSlSourceNamesForConnection(session.touchedSlSources, connectionId)) {
    const { errors } = await deps.slValidator.validateSingleSource(deps, connectionId, sourceName);
    if (errors.length > 0) {
      try {
        await revertSourceToPreHead(deps, connectionId, session.preHead, sourceName);
        revertedNames.push(sourceName);
        deleteTouchedSlSource(session.touchedSlSources, connectionId, sourceName);
        // Walk backwards so splicing never shifts an index that is still to be visited.
        let index = actions.length;
        while (index-- > 0) {
          if (actions[index].target === 'sl' && actions[index].key === sourceName) {
            actions.splice(index, 1);
          }
        }
      } catch (e) {
        this.logger.error(
          `[memory-agent] chat=${session.chatId} gate: failed to revert ${sourceName}: ${e instanceof Error ? e.message : String(e)}`,
        );
      }
    }
  }
  return revertedNames;
}
/**
 * Abort-path DB rollback, used after a session's squash-merge was rejected. The
 * session's eager DB writes (SL source rows, wiki index entries) no longer match what is
 * on disk, so for every SL source and wiki page the session touched, the DB entry is
 * re-derived from what the unscoped services currently read, or deleted when nothing is
 * found. Scoped to touched keys only — this is NOT a full reconciler run.
 *
 * Each item is handled best-effort: a failure is logged as a warning and the loop moves
 * on. A source file whose YAML cannot be parsed is left untouched in the DB.
 *
 * @param session Capture session (touched SL sources, scope flags, user id).
 * @param actions Session actions; only `wiki` entries are used here.
 */
async rollbackDbForAbortedSession(session: CaptureSession, actions: MemoryAction[]): Promise<void> {
  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  if (session.connectionId) {
    for (const { connectionId, sourceName } of listTouchedSlSources(session.touchedSlSources)) {
      try {
        // A read failure is treated the same as a missing file.
        const file = await this.deps.semanticLayerService.readSourceFile(connectionId, sourceName).catch(() => null);
        if (!file?.content) {
          await this.deps.slSourcesRepository.deleteByConnectionAndName(connectionId, sourceName);
          continue;
        }
        const parsed = this.parseYamlOrNull(file.content);
        if (parsed) {
          const contentHash = this.sha256Hex(file.content);
          await this.deps.semanticLayerSourceReconciler.upsertRow(parsed, file.path, contentHash);
        }
      } catch (err) {
        this.logger.warn(`[memory-agent rollback] SL ${sourceName} failed: ${describeError(err)}`);
      }
    }
  }

  const writesGlobal = session.forceGlobalScope || !session.userScopedEnabled;
  const wikiScope: 'GLOBAL' | 'USER' = writesGlobal ? 'GLOBAL' : 'USER';
  const wikiScopeId = wikiScope === 'USER' ? session.userId : null;
  const touchedWikiActions = actions.filter((candidate) => candidate.target === 'wiki');

  for (const action of touchedWikiActions) {
    try {
      const page = await this.deps.wikiService.readPage(wikiScope, wikiScopeId, action.key).catch(() => null);
      if (!page) {
        await this.deps.wikiService.deleteFromIndex(wikiScope, wikiScopeId, action.key);
        continue;
      }
      await this.deps.wikiService.syncSinglePage(wikiScope, wikiScopeId, action.key, page.frontmatter, page.content);
    } catch (err) {
      this.logger.warn(`[memory-agent rollback] wiki ${action.key} failed: ${describeError(err)}`);
    }
  }
}
/**
 * Parses SL source YAML, returning `null` instead of throwing.
 *
 * `null` is returned both when the text is not valid YAML and when it parses to
 * something that cannot be a source definition (a scalar, a sequence, or an empty
 * document). Without the shape check, such a value would be cast to
 * `SemanticLayerSource` and passed on as if it were a real source.
 *
 * @param content Raw YAML text of a source file.
 * @returns The parsed mapping, or `null` when the content is unusable.
 */
private parseYamlOrNull(content: string): SemanticLayerSource | null {
  let parsed: unknown;
  try {
    parsed = YAML.parse(content);
  } catch {
    return null;
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  // Only the top-level shape is checked here; field-level validation is left to callers.
  return parsed as SemanticLayerSource;
}
/**
 * Returns the SHA-256 digest of `content` (read as UTF-8) as a lowercase hex string.
 */
private sha256Hex(content: string): string {
  const hasher = createHash('sha256');
  hasher.update(content, 'utf-8');
  return hasher.digest('hex');
}
/**
 * Builds the squash-merge commit message for a session ingest. The message lists, in
 * order and only when non-zero: wiki action count, SL action count, reconciled
 * cross-ref count, and reverted-source count. When all are zero it says `no writes`.
 * The chat id is shortened to its first 8 characters.
 *
 * @returns A single-line message, e.g. `Memory ingest (research): 1 wiki, 2 sl [chat=abcd1234]`.
 */
private squashMessageForSession(
  sourceType: MemoryAgentSourceType,
  chatId: string,
  actions: MemoryAction[],
  reconciledCrossRefs: number,
  gateRevertedSources: string[],
): string {
  let wikiWrites = 0;
  let slWrites = 0;
  for (const { target } of actions) {
    if (target === 'wiki') {
      wikiWrites += 1;
    } else if (target === 'sl') {
      slWrites += 1;
    }
  }

  const tallies: Array<[count: number, label: string]> = [
    [wikiWrites, 'wiki'],
    [slWrites, 'sl'],
    [reconciledCrossRefs, 'xref'],
    [gateRevertedSources.length, 'reverted'],
  ];
  const fragments = tallies.filter(([count]) => count > 0).map(([count, label]) => `${count} ${label}`);
  const summary = fragments.length > 0 ? fragments.join(', ') : 'no writes';
  return `Memory ingest (${sourceType}): ${summary} [chat=${chatId.slice(0, 8)}]`;
}
/**
 * Loads the base system-prompt text for a source type: the prompt name comes from
 * `promptNameFor`, the text from the prompt service.
 */
private async loadBaseFraming(sourceType: MemoryAgentSourceType): Promise<string> {
  const promptName = promptNameFor(sourceType);
  return this.deps.promptService.loadPrompt(promptName);
}
/**
 * Renders the wiki index section of the prompt as a bullet list of `page_key: summary`.
 *
 * - No pages: a fixed placeholder line.
 * - User-scoped knowledge disabled: one flat `## Knowledge Pages` list.
 * - Otherwise: pages split into an organization section (scope `GLOBAL`) and a
 *   preferences section (every other scope); empty sections are omitted.
 *
 * @param userId User whose visible pages are listed.
 * @param userScopedEnabled Whether to split the listing by scope.
 */
private async buildWikiIndex(userId: string, userScopedEnabled: boolean): Promise<string> {
  const pages = await this.deps.knowledgeIndex.listPagesForUser(userId);
  if (pages.length === 0) {
    return '(empty — no knowledge pages exist yet)';
  }

  const toBullet = (page: { page_key: string; summary: string }): string => `- ${page.page_key}: ${page.summary}`;
  if (!userScopedEnabled) {
    return `## Knowledge Pages\n${pages.map(toBullet).join('\n')}`;
  }

  const organizationBullets = pages.filter((page) => page.scope === 'GLOBAL').map(toBullet);
  const personalBullets = pages.filter((page) => page.scope !== 'GLOBAL').map(toBullet);

  const sections: string[] = [];
  if (organizationBullets.length > 0) {
    sections.push(`## Organization (read-only from USER scope)\n${organizationBullets.join('\n')}`);
  }
  if (personalBullets.length > 0) {
    sections.push(`## Your Preferences\n${personalBullets.join('\n')}`);
  }
  return sections.join('\n\n');
}
/**
 * Renders the semantic-layer index section of the prompt for one connection.
 *
 * Each source gets a header with its measure and join counts. Sources with neither are
 * flagged as enrichment candidates; the rest list their qualified measure names and
 * their joins (`target (relationship)`). When a warehouse line is available it is put
 * first, separated by a blank line.
 *
 * @param connectionId Connection whose sources are listed.
 */
private async buildSlIndex(connectionId: string): Promise<string> {
  const [sources, warehouseLine] = await Promise.all([
    this.deps.semanticLayerService.loadAllSources(connectionId),
    this.buildWarehouseLine(connectionId),
  ]);

  let indexLines = '(no existing sources)';
  if (sources.length > 0) {
    const rendered: string[] = [];
    for (const source of sources) {
      const joins = source.joins ?? [];
      const header = `${source.name} [measures=${source.measures.length}, joins=${joins.length}]`;
      if (source.measures.length === 0 && joins.length === 0) {
        rendered.push(`${header} — candidate for enrichment`);
        continue;
      }
      const lines: string[] = [header];
      if (source.measures.length > 0) {
        const qualified = source.measures.map((measure) => `${source.name}.${measure.name}`);
        lines.push(` measures: ${qualified.join(', ')}`);
      }
      if (joins.length > 0) {
        const described = joins.map((join) => `${join.to} (${join.relationship})`);
        lines.push(` joins: ${described.join(', ')}`);
      }
      rendered.push(lines.join('\n'));
    }
    indexLines = rendered.join('\n');
  }

  return warehouseLine ? `${warehouseLine}\n\n${indexLines}` : indexLines;
}
/**
 * Looks up the connection and renders its type as a `Warehouse: X` line for the prompt,
 * so the agent can pick SQL idioms that fit the warehouse. Any failure during the
 * lookup yields an empty string, which `buildSlIndex` treats as "no line".
 *
 * @param connectionId Connection to describe.
 * @returns `Warehouse: <connectionType>`, or `''` when the lookup fails.
 */
private async buildWarehouseLine(connectionId: string): Promise<string> {
  let line = '';
  try {
    const { connectionType } = await this.deps.connections.getConnectionById(connectionId);
    line = `Warehouse: ${connectionType}`;
  } catch {
    // Best-effort: the prompt is still usable without the warehouse hint.
  }
  return line;
}
}
/**
 * Truncates `text` to at most `maxLength` characters (UTF-16 code units).
 *
 * Text that already fits is returned unchanged. Longer text is cut to `maxLength - 1`
 * characters and a single `…` is appended, so the result is exactly `maxLength` long and
 * the reader can see that content was dropped. A non-positive `maxLength` yields `''`
 * rather than letting `slice` interpret a negative end index.
 *
 * @param text Text to clip.
 * @param maxLength Maximum length of the returned string.
 * @returns The original text, or its truncated form ending in `…`.
 */
function clip(text: string, maxLength: number): string {
  if (!(text.length > maxLength)) {
    return text;
  }
  if (maxLength <= 0) {
    return '';
  }
  return `${text.slice(0, maxLength - 1)}…`;
}

View file

@ -0,0 +1,198 @@
import { describe, expect, it, vi } from 'vitest';
import type { MemoryAgentInput, MemoryAgentResult, MemoryAgentService } from './index.js';
import { MemoryCaptureService, type MemoryRunStorePort } from './memory-runs.js';
/**
 * Map-backed `MemoryRunStorePort` for tests. Ids are `run-<n>`, numbered from the map
 * size at creation time. `rows` is public so tests can inspect stored state directly.
 */
class InMemoryRunStore implements MemoryRunStorePort {
  readonly rows = new Map<
    string,
    {
      id: string;
      status: 'running' | 'done' | 'error';
      stage: string;
      inputHash: string;
      chatId: string | null;
      outputSummary: MemoryAgentResult | null;
      error: string | null;
    }
  >();

  /** Inserts a new row in `running` status at stage `queued`. */
  async createRunning(args: { inputHash: string; chatId?: string | null }): Promise<{ id: string }> {
    const { inputHash, chatId = null } = args;
    const id = `run-${this.rows.size + 1}`;
    this.rows.set(id, {
      id,
      status: 'running',
      stage: 'queued',
      inputHash,
      chatId,
      outputSummary: null,
      error: null,
    });
    return { id };
  }

  /** Updates only the stage label; throws for an unknown id. */
  async markRunning(id: string, stage: string): Promise<void> {
    this.requireRow(id).stage = stage;
  }

  /** Marks the run done and stores its summary; throws for an unknown id. */
  async markDone(id: string, outputSummary: MemoryAgentResult): Promise<void> {
    const row = this.requireRow(id);
    row.status = 'done';
    row.stage = 'done';
    row.outputSummary = outputSummary;
  }

  /** Marks the run failed and stores the message; throws for an unknown id. */
  async markError(id: string, error: string): Promise<void> {
    const row = this.requireRow(id);
    row.status = 'error';
    row.stage = 'error';
    row.error = error;
  }

  /** Returns the stored row, or `null` when the id is unknown. */
  async findById(id: string) {
    const row = this.rows.get(id);
    return row ?? null;
  }

  /** Shared lookup for the mutators: returns the row or throws `unknown run <id>`. */
  private requireRow(id: string) {
    const row = this.rows.get(id);
    if (!row) {
      throw new Error(`unknown run ${id}`);
    }
    return row;
  }
}
/**
 * Creates a promise together with the functions that settle it, so a test can decide
 * when (and how) the promise completes.
 *
 * @returns `promise` plus its `resolve` and `reject` functions.
 */
function deferred<T>() {
  let resolve!: (value: T) => void;
  let reject!: (reason?: unknown) => void;
  // The executor runs synchronously, so both functions are assigned before returning.
  const promise = new Promise<T>((fulfil, fail) => {
    [resolve, reject] = [fulfil, fail];
  });
  return { promise, resolve, reject };
}
/**
 * Wires a `MemoryCaptureService` to an in-memory run store and a mocked `ingest` that
 * returns a deferred promise, so each test controls when ingestion settles.
 *
 * @returns The service, the store, the `ingest` mock, and the deferred run handle.
 */
function buildService(): {
  capture: MemoryCaptureService;
  store: InMemoryRunStore;
  ingest: ReturnType<typeof vi.fn>;
  run: ReturnType<typeof deferred<MemoryAgentResult>>;
} {
  const run = deferred<MemoryAgentResult>();
  const store = new InMemoryRunStore();
  const ingest = vi.fn<MemoryAgentService['ingest']>().mockReturnValue(run.promise);
  const capture = new MemoryCaptureService({ memoryAgent: { ingest }, runs: store });
  return { capture, store, ingest, run };
}
// Behavioural spec for MemoryCaptureService. Most cases use buildService(), where the
// mocked `ingest` stays pending until the test settles the deferred `run` promise.
describe('MemoryCaptureService', () => {
// Happy path: the run is visible as `running`/`capturing` while ingest is pending, and
// after ingest resolves the status projects the stored summary.
it('creates a run, executes memory capture, and stores a done summary', async () => {
const result: MemoryAgentResult = {
signalDetected: true,
actions: [{ target: 'wiki', type: 'created', key: 'revenue', detail: 'captured revenue definition' }],
skillsLoaded: ['knowledge_capture'],
commitHash: 'abc123',
};
const { capture, store, ingest, run } = buildService();
const input: MemoryAgentInput = {
userId: 'user-1',
chatId: 'chat-1',
userMessage: 'Revenue means paid order value.',
assistantMessage: 'Captured.',
connectionId: '00000000-0000-0000-0000-000000000001',
};
const started = await capture.capture(input);
expect(started.runId).toBe('run-1');
expect(ingest).toHaveBeenCalledWith(input);
// Ingest has not been settled yet, so the run must still report as in progress.
await expect(capture.status(started.runId)).resolves.toMatchObject({
runId: 'run-1',
status: 'running',
stage: 'capturing',
done: false,
});
// Settle ingest, then wait for the background run to record its outcome.
run.resolve(result);
await capture.waitForRun(started.runId);
const status = await capture.status(started.runId);
expect(status).toEqual({
runId: 'run-1',
stage: 'done',
done: true,
status: 'done',
captured: {
wiki: ['revenue'],
sl: [],
xrefs: [],
},
error: null,
commitHash: 'abc123',
skillsLoaded: ['knowledge_capture'],
signalDetected: true,
});
// The stored input hash is a SHA-256 hex digest, i.e. 64 characters.
expect(store.rows.get('run-1')?.inputHash).toHaveLength(64);
});
// An ingest result with no actions is still a successful (`done`) run.
it('stores no-signal captures as done with empty captured arrays', async () => {
const { capture, run } = buildService();
const started = await capture.capture({
userId: 'user-1',
chatId: 'chat-2',
userMessage: 'Thanks.',
});
run.resolve({
signalDetected: false,
actions: [],
skillsLoaded: [],
commitHash: null,
});
await capture.waitForRun(started.runId);
await expect(capture.status(started.runId)).resolves.toMatchObject({
done: true,
status: 'done',
captured: { wiki: [], sl: [], xrefs: [] },
signalDetected: false,
});
});
// Failure path: uses its own service wiring because `ingest` must reject rather than
// return the deferred promise from buildService().
it('stores thrown errors and projects them as failed statuses', async () => {
const store = new InMemoryRunStore();
const memoryAgent = {
ingest: vi.fn<MemoryAgentService['ingest']>().mockRejectedValue(new Error('LLM provider missing')),
};
const capture = new MemoryCaptureService({ memoryAgent, runs: store });
const started = await capture.capture({
userId: 'user-1',
chatId: 'chat-3',
userMessage: 'Remember this.',
});
await capture.waitForRun(started.runId);
await expect(capture.status(started.runId)).resolves.toMatchObject({
done: true,
status: 'error',
stage: 'error',
captured: { wiki: [], sl: [], xrefs: [] },
error: 'LLM provider missing',
});
});
// Unknown ids are reported as `null`, not as an error.
it('returns null for an unknown run id', async () => {
const { capture } = buildService();
await expect(capture.status('missing')).resolves.toBeNull();
});
});

View file

@ -0,0 +1,133 @@
import { createHash } from 'node:crypto';
import type { MemoryAction, MemoryAgentInput, MemoryAgentResult, MemoryAgentService } from './index.js';
/** Lifecycle state of a capture run. Anything other than `running` counts as finished. */
export type MemoryRunStatus = 'running' | 'done' | 'error';
/** A capture run as returned by the run store. */
export interface MemoryRunRecord {
id: string;
status: MemoryRunStatus;
// Free-form progress label; this file itself only writes 'capturing'.
stage: string;
// SHA-256 hex digest of the capture input (see `inputHash` in this file).
inputHash: string;
chatId: string | null;
// Result of the ingest call; null until the run has been marked done.
outputSummary: MemoryAgentResult | null;
// Failure message; null unless the run has been marked as failed.
error: string | null;
}
/** Persistence operations the capture service needs for run bookkeeping. */
export interface MemoryRunStorePort {
/** Creates a new run record and returns its id. */
createRunning(args: { inputHash: string; chatId?: string | null }): Promise<{ id: string }>;
/** Records a progress stage for a run. */
markRunning(id: string, stage: string): Promise<void>;
/** Records a successful outcome together with the ingest result. */
markDone(id: string, outputSummary: MemoryAgentResult): Promise<void>;
/** Records a failure message for a run. */
markError(id: string, error: string): Promise<void>;
/** Looks a run up by id; resolves to null when it does not exist. */
findById(id: string): Promise<MemoryRunRecord | null>;
}
/** Collaborators of `MemoryCaptureService`. Only `ingest` is needed from the memory agent. */
export interface MemoryCaptureServiceDeps {
memoryAgent: Pick<MemoryAgentService, 'ingest'>;
runs: MemoryRunStorePort;
}
/** Returned by `capture()` once the run has been registered. */
export interface MemoryCaptureStartResult {
runId: string;
}
/** Caller-facing projection of a run record, as built by `MemoryCaptureService.status()`. */
export interface MemoryCaptureStatus {
runId: string;
status: MemoryRunStatus;
stage: string;
// True when `status` is not 'running'.
done: boolean;
// Sorted, de-duplicated action keys grouped by target; `xrefs` holds keys whose action
// detail mentions 'xref' or 'cross-ref' (see `capturedKeys`).
captured: {
wiki: string[];
sl: string[];
xrefs: string[];
};
error: string | null;
commitHash: string | null;
skillsLoaded: string[];
signalDetected: boolean;
}
/**
 * Fingerprints a capture input as a SHA-256 hex digest.
 *
 * Only the user message, the assistant message (missing is treated as `''`) and the
 * connection id (missing is treated as `null`) take part; other input fields do not
 * affect the hash. The key order of the serialized object is part of the fingerprint.
 */
function inputHash(input: MemoryAgentInput): string {
  const fingerprint = {
    userMessage: input.userMessage,
    assistantMessage: input.assistantMessage ?? '',
    connectionId: input.connectionId ?? null,
  };
  const hasher = createHash('sha256');
  hasher.update(JSON.stringify(fingerprint));
  return hasher.digest('hex');
}
/**
 * Groups action keys for the status projection.
 *
 * Keys of `wiki` actions go to `wiki`; keys of every other action go to `sl`. A key is
 * also listed under `xrefs` when its action detail contains `xref` or `cross-ref`
 * (case-insensitive). Each list is de-duplicated and sorted with the default string order.
 */
function capturedKeys(actions: MemoryAction[]): MemoryCaptureStatus['captured'] {
  const xrefMarkers = ['xref', 'cross-ref'];
  const wikiKeys = new Set<string>();
  const slKeys = new Set<string>();
  const xrefKeys = new Set<string>();

  for (const { target, key, detail } of actions) {
    (target === 'wiki' ? wikiKeys : slKeys).add(key);
    const loweredDetail = detail.toLowerCase();
    if (xrefMarkers.some((marker) => loweredDetail.includes(marker))) {
      xrefKeys.add(key);
    }
  }

  const toSortedList = (keys: Set<string>): string[] => Array.from(keys).sort();
  return {
    wiki: toSortedList(wikiKeys),
    sl: toSortedList(slKeys),
    xrefs: toSortedList(xrefKeys),
  };
}
/**
 * Starts memory-capture runs in the background and reports their status.
 *
 * `capture()` registers a run in the store and returns its id without waiting for
 * ingestion; `status()` projects the stored record; `waitForRun()` lets callers (tests,
 * in this file's usage) wait for a run started by this instance.
 */
export class MemoryCaptureService {
  /** Runs started by this instance that have not settled yet, keyed by run id. */
  private readonly inFlight = new Map<string, Promise<void>>();

  constructor(private readonly deps: MemoryCaptureServiceDeps) {}

  /**
   * Registers a run, marks it as `capturing`, and kicks off ingestion without awaiting it.
   *
   * @returns The id of the new run.
   */
  async capture(input: MemoryAgentInput): Promise<MemoryCaptureStartResult> {
    const { id: runId } = await this.deps.runs.createRunning({
      inputHash: inputHash(input),
      chatId: input.chatId,
    });
    await this.deps.runs.markRunning(runId, 'capturing');

    const pending = this.runCapture(runId, input);
    this.inFlight.set(runId, pending);
    // Drop the bookkeeping entry once the run settles; the trailing catch keeps a
    // rejection of this derived promise from surfacing as unhandled.
    pending.finally(() => this.inFlight.delete(runId)).catch(() => undefined);
    return { runId };
  }

  /** Resolves when the given run settles; resolves immediately for ids not in flight. */
  async waitForRun(runId: string): Promise<void> {
    await this.inFlight.get(runId);
  }

  /**
   * Projects a stored run into the caller-facing status shape.
   *
   * @returns The status, or `null` when the store has no such run.
   */
  async status(runId: string): Promise<MemoryCaptureStatus | null> {
    const row = await this.deps.runs.findById(runId);
    if (!row) {
      return null;
    }
    const { outputSummary } = row;
    const captured: MemoryCaptureStatus['captured'] = outputSummary
      ? capturedKeys(outputSummary.actions)
      : { wiki: [], sl: [], xrefs: [] };
    return {
      runId: row.id,
      status: row.status,
      stage: row.stage,
      done: row.status !== 'running',
      captured,
      error: row.error,
      commitHash: outputSummary?.commitHash ?? null,
      skillsLoaded: outputSummary?.skillsLoaded ?? [],
      signalDetected: outputSummary?.signalDetected ?? false,
    };
  }

  /**
   * Runs ingestion and records the outcome. Any error thrown by ingestion or by
   * `markDone` is stored on the run via `markError`.
   */
  private async runCapture(runId: string, input: MemoryAgentInput): Promise<void> {
    try {
      const summary = await this.deps.memoryAgent.ingest(input);
      await this.deps.runs.markDone(runId, summary);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      await this.deps.runs.markError(runId, message);
    }
  }
}

View file

@ -0,0 +1,100 @@
import { readFile } from 'node:fs/promises';
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { describe, expect, it } from 'vitest';
import { PromptService } from '../prompts/index.js';
import { SkillsRegistryService } from '../skills/index.js';
import { DEFAULT_SKILL_NAMES, type MemoryAgentSourceType, promptNameFor } from './index.js';
// Directories holding the packaged prompts and skills, resolved relative to this test file.
const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url));
const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url));
// Source types whose base prompt the suite loads.
// NOTE(review): the `MemoryAgentSourceType` union also contains 'sql-review-migration',
// which is not listed here — confirm whether its prompt should be covered as well.
const memorySourceTypes: MemoryAgentSourceType[] = ['research', 'external_ingest', 'backfill'];
// Heading each default capture skill's SKILL.md is expected to contain.
const expectedSkillHeadings: Record<string, string> = {
knowledge_capture: '# Knowledge Capture',
sl: '# Semantic Layer',
sl_capture: '# Semantic Layer',
};
// Heading each ingest-adapter skill's SKILL.md is expected to contain.
const expectedAdapterSkillHeadings: Record<string, string> = {
historic_sql_ingest: '# Historic SQL Ingest',
live_database_ingest: '# Live Database Ingest',
looker_ingest: '# Looker Runtime Ingest',
lookml_ingest: '# LookML to KLO Semantic Layer',
metabase_ingest: '# Metabase to KLO Semantic Layer',
metricflow_ingest: '# MetricFlow to KLO Semantic Layer',
};
/**
 * Builds the case-sensitive pattern matching the three spellings the packaged assets
 * must not contain. The spellings are assembled from fragments — presumably so this
 * file never contains the name literally; confirm before inlining them.
 */
function forbiddenProductPattern() {
  const splitSpellings = [
    ['Kae', 'lio'],
    ['kae', 'lio'],
    ['KAE', 'LIO_'],
  ];
  const alternatives = splitSpellings.map((fragments) => fragments.join(''));
  return new RegExp(alternatives.join('|'));
}
// Packaging checks: these read the real prompt and skill files from `promptsDir` and
// `skillsDir` and assert on their content.
describe('memory runtime assets', () => {
// Every listed source type must resolve to a loadable prompt with the expected sections.
it('packages every memory-agent base prompt referenced by promptNameFor()', async () => {
const prompts = new PromptService({ promptsDir, partials: [] });
for (const sourceType of memorySourceTypes) {
const promptName = promptNameFor(sourceType);
const prompt = await prompts.loadPrompt(promptName);
expect(prompt).toContain('<role>');
expect(prompt).toContain('<workflow>');
expect(prompt).not.toMatch(forbiddenProductPattern());
}
});
// All default skills must be visible to the `memory_agent` caller and carry their heading.
it('packages the default memory capture skills referenced by DEFAULT_SKILL_NAMES', async () => {
const registry = new SkillsRegistryService({ skillsDir });
const skills = await registry.listSkills([...DEFAULT_SKILL_NAMES], 'memory_agent');
expect(skills.map((skill) => skill.name).sort()).toEqual(['knowledge_capture', 'sl', 'sl_capture']);
for (const skill of skills) {
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
const expectedHeading = expectedSkillHeadings[skill.name];
expect(expectedHeading).toBeDefined();
expect(body).toContain(expectedHeading);
expect(body).not.toMatch(forbiddenProductPattern());
}
});
// The same names listed for the `research` caller must yield only `sl`.
it('keeps memory-only capture skills hidden from research callers', async () => {
const registry = new SkillsRegistryService({ skillsDir });
const skills = await registry.listSkills([...DEFAULT_SKILL_NAMES], 'research');
expect(skills.map((skill) => skill.name)).toEqual(['sl']);
});
// Every adapter skill named in `expectedAdapterSkillHeadings` must be packaged.
it('packages ingest adapter skills referenced by bundled adapters', async () => {
const registry = new SkillsRegistryService({ skillsDir });
const skillNames = Object.keys(expectedAdapterSkillHeadings);
const skills = await registry.listSkills(skillNames, 'memory_agent');
expect(skills.map((skill) => skill.name).sort()).toEqual([...skillNames].sort());
for (const skill of skills) {
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
expect(body).toContain(expectedAdapterSkillHeadings[skill.name]);
expect(body).not.toMatch(forbiddenProductPattern());
}
});
// Pins specific phrases in the Looker skill text; the final assertion checks that one
// particular sentence is absent.
it('ships Looker runtime ingest guidance for warehouse target SL writes', async () => {
const body = await readFile(join(skillsDir, 'looker_ingest', 'SKILL.md'), 'utf-8');
expect(body).toContain('targetWarehouseConnectionId');
expect(body).toContain('targetTable.ok === true');
expect(body).toContain('targetTable.canonicalTable');
expect(body).toContain('source_tables preflight');
expect(body).toContain('emit_unmapped_fallback');
expect(body).toContain('no_connection_mapping');
expect(body).not.toContain('a standalone SL source only when raw evidence contains enough table or SQL structure');
});
// Pins the connection-mismatch gate wording in the LookML skill text.
it('packages LookML connection-mismatch SL gate guidance', async () => {
const body = await readFile(join(skillsDir, 'lookml_ingest', 'SKILL.md'), 'utf-8');
expect(body).toContain('[LOOKML SL WRITES DISALLOWED]');
expect(body).toContain('lookml_connection_mismatch');
expect(body).toContain('Do not call `sl_write_source` or `sl_edit_source`');
expect(body).toContain('LookML writes target the run connection directly');
});
});

View file

@ -0,0 +1,157 @@
import type { Tool } from 'ai';
import type { AgentRunnerService } from '../agent/index.js';
import type { GitService, KloFileStorePort, KloLogger, SessionWorktreeService } from '../core/index.js';
import type { PromptService } from '../prompts/index.js';
import type { SkillsRegistryService } from '../skills/index.js';
import type {
KloConnectionInfo,
KloQueryResult,
SemanticLayerService,
SemanticLayerSource,
SlSearchService,
SlSourcesIndexPort,
SlValidationDeps,
SlValidatorPort,
} from '../sl/index.js';
import type { ToolContext, ToolSession, TouchedSlSourceSet } from '../tools/index.js';
import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js';
export type MemoryAgentSourceType = 'research' | 'external_ingest' | 'backfill' | 'sql-review-migration';
export interface MemoryAgentInput {
userId: string;
chatId: string;
userMessage: string;
assistantMessage?: string;
connectionId?: string;
userMessageId?: string;
sourceType?: MemoryAgentSourceType;
}
export interface MemoryAction {
target: 'wiki' | 'sl';
type: 'created' | 'updated' | 'removed';
key: string;
detail: string;
targetConnectionId?: string | null;
}
/**
 * Outcome summary of a memory-agent run.
 */
export interface MemoryAgentResult {
/** Whether any capture signal was detected for the input. */
signalDetected: boolean;
/** Actions recorded during the run; see {@link MemoryAction}. */
actions: MemoryAction[];
/** Names of the skills that were loaded for the run. */
skillsLoaded: string[];
/** Hash of the resulting commit, or `null` — presumably when nothing was committed; confirm against the service. */
commitHash: string | null;
}
/**
 * Result of the heuristic signal scan performed by `detectCaptureSignals`
 * over a {@link MemoryAgentInput}.
 */
export interface CaptureSignals {
/** True when a knowledge heuristic matched (definition keyword in the user message, or a table separator in the assistant message). */
knowledge: boolean;
/** True when an SL heuristic matched (SQL aggregate pattern in the assistant message, or an SL-style definition keyword in the user message). */
sl: boolean;
/** Detected modelling dialect, if any; 'lookml' is the only value the type allows. */
dialect?: 'lookml';
/** Human-readable descriptions of the heuristics that matched, in evaluation order. */
reasons: string[];
}
/**
 * State carried for one capture session.
 *
 * Repeats the identifying fields of {@link MemoryAgentInput} and adds
 * scope flags, the set of touched SL sources, and a `preHead` marker.
 */
export interface CaptureSession {
/** Identifier of the user the session belongs to. */
userId: string;
/** Identifier of the chat the session belongs to. */
chatId: string;
/** Optional identifier of the originating user message. */
userMessageId?: string;
/** Text of the originating user message. */
userMessage: string;
/** Optional connection identifier associated with the session. */
connectionId?: string;
/** Whether user-scoped behaviour is enabled — presumably mirrors `knowledge.userScopedKnowledgeEnabled`; confirm. */
userScopedEnabled: boolean;
/** Whether the session is forced to global scope. */
forceGlobalScope: boolean;
/** Collection of SL sources touched during the session. */
touchedSlSources: TouchedSlSourceSet;
/** NOTE(review): presumably the repository HEAD hash captured before the session ran, `null` when unavailable — confirm. */
preHead: string | null;
}
/**
 * Configuration consumed by the memory agent, grouped into three sections.
 */
export interface MemoryAgentSettings {
/** Knowledge-related settings. */
knowledge: {
/** Toggle for user-scoped knowledge. */
userScopedKnowledgeEnabled: boolean;
};
/** Settings for SL validation. */
slValidation: {
/** Row count used for validation probes — presumably a row limit on probe queries; confirm. */
probeRowCount: number;
};
/** Language-model settings. */
llm: {
/** Identifier of the model used for memory ingestion. */
memoryIngestionModel: string;
};
}
/**
 * Port for reporting memory-ingestion telemetry.
 */
export interface MemoryTelemetryPort {
/**
 * Reports one memory-ingestion event. Returns nothing.
 *
 * @param userId - Identifier of the user the event is attributed to.
 * @param properties - Event payload; property names are snake_case.
 */
trackMemoryIngestion(
userId: string,
properties: {
chat_id: string;
source_type: MemoryAgentSourceType;
action_count: number;
actions: string[];
skills_loaded: string[];
signals_detected: string[];
signals_acted_on: string[];
reconciled_cross_refs: number;
// Final state of the session; restricted to these four literals.
session_outcome: 'success' | 'empty' | 'conflict' | 'crash';
},
): void;
}
/**
 * Port for synchronising the SL references attached to a wiki page.
 */
export interface MemoryKnowledgeSlRefsPort {
/**
 * Synchronises stored references for one wiki page with the supplied `refs` list.
 *
 * @param args - The wiki page key, its scope ('GLOBAL' or 'USER'), the scope
 *   id (nullable — presumably `null` for GLOBAL scope; confirm), and the
 *   (connectionId, sourceName) pairs the page refers to.
 * @returns Counts of inserted and deleted references.
 */
syncFromWiki(args: {
wikiPageKey: string;
wikiScope: 'GLOBAL' | 'USER';
wikiScopeId: string | null;
refs: Array<{ connectionId: string; sourceName: string }>;
}): Promise<{ inserted: number; deleted: number }>;
}
/**
 * Port giving the memory agent access to connections and query execution.
 */
export interface MemoryConnectionPort {
/** Resolves connection info for the given ids — presumably returning only the enabled ones; confirm with the implementation. */
listEnabledConnections(ids: string[]): Promise<KloConnectionInfo[]>;
/** Resolves connection info for a single connection id. */
getConnectionById(connectionId: string): Promise<KloConnectionInfo>;
/** Runs the given SQL text against the given connection and resolves with the query result. */
executeQuery(connectionId: string, sql: string): Promise<KloQueryResult>;
}
/**
 * Port for queueing a commit-message job for a commit made externally.
 */
export interface MemoryCommitMessagePort {
/**
 * Enqueues a commit-message job.
 *
 * @param commit - Object carrying the hash of the commit the job refers to.
 * @param message - Message text associated with the commit.
 * @param pathFilter - Path filter for the job — exact matching semantics are not visible here; confirm with the implementation.
 * @returns A promise that resolves with no value.
 */
enqueueCommitMessageJobForExternalCommit(
commit: { commitHash: string },
message: string,
pathFilter: string,
): Promise<void>;
}
/**
 * File store used by the memory agent: combines `KloFileStorePort`
 * (parameterised with this same type) with {@link MemoryCommitMessagePort}.
 * Declares no members of its own.
 */
export interface MemoryFileStorePort extends KloFileStorePort<MemoryFileStorePort>, MemoryCommitMessagePort {}
/**
 * Minimal shape of a toolset: anything that can be turned into AI SDK tools.
 */
export interface MemoryToolSetLike {
/** Builds a name-to-tool record (using the `Tool` type from the `ai` package) for the given tool context. */
toAiSdkTools(context: ToolContext): Record<string, Tool>;
}
/**
 * Factory port producing the toolsets the memory agent works with.
 */
export interface MemoryToolsetFactoryPort {
/** Creates the ingest toolset bound to the given tool session. NOTE(review): the meaning of "Wu" is not visible here — confirm. */
createIngestWuToolset(session: ToolSession): MemoryToolSetLike;
/** Creates a toolset for the given capabilities; the parameter type is a one-element tuple that only admits `['wiki']`. */
createToolset(capabilities: ['wiki']): MemoryToolSetLike;
}
/**
 * Port for reconciling a parsed semantic-layer source into a row store.
 */
export interface MemorySlSourceReconcilerPort {
/**
 * Upserts the row for one source.
 *
 * @param parsed - The parsed semantic-layer source.
 * @param path - Path associated with the source — presumably its file path in the repository; confirm.
 * @param contentHash - Hash of the source content.
 */
upsertRow(parsed: SemanticLayerSource, path: string, contentHash: string): Promise<void>;
}
/**
 * Port for running work under a named lock.
 */
export interface MemoryLockPort {
/**
 * Runs `fn` under the lock identified by `key` and resolves with the same
 * type `fn` resolves with. The key type only admits the literal 'config:repo'.
 */
withLock<T>(key: 'config:repo', fn: () => Promise<T>): Promise<T>;
}
/**
 * Dependency bag for the memory agent service.
 *
 * Every member is required except `telemetry` and `logger`.
 */
export interface MemoryAgentServiceDeps {
/** Memory-agent configuration. */
settings: MemoryAgentSettings;
promptService: PromptService;
skillsRegistry: SkillsRegistryService;
wikiService: KnowledgeWikiService;
knowledgeIndex: KnowledgeIndexPort;
/** Synchronises wiki-to-SL references. */
knowledgeSlRefs: MemoryKnowledgeSlRefsPort;
semanticLayerService: SemanticLayerService;
slSearchService: SlSearchService;
/** Connection lookup and query execution. */
connections: MemoryConnectionPort;
/** File store that also exposes the commit-message port. */
rootFileStore: MemoryFileStorePort;
gitService: GitService;
/** Lock provider; see {@link MemoryLockPort}. */
lockingService: MemoryLockPort;
slSourcesRepository: SlSourcesIndexPort;
sessionWorktreeService: SessionWorktreeService<MemoryFileStorePort>;
semanticLayerSourceReconciler: MemorySlSourceReconcilerPort;
agentRunner: AgentRunnerService;
slValidator: SlValidatorPort<SlValidationDeps>;
toolsetFactory: MemoryToolsetFactoryPort;
/** Optional telemetry sink. */
telemetry?: MemoryTelemetryPort;
/** Optional logger. */
logger?: KloLogger;
}