Improve schema setup and Notion ingest UX (#14)

* Improve schema setup and Notion ingest UX

* Handle Postgres network scan failures

* WIP: save local changes before main merge

* Refine setup prompt choices

* Tighten ingest reconciliation guidance

* Commit setup config updates

* Canonicalize unmapped fallback details

* Count reconciliation actions in reports

* Harden semantic layer source validation

* Return wiki content after edits

* Validate SL sources against manifests

* Validate wiki refs before writes

* Simplify CLI next steps

* Clarify agent setup summary

* Surface dbt target SL sources

* Recover SL write fallbacks

* Preserve failed context build metadata

* Track raw paths for ingest actions

* test(cli): update seeded demo expectations

* fix(ingest): scope fallback recovery checks

* fix(sl): tighten source validation guards

* fix(wiki): ignore empty embedding vectors

* Improve Notion ingest UX

* Enforce flat wiki keys

* test(context): update wiki key assertion

---------

Co-authored-by: Andrey Avtomonov <andreybavt@gmail.com>
This commit is contained in:
Luca Martial 2026-05-12 16:56:58 -04:00 committed by GitHub
parent 866d33e71a
commit 60457e9407
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
116 changed files with 4177 additions and 610 deletions

View file

@ -185,8 +185,8 @@ describe('runKtxAgent', () => {
search: vi.fn(async () => ({
results: [
{
key: 'metrics/revenue',
path: 'knowledge/global/metrics/revenue.md',
key: 'metrics-revenue',
path: 'knowledge/global/metrics-revenue.md',
scope: 'GLOBAL' as const,
summary: 'Revenue metric definition',
score: 0.02459016393442623,
@ -207,8 +207,8 @@ describe('runKtxAgent', () => {
expect(JSON.parse(io.stdout())).toEqual({
results: [
expect.objectContaining({
key: 'metrics/revenue',
path: 'knowledge/global/metrics/revenue.md',
key: 'metrics-revenue',
path: 'knowledge/global/metrics-revenue.md',
matchReasons: ['lexical', 'token'],
}),
],

View file

@ -355,6 +355,53 @@ describe('runKtxConnectionNotion', () => {
expect(io.stdout()).toContain('rootPageIds: 1');
});
it('uses inline Notion auth_token for interactive discovery', async () => {
const projectDir = join(tempDir, 'project');
const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' });
await writeProjectConfig(projectDir, {
...initialized.config,
connections: {
'notion-main': {
driver: 'notion',
auth_token: 'ntn_inline_token',
crawl_mode: 'selected_roots',
root_page_ids: [PAGE_IDS.engineering],
root_database_ids: [],
root_data_source_ids: [],
max_pages_per_run: 12,
max_knowledge_creates_per_run: 2,
max_knowledge_updates_per_run: 7,
last_successful_cursor: null,
},
},
});
const api = fakeNotionApi([notionPage(PAGE_IDS.engineering, 'Engineering')]);
const createNotionApi = vi.fn((authToken: string) => {
expect(authToken).toBe('ntn_inline_token');
return api;
});
const io = makeIo();
await expect(
runKtxConnectionNotion(
{
command: 'pick',
projectDir,
connectionId: 'notion-main',
mode: 'interactive',
},
io.io,
{
createNotionApi,
renderPicker: vi.fn(async (): Promise<PickerRenderResult> => ({ kind: 'quit' })),
},
),
).resolves.toBe(0);
expect(createNotionApi).toHaveBeenCalledOnce();
expect(io.stdout()).toContain('No changes saved.');
});
it('passes partial-discovery warnings into the TUI banner state', async () => {
const projectDir = join(tempDir, 'project');
const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' });

View file

@ -1,4 +1,4 @@
import { parseNotionConnectionConfig, resolveNotionAuthToken } from '@ktx/context/connections';
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from '@ktx/context/connections';
import { type NotionApi, type NotionBotInfo, NotionClient } from '@ktx/context/ingest';
import {
type KtxLocalProject,
@ -223,7 +223,7 @@ export async function runKtxConnectionNotion(
const project = await loadProject({ projectDir: args.projectDir });
const rawConnection = notionConnection(project, args.connectionId);
const notion = parseNotionConnectionConfig(rawConnection);
const authToken = await resolveNotionAuthToken(notion.auth_token_ref, { env: deps.env });
const authToken = await resolveNotionConnectionAuthToken(notion, { env: deps.env });
const api = deps.createNotionApi ? deps.createNotionApi(authToken) : new NotionClient(authToken);
const discovery = await discoverNotionPickerPages(api);
const tree = buildPickerTree(discovery.pages);

View file

@ -267,10 +267,11 @@ describe('renderContextBuildView', () => {
{ connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] },
]);
state.primarySources[0].status = 'failed';
state.primarySources[0].failureText = 'KTX lost its connection to PostgreSQL while scanning warehouse.';
const output = renderContextBuildView(state, { styled: false });
expect(output).toContain('✗');
expect(output).toContain('failed');
expect(output).toContain('KTX lost its connection to PostgreSQL while scanning warehouse.');
});
it('omits empty groups', () => {
@ -327,8 +328,19 @@ describe('createRepainter', () => {
repainter.paint('hello');
repainter.paint('bye');
expect(io.stdout()).toContain('\rbye');
expect(io.stdout()).not.toContain('\u001b[1A\rbye');
expect(io.stdout()).toContain('bye');
expect(io.stdout()).not.toMatch(/\[\d+A/);
});
it('does not undershoot cursor-up when a line is exactly the terminal width', () => {
const io = makeIo({ isTTY: true, columns: 10 });
const repainter = createRepainter(io.io);
repainter.paint('0123456789\nsecond\n');
repainter.paint('0123456789\nsecond\n');
const cursorMoves = [...io.stdout().matchAll(/\[(\d+)A/g)].map((m) => Number(m[1]));
expect(cursorMoves).toEqual([2]);
});
});
@ -373,6 +385,52 @@ describe('runContextBuild', () => {
expect(result).toEqual({ exitCode: 1, detached: false });
});
it('renders a friendly network failure when target output contains a network error code', async () => {
const io = makeIo();
const project = projectWithConnections({
warehouse: { driver: 'postgres' },
});
const executeTarget = vi.fn(async (target, _args, targetIo) => {
targetIo.stderr.write('Error: read EADDRNOTAVAIL\n');
return failedResult(target.connectionId, target.driver, target.operation);
});
const result = await runContextBuild(
project,
{ projectDir: '/tmp/project', inputMode: 'disabled' },
io.io,
{ executeTarget, now: () => 1000 },
);
expect(result).toEqual({ exitCode: 1, detached: false });
expect(io.stdout()).toContain('KTX lost its connection to PostgreSQL while scanning warehouse.');
expect(io.stdout()).toContain('network address unavailable (EADDRNOTAVAIL)');
expect(io.stdout()).toContain('Retry: ktx setup --project-dir /tmp/project');
expect(io.stdout()).not.toContain('BoundPool');
});
it('renders a friendly network failure when target execution throws', async () => {
const io = makeIo();
const project = projectWithConnections({
warehouse: { driver: 'postgres' },
});
const error = Object.assign(new Error('read ECONNRESET'), { code: 'ECONNRESET' });
const executeTarget = vi.fn(async () => {
throw error;
});
const result = await runContextBuild(
project,
{ projectDir: '/tmp/project', inputMode: 'disabled' },
io.io,
{ executeTarget, now: () => 1000 },
);
expect(result).toEqual({ exitCode: 1, detached: false });
expect(io.stdout()).toContain('KTX lost its connection to PostgreSQL while scanning warehouse.');
expect(io.stdout()).toContain('connection reset (ECONNRESET)');
});
it('renders final view for non-TTY output', async () => {
const io = makeIo();
const project = projectWithConnections({
@ -529,6 +587,36 @@ describe('runContextBuild', () => {
],
});
});
it('returns report IDs parsed from failed source-ingest target output', async () => {
const io = makeIo();
const project = projectWithConnections({
warehouse: { driver: 'postgres' },
dbt_main: { driver: 'dbt' },
});
const executeTarget = vi.fn(async (target, _args, targetIo) => {
if (target.operation === 'scan') {
return successResult(target.connectionId, target.driver, target.operation);
}
targetIo.stdout.write('Report: report-dbt-failed\n');
targetIo.stdout.write('Work units: 3\n');
return failedResult(target.connectionId, target.driver, target.operation);
});
const result = await runContextBuild(
project,
{ projectDir: '/tmp/project', inputMode: 'disabled' },
io.io,
{ executeTarget, now: () => 1000 },
);
expect(result).toMatchObject({
exitCode: 1,
detached: false,
reportIds: ['report-dbt-failed'],
});
});
});
describe('viewStateFromSourceProgress', () => {

View file

@ -22,6 +22,7 @@ export interface ContextBuildTargetState {
status: 'queued' | 'running' | 'done' | 'failed';
detailLine: string | null;
summaryText: string | null;
failureText: string | null;
startedAt: number | null;
elapsedMs: number;
}
@ -133,7 +134,8 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean): string
return parts.join(' · ');
}
if (target.status === 'failed') {
return styled ? red('failed') : 'failed';
const failureText = target.failureText ?? 'failed';
return styled ? red(failureText) : failureText;
}
if (target.status === 'running') {
const percent = extractPercent(target.detailLine);
@ -327,6 +329,7 @@ export function viewStateFromSourceProgress(
status: s.status,
detailLine: null,
summaryText: s.summaryText ?? null,
failureText: null,
startedAt: s.startedAtMs ?? null,
elapsedMs: s.status === 'running' && s.startedAtMs ? now - s.startedAtMs : (s.elapsedMs ?? 0),
});
@ -378,7 +381,8 @@ export function createRepainter(io: KtxCliIo) {
}
io.stdout.write('\r');
}
io.stdout.write(content.replaceAll('\n', `${ESC}[K\n`));
io.stdout.write(`${ESC}[2K`);
io.stdout.write(content.replaceAll('\n', `\n${ESC}[2K`));
io.stdout.write(`${ESC}[J`);
hasPainted = true;
lastCursorUpRows = cursorUpRowsAfterWrite(content);
@ -440,7 +444,83 @@ export function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void)
// --- Orchestration ---
function makeTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetState {
return { target, status: 'queued', detailLine: null, summaryText: null, startedAt: null, elapsedMs: 0 };
return {
target,
status: 'queued',
detailLine: null,
summaryText: null,
failureText: null,
startedAt: null,
elapsedMs: 0,
};
}
const NETWORK_ERROR_REASONS: Record<string, string> = {
EADDRNOTAVAIL: 'network address unavailable',
ECONNRESET: 'connection reset',
ECONNREFUSED: 'connection refused',
ENETUNREACH: 'network unreachable',
ENOTFOUND: 'host not found',
ETIMEDOUT: 'connection timed out',
EHOSTUNREACH: 'host unreachable',
};
function unknownErrorMessage(error: unknown): string {
return error instanceof Error ? error.message : String(error);
}
function networkErrorCodeFromText(text: string): string | null {
for (const code of Object.keys(NETWORK_ERROR_REASONS)) {
if (new RegExp(`\\b${code}\\b`).test(text)) {
return code;
}
}
return null;
}
function networkErrorCode(error: unknown, capturedOutput = ''): string | null {
const directCode = typeof (error as { code?: unknown })?.code === 'string'
? (error as { code: string }).code
: null;
if (directCode && NETWORK_ERROR_REASONS[directCode]) {
return directCode;
}
return networkErrorCodeFromText(`${unknownErrorMessage(error)}\n${capturedOutput}`);
}
function friendlyDriverName(driver: string): string {
const normalized = driver.toLowerCase();
if (normalized === 'postgres' || normalized === 'postgresql') return 'PostgreSQL';
if (normalized === 'mysql') return 'MySQL';
if (normalized === 'sqlserver') return 'SQL Server';
if (normalized === 'bigquery') return 'BigQuery';
if (normalized === 'snowflake') return 'Snowflake';
if (normalized === 'clickhouse') return 'ClickHouse';
if (normalized === 'sqlite') return 'SQLite';
return driver || 'the source';
}
function failedStepDetail(result: KtxPublicIngestTargetResult): string | null {
return result.steps.find((step) => step.status === 'failed')?.detail ?? null;
}
function failureTextForTarget(input: {
target: KtxPublicIngestPlanTarget;
projectDir: string;
capturedOutput?: string;
error?: unknown;
fallback?: string | null;
}): string {
const code = networkErrorCode(input.error, input.capturedOutput);
if (code) {
const operation = input.target.operation === 'scan' ? 'scanning' : 'ingesting';
return [
`KTX lost its connection to ${friendlyDriverName(input.target.driver)} while ${operation} ${input.target.connectionId}.`,
`Reason: ${NETWORK_ERROR_REASONS[code]} (${code}).`,
`Retry: ${resumeCommand(input.projectDir)}`,
].join(' ');
}
return input.fallback ?? `${input.target.connectionId} failed.`;
}
export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuildViewState {
@ -493,6 +573,7 @@ export async function runContextBuild(
const artifactPaths = new Set<string>();
let detached = false;
let exiting = false;
let cleanupKeystroke: (() => void) | null = null;
if (isTTY || deps.setupKeystroke) {
@ -502,6 +583,7 @@ export async function runContextBuild(
};
cleanupKeystroke = (deps.setupKeystroke ?? defaultSetupKeystroke)(
() => {
detached = true;
cleanup();
deps.onDetach?.();
const bg = spawnBackgroundBuild(args.projectDir);
@ -509,12 +591,14 @@ export async function runContextBuild(
if (bg) io.stdout.write(`Log: ${bg.logPath}\n`);
io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`);
io.stdout.write(`Status: ktx setup context status --project-dir ${resolve(args.projectDir)}\n`);
exiting = true;
process.exit(0);
},
() => {
cleanup();
io.stdout.write('\n\nContext build stopped. Nothing is running in the background.\n');
io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`);
exiting = true;
process.exit(130);
},
);
@ -548,21 +632,38 @@ export async function runContextBuild(
false,
);
const result = await execTarget(targetState.target, runArgs, capture.io, {});
let result: KtxPublicIngestTargetResult | null = null;
let thrownError: unknown = null;
try {
result = await execTarget(targetState.target, runArgs, capture.io, {});
} catch (error) {
if (exiting) {
throw error;
}
thrownError = error;
}
targetState.elapsedMs = nowFn() - (targetState.startedAt ?? nowFn());
const failed = result.steps.some((s) => s.status === 'failed');
const failed = thrownError !== null || result?.steps.some((s) => s.status === 'failed') === true;
targetState.status = failed ? 'failed' : 'done';
targetState.detailLine = null;
const capturedOutput = capture.captured();
const metadata = collectOutputMetadata(capturedOutput, targetState.target.operation);
for (const reportId of metadata.reportIds) reportIds.add(reportId);
for (const artifactPath of metadata.artifactPaths) artifactPaths.add(artifactPath);
if (!failed) {
const capturedOutput = capture.captured();
const metadata = collectOutputMetadata(capturedOutput, targetState.target.operation);
for (const reportId of metadata.reportIds) reportIds.add(reportId);
for (const artifactPath of metadata.artifactPaths) artifactPaths.add(artifactPath);
targetState.summaryText =
targetState.target.operation === 'scan'
? parseScanSummary(capturedOutput)
: parseIngestSummary(capturedOutput);
} else {
targetState.failureText = failureTextForTarget({
target: targetState.target,
projectDir: args.projectDir,
capturedOutput,
error: thrownError,
fallback: result ? failedStepDetail(result) : null,
});
}
if (failed) hasFailure = true;

View file

@ -4,7 +4,7 @@ import { join } from 'node:path';
import { afterEach, describe, expect, it } from 'vitest';
import { runDemoSeeded } from './demo-seeded.js';
import { formatSeededInspect, inspectSeededProject } from './demo-seeded-inspect.js';
import { KTX_NEXT_STEP_COMMANDS } from './next-steps.js';
import { KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js';
describe('seeded demo inspect contract', () => {
const projectDir = join(tmpdir(), `ktx-demo-seeded-inspect-${process.pid}`);
@ -59,7 +59,7 @@ describe('seeded demo inspect contract', () => {
reports: { primaryPath: 'reports/seeded-demo-report.json', fileCount: 1 },
replays: { primaryPath: 'replays/replay.memory-flow.v1.json', latestPath: 'replays/latest.memory-flow.v1.json' },
},
nextCommands: KTX_NEXT_STEP_COMMANDS,
nextCommands: KTX_NEXT_STEP_DIRECT_COMMANDS,
});
expect(inspect.generatedOutputs.replays.fileCount).toBeGreaterThanOrEqual(3);
@ -91,10 +91,7 @@ describe('seeded demo inspect contract', () => {
expect(output).toContain('Latest replay: seeded (packaged, prebuilt)');
expect(output).toContain(' $ ktx agent tools --json');
expect(output).toContain(' $ ktx agent context --json');
expect(output).toContain(' $ ktx serve --mcp stdio --user-id local');
expect(output.indexOf('ktx agent tools --json')).toBeLessThan(
output.indexOf('ktx serve --mcp stdio --user-id local'),
);
expect(output).not.toContain('ktx serve --mcp stdio --user-id local');
expect(output).not.toContain('ktx ask');
expect(output).not.toContain('deterministic mode');
});

View file

@ -4,7 +4,7 @@ import { join, resolve } from 'node:path';
import type { MemoryFlowReplayInput } from '@ktx/context/ingest/memory-flow';
import { loadPackagedDemoReplay } from './demo-assets.js';
import { DEMO_LATEST_REPLAY_FILE, loadLatestDemoReplay } from './demo-replay-store.js';
import { KTX_NEXT_STEP_COMMANDS, KTX_NEXT_STEP_COMMAND_WIDTH } from './next-steps.js';
import { KTX_NEXT_STEP_COMMAND_WIDTH, KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js';
type SeededInspectReadiness = 'missing' | 'ready' | 'corrupt';
@ -178,7 +178,7 @@ function sourceBundleFromManifest(manifest: DemoSeededManifest): SeededInspectSu
}
function nextCommands(): SeededInspectSummary['nextCommands'] {
return [...KTX_NEXT_STEP_COMMANDS];
return [...KTX_NEXT_STEP_DIRECT_COMMANDS];
}
function modeMetadataFromReplay(replay: MemoryFlowReplayInput | null): SeededInspectSummary['modeMetadata'] {

View file

@ -8,7 +8,7 @@ import { DEMO_FULL_JOB_ID, defaultDemoProjectDir, ensureDemoProject } from './de
import type { DemoFullResult } from './demo-full.js';
import { createTestDemoPromptAdapter } from './demo-interaction.js';
import type { renderMemoryFlowTui } from './memory-flow-tui.js';
import { KTX_NEXT_STEP_COMMANDS } from './next-steps.js';
import { KTX_NEXT_STEP_DIRECT_COMMANDS } from './next-steps.js';
import { resetVizFallbackWarningsForTest } from './viz-fallback.js';
const SEEDED_DEMO_SEMANTIC_SOURCE_COUNT = 46;
@ -208,7 +208,7 @@ describe('runKtxDemo', () => {
expect(io.stdout()).toContain('Connection: orbit_demo');
expect(io.stdout()).toContain('ktx sl list');
expect(io.stdout()).toContain('ktx wiki list');
expect(io.stdout()).toContain('ktx serve --mcp stdio --user-id local');
expect(io.stdout()).not.toContain('ktx serve --mcp stdio --user-id local');
expect(io.stdout()).not.toContain('KTX memory flow');
expect(io.stderr()).toContain(
'Visualization requested but stdin raw mode is unavailable; printing plain output.',
@ -226,7 +226,7 @@ describe('runKtxDemo', () => {
expect(testIo.stdout()).toContain('Connection: orbit_demo');
expect(testIo.stdout()).toContain('ktx sl list');
expect(testIo.stdout()).toContain('ktx wiki list');
expect(testIo.stdout()).toContain('ktx serve --mcp stdio --user-id local');
expect(testIo.stdout()).not.toContain('ktx serve --mcp stdio --user-id local');
expect(testIo.stdout()).not.toContain('KTX memory flow');
expect(testIo.stderr()).toContain(
'Visualization requested but stdout is not an interactive terminal; printing plain output.',
@ -287,7 +287,7 @@ describe('runKtxDemo', () => {
expect(io.stdout()).toContain('LLM calls: none');
expect(io.stdout()).toContain('Your KTX project files are at:');
expect(io.stdout()).toContain(join(tmpdir(), 'ktx-demo-'));
expect(io.stdout()).toContain('ktx serve --mcp stdio');
expect(io.stdout()).not.toContain('ktx serve --mcp stdio');
expect(io.stdout()).not.toContain(['ktx', 'mcp'].join(' '));
expect(io.stdout()).not.toContain('deterministic');
});
@ -356,7 +356,7 @@ describe('runKtxDemo', () => {
generatedContext: 'prebuilt from bundled assets',
llmCalls: 'none',
},
nextCommands: KTX_NEXT_STEP_COMMANDS,
nextCommands: KTX_NEXT_STEP_DIRECT_COMMANDS,
});
expect(parsed.generatedOutputs.replays.fileCount).toBeGreaterThanOrEqual(3);
expect(jsonIo.stderr()).toBe('');
@ -423,7 +423,7 @@ describe('runKtxDemo', () => {
expect(testIo.stdout()).toContain('KTX finished ingesting your data');
expect(testIo.stdout()).toContain('ktx sl list');
expect(testIo.stdout()).toContain('ktx wiki list');
expect(testIo.stdout()).toContain('ktx serve --mcp stdio --user-id local');
expect(testIo.stdout()).not.toContain('ktx serve --mcp stdio --user-id local');
expect(testIo.stdout()).not.toContain(['ktx', 'ask'].join(' '));
expect(testIo.stdout()).not.toContain(['ktx', 'mcp'].join(' '));
});

View file

@ -1171,7 +1171,7 @@ describe('runKtxCli', () => {
projectDir: tempDir,
inputMode: 'disabled',
cliVersion: '0.0.0-private',
anthropicApiKeyEnv: 'ANTHROPIC_API_KEY',
anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret
anthropicModel: 'claude-sonnet-4-6',
skipLlm: false,
}),
@ -1232,7 +1232,7 @@ describe('runKtxCli', () => {
inputMode: 'disabled',
skipLlm: true,
embeddingBackend: 'openai',
embeddingApiKeyEnv: 'OPENAI_API_KEY',
embeddingApiKeyEnv: 'OPENAI_API_KEY', // pragma: allowlist secret
skipEmbeddings: false,
}),
setupIo.io,
@ -1333,7 +1333,7 @@ describe('runKtxCli', () => {
source: 'metabase',
sourceConnectionId: 'prod_metabase',
sourceUrl: 'https://metabase.example.com',
sourceApiKeyRef: 'env:METABASE_API_KEY',
sourceApiKeyRef: 'env:METABASE_API_KEY', // pragma: allowlist secret
sourceWarehouseConnectionId: 'warehouse',
metabaseDatabaseId: 1,
}),
@ -1759,8 +1759,8 @@ describe('runKtxCli', () => {
{
results: [
{
key: 'metrics/revenue',
path: 'knowledge/global/metrics/revenue.md',
key: 'metrics-revenue',
path: 'knowledge/global/metrics-revenue.md',
scope: 'GLOBAL',
summary: 'Revenue metric definition',
score: 0.02459016393442623,
@ -1786,8 +1786,8 @@ describe('runKtxCli', () => {
expect(JSON.parse(io.stdout())).toEqual({
results: [
expect.objectContaining({
key: 'metrics/revenue',
path: 'knowledge/global/metrics/revenue.md',
key: 'metrics-revenue',
path: 'knowledge/global/metrics-revenue.md',
matchReasons: ['lexical', 'token'],
}),
],

View file

@ -186,6 +186,91 @@ describe('runKtxIngest viz and replay', () => {
expect(io.stdout()).toContain('Connection: warehouse');
});
it('prints live viz final summaries as errors when the report has failed work units', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const io = makeIo({ isTTY: true, stdinIsTTY: true, columns: 120 });
const liveSession = {
update: vi.fn(),
close: vi.fn(),
isClosed: vi.fn(() => false),
};
const startLiveMemoryFlow = vi.fn(async (_input: MemoryFlowReplayInput, _io: unknown) => liveSession);
const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => {
input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'notion', trigger: 'manual_resync', fileCount: 37 });
input.memoryFlow?.update({
syncId: 'sync-notion',
plannedWorkUnits: [
{
unitKey: 'notion-cluster-1',
rawFiles: ['pages/a.md'],
peerFileCount: 0,
dependencyCount: 0,
},
],
});
input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
input.memoryFlow?.emit({
type: 'work_unit_finished',
unitKey: 'notion-cluster-1',
status: 'failed',
reason: 'notion-cluster-1 failed: {"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)"}',
});
input.memoryFlow?.emit({ type: 'report_created', runId: 'live-failed' });
input.memoryFlow?.finish('done');
const failedWorkUnit = {
...localFakeBundleReport('live-failed').body.workUnits[0],
unitKey: 'notion-cluster-1',
rawFiles: ['pages/a.md'],
status: 'failed' as const,
reason: 'notion-cluster-1 failed: {"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)"}',
actions: [],
touchedSlSources: [],
};
const report = localFakeBundleReport('live-failed', {
id: 'report-live-failed',
runId: 'run-live-failed',
connectionId: input.connectionId,
sourceKey: input.adapter,
body: {
workUnits: [failedWorkUnit],
failedWorkUnits: [failedWorkUnit.unitKey],
},
});
return {
result: {
jobId: 'live-failed',
runId: report.runId,
syncId: report.body.syncId,
diffSummary: report.body.diffSummary,
workUnitCount: report.body.workUnits.length,
failedWorkUnits: report.body.failedWorkUnits,
artifactsWritten: report.body.provenanceRows.length,
commitSha: report.body.commitSha,
},
report,
};
});
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'notion-main',
adapter: 'notion',
outputMode: 'viz',
},
io.io,
{ runLocalIngest: runLocal, startLiveMemoryFlow },
),
).resolves.toBe(1);
expect(io.stdout()).toContain('Memory-flow summary: error');
expect(io.stdout()).toContain('Notion authorization expired');
});
it('falls back to text live rendering when the TUI live session is unavailable', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);

View file

@ -437,6 +437,21 @@ function initialRunMemoryFlowInput(
};
}
function finalRunMemoryFlowInput(snapshot: MemoryFlowReplayInput, report: IngestReportSnapshot): MemoryFlowReplayInput {
const status = reportStatus(report);
return {
...snapshot,
runId: report.runId,
connectionId: report.connectionId,
adapter: report.sourceKey,
status,
syncId: report.body.syncId,
reportId: report.id,
reportPath: report.id,
errors: status === 'error' ? report.body.failedWorkUnits : snapshot.errors,
};
}
function managedDaemonOptionsForIngestRun(
args: Extract<KtxIngestArgs, { command: 'run' }>,
io: KtxIngestIo,
@ -592,7 +607,7 @@ export async function runKtxIngest(
...(memoryFlow ? { memoryFlow } : {}),
});
if (shouldUseLiveViz && memoryFlow) {
latestMemoryFlowSnapshot = memoryFlow.snapshot();
latestMemoryFlowSnapshot = finalRunMemoryFlowInput(memoryFlow.snapshot(), result.report);
liveTui?.close();
liveTui = null;
io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot));

View file

@ -61,7 +61,7 @@ describe('runKtxKnowledge', () => {
{
command: 'write',
projectDir,
key: 'metrics/revenue',
key: 'metrics-revenue',
scope: 'GLOBAL',
userId: 'local',
summary: 'Revenue',
@ -73,24 +73,53 @@ describe('runKtxKnowledge', () => {
writeIo.io,
),
).resolves.toBe(0);
expect(writeIo.stdout()).toContain('Wrote knowledge/global/metrics/revenue.md');
expect(writeIo.stdout()).toContain('Wrote knowledge/global/metrics-revenue.md');
const readIo = makeIo();
await expect(
runKtxKnowledge({ command: 'read', projectDir, key: 'metrics/revenue', userId: 'local' }, readIo.io),
runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local' }, readIo.io),
).resolves.toBe(0);
expect(readIo.stdout()).toContain('# metrics/revenue');
expect(readIo.stdout()).toContain('# metrics-revenue');
expect(readIo.stdout()).toContain('Revenue is paid order value.');
const listIo = makeIo();
await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local' }, listIo.io)).resolves.toBe(0);
expect(listIo.stdout()).toContain('GLOBAL\tmetrics/revenue\tRevenue');
expect(listIo.stdout()).toContain('GLOBAL\tmetrics-revenue\tRevenue');
const searchIo = makeIo();
await expect(
runKtxKnowledge({ command: 'search', projectDir, query: 'paid order', userId: 'local' }, searchIo.io),
).resolves.toBe(0);
expect(searchIo.stdout()).toContain('metrics/revenue');
expect(searchIo.stdout()).toContain('metrics-revenue');
});
it('rejects slash-delimited write keys with a flat-key suggestion', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir, projectName: 'warehouse' });
const writeIo = makeIo();
await expect(
runKtxKnowledge(
{
command: 'write',
projectDir,
key: 'orbit/company-overview',
scope: 'GLOBAL',
userId: 'local',
summary: 'Orbit',
content: 'Orbit overview.',
tags: [],
refs: [],
slRefs: [],
},
writeIo.io,
),
).resolves.toBe(1);
expect(writeIo.stderr()).toContain(
'Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".',
);
expect(writeIo.stdout()).toBe('');
});
it('explains empty search results for a project without wiki pages', async () => {
@ -116,7 +145,7 @@ describe('runKtxKnowledge', () => {
{
command: 'write',
projectDir,
key: 'historic-sql/active-contract-arr-open-tickets',
key: 'active-contract-arr-open-tickets',
scope: 'GLOBAL',
userId: 'local',
summary: 'Active Contract ARR Ranked by Open Support Ticket Count',
@ -138,7 +167,7 @@ describe('runKtxKnowledge', () => {
),
).resolves.toBe(0);
expect(searchIo.stdout()).toContain('historic-sql/active-contract-arr-open-tickets');
expect(searchIo.stdout()).toContain('active-contract-arr-open-tickets');
expect(searchIo.stderr()).toBe('');
});
});

View file

@ -201,7 +201,7 @@ describe('MemoryFlowTuiApp', () => {
expect(frame).toContain('KTX finished ingesting your data');
expect(frame).toContain('ktx sl list');
expect(frame).toContain('ktx wiki list');
expect(frame).toContain('ktx serve --mcp stdio --user-id local');
expect(frame).not.toContain('ktx serve --mcp stdio --user-id local');
expect(frame).not.toContain(['ktx', 'ask'].join(' '));
expect(frame).not.toContain(['ktx', 'mcp'].join(' '));
});

View file

@ -66,11 +66,10 @@ describe('KTX demo next steps', () => {
const rendered = formatNextStepLines().join('\n');
expect(rendered).toContain('KTX context is ready for agents.');
expect(rendered).toContain('Preferred route: CLI + Skills');
expect(rendered).toContain('no MCP server is required');
expect(rendered).toContain('Direct CLI checks:');
expect(rendered).toContain('Optional MCP:');
expect(rendered).not.toContain('Ask your agent to use KTX');
expect(rendered).toContain('ask a data question');
expect(rendered).toContain('Verify with:');
expect(rendered).not.toContain('Preferred route');
expect(rendered).not.toContain('Optional MCP:');
});
it('does not advertise removed Commander migration commands', () => {
@ -80,7 +79,6 @@ describe('KTX demo next steps', () => {
expect(rendered).toContain('ktx agent context --json');
expect(rendered).toContain('ktx sl list');
expect(rendered).toContain('ktx wiki list');
expect(rendered).toContain('ktx serve --mcp stdio --user-id local');
for (const removed of [
command('ktx', 'ask'),
@ -91,6 +89,7 @@ describe('KTX demo next steps', () => {
command('dev', 'knowledge'),
command('ktx', 'ingest', 'run'),
command('ktx', 'ingest', 'replay'),
command('ktx', 'serve', '--mcp', 'stdio', '--user-id', 'local'),
]) {
expect(rendered).not.toContain(removed);
}
@ -123,7 +122,7 @@ describe('KTX demo next steps', () => {
expect(rendered).toContain('KTX context is ready for agents.');
expect(rendered).toContain('ktx agent context --json');
expect(rendered).toContain('ktx serve --mcp stdio --user-id local');
expect(rendered).not.toContain('ktx serve --mcp stdio --user-id local');
expect(rendered).not.toContain('Build KTX context next.');
});
});

View file

@ -58,12 +58,9 @@ function commandLines(commands: ReadonlyArray<{ command: string; description: st
export function formatNextStepLines(indent = ' '): string[] {
return [
`${indent}KTX context is ready for agents.`,
`${indent}Preferred route: CLI + Skills; installed rules call the pinned local CLI directly, so no MCP server is required.`,
`${indent}Direct CLI checks:`,
`${indent}KTX context is ready for agents. Open your coding agent in this directory and ask a data question.`,
`${indent}Verify with:`,
...commandLines(KTX_NEXT_STEP_DIRECT_COMMANDS, indent),
`${indent}Optional MCP:`,
...commandLines(KTX_NEXT_STEP_MCP_COMMANDS, indent),
];
}

View file

@ -239,11 +239,11 @@ describe('setup agents', () => {
const output = io.stdout();
expect(output).toContain('Agent integration complete');
expect(output).toContain('Claude Code');
expect(output).toContain('+ Skill installed');
expect(output).toContain('+ Skill installed — teaches your agent which KTX commands to run');
expect(output).toContain('.claude/skills/ktx/SKILL.md');
expect(output).toContain('+ Rule installed');
expect(output).toContain('+ Rule installed — tells your agent when to use KTX');
expect(output).toContain('.claude/rules/ktx.md');
expect(output).toContain('+ MCP config added');
expect(output).toContain('+ MCP config added — lets your agent talk to KTX over MCP');
expect(output).toContain('.mcp.json');
});
@ -258,9 +258,9 @@ describe('setup agents', () => {
);
expect(summary).toContain('Cursor');
expect(summary).toContain('+ Rule installed');
expect(summary).toContain('+ Rule installed — tells your agent when to use KTX');
expect(summary).toContain('.cursor/rules/ktx.mdc');
expect(summary).toContain('+ MCP config added');
expect(summary).toContain('+ MCP config added — lets your agent talk to KTX over MCP');
expect(summary).toContain('.cursor/mcp.json');
expect(summary).not.toContain(tempDir);
});
@ -280,9 +280,9 @@ describe('setup agents', () => {
);
expect(summary).toContain('Claude Code');
expect(summary).toContain('+ Skill installed');
expect(summary).toContain('+ Rule installed');
expect(summary).toContain('+ Skill installed — teaches your agent which KTX commands to run');
expect(summary).toContain('+ Rule installed — tells your agent when to use KTX');
expect(summary).toContain('Codex');
expect(summary).toContain('+ MCP config added');
expect(summary).toContain('+ MCP config added — lets your agent talk to KTX over MCP');
});
});

View file

@ -348,6 +348,12 @@ export function formatInstallSummary(
idx += planned.length;
}
const fileHints: Record<string, string> = {
skill: 'teaches your agent which KTX commands to run',
rule: 'tells your agent when to use KTX',
mcp: 'lets your agent talk to KTX over MCP',
};
const lines: string[] = [];
for (const install of installs) {
const targetEntries = entriesByTarget.get(install.target) ?? [];
@ -356,11 +362,13 @@ export function formatInstallSummary(
const displayPath =
install.scope === 'global' ? entry.path : relative(projectDir, entry.path);
if (entry.kind === 'file') {
const label = entry.role === 'rule' ? 'Rule installed' : fileEntryLabels[install.target];
lines.push(` + ${label}`);
const isRule = entry.role === 'rule' || fileEntryLabels[install.target] === 'Rule installed';
const label = isRule ? 'Rule installed' : fileEntryLabels[install.target];
const hint = fileHints[isRule ? 'rule' : (entry.role ?? 'skill')] ?? '';
lines.push(` + ${label}${hint}`);
lines.push(` ${displayPath}`);
} else {
lines.push(` + MCP config added`);
lines.push(` + MCP config added${fileHints.mcp}`);
lines.push(` ${displayPath}`);
}
}

View file

@ -215,6 +215,47 @@ describe('setup context build state', () => {
expect(io.stdout()).toContain('KTX context is ready for agents.');
});
it('records only failed sources as retryable when the context build fails', async () => {
await writeReadyProject(tempDir);
const io = makeIo();
const runContextBuildMock = vi.fn(async (_project, _args, _io, hooks) => {
hooks.onSourceProgress?.([
{ connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 1000 },
{ connectionId: 'docs', operation: 'source-ingest', status: 'failed', elapsedMs: 2000 },
]);
return {
exitCode: 1,
detached: false,
reportIds: ['report-docs-failed'],
artifactPaths: ['raw-sources/docs/notion/sync-1/ingest-report.json'],
};
});
await expect(
runKtxSetupContextStep(
{ projectDir: tempDir, inputMode: 'disabled' },
io.io,
{
runIdFactory: () => 'setup-context-local-failed',
now: () => new Date('2026-05-09T10:00:00.000Z'),
runContextBuild: runContextBuildMock,
},
),
).resolves.toEqual({ status: 'failed', projectDir: tempDir });
await expect(readKtxSetupContextState(tempDir)).resolves.toMatchObject({
runId: 'setup-context-local-failed',
status: 'failed',
reportIds: ['report-docs-failed'],
artifactPaths: ['raw-sources/docs/notion/sync-1/ingest-report.json'],
retryableFailedTargets: ['docs'],
sourceProgress: [
{ connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 1000 },
{ connectionId: 'docs', operation: 'source-ingest', status: 'failed', elapsedMs: 2000 },
],
});
});
it('marks context complete without prompting when initial source ingest already made agent context', async () => {
await writeReadyProject(tempDir);
await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true });

View file

@ -234,6 +234,24 @@ function normalizeSourceProgress(value: unknown): ContextBuildSourceProgressUpda
return entries.length > 0 ? entries : undefined;
}
function setupContextTargetIds(targets: KtxSetupContextTargets): string[] {
return [...new Set([...targets.primarySourceConnectionIds, ...targets.contextSourceConnectionIds])];
}
function retryableFailedTargetsFromProgress(
targets: KtxSetupContextTargets,
progress: ContextBuildSourceProgressUpdate[] | undefined,
): string[] {
const targetIds = setupContextTargetIds(targets);
if (!progress || progress.length === 0) {
return targetIds;
}
const failedIds = new Set(progress.filter((source) => source.status === 'failed').map((source) => source.connectionId));
const failedTargets = targetIds.filter((connectionId) => failedIds.has(connectionId));
return failedTargets.length > 0 ? failedTargets : targetIds;
}
export async function readKtxSetupContextState(projectDir: string): Promise<KtxSetupContextState> {
const filePath = statePath(projectDir);
if (!(await pathExists(filePath))) {
@ -614,7 +632,7 @@ async function runBuild(
updatedAt,
reportIds: completedReportIds,
artifactPaths: completedArtifactPaths,
retryableFailedTargets: [...targets.primarySourceConnectionIds, ...targets.contextSourceConnectionIds],
retryableFailedTargets: retryableFailedTargetsFromProgress(targets, lastSourceProgress),
failureReason: 'Context build failed.',
...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
});

View file

@ -532,8 +532,8 @@ describe('setup databases step', () => {
expect(prompts.select).toHaveBeenCalledWith({
message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
options: [
{ value: 'continue', label: 'Continue to knowledge sources' },
{ value: 'add', label: 'Add another primary source' },
{ value: 'continue', label: 'Continue setup' },
{ value: 'back', label: 'Back' },
],
});
@ -583,8 +583,8 @@ describe('setup databases step', () => {
expect(prompts.select).toHaveBeenCalledWith({
message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
options: [
{ value: 'continue', label: 'Continue to knowledge sources' },
{ value: 'add', label: 'Add another primary source' },
{ value: 'continue', label: 'Continue setup' },
{ value: 'back', label: 'Back' },
],
});
@ -618,8 +618,8 @@ describe('setup databases step', () => {
expect(prompts.select).toHaveBeenCalledWith({
message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?',
options: [
{ value: 'continue', label: 'Continue to knowledge sources' },
{ value: 'add', label: 'Add another primary source' },
{ value: 'continue', label: 'Continue setup' },
{ value: 'back', label: 'Back' },
],
});
@ -653,8 +653,8 @@ describe('setup databases step', () => {
expect(prompts.select).toHaveBeenNthCalledWith(2, {
message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?',
options: [
{ value: 'continue', label: 'Continue to knowledge sources' },
{ value: 'add', label: 'Add another primary source' },
{ value: 'continue', label: 'Continue setup' },
{ value: 'back', label: 'Back' },
],
});
@ -696,8 +696,8 @@ describe('setup databases step', () => {
expect(prompts.select).toHaveBeenNthCalledWith(2, {
message: 'Primary sources already configured: warehouse\nWhat would you like to do?',
options: [
{ value: 'continue', label: 'Continue to knowledge sources' },
{ value: 'add', label: 'Add another primary source' },
{ value: 'continue', label: 'Continue setup' },
{ value: 'back', label: 'Back' },
],
});
@ -920,8 +920,18 @@ describe('setup databases step', () => {
'│ ✓ Connection test passed',
'│ Driver: PostgreSQL · Tables: 2',
'│',
].join('\n'),
);
expect(io.stdout()).toContain(
[
'◇ Scanning postgres-warehouse',
'│ ✓ Structural scan completed',
'│ Running structural scan…',
'│',
].join('\n'),
);
expect(io.stdout()).toContain(
[
'◇ Scan complete for postgres-warehouse',
'│ Changes: 2 new tables',
'│ Report: raw-sources/postgres-warehouse/live-database/.../scan-report.json',
'│',
@ -1009,7 +1019,7 @@ describe('setup databases step', () => {
expect(config.connections['postgres-warehouse']).toMatchObject({
schemas: ['orbit_analytics', 'orbit_raw'],
});
expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw');
expect(io.stdout()).toContain(' orbit_analytics, orbit_raw');
});
it('auto-selects all discovered Postgres schemas in non-interactive setup', async () => {
@ -1045,7 +1055,7 @@ describe('setup databases step', () => {
expect(config.connections.warehouse).toMatchObject({
schemas: ['orbit_analytics', 'orbit_raw', 'public'],
});
expect(io.stdout()).toContain('Schemas: orbit_analytics, orbit_raw, public');
expect(io.stdout()).toContain(' orbit_analytics, orbit_raw, public');
});
it('adds one non-interactive Postgres URL connection, tests it, scans it, and marks databases complete', async () => {

View file

@ -115,6 +115,56 @@ const DEFAULT_CONNECTION_IDS: Record<KtxSetupDatabaseDriver, string> = {
snowflake: 'snowflake-warehouse',
};
interface ScopeDiscoverySpec {
noun: string;
nounPlural: string;
promptLabel: string;
configArrayField: string;
configSingleField: string;
defaultSelection: (values: string[]) => string[];
}
const SCOPE_DISCOVERY_SPECS: Partial<Record<KtxSetupDatabaseDriver, ScopeDiscoverySpec>> = {
postgres: {
noun: 'schema',
nounPlural: 'schemas',
promptLabel: 'PostgreSQL schemas',
configArrayField: 'schemas',
configSingleField: 'schema',
defaultSelection(schemas) {
const nonPublic = schemas.filter((s) => s !== 'public');
return nonPublic.length > 0 ? nonPublic : schemas;
},
},
sqlserver: {
noun: 'schema',
nounPlural: 'schemas',
promptLabel: 'SQL Server schemas',
configArrayField: 'schemas',
configSingleField: 'schema',
defaultSelection: (schemas) => schemas,
},
bigquery: {
noun: 'dataset',
nounPlural: 'datasets',
promptLabel: 'BigQuery datasets',
configArrayField: 'dataset_ids',
configSingleField: 'dataset_id',
defaultSelection: (datasets) => datasets,
},
snowflake: {
noun: 'schema',
nounPlural: 'schemas',
promptLabel: 'Snowflake schemas',
configArrayField: 'schema_names',
configSingleField: 'schema_name',
defaultSelection(schemas) {
const nonPublic = schemas.filter((s) => s !== 'PUBLIC');
return nonPublic.length > 0 ? nonPublic : schemas;
},
},
};
type UrlDriverType = Extract<KtxSetupDatabaseDriver, 'postgres' | 'mysql' | 'clickhouse' | 'sqlserver'>;
const DRIVER_CONNECTION_DEFAULTS: Record<UrlDriverType, { port: string }> = {
@ -263,16 +313,53 @@ async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Pr
async function defaultListSchemas(projectDir: string, connectionId: string): Promise<string[]> {
const project = await loadKtxProject({ projectDir });
const connection = project.config.connections[connectionId];
const { KtxPostgresScanConnector, isKtxPostgresConnectionConfig } = await import('@ktx/connector-postgres');
if (!isKtxPostgresConnectionConfig(connection)) {
return [];
const driver = normalizeDriver(connection?.driver);
if (driver === 'postgres') {
const { KtxPostgresScanConnector, isKtxPostgresConnectionConfig } = await import('@ktx/connector-postgres');
if (!isKtxPostgresConnectionConfig(connection)) return [];
const connector = new KtxPostgresScanConnector({ connectionId, connection });
try {
return await connector.listSchemas();
} finally {
await connector.cleanup();
}
}
const connector = new KtxPostgresScanConnector({ connectionId, connection });
try {
return await connector.listSchemas();
} finally {
await connector.cleanup();
if (driver === 'sqlserver') {
const { KtxSqlServerScanConnector, isKtxSqlServerConnectionConfig } = await import('@ktx/connector-sqlserver');
if (!isKtxSqlServerConnectionConfig(connection)) return [];
const connector = new KtxSqlServerScanConnector({ connectionId, connection });
try {
return await connector.listSchemas();
} finally {
await connector.cleanup();
}
}
if (driver === 'bigquery') {
const { KtxBigQueryScanConnector, isKtxBigQueryConnectionConfig } = await import('@ktx/connector-bigquery');
if (!isKtxBigQueryConnectionConfig(connection)) return [];
const connector = new KtxBigQueryScanConnector({ connectionId, connection });
try {
return await connector.listDatasets();
} finally {
await connector.cleanup();
}
}
if (driver === 'snowflake') {
const { KtxSnowflakeScanConnector, isKtxSnowflakeConnectionConfig } = await import('@ktx/connector-snowflake');
if (!isKtxSnowflakeConnectionConfig(connection)) return [];
const connector = new KtxSnowflakeScanConnector({ connectionId, connection });
try {
return await connector.listSchemas();
} finally {
await connector.cleanup();
}
}
return [];
}
function existingConnectionIdsByDriver(
@ -309,8 +396,8 @@ function configuredPrimarySourcesPrompt(connectionIds: string[]): {
return {
message: `Primary sources already configured: ${connectionIds.join(', ')}\nWhat would you like to do?`,
options: [
{ value: 'continue', label: 'Continue to knowledge sources' },
{ value: 'add', label: 'Add another primary source' },
{ value: 'continue', label: 'Continue setup' },
{ value: 'back', label: 'Back' },
],
};
@ -849,41 +936,44 @@ async function writeConnectionConfig(input: {
}
}
function configuredSchemas(connection: KtxProjectConnectionConfig | undefined): string[] {
function configuredScopeValues(
connection: KtxProjectConnectionConfig | undefined,
spec: ScopeDiscoverySpec,
): string[] {
if (!connection) return [];
if (Array.isArray(connection.schemas)) {
return connection.schemas
.filter((schema): schema is string => typeof schema === 'string' && schema.trim().length > 0)
.map((schema) => schema.trim());
const arrayVal = connection[spec.configArrayField];
if (Array.isArray(arrayVal)) {
return arrayVal
.filter((v): v is string => typeof v === 'string' && v.trim().length > 0)
.map((v) => v.trim());
}
return typeof connection.schema === 'string' && connection.schema.trim().length > 0 ? [connection.schema.trim()] : [];
const singleVal = connection[spec.configSingleField];
return typeof singleVal === 'string' && singleVal.trim().length > 0 ? [singleVal.trim()] : [];
}
function defaultSchemaSelection(schemas: string[]): string[] {
const nonPublic = schemas.filter((schema) => schema !== 'public');
return nonPublic.length > 0 ? nonPublic : schemas;
}
async function writeConnectionSchemas(input: {
async function writeScopeConfig(input: {
projectDir: string;
connectionId: string;
schemas: string[];
values: string[];
spec: ScopeDiscoverySpec;
}): Promise<void> {
const project = await loadKtxProject({ projectDir: input.projectDir });
const connection = project.config.connections[input.connectionId];
if (!connection) return;
const { schema: _schema, ...connectionWithoutLegacySchema } = connection;
const cleaned = Object.fromEntries(
Object.entries(connection).filter(([key]) => key !== input.spec.configSingleField),
) as KtxProjectConnectionConfig;
await writeConnectionConfig({
projectDir: input.projectDir,
connectionId: input.connectionId,
connection: {
...connectionWithoutLegacySchema,
schemas: unique(input.schemas),
...cleaned,
[input.spec.configArrayField]: unique(input.values),
},
});
}
async function maybeConfigurePostgresSchemas(input: {
async function maybeConfigureSchemaScope(input: {
projectDir: string;
connectionId: string;
args: KtxSetupDatabasesArgs;
@ -893,65 +983,78 @@ async function maybeConfigurePostgresSchemas(input: {
}): Promise<boolean> {
const project = await loadKtxProject({ projectDir: input.projectDir });
const connection = project.config.connections[input.connectionId];
if (normalizeDriver(connection?.driver) !== 'postgres') {
return true;
}
const driver = normalizeDriver(connection?.driver);
if (!driver) return true;
if (configuredSchemas(connection).length > 0) {
const spec = SCOPE_DISCOVERY_SPECS[driver];
if (!spec) return true;
const arrayVal = connection?.[spec.configArrayField];
if (Array.isArray(arrayVal) && arrayVal.length > 0) {
return true;
}
if (input.args.databaseSchemas.length > 0) {
await writeConnectionSchemas({
await writeScopeConfig({
projectDir: input.projectDir,
connectionId: input.connectionId,
schemas: input.args.databaseSchemas,
values: input.args.databaseSchemas,
spec,
});
return true;
}
let discoveredSchemas: string[];
writeSetupSection(input.io, `Discovering ${spec.promptLabel.toLowerCase()}`, [
`Connecting to ${input.connectionId}`,
]);
let discovered: string[];
try {
discoveredSchemas = unique(
discovered = unique(
await (input.deps.listSchemas ?? defaultListSchemas)(input.projectDir, input.connectionId),
);
} catch (error) {
input.io.stderr.write(
`Could not discover PostgreSQL schemas for ${input.connectionId}; continuing with existing schema scope. ` +
`Could not discover ${spec.promptLabel.toLowerCase()} for ${input.connectionId}; continuing with existing ${spec.noun} scope. ` +
`Pass --database-schema to set it explicitly. ${error instanceof Error ? error.message : String(error)}\n`,
);
return true;
}
if (discoveredSchemas.length === 0) {
if (discovered.length === 0) {
return true;
}
let selectedSchemas: string[];
if (input.args.inputMode === 'disabled' || discoveredSchemas.length === 1) {
selectedSchemas = discoveredSchemas;
let selected: string[];
if (input.args.inputMode === 'disabled' || discovered.length === 1) {
const preconfigured = configuredScopeValues(connection, spec).filter((v) => discovered.includes(v));
selected = preconfigured.length > 0 ? preconfigured : discovered;
} else {
const initialValues = defaultSchemaSelection(discoveredSchemas);
const preconfigured = configuredScopeValues(connection, spec).filter((v) => discovered.includes(v));
const initialValues = preconfigured.length > 0 ? preconfigured : spec.defaultSelection(discovered);
const choices = await input.prompts.multiselect({
message: withMultiselectNavigation(
'PostgreSQL schemas to scan\nKTX found multiple non-system schemas. Select every schema agents should use.',
`${spec.promptLabel} to scan\n` +
`KTX found multiple ${spec.nounPlural}. Select every ${spec.noun} agents should use.`,
),
options: discoveredSchemas.map((schema) => ({ value: schema, label: schema })),
options: discovered.map((v) => ({ value: v, label: v })),
initialValues,
required: true,
});
if (choices.includes('back')) {
return false;
}
selectedSchemas = choices.length > 0 ? choices : initialValues;
selected = choices.length > 0 ? choices : initialValues;
}
await writeConnectionSchemas({
await writeScopeConfig({
projectDir: input.projectDir,
connectionId: input.connectionId,
schemas: selectedSchemas,
values: selected,
spec,
});
writeSetupSection(input.io, `Selecting schemas for ${input.connectionId}`, [
`Schemas: ${selectedSchemas.join(', ')}`,
const capitalNounPlural = spec.nounPlural[0]!.toUpperCase() + spec.nounPlural.slice(1);
writeSetupSection(input.io, `${capitalNounPlural} saved for ${input.connectionId}`, [
`${selected.join(', ')}`,
]);
return true;
}
@ -1081,7 +1184,7 @@ async function validateAndScanConnection(input: {
testLines.push(`Driver: ${driverDisplay}${Number.isFinite(tableCount) ? ` · Tables: ${tableCount}` : ''}`);
writeSetupSection(input.io, `Testing ${input.connectionId}`, testLines);
if (!(await maybeConfigurePostgresSchemas(input))) {
if (!(await maybeConfigureSchemaScope(input))) {
return false;
}
@ -1091,6 +1194,9 @@ async function validateAndScanConnection(input: {
io: input.io,
deps: input.deps,
});
writeSetupSection(input.io, `Scanning ${input.connectionId}`, [
'Running structural scan…',
]);
const scanIo = createBufferedCommandIo();
const scanCode = await scanConnection(input.projectDir, input.connectionId, scanIo);
if (scanCode !== 0) {
@ -1103,9 +1209,8 @@ async function validateAndScanConnection(input: {
const reportPath = readOutputValue(scanOutput, 'Report');
writeSetupSection(
input.io,
`Scanning ${input.connectionId}`,
`Scan complete for ${input.connectionId}`,
[
'✓ Structural scan completed',
`Changes: ${summarizeScanChanges(scanOutput)}`,
...(reportPath ? [`Report: ${shortenScanReportPath(reportPath)}`] : []),
],

View file

@ -52,6 +52,7 @@ function createTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTarge
status: 'queued',
detailLine: null,
summaryText: null,
failureText: null,
startedAt: null,
elapsedMs: 0,
};

View file

@ -211,6 +211,66 @@ describe('setup sources step', () => {
expect(runMapping).toHaveBeenCalledWith(projectDir, 'prod_metabase', io.io);
});
it('writes Notion config with the full default knowledge create budget', async () => {
await addPrimarySource();
const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'roots=1' }));
await expect(
runKtxSetupSourcesStep(
{
projectDir,
inputMode: 'disabled',
source: 'notion',
sourceConnectionId: 'notion-main',
sourceApiKeyRef: 'env:NOTION_TOKEN',
notionCrawlMode: 'selected_roots',
notionRootPageIds: ['page-1'],
runInitialSourceIngest: false,
skipSources: false,
},
makeIo().io,
{ validateNotion },
),
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion-main'] });
expect((await readConfig()).connections['notion-main']).toMatchObject({
driver: 'notion',
auth_token_ref: 'env:NOTION_TOKEN',
root_page_ids: ['page-1'],
max_knowledge_creates_per_run: 25,
max_knowledge_updates_per_run: 20,
});
});
it('uses selected Notion roots when root page ids are provided even if crawl mode says all accessible', async () => {
await addPrimarySource();
const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'roots=1' }));
await expect(
runKtxSetupSourcesStep(
{
projectDir,
inputMode: 'disabled',
source: 'notion',
sourceConnectionId: 'notion-main',
sourceApiKeyRef: 'env:NOTION_TOKEN',
notionCrawlMode: 'all_accessible',
notionRootPageIds: ['page-1'],
runInitialSourceIngest: false,
skipSources: false,
},
makeIo().io,
{ validateNotion },
),
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion-main'] });
expect((await readConfig()).connections['notion-main']).toMatchObject({
driver: 'notion',
root_page_ids: ['page-1'],
crawl_mode: 'selected_roots',
});
});
it('defaults interactive Metabase and Looker source setup to the only warehouse connection', async () => {
await addPrimarySource();
const cases: Array<{

View file

@ -36,6 +36,8 @@ import { writeProjectLocalSecretReference } from './setup-secrets.js';
export type KtxSetupSourceType = 'dbt' | 'metricflow' | 'metabase' | 'looker' | 'lookml' | 'notion';
const DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN = 25;
export interface KtxSetupSourcesArgs {
projectDir: string;
inputMode: 'auto' | 'disabled';
@ -508,8 +510,8 @@ function buildLookmlConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC
}
function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionConfig {
const crawlMode = args.notionCrawlMode ?? 'selected_roots';
const rootPageIds = args.notionRootPageIds ?? [];
const crawlMode = rootPageIds.length > 0 ? 'selected_roots' : (args.notionCrawlMode ?? 'selected_roots');
if (crawlMode === 'selected_roots' && rootPageIds.length === 0) {
throw new Error('Notion selected_roots requires --notion-root-page-id.');
}
@ -521,7 +523,7 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC
root_database_ids: [],
root_data_source_ids: [],
max_pages_per_run: 1000,
max_knowledge_creates_per_run: 5,
max_knowledge_creates_per_run: DEFAULT_NOTION_MAX_KNOWLEDGE_CREATES_PER_RUN,
max_knowledge_updates_per_run: 20,
last_successful_cursor: null,
};
@ -1185,10 +1187,10 @@ async function promptForInteractiveSource(
},
async (currentState) => {
const crawlMode = await prompts.select({
message: 'Notion crawl mode',
message: 'Which Notion pages should KTX ingest?',
options: [
{ value: 'selected_roots', label: 'Selected roots' },
{ value: 'all_accessible', label: 'All accessible pages' },
{ value: 'selected_roots', label: 'Specific pages and their subpages (you\'ll paste page IDs)' },
{ value: 'all_accessible', label: 'All pages the integration can access' },
{ value: 'back', label: 'Back' },
],
});
@ -1203,8 +1205,8 @@ async function promptForInteractiveSource(
? [
async (currentState: SourcePromptState) => {
const roots = await promptText(prompts, {
message: 'Notion root page ids',
placeholder: 'comma-separated ids',
message: 'Notion page IDs to ingest (each page includes all its subpages)',
placeholder: 'page-id-1, page-id-2',
});
if (roots === undefined) return 'back';
currentState.notionRootPageIds = roots

View file

@ -1,6 +1,8 @@
import { execFile } from 'node:child_process';
import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { promisify } from 'node:util';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { localFakeBundleReport, persistLocalBundleReport } from './ingest.test-utils.js';
@ -12,6 +14,8 @@ vi.mock('./setup-demo-tour.js', () => ({
runDemoTour: vi.fn(async () => 0),
}));
const execFileAsync = promisify(execFile);
function makeIo() {
let stdout = '';
let stderr = '';
@ -1453,6 +1457,60 @@ describe('setup status', () => {
expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources', 'context', 'agents']);
});
it('commits setup config changes written by later setup steps', async () => {
const io = makeIo();
await expect(
runKtxSetup(
{
command: 'run',
projectDir: tempDir,
mode: 'new',
agents: false,
inputMode: 'disabled',
yes: true,
cliVersion: '0.2.0',
skipLlm: true,
skipEmbeddings: true,
skipDatabases: true,
skipSources: true,
skipAgents: false,
databaseSchemas: [],
},
io.io,
{
model: async () => ({ status: 'skipped', projectDir: tempDir }),
embeddings: async () => ({ status: 'skipped', projectDir: tempDir }),
databases: async () => {
const configPath = join(tempDir, 'ktx.yaml');
const current = await readFile(configPath, 'utf-8');
await writeFile(
configPath,
current.replace(
'connections: {}',
['connections:', ' warehouse:', ' driver: postgres', ' url: env:DATABASE_URL'].join('\n'),
),
'utf-8',
);
return { status: 'skipped', projectDir: tempDir };
},
sources: async () => ({ status: 'skipped', projectDir: tempDir }),
context: async () => ({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' }),
agents: async () => ({
status: 'ready',
projectDir: tempDir,
installs: [{ target: 'codex', scope: 'project', mode: 'cli' }],
}),
},
),
).resolves.toBe(0);
const { stdout } = await execFileAsync('git', ['-C', tempDir, 'status', '--short', '--', 'ktx.yaml']);
expect(stdout).toBe('');
const committedConfig = await execFileAsync('git', ['-C', tempDir, 'show', 'HEAD:ktx.yaml']);
expect(committedConfig.stdout).toContain('warehouse:');
});
it('runs agent setup after context succeeds in --agents mode', async () => {
const calls: string[] = [];
const io = makeIo();

View file

@ -433,6 +433,11 @@ function setupRuntimeInstallPolicy(args: Extract<KtxSetupArgs, { command: 'run'
return args.inputMode === 'disabled' ? 'never' : 'prompt';
}
async function commitSetupConfigChanges(projectDir: string): Promise<void> {
const project = await loadKtxProject({ projectDir });
await project.git.commitFile('ktx.yaml', 'setup: update KTX project config', 'ktx setup', 'setup@ktx.local');
}
export async function runKtxSetup(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetupDeps = {}): Promise<number> {
try {
return await runKtxSetupInner(args, io, deps);
@ -773,6 +778,8 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
break;
}
await commitSetupConfigChanges(projectResult.projectDir);
const status = await readKtxSetupStatus(projectResult.projectDir);
io.stdout.write(formatKtxSetupStatus(status));
io.stdout.write('\nWhat you can do next:\n');

View file

@ -84,6 +84,56 @@ describe('runKtxSl', () => {
expect(listIo.stdout()).toContain('warehouse\torders\tcolumns=1\tmeasures=0\tjoins=0');
});
it('fails validation when a table-backed source declares columns absent from a matching warehouse manifest', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir, projectName: 'warehouse' });
await project.fileStore.writeFile(
'semantic-layer/postgres-warehouse/_schema/orbit_analytics.yaml',
`tables:
int_active_contract_arr:
table: orbit_analytics.int_active_contract_arr
columns:
- { name: contract_id, type: string }
- { name: contract_arr_cents, type: number }
`,
'ktx',
'ktx@example.com',
'Add warehouse manifest',
);
await project.fileStore.writeFile(
'semantic-layer/dbt-main/int_active_contract_arr.yaml',
`name: int_active_contract_arr
table: orbit_analytics.int_active_contract_arr
grain: [contract_id]
columns:
- { name: contract_id, type: string }
- { name: arr_cents, type: number }
measures:
- { name: arr, expr: sum(arr_cents) }
joins: []
`,
'ktx',
'ktx@example.com',
'Add invalid dbt source',
);
const validateIo = makeIo();
await expect(
runKtxSl(
{
command: 'validate',
projectDir,
connectionId: 'dbt-main',
sourceName: 'int_active_contract_arr',
},
validateIo.io,
),
).resolves.toBe(1);
expect(validateIo.stderr()).toContain('arr_cents');
expect(validateIo.stderr()).toContain('absent from physical table');
});
it('runs sl query and prints SQL output', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir, projectName: 'warehouse' });

View file

@ -97,7 +97,7 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
if (!source) {
throw new Error(`Semantic-layer source "${args.connectionId}/${args.sourceName}" was not found`);
}
const result = await validateLocalSlSource(source.yaml);
const result = await validateLocalSlSource(source.yaml, { project, connectionId: args.connectionId });
if (!result.valid) {
for (const error of result.errors) {
io.stderr.write(`${error}\n`);

View file

@ -226,7 +226,7 @@ describe('standalone built ktx CLI smoke', () => {
expect(result.stdout).toContain('Notion:');
expect(result.stdout).toContain('Semantic-layer sources:');
expect(result.stdout).toContain('Knowledge pages:');
expect(result.stdout).toContain('ktx serve --mcp stdio');
expect(result.stdout).not.toContain('ktx serve --mcp stdio');
expect(result.stdout).not.toContain(['--mode', 'deterministic'].join(' '));
});
@ -341,7 +341,7 @@ describe('standalone built ktx CLI smoke', () => {
expect(inspect.stdout).toContain('ktx agent tools --json');
expect(inspect.stdout).toContain('ktx agent context --json');
expect(inspect.stdout).not.toContain('ktx ask "your question here"');
expect(inspect.stdout).toContain('ktx serve --mcp stdio');
expect(inspect.stdout).not.toContain('ktx serve --mcp stdio');
});
it('serves seeded demo wiki and semantic-layer context over stdio MCP', async () => {