mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
Fix historic SQL ingest setup and progress
This commit is contained in:
parent
f3f6b36551
commit
1bd29c7eb1
14 changed files with 877 additions and 34 deletions
|
|
@ -92,7 +92,7 @@ export function registerIngestCommands(
|
|||
sourceDir: options.sourceDir ? resolve(options.sourceDir) : undefined,
|
||||
databaseIntrospectionUrl: options.databaseIntrospectionUrl || undefined,
|
||||
cliVersion: context.packageInfo.version,
|
||||
runtimeInstallPolicy: runtimeInstallPolicyFromFlags(options),
|
||||
runtimeInstallPolicy: runtimeInstallPolicyFromFlags({ yes: options.yes }),
|
||||
...(options.debugLlmRequestFile ? { debugLlmRequestFile: resolve(options.debugLlmRequestFile) } : {}),
|
||||
outputMode: outputMode(options),
|
||||
...inputMode(options),
|
||||
|
|
|
|||
|
|
@ -920,7 +920,7 @@ describe('runKtxCli', () => {
|
|||
sourceDir: tempDir,
|
||||
databaseIntrospectionUrl: undefined,
|
||||
cliVersion: '0.0.0-private',
|
||||
runtimeInstallPolicy: 'never',
|
||||
runtimeInstallPolicy: 'prompt',
|
||||
debugLlmRequestFile: `${tempDir}/debug.jsonl`,
|
||||
outputMode: 'json',
|
||||
inputMode: 'disabled',
|
||||
|
|
@ -934,9 +934,9 @@ describe('runKtxCli', () => {
|
|||
expect(ingestReplayHelpIo.stderr()).toBe('');
|
||||
});
|
||||
|
||||
it('routes ingest managed runtime install policies', async () => {
|
||||
it('routes ingest managed runtime install policy separately from visualization input mode', async () => {
|
||||
const autoIo = makeIo();
|
||||
const conflictIo = makeIo();
|
||||
const nonInteractiveIo = makeIo();
|
||||
const ingest = vi.fn(async () => 0);
|
||||
|
||||
await expect(
|
||||
|
|
@ -972,10 +972,10 @@ describe('runKtxCli', () => {
|
|||
'--yes',
|
||||
'--no-input',
|
||||
],
|
||||
conflictIo.io,
|
||||
nonInteractiveIo.io,
|
||||
{ ingest },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(ingest).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
|
|
@ -985,7 +985,16 @@ describe('runKtxCli', () => {
|
|||
}),
|
||||
autoIo.io,
|
||||
);
|
||||
expect(conflictIo.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input');
|
||||
expect(ingest).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
command: 'run',
|
||||
cliVersion: '0.0.0-private',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
inputMode: 'disabled',
|
||||
}),
|
||||
nonInteractiveIo.io,
|
||||
);
|
||||
expect(nonInteractiveIo.stderr()).toBe('');
|
||||
});
|
||||
|
||||
it('dispatches public connection through the existing connection implementation', async () => {
|
||||
|
|
|
|||
|
|
@ -304,7 +304,7 @@ describe('runKtxIngest viz and replay', () => {
|
|||
expect(io.stdout()).toContain('KTX memory flow warehouse/fake done');
|
||||
});
|
||||
|
||||
it('does not attach a live memory-flow sink for plain run output', async () => {
|
||||
it('attaches a plain progress memory-flow sink for interactive plain run output', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
const sourceDir = join(tempDir, 'source');
|
||||
|
|
@ -329,7 +329,8 @@ describe('runKtxIngest viz and replay', () => {
|
|||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() }));
|
||||
expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.anything() }));
|
||||
expect(io.stdout()).toContain('[5%] Fetching source files for warehouse/fake');
|
||||
expect(io.stdout()).toContain('Job: plain-run');
|
||||
expect(io.stdout()).not.toContain('KTX memory flow');
|
||||
});
|
||||
|
|
@ -403,7 +404,8 @@ describe('runKtxIngest viz and replay', () => {
|
|||
).resolves.toBe(0);
|
||||
|
||||
expect(startLiveMemoryFlow).not.toHaveBeenCalled();
|
||||
expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() }));
|
||||
expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.anything() }));
|
||||
expect(io.stdout()).toContain('[5%] Fetching source files for warehouse/fake');
|
||||
expect(io.stdout()).toContain('Job: raw-missing-viz-run');
|
||||
expect(io.stdout()).not.toContain('KTX memory flow');
|
||||
expect(io.stderr()).toContain(
|
||||
|
|
|
|||
|
|
@ -762,6 +762,103 @@ describe('runKtxIngest', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('prints live progress for plain local ingest in interactive terminals', async () => {
  // Arrange: write a ktx.yaml that lists historic-sql as the only ingest adapter.
  const projectDir = join(tempDir, 'historic-sql-progress-project');
  await mkdir(projectDir, { recursive: true });
  const ktxYaml = [
    'project: historic-sql-progress-project',
    'connections:',
    ' warehouse:',
    ' driver: postgres',
    ' url: env:WAREHOUSE_DATABASE_URL',
    ' historicSql:',
    ' enabled: true',
    ' dialect: postgres',
    ' minExecutions: 2',
    'ingest:',
    ' adapters:',
    ' - historic-sql',
    '',
  ].join('\n');
  await writeFile(join(projectDir, 'ktx.yaml'), ktxYaml, 'utf-8');

  const createdAdapters: SourceAdapter[] = [
    { source: 'historic-sql', skillNames: [], detect: async () => true, chunk: async () => ({ workUnits: [] }) },
  ];
  const createAdapters = vi.fn(() => createdAdapters as never);

  // The fake local run replays a full ingest through the memory-flow sink it was handed.
  const runLocal = vi.fn(async (input: RunLocalIngestOptions) => {
    const flow = input.memoryFlow;
    const syncId = 'sync-progress-1';
    const unitKey = 'historic-sql-table-public-orders';
    expect(flow).toBeDefined();

    flow?.emit({
      type: 'source_acquired',
      adapter: 'historic-sql',
      trigger: 'manual_resync',
      fileCount: 3,
    });
    flow?.update({ syncId });
    flow?.emit({ type: 'raw_snapshot_written', syncId, rawFileCount: 3 });
    flow?.emit({ type: 'diff_computed', added: 2, modified: 0, deleted: 0, unchanged: 1 });
    flow?.update({
      plannedWorkUnits: [
        {
          unitKey,
          rawFiles: ['tables/public/orders.json'],
          peerFileCount: 0,
          dependencyCount: 0,
        },
      ],
    });
    flow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
    flow?.emit({
      type: 'work_unit_started',
      unitKey,
      skills: ['historic_sql_table_digest'],
      stepBudget: 40,
    });
    flow?.emit({ type: 'work_unit_finished', unitKey, status: 'success' });
    flow?.emit({ type: 'saved', commitSha: null, wikiCount: 0, slCount: 1 });
    flow?.emit({ type: 'provenance_recorded', rowCount: 3 });
    flow?.emit({ type: 'report_created', runId: 'run-live-1', reportPath: 'report-live-1' });
    flow?.finish('done');

    return completedLocalBundleRun(input, input.jobId ?? 'historic-progress-job');
  });
  const io = makeIo({ isTTY: true });

  // Act: run a plain-output ingest against the TTY-flagged io.
  const run = runKtxIngest(
    {
      command: 'run',
      projectDir,
      connectionId: 'warehouse',
      adapter: 'historic-sql',
      outputMode: 'plain',
    },
    io.io,
    {
      createAdapters,
      runLocalIngest: runLocal,
      jobIdFactory: () => 'historic-progress-job',
    },
  );
  await expect(run).resolves.toBe(0);

  // Assert: each milestone line is printed, and progress precedes the final report line.
  const stdout = io.stdout();
  const fetchingLine = '[5%] Fetching source files for warehouse/historic-sql';
  const expectedLines = [
    fetchingLine,
    '[15%] Fetched 3 source files from historic-sql',
    '[45%] Planned 1 work unit',
    '[80%] Processed 1/1 work units',
    '[100%] Ingest completed',
  ];
  for (const line of expectedLines) {
    expect(stdout).toContain(line);
  }
  expect(stdout.indexOf(fetchingLine)).toBeLessThan(stdout.indexOf('Report: report-live-1'));
  expect(io.stderr()).toBe('');
});
|
||||
|
||||
it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import {
|
|||
ingestReportToMemoryFlowReplay,
|
||||
type LocalMetabaseFanoutResult,
|
||||
type LocalMetabaseFanoutProgress,
|
||||
type MemoryFlowEvent,
|
||||
type MemoryFlowReplayInput,
|
||||
type RunLocalIngestOptions,
|
||||
renderMemoryFlowReplay,
|
||||
|
|
@ -170,6 +171,118 @@ function createMetabaseFanoutProgress(
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Renders a `diff_computed` event as a compact summary of the form
 * `+added/~modified/-deleted/=unchanged`.
 *
 * @param event - The diff event whose four counters are formatted.
 * @returns The slash-separated counter summary.
 */
function formatDiffProgress(event: Extract<MemoryFlowEvent, { type: 'diff_computed' }>): string {
  const { added, modified, deleted, unchanged } = event;
  return [`+${added}`, `~${modified}`, `-${deleted}`, `=${unchanged}`].join('/');
}
|
||||
|
||||
/**
 * Counts the `work_unit_finished` events recorded in a memory-flow snapshot.
 *
 * @param snapshot - The snapshot whose `events` list is scanned.
 * @returns How many events in the snapshot have type `work_unit_finished`.
 */
function completedWorkUnitCount(snapshot: MemoryFlowReplayInput): number {
  let finished = 0;
  for (const event of snapshot.events) {
    if (event.type === 'work_unit_finished') {
      finished += 1;
    }
  }
  return finished;
}
|
||||
|
||||
/**
 * Maps one memory-flow event to the plain-text progress line that should be printed for it.
 *
 * @param event - The event being rendered.
 * @param snapshot - The snapshot the event belongs to; consulted for the planned and
 *   finished work-unit counts when rendering `work_unit_finished`.
 * @returns A percent/message pair, or `null` for events that produce no progress line
 *   (`scope_detected`, `work_unit_step`, `candidate_action`).
 */
function plainIngestEventProgress(
  event: MemoryFlowEvent,
  snapshot: MemoryFlowReplayInput,
): { percent: number; message: string } | null {
  // No `default` branch on purpose: every event type is listed, so adding a new
  // event type surfaces here at compile time instead of being silently ignored.
  switch (event.type) {
    case 'source_acquired':
      return {
        percent: 15,
        message: `Fetched ${pluralize(event.fileCount, 'source file')} from ${event.adapter}`,
      };
    case 'raw_snapshot_written':
      return {
        percent: 25,
        message: `Wrote raw snapshot ${event.syncId} with ${pluralize(event.rawFileCount, 'file')}`,
      };
    case 'diff_computed':
      return { percent: 35, message: `Computed source diff ${formatDiffProgress(event)}` };
    case 'chunks_planned':
      return {
        percent: 45,
        message: `Planned ${pluralize(event.workUnitCount, 'work unit')}`,
      };
    case 'stage_skipped':
      return { percent: 45, message: `Skipped ${event.stage}: ${event.reason}` };
    case 'work_unit_started':
      return { percent: 55, message: `Processing ${event.unitKey}` };
    case 'work_unit_finished': {
      const completed = completedWorkUnitCount(snapshot);
      // The total is never smaller than the finished count. Without this, a planned
      // list that is empty or shorter than the finished events would give a ratio
      // above 1, push the percent past this stage's 80% ceiling, and print lines
      // such as "Processed 3/2".
      const total = Math.max(snapshot.plannedWorkUnits.length, completed);
      // Work units occupy the 55%..80% band of the overall progress scale.
      const percent = total > 0 ? 55 + Math.round((completed / total) * 25) : 80;
      return {
        percent,
        message: `Processed ${completed}/${total} work units`,
      };
    }
    case 'reconciliation_finished':
      return {
        percent: 85,
        message: `Reconciled results with ${pluralize(event.conflictCount, 'conflict')} and ${pluralize(
          event.fallbackCount,
          'fallback',
        )}`,
      };
    case 'saved':
      return {
        percent: 90,
        message: `Saved memory updates (${event.wikiCount} wiki, ${event.slCount} SL)`,
      };
    case 'provenance_recorded':
      return { percent: 95, message: `Recorded ${pluralize(event.rowCount, 'provenance row')}` };
    case 'report_created':
      // Fall back to the run id when the event carries no report path.
      return { percent: 98, message: `Created ingest report ${event.reportPath ?? event.runId}` };
    case 'scope_detected':
    case 'work_unit_step':
    case 'candidate_action':
      return null;
  }
}
|
||||
|
||||
/**
 * Decides whether incremental progress lines should be printed for a run.
 *
 * Progress is written only when all three conditions hold: the output mode is
 * `plain`, `io.stdout.isTTY` is exactly `true`, and the `CI` environment
 * variable is not the string `'true'`.
 *
 * @param outputMode - The resolved output mode of the run.
 * @param io - The io whose stdout is checked for the TTY flag.
 * @param env - Environment variables consulted for `CI`.
 * @returns `true` when progress lines should be written.
 */
function shouldWritePlainIngestProgress(
  outputMode: KtxIngestOutputMode,
  io: KtxIngestIo,
  env: NodeJS.ProcessEnv,
): boolean {
  if (outputMode !== 'plain') {
    return false;
  }
  if (io.stdout.isTTY !== true) {
    return false;
  }
  return env.CI !== 'true';
}
|
||||
|
||||
/**
 * Builds a line-oriented progress reporter for a plain-output ingest run.
 *
 * Each line is written to `io.stdout` as `[<percent>%] <message>`. The printed
 * percent is clamped to 0..100 and never decreases from one line to the next.
 *
 * @param args - The `run` command arguments; the connection id and adapter name
 *   appear in the opening line.
 * @param io - The io whose stdout receives the progress lines.
 * @returns An object with `start()`, which prints the opening 5% line, and
 *   `update(snapshot)`, which prints a line for every not-yet-rendered event and,
 *   once, a final 100% line when the snapshot status is no longer `running`.
 */
function createPlainIngestProgressRenderer(
  args: Extract<KtxIngestArgs, { command: 'run' }>,
  io: KtxIngestIo,
): { start(): void; update(snapshot: MemoryFlowReplayInput): void } {
  let nextEventIndex = 0;
  let highestPercent = 0;
  let completionReported = false;

  function emitLine(percent: number, message: string): void {
    const clamped = Math.max(0, Math.min(100, percent));
    // Keep the displayed percent monotonic even if a later event maps to a lower stage.
    highestPercent = Math.max(highestPercent, clamped);
    io.stdout.write(`[${highestPercent}%] ${message}\n`);
  }

  function start(): void {
    emitLine(5, `Fetching source files for ${args.connectionId}/${args.adapter}`);
  }

  function update(snapshot: MemoryFlowReplayInput): void {
    // Render only the events appended since the previous update.
    while (nextEventIndex < snapshot.events.length) {
      const event = snapshot.events[nextEventIndex];
      nextEventIndex += 1;
      const progress = event ? plainIngestEventProgress(event, snapshot) : null;
      if (progress) {
        emitLine(progress.percent, progress.message);
      }
    }

    if (completionReported || snapshot.status === 'running') {
      return;
    }
    completionReported = true;
    emitLine(100, snapshot.status === 'done' ? 'Ingest completed' : 'Ingest failed');
  }

  return { start, update };
}
|
||||
|
||||
/**
 * Writes an ingest report to stdout as two-space-indented JSON followed by a newline.
 *
 * @param report - The report snapshot to serialize.
 * @param io - The io whose stdout receives the JSON text.
 */
function writeReportJson(report: IngestReportSnapshot, io: KtxIngestIo): void {
  const serialized = JSON.stringify(report, null, 2);
  io.stdout.write(`${serialized}\n`);
}
|
||||
|
|
@ -366,10 +479,14 @@ export async function runKtxIngest(
|
|||
});
|
||||
const shouldUseLiveViz =
|
||||
runOutputMode === 'viz' && (args.inputMode ?? 'auto') === 'auto' && isInteractiveTerminal(io);
|
||||
const initialMemoryFlow = shouldUseLiveViz ? initialRunMemoryFlowInput(args, jobId ?? 'pending') : undefined;
|
||||
const plainProgress = shouldWritePlainIngestProgress(runOutputMode, io, env)
|
||||
? createPlainIngestProgressRenderer(args, io)
|
||||
: null;
|
||||
const initialMemoryFlow =
|
||||
shouldUseLiveViz || plainProgress ? initialRunMemoryFlowInput(args, jobId ?? 'pending') : undefined;
|
||||
let latestMemoryFlowSnapshot: MemoryFlowReplayInput | null = initialMemoryFlow ?? null;
|
||||
|
||||
if (initialMemoryFlow && isTuiCapableIo(io)) {
|
||||
if (shouldUseLiveViz && initialMemoryFlow && isTuiCapableIo(io)) {
|
||||
const startLiveMemoryFlow = deps.startLiveMemoryFlow ?? startLiveMemoryFlowTui;
|
||||
liveTui = await startLiveMemoryFlow(initialMemoryFlow, io);
|
||||
}
|
||||
|
|
@ -382,13 +499,17 @@ export async function runKtxIngest(
|
|||
liveTui.update(snapshot);
|
||||
return;
|
||||
}
|
||||
if (!liveTui) {
|
||||
if (shouldUseLiveViz && !liveTui) {
|
||||
writeMemoryFlowInput(snapshot, io, { clear: true });
|
||||
return;
|
||||
}
|
||||
plainProgress?.update(snapshot);
|
||||
},
|
||||
})
|
||||
: undefined;
|
||||
|
||||
plainProgress?.start();
|
||||
|
||||
try {
|
||||
const result = await executeLocalIngest({
|
||||
project,
|
||||
|
|
@ -403,7 +524,7 @@ export async function runKtxIngest(
|
|||
...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),
|
||||
...(memoryFlow ? { memoryFlow } : {}),
|
||||
});
|
||||
if (memoryFlow) {
|
||||
if (shouldUseLiveViz && memoryFlow) {
|
||||
latestMemoryFlowSnapshot = memoryFlow.snapshot();
|
||||
liveTui?.close();
|
||||
liveTui = null;
|
||||
|
|
|
|||
|
|
@ -767,6 +767,9 @@ export async function runKtxSetupContextStep(
|
|||
|
||||
const missing = missingCapabilities(project);
|
||||
if (missing.length > 0) {
|
||||
if (args.allowEmpty === true) {
|
||||
return { status: 'skipped', projectDir: args.projectDir };
|
||||
}
|
||||
writeMissingCapabilities(missing, io);
|
||||
return { status: 'missing-input', projectDir: args.projectDir };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1174,6 +1174,66 @@ describe('setup status', () => {
|
|||
expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources']);
|
||||
});
|
||||
|
||||
it('does not fail context build when prerequisites were explicitly skipped and agents are skipped', async () => {
  const calls: string[] = [];
  const io = makeIo();

  // Arrange: a project file that declares only a readonly warehouse connection.
  const projectYaml = [
    'project: revenue',
    'connections:',
    ' warehouse:',
    ' driver: postgres',
    ' url: env:DEMO_DATABASE_URL',
    ' readonly: true',
    '',
  ].join('\n');
  await writeFile(join(tempDir, 'ktx.yaml'), projectYaml, 'utf-8');

  // Each stubbed setup step records that it ran and reports itself as skipped.
  const skippedStep = (name: string) => async () => {
    calls.push(name);
    return { status: 'skipped' as const, projectDir: tempDir };
  };

  // Act: run setup non-interactively with every prerequisite and the agents step skipped.
  const run = runKtxSetup(
    {
      command: 'run',
      projectDir: tempDir,
      mode: 'existing',
      agents: false,
      skipAgents: true,
      inputMode: 'disabled',
      yes: true,
      cliVersion: '0.2.0',
      skipLlm: true,
      skipEmbeddings: true,
      skipDatabases: true,
      skipSources: true,
      databaseSchemas: [],
    },
    io.io,
    {
      model: skippedStep('model'),
      embeddings: skippedStep('embeddings'),
      databases: skippedStep('databases'),
      sources: skippedStep('sources'),
    },
  );
  await expect(run).resolves.toBe(0);

  // Assert: all four steps ran in order and no missing-capabilities error was printed.
  expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources']);
  expect(io.stderr()).not.toContain('KTX cannot build agent-ready context yet.');
});
|
||||
|
||||
it('runs context after sources and before agents in full setup', async () => {
|
||||
const calls: string[] = [];
|
||||
const io = makeIo();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue