ktx/packages/cli/src/ingest.ts

614 lines
22 KiB
TypeScript
Raw Normal View History

2026-05-10 23:12:26 +02:00
import {
buildMemoryFlowViewModel,
createMemoryFlowLiveBuffer,
formatMemoryFlowFinalSummary,
getLatestLocalIngestStatus,
getLocalIngestStatus,
type IngestReportSnapshot,
ingestReportToMemoryFlowReplay,
type LocalMetabaseFanoutResult,
type LocalMetabaseFanoutProgress,
type MemoryFlowEvent,
2026-05-10 23:12:26 +02:00
type MemoryFlowReplayInput,
type RunLocalIngestOptions,
renderMemoryFlowReplay,
runLocalIngest,
runLocalMetabaseIngest,
savedMemoryCountsForReport,
2026-05-10 23:51:24 +02:00
} from '@ktx/context/ingest';
import { loadKtxProject } from '@ktx/context/project';
2026-05-10 23:12:26 +02:00
import { readIngestReportSnapshotFile } from './ingest-report-file.js';
import { createCliOperationalLogger } from './io/logger.js';
2026-05-10 23:51:24 +02:00
import { createKtxCliLocalIngestAdapters } from './local-adapters.js';
feat: npm-managed Python runtime for @kaelio/ktx (#7) * docs: add npm managed python runtime design * build: add bundled python runtime wheel builder * build: make local embedding dependencies optional * build: bundle python runtime wheel in cli artifacts * build: track bundled python runtime release artifact * test: verify bundled python runtime wheel * docs: add plan for bundled python runtime wheel * test: cover managed python runtime lifecycle * feat: add managed python runtime installer * feat: add runtime command runner * feat: expose runtime management commands * test: verify managed python runtime commands * docs: add plan for managed python runtime installer * feat: add managed python command helper * feat: use managed runtime for sl query compute * feat: route sl query managed runtime policy * docs: add plan for managed runtime sl query integration * feat: add managed runtime daemon metadata * feat: manage python daemon lifecycle * feat: add runtime daemon start stop commands * fix: verify managed runtime daemon lifecycle * docs: add plan for managed runtime daemon lifecycle * feat: add managed local embeddings config marker * feat: add managed local embeddings daemon helper * feat: use managed runtime for local embedding setup * feat: pass managed runtime policy through setup * docs: add plan for managed local embeddings runtime * feat: read CLI package metadata dynamically * feat: assemble public kaelio ktx npm package * feat: release one public kaelio ktx npm artifact * test: cover public kaelio ktx package invocations * chore: verify public kaelio ktx package artifacts * docs: add plan for public kaelio ktx npm package * test: verify managed runtime in public package smoke * test: finalize managed runtime release smoke * docs: add plan for managed runtime release smoke * test: specify local embeddings release smoke * feat: add local embeddings runtime smoke * chore: register local embeddings smoke * fix: verify local embeddings smoke * fix: restore 
artifact smoke python env helper * docs: add plan for managed local embeddings release smoke * refactor: share managed runtime install policy parsing * feat: use managed runtime for agent semantic queries * feat: use managed runtime for MCP semantic compute * docs: add plan for managed agent and MCP semantic runtime * feat(cli): add managed daemon HTTP helpers * feat(cli): route local adapters through managed daemon * feat(cli): use managed daemon for ingest helpers * feat(cli): pass managed daemon options to scan * feat(context): pass MCP ingest pull config options * feat(cli): pass managed daemon options to serve ingest * test: verify managed local ingest daemon runtime * docs: add plan for managed local ingest daemon runtime * docs: align managed runtime examples * docs: add plan for managed runtime docs cleanup * test: cover published package runtime smoke commands * test: validate published package smoke outputs * docs: add plan for published package runtime smoke * build: stamp public npm package version * release: add npm public release policy * release: add guarded npm publish script * release: document public npm release handoff * docs: add plan for public npm release handoff * test: cover managed runtime prune in package smoke * docs: document managed runtime prune * docs: add plan for managed runtime prune smoke and docs * chore: encode uv runtime prerequisite policy * fix: clarify missing uv runtime error * docs: document uv runtime prerequisite * docs: add plan for uv runtime prerequisite contract * refactor: limit release artifacts to public package runtime * chore: align release policy with bundled runtime wheel * docs: describe single public runtime artifact surface * test: verify single public runtime artifact contract * docs: add plan for single public runtime artifact cleanup * fix: align local embeddings smoke with public version * docs: add plan for local embeddings smoke public version * release: soft-launch as @kaelio/ktx@0.1.0-rc.0 on next 
tag Publish target moves to the pre-release version 0.1.0-rc.0 under the next dist-tag so npm install @kaelio/ktx (which resolves to latest) does not pick up the soft-launch build. Users opt in via @kaelio/ktx@next. * Fix release script boundary checks * Remove PostHog from public package bundle
2026-05-11 15:50:34 +02:00
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
2026-05-10 23:51:24 +02:00
import { type KtxMemoryFlowStdin, renderMemoryFlowInteractively } from './memory-flow-interactive.js';
2026-05-10 23:12:26 +02:00
import {
2026-05-10 23:51:24 +02:00
type KtxMemoryFlowTuiIo,
2026-05-10 23:12:26 +02:00
type MemoryFlowTuiLiveSession,
renderMemoryFlowTui,
startLiveMemoryFlowTui,
} from './memory-flow-tui.js';
import { resolveVizFallback, warnVizFallbackOnce } from './viz-fallback.js';
import { profileMark } from './startup-profile.js';
profileMark('module:ingest');
2026-05-10 23:51:24 +02:00
/**
 * Output format requested for an ingest command.
 *
 * Within this file, `'json'` makes `writeReportRecord` dump the raw report through
 * `writeReportJson`, and `'viz'` can be downgraded to `'plain'` by
 * `effectiveIngestOutputMode` when the viz fallback check asks to degrade.
 */
export type KtxIngestOutputMode = 'plain' | 'json' | 'viz';
/**
 * Input mode carried by the optional `inputMode` field of `KtxIngestArgs`.
 *
 * NOTE(review): the visible part of this file does not show where the value is
 * consumed; presumably `'disabled'` turns interactive input off — confirm against the caller.
 */
type KtxIngestInputMode = 'auto' | 'disabled';
2026-05-10 23:12:26 +02:00
2026-05-10 23:51:24 +02:00
/**
 * Parsed arguments for the ingest CLI commands, discriminated by `command`.
 *
 * - `'run'` names the connection and adapter to ingest, plus optional source and
 *   managed-runtime settings.
 * - `'status' | 'replay' | 'watch'` address a stored ingest report, optionally by
 *   `runId` or through an explicit `reportFile`.
 */
export type KtxIngestArgs =
  | {
      command: 'run';
      projectDir: string;
      connectionId: string;
      adapter: string;
      sourceDir?: string;
      /** When set, `managedDaemonOptionsForIngestRun` returns no managed daemon options. */
      databaseIntrospectionUrl?: string;
      /** Needed together with `runtimeInstallPolicy` for managed daemon options to be produced. */
      cliVersion?: string;
      runtimeInstallPolicy?: KtxManagedPythonInstallPolicy;
      debugLlmRequestFile?: string;
      outputMode: KtxIngestOutputMode;
      inputMode?: KtxIngestInputMode;
    }
  | {
      command: 'status' | 'replay' | 'watch';
      projectDir: string;
      runId?: string;
      reportFile?: string;
      outputMode: KtxIngestOutputMode;
      inputMode?: KtxIngestInputMode;
    };
2026-05-10 23:51:24 +02:00
/** Streams the ingest commands read from and write to; injectable so callers can substitute fakes. */
interface KtxIngestIo {
  /** Optional input stream; TUI rendering requires it to be a raw-mode capable TTY (see `isTuiCapableIo`). */
  stdin?: KtxMemoryFlowStdin;
  /** Receives reports and rendered views; `isTTY` and `columns` feed terminal detection and layout width. */
  stdout: { isTTY?: boolean; columns?: number; write(chunk: string): void };
  /** Receives progress lines emitted while an ingest is running. */
  stderr: { write(chunk: string): void };
}
2026-05-10 23:51:24 +02:00
/**
 * Optional dependency overrides for `runKtxIngest`.
 *
 * Every member is optional. Where the visible code consumes a member
 * (`createAdapters`, `runLocalIngest`, `renderStoredMemoryFlow`, `env`,
 * `localIngestOptions`), it falls back to the real implementation,
 * `process.env`, or an empty object when the member is omitted.
 * NOTE(review): the remaining members are consumed outside the visible part of
 * this file; presumably they follow the same fallback pattern — confirm.
 */
interface KtxIngestDeps {
  jobIdFactory?: () => string;
  now?: () => Date;
  createAdapters?: typeof createKtxCliLocalIngestAdapters;
  runLocalIngest?: typeof runLocalIngest;
  runLocalMetabaseIngest?: typeof runLocalMetabaseIngest;
  readReportFile?: typeof readIngestReportSnapshotFile;
  renderStoredMemoryFlow?: typeof renderMemoryFlowTui;
  startLiveMemoryFlow?: typeof startLiveMemoryFlowTui;
  /** Environment used instead of `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Subset of local ingest options a caller may pre-configure. */
  localIngestOptions?: Pick<
    RunLocalIngestOptions,
    | 'agentRunner'
    | 'llmProvider'
    | 'memoryModel'
    | 'semanticLayerCompute'
    | 'queryExecutor'
    | 'logger'
    | 'pullConfigOptions'
  >;
}
/**
 * Derives the overall status of an ingest report.
 *
 * @returns `'error'` when the report lists at least one failed work unit, otherwise `'done'`.
 */
function reportStatus(report: IngestReportSnapshot): 'done' | 'error' {
  const failedCount = report.body.failedWorkUnits.length;
  if (failedCount > 0) {
    return 'error';
  }
  return 'done';
}
2026-05-10 23:51:24 +02:00
/**
 * Writes a plain-text, line-per-field summary of an ingest report to `io.stdout`.
 *
 * Fields are written one at a time, in a fixed order: identifiers, derived status,
 * adapter/connection, sync id, diff summary, work-unit count, saved-memory counts,
 * and provenance row count.
 */
function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void {
  const counts = savedMemoryCountsForReport(report);
  const line = (text: string): void => {
    io.stdout.write(`${text}\n`);
  };

  line(`Report: ${report.id}`);
  line(`Run: ${report.runId}`);
  line(`Job: ${report.jobId}`);
  line(`Status: ${reportStatus(report)}`);
  line(`Adapter: ${report.sourceKey}`);
  line(`Connection: ${report.connectionId}`);
  line(`Sync: ${report.body.syncId}`);
  // Diff is rendered as +added/~modified/-deleted/=unchanged.
  line(
    `Diff: +${report.body.diffSummary.added}/~${report.body.diffSummary.modified}/-${report.body.diffSummary.deleted}/=${report.body.diffSummary.unchanged}`,
  );
  line(`Work units: ${report.body.workUnits.length}`);
  line(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL`);
  line(`Provenance rows: ${report.body.provenanceRows.length}`);
}
2026-05-10 23:51:24 +02:00
/**
 * Writes a plain-text summary of a Metabase fan-out ingest to `io.stdout`.
 *
 * Saved-memory counts are summed across all child reports before anything is
 * written. Totals for work units are printed only when `result.totals` is present,
 * and one `- target=…` line is emitted per child.
 */
function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIngestIo): void {
  let wikiCount = 0;
  let slCount = 0;
  for (const child of result.children) {
    const childCounts = savedMemoryCountsForReport(child.report);
    wikiCount += childCounts.wikiCount;
    slCount += childCounts.slCount;
  }

  io.stdout.write(`Metabase fan-out: ${result.status}\n`);
  io.stdout.write(`Source: ${result.metabaseConnectionId}\n`);
  io.stdout.write(`Children: ${result.children.length}\n`);
  if (result.totals) {
    io.stdout.write(`Work units: ${result.totals.workUnits}\n`);
    io.stdout.write(`Failed work units: ${result.totals.failedWorkUnits}\n`);
  }
  io.stdout.write(`Saved memory: ${wikiCount} wiki, ${slCount} SL\n`);

  for (const child of result.children) {
    const status = reportStatus(child.report);
    io.stdout.write(
      `- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId} report=${child.report.id}\n`,
    );
  }
}
/**
 * Formats a count followed by the matching noun form.
 *
 * @param count - Quantity to print.
 * @param singular - Noun used when `count` is exactly 1.
 * @param plural - Noun used otherwise; defaults to `singular` with an `s` appended.
 * @returns For example `"1 file"` or `"3 files"`.
 */
function pluralize(count: number, singular: string, plural = `${singular}s`): string {
  let noun = plural;
  if (count === 1) {
    noun = singular;
  }
  return `${count} ${noun}`;
}
/**
 * Creates progress callbacks that narrate a Metabase fan-out ingest on `io.stderr`.
 *
 * Two header lines are written immediately when this function is called; the
 * returned callbacks then print one line per planned, started, and completed child.
 *
 * @param connectionId - Metabase connection id shown in the header line.
 * @param io - Streams to report on; only `stderr` is written to.
 */
function createMetabaseFanoutProgress(
  connectionId: string,
  io: KtxIngestIo,
): LocalMetabaseFanoutProgress {
  io.stderr.write(`Metabase ingest: ${connectionId}\n`);
  io.stderr.write('Checking mappings and scheduled-pull targets...\n');

  return {
    onMetabaseFanoutPlanned(event) {
      io.stderr.write(`Targets: ${pluralize(event.children.length, 'mapped database')}\n`);
      for (const child of event.children) {
        io.stderr.write(`- database=${child.metabaseDatabaseId} target=${child.targetConnectionId} status=queued\n`);
      }
    },
    onMetabaseChildStarted(event) {
      io.stderr.write(
        `- database=${event.metabaseDatabaseId} target=${event.targetConnectionId} status=running job=${event.jobId}\n`,
      );
    },
    onMetabaseChildCompleted(event) {
      io.stderr.write(
        `- database=${event.metabaseDatabaseId} target=${event.targetConnectionId} status=${event.status} job=${event.jobId}\n`,
      );
    },
  };
}
/**
 * Renders a `diff_computed` event as a compact `+added/~modified/-deleted/=unchanged` string.
 */
function formatDiffProgress(event: Extract<MemoryFlowEvent, { type: 'diff_computed' }>): string {
  const { added, modified, deleted, unchanged } = event;
  const parts = [`+${added}`, `~${modified}`, `-${deleted}`, `=${unchanged}`];
  return parts.join('/');
}
/**
 * Returns a copy of the snapshot's events from the start up to and including `eventIndex`.
 */
function workUnitEventsThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): MemoryFlowEvent[] {
  const { events } = snapshot;
  const endExclusive = eventIndex + 1;
  return events.slice(0, endExclusive);
}
/**
 * Counts the `work_unit_finished` events seen up to and including `eventIndex`.
 */
function completedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number {
  const seen = workUnitEventsThrough(snapshot, eventIndex);
  return seen.reduce((finished, event) => (event.type === 'work_unit_finished' ? finished + 1 : finished), 0);
}
/**
 * Determines how many work units are planned as of `eventIndex`.
 *
 * Preference order: the snapshot's own `plannedWorkUnits` list when non-empty,
 * then the `workUnitCount` of the latest `chunks_planned` event seen so far, and
 * finally the number of work units already finished.
 */
function plannedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number {
  const listedCount = snapshot.plannedWorkUnits.length;
  if (listedCount > 0) {
    return listedCount;
  }

  const planEvents = workUnitEventsThrough(snapshot, eventIndex).filter((event) => event.type === 'chunks_planned');
  // Only the most recent plan matters; indexing an empty list yields undefined.
  const latestPlan = planEvents[planEvents.length - 1];
  const plannedByEvent = latestPlan?.workUnitCount;
  if (plannedByEvent != null) {
    return plannedByEvent;
  }
  return completedWorkUnitCountThrough(snapshot, eventIndex);
}
/**
 * Computes the 1-based position of a work unit among the units started so far.
 *
 * The ordinal is the number of `work_unit_started` events up to and including the
 * first one for `unitKey`. When no start event for `unitKey` has been seen by
 * `eventIndex`, the result is the finished-unit count plus one.
 */
function workUnitOrdinalThrough(snapshot: MemoryFlowReplayInput, eventIndex: number, unitKey: string): number {
  let startedSoFar = 0;
  for (const event of workUnitEventsThrough(snapshot, eventIndex)) {
    if (event.type !== 'work_unit_started') {
      continue;
    }
    startedSoFar += 1;
    if (event.unitKey === unitKey) {
      return startedSoFar;
    }
  }
  return completedWorkUnitCountThrough(snapshot, eventIndex) + 1;
}
/**
 * Maps a memory-flow event to a plain-text progress line.
 *
 * Each event type has a fixed percentage milestone, except work-unit steps and
 * completions, which are spread across the 55–80 range in proportion to finished
 * work units.
 *
 * @param event - Event to describe.
 * @param snapshot - Snapshot the event belongs to; used for work-unit totals and ordinals.
 * @param eventIndex - Position of `event` within `snapshot.events`.
 * @returns The percent and message to print, or `null` for event types that produce no line.
 */
function plainIngestEventProgress(
  event: MemoryFlowEvent,
  snapshot: MemoryFlowReplayInput,
  eventIndex: number,
): { percent: number; message: string } | null {
  const at = (percent: number, message: string): { percent: number; message: string } => ({ percent, message });
  const plannedTotal = (): number => plannedWorkUnitCountThrough(snapshot, eventIndex);
  const finishedCount = (): number => completedWorkUnitCountThrough(snapshot, eventIndex);
  // "<ordinal>/<total> work units: " prefix, omitted when no total is known.
  const unitPrefix = (total: number, unitKey: string): string => {
    const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, unitKey);
    return total > 0 ? `${ordinal}/${total} work units: ` : '';
  };

  switch (event.type) {
    case 'source_acquired':
      return at(15, `Fetched ${pluralize(event.fileCount, 'source file')} from ${event.adapter}`);
    case 'raw_snapshot_written':
      return at(25, `Wrote raw snapshot ${event.syncId} with ${pluralize(event.rawFileCount, 'file')}`);
    case 'diff_computed':
      return at(35, `Computed source diff ${formatDiffProgress(event)}`);
    case 'chunks_planned':
      return at(45, `Planned ${pluralize(event.workUnitCount, 'work unit')}`);
    case 'stage_skipped':
      return at(45, `Skipped ${event.stage}: ${event.reason}`);
    case 'work_unit_started': {
      const total = plannedTotal();
      const prefix = unitPrefix(total, event.unitKey);
      return at(55, `Processing ${prefix}${event.unitKey}`);
    }
    case 'work_unit_step': {
      const total = plannedTotal();
      const finished = finishedCount();
      const prefix = unitPrefix(total, event.unitKey);
      // Fraction of the current unit's step budget already used, capped at 1.
      let stepFraction = 0;
      if (event.stepBudget > 0) {
        stepFraction = Math.min(1, event.stepIndex / event.stepBudget);
      }
      let percent = 55;
      if (total > 0) {
        percent = 55 + Math.ceil(((finished + stepFraction) / total) * 25);
      }
      return at(percent, `Processing ${prefix}${event.unitKey} step ${event.stepIndex}/${event.stepBudget}`);
    }
    case 'work_unit_finished': {
      const total = plannedTotal();
      const finished = finishedCount();
      let percent = 80;
      if (total > 0) {
        percent = 55 + Math.round((finished / total) * 25);
      }
      return at(percent, `Processed ${finished}/${total} work units`);
    }
    case 'reconciliation_finished': {
      const conflicts = pluralize(event.conflictCount, 'conflict');
      const fallbacks = pluralize(event.fallbackCount, 'fallback');
      return at(85, `Reconciled results with ${conflicts} and ${fallbacks}`);
    }
    case 'saved':
      return at(90, `Saved memory updates (${event.wikiCount} wiki, ${event.slCount} SL)`);
    case 'provenance_recorded':
      return at(95, `Recorded ${pluralize(event.rowCount, 'provenance row')}`);
    case 'report_created':
      return at(98, `Created ingest report ${event.reportPath ?? event.runId}`);
    case 'scope_detected':
    case 'candidate_action':
      // These event types intentionally produce no progress line.
      return null;
  }
}
/**
 * Decides whether plain progress lines should be printed.
 *
 * @returns `true` only when the output mode is `'plain'`, `io.stdout` is a TTY,
 *   and the `CI` environment variable is not the string `'true'`.
 */
function shouldWritePlainIngestProgress(
  outputMode: KtxIngestOutputMode,
  io: KtxIngestIo,
  env: NodeJS.ProcessEnv,
): boolean {
  if (outputMode !== 'plain') {
    return false;
  }
  if (io.stdout.isTTY !== true) {
    return false;
  }
  return env.CI !== 'true';
}
/**
 * Builds a stateful renderer that prints `[NN%] message` progress lines to `io.stderr`.
 *
 * - `start()` prints the initial 5% "fetching" line.
 * - `update(snapshot)` prints a line for each event not yet printed, then a single
 *   100% completion line the first time the snapshot status is no longer `'running'`.
 *
 * Printed percentages never decrease: each line shows the highest value reached so far.
 */
function createPlainIngestProgressRenderer(
  args: Extract<KtxIngestArgs, { command: 'run' }>,
  io: KtxIngestIo,
): { start(): void; update(snapshot: MemoryFlowReplayInput): void } {
  const state = { nextEventIndex: 0, highestPercent: 0, completionPrinted: false };

  function emit(percent: number, message: string): void {
    const clamped = Math.max(0, Math.min(100, percent));
    state.highestPercent = Math.max(state.highestPercent, clamped);
    io.stderr.write(`[${state.highestPercent}%] ${message}\n`);
  }

  function start(): void {
    emit(5, `Fetching source files for ${args.connectionId}/${args.adapter}`);
  }

  function update(snapshot: MemoryFlowReplayInput): void {
    for (; state.nextEventIndex < snapshot.events.length; ) {
      const eventIndex = state.nextEventIndex;
      // Advance the cursor first so an event is never printed twice.
      state.nextEventIndex += 1;
      const event = snapshot.events[eventIndex];
      if (!event) {
        continue;
      }
      const progress = plainIngestEventProgress(event, snapshot, eventIndex);
      if (progress) {
        emit(progress.percent, progress.message);
      }
    }

    if (state.completionPrinted || snapshot.status === 'running') {
      return;
    }
    state.completionPrinted = true;
    emit(100, snapshot.status === 'done' ? 'Ingest completed' : 'Ingest failed');
  }

  return { start, update };
}
2026-05-10 23:51:24 +02:00
/**
 * Writes the report to `io.stdout` as pretty-printed JSON (2-space indent) followed by a newline.
 */
function writeReportJson(report: IngestReportSnapshot, io: KtxIngestIo): void {
  io.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
}
/**
 * Verifies that a report loaded from a file is the one the user asked to replay.
 *
 * @param report - Report read from `reportFile`.
 * @param requestedId - Id given by the user; may be the report id, run id, or job id.
 * @param reportFile - Path used in the error message.
 * @throws Error when `requestedId` equals none of the report's three ids.
 */
function assertReportMatchesReplayId(report: IngestReportSnapshot, requestedId: string, reportFile: string): void {
  const acceptedIds = [report.id, report.runId, report.jobId];
  if (acceptedIds.includes(requestedId)) {
    return;
  }
  const expected = acceptedIds.join(', ');
  throw new Error(
    `Report file ${reportFile} does not match ingest replay id "${requestedId}"; expected one of ${expected}`,
  );
}
/**
 * Loads a stored ingest report for the project.
 *
 * @param project - Loaded ktx project.
 * @param runId - Id to look up; when missing (or empty), the latest report is requested instead.
 * @returns The report snapshot, or `null` as returned by the underlying lookup.
 */
async function readStoredIngestReport(
  project: Awaited<ReturnType<typeof loadKtxProject>>,
  runId: string | undefined,
): Promise<IngestReportSnapshot | null> {
  if (runId) {
    return await getLocalIngestStatus(project, runId);
  }
  return await getLatestLocalIngestStatus(project);
}
2026-05-10 23:51:24 +02:00
/** Returns `true` only when `io.stdout.isTTY` is exactly `true`. */
function isInteractiveTerminal(io: KtxIngestIo): boolean {
  return io.stdout.isTTY === true;
}
2026-05-10 23:51:24 +02:00
/**
 * Resolves the terminal width in columns.
 *
 * @returns `io.stdout.columns` when it is set, otherwise `process.stdout.columns`
 *   (which may itself be `undefined`).
 */
function terminalWidth(io: KtxIngestIo): number | undefined {
  return io.stdout.columns ?? process.stdout.columns;
}
2026-05-10 23:51:24 +02:00
/**
 * Type guard: checks whether `io` can drive the memory-flow TUI.
 *
 * Requires both `stdin` and `stdout` to be TTYs, `stdin` to expose `on` and
 * `setRawMode` functions, and `stdout` to expose a `write` function.
 */
function isTuiCapableIo(io: KtxIngestIo): io is KtxIngestIo & KtxMemoryFlowTuiIo {
  return (
    io.stdin?.isTTY === true &&
    io.stdout.isTTY === true &&
    typeof io.stdin.on === 'function' &&
    typeof io.stdin.setRawMode === 'function' &&
    typeof io.stdout.write === 'function'
  );
}
/** Options accepted by `effectiveIngestOutputMode`. */
interface EffectiveIngestOutputModeOptions {
/** Forwarded to `resolveVizFallback` as `requireInput`; treated as `false` when omitted. */
requireInput?: boolean;
}
/**
 * Resolves the output mode that will actually be used.
 *
 * Non-`'viz'` modes are returned unchanged. For `'viz'`, the viz fallback check is
 * consulted: if it says to degrade, a fallback warning is issued through
 * `warnVizFallbackOnce` and `'plain'` is returned; otherwise `'viz'` is kept.
 *
 * @param outputMode - Mode requested by the caller.
 * @param io - Streams passed to the fallback check and warning.
 * @param env - Environment passed to the fallback check.
 * @param options - `requireInput` is forwarded to the fallback check (default `false`).
 */
function effectiveIngestOutputMode(
  outputMode: KtxIngestOutputMode,
  io: KtxIngestIo,
  env: NodeJS.ProcessEnv,
  options: EffectiveIngestOutputModeOptions = {},
): KtxIngestOutputMode {
  if (outputMode !== 'viz') {
    return outputMode;
  }
  const fallback = resolveVizFallback(io, env, { requireInput: options.requireInput ?? false });
  if (!fallback.shouldDegrade) {
    return outputMode;
  }
  warnVizFallbackOnce(io, fallback);
  return 'plain';
}
2026-05-10 23:51:24 +02:00
/**
 * Renders a memory-flow replay as static text on `io.stdout`.
 *
 * @param input - Replay input to build the view model from.
 * @param io - Streams to write to; the terminal width is taken from `terminalWidth(io)`.
 * @param options - When `clear` is truthy, an ANSI clear-screen plus cursor-home
 *   sequence is written before the rendered view.
 */
function writeMemoryFlowInput(input: MemoryFlowReplayInput, io: KtxIngestIo, options: { clear?: boolean } = {}): void {
  if (options.clear) {
    // ESC[2J clears the screen, ESC[H moves the cursor to the top-left corner.
    io.stdout.write('\u001b[2J\u001b[H');
  }
  const view = buildMemoryFlowViewModel(input);
  io.stdout.write(renderMemoryFlowReplay(view, { terminalWidth: terminalWidth(io) }));
}
/**
 * Builds the empty, still-running memory-flow input shown before any ingest event arrives.
 *
 * @param args - `run` command arguments; supplies the connection, adapter, and optional source dir.
 * @param runId - Identifier of the run being started.
 * @returns A fresh input with status `'running'`, sync id `'pending'`, and empty collections.
 */
function initialRunMemoryFlowInput(
  args: Extract<KtxIngestArgs, { command: 'run' }>,
  runId: string,
): MemoryFlowReplayInput {
  return {
    runId,
    connectionId: args.connectionId,
    adapter: args.adapter,
    status: 'running',
    // A missing source dir is represented as null rather than undefined.
    sourceDir: args.sourceDir ?? null,
    syncId: 'pending',
    errors: [],
    events: [],
    plannedWorkUnits: [],
    details: { actions: [], provenance: [], transcripts: [] },
  };
}
feat: npm-managed Python runtime for @kaelio/ktx (#7) * docs: add npm managed python runtime design * build: add bundled python runtime wheel builder * build: make local embedding dependencies optional * build: bundle python runtime wheel in cli artifacts * build: track bundled python runtime release artifact * test: verify bundled python runtime wheel * docs: add plan for bundled python runtime wheel * test: cover managed python runtime lifecycle * feat: add managed python runtime installer * feat: add runtime command runner * feat: expose runtime management commands * test: verify managed python runtime commands * docs: add plan for managed python runtime installer * feat: add managed python command helper * feat: use managed runtime for sl query compute * feat: route sl query managed runtime policy * docs: add plan for managed runtime sl query integration * feat: add managed runtime daemon metadata * feat: manage python daemon lifecycle * feat: add runtime daemon start stop commands * fix: verify managed runtime daemon lifecycle * docs: add plan for managed runtime daemon lifecycle * feat: add managed local embeddings config marker * feat: add managed local embeddings daemon helper * feat: use managed runtime for local embedding setup * feat: pass managed runtime policy through setup * docs: add plan for managed local embeddings runtime * feat: read CLI package metadata dynamically * feat: assemble public kaelio ktx npm package * feat: release one public kaelio ktx npm artifact * test: cover public kaelio ktx package invocations * chore: verify public kaelio ktx package artifacts * docs: add plan for public kaelio ktx npm package * test: verify managed runtime in public package smoke * test: finalize managed runtime release smoke * docs: add plan for managed runtime release smoke * test: specify local embeddings release smoke * feat: add local embeddings runtime smoke * chore: register local embeddings smoke * fix: verify local embeddings smoke * fix: restore 
artifact smoke python env helper * docs: add plan for managed local embeddings release smoke * refactor: share managed runtime install policy parsing * feat: use managed runtime for agent semantic queries * feat: use managed runtime for MCP semantic compute * docs: add plan for managed agent and MCP semantic runtime * feat(cli): add managed daemon HTTP helpers * feat(cli): route local adapters through managed daemon * feat(cli): use managed daemon for ingest helpers * feat(cli): pass managed daemon options to scan * feat(context): pass MCP ingest pull config options * feat(cli): pass managed daemon options to serve ingest * test: verify managed local ingest daemon runtime * docs: add plan for managed local ingest daemon runtime * docs: align managed runtime examples * docs: add plan for managed runtime docs cleanup * test: cover published package runtime smoke commands * test: validate published package smoke outputs * docs: add plan for published package runtime smoke * build: stamp public npm package version * release: add npm public release policy * release: add guarded npm publish script * release: document public npm release handoff * docs: add plan for public npm release handoff * test: cover managed runtime prune in package smoke * docs: document managed runtime prune * docs: add plan for managed runtime prune smoke and docs * chore: encode uv runtime prerequisite policy * fix: clarify missing uv runtime error * docs: document uv runtime prerequisite * docs: add plan for uv runtime prerequisite contract * refactor: limit release artifacts to public package runtime * chore: align release policy with bundled runtime wheel * docs: describe single public runtime artifact surface * test: verify single public runtime artifact contract * docs: add plan for single public runtime artifact cleanup * fix: align local embeddings smoke with public version * docs: add plan for local embeddings smoke public version * release: soft-launch as @kaelio/ktx@0.1.0-rc.0 on next 
tag Publish target moves to the pre-release version 0.1.0-rc.0 under the next dist-tag so npm install @kaelio/ktx (which resolves to latest) does not pick up the soft-launch build. Users opt in via @kaelio/ktx@next. * Fix release script boundary checks * Remove PostHog from public package bundle
2026-05-11 15:50:34 +02:00
/**
 * Derives managed-daemon options for an ingest run.
 *
 * @returns `undefined` when `databaseIntrospectionUrl` is set, or when either
 *   `cliVersion` or `runtimeInstallPolicy` is missing; otherwise an object with the
 *   CLI version, the install policy, and the given `io`.
 */
function managedDaemonOptionsForIngestRun(
  args: Extract<KtxIngestArgs, { command: 'run' }>,
  io: KtxIngestIo,
) {
  const { databaseIntrospectionUrl, cliVersion, runtimeInstallPolicy } = args;
  const canUseManagedDaemon = !databaseIntrospectionUrl && Boolean(cliVersion) && Boolean(runtimeInstallPolicy);
  if (!canUseManagedDaemon) {
    return undefined;
  }
  return {
    cliVersion: args.cliVersion,
    installPolicy: args.runtimeInstallPolicy,
    io,
  };
}
2026-05-10 23:12:26 +02:00
/**
 * Writes a stored ingest report in the requested output mode.
 *
 * - `'json'`: dumps the raw report as JSON.
 * - `'viz'` (when not degraded by `effectiveIngestOutputMode`): renders the memory
 *   flow. With `options.interactive` and a TTY stdin, the TUI renderer is tried
 *   first (only if `io` is TUI-capable) and the interactive renderer is used when
 *   the TUI is unavailable or reports a falsy result. Otherwise a static replay
 *   is written.
 * - anything else: writes the plain status summary.
 *
 * @param report - Report to present.
 * @param outputMode - Mode requested by the caller.
 * @param io - Streams to read from and write to.
 * @param options - `interactive` requests interactive rendering;
 *   `renderStoredMemoryFlow` overrides the TUI renderer; `env` overrides `process.env`.
 */
async function writeReportRecord(
  report: IngestReportSnapshot,
  outputMode: KtxIngestOutputMode,
  io: KtxIngestIo,
  options: {
    interactive?: boolean;
    renderStoredMemoryFlow?: typeof renderMemoryFlowTui;
    env?: NodeJS.ProcessEnv;
  } = {},
): Promise<void> {
  if (outputMode === 'json') {
    writeReportJson(report, io);
    return;
  }

  const resolvedOutputMode = effectiveIngestOutputMode(outputMode, io, options.env ?? process.env, {
    requireInput: options.interactive === true,
  });
  if (resolvedOutputMode !== 'viz') {
    writeReportStatus(report, io);
    return;
  }

  const input = ingestReportToMemoryFlowReplay(report, { provenanceRowCount: report.body.provenanceRows.length });
  if (options.interactive === true && io.stdin?.isTTY === true) {
    const renderStoredMemoryFlow = options.renderStoredMemoryFlow ?? renderMemoryFlowTui;
    // Prefer the full TUI; fall back to the simpler interactive renderer if it declines.
    if (isTuiCapableIo(io) && (await renderStoredMemoryFlow(input, io))) {
      return;
    }
    await renderMemoryFlowInteractively(input, io);
    return;
  }

  // Non-interactive request, or stdin is not a TTY: static replay.
  writeMemoryFlowInput(input, io);
}
2026-05-10 23:51:24 +02:00
/**
 * Entry point for the `ktx ingest` CLI command.
 *
 * Loads the project from `args.projectDir` and then dispatches on the parsed
 * arguments:
 *
 * - `command === 'run'` with the `metabase` adapter: runs the Metabase fan-out
 *   ingest and prints either the JSON result or a status summary. Exit code is
 *   `0` only when `result.status === 'all_succeeded'`.
 * - `command === 'run'` with any other adapter: runs a local ingest, optionally
 *   streaming progress to a live memory-flow TUI, a redrawn text view, or a
 *   plain progress renderer. Exit code is `0` only when the report status is
 *   `'done'`.
 * - otherwise: replays a report, read from `args.reportFile` when given, else
 *   looked up through `readStoredIngestReport`. Exit code is `0` once written.
 *
 * Any thrown error is reported as a single line on `io.stderr` and mapped to
 * exit code `1`.
 *
 * @param args - Parsed ingest arguments.
 * @param io - IO handle; defaults to the current `process`.
 * @param deps - Optional overrides for collaborators (adapters factory, ingest
 *   runners, report reader, TUI starters, env, job id factory).
 * @returns The process exit code (`0` or `1`).
 */
export async function runKtxIngest(
  args: KtxIngestArgs,
  io: KtxIngestIo = process,
  deps: KtxIngestDeps = {},
): Promise<number> {
  try {
    const project = await loadKtxProject({ projectDir: args.projectDir });
    const env = deps.env ?? process.env;
    // An unset input mode is treated as 'auto', which is the interactive mode.
    const interactive = (args.inputMode ?? 'auto') === 'auto';

    if (args.command === 'run') {
      const createAdapters = deps.createAdapters ?? createKtxCliLocalIngestAdapters;
      const executeLocalIngest = deps.runLocalIngest ?? runLocalIngest;
      const localIngestOptions = deps.localIngestOptions ?? {};
      const managedDaemon = managedDaemonOptionsForIngestRun(args, io);
      const operationalLogger = createCliOperationalLogger(io, args.outputMode);
      // Later spreads win: CLI-derived values override injected pull-config options.
      const adapterOptions = {
        ...(localIngestOptions.pullConfigOptions ?? {}),
        ...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}),
        ...(managedDaemon ? { managedDaemon } : {}),
        ...(args.adapter === 'historic-sql' ? { historicSqlConnectionId: args.connectionId } : {}),
        logger: operationalLogger,
      };

      if (args.adapter === 'metabase' && args.sourceDir) {
        throw new Error('source-dir uploads are not supported for the Metabase fan-out adapter');
      }

      if (args.adapter === 'metabase') {
        const executeMetabaseFanout = deps.runLocalMetabaseIngest ?? runLocalMetabaseIngest;
        // No progress reporter in JSON mode, so stdout carries only the JSON result.
        const progress =
          args.outputMode === 'json' ? undefined : createMetabaseFanoutProgress(args.connectionId, io);
        const result = await executeMetabaseFanout({
          project,
          adapters: createAdapters(project, adapterOptions),
          metabaseConnectionId: args.connectionId,
          ...localIngestOptions,
          trigger: 'manual_resync',
          jobIdFactory: deps.jobIdFactory,
          ...(progress ? { progress } : {}),
        });
        if (args.outputMode === 'json') {
          io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
        } else {
          writeMetabaseFanoutStatus(result, io);
        }
        return result.status === 'all_succeeded' ? 0 : 1;
      }

      const jobId = deps.jobIdFactory?.();
      let liveTui: MemoryFlowTuiLiveSession | null = null;
      const runOutputMode = effectiveIngestOutputMode(args.outputMode, io, env, {
        requireInput: interactive,
      });
      const shouldUseLiveViz = runOutputMode === 'viz' && interactive && isInteractiveTerminal(io);
      const plainProgress = shouldWritePlainIngestProgress(runOutputMode, io, env)
        ? createPlainIngestProgressRenderer(args, io)
        : null;
      // A memory-flow buffer is only needed when something will display progress.
      const initialMemoryFlow =
        shouldUseLiveViz || plainProgress ? initialRunMemoryFlowInput(args, jobId ?? 'pending') : undefined;
      let latestMemoryFlowSnapshot: MemoryFlowReplayInput | null = initialMemoryFlow ?? null;

      if (shouldUseLiveViz && initialMemoryFlow && isTuiCapableIo(io)) {
        const startLiveMemoryFlow = deps.startLiveMemoryFlow ?? startLiveMemoryFlowTui;
        liveTui = await startLiveMemoryFlow(initialMemoryFlow, io);
      }

      const memoryFlow = initialMemoryFlow
        ? createMemoryFlowLiveBuffer(initialMemoryFlow, {
            onChange: (snapshot) => {
              latestMemoryFlowSnapshot = snapshot;
              // Prefer the live TUI while it is open.
              if (liveTui && !liveTui.isClosed()) {
                liveTui.update(snapshot);
                return;
              }
              // Live viz requested but no TUI session exists: redraw the text view.
              if (shouldUseLiveViz && !liveTui) {
                writeMemoryFlowInput(snapshot, io, { clear: true });
                return;
              }
              plainProgress?.update(snapshot);
            },
          })
        : undefined;
      plainProgress?.start();

      try {
        const result = await executeLocalIngest({
          project,
          adapters: createAdapters(project, adapterOptions),
          adapter: args.adapter,
          connectionId: args.connectionId,
          sourceDir: args.sourceDir,
          trigger: 'manual_resync',
          jobId,
          ...localIngestOptions,
          pullConfigOptions: adapterOptions,
          ...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),
          ...(memoryFlow ? { memoryFlow } : {}),
        });

        if (shouldUseLiveViz && memoryFlow) {
          // Close the TUI before printing so the final summary goes to plain stdout.
          latestMemoryFlowSnapshot = memoryFlow.snapshot();
          liveTui?.close();
          liveTui = null;
          io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot));
          return reportStatus(result.report) === 'done' ? 0 : 1;
        }

        await writeReportRecord(result.report, runOutputMode, io, {
          interactive,
          renderStoredMemoryFlow: deps.renderStoredMemoryFlow,
          env,
        });
        return reportStatus(result.report) === 'done' ? 0 : 1;
      } finally {
        // Also runs when the ingest throws, so a live TUI session is always closed.
        liveTui?.close();
      }
    }

    if (args.reportFile) {
      const readReportFile = deps.readReportFile ?? readIngestReportSnapshotFile;
      const report = await readReportFile(args.reportFile);
      if (args.runId) {
        assertReportMatchesReplayId(report, args.runId, args.reportFile);
      }
      await writeReportRecord(report, args.outputMode, io, {
        interactive,
        renderStoredMemoryFlow: deps.renderStoredMemoryFlow,
        env,
      });
      return 0;
    }

    const report = await readStoredIngestReport(project, args.runId);
    if (!report) {
      throw new Error(
        args.runId
          ? `Local ingest run or report "${args.runId}" was not found`
          : 'No local ingest reports were found. Run `ktx ingest --all` first.',
      );
    }
    await writeReportRecord(report, args.outputMode, io, {
      interactive,
      renderStoredMemoryFlow: deps.renderStoredMemoryFlow,
      env,
    });
    return 0;
  } catch (error) {
    io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
    return 1;
  }
}