2026-05-10 23:51:24 +02:00
|
|
|
import type { KtxCliIo } from './index.js';
|
2026-05-13 17:01:48 +02:00
|
|
|
import type { KtxIngestProgressUpdate } from './ingest.js';
|
2026-05-14 01:43:06 +02:00
|
|
|
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
2026-05-10 23:12:26 +02:00
|
|
|
import type {
|
2026-05-10 23:51:24 +02:00
|
|
|
KtxPublicIngestArgs,
|
2026-05-13 17:01:48 +02:00
|
|
|
KtxPublicIngestDeps,
|
2026-05-10 23:51:24 +02:00
|
|
|
KtxPublicIngestPlanTarget,
|
|
|
|
|
KtxPublicIngestProject,
|
|
|
|
|
KtxPublicIngestTargetResult,
|
2026-05-10 23:12:26 +02:00
|
|
|
} from './public-ingest.js';
|
2026-06-01 23:31:31 +02:00
|
|
|
import { buildPublicIngestPlan, executePublicIngestTarget, publicProgressMessage } from './public-ingest.js';
|
|
|
|
|
import { createAggregateProgressPort } from './progress-port-adapter.js';
|
2026-05-10 23:12:26 +02:00
|
|
|
import { formatDuration } from './demo-metrics.js';
|
|
|
|
|
import { profileMark } from './startup-profile.js';
|
2026-06-08 16:14:56 +02:00
|
|
|
import {
|
|
|
|
|
isFreshStarCountCache,
|
|
|
|
|
readStarCountCache,
|
|
|
|
|
writeStarCountCache,
|
|
|
|
|
} from './star-prompt/cache.js';
|
|
|
|
|
import { fetchGitHubStarCount as defaultFetchGitHubStarCount } from './star-prompt/star-count.js';
|
|
|
|
|
import { renderStarPromptLine } from './star-prompt/star-line.js';
|
2026-05-10 23:12:26 +02:00
|
|
|
|
|
|
|
|
// Emit the 'module:context-build-view' mark through the startup-profile helper.
profileMark('module:context-build-view');

/** Spinner glyphs; the icon renderers pick one with `frame % SPINNER_FRAMES.length`. */
const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] as const;

/** The ESC control character (code 27) that prefixes every ANSI sequence built in this module. */
const ESC = String.fromCharCode(27);
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
/** Identifier of one phase (sub-step) of a target's ingest. */
type PhaseKey = 'database-schema' | 'query-history' | 'source-ingest';

/** Lifecycle state of a single phase. */
type PhaseStatus = 'queued' | 'running' | 'done' | 'failed' | 'skipped';

/** Render state for one phase row. */
interface PhaseState {
  /** Which phase this row describes. */
  key: PhaseKey;
  /** Display label; padded to a fixed width when the row is rendered. */
  name: string;
  /** Current lifecycle state; selects the icon and the trailing text of the row. */
  status: PhaseStatus;
  /** Progress percentage; the row renderer rounds it and clamps it to 0–100. */
  percent: number;
  /** Text shown after the bar while the phase is running. */
  detail: string | null;
  /** Text shown after the bar once the phase is done, or after the "failed" label. */
  summary: string | null;
  /** NOTE(review): presumably the epoch-ms start time of the phase — confirm with the code that sets it. */
  startedAt: number | null;
  /** Elapsed milliseconds; rendered as "(duration)" only when greater than zero. */
  elapsedMs: number;
  /** NOTE(review): presumably the time of the latest progress update for this phase — confirm. */
  progressUpdatedAtMs: number | null;
}
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
/** Render state for one ingest target (one row, or a header plus phase rows, in the view). */
export interface ContextBuildTargetState {
  /** The planned target; `connectionId` labels the row and `operation` selects phases and default text. */
  target: KtxPublicIngestPlanTarget;
  /** Lifecycle state of the whole target. */
  status: 'queued' | 'running' | 'done' | 'failed';
  /** Latest progress line; a leading `[NN%]` prefix is parsed as the progress percentage. */
  detailLine: string | null;
  /** Summary shown before the duration once the target is done. */
  summaryText: string | null;
  /** Message shown when the target failed; the renderer falls back to "failed" when null. */
  failureText: string | null;
  /** Start time; added to `elapsedMs` to derive "now" for the stale-progress notice. */
  startedAt: number | null;
  /** Elapsed milliseconds for the target. */
  elapsedMs: number;
  /** Time of the last progress update; compared against `startedAt + elapsedMs`. */
  progressUpdatedAtMs: number | null;
  /** Per-phase rows, rendered while the target runs once any phase has left 'queued'. */
  phases: PhaseState[];
}
|
|
|
|
|
|
|
|
|
|
/** Display label for each phase key. */
const PHASE_LABELS: Record<PhaseKey, string> = {
  'database-schema': 'Schema',
  'query-history': 'Query history',
  'source-ingest': 'Source ingest',
};

/**
 * Builds the initial (all queued, 0%) phase list for a target.
 *
 * A 'database-ingest' target gets a schema phase, plus a query-history phase
 * when `target.queryHistory.enabled` is exactly `true`. Any other operation
 * gets a single source-ingest phase.
 *
 * @param target - The planned ingest target.
 * @returns Fresh phase states in display order.
 */
function makePhasesForTarget(target: KtxPublicIngestPlanTarget): PhaseState[] {
  const keys: PhaseKey[] = [];
  if (target.operation === 'database-ingest') {
    keys.push('database-schema');
    if (target.queryHistory?.enabled === true) {
      keys.push('query-history');
    }
  } else {
    keys.push('source-ingest');
  }

  return keys.map(
    (key): PhaseState => ({
      key,
      name: PHASE_LABELS[key],
      status: 'queued',
      percent: 0,
      detail: null,
      summary: null,
      startedAt: null,
      elapsedMs: 0,
      progressUpdatedAtMs: null,
    }),
  );
}
|
|
|
|
|
|
|
|
|
|
/** Everything the context-build view needs to render one frame. */
export interface ContextBuildViewState {
  /** Targets listed in the first group (labelled "Databases" by default). */
  primarySources: ContextBuildTargetState[];
  /** Targets listed in the second group (labelled "Context sources" by default). */
  contextSources: ContextBuildTargetState[];
  /** Animation frame counter; selects the spinner glyph. */
  frame: number;
  /** NOTE(review): presumably the start time of the whole build — not read by the renderer; confirm. */
  startedAt: number | null;
  /** Total elapsed milliseconds; shown in the header and the completion summary when greater than zero. */
  totalElapsedMs: number;
  /** Star count passed to the star-prompt line as `count`; may be null. */
  starCount: number | null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Arguments for a context build run.
 *
 * NOTE(review): the consumer of these fields is outside this view; the notes
 * below restate only what the declarations themselves show.
 */
export interface ContextBuildArgs {
  /** Project directory the build operates on. */
  projectDir: string;
  /** Input mode: 'auto' or 'disabled'. */
  inputMode: 'auto' | 'disabled';
  /** Optional connection id to target. */
  targetConnectionId?: string;
  /** Optional "all" flag. */
  all?: boolean;
  /** Which entrypoint started the build. */
  entrypoint?: 'setup' | 'ingest';
  /** Same type as `queryHistory` on the 'run' variant of `KtxPublicIngestArgs`. */
  queryHistory?: Extract<KtxPublicIngestArgs, { command: 'run' }>['queryHistory'];
  /** Query-history window, in days. */
  queryHistoryWindowDays?: number;
  /** Same type as `scanMode` on the 'run' variant of `KtxPublicIngestArgs`. */
  scanMode?: Extract<KtxPublicIngestArgs, { command: 'run' }>['scanMode'];
  /** Optional relationship-detection flag. */
  detectRelationships?: boolean;
  /** Optional CLI version string. */
  cliVersion?: string;
  /** Optional install policy for the managed Python runtime. */
  runtimeInstallPolicy?: KtxManagedPythonInstallPolicy;
}
|
|
|
|
|
|
|
|
|
|
/** Outcome of a context build run. */
export interface ContextBuildResult {
  /** Exit code for the run. */
  exitCode: number;
  /** Optional list of report ids. */
  reportIds?: string[];
  /** Optional list of artifact paths. */
  artifactPaths?: string[];
}
|
|
|
|
|
|
2026-05-10 17:08:55 -07:00
|
|
|
/**
 * Progress snapshot for one source, as delivered to the `onSourceProgress`
 * callback declared on `ContextBuildDeps`.
 */
export interface ContextBuildSourceProgressUpdate {
  /** Connection the update belongs to. */
  connectionId: string;
  /** Kind of ingest being run for the connection. */
  operation: 'database-ingest' | 'source-ingest';
  /** Lifecycle state of the source. */
  status: 'queued' | 'running' | 'done' | 'failed';
  /** Start time in milliseconds, when known. */
  startedAtMs?: number;
  /** Elapsed milliseconds, when known. */
  elapsedMs?: number;
  /** Progress percentage, when known. */
  percent?: number;
  /** Progress message, when available. */
  message?: string;
  /** Time of this update in milliseconds, when known. */
  updatedAtMs?: number;
  /** Summary text, when available. */
  summaryText?: string;
}
|
|
|
|
|
|
2026-05-13 19:32:49 +02:00
|
|
|
/** Singular and plural noun used in the "N … processed" completion summary. */
interface CompletedItemName {
  singular: string;
  plural: string;
}

/** Presentation options accepted by `renderContextBuildView`. */
interface ContextBuildRenderOptions {
  /** Emit ANSI styling; the renderer defaults this to `true`. */
  styled?: boolean;
  /** Show the stop hint while any target is still queued or running. */
  showHint?: boolean;
  /** Show the star-prompt line while any target is still queued or running. */
  showStarPrompt?: boolean;
  /** Column count passed to the star-prompt line; the renderer defaults it to 80. */
  columns?: number;
  /** Hint text; the renderer defaults it to "Ctrl+C to stop". */
  hintText?: string;
  /** When set, a "Project: …" line is printed under the header. */
  projectDir?: string;
  /** Header title; the renderer defaults it to "Building ktx context". */
  title?: string;
  /** Label of the primary group; the renderer defaults it to "Databases". */
  primaryGroupLabel?: string;
  /** Label of the context group; the renderer defaults it to "Context sources". */
  contextGroupLabel?: string;
  /** Text for a running database ingest with no detail line; defaults to "reading schema". */
  scanRunningText?: string;
  /** Text for any other running ingest with no detail line; defaults to "ingesting...". */
  sourceIngestRunningText?: string;
  /** Noun for the completion summary; the renderer defaults it to source/sources. */
  completedItemName?: CompletedItemName;
  /** Messages listed under a "Notices" heading. */
  notices?: string[];
  /** Messages listed under a "Warnings" heading. */
  warnings?: string[];
}
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
/**
 * Optional collaborators for a context build.
 *
 * NOTE(review): the code that consumes these is outside this view; they look
 * like injection points for tests and callers — confirm against the runner.
 */
export interface ContextBuildDeps {
  /** Replacement with the same signature as `executePublicIngestTarget`. */
  executeTarget?: typeof executePublicIngestTarget;
  /** Clock function returning a number (presumably epoch milliseconds — confirm). */
  now?: () => number;
  /** Callback that receives a list of per-source progress snapshots. */
  onSourceProgress?: (sources: ContextBuildSourceProgressUpdate[]) => void;
  /** Throttle interval, in milliseconds, for source progress. */
  sourceProgressThrottleMs?: number;
  /** Replacement with the same signature as the default GitHub star-count fetcher. */
  fetchStarCount?: typeof defaultFetchGitHubStarCount;
  /** Environment variables consulted for the star prompt. */
  starPromptEnv?: StarPromptEnv;
  /** Home directory used for the star prompt. */
  starPromptHomeDir?: string;
}
|
|
|
|
|
|
|
|
|
|
/** Process environment narrowed to name the variables relevant to the star prompt. */
interface StarPromptEnv extends NodeJS.ProcessEnv {
  /** The `CI` environment variable. */
  CI?: string;
  /** The `DO_NOT_TRACK` environment variable. */
  DO_NOT_TRACK?: string;
  /** The `KTX_NO_STAR` environment variable. */
  KTX_NO_STAR?: string;
}
|
|
|
|
|
|
|
|
|
|
// --- Rendering ---
|
|
|
|
|
|
|
|
|
|
/** Wraps `text` in the ANSI codes 32 (on) and 39 (off): green foreground. */
function green(text: string): string {
  return ESC + '[32m' + text + ESC + '[39m';
}

/** Wraps `text` in the ANSI codes 31 (on) and 39 (off): red foreground. */
function red(text: string): string {
  return ESC + '[31m' + text + ESC + '[39m';
}

/** Wraps `text` in the ANSI codes 36 (on) and 39 (off): cyan foreground. */
function cyan(text: string): string {
  return ESC + '[36m' + text + ESC + '[39m';
}

/** Wraps `text` in the ANSI codes 2 (on) and 22 (off): dim intensity. */
function dim(text: string): string {
  return ESC + '[2m' + text + ESC + '[22m';
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the one-glyph icon for a target row.
 *
 * @param status - Target lifecycle state.
 * @param frame - Animation frame; selects the spinner glyph for 'running'.
 * @param styled - When true the glyph is colored (green, red, cyan, or dim).
 * @returns '✓' for done, '✗' for failed, a spinner glyph for running, '○' otherwise.
 */
function statusIcon(status: ContextBuildTargetState['status'], frame: number, styled: boolean): string {
  if (status === 'done') {
    return styled ? green('✓') : '✓';
  }
  if (status === 'failed') {
    return styled ? red('✗') : '✗';
  }
  if (status === 'running') {
    // The fallback covers frames that do not index into the glyph list.
    const spinner = SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? '⠋';
    return styled ? cyan(spinner) : spinner;
  }
  return styled ? dim('○') : '○';
}
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
/**
 * Returns the one-glyph icon for a phase row.
 *
 * @param status - Phase lifecycle state.
 * @param frame - Animation frame; selects the spinner glyph for 'running'.
 * @param styled - When true the glyph is colored; skipped and queued glyphs are dimmed.
 * @returns '✓' done, '✗' failed, spinner running, '·' skipped, '○' otherwise.
 */
function phaseStatusIcon(status: PhaseStatus, frame: number, styled: boolean): string {
  let glyph: string;
  let paint: (text: string) => string = dim;

  switch (status) {
    case 'done':
      glyph = '✓';
      paint = green;
      break;
    case 'failed':
      glyph = '✗';
      paint = red;
      break;
    case 'running':
      // The fallback covers frames that do not index into the glyph list.
      glyph = SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? '⠋';
      paint = cyan;
      break;
    case 'skipped':
      glyph = '·';
      break;
    default:
      glyph = '○';
      break;
  }

  return styled ? paint(glyph) : glyph;
}
|
|
|
|
|
|
2026-05-10 16:12:51 -07:00
|
|
|
/**
 * Parses the integer percentage from a progress line that starts with `[NN%]`.
 *
 * @param detailLine - Progress line, or null.
 * @returns The number inside the leading `[NN%]` prefix, or null when the line
 *   is null, empty, or does not start with that prefix.
 */
function extractPercent(detailLine: string | null): number | null {
  if (detailLine === null || detailLine === '') {
    return null;
  }
  const captured = /^\[(\d+)%\]/.exec(detailLine);
  if (captured === null) {
    return null;
  }
  return Number(captured[1]);
}
|
|
|
|
|
|
|
|
|
|
/** Number of cells in a progress bar. */
const BAR_WIDTH = 12;
/** Glyph for a completed cell. */
const BAR_FILLED = '█';
/** Glyph for a remaining cell. */
const BAR_EMPTY = '░';
/** A running target whose last progress update is at least this old gets a "last update … ago" notice. */
const STALE_PROGRESS_UPDATE_MS = 30_000;

/**
 * Renders a fixed-width progress bar.
 *
 * The percentage is clamped to 0–100 (NaN counts as 0) before the cell count
 * is computed. Without the clamp, an out-of-range value produced a negative
 * count for `String.prototype.repeat`, which throws a RangeError.
 *
 * @param percent - Progress percentage.
 * @param styled - When true the bar is colored cyan.
 * @returns A string of exactly `BAR_WIDTH` bar glyphs, optionally ANSI-wrapped.
 */
function renderProgressBar(percent: number, styled: boolean): string {
  const bounded = Number.isNaN(percent) ? 0 : Math.min(100, Math.max(0, percent));
  const filled = Math.round((bounded / 100) * BAR_WIDTH);
  const empty = BAR_WIDTH - filled;
  const bar = `${BAR_FILLED.repeat(filled)}${BAR_EMPTY.repeat(empty)}`;
  return styled ? cyan(bar) : bar;
}
|
|
|
|
|
|
2026-05-13 17:01:48 +02:00
|
|
|
/**
 * Builds the "last update … ago" notice for a target whose progress has stalled.
 *
 * "Now" is derived as `startedAt + elapsedMs`, so the function reads no clock.
 *
 * @param target - Target state to inspect.
 * @param styled - When true the notice is dimmed.
 * @returns The notice, or null when timing data is missing, `elapsedMs` is not
 *   positive, or the last update is newer than `STALE_PROGRESS_UPDATE_MS`.
 */
function staleProgressText(target: ContextBuildTargetState, styled: boolean): string | null {
  const { startedAt, progressUpdatedAtMs, elapsedMs } = target;
  if (startedAt === null || progressUpdatedAtMs === null || elapsedMs <= 0) {
    return null;
  }

  const sinceLastUpdateMs = startedAt + elapsedMs - progressUpdatedAtMs;
  if (sinceLastUpdateMs < STALE_PROGRESS_UPDATE_MS) {
    return null;
  }

  const notice = `last update ${formatDuration(sinceLastUpdateMs)} ago`;
  return styled ? dim(notice) : notice;
}
|
|
|
|
|
|
2026-05-13 19:32:49 +02:00
|
|
|
/**
 * Builds the text shown to the right of a target's name on its single-line row.
 *
 * - done: optional summary and the elapsed duration, joined by " · ".
 * - failed: the failure text (or "failed"), red when styled.
 * - running: optional progress bar and percentage, progress text, optional
 *   stale-progress notice, and the elapsed time in parentheses.
 * - otherwise: "queued", dimmed when styled.
 *
 * @param target - Target state to describe.
 * @param styled - When true, ANSI styling is applied.
 * @param options - Supplies the default running texts.
 * @returns The detail text for the row.
 */
function targetDetail(target: ContextBuildTargetState, styled: boolean, options: ContextBuildRenderOptions): string {
  if (target.status === 'done') {
    const parts: string[] = [];
    if (target.summaryText) parts.push(target.summaryText);
    parts.push(formatDuration(target.elapsedMs));
    return parts.join(' · ');
  }

  if (target.status === 'failed') {
    const failureText = target.failureText ?? 'failed';
    return styled ? red(failureText) : failureText;
  }

  if (target.status !== 'running') {
    return styled ? dim('queued') : 'queued';
  }

  // The prefix regex only admits digits, so the parsed value cannot be negative.
  // Cap it at 100 so an over-reported percentage cannot drive the bar's
  // `repeat` count negative, which would throw a RangeError.
  const parsedPercent = extractPercent(target.detailLine);
  const percent = parsedPercent === null ? null : Math.min(100, parsedPercent);

  const defaultRunningText =
    target.target.operation === 'database-ingest'
      ? (options.scanRunningText ?? 'reading schema')
      : (options.sourceIngestRunningText ?? 'ingesting...');
  const progressText = target.detailLine?.replace(/^\[\d+%\]\s*/, '') ?? defaultRunningText;

  const parts: string[] = [];
  if (percent !== null) {
    parts.push(`${renderProgressBar(percent, styled)} ${percent}%`);
  }
  // A detail line that is only a "[NN%]" prefix leaves nothing to show; skipping
  // the empty string avoids a stray double or trailing space in the joined row.
  if (progressText !== '') {
    parts.push(progressText);
  }
  const stale = staleProgressText(target, styled);
  if (stale) parts.push(stale);
  if (target.elapsedMs > 0) {
    const elapsed = `(${formatDuration(target.elapsedMs)})`;
    parts.push(styled ? dim(elapsed) : elapsed);
  }
  return parts.join(' ');
}
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
/** Width the phase label is padded to (with `padEnd`) in each phase row. */
const PHASE_NAME_WIDTH = 14;
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the header suffix for a running target that is shown with phase rows.
 *
 * @param target - Target state; only `elapsedMs` is read.
 * @param styled - When true the suffix is dimmed.
 * @returns "(duration)" when `elapsedMs` is greater than zero, otherwise an empty string.
 */
function renderRunningTargetHeaderDetail(target: ContextBuildTargetState, styled: boolean): string {
  if (!(target.elapsedMs > 0)) {
    return '';
  }
  const elapsedLabel = `(${formatDuration(target.elapsedMs)})`;
  return styled ? dim(elapsedLabel) : elapsedLabel;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Renders one phase row: icon, padded label, progress bar, and trailing text.
 *
 * Queued and skipped phases show an empty dimmed bar and a dash in place of a
 * percentage. Every other status shows a bar for the rounded, clamped
 * percentage. The trailing text depends on the status: summary and elapsed
 * time (done), detail and elapsed time (running), "queued", "skipped", or
 * "failed" followed by the summary.
 *
 * @param phase - Phase state to render.
 * @param frame - Animation frame for the spinner icon.
 * @param styled - When true, ANSI styling is applied.
 * @returns The rendered row, without a trailing newline.
 */
function renderPhaseRow(phase: PhaseState, frame: number, styled: boolean): string {
  const faint = (text: string): string => (styled ? dim(text) : text);
  // Joins an optional leading text with "(elapsed)", the latter only when time has been recorded.
  const withElapsed = (lead: string | null): string => {
    const pieces: string[] = [];
    if (lead) pieces.push(lead);
    if (phase.elapsedMs > 0) {
      pieces.push(faint(`(${formatDuration(phase.elapsedMs)})`));
    }
    return pieces.join(' ');
  };

  const icon = phaseStatusIcon(phase.status, frame, styled);
  const name = phase.name.padEnd(PHASE_NAME_WIDTH);

  let progress: string;
  if (phase.status === 'queued' || phase.status === 'skipped') {
    progress = `${faint(BAR_EMPTY.repeat(BAR_WIDTH))} ${faint(' —')}`;
  } else {
    const pct = Math.max(0, Math.min(100, Math.round(phase.percent)));
    progress = `${renderProgressBar(pct, styled)} ${String(pct).padStart(3)}%`;
  }

  let trailing = '';
  switch (phase.status) {
    case 'done':
      trailing = withElapsed(phase.summary);
      break;
    case 'running':
      trailing = withElapsed(phase.detail);
      break;
    case 'queued':
      trailing = faint('queued');
      break;
    case 'skipped':
      trailing = faint('skipped');
      break;
    case 'failed': {
      const label = styled ? red('failed') : 'failed';
      trailing = phase.summary ? `${label} ${phase.summary}` : label;
      break;
    }
  }

  // trimEnd removes the separator space left behind when there is no trailing text.
  const bar = `${progress} ${trailing}`.trimEnd();
  return ` ${icon} ${name} ${bar}`;
}
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
/**
 * Computes the width of the name column shared by all target rows.
 *
 * @param state - View state holding both target groups.
 * @returns The longest `connectionId` length (at least 12) plus 2.
 */
function columnWidth(state: ContextBuildViewState): number {
  let longest = 12;
  for (const entry of [...state.primarySources, ...state.contextSources]) {
    longest = Math.max(longest, entry.target.connectionId.length);
  }
  return longest + 2;
}
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
/**
 * Renders the row or rows for one target.
 *
 * A running target with at least one phase that has left 'queued' becomes a
 * header line followed by one row per phase. Every other target is a single
 * line with its detail text.
 *
 * @param target - Target state to render.
 * @param frame - Animation frame for spinner icons.
 * @param styled - When true, ANSI styling is applied.
 * @param width - Width the connection id is padded to.
 * @param options - Render options forwarded to the detail text.
 * @returns One or more rendered lines.
 */
function renderTargetRows(
  target: ContextBuildTargetState,
  frame: number,
  styled: boolean,
  width: number,
  options: ContextBuildRenderOptions,
): string[] {
  const icon = statusIcon(target.status, frame, styled);
  const name = target.target.connectionId.padEnd(width);

  // `some` is false for an empty list, so this also covers targets without phases.
  const showPhases = target.status === 'running' && target.phases.some((phase) => phase.status !== 'queued');
  if (!showPhases) {
    return [` ${icon} ${name} ${targetDetail(target, styled, options)}`];
  }

  const rows: string[] = [` ${icon} ${name} ${renderRunningTargetHeaderDetail(target, styled)}`.trimEnd()];
  for (const phase of target.phases) {
    rows.push(renderPhaseRow(phase, frame, styled));
  }
  return rows;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Renders a labelled group of targets.
 *
 * @param label - Group heading; rendered followed by a colon.
 * @param targets - Targets in the group.
 * @param frame - Animation frame for spinner icons.
 * @param styled - When true, ANSI styling is applied.
 * @param width - Width the connection ids are padded to.
 * @param options - Render options forwarded to each target.
 * @returns An empty list for an empty group; otherwise a blank line, the
 *   heading, and the rows of every target.
 */
function renderTargetGroup(
  label: string,
  targets: ContextBuildTargetState[],
  frame: number,
  styled: boolean,
  width: number,
  options: ContextBuildRenderOptions,
): string[] {
  if (targets.length === 0) {
    return [];
  }
  const lines: string[] = ['', ` ${label}:`];
  for (const entry of targets) {
    lines.push(...renderTargetRows(entry, frame, styled, width, options));
  }
  return lines;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Renders a labelled bullet list of messages.
 *
 * @param label - Group heading; rendered followed by a colon and never dimmed.
 * @param messages - Messages to list.
 * @param styled - When true each bullet line is dimmed.
 * @returns An empty list when there are no messages; otherwise a blank line,
 *   the heading, and one " - message" line per message.
 */
function renderMessageGroup(label: string, messages: string[], styled: boolean): string[] {
  if (messages.length === 0) {
    return [];
  }
  const lines: string[] = ['', ` ${label}:`];
  for (const message of messages) {
    const bullet = ` - ${message}`;
    lines.push(styled ? dim(bullet) : bullet);
  }
  return lines;
}
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
/**
 * Builds the command line a user can run to retry.
 *
 * With `entrypoint: 'ingest'` and a connection id the result is
 * `ktx ingest <id>` followed by the optional `--project-dir`,
 * `--query-history`, and `--query-history-window-days` flags. The window flag
 * is emitted only together with `--query-history`. In every other case the
 * result is `ktx setup`, with `--project-dir` when a project dir is given.
 *
 * The project dir and connection id are wrapped in double quotes when they
 * contain whitespace or quote characters, so that a path such as
 * `/Users/Jane Doe/project` stays one argument when the command is pasted into
 * a shell. Values without such characters are emitted unchanged.
 *
 * @param input - Pieces of the command; every field is optional.
 * @returns The retry command as a single line.
 */
function retryCommand(input: {
  projectDir?: string;
  entrypoint?: 'setup' | 'ingest';
  connectionId?: string;
  queryHistory?: boolean;
  queryHistoryWindowDays?: number;
}): string {
  // Quote only when needed so the common case stays byte-identical to the unquoted form.
  const shellArg = (value: string): string =>
    /[\s"']/.test(value) ? `"${value.replace(/"/g, '\\"')}"` : value;

  const projectPart = input.projectDir ? ` --project-dir ${shellArg(input.projectDir)}` : '';

  if (input.entrypoint === 'ingest' && input.connectionId) {
    const queryHistoryPart = input.queryHistory ? ' --query-history' : '';
    const windowPart =
      input.queryHistory && input.queryHistoryWindowDays !== undefined
        ? ` --query-history-window-days ${input.queryHistoryWindowDays}`
        : '';
    return `ktx ingest ${shellArg(input.connectionId)}${projectPart}${queryHistoryPart}${windowPart}`;
  }

  return `ktx setup${projectPart}`;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Renders the whole context-build view as one string.
 *
 * Layout, top to bottom: blank line, header (title plus a dimmed
 * "(finished/total · elapsed)" counter when there are targets), separator
 * rule, optional project line, the two target groups, notices, warnings, and
 * a blank line. After that come, when applicable, the completion summary (all
 * targets finished and elapsed time recorded), the star prompt, and the stop
 * hint (the latter two only while a target is queued or running).
 *
 * @param state - Targets and timing for this frame.
 * @param options - Presentation options; styling defaults to on.
 * @returns The rendered view, ending with a newline.
 */
export function renderContextBuildView(
  state: ContextBuildViewState,
  options: ContextBuildRenderOptions = {},
): string {
  const styled = options.styled ?? true;
  const faint = (text: string): string => (styled ? dim(text) : text);
  const width = columnWidth(state);

  const allTargets = [...state.primarySources, ...state.contextSources];
  const totalCount = allTargets.length;
  let finishedCount = 0;
  let hasActive = false;
  for (const { status } of allTargets) {
    // Failed targets count as finished for the progress counter.
    if (status === 'done' || status === 'failed') finishedCount += 1;
    if (status === 'running' || status === 'queued') hasActive = true;
  }
  const allDone = totalCount > 0 && !hasActive;

  let header = options.title ?? 'Building ktx context';
  if (totalCount > 0) {
    const elapsedSuffix = state.totalElapsedMs > 0 ? ` · ${formatDuration(state.totalElapsedMs)}` : '';
    const counter = `(${finishedCount}/${totalCount}${elapsedSuffix})`;
    header = `${header} ${faint(counter)}`;
  }
  // Measure the header without ANSI color codes so the rule matches the visible text.
  const visibleHeaderLength = header.replace(/\x1b\[[0-9;]*m/g, '').length;

  const lines: string[] = ['', header, '─'.repeat(Math.max(21, visibleHeaderLength))];
  if (options.projectDir) {
    lines.push(` Project: ${options.projectDir}`);
  }
  lines.push(
    ...renderTargetGroup(options.primaryGroupLabel ?? 'Databases', state.primarySources, state.frame, styled, width, options),
    ...renderTargetGroup(options.contextGroupLabel ?? 'Context sources', state.contextSources, state.frame, styled, width, options),
    ...renderMessageGroup('Notices', options.notices ?? [], styled),
    ...renderMessageGroup('Warnings', options.warnings ?? [], styled),
    '',
  );

  if (allDone && state.totalElapsedMs > 0) {
    const noun = options.completedItemName ?? { singular: 'source', plural: 'sources' };
    const processed = totalCount === 1 ? `1 ${noun.singular}` : `${totalCount} ${noun.plural}`;
    const summary = ` Done in ${formatDuration(state.totalElapsedMs)} · ${processed} processed`;
    lines.push(styled ? green(summary) : summary, '');
  }

  if (options.showStarPrompt && hasActive) {
    lines.push(faint(renderStarPromptLine({ count: state.starCount, columns: options.columns ?? 80 })));
  }

  if (options.showHint && hasActive) {
    lines.push(faint(` ${options.hintText ?? 'Ctrl+C to stop'}`), '');
  }

  return `${lines.join('\n')}\n`;
}
|
|
|
|
|
|
|
|
|
|
// --- IO Capture ---
|
|
|
|
|
|
|
|
|
|
/** Matches every `ESC[K` sequence (the ANSI erase-in-line control). */
const ESC_K_RE = new RegExp(`${ESC}\\[K`, 'g');

/** Matches every ANSI sequence of the form `ESC[<digits and semicolons>m`. */
const ANSI_RE = /\x1b\[[0-9;]*m/g;
|
2026-05-10 23:12:26 +02:00
|
|
|
|
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
|
|
|
/**
 * Pulls a `[NN%] message` progress marker out of a raw output chunk.
 *
 * The chunk is normalised first: a leading carriage return, anything matched
 * by `ESC_K_RE`, and one trailing newline are removed, then the remainder is
 * trimmed.
 *
 * @param chunk - Raw text as written to the captured stream.
 * @returns The marker re-rendered as `[NN%] message`, or `null` when the
 *   cleaned chunk is not a percentage marker followed by a message.
 * @internal
 */
export function extractProgressMessage(chunk: string): string | null {
  const withoutLeadingCr = chunk.replace(/^\r/, '');
  const withoutEraseCodes = withoutLeadingCr.replace(ESC_K_RE, '');
  const normalized = withoutEraseCodes.replace(/\n$/, '').trim();

  const parsed = /^\[(\d+)%\]\s*(.+)$/.exec(normalized);
  if (parsed === null) return null;

  // Re-render so the marker and message are always separated by one space.
  const [, percent, message] = parsed;
  return `[${percent}%] ${message}`;
}
|
|
|
|
|
|
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
|
|
|
/**
 * Condenses scan output into a short table-count summary.
 *
 * Looks for the first `N change(s) across M table(s)` phrase in the output and
 * reports only the table count.
 *
 * @param output - Captured scan output.
 * @returns `"M tables"` (or `"1 table"` for exactly one table), or `null` when
 *   the phrase is not present.
 * @internal
 */
export function parseScanSummary(output: string): string | null {
  const found = /(\d+) changes? across (\d+) tables?/.exec(output);
  if (found === null) return null;

  const tableCount = found[2];
  // The input phrase may be singular ("1 table"); keep the summary grammatical
  // instead of always emitting the plural form.
  return Number(tableCount) === 1 ? `${tableCount} table` : `${tableCount} tables`;
}
|
|
|
|
|
|
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
|
|
|
/**
 * Condenses ingest output into a short summary string.
 *
 * A `Saved memory: <text>` line takes precedence and its text (up to the end
 * of that line) is returned verbatim. Otherwise a `Tasks: <count>` marker is
 * turned into a task-count summary.
 *
 * @param output - Captured ingest output.
 * @returns The saved-memory text, `"N tasks"` (or `"1 task"` for exactly one
 *   task), or `null` when neither marker is present.
 * @internal
 */
export function parseIngestSummary(output: string): string | null {
  const savedMemory = /Saved memory: (.+)/.exec(output);
  if (savedMemory !== null) return savedMemory[1];

  const tasks = /Tasks: (\d+)/.exec(output);
  if (tasks === null) return null;

  const taskCount = tasks[1];
  // Avoid the ungrammatical "1 tasks" when a single task was reported.
  return Number(taskCount) === 1 ? `${taskCount} task` : `${taskCount} tasks`;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Scans captured command output, line by line, for report identifiers and
 * artifact paths.
 *
 * - `Report: <value>` is recorded as an artifact path for `database-ingest`
 *   and as a report id for every other operation.
 * - `Raw sources: <value>` is always recorded as an artifact path.
 * - For `source-ingest`, every `report=<token>` occurrence on a line is
 *   recorded as a report id.
 *
 * The literal value `none` is ignored. Results are de-duplicated and keep
 * first-seen order.
 *
 * @param output - Captured stdout/stderr text.
 * @param operation - Operation of the target that produced the output.
 * @returns The unique report ids and artifact paths found.
 */
function collectOutputMetadata(
  output: string,
  operation: KtxPublicIngestPlanTarget['operation'],
): { reportIds: string[]; artifactPaths: string[] } {
  const reports = new Set<string>();
  const artifacts = new Set<string>();
  const isMeaningful = (value: string): boolean => value !== '' && value !== 'none';

  output.split(/\r?\n/).forEach((rawLine) => {
    const line = rawLine.trim();

    const reportValue = /^Report:\s*(.+)$/.exec(line)?.[1].trim();
    if (reportValue !== undefined && isMeaningful(reportValue)) {
      // Database ingests print a path after "Report:", other operations an id.
      const bucket = operation === 'database-ingest' ? artifacts : reports;
      bucket.add(reportValue);
    }

    const rawSourcesValue = /^Raw sources:\s*(.+)$/.exec(line)?.[1].trim();
    if (rawSourcesValue !== undefined && isMeaningful(rawSourcesValue)) {
      artifacts.add(rawSourcesValue);
    }

    if (operation !== 'source-ingest') return;
    for (const [, reportId] of line.matchAll(/\breport=([^\s]+)/g)) {
      reports.add(reportId);
    }
  });

  return { reportIds: Array.from(reports), artifactPaths: Array.from(artifacts) };
}
|
|
|
|
|
|
|
|
|
|
/** Pairs a {@link KtxCliIo} with an accessor for the text written through it. */
interface CapturedIo {
  /** IO handle to pass to the code whose output should be captured. */
  io: KtxCliIo;
  /** Returns the text captured so far. */
  captured(): string;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds an in-memory IO pair that records everything written to it.
 *
 * Both stdout and stderr writes are appended to one shared buffer. Each stdout
 * chunk is additionally passed to `extractProgressMessage`, and any progress
 * marker found is forwarded to `onProgress`.
 *
 * @param onProgress - Called with each progress marker seen on stdout.
 * @param isTTY - Value exposed as `stdout.isTTY` on the returned IO.
 * @returns The capturing IO plus an accessor for the accumulated text.
 */
function createCaptureIo(onProgress: (message: string) => void, isTTY: boolean): CapturedIo {
  let recorded = '';

  const stdout = {
    isTTY,
    write(chunk: string) {
      recorded += chunk;
      const marker = extractProgressMessage(chunk);
      if (marker) onProgress(marker);
    },
  };

  const stderr = {
    write(chunk: string) {
      recorded += chunk;
    },
  };

  return {
    io: { stdout, stderr },
    captured: () => recorded,
  };
}
|
|
|
|
|
|
2026-05-10 17:08:55 -07:00
|
|
|
// --- Source progress helpers ---
|
|
|
|
|
|
2026-05-13 17:01:48 +02:00
|
|
|
/**
 * Derives the optional progress fields of a source update from a detail line.
 *
 * Each field is included only when it has a value: `percent` when
 * `extractPercent` yields one, `message` when text remains after stripping a
 * leading `[NN%]` marker, and `updatedAtMs` when a timestamp was supplied.
 *
 * @param detailLine - Detail line of a target, or `null` when there is none.
 * @param updatedAtMs - Timestamp of the last progress update, or `null`.
 * @returns The fields that could be derived; empty when there is no detail line.
 */
function progressFieldsFromDetailLine(
  detailLine: string | null,
  updatedAtMs: number | null,
): Pick<ContextBuildSourceProgressUpdate, 'percent' | 'message' | 'updatedAtMs'> {
  if (!detailLine) return {};

  const percent = extractPercent(detailLine);
  const message = detailLine.replace(/^\[\d+%\]\s*/, '');

  const percentField = percent === null ? {} : { percent };
  const messageField = message ? { message } : {};
  const timestampField = updatedAtMs === null ? {} : { updatedAtMs };

  return { ...percentField, ...messageField, ...timestampField };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Renders the detail line for a source progress update.
 *
 * @param source - Progress update to render.
 * @returns `null` when the update has no message; `[NN%] message` when it
 *   carries a finite numeric percent (rounded and clamped to 0–100); otherwise
 *   the bare message.
 */
function detailLineFromProgressSource(source: ContextBuildSourceProgressUpdate): string | null {
  const { message, percent } = source;
  if (!message) return null;

  if (typeof percent !== 'number' || !Number.isFinite(percent)) return message;

  const clamped = Math.min(100, Math.max(0, Math.round(percent)));
  return `[${clamped}%] ${message}`;
}
|
|
|
|
|
|
2026-05-10 17:08:55 -07:00
|
|
|
/**
 * Converts per-target view state into source progress updates.
 *
 * Optional fields are emitted only when they carry information: `startedAtMs`
 * when the target has a start time, `elapsedMs` when it is positive, the
 * progress fields derived from the detail line, and `summaryText` when set.
 *
 * @param targets - Target states to convert.
 * @returns One progress update per target, in the same order.
 */
function collectSourceProgress(targets: ContextBuildTargetState[]): ContextBuildSourceProgressUpdate[] {
  const updates: ContextBuildSourceProgressUpdate[] = [];

  for (const state of targets) {
    const derived = progressFieldsFromDetailLine(state.detailLine, state.progressUpdatedAtMs);
    const startField = state.startedAt !== null ? { startedAtMs: state.startedAt } : {};
    const elapsedField = state.elapsedMs > 0 ? { elapsedMs: state.elapsedMs } : {};
    const summaryField = state.summaryText ? { summaryText: state.summaryText } : {};

    updates.push({
      connectionId: state.target.connectionId,
      operation: state.target.operation,
      status: state.status,
      ...startField,
      ...elapsedField,
      ...derived,
      ...summaryField,
    });
  }

  return updates;
}
|
|
|
|
|
|
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
|
|
|
/**
 * Rebuilds a view state from a list of source progress updates.
 *
 * Each update becomes a target state with placeholder plan details (empty
 * driver, debug command and steps) and no phases. `database-ingest` updates
 * become primary sources and `source-ingest` updates become context sources.
 *
 * @param sources - Progress updates to render.
 * @param now - Current time in milliseconds, used to compute elapsed times.
 * @param startedAtMs - Start time of the overall build, when known.
 * @returns A view state at frame 0 with no star count.
 * @internal
 */
export function viewStateFromSourceProgress(
  sources: ContextBuildSourceProgressUpdate[],
  now: number,
  startedAtMs?: number,
): ContextBuildViewState {
  const toTargetState = (source: ContextBuildSourceProgressUpdate): ContextBuildTargetState => {
    // Running sources tick against `now`; everything else keeps its reported duration.
    let elapsedMs = source.elapsedMs ?? 0;
    if (source.status === 'running' && source.startedAtMs) {
      elapsedMs = now - source.startedAtMs;
    }

    return {
      target: {
        connectionId: source.connectionId,
        driver: '',
        operation: source.operation,
        debugCommand: '',
        steps: [],
      },
      status: source.status,
      detailLine: detailLineFromProgressSource(source),
      summaryText: source.summaryText ?? null,
      failureText: null,
      startedAt: source.startedAtMs ?? null,
      elapsedMs,
      progressUpdatedAtMs: source.updatedAtMs ?? null,
      phases: [],
    };
  };

  const statesFor = (operation: ContextBuildSourceProgressUpdate['operation']): ContextBuildTargetState[] =>
    sources.filter((source) => source.operation === operation).map(toTargetState);

  let totalElapsedMs = 0;
  if (startedAtMs) {
    totalElapsedMs = now - startedAtMs;
  }

  return {
    primarySources: statesFor('database-ingest'),
    contextSources: statesFor('source-ingest'),
    frame: 0,
    startedAt: startedAtMs ?? null,
    totalElapsedMs,
    starCount: null,
  };
}
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
// --- Repaint ---
|
|
|
|
|
|
2026-05-10 17:08:55 -07:00
|
|
|
/**
 * Creates a helper that redraws a multi-line block in place on `io.stdout`.
 *
 * After each paint it records how many terminal rows the cursor must move up
 * to return to the first row of the block, accounting for lines that wrap at
 * the terminal width. The next paint rewinds by that amount before writing.
 *
 * @param io - CLI IO whose stdout receives the control sequences and content.
 * @returns An object with `paint(content)` to (re)draw the block and
 *   `columns()` to read the terminal width currently in use.
 */
export function createRepainter(io: KtxCliIo) {
  let painted = false;
  let rowsToRewind = 0;

  /** Width from the IO's stdout, else the process stdout, else 80 columns. */
  function terminalColumns(): number {
    const candidates = [io.stdout.columns, process.stdout.columns];
    const usable = candidates.find(
      (columns): columns is number => typeof columns === 'number' && Number.isFinite(columns) && columns > 0,
    );
    return usable ?? 80;
  }

  /** Rows one logical line occupies once ANSI sequences are stripped; at least 1. */
  function rowsOccupied(line: string, columns: number): number {
    const visibleLength = line.replace(ANSI_RE, '').length;
    return Math.max(1, Math.ceil(visibleLength / columns));
  }

  /** Rows between the first painted row and the row the cursor ends on. */
  function rewindRowsFor(content: string): number {
    const columns = terminalColumns();
    const segments = content.split('\n');
    const tail = segments.pop() ?? '';

    let rows = 0;
    for (const segment of segments) {
      rows += rowsOccupied(segment, columns);
    }
    // Without a trailing newline the cursor stays on the last line, so only
    // that line's wrapped continuation rows count.
    if (!content.endsWith('\n')) {
      rows += Math.max(0, rowsOccupied(tail, columns) - 1);
    }
    return rows;
  }

  return {
    paint(content: string) {
      const chunks: string[] = [];
      if (painted) {
        // Move back to the first row and column of the previous paint.
        if (rowsToRewind > 0) {
          chunks.push(`${ESC}[${rowsToRewind}A`);
        }
        chunks.push('\r');
      }
      // Erase each line before writing it, then clear whatever is left below.
      chunks.push(`${ESC}[2K`, content.replaceAll('\n', `\n${ESC}[2K`), `${ESC}[J`);

      for (const chunk of chunks) {
        io.stdout.write(chunk);
      }

      painted = true;
      rowsToRewind = rewindRowsFor(content);
    },
    columns() {
      return terminalColumns();
    },
  };
}
|
|
|
|
|
|
|
|
|
|
// --- Orchestration ---
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
/**
 * Builds the initial view state for one planned ingest target: queued, with
 * no detail/summary/failure text, no timing information, and a fresh phase
 * list from `makePhasesForTarget`.
 */
function makeTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetState {
  const queuedState: ContextBuildTargetState = {
    target,
    status: 'queued',
    detailLine: null,
    summaryText: null,
    failureText: null,
    startedAt: null,
    elapsedMs: 0,
    progressUpdatedAtMs: null,
    phases: makePhasesForTarget(target),
  };
  return queuedState;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Human-readable reasons for the errno-style network error codes this module
 * knows how to explain. Key order matters: text scanning reports the first key
 * (in this order) that appears in the scanned text.
 */
const NETWORK_ERROR_REASONS: Record<string, string> = {
  EADDRNOTAVAIL: 'network address unavailable',
  ECONNRESET: 'connection reset',
  ECONNREFUSED: 'connection refused',
  ENETUNREACH: 'network unreachable',
  ENOTFOUND: 'host not found',
  ETIMEDOUT: 'connection timed out',
  EHOSTUNREACH: 'host unreachable',
};

/**
 * One whole-word matcher per known code, in `NETWORK_ERROR_REASONS` key order.
 * Compiled once at module load instead of on every lookup.
 */
const NETWORK_ERROR_CODE_MATCHERS: ReadonlyArray<readonly [string, RegExp]> = Object.keys(NETWORK_ERROR_REASONS).map(
  (code) => [code, new RegExp(`\\b${code}\\b`)] as const,
);

/** Returns `error.message` for `Error` instances and `String(error)` for anything else. */
function unknownErrorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Finds a known network error code mentioned as a whole word in `text`.
 *
 * @returns The first code, in `NETWORK_ERROR_REASONS` key order, that occurs
 *   in `text`, or `null` when none does.
 */
function networkErrorCodeFromText(text: string): string | null {
  for (const [code, pattern] of NETWORK_ERROR_CODE_MATCHERS) {
    if (pattern.test(text)) {
      return code;
    }
  }
  return null;
}

/**
 * Determines which known network error code, if any, explains a failure.
 *
 * A string `code` property on `error` wins when it is one of the known codes;
 * otherwise the error's message and `capturedOutput` are scanned for a code.
 *
 * @param error Thrown value (may be anything, including `null`).
 * @param capturedOutput Extra text to scan, e.g. output captured from the run.
 * @returns A key of `NETWORK_ERROR_REASONS`, or `null`.
 */
function networkErrorCode(error: unknown, capturedOutput = ''): string | null {
  const rawCode = (error as { code?: unknown } | null | undefined)?.code;
  // Own-property check: a plain index lookup would also accept inherited keys
  // such as "constructor" or "toString" and report them as network codes.
  if (typeof rawCode === 'string' && Object.prototype.hasOwnProperty.call(NETWORK_ERROR_REASONS, rawCode)) {
    return rawCode;
  }
  return networkErrorCodeFromText(`${unknownErrorMessage(error)}\n${capturedOutput}`);
}
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
/**
 * Reports whether the failure text shows a refused connection to port 8765 on
 * the local machine (`127.0.0.1` or `localhost`).
 *
 * Both `capturedOutput` and `fallback` are searched; either may be absent.
 * NOTE(review): 8765 is presumably the local SQL analysis runtime's port, as
 * the function name suggests; confirm against the runtime's configuration.
 */
function isLocalSqlAnalysisConnectionRefused(input: { capturedOutput?: string; fallback?: string | null }): boolean {
  const haystack = [input.capturedOutput ?? '', input.fallback ?? ''].join('\n');
  if (!/\bECONNREFUSED\b/.test(haystack)) {
    return false;
  }
  return /\b(?:127\.0\.0\.1|localhost):8765\b/.test(haystack);
}
|
|
|
|
|
|
2026-05-12 16:56:58 -04:00
|
|
|
/**
 * Maps a driver identifier to its display name, matching case-insensitively.
 *
 * Unrecognized drivers are returned exactly as given; an empty driver string
 * becomes the generic phrase "the source".
 */
function friendlyDriverName(driver: string): string {
  switch (driver.toLowerCase()) {
    case 'postgres':
      return 'PostgreSQL';
    case 'mysql':
      return 'MySQL';
    case 'sqlserver':
      return 'SQL Server';
    case 'bigquery':
      return 'BigQuery';
    case 'snowflake':
      return 'Snowflake';
    case 'clickhouse':
      return 'ClickHouse';
    case 'sqlite':
      return 'SQLite';
    default:
      return driver || 'the source';
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the `detail` of the first step whose status is `'failed'`, or `null`
 * when no step failed or that step carries no detail.
 */
function failedStepDetail(result: KtxPublicIngestTargetResult): string | null {
  const firstFailedStep = result.steps.find(({ status }) => status === 'failed');
  if (firstFailedStep === undefined) {
    return null;
  }
  return firstFailedStep.detail ?? null;
}
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
/**
 * Matches captured output lines that begin with a "Label:" field such as
 * `Report:` or `Status:`. These lines are filtered out when picking a failure
 * line to show.
 */
const INTERNAL_FAILURE_LINE_RE = /^(Report|Run|Job|Status|Adapter|Connection|Sync|Mode|Dry run|Diff|Tasks|Work units|Failed tasks|Saved memory|Provenance rows):\s*/;

/**
 * Matches captured output lines that start like an actionable failure message
 * (known runtime/daemon errors, `Error:`, `Failed`, `Could not`, `Cannot`).
 * These lines are preferred when picking a failure line to show.
 */
const ACTIONABLE_FAILURE_LINE_RE = /^(Missing bundled Python runtime manifest|ktx Python runtime is required|ktx daemon HTTP|Error:|Failed\b|Could not\b|Cannot\b)/;
|
2026-05-14 01:43:06 +02:00
|
|
|
|
2026-05-17 10:27:29 +02:00
|
|
|
/** Removes a leading `Error:` label (and the whitespace after it) from `line`, if present. */
function trimErrorPrefix(line: string): string {
  const prefix = /^Error:\s*/.exec(line);
  return prefix ? line.slice(prefix[0].length) : line;
}
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
/**
 * Picks the most useful single line from captured command output.
 *
 * Lines are trimmed; blank lines, lines starting with "ktx scan completed",
 * and lines matching `INTERNAL_FAILURE_LINE_RE` are ignored. Among the rest,
 * the first line matching `ACTIONABLE_FAILURE_LINE_RE` wins, otherwise the
 * last remaining line. A leading `Error:` label is stripped from the result.
 *
 * @returns The chosen line, or `null` when nothing usable remains.
 */
function firstCapturedFailureLine(output: string | undefined): string | null {
  const candidates: string[] = [];
  for (const rawLine of (output ?? '').split(/\r?\n/)) {
    const candidate = rawLine.trim();
    if (candidate.length === 0) continue;
    if (candidate.startsWith('ktx scan completed')) continue;
    if (INTERNAL_FAILURE_LINE_RE.test(candidate)) continue;
    candidates.push(candidate);
  }

  const actionable = candidates.find((candidate) => ACTIONABLE_FAILURE_LINE_RE.test(candidate));
  const chosen = actionable ?? candidates.at(-1) ?? null;
  return chosen ? trimErrorPrefix(chosen) : null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reports whether `detail` is only the generic
 * "<connectionId> failed at <step>[.][ Retry: ...]" sentence, i.e. it carries
 * no information beyond which step failed.
 *
 * @param target Target whose `connectionId` must open the sentence.
 * @param detail Step detail to classify; `null`/`undefined` never match.
 */
function isGenericFailedAtDetail(target: KtxPublicIngestPlanTarget, detail: string | null | undefined): boolean {
  // The id is matched literally: escape regex metacharacters so ids such as
  // "a.b" do not over-match and ids such as "db(prod" do not throw.
  const literalConnectionId = target.connectionId.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const genericSentence = new RegExp(`^${literalConnectionId} failed at [a-z-]+\\.?(?: Retry: .*)?$`);
  return genericSentence.test(detail ?? '');
}
|
|
|
|
|
|
|
|
|
|
/**
 * Ensures a failure message ends with a "Retry: <command>" hint.
 *
 * The message is trimmed and stripped of trailing periods. If it already
 * contains a "Retry: " hint it is returned in that trimmed form; otherwise a
 * period and the retry command for this target are appended.
 */
function appendRetryIfNeeded(input: {
  message: string;
  target: KtxPublicIngestPlanTarget;
  projectDir: string;
  entrypoint?: 'setup' | 'ingest';
}): string {
  const { message, target, projectDir, entrypoint } = input;
  const trimmedMessage = message.trim().replace(/\.+$/, '');

  const alreadyHasRetryHint = /\bRetry:\s/.test(trimmedMessage);
  if (alreadyHasRetryHint) {
    return trimmedMessage;
  }

  const retry = retryCommand({
    projectDir,
    entrypoint,
    connectionId: target.connectionId,
    queryHistory: target.queryHistory?.enabled === true,
    queryHistoryWindowDays: target.queryHistory?.windowDays,
  });
  return `${trimmedMessage}. Retry: ${retry}`;
}
|
|
|
|
|
|
2026-05-12 16:56:58 -04:00
|
|
|
/**
 * Produces the user-facing failure text for one target.
 *
 * When a known network error code is found (on the error, in its message, or
 * in the captured output), the text is a three-part sentence: what ktx was
 * doing, the reason with its code, and a retry command. A refused connection
 * to the local SQL analysis runtime gets its own headline.
 *
 * Otherwise the text is the step's fallback detail, or the best captured
 * output line when the fallback is missing or merely generic, or finally
 * "<connectionId> failed.". For the `ingest` entrypoint a retry hint is
 * appended when not already present.
 */
function failureTextForTarget(input: {
  target: KtxPublicIngestPlanTarget;
  projectDir: string;
  entrypoint?: 'setup' | 'ingest';
  capturedOutput?: string;
  error?: unknown;
  fallback?: string | null;
}): string {
  const { target, projectDir, entrypoint, capturedOutput } = input;

  const code = networkErrorCode(input.error, capturedOutput);
  if (code) {
    let headline: string;
    if (isLocalSqlAnalysisConnectionRefused({ capturedOutput, fallback: input.fallback })) {
      headline = `ktx could not reach the local SQL analysis runtime while processing query history for ${target.connectionId}.`;
    } else {
      const operation = target.operation === 'database-ingest' ? 'reading schema for' : 'ingesting';
      headline = `ktx lost its connection to ${friendlyDriverName(target.driver)} while ${operation} ${target.connectionId}.`;
    }
    const reason = `Reason: ${NETWORK_ERROR_REASONS[code]} (${code}).`;
    const retry = `Retry: ${retryCommand({
      projectDir,
      entrypoint,
      connectionId: target.connectionId,
      queryHistory: target.queryHistory?.enabled === true,
      queryHistoryWindowDays: target.queryHistory?.windowDays,
    })}`;
    return [headline, reason, retry].join(' ');
  }

  const capturedFailure = firstCapturedFailureLine(capturedOutput);
  let detail: string;
  if (capturedFailure && isGenericFailedAtDetail(target, input.fallback)) {
    // A generic "failed at <step>" detail says less than the captured line.
    detail = capturedFailure;
  } else {
    detail = input.fallback ?? capturedFailure ?? `${target.connectionId} failed.`;
  }

  if (entrypoint !== 'ingest') {
    return detail;
  }
  return appendRetryIfNeeded({
    message: detail,
    target,
    projectDir,
    entrypoint,
  });
}
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
/**
 * Creates the initial view state for a context build.
 *
 * Targets are split by operation: `database-ingest` targets become
 * `primarySources`, `source-ingest` targets become `contextSources`, each in
 * its original order. Counters, timers and the star count start empty.
 */
export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuildViewState {
  const statesForOperation = (operation: KtxPublicIngestPlanTarget['operation']) =>
    targets.filter((candidate) => candidate.operation === operation).map((candidate) => makeTargetState(candidate));

  const primarySources = statesForOperation('database-ingest');
  const contextSources = statesForOperation('source-ingest');

  return {
    primarySources,
    contextSources,
    frame: 0,
    startedAt: null,
    totalElapsedMs: 0,
    starCount: null,
  };
}
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
/**
 * Formats a progress update as "[NN%] message".
 *
 * The percentage is rounded and clamped to 0..100; the message is passed
 * through `publicProgressMessage` for the given target.
 */
function formatProgressDetail(
  update: Pick<KtxIngestProgressUpdate, 'percent' | 'message'>,
  target: KtxPublicIngestPlanTarget,
): string {
  const rounded = Math.round(update.percent);
  const clamped = Math.max(0, Math.min(100, rounded));
  const message = publicProgressMessage(update.message, target);
  return `[${clamped}%] ${message}`;
}
|
|
|
|
|
|
2026-06-08 16:14:56 +02:00
|
|
|
/** Time-to-live for the cached star count: 24 hours, expressed in milliseconds. */
const STAR_COUNT_CACHE_TTL_MS = 1000 * 60 * 60 * 24;
|
|
|
|
|
|
|
|
|
|
/**
 * Interprets an environment variable as a boolean flag.
 *
 * Unset, empty, `'0'` and `'false'` are off; every other value is on. The
 * comparison is exact (case-sensitive, no trimming).
 */
function envFlag(value: string | undefined): boolean {
  switch (value) {
    case undefined:
    case '':
    case '0':
    case 'false':
      return false;
    default:
      return true;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * The star prompt is suppressed when any of `CI`, `DO_NOT_TRACK` or
 * `KTX_NO_STAR` is set to an enabled flag value (see `envFlag`).
 */
function shouldSuppressStarPrompt(env: StarPromptEnv): boolean {
  const optOutValues = [env.CI, env.DO_NOT_TRACK, env.KTX_NO_STAR];
  return optOutValues.some((value) => envFlag(value));
}
|
|
|
|
|
|
|
|
|
|
/**
 * Seeds the view state with the cached star count and, when the cache is
 * missing or stale, refreshes it in the background.
 *
 * The refresh is fire-and-forget and best-effort: a failed fetch, a
 * non-finite result, a failing repaint or a failed cache write never
 * surfaces to the caller.
 *
 * @param input.fetchStarCount Fetches the current count.
 * @param input.homeDir Forwarded to the cache read/write helpers.
 * @param input.now Clock used for the freshness check and the cache timestamp.
 * @param input.paint Called after a fresh count has been stored in `state`.
 * @param input.state View state whose `starCount` is updated in place.
 */
function startStarPromptCountRefresh(input: {
  fetchStarCount: typeof defaultFetchGitHubStarCount;
  homeDir?: string;
  now: () => number;
  paint: () => void;
  state: ContextBuildViewState;
}): void {
  const cached = readStarCountCache({ homeDir: input.homeDir });
  if (cached) {
    // Show the cached value immediately, even if it turns out to be stale.
    input.state.starCount = cached.count;
  }

  if (isFreshStarCountCache(cached, new Date(input.now()), STAR_COUNT_CACHE_TTL_MS)) {
    return;
  }

  void input
    .fetchStarCount()
    .then((count) => {
      if (typeof count !== 'number' || !Number.isFinite(count)) {
        return undefined;
      }
      input.state.starCount = count;
      input.paint();
      // Return the write so that, if it is asynchronous and rejects, the
      // rejection is absorbed by the catch below instead of escaping as an
      // unhandled rejection from a detached promise.
      return writeStarCountCache(
        {
          count,
          fetchedAt: new Date(input.now()).toISOString(),
        },
        { homeDir: input.homeDir },
      );
    })
    .catch(() => undefined);
}
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
/**
 * Runs every planned ingest target in order while rendering build progress.
 *
 * On a TTY the view is repainted in place (spinner tick every 140 ms, plus a
 * repaint on every progress event). Without a TTY, progress lines are written
 * as plain text and one unstyled final view is written at the end. Targets
 * run sequentially: database-ingest targets first, then source-ingest targets.
 * A target counts as failed when its executor throws or any of its steps
 * reports `failed`; later targets still run.
 *
 * @param project Project the ingest plan is built from.
 * @param args Build options (project dir, target selection, query history, ...).
 * @param io CLI I/O; `io.stdout` receives all rendering.
 * @param deps Optional overrides: clock, target executor, star-count fetcher,
 *   progress listener and throttle.
 * @returns `exitCode` 1 when any target failed, else 0, plus the report ids
 *   and artifact paths collected from target output (omitted when empty).
 */
export async function runContextBuild(
  project: KtxPublicIngestProject,
  args: ContextBuildArgs,
  io: KtxCliIo,
  deps: ContextBuildDeps = {},
): Promise<ContextBuildResult> {
  const plan = buildPublicIngestPlan(project, {
    projectDir: args.projectDir,
    ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}),
    all: args.all ?? true,
    ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}),
    ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}),
    ...(args.scanMode ? { scanMode: args.scanMode } : {}),
  });
  const state = initViewState(plan.targets);
  const isTTY = io.stdout.isTTY === true;
  const nowFn = deps.now ?? (() => Date.now());

  state.startedAt = nowFn();

  // In-place repainting only makes sense on a TTY; everything below treats a
  // null repainter as "plain output mode".
  const repainter = isTTY ? createRepainter(io) : null;
  const starPromptEnabled = repainter !== null && !shouldSuppressStarPrompt(deps.starPromptEnv ?? process.env);
  const viewOpts = {
    styled: true,
    projectDir: args.projectDir,
    notices: plan.notices ?? [],
    warnings: plan.warnings,
  };
  // No-op without a repainter: the optional call also skips rendering.
  const paint = (hint: boolean) =>
    repainter?.paint(
      renderContextBuildView(state, {
        ...viewOpts,
        showHint: hint,
        showStarPrompt: starPromptEnabled && hint,
        columns: repainter.columns(),
      }),
    );

  // Set once the target loop has ended (normally or by throwing). The star
  // count fetch is asynchronous and may resolve after that point; repainting
  // the live view then would overwrite the final frame.
  let buildFinished = false;
  if (starPromptEnabled) {
    startStarPromptCountRefresh({
      fetchStarCount: deps.fetchStarCount ?? defaultFetchGitHubStarCount,
      homeDir: deps.starPromptHomeDir,
      now: nowFn,
      paint: () => {
        if (!buildFinished) paint(true);
      },
      state,
    });
  }
  paint(true);

  let spinnerInterval: ReturnType<typeof setInterval> | null = null;
  if (repainter) {
    // Spinner tick: advance the frame and refresh elapsed times of whatever
    // is currently running, then repaint.
    spinnerInterval = setInterval(() => {
      state.frame++;
      if (state.startedAt !== null) {
        state.totalElapsedMs = nowFn() - state.startedAt;
      }
      for (const t of [...state.primarySources, ...state.contextSources]) {
        if (t.status === 'running' && t.startedAt !== null) {
          t.elapsedMs = nowFn() - t.startedAt;
        }
        for (const phase of t.phases) {
          if (phase.status === 'running' && phase.startedAt !== null) {
            phase.elapsedMs = nowFn() - phase.startedAt;
          }
        }
      }
      paint(true);
    }, 140);
  }

  const orderedTargets = [...state.primarySources, ...state.contextSources];
  const execTarget = deps.executeTarget ?? executePublicIngestTarget;
  const reportIds = new Set<string>();
  const artifactPaths = new Set<string>();
  const sourceProgressThrottleMs = deps.sourceProgressThrottleMs ?? 750;
  let lastSourceProgressPublishedAt = Number.NEGATIVE_INFINITY;

  // Publishes a progress snapshot to the optional listener, at most once per
  // throttle window unless forced. Returns whether a snapshot was published.
  const publishSourceProgress = (force = false): boolean => {
    if (!deps.onSourceProgress) return false;
    const now = nowFn();
    if (!force && now - lastSourceProgressPublishedAt < sourceProgressThrottleMs) {
      return false;
    }
    lastSourceProgressPublishedAt = now;
    deps.onSourceProgress(collectSourceProgress(orderedTargets));
    return true;
  };

  const runArgs: Extract<KtxPublicIngestArgs, { command: 'run' }> = {
    command: 'run',
    projectDir: args.projectDir,
    ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}),
    all: args.all ?? true,
    json: false,
    inputMode: args.inputMode,
    ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}),
    ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}),
    ...(args.scanMode ? { scanMode: args.scanMode } : {}),
    ...(args.detectRelationships !== undefined ? { detectRelationships: args.detectRelationships } : {}),
    ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}),
    ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}),
  };

  let hasFailure = false;

  try {
    for (const targetState of orderedTargets) {
      targetState.status = 'running';
      targetState.startedAt = nowFn();
      paint(true);
      publishSourceProgress(true);
      // True when the most recent progress event was throttled away, so one
      // forced publish is still owed once the target finishes.
      let hasPendingProgressPublish = false;
      // Generic ingest progress belongs to the query-history phase for
      // database targets and to the source-ingest phase otherwise.
      const ingestPhaseKeyForTarget: PhaseKey =
        targetState.target.operation === 'database-ingest' ? 'query-history' : 'source-ingest';

      const updateNamedPhase = (key: PhaseKey, update: KtxIngestProgressUpdate): void => {
        const phase = targetState.phases.find((p) => p.key === key);
        if (phase) {
          if (phase.status === 'queued') {
            phase.status = 'running';
            phase.startedAt = nowFn();
          }
          // Drop a leading "[NN%]" from the message; the phase tracks the
          // percentage separately.
          const sanitizedMessage = update.message.replace(/^\[\d+%\]\s*/, '');
          phase.detail = publicProgressMessage(sanitizedMessage, targetState.target);
          // The displayed percentage never moves backwards.
          phase.percent = Math.max(phase.percent, Math.max(0, Math.min(100, Math.round(update.percent))));
          phase.progressUpdatedAtMs = nowFn();
        }
        targetState.detailLine = formatProgressDetail(update, targetState.target);
        targetState.progressUpdatedAtMs = nowFn();
        if (!repainter) {
          io.stdout.write(`${targetState.detailLine}\n`);
        }
        paint(true);
        hasPendingProgressPublish = !publishSourceProgress(false);
      };

      const updateSchemaPhase = (update: KtxIngestProgressUpdate): void => updateNamedPhase('database-schema', update);
      const updateIngestPhase = (update: KtxIngestProgressUpdate): void => updateNamedPhase(ingestPhaseKeyForTarget, update);

      // The target writes to a capturing I/O; each captured message becomes
      // the target's current detail line.
      const capture = createCaptureIo(
        (message) => {
          targetState.detailLine = publicProgressMessage(message, targetState.target);
          targetState.progressUpdatedAtMs = nowFn();
          if (!repainter) {
            io.stdout.write(`${targetState.detailLine}\n`);
          }
          paint(true);
          hasPendingProgressPublish = !publishSourceProgress(false);
        },
        false,
      );

      const onPhaseStart = (key: PhaseKey): void => {
        const phase = targetState.phases.find((p) => p.key === key);
        if (!phase) return;
        phase.status = 'running';
        // Keep the start time if a progress update already started the phase.
        if (phase.startedAt === null) phase.startedAt = nowFn();
        phase.progressUpdatedAtMs = nowFn();
        paint(true);
        hasPendingProgressPublish = !publishSourceProgress(false);
      };

      const onPhaseEnd = (key: PhaseKey, status: 'done' | 'failed' | 'skipped', summary?: string): void => {
        const phase = targetState.phases.find((p) => p.key === key);
        if (!phase) return;
        phase.status = status;
        if (phase.startedAt !== null) {
          phase.elapsedMs = nowFn() - phase.startedAt;
        }
        if (status === 'done') {
          phase.percent = 100;
        }
        // Without an explicit summary, derive one from the output captured so
        // far for phases that completed successfully.
        let resolvedSummary = summary;
        if (status === 'done' && !resolvedSummary) {
          const captured = capture.captured();
          if (key === 'database-schema') {
            resolvedSummary = parseScanSummary(captured) ?? undefined;
          } else if (key === 'query-history' || key === 'source-ingest') {
            resolvedSummary = parseIngestSummary(captured) ?? undefined;
          }
        }
        if (resolvedSummary) {
          phase.summary = resolvedSummary;
        }
        paint(true);
        hasPendingProgressPublish = !publishSourceProgress(false);
      };

      const progressDeps: KtxPublicIngestDeps = {
        scanProgress: createAggregateProgressPort(updateSchemaPhase),
        ingestProgress: updateIngestPhase,
        runtimeIo: io,
        onPhaseStart,
        onPhaseEnd,
      };

      // A throwing target is recorded as failed rather than aborting the
      // remaining targets.
      let result: KtxPublicIngestTargetResult | null = null;
      let thrownError: unknown = null;
      try {
        result = await execTarget(targetState.target, runArgs, capture.io, progressDeps, project);
      } catch (error) {
        thrownError = error;
      }

      if (hasPendingProgressPublish) {
        publishSourceProgress(true);
      }

      targetState.elapsedMs = nowFn() - (targetState.startedAt ?? nowFn());
      const failed = thrownError !== null || result?.steps.some((s) => s.status === 'failed') === true;
      targetState.status = failed ? 'failed' : 'done';
      targetState.detailLine = null;
      const capturedOutput = capture.captured();
      const metadata = collectOutputMetadata(capturedOutput, targetState.target.operation);
      for (const reportId of metadata.reportIds) reportIds.add(reportId);
      for (const artifactPath of metadata.artifactPaths) artifactPaths.add(artifactPath);
      if (!failed) {
        targetState.summaryText =
          targetState.target.operation === 'database-ingest'
            ? parseScanSummary(capturedOutput)
            : parseIngestSummary(capturedOutput);
      } else {
        targetState.failureText = failureTextForTarget({
          target: targetState.target,
          projectDir: args.projectDir,
          entrypoint: args.entrypoint,
          capturedOutput,
          error: thrownError,
          fallback: result ? failedStepDetail(result) : null,
        });
      }
      if (failed) hasFailure = true;

      paint(true);
      publishSourceProgress(true);
    }
  } finally {
    // From here on nothing may repaint the live view asynchronously: stop the
    // spinner and disarm the star-count repaint.
    buildFinished = true;
    if (spinnerInterval) clearInterval(spinnerInterval);
  }

  if (state.startedAt !== null) {
    state.totalElapsedMs = nowFn() - state.startedAt;
  }

  if (!repainter) {
    io.stdout.write(renderContextBuildView(state, { ...viewOpts, styled: false }));
  } else {
    // Final frame: no hint and no star prompt.
    paint(false);
  }

  return {
    exitCode: hasFailure ? 1 : 0,
    ...(reportIds.size > 0 ? { reportIds: [...reportIds] } : {}),
    ...(artifactPaths.size > 0 ? { artifactPaths: [...artifactPaths] } : {}),
  };
}
|