feat: merge ingest and scan

* docs: add CLI component reuse guidance

* docs: add unified ingest ux design

* Refine unified ingest UX design after adversarial review iteration 1

* Refine unified ingest UX design after adversarial review iteration 2

* Refine unified ingest UX design after adversarial review iteration 3

* feat(cli): route public connection ingest command

* feat(cli): hide standalone scan from public help

* feat(cli): plan public ingest depth and query history

* feat(cli): execute public database ingest facets

* feat(ingest): read connection query history config

* fix(cli): use public ingest wording

* fix(config): stop generating ingest adapter allow lists

* docs: document public ingest command

* test: align ingest surface expectations

* docs: add unified ingest public CLI surface plan

* feat(cli): preflight deep public ingest readiness

* feat(setup): store query history in connection context

* feat(setup): store database context depth

* feat(setup): verify context readiness by database depth

* fix(setup): keep context build foreground only

* fix(config): reject reserved ingest connection ids

* test: close unified ingest v1 expectations

* docs: add unified ingest v1 closure plan

* fix(ingest): bypass adapter allow-list for public source ingest

* fix(ingest): honor query history window intent

* fix(ingest): hide scan internals from public database ingest

* feat(ingest): use foreground view for interactive public ingest

* fix(setup): use schema context and query history wording

* test(cli): verify unified ingest public output

* docs: add unified ingest v1 public output closure plan

* fix(setup): forward query history flags

* fix(setup): prompt for postgres query history

* fix(status): report query history readiness

* fix(ingest): remove legacy public guidance

* fix(ingest): polish foreground retry copy

* docs(examples): use unified query history wording

* chore(ingest): finish public query history cleanup

* docs: add unified ingest v1 query history status cleanup plan

* test(docs): cover unified ingest public docs

* docs: align ingest CLI reference with unified UX

* docs: update context build guides for unified ingest

* docs: update setup and primary source ingest wording

* docs: stop advertising adapter-backed example ingest

* docs: close unified ingest public docs gaps

* docs: add unified ingest v1 docs site closure plan

* fix: render unified ingest foreground warnings

* fix: explain query history schema order

* fix: add public ingest retry guidance

* fix: align setup next steps with unified ingest

* fix: remove scan wording from demo progress

* test: verify unified ingest ux closure

* docs: add unified ingest v1 foreground and retry closure plan

* fix(cli): preserve query-history pull config in public ingest

* fix(cli): omit hidden commands from docs command tree

* test(cli): close unified ingest final public surface checks

* docs: add unified ingest v1 final public surface closure plan

* fix(cli): use public source labels in ingest reports

* fix(cli): suppress low-level public ingest output

* test(cli): verify unified ingest public plain output

* docs: add unified ingest v1 public plain output closure plan

* fix(cli): add public ingest copy sanitizers

* fix(cli): sanitize public ingest progress copy

* fix(cli): rename setup schema scope prompt

* docs(plan): add progress copy closure; test: align setup back-nav fixture

Adds the iter9 plan and updates the setup back-navigation test fixture
to pass disableQueryHistory plus listSchemas/listTables stubs that the
unified ingest setup step now requires.

* docs(plan): add final ux labels plan with narrowed label scans

* fix(cli): aggregate unsupported query-history warnings

* fix(cli): align setup database labels

* test(cli): fix setup database test type-check

* fix(cli): remove primary-source wording from setup output

* test(cli): verify unified ingest setup closure

* docs(plan): add unified ingest v1 verification copy closure plan

* fix(cli): remove top-level scan command

* fix(cli): remove legacy ingest and wiki commands

* Merge scan into ingest flow

* feat(cli): split ingest progress into per-phase rows, rename work units to tasks

Each database target in the unified ingest dashboard now renders one row per
real subprocess (Schema, then Query history when enabled) instead of a single
combined bar. Each phase has its own monotonic 0-100% bar so the progress
never snaps back to zero when historic-sql starts after scan completes.
Completed phases keep their final bar, summary, and elapsed time visible as
an inline audit trail; queued and skipped phases are shown explicitly.

Also rename user-facing "work units" / "Failed work units" to "tasks" /
"Failed tasks" in ingest output and parseIngestSummary. The parser still
accepts the legacy "Work units:" wording in captured output for backward
compatibility. Internal memory-flow event names and type fields are left alone.

* Fix test harness failures

* Fix CI smoke checks

---------

Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
This commit is contained in:
Andrey Avtomonov 2026-05-14 01:43:06 +02:00 committed by GitHub
parent 1a472cf3ed
commit b00c1a11a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
118 changed files with 16890 additions and 2992 deletions

View file

@ -5,6 +5,7 @@ import type {
KtxProjectEmbeddingConfig,
KtxProjectLlmConfig,
} from '@ktx/context/project';
import type { PostgresPgssProbeResult } from '@ktx/context/ingest';
import type { DoctorCheck } from './doctor.js';
type ProjectStatusLevel = 'ok' | 'warn' | 'fail';
@ -32,6 +33,11 @@ interface ConnectionStatus extends ProjectStatusLine {
driver: string;
}
/**
 * Query-history readiness line for one connection.
 * Extends the shared status-line fields with the connection id and the dialect.
 */
interface QueryHistoryStatus extends ProjectStatusLine {
/** Id of the connection (its key under `connections` in the project config) this entry reports on. */
connection: string;
/** Query-history dialect; only `'postgres'` is representable here. */
dialect: 'postgres';
}
interface PipelineStatus {
adapters: string[];
enrichmentMode: string;
@ -70,6 +76,7 @@ export interface ProjectStatus {
embeddings: EmbeddingsStatus;
storage: StorageStatus;
connections: ConnectionStatus[];
queryHistory: QueryHistoryStatus[];
pipeline: PipelineStatus;
warnings: WarningItem[];
verdict: ProjectVerdict;
@ -294,6 +301,144 @@ function buildConnectionStatus(
}
}
/** Arguments handed to a Postgres query-history readiness probe. */
interface PostgresQueryHistoryProbeInput {
/** Project directory of the project whose status is being built. */
projectDir: string;
/** Key of the connection under `connections` in the project config. */
connectionId: string;
/** Configuration of the connection to probe. */
connection: KtxProjectConnectionConfig;
/** Environment variables made available to the probe. */
env: NodeJS.ProcessEnv;
}
/**
 * Async readiness probe for Postgres query history.
 * Resolves with the probe result; callers treat a rejection as a failed readiness check.
 */
type PostgresQueryHistoryProbe = (
input: PostgresQueryHistoryProbeInput,
) => Promise<PostgresPgssProbeResult>;
/**
 * Narrows an arbitrary value to a plain key/value record.
 *
 * @param value - Any value, typically a loosely typed config fragment.
 * @returns The same reference typed as a record when `value` is a non-null,
 *   non-array object; otherwise `null`.
 */
function recordValue(value: unknown): Record<string, unknown> | null {
  // `typeof null === 'object'`, so null has to be excluded explicitly.
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
/**
 * Reads `connection.context.queryHistory` as a record.
 *
 * @returns The query-history settings object, or `null` when either the
 *   context or its `queryHistory` entry is missing or not a plain object.
 */
function queryHistoryRecord(connection: KtxProjectConnectionConfig): Record<string, unknown> | null {
  const contextRecord = recordValue(connection.context);
  if (contextRecord === null) {
    return null;
  }
  return recordValue(contextRecord.queryHistory);
}
/**
 * Reads the legacy `connection.historicSql` block as a record.
 *
 * @returns The legacy settings object, or `null` when it is absent or not a plain object.
 */
function legacyHistoricSqlRecord(connection: KtxProjectConnectionConfig): Record<string, unknown> | null {
  const legacyBlock: unknown = connection.historicSql;
  return recordValue(legacyBlock);
}
/**
 * Decides whether a connection has query history switched on.
 *
 * The `context.queryHistory` block takes precedence: when it exists, only its
 * `enabled` flag is consulted. The legacy `historicSql` block is used only when
 * no `context.queryHistory` block is present, and then it must be enabled with
 * the `postgres` dialect.
 */
function isEnabledPostgresQueryHistory(connection: KtxProjectConnectionConfig): boolean {
  const current = queryHistoryRecord(connection);
  if (current !== null) {
    // A present-but-disabled block does not fall through to the legacy settings.
    return current.enabled === true;
  }

  const legacyConfig = legacyHistoricSqlRecord(connection);
  if (legacyConfig === null) {
    return false;
  }
  return legacyConfig.enabled === true && legacyConfig.dialect === 'postgres';
}
/**
 * Reports whether the connection's driver is Postgres.
 * The comparison is case-insensitive and accepts both `postgres` and `postgresql`;
 * a missing driver counts as not Postgres.
 */
function isPostgresDriver(connection: KtxProjectConnectionConfig): boolean {
  const normalized = String(connection.driver ?? '').toLowerCase();
  return ['postgres', 'postgresql'].includes(normalized);
}
/**
 * Chooses the remediation hint shown next to a failed query-history check.
 *
 * @param error - Whatever the readiness probe threw.
 * @param connectionId - Connection key, used in the generic hint.
 * @param projectDir - Project directory, used in the generic rerun command.
 * @returns The error's own `remediation` text for extension/grant failures when
 *   it is a non-blank string, a fixed hint for unsupported server versions, and
 *   a generic "fix settings and rerun" hint otherwise.
 */
function queryHistoryFailureFix(error: unknown, connectionId: string, projectDir: string): string {
  const genericFix = `Fix connections.${connectionId} Postgres settings, then rerun \`ktx status --project-dir ${projectDir}\``;
  if (!(error instanceof Error)) {
    return genericFix;
  }

  switch (error.name) {
    case 'HistoricSqlExtensionMissingError':
    case 'HistoricSqlGrantsMissingError': {
      // `'remediation' in error` is also true for `remediation: undefined` or a
      // non-string value; stringifying those would surface "undefined" or
      // "[object Object]" to the user, so fall back to the generic hint instead.
      const remediation: unknown = 'remediation' in error ? error.remediation : undefined;
      return typeof remediation === 'string' && remediation.trim().length > 0 ? remediation : genericFix;
    }
    case 'HistoricSqlVersionUnsupportedError':
      return 'Use PostgreSQL 14 or newer, or disable query history for this connection';
    default:
      return genericFix;
  }
}
/**
 * Produces a one-line description of a probe failure.
 *
 * @param error - Whatever was thrown.
 * @returns The first line of the trimmed error message for `Error` instances
 *   with a non-blank message; otherwise `String(error)`.
 */
function failureDetail(error: unknown): string {
  if (error instanceof Error) {
    // Split on LF and CRLF alike so a Windows-style message does not leave a
    // stray carriage return at the end of the rendered status line.
    const firstLine = error.message.trim().split(/\r?\n/, 1)[0] ?? '';
    if (firstLine.length > 0) {
      return firstLine;
    }
  }
  return String(error);
}
/**
 * Formats the detail text for a successful readiness probe.
 *
 * Starts with the server version, then appends the warnings (if any) and the
 * informational notes (if any), each list joined with `; `.
 */
function readinessDetail(result: PostgresPgssProbeResult): string {
  const segments = [`pg_stat_statements ready (${result.pgServerVersion})`];

  if (result.warnings.length > 0) {
    segments.push(` with warnings: ${result.warnings.join('; ')}`);
  }

  // `info` may be absent on the probe result; treat that as "no notes".
  const notes = result.info ?? [];
  if (notes.length > 0) {
    segments.push(`; info: ${notes.join('; ')}`);
  }

  return segments.join('');
}
/**
 * Default readiness probe: runs `PostgresPgssReader.probe` against the connection
 * using the native Postgres connector's historic-SQL query client.
 *
 * Both packages are loaded with dynamic `import()` when the probe runs.
 *
 * @throws Error when the connection config is not accepted by the native
 *   Postgres connector; any error from the probe itself also propagates.
 */
async function defaultPostgresQueryHistoryProbe(
  input: PostgresQueryHistoryProbeInput,
): Promise<PostgresPgssProbeResult> {
  const [ingestModule, postgresModule] = await Promise.all([
    import('@ktx/context/ingest'),
    import('@ktx/connector-postgres'),
  ]);

  // Read the driver label before the guard below so the error message does not
  // depend on how the guard narrows `input.connection`.
  const driverLabel = input.connection.driver ?? 'unknown';
  if (!postgresModule.isKtxPostgresConnectionConfig(input.connection)) {
    throw new Error(`Native PostgreSQL connector cannot run driver "${driverLabel}"`);
  }

  const queryClient = new postgresModule.KtxPostgresHistoricSqlQueryClient({
    connectionId: input.connectionId,
    connection: input.connection,
    env: input.env,
  });
  try {
    const reader = new ingestModule.PostgresPgssReader();
    return await reader.probe(queryClient);
  } finally {
    // Always run the client's cleanup, whether the probe succeeded or threw.
    await queryClient.cleanup();
  }
}
/**
 * Builds one query-history readiness entry per connection that has query
 * history enabled, ordered by connection id.
 *
 * Connections are probed one after another. A non-Postgres driver yields a
 * `fail` entry without probing; a probe that throws yields a `fail` entry with
 * a remediation hint; a probe that returns warnings yields `warn`; otherwise `ok`.
 *
 * @param project - Loaded project; its `config.connections` and `projectDir` are read.
 * @param options - Optional probe override and environment (defaults to `process.env`).
 */
async function buildQueryHistoryStatus(
  project: KtxLocalProject,
  options: BuildProjectStatusOptions,
): Promise<QueryHistoryStatus[]> {
  const runProbe = options.postgresQueryHistoryProbe ?? defaultPostgresQueryHistoryProbe;
  const probeEnv = options.env ?? process.env;

  // Evaluates a single connection; never rejects, failures become `fail` entries.
  const evaluate = async (
    connectionId: string,
    connection: KtxProjectConnectionConfig,
  ): Promise<QueryHistoryStatus> => {
    if (!isPostgresDriver(connection)) {
      return {
        connection: connectionId,
        dialect: 'postgres',
        status: 'fail',
        detail: `connections.${connectionId}.context.queryHistory is enabled but driver is ${String(connection.driver)}`,
        fix: `Set connections.${connectionId}.driver to postgres or disable query history for this connection`,
      };
    }

    try {
      const result = await runProbe({
        projectDir: project.projectDir,
        connectionId,
        connection,
        env: probeEnv,
      });
      if (result.warnings.length === 0) {
        return {
          connection: connectionId,
          dialect: 'postgres',
          status: 'ok',
          detail: readinessDetail(result),
        };
      }
      return {
        connection: connectionId,
        dialect: 'postgres',
        status: 'warn',
        detail: readinessDetail(result),
        fix: `Update the Postgres parameter group or config, then rerun \`ktx status --project-dir ${project.projectDir}\``,
      };
    } catch (error) {
      return {
        connection: connectionId,
        dialect: 'postgres',
        status: 'fail',
        detail: failureDetail(error),
        fix: queryHistoryFailureFix(error, connectionId, project.projectDir),
      };
    }
  };

  const enabledConnections = Object.entries(project.config.connections).filter(([, connection]) =>
    isEnabledPostgresQueryHistory(connection),
  );
  enabledConnections.sort(([leftId], [rightId]) => leftId.localeCompare(rightId));

  const statuses: QueryHistoryStatus[] = [];
  for (const [connectionId, connection] of enabledConnections) {
    // Deliberately sequential: each probe finishes before the next one starts.
    statuses.push(await evaluate(connectionId, connection));
  }
  return statuses;
}
const ADAPTER_DRIVER_REQUIREMENT: Record<string, string[]> = {
'live-database': ['postgres', 'postgresql', 'mysql', 'snowflake', 'bigquery', 'clickhouse', 'sqlite', 'sqlserver'],
dbt: ['dbt', 'dbt-core', 'dbt-cloud'],
@ -411,6 +556,7 @@ function buildVerdict(
llm: LlmStatus,
embeddings: EmbeddingsStatus,
connections: ConnectionStatus[],
queryHistory: QueryHistoryStatus[],
warnings: WarningItem[],
): { verdict: ProjectVerdict; reason: string; nextActions: string[] } {
if (llm.status === 'fail') {
@ -420,6 +566,14 @@ function buildVerdict(
nextActions: ['ktx setup'],
};
}
const failedQueryHistory = queryHistory.filter((entry) => entry.status === 'fail').length;
if (failedQueryHistory > 0) {
return {
verdict: 'blocked',
reason: `Query history readiness failed for ${failedQueryHistory} connection${failedQueryHistory === 1 ? '' : 's'}.`,
nextActions: ['ktx status --verbose'],
};
}
const reasons: string[] = [];
if (llm.status === 'warn') reasons.push('LLM credentials missing');
@ -432,6 +586,10 @@ function buildVerdict(
}
const missing = connections.filter((c) => c.status !== 'ok').length;
if (missing > 0) reasons.push(`${missing} connection${missing === 1 ? '' : 's'} need configuration`);
const queryHistoryWarnings = queryHistory.filter((entry) => entry.status === 'warn').length;
if (queryHistoryWarnings > 0) {
reasons.push(`${queryHistoryWarnings} query history warning${queryHistoryWarnings === 1 ? '' : 's'}`);
}
if (warnings.length > 0) reasons.push(`${warnings.length} config warning${warnings.length === 1 ? '' : 's'}`);
if (reasons.length === 0) {
@ -451,9 +609,10 @@ function buildVerdict(
/** Optional inputs for building a project status. */
export interface BuildProjectStatusOptions {
/** Environment variables to use; `process.env` is used when omitted. */
env?: NodeJS.ProcessEnv;
/** Replacement for the default Postgres query-history readiness probe. */
postgresQueryHistoryProbe?: PostgresQueryHistoryProbe;
}
export function buildProjectStatus(project: KtxLocalProject, options: BuildProjectStatusOptions = {}): ProjectStatus {
export async function buildProjectStatus(project: KtxLocalProject, options: BuildProjectStatusOptions = {}): Promise<ProjectStatus> {
const env = options.env ?? process.env;
const config = project.config;
@ -463,9 +622,10 @@ export function buildProjectStatus(project: KtxLocalProject, options: BuildProje
const connections = Object.entries(config.connections).map(([name, conn]) =>
buildConnectionStatus(name, conn, env),
);
const queryHistory = await buildQueryHistoryStatus(project, options);
const pipeline = buildPipelineStatus(config);
const warnings = buildWarnings(config, connections, llm, embeddings);
const { verdict, reason, nextActions } = buildVerdict(llm, embeddings, connections, warnings);
const { verdict, reason, nextActions } = buildVerdict(llm, embeddings, connections, queryHistory, warnings);
return {
projectName: config.project,
@ -474,6 +634,7 @@ export function buildProjectStatus(project: KtxLocalProject, options: BuildProje
embeddings,
storage,
connections,
queryHistory,
pipeline,
warnings,
verdict,
@ -580,6 +741,21 @@ export function renderProjectStatus(status: ProjectStatus, options: RenderProjec
}
lines.push('');
if (status.queryHistory.length > 0) {
lines.push(` ${bold('Query history')}`);
const connectionWidth = Math.max(...status.queryHistory.map((entry) => entry.connection.length));
for (const entry of status.queryHistory) {
lines.push(
` ${sym(entry.status)} ${entry.connection.padEnd(connectionWidth)} ${dim(entry.dialect)} ${entry.detail}`,
);
if (entry.fix && entry.status !== 'ok') {
const indent = 6 + connectionWidth + 3 + entry.dialect.length + 3;
lines.push(`${' '.repeat(indent)}${dim(`${entry.fix}`)}`);
}
}
lines.push('');
}
// Pipeline
lines.push(` ${bold('Pipeline')}`);
const pipelineLabelWidth = Math.max('Adapters'.length, 'Enrichment'.length, 'Research agent'.length);