import type { KtxProgressPort, KtxScanMode, KtxScanReport, KtxScanWarning } from './context/scan/types.js'; import { runLocalScan } from './context/scan/local-scan.js'; import { loadKtxProject } from './context/project/project.js'; import { getKtxCliPackageInfo } from './cli-runtime.js'; import { resolveProjectEmbeddingProvider } from './embedding-resolution.js'; import type { KtxCliIo } from './index.js'; import { createKtxCliLocalIngestAdapters } from './local-adapters.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; import { profileMark } from './startup-profile.js'; import { emitTelemetryEvent } from './telemetry/index.js'; import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:scan'); export interface KtxScanArgs { command: 'run'; projectDir: string; connectionId: string; mode: KtxScanMode; detectRelationships: boolean; dryRun: boolean; databaseIntrospectionUrl?: string; cliVersion?: string; runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; } export interface KtxScanDeps { runLocalScan?: typeof runLocalScan; createLocalIngestAdapters?: typeof createKtxCliLocalIngestAdapters; resolveEmbeddingProvider?: typeof resolveProjectEmbeddingProvider; progress?: KtxProgressPort; runtimeIo?: KtxCliIo; } function shouldUseStyledOutput(io: KtxCliIo): boolean { return io.stdout.isTTY === true && !process.env.NO_COLOR && process.env.TERM !== 'dumb' && !process.env.CI; } function green(text: string): string { return `\u001b[32m${text}\u001b[39m`; } function dim(text: string): string { return `\u001b[2m${text}\u001b[22m`; } function quoteCliArg(value: string): string { if (/^[A-Za-z0-9_./:@=-]+$/.test(value)) { return value; } return `'${value.replaceAll("'", "'\\''")}'`; } function plural(count: number, singular: string, pluralValue = `${singular}s`): string { return count === 1 ? singular : pluralValue; } function tableChangeCount(report: KtxScanReport): number { return report.diffSummary.tablesAdded + report.diffSummary.tablesModified + report.diffSummary.tablesDeleted; } function totalTableCount(report: KtxScanReport): number { return tableChangeCount(report) + report.diffSummary.tablesUnchanged; } function scanColumnCount(report: KtxScanReport): number { return report.structuralSyncStats.columnsCreated + report.structuralSyncStats.columnsUpdated; } function inferredFkCount(report: KtxScanReport): number { return report.relationships.accepted + report.relationships.review + report.relationships.rejected; } function writeScanIdentity(report: KtxScanReport, io: KtxCliIo): void { io.stdout.write(`Run: ${report.runId}\n`); io.stdout.write(`Connection: ${report.connectionId}\n`); io.stdout.write(`Mode: ${report.mode}\n`); io.stdout.write(`Sync: ${report.syncId}\n`); io.stdout.write(`Dry run: ${report.dryRun ? 'yes' : 'no'}\n`); } function writeWhatChanged(report: KtxScanReport, io: KtxCliIo): void { const changedTables = tableChangeCount(report); const totalTables = totalTableCount(report); io.stdout.write('\nWhat changed\n'); const tableNoun = plural(totalTables, 'table'); const changeNoun = plural(changedTables, 'change'); io.stdout.write( ` Semantic layer comparison found ${changedTables} ${changeNoun} across ${totalTables} ${tableNoun}\n`, ); io.stdout.write(` New tables: ${report.diffSummary.tablesAdded}\n`); io.stdout.write(` Changed tables: ${report.diffSummary.tablesModified}\n`); io.stdout.write(` Removed tables: ${report.diffSummary.tablesDeleted}\n`); io.stdout.write(` Unchanged tables: ${report.diffSummary.tablesUnchanged}\n`); if ( report.diffSummary.columnsAdded > 0 || report.diffSummary.columnsModified > 0 || report.diffSummary.columnsDeleted > 0 ) { io.stdout.write(` New columns: ${report.diffSummary.columnsAdded}\n`); io.stdout.write(` Changed columns: ${report.diffSummary.columnsModified}\n`); io.stdout.write(` Removed columns: ${report.diffSummary.columnsDeleted}\n`); } } function hasRelationshipResults(report: KtxScanReport): boolean { return ( report.relationships.accepted > 0 || report.relationships.review > 0 || report.relationships.rejected > 0 || report.relationships.skipped > 0 ); } function writeRelationships(report: KtxScanReport, io: KtxCliIo): void { if (!hasRelationshipResults(report)) { return; } io.stdout.write('\nRelationships\n'); io.stdout.write(` Accepted: ${report.relationships.accepted}\n`); io.stdout.write(` Review: ${report.relationships.review}\n`); io.stdout.write(` Rejected: ${report.relationships.rejected}\n`); io.stdout.write(` Skipped: ${report.relationships.skipped}\n`); } function capabilityGapMessage(gap: string): string { if (gap === 'columnStats') { return 'columnStats is unavailable; relationship confidence may be lower.'; } if (gap === 'tableSampling' || gap === 'columnSampling') { return `${gap} is unavailable; descriptions may be less specific.`; } if (gap === 'readOnlySql') { return 'readOnlySql is unavailable; relationship and validation checks may be limited.'; } return `${gap} is unavailable; scan results may be less complete.`; } function warningLine(warning: KtxScanWarning): string { const location = warning.table ? `${warning.table}${warning.column ? `.${warning.column}` : ''}: ` : ''; return `${warning.code}: ${location}${warning.message}`; } function groupWarningsByCode(warnings: readonly KtxScanWarning[]): Map { const groups = new Map(); for (const warning of warnings) { const list = groups.get(warning.code); if (list) { list.push(warning); } else { groups.set(warning.code, [warning]); } } return groups; } function describeWarningGroup(code: string, count: number): string { switch (code) { case 'sampling_failed': return `${count} ${plural(count, 'table')} could not be sampled (retries exhausted); descriptions used metadata-only fallback or were skipped.`; case 'description_fallback_used': return `${count} ${plural(count, 'table')} got an AI description from column metadata only (no sample rows available).`; case 'enrichment_failed': return `${count} ${plural(count, 'table/column')} could not be enriched.`; case 'connector_capability_missing': return `${count} ${plural(count, 'table')} affected by missing connector capability.`; case 'statistics_failed': return `${count} statistics ${plural(count, 'lookup')} failed.`; case 'llm_unavailable': return 'LLM provider unavailable; AI enrichment was skipped.'; case 'embedding_unavailable': return 'Embedding provider unavailable; embeddings were skipped.'; case 'relationship_validation_failed': return `${count} relationship ${plural(count, 'validation')} could not run.`; case 'relationship_llm_invalid_reference': return `${count} LLM-proposed ${plural(count, 'relationship')} referenced unknown columns.`; case 'relationship_llm_proposal_failed': return `${count} LLM relationship ${plural(count, 'proposal')} failed.`; case 'scan_enrichment_backend_not_configured': return 'Scan enrichment backend is not configured; AI stages were skipped.'; case 'credential_redacted': return `${count} ${plural(count, 'credential')} were redacted from scan output.`; default: return `${count} ${plural(count, 'warning')} (${code})`; } } function managedDaemonOptionsForScanRun(args: Extract, io: KtxCliIo) { if (args.databaseIntrospectionUrl || !args.cliVersion || !args.runtimeInstallPolicy) { return undefined; } return { cliVersion: args.cliVersion, projectDir: args.projectDir, installPolicy: args.runtimeInstallPolicy, io, }; } function writeNeedsAttention(report: KtxScanReport, io: KtxCliIo): void { io.stdout.write('\nNeeds attention\n'); if (report.warnings.length === 0 && report.capabilityGaps.length === 0) { io.stdout.write(' None\n'); return; } if (report.warnings.length > 0) { io.stdout.write(` ${report.warnings.length} ${plural(report.warnings.length, 'warning')}\n`); const groups = groupWarningsByCode(report.warnings); for (const [code, warnings] of groups) { io.stdout.write(` - ${describeWarningGroup(code, warnings.length)}\n`); const first = warnings[0]; if (first) { io.stdout.write(` ${warningLine(first)}\n`); } if (warnings.length > 1) { const moreTables = warnings .slice(1) .map((warning) => warning.table ? (warning.column ? `${warning.table}.${warning.column}` : warning.table) : null, ) .filter((value): value is string => value !== null) .slice(0, 3); if (moreTables.length > 0) { const suffix = warnings.length - 1 > moreTables.length ? `, …` : ''; io.stdout.write(` also: ${moreTables.join(', ')}${suffix}\n`); } } } } if (report.capabilityGaps.length > 0) { io.stdout.write(` ${report.capabilityGaps.length} capability ${plural(report.capabilityGaps.length, 'gap')}\n`); for (const gap of report.capabilityGaps) { io.stdout.write(` - ${capabilityGapMessage(gap)}\n`); } } } function writeArtifacts(report: KtxScanReport, io: KtxCliIo): void { io.stdout.write('\nArtifacts\n'); io.stdout.write(` Report: ${report.artifactPaths.reportPath ?? 'none'}\n`); io.stdout.write(` Raw sources: ${report.artifactPaths.rawSourcesDir ?? 'none'}\n`); if (report.artifactPaths.manifestShards.length > 0) { io.stdout.write(` Schema shards: ${report.artifactPaths.manifestShards.length}\n`); } if (report.artifactPaths.enrichmentArtifacts.length > 0) { io.stdout.write(` Enrichment artifacts: ${report.artifactPaths.enrichmentArtifacts.length}\n`); } } function writeHumanReportBody(report: KtxScanReport, io: KtxCliIo): void { writeScanIdentity(report, io); writeWhatChanged(report, io); writeRelationships(report, io); writeNeedsAttention(report, io); writeArtifacts(report, io); } function writeRunSummary(report: KtxScanReport, projectDir: string, io: KtxCliIo): void { const styled = shouldUseStyledOutput(io); io.stdout.write(`${styled ? green('✓') : ''}${styled ? ' ' : ''}KTX scan completed\n`); io.stdout.write('Status: done\n'); writeHumanReportBody(report, io); const projectDirArg = quoteCliArg(projectDir); io.stdout.write('\nNext:\n'); const statusCommand = styled ? dim('ktx status') : 'ktx status'; io.stdout.write(` ${statusCommand} --project-dir ${projectDirArg}\n`); } interface KtxCliScanProgressState { progress: number; hasPendingTransient: boolean; } interface KtxCliScanProgressUpdateOptions { transient?: boolean; } interface KtxCliScanProgress extends Omit { update(progress: number, message?: string, options?: KtxCliScanProgressUpdateOptions): Promise; flush(): void; } /** @internal */ export function createCliScanProgress( io: KtxCliIo, state: KtxCliScanProgressState = { progress: 0, hasPendingTransient: false }, start = 0, weight = 1, ): KtxCliScanProgress { const shouldWrite = io.stdout.isTTY === true && !process.env.CI; const progress: KtxCliScanProgress = { async update(value: number, message?: string, options?: KtxCliScanProgressUpdateOptions) { const absoluteValue = start + Math.max(0, Math.min(1, value)) * weight; state.progress = Math.max(state.progress, Math.min(1, absoluteValue)); if (!shouldWrite || !message) { return; } const percent = Math.max(0, Math.min(100, Math.round(absoluteValue * 100))); const line = `[${percent}%] ${message}`; if (options?.transient === true) { io.stdout.write(`\r${line}\u001b[K`); state.hasPendingTransient = true; return; } progress.flush(); io.stdout.write(`${line}\n`); }, startPhase(phaseWeight: number) { return createCliScanProgress(io, state, state.progress, weight * phaseWeight); }, flush() { if (!shouldWrite || !state.hasPendingTransient) { return; } io.stdout.write('\n'); state.hasPendingTransient = false; }, }; return progress; } export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps: KtxScanDeps = {}): Promise { const startedAt = performance.now(); try { const project = await loadKtxProject({ projectDir: args.projectDir }); const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? resolveProjectEmbeddingProvider; const resolution = await resolveEmbeddingProvider(project, { mode: 'ensure', installPolicy: args.runtimeInstallPolicy ?? 'never', cliVersion: args.cliVersion ?? getKtxCliPackageInfo().version, io: deps.runtimeIo ?? io, }); const embeddingProvider = resolution.kind === 'disabled' || resolution.kind === 'managed-unavailable' ? null : resolution.provider; const managedDaemon = managedDaemonOptionsForScanRun(args, deps.runtimeIo ?? io); const connector = args.mode !== 'structural' || args.detectRelationships ? await createKtxCliScanConnector(project, args.connectionId) : undefined; const cliProgress = deps.progress ? null : createCliScanProgress(io); const progress = deps.progress ?? cliProgress; try { const result = await (deps.runLocalScan ?? runLocalScan)({ project, connectionId: args.connectionId, mode: args.mode, detectRelationships: args.detectRelationships, dryRun: args.dryRun, trigger: 'cli', databaseIntrospectionUrl: args.databaseIntrospectionUrl, connector, embeddingProvider, adapters: (deps.createLocalIngestAdapters ?? createKtxCliLocalIngestAdapters)(project, { ...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}), ...(managedDaemon ? { managedDaemon } : {}), }), ...(progress ? { progress } : {}), }); cliProgress?.flush(); await emitTelemetryEvent({ name: 'scan_completed', projectDir: args.projectDir, io, fields: { driver: result.report.driver, tableCount: totalTableCount(result.report), columnCount: scanColumnCount(result.report), inferredFkCount: inferredFkCount(result.report), declaredFkCount: 0, durationMs: Math.max(0, performance.now() - startedAt), outcome: 'ok', }, }); writeRunSummary(result.report, args.projectDir, io); } finally { cliProgress?.flush(); await connector?.cleanup?.(); } return 0; } catch (error) { const errorClass = scrubErrorClass(error); const errorDetail = formatErrorDetail(error); await emitTelemetryEvent({ name: 'scan_completed', projectDir: args.projectDir, io, fields: { driver: 'unknown', tableCount: 0, columnCount: 0, inferredFkCount: 0, declaredFkCount: 0, durationMs: Math.max(0, performance.now() - startedAt), outcome: 'error', ...(errorClass ? { errorClass } : {}), ...(errorDetail ? { errorDetail } : {}), }, }); io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; } }