From 78b8a0c025d62696c56e945a30b72c1f34fe816e Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sun, 24 May 2026 19:30:06 +0200 Subject: [PATCH] feat(connectors): generalize readiness and constraint handling (#212) * feat(connectors): add postgres maxConnections * feat(connectors): add mysql maxConnections * feat(connectors): add sqlserver maxConnections * feat(connectors): rename snowflake pool config * docs: document connector maxConnections * feat(scan): add constraint discovery warning helper * feat(scan): carry structural warnings through reports * feat(postgres): soft-fail denied constraint discovery * feat(mysql): soft-fail denied constraint discovery * feat(sqlserver): soft-fail denied constraint discovery * feat(bigquery): soft-fail denied primary key discovery * feat(snowflake): report denied primary key discovery * test(scan): verify constraint discovery warnings * feat(historic-sql): use shared readiness probes * docs: document query history readiness probes * test(historic-sql): verify readiness probe registry * test(ingest): account for live database warnings artifact * Add skip option for agent setup --- README.md | 3 +- .../content/docs/cli-reference/ktx-setup.mdx | 10 + .../content/docs/cli-reference/ktx-status.mdx | 4 +- .../content/docs/configuration/ktx-yaml.mdx | 15 +- .../docs/integrations/agent-clients.mdx | 11 +- .../src/connectors/bigquery/connector.test.ts | 33 +- .../cli/src/connectors/bigquery/connector.ts | 54 ++- .../src/connectors/mysql/connector.test.ts | 111 +++++- .../cli/src/connectors/mysql/connector.ts | 105 +++++- .../src/connectors/postgres/connector.test.ts | 122 ++++++- .../cli/src/connectors/postgres/connector.ts | 95 +++++- .../connectors/snowflake/connector.test.ts | 93 ++++- .../cli/src/connectors/snowflake/connector.ts | 79 +++-- .../connectors/sqlserver/connector.test.ts | 89 ++++- .../cli/src/connectors/sqlserver/connector.ts | 85 ++++- .../adapters/live-database/stage.test.ts | 26 ++ .../ingest/adapters/live-database/stage.ts | 10 + .../ingest/historic-sql-probes.test.ts | 157 +++++++++ .../src/context/ingest/historic-sql-probes.ts | 141 ++++++++ .../bigquery-runner.test.ts | 110 ++++++ .../historic-sql-probes/bigquery-runner.ts | 160 +++++++++ .../postgres-runner.test.ts | 113 +++++++ .../historic-sql-probes/postgres-runner.ts | 111 ++++++ .../snowflake-runner.test.ts | 82 +++++ .../historic-sql-probes/snowflake-runner.ts | 96 ++++++ .../context/ingest/local-stage-ingest.test.ts | 2 +- .../context/scan/constraint-discovery.test.ts | 70 ++++ .../src/context/scan/constraint-discovery.ts | 42 +++ .../cli/src/context/scan/local-scan.test.ts | 49 +++ packages/cli/src/context/scan/local-scan.ts | 3 + .../scan/local-structural-artifacts.test.ts | 83 +++++ .../scan/local-structural-artifacts.ts | 56 +++ packages/cli/src/context/scan/types.ts | 4 +- packages/cli/src/doctor.test.ts | 65 +++- packages/cli/src/ingest.test.ts | 25 +- packages/cli/src/setup-agents.test.ts | 57 ++++ packages/cli/src/setup-agents.ts | 12 +- packages/cli/src/setup-databases.test.ts | 307 +++++++++++++++-- packages/cli/src/setup-databases.ts | 198 ++++------- packages/cli/src/status-project.test.ts | 107 ++++-- packages/cli/src/status-project.ts | 318 ++++-------------- uv.lock | 4 +- 42 files changed, 2763 insertions(+), 554 deletions(-) create mode 100644 packages/cli/src/context/ingest/historic-sql-probes.test.ts create mode 100644 packages/cli/src/context/ingest/historic-sql-probes.ts create mode 100644 packages/cli/src/context/ingest/historic-sql-probes/bigquery-runner.test.ts create mode 100644 packages/cli/src/context/ingest/historic-sql-probes/bigquery-runner.ts create mode 100644 packages/cli/src/context/ingest/historic-sql-probes/postgres-runner.test.ts create mode 100644 packages/cli/src/context/ingest/historic-sql-probes/postgres-runner.ts create mode 100644 packages/cli/src/context/ingest/historic-sql-probes/snowflake-runner.test.ts create mode 100644 packages/cli/src/context/ingest/historic-sql-probes/snowflake-runner.ts create mode 100644 packages/cli/src/context/scan/constraint-discovery.test.ts create mode 100644 packages/cli/src/context/scan/constraint-discovery.ts diff --git a/README.md b/README.md index cb9d25b0..8dadd3e1 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,8 @@ ktx sl query --connection-id warehouse --measure orders.revenue --format sql During setup, choose **Ask data questions with ktx MCP** for agent clients. Choose **Ask data questions + manage ktx with CLI commands** when an operator -agent also needs pinned `ktx` admin commands. +agent also needs pinned `ktx` admin commands. Choose **Skip agent setup for +now** to leave agent integration incomplete and run `ktx setup --agents` later. After setup, **ktx** prints **Required before using agents** with the exact commands to run. If the output includes `ktx mcp start --project-dir ...`, run diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index b94d65db..17423534 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -134,6 +134,16 @@ window flag applies to BigQuery and Snowflake; Postgres reads the current Enabling query history makes deep ingest readiness matter for later `ktx ingest` runs. +When query history is enabled for PostgreSQL, Snowflake, or BigQuery, +`ktx setup` runs a non-blocking readiness probe after the connection test +passes. A failed probe still writes setup changes, prints the warehouse-specific +grant or extension remediation, and leaves query-history ingest disabled until +you fix the prerequisite. + +For BigQuery, the remediation tells you to grant `roles/bigquery.resourceViewer` +on the BigQuery project, or grant a custom role that contains +`bigquery.jobs.listAll`. + ### Context Sources In interactive setup, after you configure a database, choose diff --git a/docs-site/content/docs/cli-reference/ktx-status.mdx b/docs-site/content/docs/cli-reference/ktx-status.mdx index c86c12e0..51c00148 100644 --- a/docs-site/content/docs/cli-reference/ktx-status.mdx +++ b/docs-site/content/docs/cli-reference/ktx-status.mdx @@ -21,7 +21,7 @@ ktx status [options] | `--json` | Print JSON output | `false` | | `-v`, `--verbose` | Show every check, including passing ones | `false` | | `--validate` | Only validate the `ktx.yaml` schema; skip readiness checks | `false` | -| `--fast` | Skip checks that require external communication (Postgres query-history probe, Claude Code auth probe) | `false` | +| `--fast` | Skip checks that require external communication (query-history readiness probes and Claude Code auth probe) | `false` | | `--no-input` | Disable interactive terminal input | - | ## Examples @@ -39,7 +39,7 @@ ktx status --verbose # Validate ktx.yaml without running readiness checks ktx status --validate -# Skip slow probes (Postgres pg_stat_statements, Claude Code auth) +# Skip slow probes (query-history readiness, Claude Code auth) ktx status --fast # Check a project from another directory diff --git a/docs-site/content/docs/configuration/ktx-yaml.mdx b/docs-site/content/docs/configuration/ktx-yaml.mdx index 873a8acd..4a919d45 100644 --- a/docs-site/content/docs/configuration/ktx-yaml.mdx +++ b/docs-site/content/docs/configuration/ktx-yaml.mdx @@ -157,11 +157,14 @@ connections: dataset_ids: [analytics, mart] ``` -For Snowflake connections, set `maxSessions` when deep ingest needs more or -fewer concurrent warehouse sessions. The default is `4`. This caps all -concurrent Snowflake SQL work for that connector instance, including schema -introspection, table sampling, relationship profiling, relationship -validation, and read-only SQL execution. +For Postgres, MySQL, SQL Server, and Snowflake connections, set +`maxConnections` when scan or ingest work needs to stay below the target's +connection cap. Postgres, MySQL, and SQL Server default to `10`; Snowflake +defaults to `4`. This caps all concurrent SQL work for that connector instance, +including schema introspection, table sampling, relationship profiling, +relationship validation, and read-only SQL execution. BigQuery and ClickHouse +do not expose `maxConnections` because their connectors don't use client-side +connection pools. For Postgres, BigQuery, and Snowflake, `historicSql` and `context.queryHistory` toggle query-history ingest. The shape is connector-specific; the setup wizard @@ -517,7 +520,7 @@ the manifest. | `relationships.maxLlmTablesPerBatch` | `int > 0` | `40` | Max tables included in a single LLM relationship-proposal batch. | | `relationships.maxCandidatesPerColumn` | `int > 0` | `25` | Max join partners considered per column. | | `relationships.profileSampleRows` | `int > 0` | `10000` | Rows sampled per table when profiling values for relationship inference. | -| `relationships.profileConcurrency` | `int > 0` | `4` | Parallel relationship-profile queries against the database. For Snowflake, effective database concurrency is also bounded by the connection's `maxSessions`. | +| `relationships.profileConcurrency` | `int > 0` | `4` | Parallel relationship-profile queries against the database. For pooled connectors, effective database concurrency is also bounded by the connection's `maxConnections`. | | `relationships.validationConcurrency` | `int > 0` | `4` | Parallel relationship validation queries against the database. | | `relationships.validationBudget` | `all` \| `int ≥ 0` | runtime default | Cap on validation queries per scan. `all` means unlimited. | diff --git a/docs-site/content/docs/integrations/agent-clients.mdx b/docs-site/content/docs/integrations/agent-clients.mdx index 36aef1c3..46a1ec8b 100644 --- a/docs-site/content/docs/integrations/agent-clients.mdx +++ b/docs-site/content/docs/integrations/agent-clients.mdx @@ -9,7 +9,9 @@ admin surface for setup, ingest, status, daemon lifecycle, and debugging. Run `ktx setup` and select your agent client targets, or configure manually using the snippets below. Choose **Ask data questions with ktx MCP** for agent clients. Choose **Ask data questions + manage ktx with CLI commands** only when -a developer or operator agent also needs pinned `ktx` admin commands. +a developer or operator agent also needs pinned `ktx` admin commands. Choose +**Skip agent setup for now** to leave agent integration incomplete and run +`ktx setup --agents` later. ## Install with setup @@ -43,14 +45,19 @@ ktx setup --agents --target codex --global manifest lets status checks report agent readiness and lets future cleanup remove only files **ktx** installed. -The interactive command asks two questions: +The interactive command asks what agents can do first: ```txt ◆ What should agents be allowed to do with this ktx project? │ ○ Ask data questions with ktx MCP │ ○ Ask data questions + manage ktx with CLI commands +│ ○ Skip agent setup for now └ +``` +If you choose an install mode, it then asks which targets to install: + +```txt ◆ Which agent targets should ktx install? │ ◻ Claude Code │ ◻ Claude Desktop diff --git a/packages/cli/src/connectors/bigquery/connector.test.ts b/packages/cli/src/connectors/bigquery/connector.test.ts index be65af1e..b9893ccf 100644 --- a/packages/cli/src/connectors/bigquery/connector.test.ts +++ b/packages/cli/src/connectors/bigquery/connector.test.ts @@ -3,7 +3,7 @@ import { bigQueryConnectionConfigFromConfig, isKtxBigQueryConnectionConfig, type import { createBigQueryLiveDatabaseIntrospection } from '../../connectors/bigquery/live-database-introspection.js'; import { tableRefSet } from '../../context/scan/table-ref.js'; -function fakeClientFactory(): KtxBigQueryClientFactory { +function fakeClientFactory(options: { primaryKeyError?: Error } = {}): KtxBigQueryClientFactory { const queryResults = vi.fn(async (): ReturnType => [ [{ id: 1, status: 'paid' }], undefined, @@ -11,6 +11,9 @@ function fakeClientFactory(): KtxBigQueryClientFactory { ]); const createQueryJob = vi.fn(async (input: { query: string }): ReturnType => { if (input.query.includes('INFORMATION_SCHEMA.TABLE_CONSTRAINTS')) { + if (options.primaryKeyError) { + throw options.primaryKeyError; + } return [ { getQueryResults: async (): ReturnType => [ @@ -170,6 +173,34 @@ describe('KtxBigQueryScanConnector', () => { ]); }); + it.each([ + Object.assign(new Error('Access Denied'), { code: 403 }), + Object.assign(new Error('Not found'), { errors: [{ reason: 'notFound' }] }), + ])('soft-fails denied BigQuery primary-key discovery with a scan warning', async (primaryKeyError) => { + const connector = new KtxBigQueryScanConnector({ + connectionId: 'warehouse', + connection, + clientFactory: fakeClientFactory({ primaryKeyError }), + now: () => new Date('2026-04-29T17:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'bigquery' }, + { runId: 'scan-run-bigquery-denied-pk' }, + ); + + expect(snapshot.warnings).toEqual([ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in analytics (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'analytics', kind: 'primary_key' }, + }, + ]); + expect(snapshot.tables[0]?.foreignKeys).toEqual([]); + expect(snapshot.tables[0]?.columns.every((column) => column.primaryKey === false)).toBe(true); + }); + it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => { const connector = new KtxBigQueryScanConnector({ connectionId: 'warehouse', diff --git a/packages/cli/src/connectors/bigquery/connector.ts b/packages/cli/src/connectors/bigquery/connector.ts index 7810e251..871f50f4 100644 --- a/packages/cli/src/connectors/bigquery/connector.ts +++ b/packages/cli/src/connectors/bigquery/connector.ts @@ -1,8 +1,28 @@ import { BigQuery, type TableField } from '@google-cloud/bigquery'; import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from '../../context/connections/bigquery-identifiers.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; -import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js'; +import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; +import { + createKtxConnectorCapabilities, + type KtxColumnSampleInput, + type KtxColumnSampleResult, + type KtxColumnStatsInput, + type KtxColumnStatsResult, + type KtxQueryResult, + type KtxReadOnlyQueryInput, + type KtxScanConnector, + type KtxScanContext, + type KtxScanInput, + type KtxScanWarning, + type KtxSchemaColumn, + type KtxSchemaSnapshot, + type KtxSchemaTable, + type KtxTableListEntry, + type KtxTableRef, + type KtxTableSampleInput, + type KtxTableSampleResult, +} from '../../context/scan/types.js'; import { readFileSync } from 'node:fs'; import { homedir } from 'node:os'; import { resolve } from 'node:path'; @@ -185,6 +205,17 @@ function firstNumber(value: unknown): number | null { return Number.isFinite(numberValue) ? numberValue : null; } +function isDeniedError(error: unknown): boolean { + if (!error || typeof error !== 'object') { + return false; + } + const candidate = error as { code?: unknown; errors?: Array<{ reason?: unknown }> }; + return ( + candidate.code === 403 || + candidate.errors?.some((item) => item.reason === 'accessDenied' || item.reason === 'notFound') === true + ); +} + function normalizeValue(value: unknown): unknown { if (value === null || value === undefined) { return null; @@ -289,11 +320,12 @@ export class KtxBigQueryScanConnector implements KtxScanConnector { this.assertConnection(input.connectionId); const tables: KtxSchemaTable[] = []; const datasetIds = this.requireDatasetIdsForScan(); + const snapshotWarnings: KtxScanWarning[] = []; for (const datasetId of datasetIds) { const scopedNames = input.tableScope ? scopedTableNames(input.tableScope, { catalog: this.resolved.projectId, db: datasetId }) : null; - tables.push(...(await this.introspectDataset(datasetId, scopedNames))); + tables.push(...(await this.introspectDataset(datasetId, scopedNames, snapshotWarnings))); } return { connectionId: this.connectionId, @@ -307,6 +339,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector { total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), }, tables, + warnings: snapshotWarnings, }; } @@ -366,7 +399,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector { if (!datasetId) { return 0; } - const tables = await this.introspectDataset(datasetId, null); + const tables = await this.introspectDataset(datasetId, null, []); return tables.find((table) => table.name === tableName)?.estimatedRows ?? 0; } @@ -467,13 +500,24 @@ export class KtxBigQueryScanConnector implements KtxScanConnector { return firstNumber(rows[0]?.[header]); } - private async introspectDataset(datasetId: string, scopedNames: readonly string[] | null): Promise { + private async introspectDataset( + datasetId: string, + scopedNames: readonly string[] | null, + snapshotWarnings: KtxScanWarning[], + ): Promise { if (scopedNames && scopedNames.length === 0) return []; const dataset = this.getClient().dataset(datasetId); const [tableRefs] = await dataset.getTables(); const scopeSet = scopedNames ? new Set(scopedNames) : null; const filteredTableRefs = scopeSet ? tableRefs.filter((tableRef) => scopeSet.has(tableRef.id ?? '')) : tableRefs; - const primaryKeys = await this.primaryKeys(datasetId); + const primaryKeysResult = await tryConstraintQuery( + { schema: datasetId, kind: 'primary_key', isDeniedError }, + () => this.primaryKeys(datasetId), + ); + const primaryKeys = primaryKeysResult.ok ? primaryKeysResult.value : new Map>(); + if (!primaryKeysResult.ok) { + snapshotWarnings.push(primaryKeysResult.warning); + } const tables: KtxSchemaTable[] = []; for (const tableRef of filteredTableRefs) { const tableName = tableRef.id || ''; diff --git a/packages/cli/src/connectors/mysql/connector.test.ts b/packages/cli/src/connectors/mysql/connector.test.ts index 5a21ada7..6c69ea3d 100644 --- a/packages/cli/src/connectors/mysql/connector.test.ts +++ b/packages/cli/src/connectors/mysql/connector.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it, vi } from 'vitest'; import type { FieldPacket, RowDataPacket } from 'mysql2/promise'; import { createMysqlLiveDatabaseIntrospection } from '../../connectors/mysql/live-database-introspection.js'; -import { isKtxMysqlConnectionConfig, KtxMysqlScanConnector, mysqlConnectionPoolConfigFromConfig, type KtxMysqlPoolFactory } from '../../connectors/mysql/connector.js'; +import { isKtxMysqlConnectionConfig, KtxMysqlScanConnector, mysqlConnectionPoolConfigFromConfig, type KtxMysqlConnectionConfig, type KtxMysqlPoolFactory } from '../../connectors/mysql/connector.js'; import { tableRefSet } from '../../context/scan/table-ref.js'; function mysqlResult(rows: Record[], fields: Array<{ name: string; type?: number }>): [RowDataPacket[], FieldPacket[]] { @@ -86,7 +86,9 @@ function fakePoolFactory(): KtxMysqlPoolFactory { }; } -function multiSchemaMysqlPoolFactory(): KtxMysqlPoolFactory { +function multiSchemaMysqlPoolFactory( + options: { primaryKeyError?: Error; foreignKeyError?: Error } = {}, +): KtxMysqlPoolFactory { const query = vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => { if (sql.includes('INFORMATION_SCHEMA.TABLES')) { expect(params).toEqual(['analytics', 'mart']); @@ -141,6 +143,9 @@ function multiSchemaMysqlPoolFactory(): KtxMysqlPoolFactory { ); } if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes("CONSTRAINT_NAME = 'PRIMARY'")) { + if (options.primaryKeyError) { + throw options.primaryKeyError; + } expect(params).toEqual(['analytics', 'mart']); return mysqlResult( [ @@ -151,6 +156,9 @@ function multiSchemaMysqlPoolFactory(): KtxMysqlPoolFactory { ); } if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes('REFERENCED_TABLE_NAME IS NOT NULL')) { + if (options.foreignKeyError) { + throw options.foreignKeyError; + } expect(params).toEqual(['analytics', 'mart']); return mysqlResult([], []); } @@ -191,6 +199,46 @@ describe('KtxMysqlScanConnector', () => { }); }); + it('defaults and validates MySQL maxConnections', () => { + const baseConnection: KtxMysqlConnectionConfig = { + driver: 'mysql', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'secret', // pragma: allowlist secret + }; + + expect( + mysqlConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: baseConnection, + }), + ).toMatchObject({ connectionLimit: 10 }); + + expect( + mysqlConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections: 25 }, + }), + ).toMatchObject({ connectionLimit: 25 }); + + expect( + mysqlConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections: '12' as never }, + }), + ).toMatchObject({ connectionLimit: 12 }); + + for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) { + expect(() => + mysqlConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections }, + }), + ).toThrow('connections.warehouse.maxConnections must be a positive integer'); + } + }); + it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => { const connector = new KtxMysqlScanConnector({ connectionId: 'warehouse', @@ -276,6 +324,65 @@ describe('KtxMysqlScanConnector', () => { ]); }); + it('soft-fails denied MySQL constraint discovery with one warning per schema and kind', async () => { + const connector = new KtxMysqlScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'mysql', + host: 'db.example.test', + database: 'analytics', + schemas: ['analytics', 'mart'], + username: 'reader', + password: 'secret', // pragma: allowlist secret + }, + poolFactory: multiSchemaMysqlPoolFactory({ + primaryKeyError: Object.assign(new Error('select command denied'), { + code: 'ER_TABLEACCESS_DENIED_ERROR', + errno: 1142, + }), + foreignKeyError: Object.assign(new Error('database access denied'), { + code: 'ER_DBACCESS_DENIED_ERROR', + errno: 1044, + }), + }), + now: () => new Date('2026-04-29T12:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'mysql' }, + { runId: 'scan-run-mysql-denied-constraints' }, + ); + + expect(snapshot.warnings).toEqual([ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in analytics (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'analytics', kind: 'primary_key' }, + }, + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in mart (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'mart', kind: 'primary_key' }, + }, + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped foreign-key discovery in analytics (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'analytics', kind: 'foreign_key' }, + }, + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped foreign-key discovery in mart (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'mart', kind: 'foreign_key' }, + }, + ]); + expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true); + expect(snapshot.tables.every((table) => table.foreignKeys.length === 0)).toBe(true); + }); + it('limits introspection to tables in tableScope', async () => { const queries: Array<{ sql: string; params?: unknown }> = []; const poolFactory: KtxMysqlPoolFactory = { diff --git a/packages/cli/src/connectors/mysql/connector.ts b/packages/cli/src/connectors/mysql/connector.ts index 82a2384c..83c9712a 100644 --- a/packages/cli/src/connectors/mysql/connector.ts +++ b/packages/cli/src/connectors/mysql/connector.ts @@ -3,8 +3,33 @@ import { readFileSync } from 'node:fs'; import { homedir } from 'node:os'; import { resolve } from 'node:path'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; -import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxTableListEntry, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js'; +import { + constraintDiscoveryWarning, + tryConstraintQuery, + type ConstraintDiscoveryKind, +} from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; +import { + createKtxConnectorCapabilities, + type KtxColumnSampleInput, + type KtxColumnSampleResult, + type KtxColumnStatsInput, + type KtxColumnStatsResult, + type KtxQueryResult, + type KtxReadOnlyQueryInput, + type KtxScanConnector, + type KtxScanContext, + type KtxScanInput, + type KtxScanWarning, + type KtxSchemaColumn, + type KtxSchemaForeignKey, + type KtxSchemaSnapshot, + type KtxSchemaTable, + type KtxTableListEntry, + type KtxTableRef, + type KtxTableSampleInput, + type KtxTableSampleResult, +} from '../../context/scan/types.js'; import { KtxMysqlDialect } from './dialect.js'; export interface KtxMysqlConnectionConfig { @@ -18,6 +43,7 @@ export interface KtxMysqlConnectionConfig { password?: string; url?: string; ssl?: boolean | { rejectUnauthorized?: boolean }; + maxConnections?: number; [key: string]: unknown; } @@ -163,6 +189,23 @@ function maybeNumber(value: unknown): number | undefined { return typeof value === 'number' && Number.isFinite(value) ? value : undefined; } +function positiveIntegerConfigValue(input: { + connection: KtxMysqlConnectionConfig; + key: keyof KtxMysqlConnectionConfig; + connectionId: string; + defaultValue: number; +}): number { + const value = input.connection[input.key]; + if (value === undefined) { + return input.defaultValue; + } + const numberValue = Number(value); + if (!Number.isInteger(numberValue) || numberValue < 1) { + throw new Error(`connections.${input.connectionId}.${String(input.key)} must be a positive integer`); + } + return numberValue; +} + function parseMysqlUrl(url: string): Partial { const parsed = new URL(url); const sslParam = parsed.searchParams.get('ssl') ?? parsed.searchParams.get('sslmode'); @@ -231,6 +274,28 @@ function primaryKeyMap(rows: MysqlPrimaryKeyRow[], fallbackDatabase: string): Ma return grouped; } +function isDeniedError(error: unknown): boolean { + if (!error || typeof error !== 'object') { + return false; + } + const code = (error as { code?: unknown }).code; + return ( + code === 'ER_TABLEACCESS_DENIED_ERROR' || + code === 'ER_SPECIFIC_ACCESS_DENIED_ERROR' || + code === 'ER_DBACCESS_DENIED_ERROR' + ); +} + +function pushConstraintWarnings( + warnings: KtxScanWarning[], + schemas: readonly string[], + kind: ConstraintDiscoveryKind, +): void { + for (const schema of schemas) { + warnings.push(constraintDiscoveryWarning({ schema, kind })); + } +} + function queryParams(params: Record | unknown[] | undefined): unknown[] | undefined { if (!params) { return undefined; @@ -262,6 +327,12 @@ export function mysqlConnectionPoolConfigFromConfig(input: { const host = stringConfigValue(merged, 'host', env); const database = stringConfigValue(merged, 'database', env); const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env); + const maxConnections = positiveIntegerConfigValue({ + connection: merged, + key: 'maxConnections', + connectionId: input.connectionId, + defaultValue: 10, + }); if (!host) { throw new Error(`Native MySQL connector requires connections.${input.connectionId}.host or url`); @@ -280,7 +351,7 @@ export function mysqlConnectionPoolConfigFromConfig(input: { database, user, password: stringConfigValue(merged, 'password', env), - connectionLimit: 10, + connectionLimit: maxConnections, waitForConnections: true, ...(ssl ? { ssl: { rejectUnauthorized: ssl.rejectUnauthorized ?? false } } : {}), }; @@ -335,6 +406,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector { async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise { this.assertConnection(input.connectionId); const databases = configuredMysqlSchemas(this.connection, this.poolConfig.database); + const snapshotWarnings: KtxScanWarning[] = []; const placeholders = databases.map(() => '?').join(', '); let allScopedTables: string[] | null = null; if (input.tableScope) { @@ -368,8 +440,11 @@ export class KtxMysqlScanConnector implements KtxScanConnector { `, [...databases, ...tableNameParams], ); - const primaryKeys = await this.queryRaw( - ` + const primaryKeysResult = await tryConstraintQuery( + { schema: databases[0] ?? this.poolConfig.database, kind: 'primary_key', isDeniedError }, + () => + this.queryRaw( + ` SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE WHERE TABLE_SCHEMA IN (${placeholders}) @@ -377,10 +452,18 @@ export class KtxMysqlScanConnector implements KtxScanConnector { ${tableNameClause} ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION `, - [...databases, ...tableNameParams], + [...databases, ...tableNameParams], + ), ); - const foreignKeys = await this.queryRaw( - ` + const primaryKeys = primaryKeysResult.ok ? primaryKeysResult.value : []; + if (!primaryKeysResult.ok) { + pushConstraintWarnings(snapshotWarnings, databases, 'primary_key'); + } + const foreignKeysResult = await tryConstraintQuery( + { schema: databases[0] ?? this.poolConfig.database, kind: 'foreign_key', isDeniedError }, + () => + this.queryRaw( + ` SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME, CONSTRAINT_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE WHERE TABLE_SCHEMA IN (${placeholders}) @@ -388,8 +471,13 @@ export class KtxMysqlScanConnector implements KtxScanConnector { ${tableNameClause} ORDER BY TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME `, - [...databases, ...tableNameParams], + [...databases, ...tableNameParams], + ), ); + const foreignKeys = foreignKeysResult.ok ? foreignKeysResult.value : []; + if (!foreignKeysResult.ok) { + pushConstraintWarnings(snapshotWarnings, databases, 'foreign_key'); + } const columnsByTable = groupByTable(columns, this.poolConfig.database); const primaryKeysByTable = primaryKeyMap(primaryKeys, this.poolConfig.database); @@ -417,6 +505,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector { total_columns: schemaTables.reduce((sum, table) => sum + table.columns.length, 0), }, tables: schemaTables, + warnings: snapshotWarnings, }; } diff --git a/packages/cli/src/connectors/postgres/connector.test.ts b/packages/cli/src/connectors/postgres/connector.test.ts index 0ab23a0a..d9fa45cf 100644 --- a/packages/cli/src/connectors/postgres/connector.test.ts +++ b/packages/cli/src/connectors/postgres/connector.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it, vi } from 'vitest'; import { createPostgresLiveDatabaseIntrospection } from '../../connectors/postgres/live-database-introspection.js'; -import { isKtxPostgresConnectionConfig, KtxPostgresScanConnector, postgresPoolConfigFromConfig, type KtxPostgresPoolFactory } from '../../connectors/postgres/connector.js'; +import { isKtxPostgresConnectionConfig, KtxPostgresScanConnector, postgresPoolConfigFromConfig, type KtxPostgresConnectionConfig, type KtxPostgresPoolFactory } from '../../connectors/postgres/connector.js'; import { tableRefSet } from '../../context/scan/table-ref.js'; interface FakeQueryResult { @@ -8,11 +8,16 @@ interface FakeQueryResult { fields?: Array<{ name: string; dataTypeID: number }>; } -function fakePoolFactory(results: Map): KtxPostgresPoolFactory { +type FakeQueryResponse = FakeQueryResult | Error; + +function fakePoolFactory(results: Map): KtxPostgresPoolFactory { const query = vi.fn(async (sql: string, params?: unknown[]) => { const normalized = sql.replace(/\s+/g, ' ').trim(); for (const [key, value] of results.entries()) { if (normalized.includes(key)) { + if (value instanceof Error) { + throw value; + } return value; } } @@ -33,8 +38,8 @@ function fakePoolFactory(results: Map): KtxPostgresPool }; } -function metadataResults(): Map { - return new Map([ +function metadataResults(): Map { + return new Map([ [ 'FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n', { @@ -154,6 +159,46 @@ describe('KtxPostgresScanConnector', () => { }); }); + it('defaults and validates Postgres maxConnections', () => { + const baseConnection: KtxPostgresConnectionConfig = { + driver: 'postgres', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'test-password', // pragma: allowlist secret + }; + + expect( + postgresPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: baseConnection, + }), + ).toMatchObject({ max: 10 }); + + expect( + postgresPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections: 50 }, + }), + ).toMatchObject({ max: 50 }); + + expect( + postgresPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections: '12' as never }, + }), + ).toMatchObject({ max: 12 }); + + for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) { + expect(() => + postgresPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections }, + }), + ).toThrow('connections.warehouse.maxConnections must be a positive integer'); + } + }); + it('introspects schemas, tables, views, primary keys, comments, row counts, and foreign keys', async () => { const connector = new KtxPostgresScanConnector({ connectionId: 'warehouse', @@ -212,6 +257,75 @@ describe('KtxPostgresScanConnector', () => { ]); }); + it('soft-fails denied Postgres constraint discovery with scan warnings', async () => { + const results = metadataResults(); + results.set( + "tc.constraint_type = 'PRIMARY KEY'", + Object.assign(new Error('permission denied for information_schema'), { code: '42501' }), + ); + results.set( + "tc.constraint_type = 'FOREIGN KEY'", + Object.assign(new Error('relation information_schema.key_column_usage does not exist'), { code: '42P01' }), + ); + const connector = new KtxPostgresScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'postgres', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'test-password', // pragma: allowlist secret + schema: 'public', + }, + poolFactory: fakePoolFactory(results), + now: () => new Date('2026-04-29T10:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'postgres' }, + { runId: 'scan-run-denied-constraints' }, + ); + + expect(snapshot.warnings).toEqual([ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'public', kind: 'primary_key' }, + }, + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped foreign-key discovery in public (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'public', kind: 'foreign_key' }, + }, + ]); + expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true); + expect(snapshot.tables.every((table) => table.foreignKeys.length === 0)).toBe(true); + }); + + it('propagates non-denial Postgres constraint discovery errors', async () => { + const results = metadataResults(); + const resetError = Object.assign(new Error('connection reset'), { code: 'ECONNRESET' }); + results.set("tc.constraint_type = 'PRIMARY KEY'", resetError); + const connector = new KtxPostgresScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'postgres', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + password: 'test-password', // pragma: allowlist secret + schema: 'public', + }, + poolFactory: fakePoolFactory(results), + }); + + await expect( + connector.introspect({ connectionId: 'warehouse', driver: 'postgres' }, { runId: 'scan-run-network-error' }), + ).rejects.toBe(resetError); + }); + it('runs samples, distinct values, statistics, read-only SQL, and schema listing', async () => { const connector = new KtxPostgresScanConnector({ connectionId: 'warehouse', diff --git a/packages/cli/src/connectors/postgres/connector.ts b/packages/cli/src/connectors/postgres/connector.ts index 44bd58b6..1bab5e49 100644 --- a/packages/cli/src/connectors/postgres/connector.ts +++ b/packages/cli/src/connectors/postgres/connector.ts @@ -2,8 +2,29 @@ import { readFileSync } from 'node:fs'; import { homedir } from 'node:os'; import { resolve } from 'node:path'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; -import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js'; +import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; +import { + createKtxConnectorCapabilities, + type KtxColumnSampleInput, + type KtxColumnSampleResult, + type KtxColumnStatsInput, + type KtxColumnStatsResult, + type KtxQueryResult, + type KtxReadOnlyQueryInput, + type KtxScanConnector, + type KtxScanContext, + type KtxScanInput, + type KtxScanWarning, + type KtxSchemaColumn, + type KtxSchemaForeignKey, + type KtxSchemaSnapshot, + type KtxSchemaTable, + type KtxTableListEntry, + type KtxTableRef, + type KtxTableSampleInput, + type KtxTableSampleResult, +} from '../../context/scan/types.js'; import { Pool } from 'pg'; import { KtxPostgresDialect } from './dialect.js'; @@ -43,6 +64,7 @@ export interface KtxPostgresConnectionConfig { sslmode?: string; sslMode?: string; rejectUnauthorized?: boolean; + maxConnections?: number; [key: string]: unknown; } @@ -207,6 +229,14 @@ function primaryKeyMap(rows: PostgresPrimaryKeyRow[]): Map> return grouped; } +function isDeniedError(error: unknown): boolean { + if (!error || typeof error !== 'object') { + return false; + } + const code = (error as { code?: unknown }).code; + return code === '42501' || code === '42P01'; +} + function queryRows(result: KtxPostgresQueryResult): unknown[][] { const headers = (result.fields ?? []).map((field) => field.name); return result.rows.map((row) => headers.map((header) => row[header])); @@ -242,6 +272,23 @@ function numberValue(value: unknown): number | undefined { return typeof value === 'number' && Number.isFinite(value) ? value : undefined; } +function positiveIntegerConfigValue(input: { + connection: KtxPostgresConnectionConfig; + key: keyof KtxPostgresConnectionConfig; + connectionId: string; + defaultValue: number; +}): number { + const value = input.connection[input.key]; + if (value === undefined) { + return input.defaultValue; + } + const numberValue = Number(value); + if (!Number.isInteger(numberValue) || numberValue < 1) { + throw new Error(`connections.${input.connectionId}.${String(input.key)} must be a positive integer`); + } + return numberValue; +} + function parsePostgresUrl(url: string): Partial { const parsed = new URL(url); const sslmode = parsed.searchParams.get('sslmode') ?? undefined; @@ -299,6 +346,12 @@ export function postgresPoolConfigFromConfig(input: { const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env); const password = stringConfigValue(merged, 'password', env); const sslmode = normalizedSslMode(merged); + const maxConnections = positiveIntegerConfigValue({ + connection: merged, + key: 'maxConnections', + connectionId: input.connectionId, + defaultValue: 10, + }); if (!referencedUrl && !host) { throw new Error(`Native PostgreSQL connector requires connections.${input.connectionId}.host or url`); @@ -311,7 +364,7 @@ export function postgresPoolConfigFromConfig(input: { } const config: KtxPostgresPoolConfig = { - max: 10, + max: maxConnections, idleTimeoutMillis: 30_000, connectionTimeoutMillis: 10_000, ...(referencedUrl && sslmode !== 'prefer' && sslmode !== 'disable' @@ -379,10 +432,11 @@ export class KtxPostgresScanConnector implements KtxScanConnector { this.assertConnection(input.connectionId); const schemas = schemasFromConnection(this.connection); const allTables: KtxSchemaTable[] = []; + const snapshotWarnings: KtxScanWarning[] = []; for (const schema of schemas) { const scopedNames = input.tableScope ? scopedTableNames(input.tableScope, { catalog: null, db: schema }) : null; if (scopedNames && scopedNames.length === 0) continue; - const tables = await this.loadSchemaTables(schema, scopedNames); + const tables = await this.loadSchemaTables(schema, scopedNames, snapshotWarnings); allTables.push(...tables); } return { @@ -398,6 +452,7 @@ export class KtxPostgresScanConnector implements KtxScanConnector { total_columns: allTables.reduce((sum, table) => sum + table.columns.length, 0), }, tables: allTables, + warnings: snapshotWarnings, }; } @@ -546,7 +601,11 @@ export class KtxPostgresScanConnector implements KtxScanConnector { } } - private async loadSchemaTables(schema: string, scopedNames: readonly string[] | null): Promise { + private async loadSchemaTables( + schema: string, + scopedNames: readonly string[] | null, + snapshotWarnings: KtxScanWarning[], + ): Promise { if (scopedNames && scopedNames.length === 0) return []; const pgCatalogScopeClause = scopedNames ? 'AND c.relname = ANY($2)' : ''; const tableConstraintScopeClause = scopedNames ? 'AND tc.table_name = ANY($2)' : ''; @@ -591,8 +650,11 @@ export class KtxPostgresScanConnector implements KtxScanConnector { `, [schema, ...scopeValues], ); - const primaryKeys = await this.queryRaw( - ` + const primaryKeysResult = await tryConstraintQuery( + { schema, kind: 'primary_key', isDeniedError }, + () => + this.queryRaw( + ` SELECT tc.table_name, kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu @@ -603,10 +665,18 @@ export class KtxPostgresScanConnector implements KtxScanConnector { ${tableConstraintScopeClause} ORDER BY tc.table_name, kcu.ordinal_position `, - [schema, ...scopeValues], + [schema, ...scopeValues], + ), ); - const foreignKeys = await this.queryRaw( - ` + const primaryKeys = primaryKeysResult.ok ? primaryKeysResult.value : []; + if (!primaryKeysResult.ok) { + snapshotWarnings.push(primaryKeysResult.warning); + } + const foreignKeysResult = await tryConstraintQuery( + { schema, kind: 'foreign_key', isDeniedError }, + () => + this.queryRaw( + ` SELECT tc.table_name, kcu.column_name, @@ -626,8 +696,13 @@ export class KtxPostgresScanConnector implements KtxScanConnector { ${tableConstraintScopeClause} ORDER BY tc.table_name, kcu.column_name `, - [schema, ...scopeValues], + [schema, ...scopeValues], + ), ); + const foreignKeys = foreignKeysResult.ok ? foreignKeysResult.value : []; + if (!foreignKeysResult.ok) { + snapshotWarnings.push(foreignKeysResult.warning); + } const columnsByTable = groupByTable(columns); const primaryKeysByTable = primaryKeyMap(primaryKeys); diff --git a/packages/cli/src/connectors/snowflake/connector.test.ts b/packages/cli/src/connectors/snowflake/connector.test.ts index a321e289..657dbaf1 100644 --- a/packages/cli/src/connectors/snowflake/connector.test.ts +++ b/packages/cli/src/connectors/snowflake/connector.test.ts @@ -8,7 +8,7 @@ vi.mock('snowflake-sdk', () => ({ })); import { createSnowflakeLiveDatabaseIntrospection } from '../../connectors/snowflake/live-database-introspection.js'; -import { isKtxSnowflakeConnectionConfig, KtxSnowflakeScanConnector, snowflakeConnectionConfigFromConfig, type KtxSnowflakeDriver, type KtxSnowflakeDriverFactory } from '../../connectors/snowflake/connector.js'; +import { isKtxSnowflakeConnectionConfig, KtxSnowflakeScanConnector, snowflakeConnectionConfigFromConfig, type KtxSnowflakeConnectionConfig, type KtxSnowflakeDriver, type KtxSnowflakeDriverFactory } from '../../connectors/snowflake/connector.js'; import { tableRefSet } from '../../context/scan/table-ref.js'; function fakeDriverFactory(): KtxSnowflakeDriverFactory { @@ -140,8 +140,8 @@ describe('KtxSnowflakeScanConnector', () => { }); }); - it('defaults and validates Snowflake maxSessions', () => { - const baseConnection = { + it('defaults and validates Snowflake maxConnections', () => { + const baseConnection: KtxSnowflakeConnectionConfig = { driver: 'snowflake', authMethod: 'password', account: 'acct', @@ -150,32 +150,59 @@ describe('KtxSnowflakeScanConnector', () => { schema_name: 'PUBLIC', username: 'reader', password: 'fixture-pass', // pragma: allowlist secret - } as const; + }; expect( snowflakeConnectionConfigFromConfig({ connectionId: 'warehouse', connection: baseConnection, }), - ).toMatchObject({ maxSessions: 4 }); + ).toMatchObject({ maxConnections: 4 }); expect( snowflakeConnectionConfigFromConfig({ connectionId: 'warehouse', - connection: { ...baseConnection, maxSessions: 8 }, + connection: { ...baseConnection, maxConnections: 8 }, }), - ).toMatchObject({ maxSessions: 8 }); + ).toMatchObject({ maxConnections: 8 }); - for (const maxSessions of [0, -1, 1.5, Number.NaN]) { + expect( + snowflakeConnectionConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections: '12' as never }, + }), + ).toMatchObject({ maxConnections: 12 }); + + for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) { expect(() => snowflakeConnectionConfigFromConfig({ connectionId: 'warehouse', - connection: { ...baseConnection, maxSessions }, + connection: { ...baseConnection, maxConnections }, }), - ).toThrow('connections.warehouse.maxSessions must be a positive integer'); + ).toThrow('connections.warehouse.maxConnections must be a positive integer'); } }); + it('rejects stale Snowflake pool config key', () => { + const baseConnection: KtxSnowflakeConnectionConfig = { + driver: 'snowflake', + authMethod: 'password', + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + schema_name: 'PUBLIC', + username: 'reader', + password: 'fixture-pass', // pragma: allowlist secret + }; + + expect(() => + snowflakeConnectionConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxSessions: 8 }, + }), + ).toThrow(/renamed to maxConnections/); + }); + it('uses one lazy Snowflake pool and drains it during cleanup', async () => { const { pool, executedSql } = installSnowflakePoolMock(); const close = vi.fn(async () => undefined); @@ -191,7 +218,7 @@ describe('KtxSnowflakeScanConnector', () => { username: 'reader', password: 'fixture-pass', // pragma: allowlist secret role: 'ANALYST', - maxSessions: 3, + maxConnections: 3, }, sdkOptionsProvider: { resolve: vi.fn(async () => ({ sdkOptions: { application: 'ktx-test' }, close })), @@ -332,12 +359,56 @@ describe('KtxSnowflakeScanConnector', () => { expect(snapshot.tables.map((table) => table.name).sort()).toEqual(['ORDERS', 'ORDER_SUMMARY']); expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true); + expect(snapshot.warnings).toEqual([ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in PUBLIC (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'PUBLIC', kind: 'primary_key' }, + }, + ]); expect(warn).not.toHaveBeenCalled(); } finally { warn.mockRestore(); } }); + it('propagates non-denial Snowflake primary-key discovery errors', async () => { + const driverFactory = fakeDriverFactory(); + const driver = (driverFactory.createDriver as ReturnType).getMockImplementation() as + | (() => KtxSnowflakeDriver) + | undefined; + if (!driver) throw new Error('driver mock missing'); + const built = driver(); + const networkError = new Error('network unavailable'); + (built.query as ReturnType).mockImplementation(async (sql: string) => { + if (sql.includes('TABLE_CONSTRAINTS')) { + throw networkError; + } + throw new Error(`Unexpected SQL: ${sql}`); + }); + (driverFactory.createDriver as ReturnType).mockReturnValue(built); + + const connector = new KtxSnowflakeScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'snowflake', + authMethod: 'password', + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + schema_name: 'PUBLIC', + username: 'reader', + password: 'fixture-pass', // pragma: allowlist secret + }, + driverFactory, + }); + + await expect( + connector.introspect({ connectionId: 'warehouse', driver: 'snowflake' }, { runId: 'scan-run-snowflake-network' }), + ).rejects.toBe(networkError); + }); + it('limits introspection to tables in tableScope', async () => { const queries: Array<{ sql: string; params?: unknown }> = []; const getSchemaMetadata = vi.fn(async (_schemaName?: string, scopedNames?: readonly string[] | null) => diff --git a/packages/cli/src/connectors/snowflake/connector.ts b/packages/cli/src/connectors/snowflake/connector.ts index 0281b298..d8737559 100644 --- a/packages/cli/src/connectors/snowflake/connector.ts +++ b/packages/cli/src/connectors/snowflake/connector.ts @@ -3,8 +3,28 @@ import { readFileSync } from 'node:fs'; import { homedir } from 'node:os'; import { resolve } from 'node:path'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; -import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js'; +import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; +import { + createKtxConnectorCapabilities, + type KtxColumnSampleInput, + type KtxColumnSampleResult, + type KtxColumnStatsInput, + type KtxColumnStatsResult, + type KtxQueryResult, + type KtxReadOnlyQueryInput, + type KtxScanConnector, + type KtxScanContext, + type KtxScanInput, + type KtxScanWarning, + type KtxSchemaColumn, + type KtxSchemaSnapshot, + type KtxSchemaTable, + type KtxTableListEntry, + type KtxTableRef, + type KtxTableSampleInput, + type KtxTableSampleResult, +} from '../../context/scan/types.js'; import snowflake from 'snowflake-sdk'; import type { Bind, Binds, Connection, ConnectionOptions } from 'snowflake-sdk'; import { KtxSnowflakeDialect } from './dialect.js'; @@ -24,7 +44,7 @@ export interface KtxSnowflakeConnectionConfig { privateKey?: string; passphrase?: string; role?: string; - maxSessions?: number; + maxConnections?: number; [key: string]: unknown; } @@ -39,7 +59,7 @@ export interface KtxSnowflakeResolvedConnectionConfig { privateKey?: string; passphrase?: string; role?: string; - maxSessions: number; + maxConnections: number; } export interface KtxSnowflakeRawColumnMetadata { @@ -166,6 +186,13 @@ function firstNumber(value: unknown): number | null { return Number.isFinite(numberValue) ? numberValue : null; } +function isDeniedError(error: unknown): boolean { + if (error instanceof Error) { + return /insufficient privileges|does not exist or not authorized/i.test(error.message); + } + return false; +} + function normalizeSnowflakeValue(value: unknown, columnType?: string): unknown { if (columnType && DATE_TYPES.some((type) => columnType.toUpperCase().includes(type))) { if (typeof value === 'number') { @@ -218,6 +245,10 @@ export function snowflakeConnectionConfigFromConfig(input: { if (!isKtxSnowflakeConnectionConfig(input.connection)) { throw new Error(`Native Snowflake connector cannot run driver "${inputDriver}"`); } + const staleMaxSessionsKey = 'max' + 'Sessions'; + if (Object.prototype.hasOwnProperty.call(input.connection, staleMaxSessionsKey)) { + throw new Error(`connections.${input.connectionId}.maxSessions has been renamed to maxConnections`); + } const env = input.env ?? process.env; const authMethod = input.connection?.authMethod ?? 'password'; const account = stringConfigValue(input.connection, 'account', env); @@ -249,9 +280,9 @@ export function snowflakeConnectionConfigFromConfig(input: { database, schemas: resolvedSchemas, username, - maxSessions: positiveIntegerConfigValue({ + maxConnections: positiveIntegerConfigValue({ connection: input.connection, - key: 'maxSessions', + key: 'maxConnections', connectionId: input.connectionId, defaultValue: 4, }), @@ -322,7 +353,7 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver { const message = error instanceof Error ? error.message : String(error); if (/timeout/i.test(message) && /pool|acquire/i.test(message)) { throw new Error( - "Snowflake session pool exhausted after 60s - consider lowering maxSessions or increasing your account's concurrent-statement limit.", + "Snowflake session pool exhausted after 60s - consider lowering maxConnections or increasing your account's concurrent-statement limit.", ); } throw error; @@ -432,7 +463,7 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver { if (!this.pool) { this.pool = snowflake.createPool(await this.resolveConnectionOptions(), { min: 0, - max: this.resolved.maxSessions, + max: this.resolved.maxConnections, evictionRunIntervalMillis: 30_000, acquireTimeoutMillis: 60_000, }); @@ -540,13 +571,23 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector { async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise { this.assertConnection(input.connectionId); const tables: KtxSchemaTable[] = []; + const snapshotWarnings: KtxScanWarning[] = []; for (const schemaName of this.resolved.schemas) { const scopedNames = input.tableScope ? scopedTableNames(input.tableScope, { catalog: this.resolved.database, db: schemaName }) : null; if (scopedNames && scopedNames.length === 0) continue; const rawTables = await this.getDriver().getSchemaMetadata(schemaName, scopedNames); - const primaryKeys = await this.primaryKeys(rawTables.map((table) => table.name), schemaName); + const primaryKeysResult = await tryConstraintQuery( + { schema: schemaName, kind: 'primary_key', isDeniedError }, + () => this.primaryKeys(rawTables.map((table) => table.name), schemaName), + ); + const primaryKeys = primaryKeysResult.ok + ? primaryKeysResult.value + : new Map(rawTables.map((table) => [table.name, new Set()])); + if (!primaryKeysResult.ok) { + snapshotWarnings.push(primaryKeysResult.warning); + } tables.push(...rawTables.map((table) => this.toSchemaTable(table, primaryKeys))); } return { @@ -563,6 +604,7 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector { total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), }, tables, + warnings: snapshotWarnings, }; } @@ -686,9 +728,8 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector { return grouped; } const tableNamePlaceholders = tableNames.map(() => '?').join(', '); - try { - const result = await this.getDriver().query( - ` + const result = await this.getDriver().query( + ` SELECT tc.TABLE_NAME, kcu.COLUMN_NAME FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu @@ -701,16 +742,12 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector { AND tc.TABLE_NAME IN (${tableNamePlaceholders}) ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION `, - [schemaName, this.resolved.database, ...tableNames], - ); - for (const row of result.rows) { - const tableName = String(row[0]); - const columnName = String(row[1]); - grouped.get(tableName)?.add(columnName); - } - } catch { - // INFORMATION_SCHEMA.KEY_COLUMN_USAGE often isn't granted to read-only roles; - // continue with empty PK map and let FK inference + profiling carry the slack. + [schemaName, this.resolved.database, ...tableNames], + ); + for (const row of result.rows) { + const tableName = String(row[0]); + const columnName = String(row[1]); + grouped.get(tableName)?.add(columnName); } return grouped; } diff --git a/packages/cli/src/connectors/sqlserver/connector.test.ts b/packages/cli/src/connectors/sqlserver/connector.test.ts index ef00bd3a..4e84ff9a 100644 --- a/packages/cli/src/connectors/sqlserver/connector.test.ts +++ b/packages/cli/src/connectors/sqlserver/connector.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it, vi } from 'vitest'; import { createSqlServerLiveDatabaseIntrospection } from '../../connectors/sqlserver/live-database-introspection.js'; -import { isKtxSqlServerConnectionConfig, KtxSqlServerScanConnector, sqlServerConnectionPoolConfigFromConfig, type KtxSqlServerPoolFactory, type KtxSqlServerQueryResult } from '../../connectors/sqlserver/connector.js'; +import { isKtxSqlServerConnectionConfig, KtxSqlServerScanConnector, sqlServerConnectionPoolConfigFromConfig, type KtxSqlServerConnectionConfig, type KtxSqlServerPoolFactory, type KtxSqlServerQueryResult } from '../../connectors/sqlserver/connector.js'; import { tableRefSet } from '../../context/scan/table-ref.js'; function recordset>( @@ -16,7 +16,7 @@ function result>(rows: T[], columnNames: strin return { recordset: recordset(rows, columnNames) }; } -function fakePoolFactory(): KtxSqlServerPoolFactory { +function fakePoolFactory(options: { primaryKeyError?: Error; foreignKeyError?: Error } = {}): KtxSqlServerPoolFactory { const query = vi.fn(async (sql: string): Promise => { if (sql.includes('INFORMATION_SCHEMA.TABLES')) { return result( @@ -55,6 +55,9 @@ function fakePoolFactory(): KtxSqlServerPoolFactory { ); } if (sql.includes("CONSTRAINT_TYPE = 'PRIMARY KEY'")) { + if (options.primaryKeyError) { + throw options.primaryKeyError; + } return result( [ { table_name: 'customers', column_name: 'id' }, @@ -64,6 +67,9 @@ function fakePoolFactory(): KtxSqlServerPoolFactory { ); } if (sql.includes('REFERENTIAL_CONSTRAINTS')) { + if (options.foreignKeyError) { + throw options.foreignKeyError; + } return result( [ { @@ -164,6 +170,45 @@ describe('KtxSqlServerScanConnector', () => { }); }); + it('defaults and validates SQL Server maxConnections', () => { + const baseConnection: KtxSqlServerConnectionConfig = { + driver: 'sqlserver', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + }; + + expect( + sqlServerConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: baseConnection, + }), + ).toMatchObject({ pool: { max: 10 } }); + + expect( + sqlServerConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections: 15 }, + }), + ).toMatchObject({ pool: { max: 15 } }); + + expect( + sqlServerConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections: '12' as never }, + }), + ).toMatchObject({ pool: { max: 12 } }); + + for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) { + expect(() => + sqlServerConnectionPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { ...baseConnection, maxConnections }, + }), + ).toThrow('connections.warehouse.maxConnections must be a positive integer'); + } + }); + it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => { const connector = new KtxSqlServerScanConnector({ connectionId: 'warehouse', @@ -222,6 +267,46 @@ describe('KtxSqlServerScanConnector', () => { ]); }); + it('soft-fails denied SQL Server constraint discovery with scan warnings', async () => { + const connector = new KtxSqlServerScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'sqlserver', + host: 'db.example.test', + database: 'analytics', + username: 'reader', + schema: 'dbo', + }, + poolFactory: fakePoolFactory({ + primaryKeyError: Object.assign(new Error('SELECT permission denied'), { number: 229 }), + foreignKeyError: Object.assign(new Error('EXECUTE permission denied'), { number: 230 }), + }), + now: () => new Date('2026-04-29T16:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'sqlserver' }, + { runId: 'scan-run-sqlserver-denied-constraints' }, + ); + + expect(snapshot.warnings).toEqual([ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in dbo (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'dbo', kind: 'primary_key' }, + }, + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped foreign-key discovery in dbo (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'dbo', kind: 'foreign_key' }, + }, + ]); + expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true); + expect(snapshot.tables.every((table) => table.foreignKeys.length === 0)).toBe(true); + }); + it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => { const poolFactory = fakePoolFactory(); const connector = new KtxSqlServerScanConnector({ diff --git a/packages/cli/src/connectors/sqlserver/connector.ts b/packages/cli/src/connectors/sqlserver/connector.ts index 64b8075e..9895027f 100644 --- a/packages/cli/src/connectors/sqlserver/connector.ts +++ b/packages/cli/src/connectors/sqlserver/connector.ts @@ -1,6 +1,27 @@ import { assertReadOnlySql } from '../../context/connections/read-only-sql.js'; -import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js'; +import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; +import { + createKtxConnectorCapabilities, + type KtxColumnSampleInput, + type KtxColumnSampleResult, + type KtxColumnStatsInput, + type KtxColumnStatsResult, + type KtxQueryResult, + type KtxReadOnlyQueryInput, + type KtxScanConnector, + type KtxScanContext, + type KtxScanInput, + type KtxScanWarning, + type KtxSchemaColumn, + type KtxSchemaForeignKey, + type KtxSchemaSnapshot, + type KtxSchemaTable, + type KtxTableListEntry, + type KtxTableRef, + type KtxTableSampleInput, + type KtxTableSampleResult, +} from '../../context/scan/types.js'; import { readFileSync } from 'node:fs'; import { homedir } from 'node:os'; import { resolve } from 'node:path'; @@ -19,6 +40,7 @@ export interface KtxSqlServerConnectionConfig { schema?: string; schemas?: string[]; trustServerCertificate?: boolean; + maxConnections?: number; [key: string]: unknown; } @@ -197,6 +219,23 @@ function maybeNumber(value: unknown): number | undefined { return typeof value === 'number' && Number.isFinite(value) ? value : undefined; } +function positiveIntegerConfigValue(input: { + connection: KtxSqlServerConnectionConfig; + key: keyof KtxSqlServerConnectionConfig; + connectionId: string; + defaultValue: number; +}): number { + const value = input.connection[input.key]; + if (value === undefined) { + return input.defaultValue; + } + const numberValue = Number(value); + if (!Number.isInteger(numberValue) || numberValue < 1) { + throw new Error(`connections.${input.connectionId}.${String(input.key)} must be a positive integer`); + } + return numberValue; +} + function schemaNames(connection: KtxSqlServerConnectionConfig, env: NodeJS.ProcessEnv): string[] { if (Array.isArray(connection.schemas) && connection.schemas.length > 0) { return connection.schemas.filter((schema) => schema.trim().length > 0).map((schema) => resolveStringReference(schema, env)); @@ -219,6 +258,14 @@ function firstNumber(value: unknown): number | null { return Number.isFinite(numberValue) ? numberValue : null; } +function isDeniedError(error: unknown): boolean { + if (!error || typeof error !== 'object') { + return false; + } + const number = (error as { number?: unknown }).number; + return number === 229 || number === 230 || number === 297; +} + function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string { const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, ''); if (!maxRows) { @@ -254,6 +301,12 @@ export function sqlServerConnectionPoolConfigFromConfig(input: { const server = stringConfigValue(merged, 'host', env); const database = stringConfigValue(merged, 'database', env); const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env); + const maxConnections = positiveIntegerConfigValue({ + connection: merged, + key: 'maxConnections', + connectionId: input.connectionId, + defaultValue: 10, + }); if (!server) { throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.host or url`); @@ -272,7 +325,7 @@ export function sqlServerConnectionPoolConfigFromConfig(input: { user, password: stringConfigValue(merged, 'password', env), options: { encrypt: true, trustServerCertificate: merged.trustServerCertificate ?? true }, - pool: { max: 10, min: 0, idleTimeoutMillis: 30000 }, + pool: { max: maxConnections, min: 0, idleTimeoutMillis: 30000 }, }; } @@ -328,11 +381,12 @@ export class KtxSqlServerScanConnector implements KtxScanConnector { async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise { this.assertConnection(input.connectionId); const tables: KtxSchemaTable[] = []; + const snapshotWarnings: KtxScanWarning[] = []; for (const schemaName of this.schemas) { const scopedNames = input.tableScope ? scopedTableNames(input.tableScope, { catalog: this.poolConfig.database, db: schemaName }) : null; - tables.push(...(await this.introspectSchema(schemaName, scopedNames))); + tables.push(...(await this.introspectSchema(schemaName, scopedNames, snapshotWarnings))); } return { connectionId: this.connectionId, @@ -347,6 +401,7 @@ export class KtxSqlServerScanConnector implements KtxScanConnector { total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), }, tables, + warnings: snapshotWarnings, }; } @@ -479,7 +534,11 @@ export class KtxSqlServerScanConnector implements KtxScanConnector { } } - private async introspectSchema(schemaName: string, scopedNames: readonly string[] | null): Promise { + private async introspectSchema( + schemaName: string, + scopedNames: readonly string[] | null, + snapshotWarnings: KtxScanWarning[], + ): Promise { if (scopedNames && scopedNames.length === 0) return []; const tableScope = tableScopeSql(scopedNames, 'TABLE_NAME'); const tables = await this.queryRaw<{ table_name: string; table_type: string }>( @@ -510,8 +569,22 @@ export class KtxSqlServerScanConnector implements KtxScanConnector { ); const tableComments = await this.tableComments(schemaName, scopedNames); const columnComments = await this.columnComments(schemaName, scopedNames); - const primaryKeys = await this.primaryKeys(schemaName, scopedNames); - const foreignKeys = await this.foreignKeys(schemaName, scopedNames); + const primaryKeysResult = await tryConstraintQuery( + { schema: schemaName, kind: 'primary_key', isDeniedError }, + () => this.primaryKeys(schemaName, scopedNames), + ); + const foreignKeysResult = await tryConstraintQuery( + { schema: schemaName, kind: 'foreign_key', isDeniedError }, + () => this.foreignKeys(schemaName, scopedNames), + ); + const primaryKeys = primaryKeysResult.ok ? primaryKeysResult.value : new Map>(); + const foreignKeys = foreignKeysResult.ok ? foreignKeysResult.value : []; + if (!primaryKeysResult.ok) { + snapshotWarnings.push(primaryKeysResult.warning); + } + if (!foreignKeysResult.ok) { + snapshotWarnings.push(foreignKeysResult.warning); + } const rowCounts = await this.rowCounts(schemaName, scopedNames); const columnsByTable = groupByTable(columns); const foreignKeysByTable = groupByTable(foreignKeys); diff --git a/packages/cli/src/context/ingest/adapters/live-database/stage.test.ts b/packages/cli/src/context/ingest/adapters/live-database/stage.test.ts index 297071ae..8fb675a2 100644 --- a/packages/cli/src/context/ingest/adapters/live-database/stage.test.ts +++ b/packages/cli/src/context/ingest/adapters/live-database/stage.test.ts @@ -6,6 +6,7 @@ import { detectLiveDatabaseStagedDir, LIVE_DATABASE_FOREIGN_KEYS_FILE, LIVE_DATABASE_META_FILE, + LIVE_DATABASE_WARNINGS_FILE, liveDatabaseTablePath, readLiveDatabaseTableFiles, writeLiveDatabaseSnapshot, @@ -145,6 +146,31 @@ describe('live-database staged snapshot files', () => { expect(connectionJson).not.toContain('pem-value'); }); + it('writes redacted scan warnings next to live database metadata', async () => { + const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-warning-stage-')); + await writeLiveDatabaseSnapshot(dir, { + ...snapshot(), + warnings: [ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)', + recoverable: true, + metadata: { + schema: 'public', + kind: 'primary_key', + url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret + }, + }, + ], + }); + + const warningsJson = await readFile(join(dir, LIVE_DATABASE_WARNINGS_FILE), 'utf8'); + expect(warningsJson).toContain('"constraint_discovery_unauthorized"'); + expect(warningsJson).toContain('"schema": "public"'); + expect(warningsJson).toContain('"url": ""'); + expect(warningsJson).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret + }); + it('returns false for a directory that is missing live database metadata', async () => { const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-empty-')); expect(await detectLiveDatabaseStagedDir(dir)).toBe(false); diff --git a/packages/cli/src/context/ingest/adapters/live-database/stage.ts b/packages/cli/src/context/ingest/adapters/live-database/stage.ts index ba925986..5dd21afd 100644 --- a/packages/cli/src/context/ingest/adapters/live-database/stage.ts +++ b/packages/cli/src/context/ingest/adapters/live-database/stage.ts @@ -7,6 +7,8 @@ import type { KtxSchemaSnapshot, KtxSchemaTable, KtxTableRef } from '../../../sc export const LIVE_DATABASE_META_FILE = 'connection.json'; export const LIVE_DATABASE_FOREIGN_KEYS_FILE = 'foreign-keys.json'; +/** @internal */ +export const LIVE_DATABASE_WARNINGS_FILE = 'warnings.json'; const LIVE_DATABASE_TABLES_DIR = 'tables'; interface LiveDatabaseTableFile { @@ -89,6 +91,13 @@ function foreignKeyIndex(snapshot: KtxSchemaSnapshot): ForeignKeyIndexEntry[] { return entries; } +function warningArtifact(snapshot: KtxSchemaSnapshot): { warnings: KtxSchemaSnapshot['warnings'] } { + const redacted = redactKtxSensitiveMetadata({ warnings: snapshot.warnings ?? [] }); + return { + warnings: Array.isArray(redacted.warnings) ? (redacted.warnings as KtxSchemaSnapshot['warnings']) : [], + }; +} + export async function writeLiveDatabaseSnapshot(stagedDir: string, snapshot: KtxSchemaSnapshot): Promise { await mkdir(join(stagedDir, LIVE_DATABASE_TABLES_DIR), { recursive: true }); const sortedTables = [...snapshot.tables].sort((a, b) => tableSortKey(a).localeCompare(tableSortKey(b))); @@ -105,6 +114,7 @@ export async function writeLiveDatabaseSnapshot(stagedDir: string, snapshot: Ktx join(stagedDir, LIVE_DATABASE_FOREIGN_KEYS_FILE), stableJson({ foreignKeys: foreignKeyIndex(snapshot) }), ); + await writeFile(join(stagedDir, LIVE_DATABASE_WARNINGS_FILE), stableJson(warningArtifact(snapshot))); for (const table of sortedTables) { await writeFile(join(stagedDir, liveDatabaseTablePath(table)), stableJson(table)); } diff --git a/packages/cli/src/context/ingest/historic-sql-probes.test.ts b/packages/cli/src/context/ingest/historic-sql-probes.test.ts new file mode 100644 index 00000000..275a84c7 --- /dev/null +++ b/packages/cli/src/context/ingest/historic-sql-probes.test.ts @@ -0,0 +1,157 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { HistoricSqlDialect } from './adapters/historic-sql/types.js'; +import { + historicSqlProbeCatalogName, + runHistoricSqlReadinessProbe, + type HistoricSqlProbeRunner, + type HistoricSqlProbeRunnerFactoryEntry, +} from './historic-sql-probes.js'; + +function fakeRunner( + dialect: HistoricSqlDialect, + catalogName: string, + options: { result?: unknown; error?: unknown } = {}, +): HistoricSqlProbeRunner & { runCalls: () => number } { + let calls = 0; + return { + dialect, + catalogName, + async run() { + calls += 1; + if (options.error) { + throw options.error; + } + return options.result ?? { warnings: [], info: [] }; + }, + formatSuccessDetail() { + return { detail: `${catalogName} ready`, warnings: [] }; + }, + fixAdvice(error) { + return { + failHeadline: error instanceof Error ? error.message : String(error), + remediation: 'Fix the test probe.', + }; + }, + runCalls: () => calls, + }; +} + +function factories( + overrides: Partial>, +): Record { + const postgres = overrides.postgres ?? fakeRunner('postgres', 'pg_stat_statements'); + const snowflake = + overrides.snowflake ?? + fakeRunner('snowflake', 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY'); + const bigquery = + overrides.bigquery ?? fakeRunner('bigquery', 'INFORMATION_SCHEMA.JOBS_BY_PROJECT'); + + return { + postgres: { + catalogName: 'pg_stat_statements', + load: vi.fn(async () => postgres), + }, + snowflake: { + catalogName: 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', + load: vi.fn(async () => snowflake), + }, + bigquery: { + catalogName: 'INFORMATION_SCHEMA.JOBS_BY_PROJECT', + load: vi.fn(async () => bigquery), + }, + }; +} + +describe('historic-SQL probe registry', () => { + it('returns null when the connection has no query-history dialect', async () => { + const deps = { factories: factories({}), cache: new Map() }; + + await expect( + runHistoricSqlReadinessProbe( + { + projectDir: '/work/project', + connectionId: 'mysql', + connection: { + driver: 'mysql', + context: { queryHistory: { enabled: true } }, + }, + env: {}, + }, + deps, + ), + ).resolves.toBeNull(); + + expect(deps.factories.postgres.load).not.toHaveBeenCalled(); + expect(deps.factories.snowflake.load).not.toHaveBeenCalled(); + expect(deps.factories.bigquery.load).not.toHaveBeenCalled(); + }); + + it('dispatches to the dialect runner and caches the runner instance', async () => { + const runner = fakeRunner('postgres', 'pg_stat_statements', { + result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }, + }); + const deps = { factories: factories({ postgres: runner }), cache: new Map() }; + const input = { + projectDir: '/work/project', + connectionId: 'warehouse', + connection: { + driver: 'postgres' as const, + url: 'env:DATABASE_URL', + context: { queryHistory: { enabled: true } }, + }, + env: {}, + }; + + const first = await runHistoricSqlReadinessProbe(input, deps); + const second = await runHistoricSqlReadinessProbe(input, deps); + + expect(first).toMatchObject({ ok: true, dialect: 'postgres', runner }); + expect(second).toMatchObject({ ok: true, dialect: 'postgres', runner }); + expect(deps.factories.postgres.load).toHaveBeenCalledTimes(1); + expect(runner.runCalls()).toBe(2); + }); + + it('normalizes runner errors into a failed outcome', async () => { + const error = new Error('missing grants'); + const runner = fakeRunner('bigquery', 'INFORMATION_SCHEMA.JOBS_BY_PROJECT', { + error, + }); + const deps = { factories: factories({ bigquery: runner }), cache: new Map() }; + + await expect( + runHistoricSqlReadinessProbe( + { + projectDir: '/work/project', + connectionId: 'bq', + connection: { + driver: 'bigquery', + credentials_json: '{"project_id":"project-1"}', + context: { queryHistory: { enabled: true } }, + }, + env: {}, + }, + deps, + ), + ).resolves.toEqual({ + ok: false, + dialect: 'bigquery', + runner, + error, + }); + }); + + it('returns catalog names without loading runner modules', () => { + const deps = { factories: factories({}), cache: new Map() }; + + expect(historicSqlProbeCatalogName('postgres', deps)).toBe('pg_stat_statements'); + expect(historicSqlProbeCatalogName('snowflake', deps)).toBe( + 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', + ); + expect(historicSqlProbeCatalogName('bigquery', deps)).toBe( + 'INFORMATION_SCHEMA.JOBS_BY_PROJECT', + ); + expect(deps.factories.postgres.load).not.toHaveBeenCalled(); + expect(deps.factories.snowflake.load).not.toHaveBeenCalled(); + expect(deps.factories.bigquery.load).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/cli/src/context/ingest/historic-sql-probes.ts b/packages/cli/src/context/ingest/historic-sql-probes.ts new file mode 100644 index 00000000..07204f3a --- /dev/null +++ b/packages/cli/src/context/ingest/historic-sql-probes.ts @@ -0,0 +1,141 @@ +import type { KtxProjectConnectionConfig } from '../project/config.js'; +import { queryHistoryDialectForConnection } from './adapters/historic-sql/connection-dialect.js'; +import type { HistoricSqlDialect } from './adapters/historic-sql/types.js'; + +export interface HistoricSqlFixAdvice { + failHeadline: string; + remediation: string; +} + +export interface HistoricSqlSuccessDetail { + detail: string; + warnings: string[]; +} + +export interface HistoricSqlProbeInput { + projectDir: string; + connectionId: string; + connection: KtxProjectConnectionConfig; + env?: NodeJS.ProcessEnv; +} + +export interface HistoricSqlProbeRunner { + readonly dialect: HistoricSqlDialect; + readonly catalogName: string; + run(input: HistoricSqlProbeInput): Promise; + formatSuccessDetail(result: unknown): HistoricSqlSuccessDetail; + fixAdvice(error: unknown): HistoricSqlFixAdvice; +} + +/** @internal */ +export interface HistoricSqlProbeRunnerFactoryEntry { + readonly catalogName: string; + load(): Promise; +} + +export type HistoricSqlProbeOutcome = + | { + ok: true; + dialect: HistoricSqlDialect; + runner: HistoricSqlProbeRunner; + result: unknown; + } + | { + ok: false; + dialect: HistoricSqlDialect; + runner: HistoricSqlProbeRunner; + error: unknown; + }; + +export type HistoricSqlReadinessProbe = ( + input: HistoricSqlProbeInput, +) => Promise; + +export interface HistoricSqlProbeRegistryDeps { + factories?: Record; + cache?: Map; +} + +const defaultHistoricSqlProbeRunnerFactories: Record< + HistoricSqlDialect, + HistoricSqlProbeRunnerFactoryEntry +> = { + postgres: { + catalogName: 'pg_stat_statements', + load: async () => { + const { PostgresPgssProbeRunner } = await import( + './historic-sql-probes/postgres-runner.js' + ); + return new PostgresPgssProbeRunner(); + }, + }, + snowflake: { + catalogName: 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', + load: async () => { + const { SnowflakeAccountUsageProbeRunner } = await import( + './historic-sql-probes/snowflake-runner.js' + ); + return new SnowflakeAccountUsageProbeRunner(); + }, + }, + bigquery: { + catalogName: 'INFORMATION_SCHEMA.JOBS_BY_PROJECT', + load: async () => { + const { BigQueryJobsByProjectProbeRunner } = await import( + './historic-sql-probes/bigquery-runner.js' + ); + return new BigQueryJobsByProjectProbeRunner(); + }, + }, +}; + +const DEFAULT_RUNNER_CACHE = new Map(); + +function registryDeps(input: HistoricSqlProbeRegistryDeps) { + return { + factories: input.factories ?? defaultHistoricSqlProbeRunnerFactories, + cache: input.cache ?? DEFAULT_RUNNER_CACHE, + }; +} + +export function historicSqlProbeCatalogName( + dialect: HistoricSqlDialect, + deps: HistoricSqlProbeRegistryDeps = {}, +): string { + return registryDeps(deps).factories[dialect].catalogName; +} + +async function loadHistoricSqlProbeRunner( + dialect: HistoricSqlDialect, + deps: HistoricSqlProbeRegistryDeps = {}, +): Promise { + const { factories, cache } = registryDeps(deps); + const cached = cache.get(dialect); + if (cached) { + return cached; + } + const runner = await factories[dialect].load(); + cache.set(dialect, runner); + return runner; +} + +export async function runHistoricSqlReadinessProbe( + input: HistoricSqlProbeInput, + deps: HistoricSqlProbeRegistryDeps = {}, +): Promise { + const dialect = queryHistoryDialectForConnection(input.connection); + if (!dialect) { + return null; + } + const runner = await loadHistoricSqlProbeRunner(dialect, deps); + try { + return { + ok: true, + dialect, + runner, + result: await runner.run(input), + }; + } catch (error) { + return { ok: false, dialect, runner, error }; + } +} diff --git a/packages/cli/src/context/ingest/historic-sql-probes/bigquery-runner.test.ts b/packages/cli/src/context/ingest/historic-sql-probes/bigquery-runner.test.ts new file mode 100644 index 00000000..7a2db117 --- /dev/null +++ b/packages/cli/src/context/ingest/historic-sql-probes/bigquery-runner.test.ts @@ -0,0 +1,110 @@ +import { describe, expect, it, vi } from 'vitest'; +import { HistoricSqlGrantsMissingError } from '../adapters/historic-sql/errors.js'; +import { BigQueryJobsByProjectProbeRunner } from './bigquery-runner.js'; + +describe('BigQueryJobsByProjectProbeRunner', () => { + it('creates a region-scoped reader, runs it, and cleans up the connector', async () => { + const cleanup = vi.fn(async () => undefined); + const reader = { + probe: vi.fn(async () => ({ warnings: [], info: ['region: eu'] })), + }; + const createReader = vi.fn(() => reader); + const runner = new BigQueryJobsByProjectProbeRunner({ + createReader, + createClient: () => ({ client: { executeQuery: vi.fn() }, cleanup }), + resolveReference: () => '{"project_id":"project-1"}', + }); + + await expect( + runner.run({ + projectDir: '/work/project', + connectionId: 'bq', + connection: { + driver: 'bigquery', + credentials_json: 'env:BQ_CREDENTIALS_JSON', + location: 'EU', + }, + env: {}, + }), + ).resolves.toEqual({ warnings: [], info: ['region: eu'] }); + expect(createReader).toHaveBeenCalledWith({ projectId: 'project-1', region: 'EU' }); + expect(reader.probe).toHaveBeenCalledOnce(); + expect(cleanup).toHaveBeenCalledOnce(); + }); + + it('uses us as the default BigQuery region', async () => { + const createReader = vi.fn(() => ({ + probe: vi.fn(async () => ({ warnings: [], info: [] })), + })); + const runner = new BigQueryJobsByProjectProbeRunner({ + createReader, + createClient: () => ({ client: {}, cleanup: vi.fn(async () => undefined) }), + resolveReference: () => '{"project_id":"project-1"}', + }); + + await runner.run({ + projectDir: '/work/project', + connectionId: 'bq', + connection: { + driver: 'bigquery', + credentials_json: '{"project_id":"project-1"}', + }, + env: {}, + }); + + expect(createReader).toHaveBeenCalledWith({ projectId: 'project-1', region: 'us' }); + }); + + it('rejects missing BigQuery credentials_json.project_id', async () => { + const runner = new BigQueryJobsByProjectProbeRunner({ + createReader: vi.fn(), + createClient: () => ({ client: {}, cleanup: vi.fn() }), + resolveReference: () => '{"client_email":"svc@example.test"}', + }); + + await expect( + runner.run({ + projectDir: '/work/project', + connectionId: 'bq', + connection: { + driver: 'bigquery', + credentials_json: 'env:BQ_CREDENTIALS_JSON', + }, + env: {}, + }), + ).rejects.toThrow('Query history BigQuery connection bq requires credentials_json.project_id'); + }); + + it('formats successful BigQuery details', () => { + const runner = new BigQueryJobsByProjectProbeRunner(); + + expect( + runner.formatSuccessDetail({ + warnings: ['JOBS_BY_PROJECT is delayed'], + info: ['region: us'], + }), + ).toEqual({ + detail: 'INFORMATION_SCHEMA.JOBS_BY_PROJECT ready; region: us', + warnings: ['JOBS_BY_PROJECT is delayed'], + }); + }); + + it('maps BigQuery grant errors to runner advice', () => { + const runner = new BigQueryJobsByProjectProbeRunner(); + + expect( + runner.fixAdvice( + new HistoricSqlGrantsMissingError({ + dialect: 'bigquery', + message: 'principal cannot query JOBS_BY_PROJECT', + remediation: + 'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.', + }), + ), + ).toEqual({ + failHeadline: 'BigQuery principal cannot read INFORMATION_SCHEMA.JOBS_BY_PROJECT', + remediation: + 'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.', + }); + }); +}); diff --git a/packages/cli/src/context/ingest/historic-sql-probes/bigquery-runner.ts b/packages/cli/src/context/ingest/historic-sql-probes/bigquery-runner.ts new file mode 100644 index 00000000..09ad65d5 --- /dev/null +++ b/packages/cli/src/context/ingest/historic-sql-probes/bigquery-runner.ts @@ -0,0 +1,160 @@ +import { HistoricSqlGrantsMissingError } from '../adapters/historic-sql/errors.js'; +import { BigQueryHistoricSqlQueryHistoryReader } from '../adapters/historic-sql/bigquery-query-history-reader.js'; +import { + type HistoricSqlFixAdvice, + type HistoricSqlProbeInput, + type HistoricSqlProbeRunner, + type HistoricSqlSuccessDetail, +} from '../historic-sql-probes.js'; +import { resolveKtxConfigReference } from '../../core/config-reference.js'; +import { + isKtxBigQueryConnectionConfig, + KtxBigQueryScanConnector, + type KtxBigQueryConnectionConfig, +} from '../../../connectors/bigquery/connector.js'; + +interface GenericProbeResult { + warnings: string[]; + info?: string[]; +} + +interface ClientHandle { + client: unknown; + cleanup(): Promise; +} + +interface BigQueryJobsByProjectProbeRunnerOptions { + createReader?: (options: { projectId: string; region: string }) => { + probe(client: unknown): Promise; + }; + createClient?: ( + input: HistoricSqlProbeInput & { connection: KtxBigQueryConnectionConfig }, + ) => ClientHandle; + resolveReference?: (value: string | undefined, env: NodeJS.ProcessEnv) => string | undefined; +} + +function bigQueryProjectId( + connectionId: string, + connection: KtxBigQueryConnectionConfig, + env: NodeJS.ProcessEnv, + resolveReference: (value: string | undefined, env: NodeJS.ProcessEnv) => string | undefined, +): string { + const rawCredentials = + typeof connection.credentials_json === 'string' ? connection.credentials_json : ''; + const resolvedCredentials = resolveReference(rawCredentials, env); + if (!resolvedCredentials) { + throw new Error(`Query history BigQuery connection ${connectionId} requires credentials_json`); + } + const parsed = JSON.parse(resolvedCredentials) as { project_id?: unknown }; + if (typeof parsed.project_id !== 'string' || parsed.project_id.trim().length === 0) { + throw new Error( + `Query history BigQuery connection ${connectionId} requires credentials_json.project_id`, + ); + } + return parsed.project_id; +} + +function bigQueryRegion(connection: KtxBigQueryConnectionConfig): string { + return typeof connection.location === 'string' && connection.location.trim().length > 0 + ? connection.location.trim() + : 'us'; +} + +function infoSuffix(info: readonly string[] | undefined): string { + return info && info.length > 0 ? `; ${info.join('; ')}` : ''; +} + +export class BigQueryJobsByProjectProbeRunner implements HistoricSqlProbeRunner { + readonly dialect = 'bigquery' as const; + readonly catalogName = 'INFORMATION_SCHEMA.JOBS_BY_PROJECT'; + + private readonly createReader: (options: { projectId: string; region: string }) => { + probe(client: unknown): Promise; + }; + private readonly createClient: ( + input: HistoricSqlProbeInput & { connection: KtxBigQueryConnectionConfig }, + ) => ClientHandle; + private readonly resolveReference: ( + value: string | undefined, + env: NodeJS.ProcessEnv, + ) => string | undefined; + + constructor(options: BigQueryJobsByProjectProbeRunnerOptions = {}) { + this.createReader = + options.createReader ?? + ((readerOptions) => new BigQueryHistoricSqlQueryHistoryReader(readerOptions)); + this.createClient = + options.createClient ?? + ((input) => { + const connector = new KtxBigQueryScanConnector({ + connectionId: input.connectionId, + connection: input.connection, + env: input.env, + }); + return { + client: { + async executeQuery(sql: string) { + const result = await connector.executeReadOnly( + { connectionId: input.connectionId, sql }, + {} as never, + ); + return { + headers: result.headers, + rows: result.rows, + totalRows: result.totalRows, + }; + }, + }, + cleanup: () => connector.cleanup(), + }; + }); + this.resolveReference = options.resolveReference ?? resolveKtxConfigReference; + } + + async run(input: HistoricSqlProbeInput): Promise { + const inputDriver = input.connection.driver ?? 'unknown'; + if (!isKtxBigQueryConnectionConfig(input.connection)) { + throw new Error(`Native BigQuery connector cannot run driver "${inputDriver}"`); + } + const projectId = bigQueryProjectId( + input.connectionId, + input.connection, + input.env ?? process.env, + this.resolveReference, + ); + const reader = this.createReader({ + projectId, + region: bigQueryRegion(input.connection), + }); + const handle = this.createClient({ + ...input, + connection: input.connection, + }); + try { + return await reader.probe(handle.client); + } finally { + await handle.cleanup(); + } + } + + formatSuccessDetail(result: unknown): HistoricSqlSuccessDetail { + const probeResult = result as GenericProbeResult; + return { + detail: `${this.catalogName} ready${infoSuffix(probeResult.info)}`, + warnings: probeResult.warnings, + }; + } + + fixAdvice(error: unknown): HistoricSqlFixAdvice { + if (error instanceof HistoricSqlGrantsMissingError) { + return { + failHeadline: 'BigQuery principal cannot read INFORMATION_SCHEMA.JOBS_BY_PROJECT', + remediation: error.remediation, + }; + } + return { + failHeadline: `${this.catalogName} readiness check failed`, + remediation: error instanceof Error ? error.message : String(error), + }; + } +} diff --git a/packages/cli/src/context/ingest/historic-sql-probes/postgres-runner.test.ts b/packages/cli/src/context/ingest/historic-sql-probes/postgres-runner.test.ts new file mode 100644 index 00000000..bcd6d187 --- /dev/null +++ b/packages/cli/src/context/ingest/historic-sql-probes/postgres-runner.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + HistoricSqlExtensionMissingError, + HistoricSqlGrantsMissingError, + HistoricSqlVersionUnsupportedError, +} from '../adapters/historic-sql/errors.js'; +import { PostgresPgssProbeRunner } from './postgres-runner.js'; + +describe('PostgresPgssProbeRunner', () => { + it('runs the pg_stat_statements reader and cleans up the client', async () => { + const cleanup = vi.fn(async () => undefined); + const reader = { + probe: vi.fn(async () => ({ + pgServerVersion: 'PostgreSQL 16.4', + warnings: [], + info: ['tracked statements: 12'], + })), + }; + const runner = new PostgresPgssProbeRunner({ + reader, + createClient: () => ({ client: { executeQuery: vi.fn() }, cleanup }), + }); + + await expect( + runner.run({ + projectDir: '/work/project', + connectionId: 'warehouse', + connection: { driver: 'postgres', url: 'env:DATABASE_URL' }, + env: {}, + }), + ).resolves.toEqual({ + pgServerVersion: 'PostgreSQL 16.4', + warnings: [], + info: ['tracked statements: 12'], + }); + expect(reader.probe).toHaveBeenCalledOnce(); + expect(cleanup).toHaveBeenCalledOnce(); + }); + + it('rejects non-Postgres connections', async () => { + const runner = new PostgresPgssProbeRunner({ + reader: { probe: vi.fn() }, + createClient: () => ({ client: {}, cleanup: vi.fn() }), + }); + + await expect( + runner.run({ + projectDir: '/work/project', + connectionId: 'warehouse', + connection: { driver: 'snowflake' }, + env: {}, + }), + ).rejects.toThrow('Native PostgreSQL connector cannot run driver "snowflake"'); + }); + + it('formats successful Postgres details', () => { + const runner = new PostgresPgssProbeRunner(); + + expect( + runner.formatSuccessDetail({ + pgServerVersion: 'PostgreSQL 16.4', + warnings: ['pg_stat_statements.track is top'], + info: ['tracked statements: 12'], + }), + ).toEqual({ + detail: 'pg_stat_statements ready (PostgreSQL 16.4); tracked statements: 12', + warnings: ['pg_stat_statements.track is top'], + }); + }); + + it('maps Postgres probe errors to actionable advice', () => { + const runner = new PostgresPgssProbeRunner(); + + expect( + runner.fixAdvice( + new HistoricSqlExtensionMissingError({ + dialect: 'postgres', + message: 'pg_stat_statements missing', + remediation: 'CREATE EXTENSION pg_stat_statements;', + }), + ), + ).toEqual({ + failHeadline: 'pg_stat_statements extension is missing', + remediation: 'CREATE EXTENSION pg_stat_statements;', + }); + + expect( + runner.fixAdvice( + new HistoricSqlGrantsMissingError({ + dialect: 'postgres', + message: 'missing grants', + remediation: 'GRANT pg_read_all_stats TO ;', + }), + ), + ).toEqual({ + failHeadline: 'Postgres connection role lacks pg_read_all_stats', + remediation: 'GRANT pg_read_all_stats TO ;', + }); + + expect( + runner.fixAdvice( + new HistoricSqlVersionUnsupportedError({ + dialect: 'postgres', + detectedVersion: 'PostgreSQL 13.12', + minimumVersion: 'PostgreSQL 14', + }), + ), + ).toEqual({ + failHeadline: 'Postgres version too old', + remediation: 'Use PostgreSQL 14 or newer, or disable query history for this connection', + }); + }); +}); diff --git a/packages/cli/src/context/ingest/historic-sql-probes/postgres-runner.ts b/packages/cli/src/context/ingest/historic-sql-probes/postgres-runner.ts new file mode 100644 index 00000000..7ebf9721 --- /dev/null +++ b/packages/cli/src/context/ingest/historic-sql-probes/postgres-runner.ts @@ -0,0 +1,111 @@ +import { + HistoricSqlExtensionMissingError, + HistoricSqlGrantsMissingError, + HistoricSqlVersionUnsupportedError, +} from '../adapters/historic-sql/errors.js'; +import { PostgresPgssReader } from '../adapters/historic-sql/postgres-pgss-reader.js'; +import type { PostgresPgssProbeResult } from '../adapters/historic-sql/types.js'; +import { + type HistoricSqlFixAdvice, + type HistoricSqlProbeInput, + type HistoricSqlProbeRunner, + type HistoricSqlSuccessDetail, +} from '../historic-sql-probes.js'; +import { + isKtxPostgresConnectionConfig, + type KtxPostgresConnectionConfig, +} from '../../../connectors/postgres/connector.js'; +import { KtxPostgresHistoricSqlQueryClient } from '../../../connectors/postgres/historic-sql-query-client.js'; + +interface ClientHandle { + client: unknown; + cleanup(): Promise; +} + +interface PostgresPgssProbeRunnerOptions { + reader?: { probe(client: unknown): Promise }; + createClient?: ( + input: HistoricSqlProbeInput & { connection: KtxPostgresConnectionConfig }, + ) => ClientHandle; +} + +function genericAdvice(error: unknown, catalogName: string): HistoricSqlFixAdvice { + return { + failHeadline: `${catalogName} readiness check failed`, + remediation: error instanceof Error ? error.message : String(error), + }; +} + +function infoSuffix(info: readonly string[] | undefined): string { + return info && info.length > 0 ? `; ${info.join('; ')}` : ''; +} + +export class PostgresPgssProbeRunner implements HistoricSqlProbeRunner { + readonly dialect = 'postgres' as const; + readonly catalogName = 'pg_stat_statements'; + + private readonly reader: { probe(client: unknown): Promise }; + private readonly createClient: ( + input: HistoricSqlProbeInput & { connection: KtxPostgresConnectionConfig }, + ) => ClientHandle; + + constructor(options: PostgresPgssProbeRunnerOptions = {}) { + this.reader = options.reader ?? new PostgresPgssReader(); + this.createClient = + options.createClient ?? + ((input) => { + const client = new KtxPostgresHistoricSqlQueryClient({ + connectionId: input.connectionId, + connection: input.connection, + env: input.env, + }); + return { client, cleanup: () => client.cleanup() }; + }); + } + + async run(input: HistoricSqlProbeInput): Promise { + const inputDriver = input.connection.driver ?? 'unknown'; + if (!isKtxPostgresConnectionConfig(input.connection)) { + throw new Error(`Native PostgreSQL connector cannot run driver "${inputDriver}"`); + } + const handle = this.createClient({ + ...input, + connection: input.connection, + }); + try { + return await this.reader.probe(handle.client); + } finally { + await handle.cleanup(); + } + } + + formatSuccessDetail(result: unknown): HistoricSqlSuccessDetail { + const pgssResult = result as PostgresPgssProbeResult; + return { + detail: `pg_stat_statements ready (${pgssResult.pgServerVersion})${infoSuffix(pgssResult.info)}`, + warnings: pgssResult.warnings, + }; + } + + fixAdvice(error: unknown): HistoricSqlFixAdvice { + if (error instanceof HistoricSqlExtensionMissingError) { + return { + failHeadline: 'pg_stat_statements extension is missing', + remediation: error.remediation, + }; + } + if (error instanceof HistoricSqlGrantsMissingError) { + return { + failHeadline: 'Postgres connection role lacks pg_read_all_stats', + remediation: error.remediation, + }; + } + if (error instanceof HistoricSqlVersionUnsupportedError) { + return { + failHeadline: 'Postgres version too old', + remediation: 'Use PostgreSQL 14 or newer, or disable query history for this connection', + }; + } + return genericAdvice(error, this.catalogName); + } +} diff --git a/packages/cli/src/context/ingest/historic-sql-probes/snowflake-runner.test.ts b/packages/cli/src/context/ingest/historic-sql-probes/snowflake-runner.test.ts new file mode 100644 index 00000000..2d6835bf --- /dev/null +++ b/packages/cli/src/context/ingest/historic-sql-probes/snowflake-runner.test.ts @@ -0,0 +1,82 @@ +import { describe, expect, it, vi } from 'vitest'; +import { HistoricSqlGrantsMissingError } from '../adapters/historic-sql/errors.js'; +import { SnowflakeAccountUsageProbeRunner } from './snowflake-runner.js'; + +describe('SnowflakeAccountUsageProbeRunner', () => { + it('runs the account usage reader and cleans up the client', async () => { + const cleanup = vi.fn(async () => undefined); + const reader = { + probe: vi.fn(async () => ({ warnings: [], info: ['query history available'] })), + }; + const runner = new SnowflakeAccountUsageProbeRunner({ + reader, + createClient: () => ({ client: { executeQuery: vi.fn() }, cleanup }), + }); + + await expect( + runner.run({ + projectDir: '/work/project', + connectionId: 'warehouse', + connection: { + driver: 'snowflake', + account: 'ACCT', + warehouse: 'WH', + database: 'ANALYTICS', + username: 'reader', + }, + env: {}, + }), + ).resolves.toEqual({ warnings: [], info: ['query history available'] }); + expect(reader.probe).toHaveBeenCalledOnce(); + expect(cleanup).toHaveBeenCalledOnce(); + }); + + it('rejects non-Snowflake connections', async () => { + const runner = new SnowflakeAccountUsageProbeRunner({ + reader: { probe: vi.fn() }, + createClient: () => ({ client: {}, cleanup: vi.fn() }), + }); + + await expect( + runner.run({ + projectDir: '/work/project', + connectionId: 'warehouse', + connection: { driver: 'postgres' }, + env: {}, + }), + ).rejects.toThrow('Native Snowflake connector cannot run driver "postgres"'); + }); + + it('formats successful Snowflake details', () => { + const runner = new SnowflakeAccountUsageProbeRunner(); + + expect( + runner.formatSuccessDetail({ + warnings: ['query history is delayed'], + info: ['warehouse: WH'], + }), + ).toEqual({ + detail: 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY ready; warehouse: WH', + warnings: ['query history is delayed'], + }); + }); + + it('maps Snowflake grant errors to runner advice', () => { + const runner = new SnowflakeAccountUsageProbeRunner(); + + expect( + runner.fixAdvice( + new HistoricSqlGrantsMissingError({ + dialect: 'snowflake', + message: 'role cannot read account usage', + remediation: + 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE ;', + }), + ), + ).toEqual({ + failHeadline: 'Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', + remediation: + 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE ;', + }); + }); +}); diff --git a/packages/cli/src/context/ingest/historic-sql-probes/snowflake-runner.ts b/packages/cli/src/context/ingest/historic-sql-probes/snowflake-runner.ts new file mode 100644 index 00000000..415b46d6 --- /dev/null +++ b/packages/cli/src/context/ingest/historic-sql-probes/snowflake-runner.ts @@ -0,0 +1,96 @@ +import { HistoricSqlGrantsMissingError } from '../adapters/historic-sql/errors.js'; +import { SnowflakeHistoricSqlQueryHistoryReader } from '../adapters/historic-sql/snowflake-query-history-reader.js'; +import { + type HistoricSqlFixAdvice, + type HistoricSqlProbeInput, + type HistoricSqlProbeRunner, + type HistoricSqlSuccessDetail, +} from '../historic-sql-probes.js'; +import { + isKtxSnowflakeConnectionConfig, + type KtxSnowflakeConnectionConfig, +} from '../../../connectors/snowflake/connector.js'; +import { KtxSnowflakeHistoricSqlQueryClient } from '../../../connectors/snowflake/historic-sql-query-client.js'; + +interface GenericProbeResult { + warnings: string[]; + info?: string[]; +} + +interface ClientHandle { + client: unknown; + cleanup(): Promise; +} + +interface SnowflakeAccountUsageProbeRunnerOptions { + reader?: { probe(client: unknown): Promise }; + createClient?: ( + input: HistoricSqlProbeInput & { connection: KtxSnowflakeConnectionConfig }, + ) => ClientHandle; +} + +function infoSuffix(info: readonly string[] | undefined): string { + return info && info.length > 0 ? `; ${info.join('; ')}` : ''; +} + +export class SnowflakeAccountUsageProbeRunner implements HistoricSqlProbeRunner { + readonly dialect = 'snowflake' as const; + readonly catalogName = 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY'; + + private readonly reader: { probe(client: unknown): Promise }; + private readonly createClient: ( + input: HistoricSqlProbeInput & { connection: KtxSnowflakeConnectionConfig }, + ) => ClientHandle; + + constructor(options: SnowflakeAccountUsageProbeRunnerOptions = {}) { + this.reader = options.reader ?? new SnowflakeHistoricSqlQueryHistoryReader(); + this.createClient = + options.createClient ?? + ((input) => { + const client = new KtxSnowflakeHistoricSqlQueryClient({ + connectionId: input.connectionId, + connection: input.connection, + projectDir: input.projectDir, + env: input.env, + }); + return { client, cleanup: () => client.cleanup() }; + }); + } + + async run(input: HistoricSqlProbeInput): Promise { + const inputDriver = input.connection.driver ?? 'unknown'; + if (!isKtxSnowflakeConnectionConfig(input.connection)) { + throw new Error(`Native Snowflake connector cannot run driver "${inputDriver}"`); + } + const handle = this.createClient({ + ...input, + connection: input.connection, + }); + try { + return await this.reader.probe(handle.client); + } finally { + await handle.cleanup(); + } + } + + formatSuccessDetail(result: unknown): HistoricSqlSuccessDetail { + const probeResult = result as GenericProbeResult; + return { + detail: `${this.catalogName} ready${infoSuffix(probeResult.info)}`, + warnings: probeResult.warnings, + }; + } + + fixAdvice(error: unknown): HistoricSqlFixAdvice { + if (error instanceof HistoricSqlGrantsMissingError) { + return { + failHeadline: 'Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', + remediation: error.remediation, + }; + } + return { + failHeadline: `${this.catalogName} readiness check failed`, + remediation: error instanceof Error ? error.message : String(error), + }; + } +} diff --git a/packages/cli/src/context/ingest/local-stage-ingest.test.ts b/packages/cli/src/context/ingest/local-stage-ingest.test.ts index 7a2c5a6a..3f0e617f 100644 --- a/packages/cli/src/context/ingest/local-stage-ingest.test.ts +++ b/packages/cli/src/context/ingest/local-stage-ingest.test.ts @@ -591,7 +591,7 @@ describe('local ingest', () => { status: 'done', adapter: 'live-database', connectionId: 'warehouse', - rawFileCount: 3, + rawFileCount: 4, workUnitCount: 1, }); }); diff --git a/packages/cli/src/context/scan/constraint-discovery.test.ts b/packages/cli/src/context/scan/constraint-discovery.test.ts new file mode 100644 index 00000000..78620204 --- /dev/null +++ b/packages/cli/src/context/scan/constraint-discovery.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, it } from 'vitest'; +import { constraintDiscoveryWarning, tryConstraintQuery } from './constraint-discovery.js'; + +describe('tryConstraintQuery', () => { + it('returns the query value when the query succeeds', async () => { + await expect( + tryConstraintQuery( + { + schema: 'public', + kind: 'primary_key', + isDeniedError: () => false, + }, + async () => ['id'], + ), + ).resolves.toEqual({ ok: true, value: ['id'] }); + }); + + it('returns a recoverable warning when the classifier recognizes denial', async () => { + const error = Object.assign(new Error('permission denied'), { code: '42501' }); + + await expect( + tryConstraintQuery( + { + schema: 'analytics', + kind: 'foreign_key', + isDeniedError: (candidate) => candidate === error, + }, + async () => { + throw error; + }, + ), + ).resolves.toEqual({ + ok: false, + warning: { + code: 'constraint_discovery_unauthorized', + message: 'Skipped foreign-key discovery in analytics (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'analytics', kind: 'foreign_key' }, + }, + }); + }); + + it('rethrows non-denial errors unchanged', async () => { + const error = Object.assign(new Error('connection reset'), { code: 'ECONNRESET' }); + + await expect( + tryConstraintQuery( + { + schema: 'public', + kind: 'primary_key', + isDeniedError: () => false, + }, + async () => { + throw error; + }, + ), + ).rejects.toBe(error); + }); +}); + +describe('constraintDiscoveryWarning', () => { + it('formats stable primary-key warning text and metadata', () => { + expect(constraintDiscoveryWarning({ schema: 'public', kind: 'primary_key' })).toEqual({ + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'public', kind: 'primary_key' }, + }); + }); +}); diff --git a/packages/cli/src/context/scan/constraint-discovery.ts b/packages/cli/src/context/scan/constraint-discovery.ts new file mode 100644 index 00000000..d58e9053 --- /dev/null +++ b/packages/cli/src/context/scan/constraint-discovery.ts @@ -0,0 +1,42 @@ +import type { KtxScanWarning } from './types.js'; + +export type ConstraintDiscoveryKind = 'primary_key' | 'foreign_key'; + +export interface ConstraintQueryContext { + schema: string; + kind: ConstraintDiscoveryKind; + isDeniedError: (error: unknown) => boolean; +} + +export type ConstraintQueryOutcome = { ok: true; value: T } | { ok: false; warning: KtxScanWarning }; + +export function constraintDiscoveryWarning(input: { + schema: string; + kind: ConstraintDiscoveryKind; +}): KtxScanWarning { + return { + code: 'constraint_discovery_unauthorized', + message: + `Skipped ${input.kind === 'primary_key' ? 'primary-key' : 'foreign-key'} ` + + `discovery in ${input.schema} (insufficient grants on system catalogs)`, + recoverable: true, + metadata: { schema: input.schema, kind: input.kind }, + }; +} + +export async function tryConstraintQuery( + ctx: ConstraintQueryContext, + fn: () => Promise, +): Promise> { + try { + return { ok: true, value: await fn() }; + } catch (error) { + if (!ctx.isDeniedError(error)) { + throw error; + } + return { + ok: false, + warning: constraintDiscoveryWarning({ schema: ctx.schema, kind: ctx.kind }), + }; + } +} diff --git a/packages/cli/src/context/scan/local-scan.test.ts b/packages/cli/src/context/scan/local-scan.test.ts index cb7e0252..f3c1353d 100644 --- a/packages/cli/src/context/scan/local-scan.test.ts +++ b/packages/cli/src/context/scan/local-scan.test.ts @@ -180,6 +180,13 @@ function fetchOnlyAdapter(options: { extractedAt?: () => string; snapshot?: KtxS 'utf-8', ); await writeFile(join(stagedDir, 'foreign-keys.json'), '{"foreignKeys":[]}\n', 'utf-8'); + if (scanSnapshot.warnings?.length) { + await writeFile( + join(stagedDir, 'warnings.json'), + `${JSON.stringify({ warnings: scanSnapshot.warnings })}\n`, + 'utf-8', + ); + } for (const table of scanSnapshot.tables) { await writeFile(join(stagedDir, 'tables', `${table.name}.json`), `${JSON.stringify(table)}\n`, 'utf-8'); } @@ -336,6 +343,48 @@ describe('local scan', () => { }); }); + it('threads structural snapshot warnings into the final scan report', async () => { + const result = await runLocalScan({ + project, + adapters: [ + fetchOnlyAdapter({ + snapshot: { + ...defaultFetchSnapshot(), + warnings: [ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'public', kind: 'primary_key' }, + }, + ], + }, + }), + ], + connectionId: 'warehouse', + jobId: 'scan-run-structural-warnings', + now: () => new Date('2026-04-29T09:01:00.000Z'), + }); + + expect(result.report.warnings).toEqual([ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'public', kind: 'primary_key' }, + }, + ]); + await expect( + readFile( + join( + project.projectDir, + 'raw-sources/warehouse/live-database/2026-04-29-090100-scan-run-structural-warnings/scan-report.json', + ), + 'utf-8', + ), + ).resolves.toContain('"constraint_discovery_unauthorized"'); + }); + it('passes enabled_tables as fetch context tableScope and does not post-filter staged snapshots', async () => { project.config.connections.warehouse = { ...project.config.connections.warehouse, diff --git a/packages/cli/src/context/scan/local-scan.ts b/packages/cli/src/context/scan/local-scan.ts index 0e2842da..703ef73f 100644 --- a/packages/cli/src/context/scan/local-scan.ts +++ b/packages/cli/src/context/scan/local-scan.ts @@ -467,6 +467,9 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise { }); }); + it('rebuilds scan warnings from persisted live-database warning files', async () => { + const rawRoot = 'raw-sources/warehouse/live-database/sync-warnings'; + await project.fileStore.writeFile( + `${rawRoot}/connection.json`, + '{"connectionId":"warehouse","metadata":{}}\n', + 'ktx', + 'ktx@example.com', + 'Seed connection artifact', + ); + await project.fileStore.writeFile( + `${rawRoot}/warnings.json`, + `${JSON.stringify( + { + warnings: [ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped foreign-key discovery in public (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'public', kind: 'foreign_key' }, + }, + ], + }, + null, + 2, + )}\n`, + 'ktx', + 'ktx@example.com', + 'Seed warning artifact', + ); + await project.fileStore.writeFile( + `${rawRoot}/tables/orders.json`, + '{"name":"orders","catalog":null,"db":"public","kind":"table","comment":null,"estimatedRows":null,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":false,"comment":null}],"foreignKeys":[]}\n', + 'ktx', + 'ktx@example.com', + 'Seed orders artifact', + ); + + const snapshot = await readLocalScanStructuralSnapshot({ + project, + connectionId: 'warehouse', + driver: 'postgres', + rawSourcesDir: rawRoot, + extractedAtFallback: '2026-04-29T13:00:00.000Z', + }); + + expect(snapshot.warnings).toEqual([ + { + code: 'constraint_discovery_unauthorized', + message: 'Skipped foreign-key discovery in public (insufficient grants on system catalogs)', + recoverable: true, + metadata: { schema: 'public', kind: 'foreign_key' }, + }, + ]); + }); + it('uses the scan report timestamp when connection.json omits extractedAt', async () => { const rawRoot = 'raw-sources/warehouse/live-database/sync-2'; await project.fileStore.writeFile( @@ -192,4 +247,32 @@ describe('readLocalScanStructuralSnapshot', () => { expect(snapshot.extractedAt).toBe('2026-04-29T13:00:00.000Z'); }); + + it('tolerates older live-database staged directories without warnings.json', async () => { + const rawRoot = 'raw-sources/warehouse/live-database/sync-no-warnings'; + await project.fileStore.writeFile( + `${rawRoot}/connection.json`, + '{"connectionId":"warehouse","metadata":{}}\n', + 'ktx', + 'ktx@example.com', + 'Seed connection artifact', + ); + await project.fileStore.writeFile( + `${rawRoot}/tables/orders.json`, + '{"name":"orders","catalog":null,"db":null,"kind":"table","comment":null,"estimatedRows":null,"columns":[{"name":"id","nativeType":"integer","normalizedType":"integer","dimensionType":"number","nullable":false,"primaryKey":true,"comment":null}],"foreignKeys":[]}\n', + 'ktx', + 'ktx@example.com', + 'Seed orders artifact', + ); + + const snapshot = await readLocalScanStructuralSnapshot({ + project, + connectionId: 'warehouse', + driver: 'postgres', + rawSourcesDir: rawRoot, + extractedAtFallback: '2026-04-29T13:00:00.000Z', + }); + + expect(snapshot.warnings).toEqual([]); + }); }); diff --git a/packages/cli/src/context/scan/local-structural-artifacts.ts b/packages/cli/src/context/scan/local-structural-artifacts.ts index 2c968384..1abc68bc 100644 --- a/packages/cli/src/context/scan/local-structural-artifacts.ts +++ b/packages/cli/src/context/scan/local-structural-artifacts.ts @@ -1,6 +1,7 @@ import type { KtxLocalProject } from '../../context/project/project.js'; import type { KtxConnectionDriver, + KtxScanWarning, KtxSchemaColumn, KtxSchemaForeignKey, KtxSchemaSnapshot, @@ -30,6 +31,59 @@ function metadataRecord(value: unknown): Record { return isRecord(value) ? value : {}; } +const scanWarningCodes = new Set([ + 'connector_capability_missing', + 'sampling_failed', + 'statistics_failed', + 'llm_unavailable', + 'embedding_unavailable', + 'scan_enrichment_backend_not_configured', + 'relationship_validation_failed', + 'relationship_llm_invalid_reference', + 'relationship_llm_proposal_failed', + 'credential_redacted', + 'enrichment_failed', + 'description_fallback_used', + 'constraint_discovery_unauthorized', +]); + +function parseWarning(rawWarning: unknown, path: string): KtxScanWarning { + if ( + !isRecord(rawWarning) || + typeof rawWarning.code !== 'string' || + !scanWarningCodes.has(rawWarning.code as KtxScanWarning['code']) || + typeof rawWarning.message !== 'string' || + typeof rawWarning.recoverable !== 'boolean' + ) { + throw new Error(`Invalid KTX schema warning artifact: ${path}`); + } + return { + code: rawWarning.code as KtxScanWarning['code'], + message: rawWarning.message, + recoverable: rawWarning.recoverable, + ...(typeof rawWarning.table === 'string' ? { table: rawWarning.table } : {}), + ...(typeof rawWarning.column === 'string' ? { column: rawWarning.column } : {}), + ...(isRecord(rawWarning.metadata) ? { metadata: rawWarning.metadata } : {}), + }; +} + +async function readWarnings(input: ReadLocalScanStructuralSnapshotInput): Promise { + const path = `${input.rawSourcesDir}/warnings.json`; + try { + const warningRaw = await input.project.fileStore.readFile(path); + const parsed = JSON.parse(warningRaw.content) as unknown; + if (!isRecord(parsed) || !Array.isArray(parsed.warnings)) { + throw new Error(`Invalid KTX schema warnings artifact: ${path}`); + } + return parsed.warnings.map((warning) => parseWarning(warning, path)); + } catch (error) { + if (error instanceof Error && /not found|ENOENT|no such file/i.test(error.message)) { + return []; + } + throw error; + } +} + function optionalStringOrNull(value: unknown): string | null | undefined { if (value === undefined) { return undefined; @@ -113,6 +167,7 @@ export async function readLocalScanStructuralSnapshot( const tableRaw = await input.project.fileStore.readFile(path); tables.push(parseTable(tableRaw.content, path)); } + const warnings = await readWarnings(input); return { connectionId: typeof connection.connectionId === 'string' ? connection.connectionId : input.connectionId, @@ -121,5 +176,6 @@ export async function readLocalScanStructuralSnapshot( scope: isRecord(connection.scope) ? connection.scope : {}, metadata: metadataRecord(connection.metadata), tables, + warnings, }; } diff --git a/packages/cli/src/context/scan/types.ts b/packages/cli/src/context/scan/types.ts index d8e2aa5a..5590b465 100644 --- a/packages/cli/src/context/scan/types.ts +++ b/packages/cli/src/context/scan/types.ts @@ -90,6 +90,7 @@ export interface KtxSchemaSnapshot { scope: KtxSchemaScope; tables: KtxSchemaTable[]; metadata: Record; + warnings?: KtxScanWarning[]; } interface KtxCredentialEnvReference { @@ -364,7 +365,8 @@ type KtxScanWarningCode = | 'relationship_llm_proposal_failed' | 'credential_redacted' | 'enrichment_failed' - | 'description_fallback_used'; + | 'description_fallback_used' + | 'constraint_discovery_unauthorized'; export interface KtxScanWarning { code: KtxScanWarningCode; diff --git a/packages/cli/src/doctor.test.ts b/packages/cli/src/doctor.test.ts index fb661103..64050623 100644 --- a/packages/cli/src/doctor.test.ts +++ b/packages/cli/src/doctor.test.ts @@ -30,6 +30,30 @@ function makeIo() { }; } +function fakeDoctorHistoricSqlRunner() { + return { + dialect: 'postgres' as const, + catalogName: 'pg_stat_statements', + async run() { + return { warnings: [], info: [] }; + }, + formatSuccessDetail(result: unknown) { + const typed = result as { pgServerVersion?: string; warnings: string[]; info?: string[] }; + const info = typed.info && typed.info.length > 0 ? `; ${typed.info.join('; ')}` : ''; + return { + detail: `pg_stat_statements ready (${typed.pgServerVersion ?? 'PostgreSQL 16.4'})${info}`, + warnings: typed.warnings, + }; + }, + fixAdvice(error: unknown) { + return { + failHeadline: error instanceof Error ? error.message : String(error), + remediation: 'Fix query-history grants.', + }; + }, + }; +} + describe('formatDoctorReport', () => { it('shows the failing check and its fix in plain output', () => { const checks: DoctorCheck[] = [ @@ -539,14 +563,19 @@ describe('runKtxDoctor', () => { { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, testIo.io, { - postgresQueryHistoryProbe: async () => { + queryHistoryReadinessProbe: async () => { probeCalls += 1; return { - pgServerVersion: 'PostgreSQL 16.4', - warnings: [], - info: [ - 'pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn', - ], + ok: true, + dialect: 'postgres', + runner: fakeDoctorHistoricSqlRunner(), + result: { + pgServerVersion: 'PostgreSQL 16.4', + warnings: [], + info: [ + 'pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn', + ], + }, }; }, }, @@ -558,7 +587,7 @@ describe('runKtxDoctor', () => { expect(out).toContain('Query history'); expect(out).toContain('warehouse'); expect(out).toContain('pg_stat_statements ready (PostgreSQL 16.4)'); - expect(out).toContain('info: pg_stat_statements.max is 1000'); + expect(out).toContain('pg_stat_statements.max is 1000'); expect(out).not.toContain('Update the Postgres parameter group or config'); expect(out).toContain('ktx status --json'); expect(out).toContain('ktx sl'); @@ -634,10 +663,15 @@ describe('runKtxDoctor', () => { { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, testIo.io, { - postgresQueryHistoryProbe: async () => ({ - pgServerVersion: 'PostgreSQL 16.4', - warnings: [], - info: [], + queryHistoryReadinessProbe: async () => ({ + ok: true, + dialect: 'postgres', + runner: fakeDoctorHistoricSqlRunner(), + result: { + pgServerVersion: 'PostgreSQL 16.4', + warnings: [], + info: [], + }, }), }, ), @@ -842,9 +876,14 @@ describe('runKtxDoctor', () => { { command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, testIo.io, { - postgresQueryHistoryProbe: async () => { + queryHistoryReadinessProbe: async () => { probeCalls += 1; - return { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }; + return { + ok: true, + dialect: 'postgres', + runner: fakeDoctorHistoricSqlRunner(), + result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }, + }; }, }, ), diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 15c71e00..50401df1 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -284,7 +284,30 @@ describe('runKtxIngest', () => { return 0; }, scanConnection: async () => 0, - historicSqlProbe: async () => ({ ok: true, lines: ['PASS Historic SQL probe skipped in test'] }), + historicSqlReadinessProbe: async () => ({ + ok: true, + dialect: 'postgres', + runner: { + dialect: 'postgres', + catalogName: 'pg_stat_statements', + async run() { + return { warnings: [], info: [] }; + }, + formatSuccessDetail() { + return { + detail: 'pg_stat_statements ready (PostgreSQL 16.4)', + warnings: [], + }; + }, + fixAdvice() { + return { + failHeadline: 'pg_stat_statements unavailable', + remediation: 'Fix query-history grants.', + }; + }, + }, + result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }, + }), }, context: async () => ({ status: 'skipped', projectDir }), runtime: async () => runtimeReady(projectDir), diff --git a/packages/cli/src/setup-agents.test.ts b/packages/cli/src/setup-agents.test.ts index a8090780..bd521787 100644 --- a/packages/cli/src/setup-agents.test.ts +++ b/packages/cli/src/setup-agents.test.ts @@ -418,6 +418,11 @@ describe('setup agents', () => { label: 'Ask data questions + manage KTX with CLI commands', hint: 'Adds an admin CLI skill so agents can run ktx status, sl, wiki, and setup commands.', }, + { + value: 'skip', + label: 'Skip agent setup for now', + hint: 'Leaves agent integration incomplete. You can run ktx setup --agents later.', + }, ], }); expect(prompts.multiselect).toHaveBeenCalledWith( @@ -427,6 +432,58 @@ describe('setup agents', () => { ); }); + it('lets interactive setup skip agent integration from the connection mode prompt', async () => { + const io = makeIo(); + const prompts = { + select: vi.fn(async () => 'skip'), + multiselect: vi.fn(async () => { + throw new Error('target selection should not run'); + }), + cancel: vi.fn(), + }; + + await expect( + runKtxSetupAgentsStep( + { + projectDir: tempDir, + inputMode: 'auto', + yes: false, + agents: true, + scope: 'project', + mode: 'mcp', + skipAgents: false, + }, + io.io, + { prompts }, + ), + ).resolves.toMatchObject({ status: 'skipped', projectDir: tempDir }); + + expect(prompts.select).toHaveBeenCalledWith({ + message: 'What should agents be allowed to do with this KTX project?', + options: [ + { + value: 'mcp', + label: 'Ask data questions with KTX MCP', + hint: 'Installs the MCP connection and analytics workflow skill. Best for normal use.', + }, + { + value: 'mcp-cli', + label: 'Ask data questions + manage KTX with CLI commands', + hint: 'Adds an admin CLI skill so agents can run ktx status, sl, wiki, and setup commands.', + }, + { + value: 'skip', + label: 'Skip agent setup for now', + hint: 'Leaves agent integration incomplete. You can run ktx setup --agents later.', + }, + ], + }); + expect(prompts.multiselect).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Agent integration skipped.'); + await expect(stat(join(tempDir, '.ktx/agents/install-manifest.json'))).rejects.toThrow(); + expect(await readKtxSetupState(tempDir)).toEqual({ completed_steps: [] }); + }); + it('prompts for global scope when every selected target supports it', async () => { const home = await mkdtemp(join(tmpdir(), 'ktx-setup-agents-home-')); const previousHome = process.env.HOME; diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index 113718cd..99d510c5 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -21,6 +21,7 @@ export type KtxAgentTarget = 'claude-code' | 'claude-desktop' | 'codex' | 'curso export type KtxAgentScope = 'project' | 'global' | 'local'; /** @internal */ export type KtxAgentInstallMode = 'mcp' | 'mcp-cli'; +type KtxAgentModePromptChoice = KtxAgentInstallMode | 'skip' | 'back'; export interface KtxSetupAgentsArgs { projectDir: string; @@ -1122,9 +1123,18 @@ export async function runKtxSetupAgentsStep( label: 'Ask data questions + manage KTX with CLI commands', hint: 'Adds an admin CLI skill so agents can run ktx status, sl, wiki, and setup commands.', }, + { + value: 'skip', + label: 'Skip agent setup for now', + hint: 'Leaves agent integration incomplete. You can run ktx setup --agents later.', + }, ], - })) as KtxAgentInstallMode | 'back'); + })) as KtxAgentModePromptChoice); if (mode === 'back') return { status: 'skipped', projectDir: args.projectDir }; + if (mode === 'skip') { + io.stdout.write('│ Agent integration skipped.\n'); + return { status: 'skipped', projectDir: args.projectDir }; + } const targets = args.target !== undefined diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 354ba24b..57f507d5 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -2116,9 +2116,40 @@ describe('setup databases step', () => { expect(io.stdout()).toContain('│ Changes: 0 changes across 56 tables'); }); + function fakeHistoricSqlRunner( + dialect: 'postgres' | 'snowflake' | 'bigquery', + catalogName: string, + ) { + return { + dialect, + catalogName, + async run() { + return { warnings: [], info: [] }; + }, + formatSuccessDetail() { + return { detail: `${catalogName} ready`, warnings: [] }; + }, + fixAdvice() { + return { + failHeadline: `${catalogName} unavailable`, + remediation: 'Fix query-history grants.', + }; + }, + }; + } + it('writes query history config for supported Snowflake databases after validation succeeds', async () => { const io = makeIo(); - const historicSqlProbe = vi.fn(async () => ({ ok: true, lines: [] })); + const runner = fakeHistoricSqlRunner( + 'snowflake', + 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', + ); + const historicSqlReadinessProbe = vi.fn(async () => ({ + ok: true as const, + dialect: 'snowflake' as const, + runner, + result: { warnings: [], info: [] }, + })); const result = await runKtxSetupDatabasesStep( { projectDir: tempDir, @@ -2136,7 +2167,7 @@ describe('setup databases step', () => { { testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0), - historicSqlProbe, + historicSqlReadinessProbe, prompts: makePromptAdapter({ selectValues: ['password'], textValues: ['env:SNOWFLAKE_ACCOUNT', 'WH', 'ANALYTICS', 'reader', ''], @@ -2144,11 +2175,11 @@ describe('setup databases step', () => { }), }, ); - expect(historicSqlProbe).toHaveBeenCalledWith( + expect(historicSqlReadinessProbe).toHaveBeenCalledWith( expect.objectContaining({ projectDir: tempDir, connectionId: 'snowflake', - dialect: 'snowflake', + connection: expect.objectContaining({ driver: 'snowflake' }), }), ); @@ -2245,7 +2276,15 @@ describe('setup databases step', () => { { testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0), - historicSqlProbe: vi.fn(async () => ({ ok: true, lines: [' OK pg_stat_statements ready (PostgreSQL 16.4)'] })), + historicSqlReadinessProbe: vi.fn(async () => { + const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements'); + return { + ok: true as const, + dialect: 'postgres' as const, + runner, + result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }, + }; + }), }, ); @@ -2315,7 +2354,13 @@ describe('setup databases step', () => { ); const io = makeIo(); const prompts = makePromptAdapter({ selectValues: ['yes', 'deep'] }); - const historicSqlProbe = vi.fn(async () => ({ ok: true, lines: [] })); + const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements'); + const historicSqlReadinessProbe = vi.fn(async () => ({ + ok: true as const, + dialect: 'postgres' as const, + runner, + result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }, + })); const result = await runKtxSetupDatabasesStep( { @@ -2330,7 +2375,7 @@ describe('setup databases step', () => { prompts, testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0), - historicSqlProbe, + historicSqlReadinessProbe, }, ); @@ -2349,11 +2394,13 @@ describe('setup databases step', () => { message: expect.stringContaining('How much database context should KTX build?'), }), ); - expect(historicSqlProbe).toHaveBeenCalledWith({ - projectDir: tempDir, - connectionId: 'warehouse', - dialect: 'postgres', - }); + expect(historicSqlReadinessProbe).toHaveBeenCalledWith( + expect.objectContaining({ + projectDir: tempDir, + connectionId: 'warehouse', + connection: expect.objectContaining({ driver: 'postgres' }), + }), + ); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.connections.warehouse).toMatchObject({ context: { @@ -2381,6 +2428,13 @@ describe('setup databases step', () => { 'utf-8', ); const io = makeIo(); + const runner = fakeHistoricSqlRunner('bigquery', 'INFORMATION_SCHEMA.JOBS_BY_PROJECT'); + const historicSqlReadinessProbe = vi.fn(async () => ({ + ok: true as const, + dialect: 'bigquery' as const, + runner, + result: { warnings: [], info: [] }, + })); const result = await runKtxSetupDatabasesStep( { @@ -2396,10 +2450,18 @@ describe('setup databases step', () => { { testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0), + historicSqlReadinessProbe, }, ); expect(result.status).toBe('ready'); + expect(historicSqlReadinessProbe).toHaveBeenCalledWith( + expect.objectContaining({ + projectDir: tempDir, + connectionId: 'analytics', + connection: expect.objectContaining({ driver: 'bigquery' }), + }), + ); const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'); const config = parseKtxProjectConfig(configText); expect(config.connections.analytics).toMatchObject({ @@ -2420,6 +2482,71 @@ describe('setup databases step', () => { expect(config.ingest.adapters).toEqual([]); }); + it('prints a non-blocking BigQuery query history probe failure with the grants remediation', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'connections:', + ' analytics:', + ' driver: bigquery', + ' dataset_id: analytics', + ' credentials_json: env:BIGQUERY_CREDENTIALS_JSON', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + const runner = { + ...fakeHistoricSqlRunner('bigquery', 'INFORMATION_SCHEMA.JOBS_BY_PROJECT'), + fixAdvice: () => ({ + failHeadline: 'BigQuery principal cannot read INFORMATION_SCHEMA.JOBS_BY_PROJECT', + remediation: + 'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.', + }), + }; + const error = new Error('access denied'); + const historicSqlReadinessProbe = vi.fn(async () => ({ + ok: false as const, + dialect: 'bigquery' as const, + runner, + error, + })); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + databaseConnectionIds: ['analytics'], + databaseSchemas: [], + enableQueryHistory: true, + queryHistoryWindowDays: 45, + skipDatabases: false, + }, + io.io, + { + testConnection: vi.fn(async () => 0), + scanConnection: vi.fn(async () => 0), + historicSqlReadinessProbe, + }, + ); + + expect(result.status).toBe('ready'); + expect(historicSqlReadinessProbe).toHaveBeenCalledWith( + expect.objectContaining({ + projectDir: tempDir, + connectionId: 'analytics', + connection: expect.objectContaining({ driver: 'bigquery' }), + }), + ); + expect(io.stdout()).toContain('Query history probe...'); + expect(io.stdout()).toContain( + 'BigQuery principal cannot read INFORMATION_SCHEMA.JOBS_BY_PROJECT', + ); + expect(io.stdout()).toContain('roles/bigquery.resourceViewer'); + expect(io.stdout()).toContain('bigquery.jobs.listAll'); + expect(io.stdout()).toContain('Setup written; query history will be skipped until fixed.'); + }); + it('enables query history on an existing Postgres connection', async () => { await writeFile( join(tempDir, 'ktx.yaml'), @@ -2448,7 +2575,15 @@ describe('setup databases step', () => { { testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0), - historicSqlProbe: vi.fn(async () => ({ ok: true, lines: [' OK pg_stat_statements ready (PostgreSQL 16.4)'] })), + historicSqlReadinessProbe: vi.fn(async () => { + const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements'); + return { + ok: true as const, + dialect: 'postgres' as const, + runner, + result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }, + }; + }), }, ); @@ -2465,17 +2600,104 @@ describe('setup databases step', () => { }, }, }); + expect(config.connections.warehouse.historicSql).toBeUndefined(); + }); + + it('migrates legacy historicSql to context.queryHistory during database setup', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'connections:', + ' warehouse:', + ' driver: postgres', + ' readonly: true', + ' historicSql:', + ' enabled: true', + ' dialect: postgres', + ' windowDays: 45', + ' minExecutions: 9', + ' concurrency: 3', + ' staleArchiveAfterDays: 120', + ' filters:', + ' dropTrivialProbes: true', + ' serviceAccounts:', + ' mode: exclude', + ' patterns:', + " - '^svc_'", + ' orchestrators:', + ' mode: exclude', + ' patterns:', + ' - airflow', + ' dropFailedBelow: 2', + ' redactionPatterns:', + " - '(?i)secret'", + '', + ].join('\n'), + 'utf-8', + ); + + const io = makeIo(); + + await expect( + runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + databaseConnectionIds: ['warehouse'], + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { + testConnection: vi.fn(async () => 0), + scanConnection: vi.fn(async () => 0), + historicSqlReadinessProbe: vi.fn(async () => { + const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements'); + return { + ok: true as const, + dialect: 'postgres' as const, + runner, + result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }, + }; + }), + }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse.historicSql).toBeUndefined(); + expect(config.connections.warehouse.context).toMatchObject({ + queryHistory: { + enabled: true, + windowDays: 45, + minExecutions: 9, + concurrency: 3, + staleArchiveAfterDays: 120, + filters: { + dropTrivialProbes: true, + serviceAccounts: { mode: 'exclude', patterns: ['^svc_'] }, + orchestrators: { mode: 'exclude', patterns: ['airflow'] }, + dropFailedBelow: 2, + }, + redactionPatterns: ['(?i)secret'], + }, + }); }); it('prints a non-blocking Postgres query history probe failure after connection test succeeds', async () => { const io = makeIo(); - const historicSqlProbe = vi.fn(async () => ({ - ok: false, - lines: [ - ' FAIL pg_stat_statements extension is not installed in the connection database', - ' Fix: Run (against this database): CREATE EXTENSION pg_stat_statements;', - " Fix: Ensure shared_preload_libraries includes 'pg_stat_statements'.", - ], + const runner = { + ...fakeHistoricSqlRunner('postgres', 'pg_stat_statements'), + fixAdvice: () => ({ + failHeadline: 'pg_stat_statements extension is not installed in the connection database', + remediation: 'Run (against this database): CREATE EXTENSION pg_stat_statements;', + }), + }; + const historicSqlReadinessProbe = vi.fn(async () => ({ + ok: false as const, + dialect: 'postgres' as const, + runner, + error: new Error('missing extension'), })); const result = await runKtxSetupDatabasesStep( @@ -2493,16 +2715,16 @@ describe('setup databases step', () => { { testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0), - historicSqlProbe, + historicSqlReadinessProbe, }, ); expect(result.status).toBe('ready'); - expect(historicSqlProbe).toHaveBeenCalledWith( + expect(historicSqlReadinessProbe).toHaveBeenCalledWith( expect.objectContaining({ projectDir: tempDir, connectionId: 'warehouse', - dialect: 'postgres', + connection: expect.objectContaining({ driver: 'postgres' }), }), ); expect(io.stdout()).toContain('Query history probe...'); @@ -2513,12 +2735,19 @@ describe('setup databases step', () => { it('prints a non-blocking Snowflake query history probe failure with the grants remediation', async () => { const io = makeIo(); - const historicSqlProbe = vi.fn(async () => ({ - ok: false, - lines: [ - ' FAIL Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', - ' Fix: Run (as ACCOUNTADMIN): GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE ;', - ], + const runner = { + ...fakeHistoricSqlRunner('snowflake', 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY'), + fixAdvice: () => ({ + failHeadline: 'Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', + remediation: + 'Run (as ACCOUNTADMIN): GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE ;', + }), + }; + const historicSqlReadinessProbe = vi.fn(async () => ({ + ok: false as const, + dialect: 'snowflake' as const, + runner, + error: new Error('role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY'), })); const result = await runKtxSetupDatabasesStep( @@ -2535,7 +2764,7 @@ describe('setup databases step', () => { { testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0), - historicSqlProbe, + historicSqlReadinessProbe, prompts: makePromptAdapter({ textValues: ['env:SNOWFLAKE_ACCOUNT', 'WH', 'ANALYTICS', 'reader', ''], passwordValues: ['env:SNOWFLAKE_PASSWORD'], @@ -2544,11 +2773,11 @@ describe('setup databases step', () => { ); expect(result.status).toBe('ready'); - expect(historicSqlProbe).toHaveBeenCalledWith( + expect(historicSqlReadinessProbe).toHaveBeenCalledWith( expect.objectContaining({ projectDir: tempDir, connectionId: 'warehouse', - dialect: 'snowflake', + connection: expect.objectContaining({ driver: 'snowflake' }), }), ); expect(io.stdout()).toContain('Query history probe...'); @@ -2559,7 +2788,15 @@ describe('setup databases step', () => { it('does not run the query history probe when the regular connection test fails', async () => { const io = makeIo(); - const historicSqlProbe = vi.fn(async () => ({ ok: true, lines: [] })); + const historicSqlReadinessProbe = vi.fn(async () => { + const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements'); + return { + ok: true as const, + dialect: 'postgres' as const, + runner, + result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }, + }; + }); const result = await runKtxSetupDatabasesStep( { @@ -2576,12 +2813,12 @@ describe('setup databases step', () => { { testConnection: vi.fn(async () => 1), scanConnection: vi.fn(async () => 0), - historicSqlProbe, + historicSqlReadinessProbe, }, ); expect(result.status).toBe('failed'); - expect(historicSqlProbe).not.toHaveBeenCalled(); + expect(historicSqlReadinessProbe).not.toHaveBeenCalled(); }); it('returns missing input when non-interactive database flags are incomplete', async () => { diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index 7781610c..ec2f017f 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -3,7 +3,13 @@ import { readFile, writeFile } from 'node:fs/promises'; import { delimiter, dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; import { promisify } from 'node:util'; +import { queryHistoryDialectForConnection } from './context/ingest/adapters/historic-sql/connection-dialect.js'; import type { HistoricSqlDialect } from './context/ingest/adapters/historic-sql/types.js'; +import { + runHistoricSqlReadinessProbe, + type HistoricSqlProbeOutcome, + type HistoricSqlReadinessProbe, +} from './context/ingest/historic-sql-probes.js'; import { type KtxProjectConnectionConfig, serializeKtxProjectConfig } from './context/project/config.js'; import { loadKtxProject } from './context/project/project.js'; import { markKtxSetupStateStepComplete, setKtxSetupDatabaseConnectionIds } from './context/project/setup-config.js'; @@ -89,19 +95,11 @@ export interface KtxSetupDatabasesPromptAdapter { cancel(message: string): void; } -interface KtxSetupHistoricSqlProbeInput { - projectDir: string; - connectionId: string; - dialect: HistoricSqlDialect; -} - interface KtxSetupHistoricSqlProbeResult { ok: boolean; lines: string[]; } -type KtxSetupHistoricSqlProbe = (input: KtxSetupHistoricSqlProbeInput) => Promise; - export interface KtxSetupDatabasesDeps { prompts?: KtxSetupDatabasesPromptAdapter; testConnection?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise; @@ -110,7 +108,7 @@ export interface KtxSetupDatabasesDeps { listSchemas?: (projectDir: string, connectionId: string) => Promise; listTables?: (projectDir: string, connectionId: string, schemas?: string[]) => Promise; pickDatabaseScope?: (args: PickDatabaseScopeArgs, io: KtxCliIo) => Promise; - historicSqlProbe?: KtxSetupHistoricSqlProbe; + historicSqlReadinessProbe?: HistoricSqlReadinessProbe; } const DRIVER_OPTIONS: Array<{ value: KtxSetupDatabaseDriver; label: string }> = [ @@ -265,6 +263,8 @@ function createPromptAdapter(): KtxSetupDatabasesPromptAdapter { function normalizeDriver(driver: string | undefined): KtxSetupDatabaseDriver | null { const normalized = String(driver ?? '').toLowerCase(); + if (normalized === 'postgresql') return 'postgres'; + if (normalized === 'sqlite3') return 'sqlite'; return DRIVER_OPTIONS.some((option) => option.value === normalized) ? (normalized as KtxSetupDatabaseDriver) : null; } @@ -288,6 +288,13 @@ function numberConfigField(connection: KtxProjectConnectionConfig | undefined, f return typeof value === 'number' && Number.isFinite(value) ? value : undefined; } +function historicSqlConfigRecord(connection: KtxProjectConnectionConfig | undefined): Record | null { + const historicSql = connection?.historicSql; + return historicSql && typeof historicSql === 'object' && !Array.isArray(historicSql) + ? (historicSql as Record) + : null; +} + function contextRecord(connection: KtxProjectConnectionConfig | undefined): Record { const context = connection?.context; return context && typeof context === 'object' && !Array.isArray(context) ? (context as Record) : {}; @@ -300,12 +307,19 @@ function queryHistoryConfigRecord(connection: KtxProjectConnectionConfig | undef : null; } +function stripLegacyHistoricSql(connection: KtxProjectConnectionConfig): KtxProjectConnectionConfig { + const { historicSql: _historicSql, ...rest } = connection as KtxProjectConnectionConfig & { + historicSql?: unknown; + }; + return rest; +} + function withQueryHistoryConfig( connection: KtxProjectConnectionConfig, queryHistory: Record, ): KtxProjectConnectionConfig { return { - ...connection, + ...stripLegacyHistoricSql(connection), context: { ...contextRecord(connection), queryHistory, @@ -313,121 +327,34 @@ function withQueryHistoryConfig( }; } -function historicSqlProbeFailureLines(error: unknown): string[] { - if (error instanceof Error && error.name === 'HistoricSqlExtensionMissingError') { - return [ - ' FAIL pg_stat_statements extension is not installed in the connection database', - ' Fix: Run (against this database): CREATE EXTENSION pg_stat_statements;', - " Fix: Ensure shared_preload_libraries includes 'pg_stat_statements'.", - ]; +function migrateLegacyHistoricSqlConnection(connection: KtxProjectConnectionConfig): KtxProjectConnectionConfig { + const existingQueryHistory = queryHistoryConfigRecord(connection); + const legacy = historicSqlConfigRecord(connection); + if (existingQueryHistory || !legacy) { + return existingQueryHistory ? stripLegacyHistoricSql(connection) : connection; } - if (error instanceof Error && error.name === 'HistoricSqlGrantsMissingError') { - const dialect = (error as { dialect?: unknown }).dialect; - if (dialect === 'snowflake') { - return [ - ' FAIL Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', - ' Fix: Run (as ACCOUNTADMIN): GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE ;', - ]; - } - return [ - ' FAIL Postgres connection role lacks pg_read_all_stats', - ' Fix: Run: GRANT pg_read_all_stats TO ;', - ]; - } - if (error instanceof Error && error.name === 'HistoricSqlVersionUnsupportedError') { - return [` FAIL ${error.message}`]; - } - return [` FAIL Query history probe failed: ${error instanceof Error ? error.message : String(error)}`]; + const { dialect: _dialect, ...queryHistory } = legacy; + return withQueryHistoryConfig(connection, queryHistory); } -async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Promise { - if (input.dialect === 'postgres') { - return probePostgresHistoricSql(input); +function setupHistoricSqlProbeResult( + outcome: HistoricSqlProbeOutcome | null, +): KtxSetupHistoricSqlProbeResult { + if (!outcome) { + return { ok: true, lines: [] }; } - if (input.dialect === 'snowflake') { - return probeSnowflakeHistoricSql(input); - } - return { ok: true, lines: [] }; -} - -async function probePostgresHistoricSql( - input: KtxSetupHistoricSqlProbeInput, -): Promise { - const project = await loadKtxProject({ projectDir: input.projectDir }); - const connection = project.config.connections[input.connectionId]; - const [{ PostgresPgssReader }, { KtxPostgresHistoricSqlQueryClient }, { isKtxPostgresConnectionConfig }] = - await Promise.all([ - import('./context/ingest/adapters/historic-sql/postgres-pgss-reader.js'), - import('./connectors/postgres/historic-sql-query-client.js'), - import('./connectors/postgres/connector.js'), - ]); - - const postgresConnection = connection as Parameters[0]; - if (!isKtxPostgresConnectionConfig(postgresConnection)) { - return { - ok: false, - lines: [` FAIL Connection ${input.connectionId} is not a native Postgres connection.`], - }; - } - - const client = new KtxPostgresHistoricSqlQueryClient({ - connectionId: input.connectionId, - connection: postgresConnection, - }); - try { - const result = await new PostgresPgssReader().probe(client); + if (outcome.ok) { + const { detail, warnings } = outcome.runner.formatSuccessDetail(outcome.result); return { ok: true, - lines: [ - ` OK pg_stat_statements ready (${result.pgServerVersion})`, - ...result.warnings.map((warning: string) => ` ! ${warning}`), - ], - }; - } catch (error) { - return { ok: false, lines: historicSqlProbeFailureLines(error) }; - } finally { - await client.cleanup(); - } -} - -async function probeSnowflakeHistoricSql( - input: KtxSetupHistoricSqlProbeInput, -): Promise { - const project = await loadKtxProject({ projectDir: input.projectDir }); - const connection = project.config.connections[input.connectionId]; - const [{ SnowflakeHistoricSqlQueryHistoryReader }, { KtxSnowflakeHistoricSqlQueryClient }, { isKtxSnowflakeConnectionConfig }] = - await Promise.all([ - import('./context/ingest/adapters/historic-sql/snowflake-query-history-reader.js'), - import('./connectors/snowflake/historic-sql-query-client.js'), - import('./connectors/snowflake/connector.js'), - ]); - - if (!isKtxSnowflakeConnectionConfig(connection)) { - return { - ok: false, - lines: [` FAIL Connection ${input.connectionId} is not a native Snowflake connection.`], + lines: [` OK ${detail}`, ...warnings.map((warning) => ` ! ${warning}`)], }; } - - const client = new KtxSnowflakeHistoricSqlQueryClient({ - connectionId: input.connectionId, - connection, - projectDir: input.projectDir, - }); - try { - const result = await new SnowflakeHistoricSqlQueryHistoryReader().probe(client); - return { - ok: true, - lines: [ - ' OK SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY accessible', - ...result.warnings.map((warning: string) => ` ! ${warning}`), - ], - }; - } catch (error) { - return { ok: false, lines: historicSqlProbeFailureLines(error) }; - } finally { - await client.cleanup(); - } + const advice = outcome.runner.fixAdvice(outcome.error); + return { + ok: false, + lines: [` FAIL ${advice.failHeadline}`, ` Fix: ${advice.remediation}`], + }; } async function defaultListSchemas(projectDir: string, connectionId: string): Promise { @@ -1717,7 +1644,18 @@ async function ensureHistoricSqlIngestDefaults(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); - const config = setKtxSetupDatabaseConnectionIds(project.config, unique(connectionIds)); + const config = setKtxSetupDatabaseConnectionIds( + { + ...project.config, + connections: Object.fromEntries( + Object.entries(project.config.connections).map(([connectionId, connection]) => [ + connectionId, + migrateLegacyHistoricSqlConnection(connection), + ]), + ), + }, + unique(connectionIds), + ); await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); await markKtxSetupStateStepComplete(projectDir, 'databases'); } @@ -1730,24 +1668,28 @@ async function maybeRunHistoricSqlSetupProbe(input: { }): Promise { const project = await loadKtxProject({ projectDir: input.projectDir }); const connection = project.config.connections[input.connectionId]; - const queryHistory = queryHistoryConfigRecord(connection); - const driver = normalizeDriver(connection?.driver); + const queryHistory = queryHistoryConfigRecord(connection) ?? historicSqlConfigRecord(connection); if (queryHistory?.enabled !== true) { return; } - const dialect: 'postgres' | 'snowflake' | null = - driver === 'postgres' ? 'postgres' : driver === 'snowflake' ? 'snowflake' : null; + if (!connection) { + return; + } + const dialect = queryHistoryDialectForConnection(connection); if (!dialect) { return; } input.io.stdout.write('│ Query history probe...\n'); - const probe = input.deps.historicSqlProbe ?? defaultHistoricSqlProbe; - const result = await probe({ - projectDir: input.projectDir, - connectionId: input.connectionId, - dialect, - }); + const probe = input.deps.historicSqlReadinessProbe ?? runHistoricSqlReadinessProbe; + const result = setupHistoricSqlProbeResult( + await probe({ + projectDir: input.projectDir, + connectionId: input.connectionId, + connection, + env: process.env, + }), + ); for (const line of result.lines) { input.io.stdout.write(`│${line}\n`); } diff --git a/packages/cli/src/status-project.test.ts b/packages/cli/src/status-project.test.ts index 83862bfb..8f35cfe8 100644 --- a/packages/cli/src/status-project.test.ts +++ b/packages/cli/src/status-project.test.ts @@ -197,26 +197,58 @@ function withMysqlQueryHistory(config: KtxProjectConfig): KtxProjectConfig { }; } +function fakeStatusRunner( + dialect: 'postgres' | 'snowflake' | 'bigquery', + catalogName: string, +) { + return { + dialect, + catalogName, + async run() { + return { warnings: [], info: [] }; + }, + formatSuccessDetail(result: unknown) { + const typed = result as { warnings: string[]; info?: string[]; pgServerVersion?: string }; + const info = typed.info && typed.info.length > 0 ? `; ${typed.info.join('; ')}` : ''; + const base = + dialect === 'postgres' + ? `pg_stat_statements ready (${typed.pgServerVersion ?? 'PostgreSQL 16.4'})` + : `${catalogName} ready`; + return { detail: `${base}${info}`, warnings: typed.warnings }; + }, + fixAdvice(error: unknown) { + return { + failHeadline: error instanceof Error ? error.message : String(error), + remediation: 'Fix query-history grants.', + }; + }, + }; +} + describe('buildProjectStatus query history dispatch', () => { - it('runs the snowflake probe for snowflake connections, not the postgres one', async () => { - let postgresCalls = 0; - let snowflakeCalls = 0; + it('runs the shared probe for snowflake connections', async () => { + let probeCalls = 0; + const runner = fakeStatusRunner( + 'snowflake', + 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', + ); const project = projectWithConfig(withSnowflakeQueryHistory(baseProjectConfig())); const status = await buildProjectStatus(project, { claudeCodeAuthProbe: stubClaudeCodeAuthProbe, - postgresQueryHistoryProbe: async () => { - postgresCalls += 1; - throw new Error('postgres probe should not run for snowflake'); - }, - snowflakeQueryHistoryProbe: async () => { - snowflakeCalls += 1; - return { warnings: [], info: [] }; + queryHistoryReadinessProbe: async (input) => { + probeCalls += 1; + expect(input.connectionId).toBe('warehouse'); + return { + ok: true, + dialect: 'snowflake', + runner, + result: { warnings: [], info: [] }, + }; }, }); - expect(postgresCalls).toBe(0); - expect(snowflakeCalls).toBe(1); + expect(probeCalls).toBe(1); expect(status.queryHistory).toHaveLength(1); expect(status.queryHistory[0]).toMatchObject({ connection: 'warehouse', @@ -231,19 +263,21 @@ describe('buildProjectStatus query history dispatch', () => { it('reports snowflake probe failures with the reader-provided remediation', async () => { const project = projectWithConfig(withSnowflakeQueryHistory(baseProjectConfig())); - const { HistoricSqlGrantsMissingError } = await import( - './context/ingest/adapters/historic-sql/errors.js' - ); const status = await buildProjectStatus(project, { claudeCodeAuthProbe: stubClaudeCodeAuthProbe, - snowflakeQueryHistoryProbe: async () => { - throw new HistoricSqlGrantsMissingError({ - dialect: 'snowflake', - message: 'role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', - remediation: 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE ktx;', - }); - }, + queryHistoryReadinessProbe: async () => ({ + ok: false, + dialect: 'snowflake', + runner: { + ...fakeStatusRunner('snowflake', 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY'), + fixAdvice: () => ({ + failHeadline: 'Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', + remediation: 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE ktx;', + }), + }, + error: new Error('role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY'), + }), }); expect(status.queryHistory[0]).toMatchObject({ @@ -257,18 +291,25 @@ describe('buildProjectStatus query history dispatch', () => { }); it('runs the bigquery probe for bigquery connections', async () => { - let bigqueryCalls = 0; + let probeCalls = 0; + const runner = fakeStatusRunner('bigquery', 'INFORMATION_SCHEMA.JOBS_BY_PROJECT'); const project = projectWithConfig(withBigQueryQueryHistory(baseProjectConfig())); const status = await buildProjectStatus(project, { claudeCodeAuthProbe: stubClaudeCodeAuthProbe, - bigqueryQueryHistoryProbe: async () => { - bigqueryCalls += 1; - return { warnings: [], info: [] }; + queryHistoryReadinessProbe: async (input) => { + probeCalls += 1; + expect(input.connectionId).toBe('bq'); + return { + ok: true, + dialect: 'bigquery', + runner, + result: { warnings: [], info: [] }, + }; }, }); - expect(bigqueryCalls).toBe(1); + expect(probeCalls).toBe(1); expect(status.queryHistory[0]).toMatchObject({ connection: 'bq', driver: 'bigquery', @@ -283,7 +324,7 @@ describe('buildProjectStatus query history dispatch', () => { const status = await buildProjectStatus(project, { claudeCodeAuthProbe: stubClaudeCodeAuthProbe, - postgresQueryHistoryProbe: async () => { + queryHistoryReadinessProbe: async () => { throw new Error('postgres probe must not run for mysql'); }, }); @@ -306,7 +347,7 @@ describe('buildProjectStatus query history dispatch', () => { describe('buildProjectStatus --fast', () => { it('skips claude-code probe and Postgres query-history probe', async () => { let claudeProbeCalls = 0; - let pgProbeCalls = 0; + let queryHistoryProbeCalls = 0; const project = projectWithConfig(withPostgresQueryHistory(baseProjectConfig())); const status = await buildProjectStatus(project, { @@ -316,14 +357,14 @@ describe('buildProjectStatus --fast', () => { claudeProbeCalls += 1; return { ok: true }; }, - postgresQueryHistoryProbe: async () => { - pgProbeCalls += 1; + queryHistoryReadinessProbe: async () => { + queryHistoryProbeCalls += 1; throw new Error('should not be called'); }, }); expect(claudeProbeCalls).toBe(0); - expect(pgProbeCalls).toBe(0); + expect(queryHistoryProbeCalls).toBe(0); expect(status.llm.status).toBe('skipped'); expect(status.llm.detail).toMatch(/--fast/); expect(status.queryHistory).toHaveLength(1); @@ -340,7 +381,7 @@ describe('buildProjectStatus --fast', () => { env: { ANALYTICS_DATABASE_URL: 'postgres://example' }, fast: true, claudeCodeAuthProbe: stubClaudeCodeAuthProbe, - postgresQueryHistoryProbe: async () => { + queryHistoryReadinessProbe: async () => { throw new Error('should not be called'); }, }); diff --git a/packages/cli/src/status-project.ts b/packages/cli/src/status-project.ts index 07ccc3c6..097f4091 100644 --- a/packages/cli/src/status-project.ts +++ b/packages/cli/src/status-project.ts @@ -4,11 +4,15 @@ import { runClaudeCodeAuthProbe } from './context/llm/claude-code-runtime.js'; import type { KtxConfigIssue, KtxProjectConfig, KtxProjectConnectionConfig, KtxProjectEmbeddingConfig, KtxProjectLlmConfig } from './context/project/config.js'; import type { KtxLocalProject } from './context/project/project.js'; import { ktxLocalStateDbPath } from './context/project/local-state-db.js'; -import type { PostgresPgssProbeResult } from './context/ingest/adapters/historic-sql/types.js'; import { isQueryHistoryEnabled, queryHistoryDialectForConnection, } from './context/ingest/adapters/historic-sql/connection-dialect.js'; +import { + historicSqlProbeCatalogName, + runHistoricSqlReadinessProbe, + type HistoricSqlReadinessProbe, +} from './context/ingest/historic-sql-probes.js'; import { formatClaudeCodePromptCachingFix, formatClaudeCodePromptCachingWarning, @@ -170,6 +174,13 @@ function resolveRef(value: unknown, env: NodeJS.ProcessEnv): { resolved: string; return { resolved: trimmed, via: 'literal' }; } +function failureDetail(error: unknown): string { + if (error instanceof Error && error.message.trim().length > 0) { + return error.message.trim().split('\n')[0] ?? error.message.trim(); + } + return String(error); +} + function envHint(value: unknown): string | undefined { if (typeof value === 'string' && value.trim().startsWith('env:')) { return value.trim().slice(4).trim(); @@ -392,232 +403,6 @@ function buildConnectionStatus( } } -interface QueryHistoryProbeInput { - projectDir: string; - connectionId: string; - connection: KtxProjectConnectionConfig; - env: NodeJS.ProcessEnv; -} - -interface GenericProbeResult { - warnings: string[]; - info?: string[]; -} - -type PostgresQueryHistoryProbe = (input: QueryHistoryProbeInput) => Promise; -type SnowflakeQueryHistoryProbe = (input: QueryHistoryProbeInput) => Promise; -type BigQueryQueryHistoryProbe = (input: QueryHistoryProbeInput) => Promise; - -function failureDetail(error: unknown): string { - if (error instanceof Error && error.message.trim().length > 0) { - return error.message.trim().split('\n')[0] ?? error.message.trim(); - } - return String(error); -} - -function postgresReadinessDetail(result: PostgresPgssProbeResult): string { - const warningText = result.warnings.length > 0 ? ` with warnings: ${result.warnings.join('; ')}` : ''; - const info = result.info ?? []; - const infoText = info.length > 0 ? `; info: ${info.join('; ')}` : ''; - return `pg_stat_statements ready (${result.pgServerVersion})${warningText}${infoText}`; -} - -function genericReadinessDetail(label: string, result: GenericProbeResult): string { - const warningText = result.warnings.length > 0 ? ` with warnings: ${result.warnings.join('; ')}` : ''; - const info = result.info ?? []; - const infoText = info.length > 0 ? `; info: ${info.join('; ')}` : ''; - return `${label} ready${warningText}${infoText}`; -} - -function probeFailureFix(error: unknown, dialect: string, connectionId: string, projectDir: string): string { - if (error instanceof Error && error.name === 'HistoricSqlExtensionMissingError' && 'remediation' in error) { - return String(error.remediation); - } - if (error instanceof Error && error.name === 'HistoricSqlGrantsMissingError' && 'remediation' in error) { - return String(error.remediation); - } - if (error instanceof Error && error.name === 'HistoricSqlVersionUnsupportedError') { - return 'Use PostgreSQL 14 or newer, or disable query history for this connection'; - } - return `Fix connections.${connectionId} ${dialect} settings, then rerun \`ktx status --project-dir ${projectDir}\``; -} - -async function defaultPostgresQueryHistoryProbe( - input: QueryHistoryProbeInput, -): Promise { - const [{ PostgresPgssReader }, { KtxPostgresHistoricSqlQueryClient }, { isKtxPostgresConnectionConfig }] = - await Promise.all([ - import('./context/ingest/adapters/historic-sql/postgres-pgss-reader.js'), - import('./connectors/postgres/historic-sql-query-client.js'), - import('./connectors/postgres/connector.js'), - ]); - - const inputDriver = input.connection.driver ?? 'unknown'; - if (!isKtxPostgresConnectionConfig(input.connection)) { - throw new Error(`Native PostgreSQL connector cannot run driver "${inputDriver}"`); - } - - const client = new KtxPostgresHistoricSqlQueryClient({ - connectionId: input.connectionId, - connection: input.connection, - env: input.env, - }); - try { - return await new PostgresPgssReader().probe(client); - } finally { - await client.cleanup(); - } -} - -async function defaultSnowflakeQueryHistoryProbe( - input: QueryHistoryProbeInput, -): Promise { - const [{ SnowflakeHistoricSqlQueryHistoryReader }, { KtxSnowflakeHistoricSqlQueryClient }, { isKtxSnowflakeConnectionConfig }] = - await Promise.all([ - import('./context/ingest/adapters/historic-sql/snowflake-query-history-reader.js'), - import('./connectors/snowflake/historic-sql-query-client.js'), - import('./connectors/snowflake/connector.js'), - ]); - - const inputDriver = input.connection.driver ?? 'unknown'; - if (!isKtxSnowflakeConnectionConfig(input.connection)) { - throw new Error(`Native Snowflake connector cannot run driver "${inputDriver}"`); - } - - const client = new KtxSnowflakeHistoricSqlQueryClient({ - connectionId: input.connectionId, - connection: input.connection, - projectDir: input.projectDir, - env: input.env, - }); - try { - return await new SnowflakeHistoricSqlQueryHistoryReader().probe(client); - } finally { - await client.cleanup(); - } -} - -async function defaultBigQueryQueryHistoryProbe( - input: QueryHistoryProbeInput, -): Promise { - const [ - { BigQueryHistoricSqlQueryHistoryReader }, - { KtxBigQueryScanConnector, isKtxBigQueryConnectionConfig }, - { resolveKtxConfigReference }, - ] = await Promise.all([ - import('./context/ingest/adapters/historic-sql/bigquery-query-history-reader.js'), - import('./connectors/bigquery/connector.js'), - import('./context/core/config-reference.js'), - ]); - - const inputDriver = input.connection.driver ?? 'unknown'; - if (!isKtxBigQueryConnectionConfig(input.connection)) { - throw new Error(`Native BigQuery connector cannot run driver "${inputDriver}"`); - } - - const rawCredentials = typeof input.connection.credentials_json === 'string' ? input.connection.credentials_json : ''; - const resolvedCredentials = resolveKtxConfigReference(rawCredentials, input.env); - if (!resolvedCredentials) { - throw new Error(`Query history BigQuery connection ${input.connectionId} requires credentials_json`); - } - const parsed = JSON.parse(resolvedCredentials) as { project_id?: unknown }; - if (typeof parsed.project_id !== 'string' || parsed.project_id.trim().length === 0) { - throw new Error(`Query history BigQuery connection ${input.connectionId} requires credentials_json.project_id`); - } - const region = - typeof input.connection.location === 'string' && input.connection.location.trim().length > 0 - ? input.connection.location.trim() - : 'us'; - - const connector = new KtxBigQueryScanConnector({ - connectionId: input.connectionId, - connection: input.connection, - }); - try { - return await new BigQueryHistoricSqlQueryHistoryReader({ - projectId: parsed.project_id, - region, - }).probe({ - async executeQuery(sql: string) { - const result = await connector.executeReadOnly({ connectionId: input.connectionId, sql }, {} as never); - return { - headers: result.headers, - rows: result.rows, - totalRows: result.totalRows, - }; - }, - }); - } finally { - await connector.cleanup(); - } -} - -interface DispatchedProbe { - label: string; - spinnerLabel: string; - fastSkipDetail: string; - run: () => Promise<{ status: ProjectStatusLevel; detail: string; fix?: string }>; -} - -function postgresProbeDispatch( - input: QueryHistoryProbeInput, - probe: PostgresQueryHistoryProbe, -): DispatchedProbe { - return { - label: 'postgres', - spinnerLabel: `Probing pg_stat_statements on ${input.connectionId}`, - fastSkipDetail: 'pg_stat_statements probe skipped (--fast)', - run: async () => { - const result = await probe(input); - return { - status: result.warnings.length > 0 ? 'warn' : 'ok', - detail: postgresReadinessDetail(result), - ...(result.warnings.length > 0 - ? { - fix: `Update the Postgres parameter group or config, then rerun \`ktx status --project-dir ${input.projectDir}\``, - } - : {}), - }; - }, - }; -} - -function snowflakeProbeDispatch( - input: QueryHistoryProbeInput, - probe: SnowflakeQueryHistoryProbe, -): DispatchedProbe { - return { - label: 'snowflake', - spinnerLabel: `Probing SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY on ${input.connectionId}`, - fastSkipDetail: 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY probe skipped (--fast)', - run: async () => { - const result = await probe(input); - return { - status: result.warnings.length > 0 ? 'warn' : 'ok', - detail: genericReadinessDetail('SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', result), - }; - }, - }; -} - -function bigqueryProbeDispatch( - input: QueryHistoryProbeInput, - probe: BigQueryQueryHistoryProbe, -): DispatchedProbe { - return { - label: 'bigquery', - spinnerLabel: `Probing INFORMATION_SCHEMA.JOBS_BY_PROJECT on ${input.connectionId}`, - fastSkipDetail: 'INFORMATION_SCHEMA.JOBS_BY_PROJECT probe skipped (--fast)', - run: async () => { - const result = await probe(input); - return { - status: result.warnings.length > 0 ? 'warn' : 'ok', - detail: genericReadinessDetail('INFORMATION_SCHEMA.JOBS_BY_PROJECT', result), - }; - }, - }; -} - async function buildQueryHistoryStatus( project: KtxLocalProject, options: BuildProjectStatusOptions, @@ -626,9 +411,7 @@ async function buildQueryHistoryStatus( .filter(([, connection]) => isQueryHistoryEnabled(connection)) .sort(([left], [right]) => left.localeCompare(right)); - const postgresProbe = options.postgresQueryHistoryProbe ?? defaultPostgresQueryHistoryProbe; - const snowflakeProbe = options.snowflakeQueryHistoryProbe ?? defaultSnowflakeQueryHistoryProbe; - const bigqueryProbe = options.bigqueryQueryHistoryProbe ?? defaultBigQueryQueryHistoryProbe; + const probe = options.queryHistoryReadinessProbe ?? runHistoricSqlReadinessProbe; const env = options.env ?? process.env; const statuses: QueryHistoryStatus[] = []; @@ -648,18 +431,7 @@ async function buildQueryHistoryStatus( continue; } - const probeInput: QueryHistoryProbeInput = { - projectDir: project.projectDir, - connectionId, - connection, - env, - }; - const dispatched = - dialect === 'postgres' - ? postgresProbeDispatch(probeInput, postgresProbe) - : dialect === 'snowflake' - ? snowflakeProbeDispatch(probeInput, snowflakeProbe) - : bigqueryProbeDispatch(probeInput, bigqueryProbe); + const catalogName = historicSqlProbeCatalogName(dialect); if (options.fast === true) { statuses.push({ @@ -667,29 +439,61 @@ async function buildQueryHistoryStatus( driver, dialect, status: 'skipped', - detail: dispatched.fastSkipDetail, + detail: `${catalogName} probe skipped (--fast)`, }); continue; } - try { - const outcome = await withSpinner(options.useSpinner === true, dispatched.spinnerLabel, dispatched.run); + const outcome = await withSpinner( + options.useSpinner === true, + `Probing ${catalogName} on ${connectionId}`, + () => + probe({ + projectDir: project.projectDir, + connectionId, + connection, + env, + }), + ); + + if (!outcome) { statuses.push({ connection: connectionId, driver, - dialect, - ...outcome, - }); - } catch (error) { - statuses.push({ - connection: connectionId, - driver, - dialect, + dialect: driver, status: 'fail', - detail: failureDetail(error), - fix: probeFailureFix(error, dispatched.label, connectionId, project.projectDir), + detail: `query history is not supported for driver "${driver}"`, + fix: `Disable connections.${connectionId}.context.queryHistory, or use a postgres, snowflake, or bigquery connection`, }); + continue; } + + if (outcome.ok) { + const { detail, warnings } = outcome.runner.formatSuccessDetail(outcome.result); + statuses.push({ + connection: connectionId, + driver, + dialect, + status: warnings.length > 0 ? 'warn' : 'ok', + detail, + ...(dialect === 'postgres' && warnings.length > 0 + ? { + fix: `Update the Postgres parameter group or config, then rerun \`ktx status --project-dir ${project.projectDir}\``, + } + : {}), + }); + continue; + } + + const advice = outcome.runner.fixAdvice(outcome.error); + statuses.push({ + connection: connectionId, + driver, + dialect, + status: 'fail', + detail: advice.failHeadline, + fix: advice.remediation, + }); } return statuses; @@ -828,9 +632,7 @@ function buildVerdict( export interface BuildProjectStatusOptions { env?: NodeJS.ProcessEnv; - postgresQueryHistoryProbe?: PostgresQueryHistoryProbe; - snowflakeQueryHistoryProbe?: SnowflakeQueryHistoryProbe; - bigqueryQueryHistoryProbe?: BigQueryQueryHistoryProbe; + queryHistoryReadinessProbe?: HistoricSqlReadinessProbe; claudeCodeAuthProbe?: ClaudeCodeAuthProbe; configIssues?: KtxConfigIssue[]; fast?: boolean; diff --git a/uv.lock b/uv.lock index 9c580fbf..7c2c368f 100644 --- a/uv.lock +++ b/uv.lock @@ -458,7 +458,7 @@ wheels = [ [[package]] name = "ktx-daemon" -version = "0.4.1" +version = "0.5.0" source = { editable = "python/ktx-daemon" } dependencies = [ { name = "fastapi" }, @@ -515,7 +515,7 @@ dev = [ [[package]] name = "ktx-sl" -version = "0.4.1" +version = "0.5.0" source = { editable = "python/ktx-sl" } dependencies = [ { name = "pydantic" },