From c87d14a554fd04424da20196b9b4e41dd472374d Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Fri, 22 May 2026 14:22:11 +0200 Subject: [PATCH] feat(cli): redesign database scope picker for searchable schema-first setup (#203) * feat: add searchable setup prompt pickers * fix: make snowflake scope discovery single query * fix: make bigquery table discovery schema scoped * fix: honor mysql and clickhouse database scope * feat: wire schema scope discovery for all relational setup drivers * feat: add schema-first database scope picker * test: update setup prompt stubs for type-check * docs: document database scope picker fields * Fix database setup edit preservation --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> --- .../content/docs/cli-reference/ktx-setup.mdx | 6 +- .../content/docs/configuration/ktx-yaml.mdx | 27 +- .../docs/integrations/primary-sources.mdx | 68 ++++- .../src/connectors/bigquery/connector.test.ts | 83 ++++++ .../cli/src/connectors/bigquery/connector.ts | 59 ++-- .../connectors/clickhouse/connector.test.ts | 74 ++++- .../src/connectors/clickhouse/connector.ts | 79 +++-- .../src/connectors/mysql/connector.test.ts | 106 +++++++ .../cli/src/connectors/mysql/connector.ts | 106 ++++--- .../connectors/snowflake/connector.test.ts | 69 +++++ .../cli/src/connectors/snowflake/connector.ts | 90 ++++-- .../connectors/snowflake/identifiers.test.ts | 18 ++ .../src/connectors/snowflake/identifiers.ts | 14 + .../connections/bigquery-identifiers.test.ts | 19 ++ .../connections/bigquery-identifiers.ts | 17 ++ .../bigquery-query-history-reader.ts | 20 +- packages/cli/src/database-tree-picker.test.ts | 130 ++++++-- packages/cli/src/database-tree-picker.ts | 129 +++++++- packages/cli/src/local-adapters.test.ts | 34 +++ packages/cli/src/local-adapters.ts | 6 +- packages/cli/src/setup-databases.test.ts | 199 ++++++++++++- packages/cli/src/setup-databases.ts | 279 +++++++++++------- packages/cli/src/setup-models.test.ts | 85 +++--- packages/cli/src/setup-models.ts | 14 +- packages/cli/src/setup-prompts.test.ts | 52 ++++ packages/cli/src/setup-prompts.ts | 64 ++++ packages/cli/src/setup-sources-notion.test.ts | 1 + packages/cli/src/setup-sources.test.ts | 4 +- packages/cli/src/setup-sources.ts | 8 +- packages/cli/src/setup.test.ts | 1 + 30 files changed, 1530 insertions(+), 331 deletions(-) create mode 100644 packages/cli/src/connectors/snowflake/identifiers.test.ts create mode 100644 packages/cli/src/connectors/snowflake/identifiers.ts create mode 100644 packages/cli/src/context/connections/bigquery-identifiers.test.ts create mode 100644 packages/cli/src/context/connections/bigquery-identifiers.ts diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 87fcddaa..a52a3eba 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -103,7 +103,7 @@ runtime features are missing. | Flag | Description | |------|-------------| -| `--database ` | Database driver to configure; repeatable. Choices: `sqlite`, `postgres`, `mysql`, `sqlserver`, `bigquery`, `snowflake` | +| `--database ` | Database driver to configure; repeatable. Choices: `sqlite`, `postgres`, `mysql`, `clickhouse`, `sqlserver`, `bigquery`, `snowflake` | | `--database-connection-id ` | Existing selected connection id; repeatable. With `--database` or `--database-url`, connection id for the new connection. | | `--database-url ` | URL, `env:NAME`, or `file:/path` for one new URL-style database connection; also used as the SQLite path | | `--database-schema ` | Database schema or dataset to include; repeatable | @@ -113,6 +113,10 @@ runtime features are missing. context. Use `--skip-databases` only when intentionally leaving the project incomplete. +`--database-schema` maps to the driver's scope field: `schemas` for PostgreSQL, +MySQL, and SQL Server; `schema_names` for Snowflake; `dataset_ids` for +BigQuery; and `databases` for ClickHouse. + ### Query History | Flag | Description | diff --git a/docs-site/content/docs/configuration/ktx-yaml.mdx b/docs-site/content/docs/configuration/ktx-yaml.mdx index fac6f3f9..4008a45d 100644 --- a/docs-site/content/docs/configuration/ktx-yaml.mdx +++ b/docs-site/content/docs/configuration/ktx-yaml.mdx @@ -109,9 +109,9 @@ context-source drivers share the map. | `mysql` | Warehouse | `driver` | `url`, `enabled_tables` | | `sqlite` | Warehouse | `driver` | `url` or `path`, `enabled_tables` | | `sqlserver` | Warehouse | `driver` | `url`, `enabled_tables` | -| `bigquery` | Warehouse | `driver` | `url`, `enabled_tables`, `historicSql` | -| `snowflake` | Warehouse | `driver` | `url`, `enabled_tables`, `historicSql` | -| `clickhouse` | Warehouse | `driver` | `url`, `enabled_tables` | +| `bigquery` | Warehouse | `driver` | `credentials_json`, `dataset_ids`, `enabled_tables`, `historicSql` | +| `snowflake` | Warehouse | `driver` | `schema_names`, `enabled_tables`, `historicSql` | +| `clickhouse` | Warehouse | `driver` | `url`, `database`, `databases`, `enabled_tables` | | `metabase` | Context source | `driver`, `api_url` | `api_key_ref`, `mappings` | | `looker` | Context source | `driver`, `base_url`, `client_id` | `client_secret_ref`, `mappings` | | `lookml` | Context source | `driver`, `repoUrl` | `branch`, `path`, `auth_token_ref`, `mappings` | @@ -136,6 +136,27 @@ connections: - public.customers ``` +Connector-specific scope fields let setup and scan use the same warehouse +boundary: + +```yaml +connections: + mysql-warehouse: + driver: mysql + url: env:MYSQL_URL + schemas: [analytics, mart] + clickhouse-warehouse: + driver: clickhouse + url: env:CLICKHOUSE_URL + database: analytics + databases: [analytics, mart] + bigquery-warehouse: + driver: bigquery + credentials_json: file:./service-account.json + location: US + dataset_ids: [analytics, mart] +``` + For Postgres, BigQuery, and Snowflake, `historicSql` and `context.queryHistory` toggle query-history ingest. The shape is connector-specific; the setup wizard writes these fields when you pass `--enable-query-history`. diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index da916339..5e9483f9 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -1,6 +1,6 @@ --- title: Primary Sources -description: Connect ktx to PostgreSQL, Snowflake, BigQuery, MySQL, SQL Server, or SQLite. +description: Connect ktx to PostgreSQL, Snowflake, BigQuery, MySQL, ClickHouse, SQL Server, or SQLite. --- **ktx** connects to your data warehouse or database to build schema context, @@ -26,7 +26,7 @@ Agents should prefer environment or file references over literal secrets. | Field | Required | Applies to | Description | |-------|----------|------------|-------------| -| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `mysql`, `sqlserver`, or `sqlite` | +| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `mysql`, `clickhouse`, `sqlserver`, or `sqlite` | | `url` | One of the connection methods | URL-style connectors | Database URL, `env:NAME`, or `file:/path/to/secret` | | `host`, `port`, `database`, `username`, `password` | One of the connection methods | PostgreSQL, MySQL, SQL Server | Field-by-field connection values | | `schema` or `schemas` | No | schema-aware warehouses | Single schema or list of schemas to scan | @@ -216,6 +216,10 @@ For multiple datasets: - finance ``` +BigQuery dataset scope is stored in `connections..dataset_ids`. Interactive +setup discovers datasets from credentials plus location, then writes the chosen +dataset ids as the scan scope. + ### Authentication | Method | Config | @@ -282,6 +286,10 @@ connections: url: env:MYSQL_DATABASE_URL ``` +MySQL supports selecting one or more databases during `ktx setup`. The selected +database scope is stored in `connections..schemas`, and `ktx scan` reads +exactly those databases. + Or with individual fields: ```yaml title="ktx.yaml" @@ -320,12 +328,66 @@ connections: - Parameter binding uses positional `?` placeholders - Uses `LIMIT X OFFSET Y` for pagination -- Single database per connection (no multi-schema) +- Multi-database scanning uses `schemas` as the selected database list - Supports 20+ MySQL types including `enum`, `json`, `datetime`, `decimal` - Table comments extracted with InnoDB metadata prefix stripping --- +## ClickHouse + +Connects to ClickHouse over HTTP. Supports table and column introspection across +one or more selected databases. + +### Connection config + +```yaml title="ktx.yaml" +connections: + my-clickhouse: + driver: clickhouse + url: env:CLICKHOUSE_DATABASE_URL + database: analytics +``` + +For multiple databases: + +```yaml + databases: + - analytics + - mart +``` + +ClickHouse supports selecting one or more databases during `ktx setup`. The +selected scan scope is stored in `connections..databases`. The single +`database` field remains the connection default for raw SQL and `ktx sql`. + +### Authentication + +| Method | Config | +|--------|--------| +| URL | `url: env:CLICKHOUSE_DATABASE_URL` | +| Password | `password: env:CLICKHOUSE_PASSWORD` or `password: file:/path/to/secret` | + +### Features + +| Feature | Supported | Notes | +|---------|-----------|-------| +| Tables & views | Yes | Via `system.tables` | +| Primary keys | No | Not exposed as relational constraints | +| Foreign keys | No | Not available in ClickHouse | +| Row count estimates | Yes | From ClickHouse metadata where available | +| Column statistics | No | - | +| Query history | No | - | +| Table sampling | Yes | Uses ClickHouse sampling syntax when supported | + +### Dialect notes + +- Parameter binding uses named placeholders +- The `database` field sets the default database for SQL execution +- The `databases` array controls the scan scope + +--- + ## SQL Server Connects to Microsoft SQL Server and Azure SQL. Supports multi-schema scanning with `dbo` as the default schema. diff --git a/packages/cli/src/connectors/bigquery/connector.test.ts b/packages/cli/src/connectors/bigquery/connector.test.ts index 5a890612..c517100a 100644 --- a/packages/cli/src/connectors/bigquery/connector.test.ts +++ b/packages/cli/src/connectors/bigquery/connector.test.ts @@ -234,6 +234,89 @@ describe('KtxBigQueryScanConnector', () => { await connector.cleanup(); }); + it('constructs for discovery without dataset scope and lists tables through one region information schema query', async () => { + const createQueryJob = vi.fn( + async ( + input: { query: string; params?: Record; location?: string }, + ): ReturnType => [ + { + getQueryResults: async (): ReturnType => [ + [ + { table_schema: 'analytics', table_name: 'orders', table_type: 'BASE TABLE' }, + { table_schema: 'analytics', table_name: 'order_clone', table_type: 'CLONE' }, + { table_schema: 'mart', table_name: 'orders_mv', table_type: 'MATERIALIZED VIEW' }, + ], + undefined, + { + schema: { + fields: [ + { name: 'table_schema', type: 'STRING' }, + { name: 'table_name', type: 'STRING' }, + { name: 'table_type', type: 'STRING' }, + ], + }, + }, + ], + }, + ], + ); + const clientFactory: KtxBigQueryClientFactory = { + createClient: vi.fn(() => ({ + getDatasets: vi.fn(async () => [[{ id: 'analytics' }, { id: 'mart' }]] as [{ id: string }[]]), + dataset: vi.fn((datasetId: string) => ({ + get: vi.fn(async () => [{ id: datasetId }]), + getTables: vi.fn(async () => [[]] as [never[]]), + })), + createQueryJob, + })), + }; + const connector = new KtxBigQueryScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'bigquery', + credentials_json: JSON.stringify({ project_id: 'project-1' }), + location: 'US', + }, + clientFactory, + }); + + await expect(connector.listTables(['analytics', 'mart'])).resolves.toEqual([ + { schema: 'analytics', name: 'orders', kind: 'table' }, + { schema: 'analytics', name: 'order_clone', kind: 'table' }, + { schema: 'mart', name: 'orders_mv', kind: 'view' }, + ]); + + expect(createQueryJob).toHaveBeenCalledTimes(1); + expect(createQueryJob).toHaveBeenCalledWith( + expect.objectContaining({ + location: 'US', + params: { dataset_ids: ['analytics', 'mart'] }, + }), + ); + expect(createQueryJob.mock.calls[0]?.[0].query).toContain('`project-1`.`region-us`.INFORMATION_SCHEMA.TABLES'); + expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'CLONE'"); + expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'SNAPSHOT'"); + }); + + it('keeps scan paths requiring dataset scope', async () => { + const connector = new KtxBigQueryScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'bigquery', + credentials_json: JSON.stringify({ project_id: 'project-1' }), + location: 'US', + }, + clientFactory: fakeClientFactory(), + }); + + await expect( + connector.introspect( + { connectionId: 'warehouse', driver: 'bigquery' }, + { runId: 'scan-run-1' }, + ), + ).rejects.toThrow('Native BigQuery scan requires connections.warehouse.dataset_ids or dataset_id'); + }); + it('applies maximumBytesBilled to read-only queries when configured', async () => { const clientFactory = fakeClientFactory(); const connector = new KtxBigQueryScanConnector({ diff --git a/packages/cli/src/connectors/bigquery/connector.ts b/packages/cli/src/connectors/bigquery/connector.ts index 1c6c964b..6a93ccb0 100644 --- a/packages/cli/src/connectors/bigquery/connector.ts +++ b/packages/cli/src/connectors/bigquery/connector.ts @@ -1,4 +1,5 @@ import { BigQuery, type TableField } from '@google-cloud/bigquery'; +import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from '../../context/connections/bigquery-identifiers.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js'; import { readFileSync } from 'node:fs'; @@ -230,9 +231,6 @@ export function bigQueryConnectionConfigFromConfig(input: { throw new Error(`Native BigQuery connector requires credentials_json.project_id for connections.${input.connectionId}`); } const resolvedDatasetIds = datasetIds(input.connection, env); - if (resolvedDatasetIds.length === 0) { - throw new Error(`Native BigQuery connector requires connections.${input.connectionId}.dataset_id or dataset_ids`); - } const location = stringConfigValue(input.connection, 'location', env); return { projectId, credentials, datasetIds: resolvedDatasetIds, ...(location ? { location } : {}) }; } @@ -289,17 +287,18 @@ export class KtxBigQueryScanConnector implements KtxScanConnector { async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise { this.assertConnection(input.connectionId); const tables: KtxSchemaTable[] = []; - for (const datasetId of this.resolved.datasetIds) { + const datasetIds = this.requireDatasetIdsForScan(); + for (const datasetId of datasetIds) { tables.push(...(await this.introspectDataset(datasetId))); } return { connectionId: this.connectionId, driver: 'bigquery', extractedAt: this.now().toISOString(), - scope: { catalogs: [this.resolved.projectId], datasets: this.resolved.datasetIds }, + scope: { catalogs: [this.resolved.projectId], datasets: datasetIds }, metadata: { project_id: this.resolved.projectId, - datasets: this.resolved.datasetIds, + datasets: datasetIds, table_count: tables.length, total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0), }, @@ -381,22 +380,33 @@ export class KtxBigQueryScanConnector implements KtxScanConnector { } async listTables(datasetIds?: string[]): Promise { - const filterDatasets = datasetIds ?? (await this.listDatasets()); - const entries: KtxTableListEntry[] = []; - for (const datasetId of filterDatasets) { - const dataset = this.getClient().dataset(datasetId); - const [tables] = await dataset.getTables(); - for (const table of tables) { - if (!table.id) continue; - entries.push({ - schema: datasetId, - name: table.id, - kind: table.metadata?.type === 'VIEW' ? 'view' : 'table', - }); - } + const projectId = normalizeBigQueryProjectId(this.resolved.projectId, 'table discovery'); + const region = normalizeBigQueryRegion(this.resolved.location ?? 'US', 'table discovery'); + const params: Record = {}; + const filter = datasetIds && datasetIds.length > 0 ? 'AND table_schema IN UNNEST(@dataset_ids)' : ''; + if (datasetIds && datasetIds.length > 0) { + params.dataset_ids = datasetIds; } - entries.sort((a, b) => a.schema.localeCompare(b.schema) || a.name.localeCompare(b.name)); - return entries; + const rows = await this.queryRaw<{ table_schema: string; table_name: string; table_type: string }>( + ` + SELECT table_schema, table_name, table_type + FROM \`${projectId}\`.\`region-${region}\`.INFORMATION_SCHEMA.TABLES + WHERE table_type IN ( + 'BASE TABLE', 'VIEW', 'MATERIALIZED VIEW', 'EXTERNAL', 'CLONE', 'SNAPSHOT' + ) + ${filter} + ORDER BY table_schema, table_name + `, + params, + ); + return rows.map((row) => ({ + schema: row.table_schema, + name: row.table_name, + kind: + row.table_type === 'VIEW' || row.table_type === 'MATERIALIZED VIEW' + ? ('view' as const) + : ('table' as const), + })); } async cleanup(): Promise { @@ -413,6 +423,13 @@ export class KtxBigQueryScanConnector implements KtxScanConnector { return this.client; } + private requireDatasetIdsForScan(): string[] { + if (this.resolved.datasetIds.length === 0) { + throw new Error(`Native BigQuery scan requires connections.${this.connectionId}.dataset_ids or dataset_id`); + } + return this.resolved.datasetIds; + } + private async query(sql: string, params?: Record): Promise { const [job] = await this.getClient().createQueryJob({ query: sql, diff --git a/packages/cli/src/connectors/clickhouse/connector.test.ts b/packages/cli/src/connectors/clickhouse/connector.test.ts index 6ff60299..a3ab11f6 100644 --- a/packages/cli/src/connectors/clickhouse/connector.test.ts +++ b/packages/cli/src/connectors/clickhouse/connector.test.ts @@ -25,7 +25,7 @@ function fakeClientFactory(): KtxClickHouseClientFactory { { table: 'event_summary', name: 'event_name', type: 'String', comment: '', is_in_primary_key: 0 }, ]); } - if (input.query.includes('FROM system.parts') && input.query.includes('GROUP BY table')) { + if (input.query.includes('FROM system.parts') && input.query.includes('GROUP BY')) { return result([{ table: 'events', row_count: '2' }]); } if (input.query.includes('SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 1')) { @@ -90,6 +90,50 @@ function fakeClientFactory(): KtxClickHouseClientFactory { }; } +function multiDatabaseClickHouseClientFactory(): KtxClickHouseClientFactory { + const query = vi.fn(async (input: { query: string; format: string; query_params?: Record }) => { + if (input.query.includes('FROM system.tables')) { + expect(input.query_params).toEqual({ databases: ['analytics', 'mart'] }); + return result([ + { database: 'analytics', name: 'events', engine: 'MergeTree', comment: 'Event stream' }, + { database: 'mart', name: 'order_events', engine: 'MergeTree', comment: '' }, + ]); + } + if (input.query.includes('FROM system.columns')) { + expect(input.query_params).toEqual({ databases: ['analytics', 'mart'] }); + return result([ + { + database: 'analytics', + table: 'events', + name: 'id', + type: 'UInt64', + comment: '', + is_in_primary_key: 1, + }, + { + database: 'mart', + table: 'order_events', + name: 'id', + type: 'UInt64', + comment: '', + is_in_primary_key: 1, + }, + ]); + } + if (input.query.includes('FROM system.parts') && input.query.includes('GROUP BY')) { + expect(input.query_params).toEqual({ databases: ['analytics', 'mart'] }); + return result([ + { database: 'analytics', table: 'events', row_count: '2' }, + { database: 'mart', table: 'order_events', row_count: '5' }, + ]); + } + throw new Error(`Unexpected SQL: ${input.query}`); + }); + return { + createClient: vi.fn(() => ({ query, close: vi.fn(async () => undefined) })), + }; +} + describe('KtxClickHouseScanConnector', () => { it('resolves ClickHouse connection configuration safely', () => { expect(isKtxClickHouseConnectionConfig({ driver: 'clickhouse', host: 'localhost', database: 'analytics' })).toBe( @@ -166,6 +210,34 @@ describe('KtxClickHouseScanConnector', () => { expect(snapshot.tables.find((table) => table.name === 'events')?.foreignKeys).toEqual([]); }); + it('introspects every configured ClickHouse database scope while preserving the default database', async () => { + const connector = new KtxClickHouseScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'clickhouse', + host: 'ch.example.test', + database: 'analytics', + databases: ['analytics', 'mart'], + username: 'reader', + password: 'test-pass', // pragma: allowlist secret + }, + clientFactory: multiDatabaseClickHouseClientFactory(), + now: () => new Date('2026-05-21T10:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'clickhouse' }, + { runId: 'scan-run-1' }, + ); + + expect(snapshot.scope).toEqual({ schemas: ['analytics', 'mart'] }); + expect(snapshot.metadata).toMatchObject({ database: 'analytics', databases: ['analytics', 'mart'] }); + expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual([ + 'analytics.events', + 'mart.order_events', + ]); + }); + it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => { const clientFactory = fakeClientFactory(); const connector = new KtxClickHouseScanConnector({ diff --git a/packages/cli/src/connectors/clickhouse/connector.ts b/packages/cli/src/connectors/clickhouse/connector.ts index 714ccfb1..1d851001 100644 --- a/packages/cli/src/connectors/clickhouse/connector.ts +++ b/packages/cli/src/connectors/clickhouse/connector.ts @@ -12,6 +12,7 @@ export interface KtxClickHouseConnectionConfig { host?: string; port?: number; database?: string; + databases?: string[]; username?: string; user?: string; password?: string; @@ -87,12 +88,14 @@ export interface KtxClickHouseColumnDistinctValuesResult { } interface ClickHouseTableRow { + database?: string; name: string; engine: string; comment: string; } interface ClickHouseColumnRow { + database?: string; table: string; name: string; type: string; @@ -101,6 +104,7 @@ interface ClickHouseColumnRow { } interface ClickHouseRowCountRow { + database?: string; table?: string; row_count?: string | number; count?: string | number; @@ -174,6 +178,25 @@ function isNullableClickHouseType(type: string): boolean { return type.startsWith('Nullable(') || type.startsWith('LowCardinality(Nullable('); } +function configuredClickHouseDatabases( + connection: KtxClickHouseConnectionConfig, + fallbackDatabase: string, +): string[] { + if (Array.isArray(connection.databases) && connection.databases.length > 0) { + const selected = connection.databases + .filter((database): database is string => typeof database === 'string' && database.trim().length > 0) + .map((database) => database.trim()); + if (selected.length > 0) { + return [...new Set(selected)]; + } + } + return [fallbackDatabase]; +} + +function clickHouseTableKey(database: string, table: string): string { + return `${database}.${table}`; +} + export function isKtxClickHouseConnectionConfig( connection: KtxClickHouseConnectionConfig | undefined, ): connection is KtxClickHouseConnectionConfig { @@ -261,52 +284,61 @@ export class KtxClickHouseScanConnector implements KtxScanConnector { async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise { this.assertConnection(input.connectionId); - const database = this.clientConfig.database; + const databases = configuredClickHouseDatabases(this.connection, this.clientConfig.database); const tables = await this.queryEachRow( ` - SELECT name, engine, comment + SELECT database, name, engine, comment FROM system.tables - WHERE database = {database:String} + WHERE database IN {databases:Array(String)} AND engine NOT IN ('Dictionary') - ORDER BY name + ORDER BY database, name `, - { database }, + { databases }, ); const columns = await this.queryEachRow( ` - SELECT table, name, type, comment, is_in_primary_key + SELECT database, table, name, type, comment, is_in_primary_key FROM system.columns - WHERE database = {database:String} - ORDER BY table, position + WHERE database IN {databases:Array(String)} + ORDER BY database, table, position `, - { database }, + { databases }, ); const rowCounts = await this.queryEachRow( ` - SELECT table, sum(rows) AS row_count + SELECT database, table, sum(rows) AS row_count FROM system.parts - WHERE database = {database:String} + WHERE database IN {databases:Array(String)} AND active = 1 - GROUP BY table + GROUP BY database, table `, - { database }, + { databases }, ); const columnsByTable = new Map(); for (const column of columns) { - columnsByTable.set(column.table, [...(columnsByTable.get(column.table) ?? []), column]); + const key = clickHouseTableKey(column.database ?? this.clientConfig.database, column.table); + columnsByTable.set(key, [...(columnsByTable.get(key) ?? []), column]); } - const rowCountByTable = new Map(rowCounts.map((row) => [String(row.table), Number(row.row_count ?? 0)])); - const schemaTables = tables.map((table) => - this.toSchemaTable(table, columnsByTable.get(table.name) ?? [], rowCountByTable.get(table.name) ?? 0), + const rowCountByTable = new Map( + rowCounts.map((row) => [ + clickHouseTableKey(row.database ?? this.clientConfig.database, String(row.table)), + Number(row.row_count ?? 0), + ]), ); + const schemaTables = tables.map((table) => { + const database = table.database ?? this.clientConfig.database; + const key = clickHouseTableKey(database, table.name); + return this.toSchemaTable(database, table, columnsByTable.get(key) ?? [], rowCountByTable.get(key) ?? 0); + }); return { connectionId: this.connectionId, driver: 'clickhouse', extractedAt: this.now().toISOString(), - scope: { schemas: [database] }, + scope: { schemas: databases }, metadata: { - database, + database: this.clientConfig.database, + databases, host: this.clientConfig.host, table_count: schemaTables.length, total_columns: schemaTables.reduce((sum, table) => sum + table.columns.length, 0), @@ -436,11 +468,16 @@ export class KtxClickHouseScanConnector implements KtxScanConnector { } } - private toSchemaTable(table: ClickHouseTableRow, columns: ClickHouseColumnRow[], estimatedRows: number): KtxSchemaTable { + private toSchemaTable( + database: string, + table: ClickHouseTableRow, + columns: ClickHouseColumnRow[], + estimatedRows: number, + ): KtxSchemaTable { const kind = tableKind(table.engine); return { catalog: null, - db: this.clientConfig.database, + db: database, name: table.name, kind, comment: table.comment || null, diff --git a/packages/cli/src/connectors/mysql/connector.test.ts b/packages/cli/src/connectors/mysql/connector.test.ts index 64b576e2..f9f2d0ad 100644 --- a/packages/cli/src/connectors/mysql/connector.test.ts +++ b/packages/cli/src/connectors/mysql/connector.test.ts @@ -85,6 +85,84 @@ function fakePoolFactory(): KtxMysqlPoolFactory { }; } +function multiSchemaMysqlPoolFactory(): KtxMysqlPoolFactory { + const query = vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => { + if (sql.includes('INFORMATION_SCHEMA.TABLES')) { + expect(params).toEqual(['analytics', 'mart']); + return mysqlResult( + [ + { + TABLE_SCHEMA: 'analytics', + TABLE_NAME: 'customers', + TABLE_TYPE: 'BASE TABLE', + TABLE_COMMENT: '', + TABLE_ROWS: 2, + }, + { + TABLE_SCHEMA: 'mart', + TABLE_NAME: 'orders', + TABLE_TYPE: 'BASE TABLE', + TABLE_COMMENT: '', + TABLE_ROWS: 3, + }, + ], + [ + { name: 'TABLE_SCHEMA' }, + { name: 'TABLE_NAME' }, + { name: 'TABLE_TYPE' }, + { name: 'TABLE_COMMENT' }, + { name: 'TABLE_ROWS' }, + ], + ); + } + if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) { + expect(params).toEqual(['analytics', 'mart']); + return mysqlResult( + [ + { + TABLE_SCHEMA: 'analytics', + TABLE_NAME: 'customers', + COLUMN_NAME: 'id', + DATA_TYPE: 'int', + IS_NULLABLE: 'NO', + COLUMN_COMMENT: '', + }, + { + TABLE_SCHEMA: 'mart', + TABLE_NAME: 'orders', + COLUMN_NAME: 'id', + DATA_TYPE: 'int', + IS_NULLABLE: 'NO', + COLUMN_COMMENT: '', + }, + ], + [], + ); + } + if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes("CONSTRAINT_NAME = 'PRIMARY'")) { + expect(params).toEqual(['analytics', 'mart']); + return mysqlResult( + [ + { TABLE_SCHEMA: 'analytics', TABLE_NAME: 'customers', COLUMN_NAME: 'id' }, + { TABLE_SCHEMA: 'mart', TABLE_NAME: 'orders', COLUMN_NAME: 'id' }, + ], + [], + ); + } + if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes('REFERENCED_TABLE_NAME IS NOT NULL')) { + expect(params).toEqual(['analytics', 'mart']); + return mysqlResult([], []); + } + throw new Error(`Unexpected SQL: ${sql} params=${JSON.stringify(params)}`); + }); + return { + createPool: vi.fn(() => ({ + getConnection: vi.fn(async () => ({ query, release: vi.fn() })), + end: vi.fn(async () => undefined), + })), + }; +} + describe('KtxMysqlScanConnector', () => { it('resolves MySQL connection configuration safely', () => { expect(isKtxMysqlConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(true); @@ -169,6 +247,34 @@ describe('KtxMysqlScanConnector', () => { ]); }); + it('introspects every configured MySQL schema scope', async () => { + const connector = new KtxMysqlScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'mysql', + host: 'db.example.test', + database: 'analytics', + schemas: ['analytics', 'mart'], + username: 'reader', + password: 'secret', // pragma: allowlist secret + }, + poolFactory: multiSchemaMysqlPoolFactory(), + now: () => new Date('2026-05-21T10:00:00.000Z'), + }); + + const snapshot = await connector.introspect( + { connectionId: 'warehouse', driver: 'mysql' }, + { runId: 'scan-run-1' }, + ); + + expect(snapshot.scope).toEqual({ schemas: ['analytics', 'mart'] }); + expect(snapshot.metadata).toMatchObject({ database: 'analytics', schemas: ['analytics', 'mart'] }); + expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual([ + 'analytics.customers', + 'mart.orders', + ]); + }); + it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => { const poolFactory = fakePoolFactory(); const connector = new KtxMysqlScanConnector({ diff --git a/packages/cli/src/connectors/mysql/connector.ts b/packages/cli/src/connectors/mysql/connector.ts index 9cf242f5..9d92c2e0 100644 --- a/packages/cli/src/connectors/mysql/connector.ts +++ b/packages/cli/src/connectors/mysql/connector.ts @@ -11,6 +11,7 @@ export interface KtxMysqlConnectionConfig { host?: string; port?: number; database?: string; + schemas?: string[]; username?: string; user?: string; password?: string; @@ -79,6 +80,7 @@ export interface KtxMysqlColumnDistinctValuesResult { } interface MysqlTableRow extends RowDataPacket { + TABLE_SCHEMA: string; TABLE_NAME: string; TABLE_TYPE: string; TABLE_COMMENT: string | null; @@ -86,6 +88,7 @@ interface MysqlTableRow extends RowDataPacket { } interface MysqlColumnRow extends RowDataPacket { + TABLE_SCHEMA: string; TABLE_NAME: string; COLUMN_NAME: string; DATA_TYPE: string; @@ -94,11 +97,13 @@ interface MysqlColumnRow extends RowDataPacket { } interface MysqlPrimaryKeyRow extends RowDataPacket { + TABLE_SCHEMA: string; TABLE_NAME: string; COLUMN_NAME: string; } interface MysqlForeignKeyRow extends RowDataPacket { + TABLE_SCHEMA: string; TABLE_NAME: string; COLUMN_NAME: string; REFERENCED_TABLE_NAME: string; @@ -185,22 +190,42 @@ function cleanMySqlTableComment(comment: string | null): string | null { return comment; } -function groupByTable(rows: T[]): Map { +function configuredMysqlSchemas(connection: KtxMysqlConnectionConfig, fallbackDatabase: string): string[] { + if (Array.isArray(connection.schemas) && connection.schemas.length > 0) { + const selected = connection.schemas + .filter((schema): schema is string => typeof schema === 'string' && schema.trim().length > 0) + .map((schema) => schema.trim()); + if (selected.length > 0) { + return [...new Set(selected)]; + } + } + return [fallbackDatabase]; +} + +function mysqlTableKey(schema: string, table: string): string { + return `${schema}.${table}`; +} + +function groupByTable( + rows: T[], + fallbackDatabase: string, +): Map { const grouped = new Map(); for (const row of rows) { - const tableRows = grouped.get(row.TABLE_NAME) ?? []; + const tableRows = grouped.get(mysqlTableKey(row.TABLE_SCHEMA ?? fallbackDatabase, row.TABLE_NAME)) ?? []; tableRows.push(row); - grouped.set(row.TABLE_NAME, tableRows); + grouped.set(mysqlTableKey(row.TABLE_SCHEMA ?? fallbackDatabase, row.TABLE_NAME), tableRows); } return grouped; } -function primaryKeyMap(rows: MysqlPrimaryKeyRow[]): Map> { +function primaryKeyMap(rows: MysqlPrimaryKeyRow[], fallbackDatabase: string): Map> { const grouped = new Map>(); for (const row of rows) { - const columns = grouped.get(row.TABLE_NAME) ?? new Set(); + const key = mysqlTableKey(row.TABLE_SCHEMA ?? fallbackDatabase, row.TABLE_NAME); + const columns = grouped.get(key) ?? new Set(); columns.add(row.COLUMN_NAME); - grouped.set(row.TABLE_NAME, columns); + grouped.set(key, columns); } return grouped; } @@ -308,60 +333,68 @@ export class KtxMysqlScanConnector implements KtxScanConnector { async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise { this.assertConnection(input.connectionId); - const database = this.poolConfig.database; + const databases = configuredMysqlSchemas(this.connection, this.poolConfig.database); + const placeholders = databases.map(() => '?').join(', '); const tables = await this.queryRaw( ` - SELECT TABLE_NAME, TABLE_TYPE, TABLE_COMMENT, TABLE_ROWS + SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE, TABLE_COMMENT, TABLE_ROWS FROM INFORMATION_SCHEMA.TABLES - WHERE TABLE_SCHEMA = ? AND TABLE_TYPE IN ('BASE TABLE', 'VIEW') - ORDER BY TABLE_NAME + WHERE TABLE_SCHEMA IN (${placeholders}) AND TABLE_TYPE IN ('BASE TABLE', 'VIEW') + ORDER BY TABLE_SCHEMA, TABLE_NAME `, - [database], + databases, ); const columns = await this.queryRaw( ` - SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COLUMN_COMMENT + SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COLUMN_COMMENT FROM INFORMATION_SCHEMA.COLUMNS - WHERE TABLE_SCHEMA = ? - ORDER BY TABLE_NAME, ORDINAL_POSITION + WHERE TABLE_SCHEMA IN (${placeholders}) + ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION `, - [database], + databases, ); const primaryKeys = await this.queryRaw( ` - SELECT TABLE_NAME, COLUMN_NAME + SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE - WHERE TABLE_SCHEMA = ? + WHERE TABLE_SCHEMA IN (${placeholders}) AND CONSTRAINT_NAME = 'PRIMARY' - ORDER BY TABLE_NAME, ORDINAL_POSITION + ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION `, - [database], + databases, ); const foreignKeys = await this.queryRaw( ` - SELECT TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME, CONSTRAINT_NAME + SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME, CONSTRAINT_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE - WHERE TABLE_SCHEMA = ? + WHERE TABLE_SCHEMA IN (${placeholders}) AND REFERENCED_TABLE_NAME IS NOT NULL - ORDER BY TABLE_NAME, COLUMN_NAME + ORDER BY TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME `, - [database], + databases, ); - const columnsByTable = groupByTable(columns); - const primaryKeysByTable = primaryKeyMap(primaryKeys); - const foreignKeysByTable = groupByTable(foreignKeys); + const columnsByTable = groupByTable(columns, this.poolConfig.database); + const primaryKeysByTable = primaryKeyMap(primaryKeys, this.poolConfig.database); + const foreignKeysByTable = groupByTable(foreignKeys, this.poolConfig.database); const schemaTables = tables.map((table) => - this.toSchemaTable(table, columnsByTable.get(table.TABLE_NAME) ?? [], primaryKeysByTable, foreignKeysByTable), + this.toSchemaTable( + table.TABLE_SCHEMA ?? this.poolConfig.database, + table, + columnsByTable.get(mysqlTableKey(table.TABLE_SCHEMA ?? this.poolConfig.database, table.TABLE_NAME)) ?? [], + primaryKeysByTable, + foreignKeysByTable, + ), ); return { connectionId: this.connectionId, driver: 'mysql', extractedAt: this.now().toISOString(), - scope: { schemas: [database] }, + scope: { schemas: databases }, metadata: { - database, + database: this.poolConfig.database, + schemas: databases, host: this.poolConfig.host, table_count: schemaTables.length, total_columns: schemaTables.reduce((sum, table) => sum + table.columns.length, 0), @@ -487,6 +520,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector { } private toSchemaTable( + database: string, table: MysqlTableRow, columns: MysqlColumnRow[], primaryKeysByTable: Map>, @@ -497,13 +531,17 @@ export class KtxMysqlScanConnector implements KtxScanConnector { const estimatedRows = kind === 'view' ? null : Number(table.TABLE_ROWS ?? 0); return { catalog: null, - db: this.poolConfig.database, + db: database, name: tableName, kind, comment: cleanMySqlTableComment(table.TABLE_COMMENT), estimatedRows: Number.isFinite(estimatedRows) ? estimatedRows : null, - columns: columns.map((column) => this.toSchemaColumn(column, primaryKeysByTable.get(tableName) ?? new Set())), - foreignKeys: (foreignKeysByTable.get(tableName) ?? []).map((row) => this.toSchemaForeignKey(row)), + columns: columns.map((column) => + this.toSchemaColumn(column, primaryKeysByTable.get(mysqlTableKey(database, tableName)) ?? new Set()), + ), + foreignKeys: (foreignKeysByTable.get(mysqlTableKey(database, tableName)) ?? []).map((row) => + this.toSchemaForeignKey(database, row), + ), }; } @@ -519,11 +557,11 @@ export class KtxMysqlScanConnector implements KtxScanConnector { }; } - private toSchemaForeignKey(row: MysqlForeignKeyRow): KtxSchemaForeignKey { + private toSchemaForeignKey(database: string, row: MysqlForeignKeyRow): KtxSchemaForeignKey { return { fromColumn: row.COLUMN_NAME, toCatalog: null, - toDb: this.poolConfig.database, + toDb: database, toTable: row.REFERENCED_TABLE_NAME, toColumn: row.REFERENCED_COLUMN_NAME, constraintName: row.CONSTRAINT_NAME || null, diff --git a/packages/cli/src/connectors/snowflake/connector.test.ts b/packages/cli/src/connectors/snowflake/connector.test.ts index 7b2e600f..7e17117e 100644 --- a/packages/cli/src/connectors/snowflake/connector.test.ts +++ b/packages/cli/src/connectors/snowflake/connector.test.ts @@ -215,6 +215,75 @@ describe('KtxSnowflakeScanConnector', () => { expect(driver.cleanup).toHaveBeenCalledTimes(1); }); + it('lists tables across schemas with one information schema query', async () => { + const queries: Array<{ sql: string; params?: unknown }> = []; + const driverFactory: KtxSnowflakeDriverFactory = { + createDriver: vi.fn(() => ({ + test: vi.fn(async () => ({ success: true })), + query: vi.fn(async (sql: string, params?: unknown) => { + queries.push({ sql, params }); + return { + headers: ['TABLE_SCHEMA', 'TABLE_NAME', 'TABLE_TYPE'], + rows: [ + ['MART', 'ORDERS', 'BASE TABLE'], + ['PUBLIC', 'ORDER_SUMMARY', 'VIEW'], + ], + totalRows: 2, + rowCount: 2, + }; + }), + getSchemaMetadata: vi.fn(async () => []), + listSchemas: vi.fn(async () => []), + listTables: vi.fn(async () => []), + cleanup: vi.fn(async () => undefined), + })), + }; + const connector = new KtxSnowflakeScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'snowflake', + authMethod: 'password', + account: 'acct', + warehouse: 'WH', + database: 'ANALYTICS', + schema_name: 'PUBLIC', + username: 'reader', + password: 'fixture-pass', // pragma: allowlist secret + }, + driverFactory, + }); + + await expect(connector.listTables(['MART', 'PUBLIC'])).resolves.toEqual([ + { schema: 'MART', name: 'ORDERS', kind: 'table' }, + { schema: 'PUBLIC', name: 'ORDER_SUMMARY', kind: 'view' }, + ]); + + expect(queries).toHaveLength(1); + expect(queries[0]?.sql).toContain('FROM "ANALYTICS".INFORMATION_SCHEMA.TABLES'); + expect(queries[0]?.sql).toContain('AND TABLE_SCHEMA IN (?, ?)'); + expect(queries[0]?.params).toEqual(['ANALYTICS', 'MART', 'PUBLIC']); + }); + + it('rejects unsafe Snowflake identifiers before driver creation', () => { + expect( + () => + new KtxSnowflakeScanConnector({ + connectionId: 'warehouse', + connection: { + driver: 'snowflake', + authMethod: 'password', + account: 'acct', + warehouse: 'WH;DROP', + database: 'ANALYTICS', + schema_name: 'PUBLIC', + username: 'reader', + password: 'fixture-pass', // pragma: allowlist secret + }, + driverFactory: fakeDriverFactory(), + }), + ).toThrow('Invalid Snowflake warehouse identifier "WH;DROP"'); + }); + it('converts a native snapshot into a live-database introspection snapshot', async () => { const introspection = createSnowflakeLiveDatabaseIntrospection({ connections: { diff --git a/packages/cli/src/connectors/snowflake/connector.ts b/packages/cli/src/connectors/snowflake/connector.ts index 5b0dfcca..41263d4b 100644 --- a/packages/cli/src/connectors/snowflake/connector.ts +++ b/packages/cli/src/connectors/snowflake/connector.ts @@ -6,6 +6,7 @@ import { assertReadOnlySql, limitSqlForExecution } from '../../context/connectio import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js'; import * as snowflake from 'snowflake-sdk'; import { KtxSnowflakeDialect } from './dialect.js'; +import { assertSafeSnowflakeIdentifier, quoteSnowflakeIdentifier } from './identifiers.js'; export interface KtxSnowflakeConnectionConfig { driver?: string; @@ -206,16 +207,23 @@ export function snowflakeConnectionConfigFromConfig(input: { if (!username) { throw new Error(`Native Snowflake connector requires connections.${input.connectionId}.username`); } + assertSafeSnowflakeIdentifier(warehouse, 'warehouse'); + assertSafeSnowflakeIdentifier(database, 'database'); + const resolvedSchemas = schemaNames(input.connection!, env); + for (const schema of resolvedSchemas) { + assertSafeSnowflakeIdentifier(schema, 'schema'); + } const resolved: KtxSnowflakeResolvedConnectionConfig = { authMethod, account, warehouse, database, - schemas: schemaNames(input.connection!, env), + schemas: resolvedSchemas, username, }; const role = stringConfigValue(input.connection, 'role', env); if (role) { + assertSafeSnowflakeIdentifier(role, 'role'); resolved.role = role; } if (authMethod === 'rsa') { @@ -322,33 +330,30 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver { } async listSchemas(): Promise { - const result = await this.query(`SHOW SCHEMAS IN DATABASE "${this.resolved.database}"`); + const result = await this.query( + `SHOW SCHEMAS IN DATABASE ${quoteSnowflakeIdentifier(this.resolved.database, 'database')}`, + ); return result.rows.map((row) => String(row[1])).filter((name) => name !== 'INFORMATION_SCHEMA'); } async listTables(schemas?: string[]): Promise { - const filterSchemas = schemas ?? (await this.listSchemas()); - if (filterSchemas.length === 0) return []; - const entries: KtxTableListEntry[] = []; - for (const schemaName of filterSchemas) { - const result = await this.query( - ` - SELECT TABLE_NAME, TABLE_TYPE - FROM INFORMATION_SCHEMA.TABLES - WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ? - ORDER BY TABLE_NAME - `, - [schemaName, this.resolved.database], - ); - for (const row of result.rows) { - entries.push({ - schema: schemaName, - name: String(row[0]), - kind: String(row[1]) === 'VIEW' ? 'view' : 'table', - }); - } - } - return entries; + const filters = schemas && schemas.length > 0 ? schemas.map(() => '?').join(', ') : null; + const result = await this.query( + ` + SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE + FROM ${quoteSnowflakeIdentifier(this.resolved.database, 'database')}.INFORMATION_SCHEMA.TABLES + WHERE TABLE_CATALOG = ? + AND TABLE_SCHEMA <> 'INFORMATION_SCHEMA' + ${filters ? `AND TABLE_SCHEMA IN (${filters})` : ''} + ORDER BY TABLE_SCHEMA, TABLE_NAME + `, + [this.resolved.database, ...(schemas ?? [])], + ); + return result.rows.map((row) => ({ + schema: String(row[0]), + name: String(row[1]), + kind: String(row[2]) === 'VIEW' ? ('view' as const) : ('table' as const), + })); } async cleanup(): Promise { @@ -414,11 +419,20 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver { private async setConnectionContext(connection: snowflake.Connection): Promise { if (this.resolved.role) { - await this.executeSnowflakeQuery(connection, `USE ROLE "${this.resolved.role}"`); + await this.executeSnowflakeQuery(connection, `USE ROLE ${quoteSnowflakeIdentifier(this.resolved.role, 'role')}`); } - await this.executeSnowflakeQuery(connection, `USE WAREHOUSE "${this.resolved.warehouse}"`); - await this.executeSnowflakeQuery(connection, `USE DATABASE "${this.resolved.database}"`); - await this.executeSnowflakeQuery(connection, `USE SCHEMA "${this.resolved.schemas[0] ?? 'PUBLIC'}"`); + await this.executeSnowflakeQuery( + connection, + `USE WAREHOUSE ${quoteSnowflakeIdentifier(this.resolved.warehouse, 'warehouse')}`, + ); + await this.executeSnowflakeQuery( + connection, + `USE DATABASE ${quoteSnowflakeIdentifier(this.resolved.database, 'database')}`, + ); + await this.executeSnowflakeQuery( + connection, + `USE SCHEMA ${quoteSnowflakeIdentifier(this.resolved.schemas[0] ?? 'PUBLIC', 'schema')}`, + ); } private async executeSnowflakeQuery( @@ -601,8 +615,24 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector { return this.getDriver().listSchemas(); } - listTables(schemas?: string[]): Promise { - return this.getDriver().listTables(schemas); + async listTables(schemas?: string[]): Promise { + const filters = schemas && schemas.length > 0 ? schemas.map(() => '?').join(', ') : null; + const result = await this.getDriver().query( + ` + SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE + FROM ${quoteSnowflakeIdentifier(this.resolved.database, 'database')}.INFORMATION_SCHEMA.TABLES + WHERE TABLE_CATALOG = ? + AND TABLE_SCHEMA <> 'INFORMATION_SCHEMA' + ${filters ? `AND TABLE_SCHEMA IN (${filters})` : ''} + ORDER BY TABLE_SCHEMA, TABLE_NAME + `, + [this.resolved.database, ...(schemas ?? [])], + ); + return result.rows.map((row) => ({ + schema: String(row[0]), + name: String(row[1]), + kind: String(row[2]) === 'VIEW' ? ('view' as const) : ('table' as const), + })); } async cleanup(): Promise { diff --git a/packages/cli/src/connectors/snowflake/identifiers.test.ts b/packages/cli/src/connectors/snowflake/identifiers.test.ts new file mode 100644 index 00000000..d2c3e448 --- /dev/null +++ b/packages/cli/src/connectors/snowflake/identifiers.test.ts @@ -0,0 +1,18 @@ +import { describe, expect, it } from 'vitest'; +import { assertSafeSnowflakeIdentifier, quoteSnowflakeIdentifier } from './identifiers.js'; + +describe('Snowflake identifier guards', () => { + it('quotes simple Snowflake identifiers', () => { + expect(quoteSnowflakeIdentifier('ANALYTICS_DB', 'database')).toBe('"ANALYTICS_DB"'); + expect(quoteSnowflakeIdentifier('ROLE_1$', 'role')).toBe('"ROLE_1$"'); + }); + + it('rejects configured identifiers with field and value in the error', () => { + expect(() => assertSafeSnowflakeIdentifier('bad.db', 'database')).toThrow( + 'Invalid Snowflake database identifier "bad.db"; use a simple unquoted identifier matching /^[A-Za-z_][A-Za-z0-9_$]*$/', + ); + expect(() => assertSafeSnowflakeIdentifier('WH"DROP', 'warehouse')).toThrow( + 'Invalid Snowflake warehouse identifier "WH\\"DROP"; use a simple unquoted identifier matching /^[A-Za-z_][A-Za-z0-9_$]*$/', + ); + }); +}); diff --git a/packages/cli/src/connectors/snowflake/identifiers.ts b/packages/cli/src/connectors/snowflake/identifiers.ts new file mode 100644 index 00000000..2d1c7106 --- /dev/null +++ b/packages/cli/src/connectors/snowflake/identifiers.ts @@ -0,0 +1,14 @@ +const SNOWFLAKE_SIMPLE_IDENTIFIER = /^[A-Za-z_][A-Za-z0-9_$]*$/; + +export function assertSafeSnowflakeIdentifier(value: string, field: string): string { + if (!SNOWFLAKE_SIMPLE_IDENTIFIER.test(value)) { + throw new Error( + `Invalid Snowflake ${field} identifier ${JSON.stringify(value)}; use a simple unquoted identifier matching ${SNOWFLAKE_SIMPLE_IDENTIFIER}`, + ); + } + return value; +} + +export function quoteSnowflakeIdentifier(value: string, field: string): string { + return `"${assertSafeSnowflakeIdentifier(value, field)}"`; +} diff --git a/packages/cli/src/context/connections/bigquery-identifiers.test.ts b/packages/cli/src/context/connections/bigquery-identifiers.test.ts new file mode 100644 index 00000000..a1fd2e09 --- /dev/null +++ b/packages/cli/src/context/connections/bigquery-identifiers.test.ts @@ -0,0 +1,19 @@ +import { describe, expect, it } from 'vitest'; +import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from './bigquery-identifiers.js'; + +describe('BigQuery identifier normalization', () => { + it('normalizes project ids and regions for information schema paths', () => { + expect(normalizeBigQueryProjectId('project-1')).toBe('project-1'); + expect(normalizeBigQueryRegion('US')).toBe('us'); + expect(normalizeBigQueryRegion('region-eu')).toBe('eu'); + }); + + it('rejects malformed project ids and regions with caller-specific context', () => { + expect(() => normalizeBigQueryProjectId('project`1', 'table discovery')).toThrow( + 'Invalid BigQuery project id for table discovery: project`1', + ); + expect(() => normalizeBigQueryRegion('US;DROP', 'table discovery')).toThrow( + 'Invalid BigQuery region for table discovery: US;DROP', + ); + }); +}); diff --git a/packages/cli/src/context/connections/bigquery-identifiers.ts b/packages/cli/src/context/connections/bigquery-identifiers.ts new file mode 100644 index 00000000..f2aa29f9 --- /dev/null +++ b/packages/cli/src/context/connections/bigquery-identifiers.ts @@ -0,0 +1,17 @@ +const BIGQUERY_PROJECT_ID_PATTERN = /^[A-Za-z0-9_-]+$/; +const BIGQUERY_REGION_PATTERN = /^[a-z0-9-]+$/; + +export function normalizeBigQueryProjectId(value: string, context = 'historic-SQL ingest'): string { + if (!BIGQUERY_PROJECT_ID_PATTERN.test(value)) { + throw new Error(`Invalid BigQuery project id for ${context}: ${value}`); + } + return value; +} + +export function normalizeBigQueryRegion(value: string, context = 'historic-SQL ingest'): string { + const normalized = value.trim().toLowerCase().replace(/^region-/, ''); + if (!BIGQUERY_REGION_PATTERN.test(normalized)) { + throw new Error(`Invalid BigQuery region for ${context}: ${value}`); + } + return normalized; +} diff --git a/packages/cli/src/context/ingest/adapters/historic-sql/bigquery-query-history-reader.ts b/packages/cli/src/context/ingest/adapters/historic-sql/bigquery-query-history-reader.ts index e24c50cf..ac4d4c71 100644 --- a/packages/cli/src/context/ingest/adapters/historic-sql/bigquery-query-history-reader.ts +++ b/packages/cli/src/context/ingest/adapters/historic-sql/bigquery-query-history-reader.ts @@ -5,6 +5,7 @@ import { type HistoricSqlTimeWindow, type HistoricSqlUnifiedPullConfig, } from './types.js'; +import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from '../../../connections/bigquery-identifiers.js'; interface QueryResultLike { headers: string[]; @@ -52,21 +53,6 @@ function grantsError(cause: unknown): HistoricSqlGrantsMissingError { }); } -function normalizeProjectId(value: string): string { - if (!/^[A-Za-z0-9_-]+$/.test(value)) { - throw new Error(`Invalid BigQuery project id for historic-SQL ingest: ${value}`); - } - return value; -} - -function normalizeRegion(value: string): string { - const region = value.trim().toLowerCase().replace(/^region-/, ''); - if (!/^[a-z0-9-]+$/.test(region)) { - throw new Error(`Invalid BigQuery region for historic-SQL ingest: ${value}`); - } - return region; -} - function timestampExpression(value: Date | string): string { const date = value instanceof Date ? value : new Date(value); if (Number.isNaN(date.getTime())) { @@ -190,8 +176,8 @@ export class BigQueryHistoricSqlQueryHistoryReader { private readonly viewPath: string; constructor(options: BigQueryHistoricSqlQueryHistoryReaderOptions) { - const projectId = normalizeProjectId(options.projectId); - const region = normalizeRegion(options.region); + const projectId = normalizeBigQueryProjectId(options.projectId); + const region = normalizeBigQueryRegion(options.region); this.viewPath = `\`${projectId}.region-${region}.INFORMATION_SCHEMA.JOBS_BY_PROJECT\``; } diff --git a/packages/cli/src/database-tree-picker.test.ts b/packages/cli/src/database-tree-picker.test.ts index 5559ee42..4dd1dca3 100644 --- a/packages/cli/src/database-tree-picker.test.ts +++ b/packages/cli/src/database-tree-picker.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it, vi } from 'vitest'; import { pickDatabaseScope, + type DatabaseScopePromptAdapter, type DatabaseTreePickerRenderer, type PickDatabaseScopeArgs, } from './database-tree-picker.js'; @@ -12,8 +13,17 @@ function makeIo() { let stderr = ''; return { io: { - stdout: { isTTY: true, write: (chunk: string) => { stdout += chunk; } }, - stderr: { write: (chunk: string) => { stderr += chunk; } }, + stdout: { + isTTY: true, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, }, stdout: () => stdout, stderr: () => stderr, @@ -48,23 +58,96 @@ const discovered = [ { schema: 'public', name: 'sessions', kind: 'table' as const }, ]; +function promptAdapter(overrides: Partial = {}): DatabaseScopePromptAdapter { + return { + autocompleteMultiselect: vi.fn(async () => ['analytics']), + select: vi.fn(async () => 'refine'), + ...overrides, + }; +} + function baseArgs(overrides: Partial = {}): PickDatabaseScopeArgs { return { connectionId: 'warehouse', schemaNoun: 'schema', schemaNounPlural: 'schemas', - discovered, + schemas: ['analytics', 'public'], + schemaSuggestion: { excluded: new Set(), suggested: new Set(['analytics']) }, existing: { enabledTables: [] }, - defaultSchemas: ['analytics'], supportsSchemaScope: true, + listTablesForSchemas: vi.fn(async () => discovered), + prompts: promptAdapter(), ...overrides, }; } describe('pickDatabaseScope', () => { + it('starts Stage 1 with no checked schemas and does not enumerate tables before schema selection', async () => { + const prompts = promptAdapter({ + autocompleteMultiselect: vi.fn(async () => ['analytics']), + select: vi.fn(async () => 'save'), + }); + const listTablesForSchemas = vi.fn(async () => [ + { schema: 'analytics', name: 'orders', kind: 'table' as const }, + ]); + + const result = await pickDatabaseScope( + baseArgs({ + connectionId: 'warehouse', + schemaNoun: 'dataset', + schemaNounPlural: 'datasets', + schemas: ['analytics', 'raw'], + schemaSuggestion: { excluded: new Set(['raw']), suggested: new Set(['analytics']) }, + listTablesForSchemas, + prompts, + }), + makeIo().io, + captureRenderer().renderer, + ); + + expect(listTablesForSchemas).toHaveBeenCalledTimes(1); + expect(listTablesForSchemas).toHaveBeenCalledWith(['analytics']); + expect(result).toEqual({ + kind: 'selected', + activeSchemas: ['analytics'], + enabledTables: ['analytics.orders'], + }); + }); + + it('routes partial existing allowlists through Stage 2 so save preserves table selections', async () => { + const { renderer, setResult } = captureRenderer(); + setResult({ kind: 'save', selectedIds: ['analytics.customers'] }); + const prompts = promptAdapter({ + autocompleteMultiselect: vi.fn(async () => ['analytics']), + select: vi.fn(async () => 'save'), + }); + const listTablesForSchemas = vi.fn(async () => [ + { schema: 'analytics', name: 'customers', kind: 'table' as const }, + { schema: 'analytics', name: 'orders', kind: 'table' as const }, + ]); + + const result = await pickDatabaseScope( + baseArgs({ + schemas: ['analytics'], + schemaSuggestion: { excluded: new Set(), suggested: new Set(['analytics']) }, + existing: { enabledTables: ['analytics.customers'] }, + listTablesForSchemas, + prompts, + }), + makeIo().io, + renderer, + ); + + expect(result).toEqual({ + kind: 'selected', + activeSchemas: ['analytics'], + enabledTables: ['analytics.customers'], + }); + }); + it('builds a 2-level tree (schemas as parents, tables as children) and uses save-empty action', async () => { const { renderer, capture, setResult } = captureRenderer(); - setResult({ kind: 'quit' }); + setResult({ kind: 'save', selectedIds: ['analytics'] }); await pickDatabaseScope(baseArgs(), makeIo().io, renderer); @@ -81,18 +164,18 @@ describe('pickDatabaseScope', () => { expect(capture.state?.byId.get('public.events')?.title).toBe('events (view)'); }); - it('pre-checks default schemas at the parent level when no existing selection', async () => { + it('pre-checks selected schemas at the parent level when no existing selection reaches Stage 2', async () => { const { renderer, capture, setResult } = captureRenderer(); - setResult({ kind: 'quit' }); + setResult({ kind: 'save', selectedIds: ['analytics'] }); - await pickDatabaseScope(baseArgs({ defaultSchemas: ['analytics'] }), makeIo().io, renderer); + await pickDatabaseScope(baseArgs(), makeIo().io, renderer); expect([...(capture.state?.checked ?? [])]).toEqual(['analytics']); }); it('collapses an existing full-schema selection back into the parent check', async () => { const { renderer, capture, setResult } = captureRenderer(); - setResult({ kind: 'quit' }); + setResult({ kind: 'save', selectedIds: ['analytics'] }); await pickDatabaseScope( baseArgs({ existing: { enabledTables: ['analytics.customers', 'analytics.orders'] } }), @@ -105,7 +188,7 @@ describe('pickDatabaseScope', () => { it('keeps a partial existing selection at the leaf level', async () => { const { renderer, capture, setResult } = captureRenderer(); - setResult({ kind: 'quit' }); + setResult({ kind: 'save', selectedIds: ['analytics.customers'] }); await pickDatabaseScope( baseArgs({ existing: { enabledTables: ['analytics.customers'] } }), @@ -142,24 +225,6 @@ describe('pickDatabaseScope', () => { }); }); - it('treats empty save as enable-all', async () => { - const { renderer, setResult } = captureRenderer(); - setResult({ kind: 'save', selectedIds: [] }); - - const result = await pickDatabaseScope(baseArgs(), makeIo().io, renderer); - - expect(result).toEqual({ - kind: 'selected', - activeSchemas: ['analytics', 'public'], - enabledTables: [ - 'analytics.customers', - 'analytics.orders', - 'public.events', - 'public.sessions', - ], - }); - }); - it('omits activeSchemas when the driver does not support a schema scope', async () => { const { renderer, setResult } = captureRenderer(); setResult({ kind: 'save', selectedIds: ['analytics'] }); @@ -177,11 +242,12 @@ describe('pickDatabaseScope', () => { }); }); - it('returns back when the picker quits', async () => { - const { renderer, setResult } = captureRenderer(); - setResult({ kind: 'quit' }); + it('returns back when Stage 1 is cancelled', async () => { + const prompts = promptAdapter({ + autocompleteMultiselect: vi.fn(async () => ['back']), + }); - const result = await pickDatabaseScope(baseArgs(), makeIo().io, renderer); + const result = await pickDatabaseScope(baseArgs({ prompts }), makeIo().io, captureRenderer().renderer); expect(result).toEqual({ kind: 'back' }); }); diff --git a/packages/cli/src/database-tree-picker.ts b/packages/cli/src/database-tree-picker.ts index aea3c092..6b357de6 100644 --- a/packages/cli/src/database-tree-picker.ts +++ b/packages/cli/src/database-tree-picker.ts @@ -38,14 +38,38 @@ export type DatabaseScopePickResult = | { kind: 'selected'; activeSchemas: string[]; enabledTables: string[] } | { kind: 'back' }; +interface ScopeSuggestion { + excluded: Set; + suggested: Set; +} + +/** @internal */ +export interface DatabaseScopePromptAdapter { + autocompleteMultiselect(options: { + message: string; + options: Array<{ value: string; label: string; hint?: string; disabled?: boolean }>; + placeholder?: string; + required?: boolean; + maxItems?: number; + initialValues?: string[]; + }): Promise; + select(options: { + message: string; + options: Array<{ value: string; label: string; hint?: string; disabled?: boolean }>; + }): Promise; +} + export interface PickDatabaseScopeArgs { connectionId: string; schemaNoun: string; schemaNounPlural: string; - discovered: readonly KtxTableListEntry[]; + schemas: readonly string[]; + schemaSuggestion: ScopeSuggestion; existing: { enabledTables: readonly string[] }; - defaultSchemas: readonly string[]; supportsSchemaScope: boolean; + listTablesForSchemas: (schemas: string[]) => Promise; + initialSchemas?: readonly string[]; + prompts: DatabaseScopePromptAdapter; } function qualifiedTableId(entry: KtxTableListEntry): string { @@ -161,12 +185,39 @@ function schemasFromEnabledTables(enabledTables: readonly string[]): string[] { return result; } -export async function pickDatabaseScope( - args: PickDatabaseScopeArgs, - io: KtxCliIo, - render: DatabaseTreePickerRenderer = defaultRenderer, -): Promise { - const { inputs, schemaIds, allTables } = buildTreeInputs(args.discovered); +function schemaOptions(args: PickDatabaseScopeArgs): Array<{ value: string; label: string; hint?: string }> { + return args.schemas + .filter((schema) => !args.schemaSuggestion.excluded.has(schema)) + .slice() + .sort((left, right) => { + const leftSuggested = args.schemaSuggestion.suggested.has(left); + const rightSuggested = args.schemaSuggestion.suggested.has(right); + if (leftSuggested !== rightSuggested) return leftSuggested ? -1 : 1; + return left.localeCompare(right); + }) + .map((schema) => ({ + value: schema, + label: schema, + ...(args.schemaSuggestion.suggested.has(schema) ? { hint: 'suggested' } : {}), + })); +} + +function initialStageOneSchemas(args: PickDatabaseScopeArgs): string[] { + if (args.existing.enabledTables.length > 0) { + return schemasFromEnabledTables(args.existing.enabledTables); + } + return [...(args.initialSchemas ?? [])]; +} + +async function runStageTwoTreePicker(input: { + args: PickDatabaseScopeArgs; + discovered: readonly KtxTableListEntry[]; + selectedSchemas: readonly string[]; + io: KtxCliIo; + render: DatabaseTreePickerRenderer; +}): Promise { + const { args, discovered, selectedSchemas, io, render } = input; + const { inputs, schemaIds, allTables } = buildTreeInputs(discovered); const tree = buildPickerTree(inputs); const byId = new Map(tree.map((node) => [node.id, node])); const tableCount = allTables.length; @@ -175,7 +226,7 @@ export async function pickDatabaseScope( const initialSelection = args.existing.enabledTables.length > 0 ? initialSelectionForExisting(args.existing.enabledTables, byId) - : initialSelectionFromDefaults(args.defaultSchemas, schemaIds); + : initialSelectionFromDefaults(selectedSchemas, schemaIds); const initialState = buildInitialState({ tree, @@ -208,3 +259,63 @@ export async function pickDatabaseScope( return { kind: 'selected', activeSchemas, enabledTables }; } + +export async function pickDatabaseScope( + args: PickDatabaseScopeArgs, + io: KtxCliIo, + render: DatabaseTreePickerRenderer = defaultRenderer, +): Promise { + let selectedSchemas = initialStageOneSchemas(args); + while (true) { + const pickedSchemas = await args.prompts.autocompleteMultiselect({ + message: `Choose ${args.schemaNounPlural} to enable for ${args.connectionId}\nType to filter. Space to select. Enter when done.`, + placeholder: `Search ${args.schemaNounPlural}`, + options: schemaOptions(args), + initialValues: selectedSchemas, + required: false, + }); + if (pickedSchemas.includes('back')) { + return { kind: 'back' }; + } + selectedSchemas = pickedSchemas; + if (selectedSchemas.length === 0) { + io.stderr.write(`Nothing selected - type to filter, or Escape to skip ${args.schemaNoun} scope.\n`); + continue; + } + + const selectedNoun = + selectedSchemas.length === 1 ? args.schemaNoun : args.schemaNounPlural; + const action = await args.prompts.select({ + message: `Enable all tables in ${selectedSchemas.length} ${selectedNoun}, or refine tables?`, + options: [ + { value: 'save', label: `Enable all tables in selected ${selectedNoun}` }, + { value: 'refine', label: 'Refine: choose individual tables' }, + { value: 'back', label: 'Back' }, + ], + }); + if (action === 'back') { + continue; + } + + const discovered = await args.listTablesForSchemas(selectedSchemas); + if (action === 'save' && args.existing.enabledTables.length === 0) { + return { + kind: 'selected', + activeSchemas: args.supportsSchemaScope ? selectedSchemas : [], + enabledTables: discovered.map(qualifiedTableId), + }; + } + + const refined = await runStageTwoTreePicker({ + args, + discovered, + selectedSchemas, + io, + render, + }); + if (refined.kind === 'back') { + continue; + } + return refined; + } +} diff --git a/packages/cli/src/local-adapters.test.ts b/packages/cli/src/local-adapters.test.ts index ac8c3c41..efad86b6 100644 --- a/packages/cli/src/local-adapters.test.ts +++ b/packages/cli/src/local-adapters.test.ts @@ -167,6 +167,40 @@ describe('CLI local ingest adapters', () => { ]); }); + it('resolves BigQuery credentials_json from a file: reference for query history ingest', async () => { + const credentialsPath = join(tempDir, 'credentials.json'); + await writeFile(credentialsPath, JSON.stringify({ project_id: 'demo-project' }), 'utf-8'); + await writeProject( + tempDir, + [ + 'connections:', + ' bq:', + ' driver: bigquery', + ' dataset_id: analytics', + ' location: us', + ` credentials_json: 'file:${credentialsPath}'`, + ' historicSql:', + ' enabled: true', + ' dialect: bigquery', + 'ingest:', + ' adapters:', + ' - historic-sql', + '', + ].join('\n'), + ); + const project = await loadKtxProject({ projectDir: tempDir }); + + const adapters = createKtxCliLocalIngestAdapters(project, { + historicSqlConnectionId: 'bq', + sqlAnalysis: sqlAnalysisStub(), + }); + + expect(adapters.find((adapter) => adapter.source === 'historic-sql')?.skillNames).toEqual([ + 'historic_sql_table_digest', + 'historic_sql_patterns', + ]); + }); + it('uses query-history wording for public BigQuery capability errors', async () => { await writeProject( tempDir, diff --git a/packages/cli/src/local-adapters.ts b/packages/cli/src/local-adapters.ts index d5b7817e..88ee9880 100644 --- a/packages/cli/src/local-adapters.ts +++ b/packages/cli/src/local-adapters.ts @@ -30,6 +30,7 @@ import { type ManagedPythonCoreDaemonOptions, } from './managed-python-http.js'; import type { KtxOperationalLogger } from './io/logger.js'; +import { resolveKtxConfigReference } from './context/core/config-reference.js'; function hasSnowflakeDriver(connection: unknown): boolean { return ( @@ -279,7 +280,10 @@ async function createEphemeralSnowflakeHistoricSqlClient( function bigQueryProjectId(connection: KtxBigQueryConnectionConfig, env: NodeJS.ProcessEnv): string { const raw = typeof connection.credentials_json === 'string' ? connection.credentials_json : ''; - const resolved = raw.startsWith('env:') ? env[raw.slice('env:'.length)] ?? '' : raw; + const resolved = resolveKtxConfigReference(raw, env); + if (!resolved) { + throw new Error('Query history BigQuery connection requires credentials_json'); + } const parsed = JSON.parse(resolved) as { project_id?: unknown }; if (typeof parsed.project_id !== 'string' || parsed.project_id.trim().length === 0) { throw new Error('Query history BigQuery connection requires credentials_json.project_id'); diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 178ed076..28c9e937 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -1,7 +1,7 @@ import { mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join, resolve } from 'node:path'; -import { initKtxProject } from './context/project/project.js'; +import { initKtxProject, loadKtxProject } from './context/project/project.js'; import { parseKtxProjectConfig } from './context/project/config.js'; import { readKtxSetupState, writeKtxSetupState } from './context/project/setup-config.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; @@ -42,7 +42,7 @@ function makeIo() { type ScopePick = | 'back' | 'enable-all' - | { schemas: string[]; tables: string[] }; + | { schemas: string[]; tables: string[] | 'back' }; interface PickerStubs { pickDatabaseScope: KtxSetupDatabasesDeps['pickDatabaseScope']; @@ -58,15 +58,21 @@ function makePickerStubs(options: { scopes?: ScopePick[] } = {}): PickerStubs { scopeCalls.push(args); const next = queue.shift(); if (next === undefined || next === 'enable-all') { - const enabledTables = args.discovered.map((t) => `${t.schema}.${t.name}`); + const schemas = args.initialSchemas && args.initialSchemas.length > 0 ? [...args.initialSchemas] : [...args.schemas]; + const discovered = await args.listTablesForSchemas(schemas); + const enabledTables = discovered.map((t) => `${t.schema}.${t.name}`); const activeSchemas = args.supportsSchemaScope - ? Array.from(new Set(args.discovered.map((t) => t.schema))) + ? Array.from(new Set(discovered.map((t) => t.schema))) : []; return { kind: 'selected', activeSchemas, enabledTables }; } if (next === 'back') { return { kind: 'back' }; } + await args.listTablesForSchemas(next.schemas); + if (next.tables === 'back') { + return { kind: 'back' }; + } return { kind: 'selected', activeSchemas: args.supportsSchemaScope ? next.schemas : [], @@ -88,7 +94,21 @@ function makePromptAdapter(options: { const passwordValues = [...(options.passwordValues ?? [])]; return { multiselect: vi.fn(async () => multiselectValues.shift() ?? ['postgres']), + autocompleteMultiselect: vi.fn(async (options) => { + if (multiselectValues.length > 0) { + return multiselectValues.shift() ?? []; + } + if (options.initialValues && options.initialValues.length > 0) { + return options.initialValues; + } + return options.options.length > 0 + ? options.options.map((option: { value: string }) => option.value) + : ['back']; + }), select: vi.fn(async ({ message }) => { + if (message.startsWith('Enable all tables in ') && message.includes(', or refine tables?')) { + return 'save'; + } if (message.includes('How much database context should KTX build?')) { const nextValue = selectValues[0]; return nextValue === 'fast' || nextValue === 'deep' || nextValue === 'back' @@ -240,6 +260,48 @@ describe('setup databases step', () => { expect(prompts.select).toHaveBeenCalledTimes(1); }); + it('preserves context.depth when editing an existing database connection', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: ./warehouse.sqlite', + ' context:', + ' depth: deep', + '', + ].join('\n'), + 'utf-8', + ); + const prompts = makePromptAdapter({ + selectValues: ['edit', 'warehouse', 'continue'], + textValues: ['./warehouse.sqlite'], + }); + const testConnection = vi.fn(async () => 0); + const scanConnection = vi.fn(async () => 0); + const io = makeIo(); + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'auto', + skipDatabases: false, + databaseSchemas: [], + disableQueryHistory: true, + }, + io.io, + { prompts, testConnection, scanConnection }, + ); + + expect(result.status, io.stderr()).toBe('ready'); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse).toMatchObject({ + driver: 'sqlite', + path: './warehouse.sqlite', + context: { depth: 'deep' }, + }); + }); + it('labels existing database connections with the database type', async () => { await writeFile( join(tempDir, 'ktx.yaml'), @@ -435,16 +497,13 @@ describe('setup databases step', () => { { driver: 'bigquery', selectValues: ['no'], - textValues: ['', 'analytics', '/path/to/service-account.json', ''], + textValues: ['', '/path/to/service-account.json', ''], expectedTextPrompts: [ { message: connectionNamePrompt('BigQuery'), placeholder: 'bigquery-warehouse', initialValue: 'bigquery-warehouse', }, - { - message: 'BigQuery dataset\nFor example analytics.', - }, { message: 'Path to service account JSON file', }, @@ -918,7 +977,7 @@ describe('setup databases step', () => { placeholder: 'env:DATABASE_URL', initialValue: 'env:DATABASE_URL', }); - expect(listTables).toHaveBeenCalledWith(tempDir, 'warehouse', ['analytics', 'public']); + expect(listTables).toHaveBeenCalledWith(tempDir, 'warehouse', ['analytics']); expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything()); expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything()); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); @@ -1108,7 +1167,7 @@ describe('setup databases step', () => { { schema: 'public', name: 'customers', kind: 'table' as const }, { schema: 'public', name: 'orders', kind: 'table' as const }, ]); - const pickers = makePickerStubs({ scopes: ['back'] }); + const pickers = makePickerStubs({ scopes: [{ schemas: ['public'], tables: 'back' }] }); const result = await runKtxSetupDatabasesStep( { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, @@ -1457,6 +1516,88 @@ describe('setup databases step', () => { }); }); + it('offers schema scope discovery for MySQL and writes selected schemas', async () => { + const prompts = makePromptAdapter({ + multiselectValues: [['mysql']], + selectValues: ['url', 'continue'], + textValues: ['mysql-warehouse', 'mysql://reader@localhost/analytics'], + }); + const listSchemas = vi.fn(async () => ['analytics', 'mart']); + const listTables = vi.fn(async (_projectDir: string, _connectionId: string, schemas?: string[]) => + (schemas ?? []).map((schema) => ({ schema, name: 'orders', kind: 'table' as const })), + ); + const pickDatabaseScope = vi.fn(async (args: PickDatabaseScopeArgs) => { + const scopedArgs = args as PickDatabaseScopeArgs & { + schemaSuggestion: { suggested: Set }; + }; + expect(args.schemaNoun).toBe('database'); + expect(args.schemas).toEqual(['analytics', 'mart']); + expect(scopedArgs.schemaSuggestion.suggested).toEqual(new Set(['analytics', 'mart'])); + return { kind: 'selected' as const, activeSchemas: ['mart'], enabledTables: ['mart.orders'] }; + }); + + await runKtxSetupDatabasesStep( + { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, + makeIo().io, + { prompts, testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0), listSchemas, listTables, pickDatabaseScope }, + ); + + const project = await loadKtxProject({ projectDir: tempDir }); + expect(project.config.connections['mysql-warehouse']).toMatchObject({ + driver: 'mysql', + schemas: ['mart'], + enabled_tables: ['mart.orders'], + }); + }); + + it('maps ClickHouse scripted database schema input to databases and preserves database', async () => { + await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + skipDatabases: false, + databaseDrivers: ['clickhouse'], + databaseConnectionId: 'clickhouse-warehouse', + databaseUrl: 'clickhouse://reader@localhost/analytics', + databaseSchemas: ['analytics', 'mart'], + }, + makeIo().io, + { testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0) }, + ); + + const project = await loadKtxProject({ projectDir: tempDir }); + expect(project.config.connections['clickhouse-warehouse']).toMatchObject({ + driver: 'clickhouse', + database: 'analytics', + databases: ['analytics', 'mart'], + }); + expect(project.config.connections['clickhouse-warehouse']).not.toHaveProperty('schemas'); + }); + + it('does not prompt for a bootstrap BigQuery dataset before scope discovery', async () => { + const prompts = makePromptAdapter({ + multiselectValues: [['bigquery']], + selectValues: ['no', 'continue'], + textValues: ['bigquery-warehouse', '/tmp/service-account.json', 'US'], + }); + const listSchemas = vi.fn(async () => ['analytics']); + const listTables = vi.fn(async () => [{ schema: 'analytics', name: 'orders', kind: 'table' as const }]); + const pickDatabaseScope = vi.fn(async () => ({ + kind: 'selected' as const, + activeSchemas: ['analytics'], + enabledTables: ['analytics.orders'], + })); + + await runKtxSetupDatabasesStep( + { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, + makeIo().io, + { prompts, testConnection: vi.fn(async () => 0), scanConnection: vi.fn(async () => 0), listSchemas, listTables, pickDatabaseScope }, + ); + + const textMessages = vi.mocked(prompts.text).mock.calls.map(([options]) => options.message); + expect(textMessages).not.toContain(textInputPrompt('BigQuery dataset\nFor example analytics.')); + }); + it('prompts for discovered Postgres schemas before the first scan', async () => { const io = makeIo(); const prompts = makePromptAdapter({ @@ -1512,7 +1653,8 @@ describe('setup databases step', () => { connectionId: 'postgres-warehouse', schemaNoun: 'schema', schemaNounPlural: 'schemas', - defaultSchemas: ['orbit_analytics', 'orbit_raw'], + schemas: ['orbit_analytics', 'orbit_raw', 'public'], + schemaSuggestion: { excluded: new Set(), suggested: new Set() }, }); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.connections['postgres-warehouse']).toMatchObject({ @@ -1521,6 +1663,41 @@ describe('setup databases step', () => { expect(io.stdout()).toContain('✓ orbit_analytics, orbit_raw'); }); + it('passes schemas and a lazy table callback to the scope picker instead of eager table discovery', async () => { + const listSchemas = vi.fn(async () => ['analytics', 'raw']); + const listTables = vi.fn(async (_projectDir: string, _connectionId: string, schemas?: string[]) => + (schemas ?? []).map((schema) => ({ schema, name: 'orders', kind: 'table' as const })), + ); + const pickDatabaseScope = vi.fn(async (args: PickDatabaseScopeArgs) => { + const lazyArgs = args as PickDatabaseScopeArgs & { + schemas: string[]; + listTablesForSchemas: (schemas: string[]) => Promise>; + }; + expect(lazyArgs.schemas).toEqual(['analytics', 'raw']); + expect(args).not.toHaveProperty('discovered'); + expect(listTables).not.toHaveBeenCalled(); + const tables = await lazyArgs.listTablesForSchemas(['analytics']); + expect(tables).toEqual([{ schema: 'analytics', name: 'orders', kind: 'table' }]); + return { kind: 'selected' as const, activeSchemas: ['analytics'], enabledTables: ['analytics.orders'] }; + }); + + await runKtxSetupDatabasesStep( + { projectDir: tempDir, inputMode: 'auto', databaseDrivers: ['postgres'], skipDatabases: false, databaseSchemas: [] }, + makeIo().io, + { + prompts: makePromptAdapter({ selectValues: ['url'], textValues: ['', 'env:DATABASE_URL'] }), + testConnection: vi.fn(async () => 0), + scanConnection: vi.fn(async () => 0), + listSchemas, + listTables, + pickDatabaseScope, + }, + ); + + expect(listTables).toHaveBeenCalledTimes(1); + expect(listTables).toHaveBeenCalledWith(tempDir, 'postgres-warehouse', ['analytics']); + }); + it('auto-selects all discovered Postgres schemas in non-interactive setup', async () => { const io = makeIo(); const prompts = makePromptAdapter({}); diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index c8e735c5..acdebeec 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -69,6 +69,14 @@ export interface KtxSetupDatabasesPromptAdapter { initialValues?: string[]; }): Promise; select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; + autocompleteMultiselect(options: { + message: string; + options: KtxSetupPromptOption[]; + placeholder?: string; + required?: boolean; + maxItems?: number; + initialValues?: string[]; + }): Promise; text(options: { message: string; placeholder?: string; initialValue?: string }): Promise; password(options: { message: string }): Promise; cancel(message: string): void; @@ -134,8 +142,26 @@ interface ScopeDiscoverySpec { nounPlural: string; promptLabel: string; configArrayField: string; - configSingleField: string; - defaultSelection: (values: string[]) => string[]; + configSingleField?: string; + suggest: ScopeSuggest; +} + +interface ScopeSuggestion { + excluded: Set; + suggested: Set; +} + +type ScopeSuggest = (values: string[]) => ScopeSuggestion; + +const SUGGESTED_SCOPE_PATTERN = /^(mart|prod|analytics|core|dim|fact|gold)(_|$)/i; +const EXCLUDED_SCOPE_PATTERN = /^(information_schema|pg_catalog|pg_toast|_airbyte_|mysql$|performance_schema$|sys$)/i; + +function defaultSuggest(values: string[]): ScopeSuggestion { + const excluded = new Set(values.filter((value) => EXCLUDED_SCOPE_PATTERN.test(value))); + const suggested = new Set( + values.filter((value) => !excluded.has(value) && SUGGESTED_SCOPE_PATTERN.test(value)), + ); + return { excluded, suggested }; } const SCOPE_DISCOVERY_SPECS: Partial> = { @@ -145,10 +171,22 @@ const SCOPE_DISCOVERY_SPECS: Partial s !== 'public'); - return nonPublic.length > 0 ? nonPublic : schemas; - }, + suggest: defaultSuggest, + }, + mysql: { + noun: 'database', + nounPlural: 'databases', + promptLabel: 'MySQL databases', + configArrayField: 'schemas', + configSingleField: 'schema', + suggest: defaultSuggest, + }, + clickhouse: { + noun: 'database', + nounPlural: 'databases', + promptLabel: 'ClickHouse databases', + configArrayField: 'databases', + suggest: defaultSuggest, }, sqlserver: { noun: 'schema', @@ -156,7 +194,7 @@ const SCOPE_DISCOVERY_SPECS: Partial schemas, + suggest: defaultSuggest, }, bigquery: { noun: 'dataset', @@ -164,7 +202,7 @@ const SCOPE_DISCOVERY_SPECS: Partial datasets, + suggest: defaultSuggest, }, snowflake: { noun: 'schema', @@ -172,10 +210,7 @@ const SCOPE_DISCOVERY_SPECS: Partial s !== 'PUBLIC'); - return nonPublic.length > 0 ? nonPublic : schemas; - }, + suggest: defaultSuggest, }, }; @@ -386,6 +421,28 @@ async function defaultListSchemas(projectDir: string, connectionId: string): Pro } } + if (driver === 'mysql') { + const { KtxMysqlScanConnector, isKtxMysqlConnectionConfig } = await import('./connectors/mysql/connector.js');; + if (!isKtxMysqlConnectionConfig(connection)) return []; + const connector = new KtxMysqlScanConnector({ connectionId, connection }); + try { + return await connector.listSchemas(); + } finally { + await connector.cleanup(); + } + } + + if (driver === 'clickhouse') { + const { KtxClickHouseScanConnector, isKtxClickHouseConnectionConfig } = await import('./connectors/clickhouse/connector.js');; + if (!isKtxClickHouseConnectionConfig(connection)) return []; + const connector = new KtxClickHouseScanConnector({ connectionId, connection }); + try { + return await connector.listSchemas(); + } finally { + await connector.cleanup(); + } + } + if (driver === 'bigquery') { const { KtxBigQueryScanConnector, isKtxBigQueryConnectionConfig } = await import('./connectors/bigquery/connector.js');; if (!isKtxBigQueryConnectionConfig(connection)) return []; @@ -606,6 +663,33 @@ function normalizeFileReference(value: string): string { return `file:${normalized}`; } +function displayFileReference(value: string | undefined): string | undefined { + if (value === undefined) return undefined; + if (value.startsWith('file:')) return value.slice('file:'.length); + return value; +} + +function scriptedScopeConfigForDriver( + driver: KtxSetupDatabaseDriver, + databaseSchemas: string[], +): Record { + if (databaseSchemas.length === 0) return {}; + if (driver === 'bigquery') return { dataset_ids: databaseSchemas }; + if (driver === 'clickhouse') return { databases: databaseSchemas }; + return { schemas: databaseSchemas }; +} + +function databaseNameFromLiteralUrl(url: string): string | undefined { + if (url.startsWith('env:') || url.startsWith('file:')) { + return undefined; + } + try { + return new URL(url).pathname.replace(/^\/+/, '') || undefined; + } catch { + return undefined; + } +} + async function promptCredential(input: { prompts: KtxSetupDatabasesPromptAdapter; message: string; @@ -694,7 +778,7 @@ async function buildFieldsConnectionConfig(input: { database, username, ...(passwordRef ? { password: passwordRef } : {}), - ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + ...scriptedScopeConfigForDriver(input.driver, input.args.databaseSchemas), }; } @@ -720,10 +804,11 @@ async function buildPastedUrlConnectionConfig(input: { return { driver: input.driver, url, - ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + ...scriptedScopeConfigForDriver(input.driver, input.args.databaseSchemas), }; } + const database = input.driver === 'clickhouse' ? databaseNameFromLiteralUrl(url) : undefined; if (urlHasCredentials(url)) { const ref = await writeProjectLocalSecretReference({ projectDir: input.args.projectDir, @@ -733,14 +818,16 @@ async function buildPastedUrlConnectionConfig(input: { return { driver: input.driver, url: ref, - ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + ...(database ? { database } : {}), + ...scriptedScopeConfigForDriver(input.driver, input.args.databaseSchemas), }; } return { driver: input.driver, url, - ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + ...(database ? { database } : {}), + ...scriptedScopeConfigForDriver(input.driver, input.args.databaseSchemas), }; } @@ -756,6 +843,7 @@ async function buildUrlConnectionConfig(input: { if (input.args.databaseUrl) { const url = normalizeInputReference(input.args.databaseUrl); if (urlHasCredentials(url)) { + const database = input.driver === 'clickhouse' ? databaseNameFromLiteralUrl(url) : undefined; const ref = await writeProjectLocalSecretReference({ projectDir: input.args.projectDir, fileName: `${input.connectionId}-url`, @@ -764,13 +852,16 @@ async function buildUrlConnectionConfig(input: { return { driver: input.driver, url: ref, - ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + ...(database ? { database } : {}), + ...scriptedScopeConfigForDriver(input.driver, input.args.databaseSchemas), }; } + const database = input.driver === 'clickhouse' ? databaseNameFromLiteralUrl(url) : undefined; return { driver: input.driver, url, - ...(input.args.databaseSchemas.length > 0 ? { schemas: input.args.databaseSchemas } : {}), + ...(database ? { database } : {}), + ...scriptedScopeConfigForDriver(input.driver, input.args.databaseSchemas), }; } @@ -822,16 +913,10 @@ async function buildConnectionConfig(input: { }); } if (driver === 'bigquery') { - const datasetId = await promptText( - prompts, - 'BigQuery dataset\nFor example analytics.', - stringConfigField(input.existingConnection, 'dataset_id'), - ); - if (datasetId === undefined) return 'back'; const credentialsPath = await promptText( prompts, 'Path to service account JSON file', - stringConfigField(input.existingConnection, 'credentials_json'), + displayFileReference(stringConfigField(input.existingConnection, 'credentials_json')), ); if (credentialsPath === undefined) return 'back'; const location = await promptText( @@ -840,12 +925,12 @@ async function buildConnectionConfig(input: { stringConfigField(input.existingConnection, 'location') ?? 'US', ); if (location === undefined) return 'back'; - if (!datasetId || !credentialsPath) return null; + if (!credentialsPath) return null; return { driver: 'bigquery', - dataset_id: datasetId, credentials_json: normalizeFileReference(credentialsPath), ...(location ? { location } : {}), + ...scriptedScopeConfigForDriver('bigquery', args.databaseSchemas), }; } if (driver === 'snowflake') { @@ -1260,9 +1345,17 @@ function withExistingPrimaryEditPromptDefaults(input: { Array.isArray(previousArray) && previousArray.length > 0 ) { - delete merged[spec.configSingleField]; + if (spec.configSingleField) { + delete merged[spec.configSingleField]; + } merged[spec.configArrayField] = previousArray; - } else if (!Object.hasOwn(input.next, spec.configArrayField) && !Object.hasOwn(input.next, spec.configSingleField)) { + } else if ( + !Object.hasOwn(input.next, spec.configArrayField) && + (!spec.configSingleField || !Object.hasOwn(input.next, spec.configSingleField)) + ) { + if (!spec.configSingleField) { + return merged; + } const previousSingle = input.previous[spec.configSingleField]; if (typeof previousSingle === 'string' && previousSingle.trim().length > 0) { merged[spec.configSingleField] = previousSingle; @@ -1272,6 +1365,9 @@ function withExistingPrimaryEditPromptDefaults(input: { if (!Object.hasOwn(input.next, 'enabled_tables') && Array.isArray(input.previous.enabled_tables)) { merged.enabled_tables = input.previous.enabled_tables; } + if (!Object.hasOwn(input.next, 'context') && input.previous.context !== undefined) { + merged.context = input.previous.context; + } return merged; } @@ -1286,6 +1382,9 @@ function configuredScopeValues( .filter((v): v is string => typeof v === 'string' && v.trim().length > 0) .map((v) => v.trim()); } + if (!spec.configSingleField) { + return []; + } const singleVal = connection[spec.configSingleField]; return typeof singleVal === 'string' && singleVal.trim().length > 0 ? [singleVal.trim()] : []; } @@ -1318,6 +1417,7 @@ async function maybeConfigureDatabaseScope(input: { args: KtxSetupDatabasesArgs; deps: KtxSetupDatabasesDeps; io: KtxCliIo; + prompts: KtxSetupDatabasesPromptAdapter; forcePrompt?: boolean; }): Promise { const project = await loadKtxProject({ projectDir: input.projectDir }); @@ -1378,32 +1478,53 @@ async function maybeConfigureDatabaseScope(input: { }); } - writeSetupSection(input.io, 'Discovering tables', [ - `Connecting to ${input.connectionId}…`, - ]); + writeSetupSection(input.io, 'Discovering tables', [`Connecting to ${input.connectionId}…`]); - const schemasFilter = await (async (): Promise => { - if (cliSchemas.length > 0) return cliSchemas; - if (!spec) return []; - try { - return unique( - await (input.deps.listSchemas ?? defaultListSchemas)(input.projectDir, input.connectionId), - ); - } catch (error) { - const detail = error instanceof Error ? error.message : String(error); - input.io.stderr.write( - `Could not discover ${spec.promptLabel.toLowerCase()} for ${input.connectionId}; ${detail}\n`, - ); - return []; - } - })(); + const schemas = unique( + cliSchemas.length > 0 + ? cliSchemas + : await (async (): Promise => { + if (!spec) return []; + try { + return await (input.deps.listSchemas ?? defaultListSchemas)(input.projectDir, input.connectionId); + } catch (error) { + const detail = error instanceof Error ? error.message : String(error); + input.io.stderr.write( + `Could not discover ${spec.promptLabel.toLowerCase()} for ${input.connectionId}; ${detail}\n`, + ); + return []; + } + })(), + ); + if (spec && schemas.length === 0) { + return 'ready'; + } + const schemaSuggestion = + cliSchemas.length > 0 + ? { excluded: new Set(), suggested: new Set(cliSchemas) } + : spec?.suggest(schemas) ?? { excluded: new Set(), suggested: new Set() }; + const existingEnabled = + hasExistingTables && input.forcePrompt === true + ? (existingTables ?? []).filter((table): table is string => typeof table === 'string') + : []; - let discovered: KtxTableListEntry[]; + let pickResult: DatabaseScopePickResult; try { - discovered = await (input.deps.listTables ?? defaultListTables)( - input.projectDir, - input.connectionId, - schemasFilter.length > 0 ? schemasFilter : undefined, + pickResult = await (input.deps.pickDatabaseScope ?? defaultPickDatabaseScope)( + { + connectionId: input.connectionId, + schemaNoun: spec?.noun ?? 'schema', + schemaNounPlural: spec?.nounPlural ?? 'schemas', + schemas, + schemaSuggestion, + existing: { enabledTables: existingEnabled }, + supportsSchemaScope: spec !== undefined, + initialSchemas: cliSchemas.length > 0 ? cliSchemas : undefined, + prompts: input.prompts, + listTablesForSchemas: (selectedSchemas) => + (input.deps.listTables ?? defaultListTables)(input.projectDir, input.connectionId, selectedSchemas), + }, + input.io, ); } catch (error) { const detail = error instanceof Error ? error.message : String(error); @@ -1414,55 +1535,11 @@ async function maybeConfigureDatabaseScope(input: { ); return input.forcePrompt === true ? 'failed' : 'ready'; } - - if (discovered.length === 0) { - if (input.forcePrompt === true) { - input.io.stderr.write(`No tables discovered for ${input.connectionId}; edit was not saved.\n`); - } - return input.forcePrompt === true ? 'failed' : 'ready'; - } - - const allQualified = discovered.map((t) => `${t.schema}.${t.name}`); - const schemasInDiscovery = unique(discovered.map((t) => t.schema)); - - const defaultSchemas = (() => { - if (cliSchemas.length > 0) return cliSchemas; - if (!spec) return schemasInDiscovery; - return spec.defaultSelection(schemasInDiscovery); - })(); - - const existingEnabled = - hasExistingTables && input.forcePrompt === true - ? (existingTables ?? []).filter( - (table): table is string => typeof table === 'string' && allQualified.includes(table), - ) - : []; - - let activeSchemas: string[]; - let enabledTables: string[]; - - if (discovered.length === 1) { - enabledTables = allQualified; - activeSchemas = spec ? schemasInDiscovery : []; - } else { - const pickResult = await (input.deps.pickDatabaseScope ?? defaultPickDatabaseScope)( - { - connectionId: input.connectionId, - schemaNoun: spec?.noun ?? 'schema', - schemaNounPlural: spec?.nounPlural ?? 'schemas', - discovered, - existing: { enabledTables: existingEnabled }, - defaultSchemas, - supportsSchemaScope: spec !== undefined, - }, - input.io, - ); - if (pickResult.kind === 'back') { - return 'back'; - } - enabledTables = pickResult.enabledTables; - activeSchemas = pickResult.activeSchemas; + if (pickResult.kind === 'back') { + return 'back'; } + const enabledTables = pickResult.enabledTables; + const activeSchemas = pickResult.activeSchemas; if (spec) { await writeScopeConfig({ @@ -1488,7 +1565,7 @@ async function maybeConfigureDatabaseScope(input: { ]); } writeSetupSection(input.io, `Tables enabled for ${input.connectionId}`, [ - `✓ ${enabledTables.length}/${discovered.length} tables enabled`, + `✓ ${enabledTables.length} tables enabled`, ]); return 'ready'; } diff --git a/packages/cli/src/setup-models.test.ts b/packages/cli/src/setup-models.test.ts index ba9260be..444c3b4d 100644 --- a/packages/cli/src/setup-models.test.ts +++ b/packages/cli/src/setup-models.test.ts @@ -58,33 +58,35 @@ function makePromptAdapter(options: { const textValues = [...(options.textValues ?? [])]; const passwordValues = [...(options.passwordValues ?? [])]; let providerPromptCount = 0; + const choose = async ({ message }: { message: string }) => { + if (message.includes('LLM provider')) { + providerPromptCount += 1; + const nextProviderChoice = selectValues[0]; + if ( + nextProviderChoice === 'anthropic' || + nextProviderChoice === 'vertex' || + nextProviderChoice === 'claude-code' || + nextProviderChoice === 'back' + ) { + return selectValues.shift() ?? nextProviderChoice; + } + if (options.credentialChoice === 'back' && providerPromptCount > 1) { + return 'back'; + } + return options.providerChoice ?? 'anthropic'; + } + const nextValue = selectValues.shift(); + if (nextValue) { + return nextValue; + } + if (message.includes('Anthropic API key')) { + return options.credentialChoice ?? 'env'; + } + return options.modelChoice ?? 'claude-sonnet-4-6'; + }; return { - select: vi.fn(async ({ message }) => { - if (message.includes('LLM provider')) { - providerPromptCount += 1; - const nextProviderChoice = selectValues[0]; - if ( - nextProviderChoice === 'anthropic' || - nextProviderChoice === 'vertex' || - nextProviderChoice === 'claude-code' || - nextProviderChoice === 'back' - ) { - return selectValues.shift() ?? nextProviderChoice; - } - if (options.credentialChoice === 'back' && providerPromptCount > 1) { - return 'back'; - } - return options.providerChoice ?? 'anthropic'; - } - const nextValue = selectValues.shift(); - if (nextValue) { - return nextValue; - } - if (message.includes('Anthropic API key')) { - return options.credentialChoice ?? 'env'; - } - return options.modelChoice ?? 'claude-sonnet-4-6'; - }), + select: vi.fn(choose), + autocomplete: vi.fn(choose), text: vi.fn(async () => textValues.shift() ?? ''), password: vi.fn( async () => @@ -152,7 +154,7 @@ describe('setup Anthropic model step', () => { }, ); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining('Which Anthropic model should KTX use?'), options: [ @@ -417,7 +419,7 @@ describe('setup Anthropic model step', () => { expect(readGcloudProject).toHaveBeenCalled(); expect(listGcloudProjects).toHaveBeenCalled(); expect(prompts.text).not.toHaveBeenCalled(); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining('Which Google Cloud project should KTX use for Vertex AI?'), options: [ @@ -428,7 +430,7 @@ describe('setup Anthropic model step', () => { ], }), ); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining('Which Anthropic model should KTX use?'), options: [ @@ -480,7 +482,7 @@ describe('setup Anthropic model step', () => { message: expect.stringContaining('How should KTX authenticate with Google Vertex AI?'), }), ); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining('Which Google Cloud project should KTX use for Vertex AI?'), }), @@ -548,7 +550,7 @@ describe('setup Anthropic model step', () => { ); expect(result.status).toBe('ready'); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining('Which Google Cloud project should KTX use for Vertex AI?'), options: [ @@ -595,25 +597,25 @@ describe('setup Anthropic model step', () => { expect(result.status).toBe('ready'); expect(listGcloudProjects).toHaveBeenCalledTimes(2); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining('Could not list Google Cloud projects with gcloud'), options: expect.arrayContaining([{ value: 'retry', label: 'Retry loading Google Cloud projects' }]), }), ); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining( `${String.fromCharCode(0x1b)}[33mCould not list Google Cloud projects with gcloud`, ), }), ); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining('gcloud auth login --update-adc'), }), ); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining( `${String.fromCharCode(0x1b)}[33mRun \`gcloud auth login --update-adc\``, @@ -643,7 +645,7 @@ describe('setup Anthropic model step', () => { expect(result.status).toBe('back'); expect(prompts.select).toHaveBeenNthCalledWith( - 3, + 2, expect.objectContaining({ message: expect.stringContaining('Which LLM provider should KTX use?'), }), @@ -887,7 +889,7 @@ describe('setup Anthropic model step', () => { ); expect(result.status).toBe('back'); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining('Which Anthropic model should KTX use?'), options: expect.not.arrayContaining([expect.objectContaining({ value: 'skip' })]), @@ -919,7 +921,7 @@ describe('setup Anthropic model step', () => { ); expect(result.status).toBe('ready'); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expectedPromptMessage, }), @@ -965,7 +967,7 @@ describe('setup Anthropic model step', () => { expect(result.status).toBe('missing-input'); expect(BUNDLED_ANTHROPIC_MODELS.length).toBeGreaterThan(0); - expect(prompts.select).toHaveBeenCalledWith( + expect(prompts.autocomplete).toHaveBeenCalledWith( expect.objectContaining({ message: expect.stringContaining('Which Anthropic model should KTX use?'), options: expect.arrayContaining([ @@ -1058,7 +1060,8 @@ describe('setup Anthropic model step', () => { expect(result.status).toBe('ready'); expect(healthCheck).toHaveBeenCalledTimes(2); - expect(prompts.select).toHaveBeenCalledTimes(5); + expect(prompts.select).toHaveBeenCalledTimes(3); + expect(prompts.autocomplete).toHaveBeenCalledTimes(2); expect(io.stderr()).toContain('Anthropic model health check failed: model not found'); expect(io.stderr()).toContain('Choose a different credential source or model, or Back.'); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); @@ -1110,7 +1113,7 @@ describe('setup Anthropic model step', () => { expect(result.status).toBe('back'); expect(prompts.select).toHaveBeenNthCalledWith( - 4, + 3, expect.objectContaining({ message: expect.stringContaining('How should KTX find your Anthropic API key?'), }), diff --git a/packages/cli/src/setup-models.ts b/packages/cli/src/setup-models.ts index 7f7385b1..041eef5c 100644 --- a/packages/cli/src/setup-models.ts +++ b/packages/cli/src/setup-models.ts @@ -61,6 +61,11 @@ export type KtxSetupLlmBackend = 'anthropic' | 'vertex' | 'claude-code'; /** @internal */ export interface KtxSetupModelPromptAdapter { select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; + autocomplete(options: { + message: string; + placeholder?: string; + options: KtxSetupPromptOption[]; + }): Promise; text(options: { message: string; placeholder?: string }): Promise; password(options: { message: string }): Promise; cancel(message: string): void; @@ -617,13 +622,14 @@ async function chooseInteractiveVertexProject( io.stdout.write('│ gcloud did not return any visible Google Cloud projects. Enter a project ID manually or choose Back.\n'); } - const choice = await prompts.select({ + const choice = await prompts.autocomplete({ message: `Which Google Cloud project should KTX use for Vertex AI?\n\n${[ VERTEX_PROJECT_PROMPT_CONTEXT, listFailureMessage, ] .filter((value): value is string => Boolean(value)) .join('\n\n')}`, + placeholder: 'Type to search projects', options: [ ...orderedProjects.map((project) => ({ value: project.projectId, @@ -778,8 +784,9 @@ async function chooseModel( { value: 'manual', label: 'Enter a model ID manually' }, { value: 'back', label: 'Back' }, ]; - const choice = await prompts.select({ + const choice = await prompts.autocomplete({ message: `Which Anthropic model should KTX use?\n\n${ANTHROPIC_MODEL_PROMPT_CONTEXT}`, + placeholder: 'Type to search models', options: modelOptions, }); if (choice === 'back') { @@ -810,8 +817,9 @@ async function chooseVertexModel(args: KtxSetupModelArgs, io: KtxCliIo, deps: Kt const selectableModels = VERTEX_ANTHROPIC_MODELS.filter(isSelectableAnthropicModel); const prompts = deps.prompts ?? createPromptAdapter(); - const choice = await prompts.select({ + const choice = await prompts.autocomplete({ message: `Which Anthropic model should KTX use?\n\n${ANTHROPIC_MODEL_PROMPT_CONTEXT}`, + placeholder: 'Type to search models', options: [ ...selectableModels.map((model) => ({ value: model.id, diff --git a/packages/cli/src/setup-prompts.test.ts b/packages/cli/src/setup-prompts.test.ts index 23ffd669..95f4b68b 100644 --- a/packages/cli/src/setup-prompts.test.ts +++ b/packages/cli/src/setup-prompts.test.ts @@ -14,6 +14,8 @@ const mocks = vi.hoisted(() => { isCancel: vi.fn((value: unknown): value is symbol => value === cancelSymbol), log: { info: vi.fn() }, multiselect: vi.fn(), + autocomplete: vi.fn(), + autocompleteMultiselect: vi.fn(), note: vi.fn(), password: vi.fn(), select: vi.fn(), @@ -29,6 +31,8 @@ vi.mock('@clack/prompts', () => ({ isCancel: mocks.isCancel, log: mocks.log, multiselect: mocks.multiselect, + autocomplete: mocks.autocomplete, + autocompleteMultiselect: mocks.autocompleteMultiselect, note: mocks.note, password: mocks.password, select: mocks.select, @@ -47,6 +51,8 @@ describe('setup prompt adapter', () => { mocks.isCancel.mockClear(); mocks.log.info.mockReset(); mocks.multiselect.mockReset(); + mocks.autocomplete.mockReset(); + mocks.autocompleteMultiselect.mockReset(); mocks.note.mockReset(); mocks.password.mockReset(); mocks.select.mockReset(); @@ -160,6 +166,52 @@ describe('setup prompt adapter', () => { expect(mocks.cancel).toHaveBeenCalledWith('Setup cancelled.'); }); + it('returns autocomplete selections and maps cancel to back', async () => { + mocks.autocomplete.mockResolvedValueOnce('analytics'); + const adapter = createKtxSetupPromptAdapter({ selectCancelValue: 'back' }); + + await expect( + adapter.autocomplete({ + message: 'Dataset', + placeholder: 'Type to search', + options: [{ value: 'analytics', label: 'analytics' }], + }), + ).resolves.toBe('analytics'); + + mocks.autocomplete.mockResolvedValueOnce(mocks.cancelSymbol); + await expect( + adapter.autocomplete({ + message: 'Dataset', + options: [{ value: 'analytics', label: 'analytics' }], + }), + ).resolves.toBe('back'); + }); + + it('returns autocomplete multiselect selections and maps cancel to back', async () => { + mocks.autocompleteMultiselect.mockResolvedValueOnce(['analytics', 'mart']); + const adapter = createKtxSetupPromptAdapter({ selectCancelValue: 'back', multiselectCancelValue: 'back' }); + + await expect( + adapter.autocompleteMultiselect({ + message: 'Datasets', + placeholder: 'Type to filter', + options: [ + { value: 'analytics', label: 'analytics', hint: 'suggested' }, + { value: 'mart', label: 'mart' }, + ], + initialValues: ['analytics'], + }), + ).resolves.toEqual(['analytics', 'mart']); + + mocks.autocompleteMultiselect.mockResolvedValueOnce(mocks.cancelSymbol); + await expect( + adapter.autocompleteMultiselect({ + message: 'Datasets', + options: [{ value: 'analytics', label: 'analytics' }], + }), + ).resolves.toEqual(['back']); + }); + it('keeps setup intro and note plain for non-stream output', async () => { const { createKtxSetupUiAdapter } = await import('./setup-prompts.js'); const chunks: string[] = []; diff --git a/packages/cli/src/setup-prompts.ts b/packages/cli/src/setup-prompts.ts index f5faacd8..1609bd76 100644 --- a/packages/cli/src/setup-prompts.ts +++ b/packages/cli/src/setup-prompts.ts @@ -1,5 +1,7 @@ import type { Writable } from 'node:stream'; import { + autocomplete, + autocompleteMultiselect, cancel, confirm, intro, @@ -38,6 +40,22 @@ interface KtxSetupMultiselectOptions { cursorAt?: Value; } +interface KtxSetupAutocompleteOptions { + message: string; + options: Array>; + placeholder?: string; + maxItems?: number; +} + +interface KtxSetupAutocompleteMultiselectOptions { + message: string; + options: Array>; + placeholder?: string; + required?: boolean; + maxItems?: number; + initialValues?: Value[]; +} + interface KtxSetupTextOptions { message: string; placeholder?: string; @@ -53,6 +71,8 @@ interface KtxSetupPasswordOptions { export interface KtxSetupPromptAdapter { select(options: KtxSetupSelectOptions): Promise; multiselect(options: KtxSetupMultiselectOptions): Promise; + autocomplete(options: KtxSetupAutocompleteOptions): Promise; + autocompleteMultiselect(options: KtxSetupAutocompleteMultiselectOptions): Promise; text(options: KtxSetupTextOptions): Promise; password(options: KtxSetupPasswordOptions): Promise; cancel(message: string): void; @@ -117,6 +137,50 @@ export function createKtxSetupPromptAdapter(options: KtxSetupPromptAdapterOption return selected; } }, + async autocomplete(promptOptions) { + const value = await withSetupInterruptConfirmation(() => + autocomplete(withMenuOptionsSpacing(promptOptions)), + ); + if (isCancel(value)) { + if (cancelOnSelectCancel) { + cancel(cancelMessage); + } + return options.selectCancelValue; + } + return String(value); + }, + async autocompleteMultiselect(promptOptions) { + while (true) { + const value = await withSetupInterruptConfirmation(() => + autocompleteMultiselect(withMenuOptionsSpacing(promptOptions)), + ); + if (isCancel(value)) { + if (cancelOnMultiselectCancel) { + cancel(cancelMessage); + } + return [multiselectCancelValue]; + } + const selected = [...value].map(String); + if ( + selected.length === 0 && + !promptOptions.required && + options.confirmEmptyOptionalMultiselect === true + ) { + const skipConfirmed = await confirm({ + message: 'Nothing selected. Skip this step?', + initialValue: false, + }); + if (isCancel(skipConfirmed)) { + cancel(cancelMessage); + return [multiselectCancelValue]; + } + if (!skipConfirmed) { + continue; + } + } + return selected; + } + }, async text(promptOptions) { const value = await withSetupInterruptConfirmation(() => text({ ...promptOptions, message: withTextInputNavigation(promptOptions.message) }), diff --git a/packages/cli/src/setup-sources-notion.test.ts b/packages/cli/src/setup-sources-notion.test.ts index 84fce4aa..1306b07b 100644 --- a/packages/cli/src/setup-sources-notion.test.ts +++ b/packages/cli/src/setup-sources-notion.test.ts @@ -57,6 +57,7 @@ function prompts(values: { multiselect?: string[][]; select?: string[] }): KtxSe return { multiselect: vi.fn(async () => multiselectValues.shift() ?? []), select: vi.fn(async () => selectValues.shift() ?? 'back'), + autocomplete: vi.fn(async () => selectValues.shift() ?? 'back'), text: vi.fn(async () => ''), password: vi.fn(async () => undefined), cancel: vi.fn(), diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index a9de4436..d75933c1 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -48,6 +48,7 @@ function prompts(values: { return { multiselect: vi.fn(async () => multiselectValues.shift() ?? []), select: vi.fn(async () => selectValues.shift() ?? 'skip'), + autocomplete: vi.fn(async () => selectValues.shift() ?? 'skip'), text: vi.fn(async () => (textValues.length > 0 ? textValues.shift() : '')), password: vi.fn(async () => (passwordValues.length > 0 ? passwordValues.shift() : undefined)), cancel: vi.fn(), @@ -548,8 +549,9 @@ describe('setup sources step', () => { ], }); if (testCase.source === 'metabase') { - expect(testPrompts.select).toHaveBeenCalledWith({ + expect(testPrompts.autocomplete).toHaveBeenCalledWith({ message: 'Metabase database', + placeholder: 'Type to search databases', options: [ { value: '1', label: '1: Finance (postgres)' }, { value: '2', label: '2: Analytics (postgres)' }, diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index ff8eb420..a3f8019d 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -71,6 +71,11 @@ export interface KtxSetupSourcesPromptAdapter { required?: boolean; }): Promise; select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; + autocomplete(options: { + message: string; + placeholder?: string; + options: KtxSetupPromptOption[]; + }): Promise; text(options: { message: string; placeholder?: string; initialValue?: string }): Promise; password(options: { message: string }): Promise; cancel(message: string): void; @@ -931,8 +936,9 @@ async function chooseMetabaseDatabaseId(input: { return discovered[0].id; } if (discovered.length > 1) { - const selected = await input.prompts.select({ + const selected = await input.prompts.autocomplete({ message: 'Metabase database', + placeholder: 'Type to search databases', options: [ ...discovered .slice() diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index 9c4a3b58..6ea0f0a4 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -1410,6 +1410,7 @@ describe('setup status', () => { const embeddings = vi.fn(async () => embeddingResults.shift() ?? { status: 'back' as const, projectDir: tempDir }); const databasePrompts = { multiselect: vi.fn(async () => ['back']), + autocompleteMultiselect: vi.fn(async () => ['back']), select: vi.fn(async () => 'back'), text: vi.fn(), password: vi.fn(),