test: split cli tests from source tree (#216)

* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

  Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

  The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'.

  Align the picker boundary with the canonical 3-level KtxTableRef:
  - Add catalog: string | null to KtxTableListEntry.
  - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
  - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries.
  - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse.

  Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-06-28 08:49:38 +02:00 · 2026-05-26 08:49:05 +02:00 · 2026-05-26 08:49:05 +02:00 · 56985b7e09
commit 56985b7e09
parent 924868841d
548 changed files with 5048 additions and 2228 deletions
--- a/packages/cli/src/connectors/bigquery/connector.test.ts
+++ b/packages/cli/src/connectors/bigquery/connector.test.ts
@ -1,483 +0,0 @@
-import { describe, expect, it, vi } from 'vitest';
-import { bigQueryConnectionConfigFromConfig, isKtxBigQueryConnectionConfig, type KtxBigQueryClient, KtxBigQueryScanConnector, type KtxBigQueryClientFactory, type KtxBigQueryDataset, type KtxBigQueryQueryJob, type KtxBigQueryTableRef } from '../../connectors/bigquery/connector.js';
-import { createBigQueryLiveDatabaseIntrospection } from '../../connectors/bigquery/live-database-introspection.js';
-import { tableRefSet } from '../../context/scan/table-ref.js';
-
-function fakeClientFactory(options: { primaryKeyError?: Error } = {}): KtxBigQueryClientFactory {
-  const queryResults = vi.fn(async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
-    [{ id: 1, status: 'paid' }],
-    undefined,
-    { schema: { fields: [{ name: 'id', type: 'INT64' }, { name: 'status', type: 'STRING' }] } },
-  ]);
-  const createQueryJob = vi.fn(async (input: { query: string }): ReturnType<KtxBigQueryClient['createQueryJob']> => {
-    if (input.query.includes('INFORMATION_SCHEMA.TABLE_CONSTRAINTS')) {
-      if (options.primaryKeyError) {
-        throw options.primaryKeyError;
-      }
-      return [
-        {
-          getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
-            [{ table_name: 'orders', column_name: 'id' }],
-            undefined,
-            { schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
-          ],
-        },
-      ];
-    }
-    if (input.query.includes('APPROX_COUNT_DISTINCT')) {
-      return [
-        {
-          getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
-            [{ cardinality: 2 }],
-            undefined,
-            { schema: { fields: [{ name: 'cardinality', type: 'INT64' }] } },
-          ],
-        },
-      ];
-    }
-    if (input.query.includes('SELECT DISTINCT CAST')) {
-      return [
-        {
-          getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
-            [{ val: 'open' }, { val: 'paid' }],
-            undefined,
-            { schema: { fields: [{ name: 'val', type: 'STRING' }] } },
-          ],
-        },
-      ];
-    }
-    if (input.query.includes('SELECT `status`')) {
-      return [
-        {
-          getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
-            [{ status: 'paid' }],
-            undefined,
-            { schema: { fields: [{ name: 'status', type: 'STRING' }] } },
-          ],
-        },
-      ];
-    }
-    return [{ getQueryResults: queryResults }];
-  });
-  const getTable = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
-    {
-      metadata: {
-        type: 'TABLE',
-        numRows: '12',
-        description: 'Orders table',
-        schema: {
-          fields: [
-            { name: 'id', type: 'INT64', mode: 'REQUIRED', description: 'Order id' },
-            { name: 'status', type: 'STRING', mode: 'NULLABLE' },
-            { name: 'payload', type: 'RECORD', mode: 'NULLABLE' },
-          ],
-        },
-      },
-    },
-  ]);
-  const tableRef: KtxBigQueryTableRef = { id: 'orders', get: getTable };
-  return {
-    createClient: vi.fn(() => ({
-      getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }, { id: 'staging' }]]),
-      dataset: vi.fn(
-        (datasetId: string): KtxBigQueryDataset => ({
-        get: vi.fn(async () => [{ id: datasetId }]),
-        getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [[tableRef]]),
-      }),
-      ),
-      createQueryJob,
-    })),
-  };
-}
-
-const connection = {
-  driver: 'bigquery',
-  dataset_id: 'analytics',
-  credentials_json: JSON.stringify({ project_id: 'project-1', client_email: 'reader@example.test' }),
-  location: 'US',
-} as const;
-
-describe('KtxBigQueryScanConnector', () => {
-  it('resolves configuration safely', () => {
-    expect(isKtxBigQueryConnectionConfig(connection)).toBe(true);
-    expect(isKtxBigQueryConnectionConfig({ driver: 'mysql' })).toBe(false);
-    expect(bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection })).toMatchObject({
-      projectId: 'project-1',
-      datasetIds: ['analytics'],
-      location: 'US',
-    });
-  });
-
-  it('introspects datasets, table metadata, primary keys, and normalized types', async () => {
-    const connector = new KtxBigQueryScanConnector({
-      connectionId: 'warehouse',
-      connection,
-      clientFactory: fakeClientFactory(),
-      now: () => new Date('2026-04-29T17:00:00.000Z'),
-    });
-
-    const snapshot = await connector.introspect(
-      { connectionId: 'warehouse', driver: 'bigquery' },
-      { runId: 'scan-run-1' },
-    );
-
-    expect(snapshot).toMatchObject({
-      connectionId: 'warehouse',
-      driver: 'bigquery',
-      extractedAt: '2026-04-29T17:00:00.000Z',
-      scope: { catalogs: ['project-1'], datasets: ['analytics'] },
-      metadata: {
-        project_id: 'project-1',
-        datasets: ['analytics'],
-        table_count: 1,
-        total_columns: 3,
-      },
-    });
-    expect(snapshot.tables[0]).toMatchObject({
-      catalog: 'project-1',
-      db: 'analytics',
-      name: 'orders',
-      kind: 'table',
-      comment: 'Orders table',
-      estimatedRows: 12,
-      foreignKeys: [],
-    });
-    expect(snapshot.tables[0]?.columns).toEqual([
-      {
-        name: 'id',
-        nativeType: 'INT64',
-        normalizedType: 'BIGINT',
-        dimensionType: 'number',
-        nullable: false,
-        primaryKey: true,
-        comment: 'Order id',
-      },
-      {
-        name: 'status',
-        nativeType: 'STRING',
-        normalizedType: 'VARCHAR',
-        dimensionType: 'string',
-        nullable: true,
-        primaryKey: false,
-        comment: null,
-      },
-      {
-        name: 'payload',
-        nativeType: 'RECORD',
-        normalizedType: 'JSON',
-        dimensionType: 'string',
-        nullable: true,
-        primaryKey: false,
-        comment: null,
-      },
-    ]);
-  });
-
-  it.each([
-    Object.assign(new Error('Access Denied'), { code: 403 }),
-    Object.assign(new Error('Not found'), { errors: [{ reason: 'notFound' }] }),
-  ])('soft-fails denied BigQuery primary-key discovery with a scan warning', async (primaryKeyError) => {
-    const connector = new KtxBigQueryScanConnector({
-      connectionId: 'warehouse',
-      connection,
-      clientFactory: fakeClientFactory({ primaryKeyError }),
-      now: () => new Date('2026-04-29T17:00:00.000Z'),
-    });
-
-    const snapshot = await connector.introspect(
-      { connectionId: 'warehouse', driver: 'bigquery' },
-      { runId: 'scan-run-bigquery-denied-pk' },
-    );
-
-    expect(snapshot.warnings).toEqual([
-      {
-        code: 'constraint_discovery_unauthorized',
-        message: 'Skipped primary-key discovery in analytics (insufficient grants on system catalogs)',
-        recoverable: true,
-        metadata: { schema: 'analytics', kind: 'primary_key' },
-      },
-    ]);
-    expect(snapshot.tables[0]?.foreignKeys).toEqual([]);
-    expect(snapshot.tables[0]?.columns.every((column) => column.primaryKey === false)).toBe(true);
-  });
-
-  it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => {
-    const connector = new KtxBigQueryScanConnector({
-      connectionId: 'warehouse',
-      connection,
-      clientFactory: fakeClientFactory(),
-    });
-
-    await expect(
-      connector.sampleTable(
-        {
-          connectionId: 'warehouse',
-          table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
-          columns: ['id', 'status'],
-          limit: 1,
-        },
-        { runId: 'scan-run-1' },
-      ),
-    ).resolves.toEqual({
-      headers: ['id', 'status'],
-      headerTypes: ['INT64', 'STRING'],
-      rows: [[1, 'paid']],
-      totalRows: 1,
-    });
-
-    await expect(
-      connector.sampleColumn(
-        {
-          connectionId: 'warehouse',
-          table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
-          column: 'status',
-          limit: 5,
-        },
-        { runId: 'scan-run-1' },
-      ),
-    ).resolves.toMatchObject({ values: ['paid'], nullCount: null, distinctCount: null });
-
-    await expect(
-      connector.executeReadOnly(
-        { connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
-        { runId: 'scan-run-1' },
-      ),
-    ).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 });
-
-    await expect(
-      connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
-    ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
-
-    await expect(
-      connector.getColumnDistinctValues(
-        { catalog: 'project-1', db: 'analytics', name: 'orders' },
-        'status',
-        { maxCardinality: 5, limit: 10, sampleSize: 100 },
-      ),
-    ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
-    await expect(connector.getTableRowCount('orders')).resolves.toBe(12);
-    await expect(connector.listDatasets()).resolves.toEqual(['analytics', 'staging']);
-    await expect(
-      connector.columnStats(
-        { connectionId: 'warehouse', table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, column: 'status' },
-        { runId: 'scan-run-1' },
-      ),
-    ).resolves.toBeNull();
-    await connector.cleanup();
-  });
-
-  it('limits introspection to tables in tableScope', async () => {
-    const ordersGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
-      {
-        metadata: {
-          type: 'TABLE',
-          numRows: '12',
-          schema: { fields: [{ name: 'id', type: 'INT64', mode: 'REQUIRED' }] },
-        },
-      },
-    ]);
-    const skippedGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
-      { metadata: { type: 'TABLE', numRows: '1', schema: { fields: [] } } },
-    ]);
-    const clientFactory: KtxBigQueryClientFactory = {
-      createClient: vi.fn(() => ({
-        getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }]]),
-        dataset: vi.fn(
-          (): KtxBigQueryDataset => ({
-            get: vi.fn(async () => [{ id: 'analytics' }]),
-            getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [
-              [
-                { id: 'orders', get: ordersGet },
-                { id: 'customers', get: skippedGet },
-              ],
-            ]),
-          }),
-        ),
-        createQueryJob: vi.fn(async (): ReturnType<KtxBigQueryClient['createQueryJob']> => [
-          {
-            getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
-              [],
-              undefined,
-              { schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
-            ],
-          },
-        ]),
-      })),
-    };
-    const connector = new KtxBigQueryScanConnector({
-      connectionId: 'warehouse',
-      connection,
-      clientFactory,
-    });
-    const scope = tableRefSet([{ catalog: 'project-1', db: 'analytics', name: 'orders' }]);
-    const snapshot = await connector.introspect(
-      { connectionId: 'warehouse', driver: 'bigquery', tableScope: scope },
-      { runId: 'scope-test' },
-    );
-    expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
-    expect(ordersGet).toHaveBeenCalledTimes(1);
-    expect(skippedGet).not.toHaveBeenCalled();
-  });
-
-  it('constructs for discovery without dataset scope and lists tables through one region information schema query', async () => {
-    const createQueryJob = vi.fn(
-      async (
-        input: { query: string; params?: Record<string, unknown>; location?: string },
-      ): ReturnType<KtxBigQueryClient['createQueryJob']> => [
-        {
-          getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
-            [
-              { table_schema: 'analytics', table_name: 'orders', table_type: 'BASE TABLE' },
-              { table_schema: 'analytics', table_name: 'order_clone', table_type: 'CLONE' },
-              { table_schema: 'mart', table_name: 'orders_mv', table_type: 'MATERIALIZED VIEW' },
-            ],
-            undefined,
-            {
-              schema: {
-                fields: [
-                  { name: 'table_schema', type: 'STRING' },
-                  { name: 'table_name', type: 'STRING' },
-                  { name: 'table_type', type: 'STRING' },
-                ],
-              },
-            },
-          ],
-        },
-      ],
-    );
-    const clientFactory: KtxBigQueryClientFactory = {
-      createClient: vi.fn(() => ({
-        getDatasets: vi.fn(async () => [[{ id: 'analytics' }, { id: 'mart' }]] as [{ id: string }[]]),
-        dataset: vi.fn((datasetId: string) => ({
-          get: vi.fn(async () => [{ id: datasetId }]),
-          getTables: vi.fn(async () => [[]] as [never[]]),
-        })),
-        createQueryJob,
-      })),
-    };
-    const connector = new KtxBigQueryScanConnector({
-      connectionId: 'warehouse',
-      connection: {
-        driver: 'bigquery',
-        credentials_json: JSON.stringify({ project_id: 'project-1' }),
-        location: 'US',
-      },
-      clientFactory,
-    });
-
-    await expect(connector.listTables(['analytics', 'mart'])).resolves.toEqual([
-      { schema: 'analytics', name: 'orders', kind: 'table' },
-      { schema: 'analytics', name: 'order_clone', kind: 'table' },
-      { schema: 'mart', name: 'orders_mv', kind: 'view' },
-    ]);
-
-    expect(createQueryJob).toHaveBeenCalledTimes(1);
-    expect(createQueryJob).toHaveBeenCalledWith(
-      expect.objectContaining({
-        location: 'US',
-        params: { dataset_ids: ['analytics', 'mart'] },
-      }),
-    );
-    expect(createQueryJob.mock.calls[0]?.[0].query).toContain('`project-1`.`region-us`.INFORMATION_SCHEMA.TABLES');
-    expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'CLONE'");
-    expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'SNAPSHOT'");
-  });
-
-  it('keeps scan paths requiring dataset scope', async () => {
-    const connector = new KtxBigQueryScanConnector({
-      connectionId: 'warehouse',
-      connection: {
-        driver: 'bigquery',
-        credentials_json: JSON.stringify({ project_id: 'project-1' }),
-        location: 'US',
-      },
-      clientFactory: fakeClientFactory(),
-    });
-
-    await expect(
-      connector.introspect(
-        { connectionId: 'warehouse', driver: 'bigquery' },
-        { runId: 'scan-run-1' },
-      ),
-    ).rejects.toThrow('Native BigQuery scan requires connections.warehouse.dataset_ids or dataset_id');
-  });
-
-  it('applies maximumBytesBilled to read-only queries when configured', async () => {
-    const clientFactory = fakeClientFactory();
-    const connector = new KtxBigQueryScanConnector({
-      connectionId: 'warehouse',
-      connection,
-      clientFactory,
-      maxBytesBilled: 123456789,
-    });
-
-    await expect(
-      connector.executeReadOnly(
-        { connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
-        { runId: 'scan-run-1' },
-      ),
-    ).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
-
-    const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
-    expect(client.createQueryJob).toHaveBeenLastCalledWith(
-      expect.objectContaining({
-        maximumBytesBilled: '123456789',
-      }),
-    );
-  });
-
-  it('applies canonical BigQuery YAML scan limits to query jobs', async () => {
-    const clientFactory = fakeClientFactory();
-    const connector = new KtxBigQueryScanConnector({
-      connectionId: 'warehouse',
-      connection: { ...connection, max_bytes_billed: '987654321', job_timeout_ms: 30_000 },
-      clientFactory,
-    });
-
-    await expect(
-      connector.executeReadOnly(
-        { connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
-        { runId: 'scan-run-1' },
-      ),
-    ).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
-
-    const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
-    expect(client.createQueryJob).toHaveBeenLastCalledWith(
-      expect.objectContaining({
-        maximumBytesBilled: '987654321',
-        jobTimeoutMs: 30_000,
-      }),
-    );
-  });
-
-  it('adapts native snapshots to live-database introspection snapshots', async () => {
-    const introspection = createBigQueryLiveDatabaseIntrospection({
-      connections: { warehouse: connection },
-      clientFactory: fakeClientFactory(),
-      now: () => new Date('2026-04-29T17:00:00.000Z'),
-    });
-
-    await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
-      connectionId: 'warehouse',
-      metadata: { project_id: 'project-1' },
-      tables: expect.arrayContaining([
-        expect.objectContaining({
-          catalog: 'project-1',
-          db: 'analytics',
-          name: 'orders',
-          columns: expect.arrayContaining([
-            {
-              name: 'id',
-              nativeType: 'INT64',
-              normalizedType: 'BIGINT',
-              dimensionType: 'number',
-              nullable: false,
-              primaryKey: true,
-              comment: 'Order id',
-            },
-          ]),
-        }),
-      ]),
-    });
-  });
-});
--- a/packages/cli/src/connectors/bigquery/connector.ts
+++ b/packages/cli/src/connectors/bigquery/connector.ts
@ -1,5 +1,6 @@
 import { BigQuery, type TableField } from '@google-cloud/bigquery';
 import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from '../../context/connections/bigquery-identifiers.js';
+import { getDialectForDriver } from '../../context/connections/dialects.js';
 import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
 import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
 import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -26,7 +27,6 @@ import {
 import { readFileSync } from 'node:fs';
 import { homedir } from 'node:os';
 import { resolve } from 'node:path';
-import { KtxBigQueryDialect } from './dialect.js';

 export interface KtxBigQueryConnectionConfig {
  driver?: string;
@ -235,6 +235,23 @@ function normalizeValue(value: unknown): unknown {
  return value;
 }

+/** Rewrites each `:name` placeholder in `sql` to a BigQuery `@name` named parameter, for every key of `params`, and returns the rewritten SQL with a copy of the params; with no `params` the SQL is returned unchanged. @internal */
+export function prepareBigQueryReadOnlyQuery(
+  sql: string,
+  params?: Record<string, unknown>,
+): { sql: string; params?: Record<string, unknown> } {
+  if (!params) {
+    return { sql, params: undefined }; // no params supplied: the SQL passes through untouched
+  }
+  let processedSql = sql;
+  const processedParams: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(params)) {
+    processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`); // trailing \b keeps `:id` from matching the prefix of `:id_2`. NOTE(review): `key` is interpolated into the pattern unescaped, so this assumes keys contain no regex metacharacters — confirm against callers
+    processedParams[key] = value; // values are copied through as-is
+  }
+  return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined }; // an empty params object is reported as `params: undefined`
+}
+
 export function isKtxBigQueryConnectionConfig(
  connection: KtxBigQueryConnectionConfig | undefined,
 ): connection is KtxBigQueryConnectionConfig {
@ -286,7 +303,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
  private readonly now: () => Date;
  private readonly maxBytesBilled?: number | string;
  private readonly queryTimeoutMs?: number;
-  private readonly dialect = new KtxBigQueryDialect();
+  private readonly dialect = getDialectForDriver('bigquery');
  private client: KtxBigQueryClient | null = null;

  constructor(options: KtxBigQueryScanConnectorOptions) {
@ -364,7 +381,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
  async executeReadOnly(input: KtxBigQueryReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
-    const prepared = this.dialect.prepareQuery(limitedSql, input.params);
+    const prepared = prepareBigQueryReadOnlyQuery(limitedSql, input.params);
    const result = await this.query(prepared.sql, prepared.params);
    return { ...result, rowCount: result.rows.length };
  }
@ -411,7 +428,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
    return this.dialect.quoteIdentifier(identifier);
  }

-  async listDatasets(): Promise<string[]> {
+  async listSchemas(): Promise<string[]> {
    const [datasets] = await this.getClient().getDatasets();
    return datasets.map((dataset) => dataset.id).filter((id): id is string => Boolean(id));
  }
@ -437,6 +454,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
      params,
    );
    return rows.map((row) => ({
+      catalog: this.resolved.projectId,
      schema: row.table_schema,
      name: row.table_name,
      kind:
--- a/packages/cli/src/connectors/bigquery/dialect.test.ts
+++ b/packages/cli/src/connectors/bigquery/dialect.test.ts
@ -1,52 +0,0 @@
-import { describe, expect, it } from 'vitest';
-import { KtxBigQueryDialect } from './dialect.js';
-
-describe('KtxBigQueryDialect', () => {
-  const dialect = new KtxBigQueryDialect();
-
-  it('quotes identifiers and formats project.dataset.table names', () => {
-    expect(dialect.quoteIdentifier('order`items')).toBe('`order\\`items`');
-    expect(dialect.formatTableName({ catalog: 'project-1', db: 'analytics', name: 'orders' })).toBe(
-      '`project-1`.`analytics`.`orders`',
-    );
-    expect(dialect.formatTableName({ db: 'analytics', name: 'orders' })).toBe('`analytics`.`orders`');
-    expect(dialect.formatTableName({ name: 'orders' })).toBe('`orders`');
-  });
-
-  it('maps native BigQuery types to normalized types and scan dimensions', () => {
-    expect(dialect.mapDataType('INT64')).toBe('BIGINT');
-    expect(dialect.mapDataType('STRUCT')).toBe('JSON');
-    expect(dialect.mapDataType('GEOGRAPHY')).toBe('GEOGRAPHY');
-    expect(dialect.mapToDimensionType('TIMESTAMP')).toBe('time');
-    expect(dialect.mapToDimensionType('NUMERIC')).toBe('number');
-    expect(dialect.mapToDimensionType('BOOL')).toBe('boolean');
-    expect(dialect.mapToDimensionType('JSON')).toBe('string');
-  });
-
-  it('generates sampling, cardinality, and distinct-value SQL', () => {
-    expect(dialect.generateSampleQuery('`p`.`d`.`orders`', 5, ['id', 'status'])).toBe(
-      'SELECT `id`, `status` FROM `p`.`d`.`orders` ORDER BY RAND() LIMIT 5',
-    );
-    expect(dialect.generateColumnSampleQuery('`p`.`d`.`orders`', 'status', 10)).toBe(
-      "SELECT `status` FROM `p`.`d`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS STRING)) != '' ORDER BY RAND() LIMIT 10",
-    );
-    expect(dialect.generateCardinalitySampleQuery('`p`.`d`.`orders`', '`status`', 100)).toContain(
-      'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
-    );
-    expect(dialect.generateDistinctValuesQuery('`p`.`d`.`orders`', '`status`', 20)).toContain(
-      'SELECT DISTINCT CAST(`status` AS STRING) AS val',
-    );
-  });
-
-  it('rewrites colon parameters to BigQuery named parameters', () => {
-    expect(dialect.prepareQuery('SELECT * FROM orders WHERE id = :id AND id_2 = :id_2', { id: 1, id_2: 2 })).toEqual({
-      sql: 'SELECT * FROM orders WHERE id = @id AND id_2 = @id_2',
-      params: { id: 1, id_2: 2 },
-    });
-    expect(dialect.prepareQuery('SELECT * FROM orders')).toEqual({ sql: 'SELECT * FROM orders', params: undefined });
-  });
-
-  it('keeps unsupported statistics explicit', () => {
-    expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull();
-  });
-});
--- a/packages/cli/src/connectors/bigquery/dialect.ts
+++ b/packages/cli/src/connectors/bigquery/dialect.ts
@ -1,9 +1,18 @@
+import type { KtxDialect } from '../../context/connections/dialects.js';
+import {
+  columnDisplayPartCount,
+  formatDialectDisplayRef,
+  formatDialectTableName,
+  limitOffsetClause,
+  parseDialectDisplayRef,
+} from '../../context/connections/dialect-helpers.js';
 import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';

 type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;

-export class KtxBigQueryDialect {
-  readonly type = 'bigquery';
+/** @internal */
+export class KtxBigQueryDialect implements KtxDialect {
+  readonly type = 'bigquery' as const;

  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    TIMESTAMP: 'time',
@ -27,13 +36,19 @@ export class KtxBigQueryDialect {
  }

  formatTableName(table: BigQueryTableNameRef): string {
-    if (table.catalog && table.db) {
-      return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
-    }
-    if (table.db) {
-      return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
-    }
-    return this.quoteIdentifier(table.name);
+    return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
+  }
+
+  formatDisplayRef(table: BigQueryTableNameRef): string {
+    return formatDialectDisplayRef(table, 'three-part');
+  }
+
+  parseDisplayRef(display: string): KtxTableRef | null {
+    return parseDialectDisplayRef(display, 'three-part');
+  }
+
+  columnDisplayTablePartCount(): 1 | 2 | 3 {
+    return columnDisplayPartCount('three-part');
  }

  mapDataType(nativeType: string): string {
@ -93,19 +108,6 @@ export class KtxBigQueryDialect {
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
  }

-  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
-    if (!params) {
-      return { sql, params: undefined };
-    }
-    let processedSql = sql;
-    const processedParams: Record<string, unknown> = {};
-    for (const [key, value] of Object.entries(params)) {
-      processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
-      processedParams[key] = value;
-    }
-    return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
-  }
-
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
@ -121,7 +123,11 @@ export class KtxBigQueryDialect {
  }

  getLimitOffsetClause(limit: number, offset?: number): string {
-    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
+    return limitOffsetClause(limit, offset);
+  }
+
+  getTopClause(_limit: number): string {
+    return '';
  }

  getNullCountExpression(column: string): string {
@ -132,6 +138,18 @@ export class KtxBigQueryDialect {
    return `APPROX_COUNT_DISTINCT(${column})`;
  }

+  textLengthExpression(columnSql: string): string {
+    return `LENGTH(CAST(${columnSql} AS STRING))`;
+  }
+
+  castToText(columnSql: string): string {
+    return `CAST(${columnSql} AS STRING)`;
+  }
+
+  getSampleValueAggregation(innerSql: string): string {
+    return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`;
+  }
+
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
@ -172,36 +190,4 @@ export class KtxBigQueryDialect {
      FROM sampled
    `;
  }
-
-  getTimeTruncExpression(
-    column: string,
-    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
-    timezone?: string,
-  ): string {
-    const bigQueryGranularity = granularity.toUpperCase();
-    if (timezone) {
-      return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`;
-    }
-    return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
-  }
-
-  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
-    const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;
-    const [rawAmount, rawUnit] = interval.split(' ');
-    let diffUnit = rawUnit!.toUpperCase();
-    let amount = Number(rawAmount);
-    let addUnit = diffUnit;
-    if (diffUnit === 'WEEK') {
-      diffUnit = 'DAY';
-      amount = amount * 7;
-      addUnit = 'DAY';
-    }
-    const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
-    return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${diffUnit}) / ${amount}) * ${amount} AS INT64) ${addUnit})`;
-  }
-
-  parseIntervalToSql(interval: string): string {
-    const [amount, unit] = interval.split(' ');
-    return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
-  }
 }