test: split cli tests from source tree (#216)

* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
This commit is contained in:
Andrey Avtomonov 2026-05-26 08:49:05 +02:00 committed by GitHub
parent 924868841d
commit 56985b7e09
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
548 changed files with 5048 additions and 2228 deletions

View file

@ -1,483 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { bigQueryConnectionConfigFromConfig, isKtxBigQueryConnectionConfig, type KtxBigQueryClient, KtxBigQueryScanConnector, type KtxBigQueryClientFactory, type KtxBigQueryDataset, type KtxBigQueryQueryJob, type KtxBigQueryTableRef } from '../../connectors/bigquery/connector.js';
import { createBigQueryLiveDatabaseIntrospection } from '../../connectors/bigquery/live-database-introspection.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
function fakeClientFactory(options: { primaryKeyError?: Error } = {}): KtxBigQueryClientFactory {
const queryResults = vi.fn(async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ id: 1, status: 'paid' }],
undefined,
{ schema: { fields: [{ name: 'id', type: 'INT64' }, { name: 'status', type: 'STRING' }] } },
]);
const createQueryJob = vi.fn(async (input: { query: string }): ReturnType<KtxBigQueryClient['createQueryJob']> => {
if (input.query.includes('INFORMATION_SCHEMA.TABLE_CONSTRAINTS')) {
if (options.primaryKeyError) {
throw options.primaryKeyError;
}
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ table_name: 'orders', column_name: 'id' }],
undefined,
{ schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
],
},
];
}
if (input.query.includes('APPROX_COUNT_DISTINCT')) {
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ cardinality: 2 }],
undefined,
{ schema: { fields: [{ name: 'cardinality', type: 'INT64' }] } },
],
},
];
}
if (input.query.includes('SELECT DISTINCT CAST')) {
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ val: 'open' }, { val: 'paid' }],
undefined,
{ schema: { fields: [{ name: 'val', type: 'STRING' }] } },
],
},
];
}
if (input.query.includes('SELECT `status`')) {
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ status: 'paid' }],
undefined,
{ schema: { fields: [{ name: 'status', type: 'STRING' }] } },
],
},
];
}
return [{ getQueryResults: queryResults }];
});
const getTable = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{
metadata: {
type: 'TABLE',
numRows: '12',
description: 'Orders table',
schema: {
fields: [
{ name: 'id', type: 'INT64', mode: 'REQUIRED', description: 'Order id' },
{ name: 'status', type: 'STRING', mode: 'NULLABLE' },
{ name: 'payload', type: 'RECORD', mode: 'NULLABLE' },
],
},
},
},
]);
const tableRef: KtxBigQueryTableRef = { id: 'orders', get: getTable };
return {
createClient: vi.fn(() => ({
getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }, { id: 'staging' }]]),
dataset: vi.fn(
(datasetId: string): KtxBigQueryDataset => ({
get: vi.fn(async () => [{ id: datasetId }]),
getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [[tableRef]]),
}),
),
createQueryJob,
})),
};
}
const connection = {
driver: 'bigquery',
dataset_id: 'analytics',
credentials_json: JSON.stringify({ project_id: 'project-1', client_email: 'reader@example.test' }),
location: 'US',
} as const;
describe('KtxBigQueryScanConnector', () => {
it('resolves configuration safely', () => {
expect(isKtxBigQueryConnectionConfig(connection)).toBe(true);
expect(isKtxBigQueryConnectionConfig({ driver: 'mysql' })).toBe(false);
expect(bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection })).toMatchObject({
projectId: 'project-1',
datasetIds: ['analytics'],
location: 'US',
});
});
it('introspects datasets, table metadata, primary keys, and normalized types', async () => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory: fakeClientFactory(),
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'bigquery',
extractedAt: '2026-04-29T17:00:00.000Z',
scope: { catalogs: ['project-1'], datasets: ['analytics'] },
metadata: {
project_id: 'project-1',
datasets: ['analytics'],
table_count: 1,
total_columns: 3,
},
});
expect(snapshot.tables[0]).toMatchObject({
catalog: 'project-1',
db: 'analytics',
name: 'orders',
kind: 'table',
comment: 'Orders table',
estimatedRows: 12,
foreignKeys: [],
});
expect(snapshot.tables[0]?.columns).toEqual([
{
name: 'id',
nativeType: 'INT64',
normalizedType: 'BIGINT',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
{
name: 'status',
nativeType: 'STRING',
normalizedType: 'VARCHAR',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
{
name: 'payload',
nativeType: 'RECORD',
normalizedType: 'JSON',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
]);
});
it.each([
Object.assign(new Error('Access Denied'), { code: 403 }),
Object.assign(new Error('Not found'), { errors: [{ reason: 'notFound' }] }),
])('soft-fails denied BigQuery primary-key discovery with a scan warning', async (primaryKeyError) => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory: fakeClientFactory({ primaryKeyError }),
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery' },
{ runId: 'scan-run-bigquery-denied-pk' },
);
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in analytics (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'analytics', kind: 'primary_key' },
},
]);
expect(snapshot.tables[0]?.foreignKeys).toEqual([]);
expect(snapshot.tables[0]?.columns.every((column) => column.primaryKey === false)).toBe(true);
});
it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory: fakeClientFactory(),
});
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
columns: ['id', 'status'],
limit: 1,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({
headers: ['id', 'status'],
headerTypes: ['INT64', 'STRING'],
rows: [[1, 'paid']],
totalRows: 1,
});
await expect(
connector.sampleColumn(
{
connectionId: 'warehouse',
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
column: 'status',
limit: 5,
},
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid'], nullCount: null, distinctCount: null });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 });
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(
connector.getColumnDistinctValues(
{ catalog: 'project-1', db: 'analytics', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(connector.getTableRowCount('orders')).resolves.toBe(12);
await expect(connector.listDatasets()).resolves.toEqual(['analytics', 'staging']);
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
await connector.cleanup();
});
it('limits introspection to tables in tableScope', async () => {
const ordersGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{
metadata: {
type: 'TABLE',
numRows: '12',
schema: { fields: [{ name: 'id', type: 'INT64', mode: 'REQUIRED' }] },
},
},
]);
const skippedGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{ metadata: { type: 'TABLE', numRows: '1', schema: { fields: [] } } },
]);
const clientFactory: KtxBigQueryClientFactory = {
createClient: vi.fn(() => ({
getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }]]),
dataset: vi.fn(
(): KtxBigQueryDataset => ({
get: vi.fn(async () => [{ id: 'analytics' }]),
getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [
[
{ id: 'orders', get: ordersGet },
{ id: 'customers', get: skippedGet },
],
]),
}),
),
createQueryJob: vi.fn(async (): ReturnType<KtxBigQueryClient['createQueryJob']> => [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[],
undefined,
{ schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
],
},
]),
})),
};
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory,
});
const scope = tableRefSet([{ catalog: 'project-1', db: 'analytics', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
expect(ordersGet).toHaveBeenCalledTimes(1);
expect(skippedGet).not.toHaveBeenCalled();
});
it('constructs for discovery without dataset scope and lists tables through one region information schema query', async () => {
const createQueryJob = vi.fn(
async (
input: { query: string; params?: Record<string, unknown>; location?: string },
): ReturnType<KtxBigQueryClient['createQueryJob']> => [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[
{ table_schema: 'analytics', table_name: 'orders', table_type: 'BASE TABLE' },
{ table_schema: 'analytics', table_name: 'order_clone', table_type: 'CLONE' },
{ table_schema: 'mart', table_name: 'orders_mv', table_type: 'MATERIALIZED VIEW' },
],
undefined,
{
schema: {
fields: [
{ name: 'table_schema', type: 'STRING' },
{ name: 'table_name', type: 'STRING' },
{ name: 'table_type', type: 'STRING' },
],
},
},
],
},
],
);
const clientFactory: KtxBigQueryClientFactory = {
createClient: vi.fn(() => ({
getDatasets: vi.fn(async () => [[{ id: 'analytics' }, { id: 'mart' }]] as [{ id: string }[]]),
dataset: vi.fn((datasetId: string) => ({
get: vi.fn(async () => [{ id: datasetId }]),
getTables: vi.fn(async () => [[]] as [never[]]),
})),
createQueryJob,
})),
};
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'bigquery',
credentials_json: JSON.stringify({ project_id: 'project-1' }),
location: 'US',
},
clientFactory,
});
await expect(connector.listTables(['analytics', 'mart'])).resolves.toEqual([
{ schema: 'analytics', name: 'orders', kind: 'table' },
{ schema: 'analytics', name: 'order_clone', kind: 'table' },
{ schema: 'mart', name: 'orders_mv', kind: 'view' },
]);
expect(createQueryJob).toHaveBeenCalledTimes(1);
expect(createQueryJob).toHaveBeenCalledWith(
expect.objectContaining({
location: 'US',
params: { dataset_ids: ['analytics', 'mart'] },
}),
);
expect(createQueryJob.mock.calls[0]?.[0].query).toContain('`project-1`.`region-us`.INFORMATION_SCHEMA.TABLES');
expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'CLONE'");
expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'SNAPSHOT'");
});
it('keeps scan paths requiring dataset scope', async () => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'bigquery',
credentials_json: JSON.stringify({ project_id: 'project-1' }),
location: 'US',
},
clientFactory: fakeClientFactory(),
});
await expect(
connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery' },
{ runId: 'scan-run-1' },
),
).rejects.toThrow('Native BigQuery scan requires connections.warehouse.dataset_ids or dataset_id');
});
it('applies maximumBytesBilled to read-only queries when configured', async () => {
const clientFactory = fakeClientFactory();
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory,
maxBytesBilled: 123456789,
});
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
expect(client.createQueryJob).toHaveBeenLastCalledWith(
expect.objectContaining({
maximumBytesBilled: '123456789',
}),
);
});
it('applies canonical BigQuery YAML scan limits to query jobs', async () => {
const clientFactory = fakeClientFactory();
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection: { ...connection, max_bytes_billed: '987654321', job_timeout_ms: 30_000 },
clientFactory,
});
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
expect(client.createQueryJob).toHaveBeenLastCalledWith(
expect.objectContaining({
maximumBytesBilled: '987654321',
jobTimeoutMs: 30_000,
}),
);
});
it('adapts native snapshots to live-database introspection snapshots', async () => {
const introspection = createBigQueryLiveDatabaseIntrospection({
connections: { warehouse: connection },
clientFactory: fakeClientFactory(),
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
connectionId: 'warehouse',
metadata: { project_id: 'project-1' },
tables: expect.arrayContaining([
expect.objectContaining({
catalog: 'project-1',
db: 'analytics',
name: 'orders',
columns: expect.arrayContaining([
{
name: 'id',
nativeType: 'INT64',
normalizedType: 'BIGINT',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
]),
}),
]),
});
});
});

View file

@ -1,5 +1,6 @@
import { BigQuery, type TableField } from '@google-cloud/bigquery';
import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from '../../context/connections/bigquery-identifiers.js';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -26,7 +27,6 @@ import {
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { KtxBigQueryDialect } from './dialect.js';
export interface KtxBigQueryConnectionConfig {
driver?: string;
@ -235,6 +235,23 @@ function normalizeValue(value: unknown): unknown {
return value;
}
/** @internal */
export function prepareBigQueryReadOnlyQuery(
sql: string,
params?: Record<string, unknown>,
): { sql: string; params?: Record<string, unknown> } {
if (!params) {
return { sql, params: undefined };
}
let processedSql = sql;
const processedParams: Record<string, unknown> = {};
for (const [key, value] of Object.entries(params)) {
processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
processedParams[key] = value;
}
return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
}
export function isKtxBigQueryConnectionConfig(
connection: KtxBigQueryConnectionConfig | undefined,
): connection is KtxBigQueryConnectionConfig {
@ -286,7 +303,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
private readonly now: () => Date;
private readonly maxBytesBilled?: number | string;
private readonly queryTimeoutMs?: number;
private readonly dialect = new KtxBigQueryDialect();
private readonly dialect = getDialectForDriver('bigquery');
private client: KtxBigQueryClient | null = null;
constructor(options: KtxBigQueryScanConnectorOptions) {
@ -364,7 +381,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxBigQueryReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const prepared = prepareBigQueryReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -411,7 +428,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
return this.dialect.quoteIdentifier(identifier);
}
async listDatasets(): Promise<string[]> {
async listSchemas(): Promise<string[]> {
const [datasets] = await this.getClient().getDatasets();
return datasets.map((dataset) => dataset.id).filter((id): id is string => Boolean(id));
}
@ -437,6 +454,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
params,
);
return rows.map((row) => ({
catalog: this.resolved.projectId,
schema: row.table_schema,
name: row.table_name,
kind:

View file

@ -1,52 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxBigQueryDialect } from './dialect.js';
describe('KtxBigQueryDialect', () => {
const dialect = new KtxBigQueryDialect();
it('quotes identifiers and formats project.dataset.table names', () => {
expect(dialect.quoteIdentifier('order`items')).toBe('`order\\`items`');
expect(dialect.formatTableName({ catalog: 'project-1', db: 'analytics', name: 'orders' })).toBe(
'`project-1`.`analytics`.`orders`',
);
expect(dialect.formatTableName({ db: 'analytics', name: 'orders' })).toBe('`analytics`.`orders`');
expect(dialect.formatTableName({ name: 'orders' })).toBe('`orders`');
});
it('maps native BigQuery types to normalized types and scan dimensions', () => {
expect(dialect.mapDataType('INT64')).toBe('BIGINT');
expect(dialect.mapDataType('STRUCT')).toBe('JSON');
expect(dialect.mapDataType('GEOGRAPHY')).toBe('GEOGRAPHY');
expect(dialect.mapToDimensionType('TIMESTAMP')).toBe('time');
expect(dialect.mapToDimensionType('NUMERIC')).toBe('number');
expect(dialect.mapToDimensionType('BOOL')).toBe('boolean');
expect(dialect.mapToDimensionType('JSON')).toBe('string');
});
it('generates sampling, cardinality, and distinct-value SQL', () => {
expect(dialect.generateSampleQuery('`p`.`d`.`orders`', 5, ['id', 'status'])).toBe(
'SELECT `id`, `status` FROM `p`.`d`.`orders` ORDER BY RAND() LIMIT 5',
);
expect(dialect.generateColumnSampleQuery('`p`.`d`.`orders`', 'status', 10)).toBe(
"SELECT `status` FROM `p`.`d`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS STRING)) != '' ORDER BY RAND() LIMIT 10",
);
expect(dialect.generateCardinalitySampleQuery('`p`.`d`.`orders`', '`status`', 100)).toContain(
'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
);
expect(dialect.generateDistinctValuesQuery('`p`.`d`.`orders`', '`status`', 20)).toContain(
'SELECT DISTINCT CAST(`status` AS STRING) AS val',
);
});
it('rewrites colon parameters to BigQuery named parameters', () => {
expect(dialect.prepareQuery('SELECT * FROM orders WHERE id = :id AND id_2 = :id_2', { id: 1, id_2: 2 })).toEqual({
sql: 'SELECT * FROM orders WHERE id = @id AND id_2 = @id_2',
params: { id: 1, id_2: 2 },
});
expect(dialect.prepareQuery('SELECT * FROM orders')).toEqual({ sql: 'SELECT * FROM orders', params: undefined });
});
it('keeps unsupported statistics explicit', () => {
expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull();
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxBigQueryDialect {
readonly type = 'bigquery';
/** @internal */
export class KtxBigQueryDialect implements KtxDialect {
readonly type = 'bigquery' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
TIMESTAMP: 'time',
@ -27,13 +36,19 @@ export class KtxBigQueryDialect {
}
formatTableName(table: BigQueryTableNameRef): string {
if (table.catalog && table.db) {
return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
if (table.db) {
return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
formatDisplayRef(table: BigQueryTableNameRef): string {
return formatDialectDisplayRef(table, 'three-part');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'three-part');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('three-part');
}
mapDataType(nativeType: string): string {
@ -93,19 +108,6 @@ export class KtxBigQueryDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
}
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
if (!params) {
return { sql, params: undefined };
}
let processedSql = sql;
const processedParams: Record<string, unknown> = {};
for (const [key, value] of Object.entries(params)) {
processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
processedParams[key] = value;
}
return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -121,7 +123,11 @@ export class KtxBigQueryDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -132,6 +138,18 @@ export class KtxBigQueryDialect {
return `APPROX_COUNT_DISTINCT(${column})`;
}
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS STRING))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS STRING)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -172,36 +190,4 @@ export class KtxBigQueryDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const bigQueryGranularity = granularity.toUpperCase();
if (timezone) {
return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`;
}
return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;
const [rawAmount, rawUnit] = interval.split(' ');
let diffUnit = rawUnit!.toUpperCase();
let amount = Number(rawAmount);
let addUnit = diffUnit;
if (diffUnit === 'WEEK') {
diffUnit = 'DAY';
amount = amount * 7;
addUnit = 'DAY';
}
const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${diffUnit}) / ${amount}) * ${amount} AS INT64) ${addUnit})`;
}
parseIntervalToSql(interval: string): string {
const [amount, unit] = interval.split(' ');
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
}
}