mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-28 08:49:38 +02:00
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name } shape, so
qualifiedTableId always wrote db.name into enabled_tables.
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
This commit is contained in:
parent
924868841d
commit
56985b7e09
548 changed files with 5048 additions and 2228 deletions
|
|
@ -1,483 +0,0 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { bigQueryConnectionConfigFromConfig, isKtxBigQueryConnectionConfig, type KtxBigQueryClient, KtxBigQueryScanConnector, type KtxBigQueryClientFactory, type KtxBigQueryDataset, type KtxBigQueryQueryJob, type KtxBigQueryTableRef } from '../../connectors/bigquery/connector.js';
|
||||
import { createBigQueryLiveDatabaseIntrospection } from '../../connectors/bigquery/live-database-introspection.js';
|
||||
import { tableRefSet } from '../../context/scan/table-ref.js';
|
||||
|
||||
/**
 * Builds a stub `KtxBigQueryClientFactory` for the connector tests.
 *
 * The stub client answers `createQueryJob` by matching substrings of the submitted SQL
 * against a fixed, ordered list of canned result sets; any other query falls back to a
 * single `{ id: 1, status: 'paid' }` row. Dataset and table lookups always expose one
 * `orders` table with `id`, `status`, and `payload` columns.
 *
 * @param options.primaryKeyError - When set, the primary-key discovery query
 *   (the one mentioning `INFORMATION_SCHEMA.TABLE_CONSTRAINTS`) throws this error.
 */
function fakeClientFactory(options: { primaryKeyError?: Error } = {}): KtxBigQueryClientFactory {
  type QueryResults = Awaited<ReturnType<KtxBigQueryQueryJob['getQueryResults']>>;

  const primaryKeyMarker = 'INFORMATION_SCHEMA.TABLE_CONSTRAINTS';

  // Ordered: the first marker contained in the SQL wins, so the sequence below is significant.
  // Each entry builds a fresh result tuple per call rather than sharing one instance.
  const cannedResults: ReadonlyArray<{ marker: string; build: () => QueryResults }> = [
    {
      marker: primaryKeyMarker,
      build: () => [
        [{ table_name: 'orders', column_name: 'id' }],
        undefined,
        { schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
      ],
    },
    {
      marker: 'APPROX_COUNT_DISTINCT',
      build: () => [
        [{ cardinality: 2 }],
        undefined,
        { schema: { fields: [{ name: 'cardinality', type: 'INT64' }] } },
      ],
    },
    {
      marker: 'SELECT DISTINCT CAST',
      build: () => [
        [{ val: 'open' }, { val: 'paid' }],
        undefined,
        { schema: { fields: [{ name: 'val', type: 'STRING' }] } },
      ],
    },
    {
      marker: 'SELECT `status`',
      build: () => [
        [{ status: 'paid' }],
        undefined,
        { schema: { fields: [{ name: 'status', type: 'STRING' }] } },
      ],
    },
  ];

  // Fallback result for every query that matches none of the markers above.
  const defaultResults = vi.fn(async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
    [{ id: 1, status: 'paid' }],
    undefined,
    { schema: { fields: [{ name: 'id', type: 'INT64' }, { name: 'status', type: 'STRING' }] } },
  ]);

  const createQueryJob = vi.fn(async (input: { query: string }): ReturnType<KtxBigQueryClient['createQueryJob']> => {
    const canned = cannedResults.find(({ marker }) => input.query.includes(marker));
    if (canned === undefined) {
      return [{ getQueryResults: defaultResults }];
    }
    // Only the primary-key discovery query can be made to fail.
    if (canned.marker === primaryKeyMarker && options.primaryKeyError) {
      throw options.primaryKeyError;
    }
    return [
      {
        getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => canned.build(),
      },
    ];
  });

  const getTable = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
    {
      metadata: {
        type: 'TABLE',
        numRows: '12',
        description: 'Orders table',
        schema: {
          fields: [
            { name: 'id', type: 'INT64', mode: 'REQUIRED', description: 'Order id' },
            { name: 'status', type: 'STRING', mode: 'NULLABLE' },
            { name: 'payload', type: 'RECORD', mode: 'NULLABLE' },
          ],
        },
      },
    },
  ]);
  const ordersTable: KtxBigQueryTableRef = { id: 'orders', get: getTable };

  const createClient = vi.fn(() => ({
    getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }, { id: 'staging' }]]),
    dataset: vi.fn(
      (datasetId: string): KtxBigQueryDataset => ({
        get: vi.fn(async () => [{ id: datasetId }]),
        getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [[ordersTable]]),
      }),
    ),
    createQueryJob,
  }));

  return { createClient };
}
|
||||
|
||||
const connection = {
|
||||
driver: 'bigquery',
|
||||
dataset_id: 'analytics',
|
||||
credentials_json: JSON.stringify({ project_id: 'project-1', client_email: 'reader@example.test' }),
|
||||
location: 'US',
|
||||
} as const;
|
||||
|
||||
describe('KtxBigQueryScanConnector', () => {
|
||||
it('resolves configuration safely', () => {
|
||||
expect(isKtxBigQueryConnectionConfig(connection)).toBe(true);
|
||||
expect(isKtxBigQueryConnectionConfig({ driver: 'mysql' })).toBe(false);
|
||||
expect(bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection })).toMatchObject({
|
||||
projectId: 'project-1',
|
||||
datasetIds: ['analytics'],
|
||||
location: 'US',
|
||||
});
|
||||
});
|
||||
|
||||
it('introspects datasets, table metadata, primary keys, and normalized types', async () => {
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection,
|
||||
clientFactory: fakeClientFactory(),
|
||||
now: () => new Date('2026-04-29T17:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await connector.introspect(
|
||||
{ connectionId: 'warehouse', driver: 'bigquery' },
|
||||
{ runId: 'scan-run-1' },
|
||||
);
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
driver: 'bigquery',
|
||||
extractedAt: '2026-04-29T17:00:00.000Z',
|
||||
scope: { catalogs: ['project-1'], datasets: ['analytics'] },
|
||||
metadata: {
|
||||
project_id: 'project-1',
|
||||
datasets: ['analytics'],
|
||||
table_count: 1,
|
||||
total_columns: 3,
|
||||
},
|
||||
});
|
||||
expect(snapshot.tables[0]).toMatchObject({
|
||||
catalog: 'project-1',
|
||||
db: 'analytics',
|
||||
name: 'orders',
|
||||
kind: 'table',
|
||||
comment: 'Orders table',
|
||||
estimatedRows: 12,
|
||||
foreignKeys: [],
|
||||
});
|
||||
expect(snapshot.tables[0]?.columns).toEqual([
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'INT64',
|
||||
normalizedType: 'BIGINT',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: 'Order id',
|
||||
},
|
||||
{
|
||||
name: 'status',
|
||||
nativeType: 'STRING',
|
||||
normalizedType: 'VARCHAR',
|
||||
dimensionType: 'string',
|
||||
nullable: true,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
{
|
||||
name: 'payload',
|
||||
nativeType: 'RECORD',
|
||||
normalizedType: 'JSON',
|
||||
dimensionType: 'string',
|
||||
nullable: true,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it.each([
|
||||
Object.assign(new Error('Access Denied'), { code: 403 }),
|
||||
Object.assign(new Error('Not found'), { errors: [{ reason: 'notFound' }] }),
|
||||
])('soft-fails denied BigQuery primary-key discovery with a scan warning', async (primaryKeyError) => {
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection,
|
||||
clientFactory: fakeClientFactory({ primaryKeyError }),
|
||||
now: () => new Date('2026-04-29T17:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await connector.introspect(
|
||||
{ connectionId: 'warehouse', driver: 'bigquery' },
|
||||
{ runId: 'scan-run-bigquery-denied-pk' },
|
||||
);
|
||||
|
||||
expect(snapshot.warnings).toEqual([
|
||||
{
|
||||
code: 'constraint_discovery_unauthorized',
|
||||
message: 'Skipped primary-key discovery in analytics (insufficient grants on system catalogs)',
|
||||
recoverable: true,
|
||||
metadata: { schema: 'analytics', kind: 'primary_key' },
|
||||
},
|
||||
]);
|
||||
expect(snapshot.tables[0]?.foreignKeys).toEqual([]);
|
||||
expect(snapshot.tables[0]?.columns.every((column) => column.primaryKey === false)).toBe(true);
|
||||
});
|
||||
|
||||
it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => {
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection,
|
||||
clientFactory: fakeClientFactory(),
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.sampleTable(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
|
||||
columns: ['id', 'status'],
|
||||
limit: 1,
|
||||
},
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toEqual({
|
||||
headers: ['id', 'status'],
|
||||
headerTypes: ['INT64', 'STRING'],
|
||||
rows: [[1, 'paid']],
|
||||
totalRows: 1,
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.sampleColumn(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
|
||||
column: 'status',
|
||||
limit: 5,
|
||||
},
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ values: ['paid'], nullCount: null, distinctCount: null });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly(
|
||||
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
|
||||
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
|
||||
|
||||
await expect(
|
||||
connector.getColumnDistinctValues(
|
||||
{ catalog: 'project-1', db: 'analytics', name: 'orders' },
|
||||
'status',
|
||||
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
|
||||
),
|
||||
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
|
||||
await expect(connector.getTableRowCount('orders')).resolves.toBe(12);
|
||||
await expect(connector.listDatasets()).resolves.toEqual(['analytics', 'staging']);
|
||||
await expect(
|
||||
connector.columnStats(
|
||||
{ connectionId: 'warehouse', table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, column: 'status' },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toBeNull();
|
||||
await connector.cleanup();
|
||||
});
|
||||
|
||||
it('limits introspection to tables in tableScope', async () => {
|
||||
const ordersGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
|
||||
{
|
||||
metadata: {
|
||||
type: 'TABLE',
|
||||
numRows: '12',
|
||||
schema: { fields: [{ name: 'id', type: 'INT64', mode: 'REQUIRED' }] },
|
||||
},
|
||||
},
|
||||
]);
|
||||
const skippedGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
|
||||
{ metadata: { type: 'TABLE', numRows: '1', schema: { fields: [] } } },
|
||||
]);
|
||||
const clientFactory: KtxBigQueryClientFactory = {
|
||||
createClient: vi.fn(() => ({
|
||||
getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }]]),
|
||||
dataset: vi.fn(
|
||||
(): KtxBigQueryDataset => ({
|
||||
get: vi.fn(async () => [{ id: 'analytics' }]),
|
||||
getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [
|
||||
[
|
||||
{ id: 'orders', get: ordersGet },
|
||||
{ id: 'customers', get: skippedGet },
|
||||
],
|
||||
]),
|
||||
}),
|
||||
),
|
||||
createQueryJob: vi.fn(async (): ReturnType<KtxBigQueryClient['createQueryJob']> => [
|
||||
{
|
||||
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
|
||||
[],
|
||||
undefined,
|
||||
{ schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
|
||||
],
|
||||
},
|
||||
]),
|
||||
})),
|
||||
};
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection,
|
||||
clientFactory,
|
||||
});
|
||||
const scope = tableRefSet([{ catalog: 'project-1', db: 'analytics', name: 'orders' }]);
|
||||
const snapshot = await connector.introspect(
|
||||
{ connectionId: 'warehouse', driver: 'bigquery', tableScope: scope },
|
||||
{ runId: 'scope-test' },
|
||||
);
|
||||
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
|
||||
expect(ordersGet).toHaveBeenCalledTimes(1);
|
||||
expect(skippedGet).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('constructs for discovery without dataset scope and lists tables through one region information schema query', async () => {
|
||||
const createQueryJob = vi.fn(
|
||||
async (
|
||||
input: { query: string; params?: Record<string, unknown>; location?: string },
|
||||
): ReturnType<KtxBigQueryClient['createQueryJob']> => [
|
||||
{
|
||||
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
|
||||
[
|
||||
{ table_schema: 'analytics', table_name: 'orders', table_type: 'BASE TABLE' },
|
||||
{ table_schema: 'analytics', table_name: 'order_clone', table_type: 'CLONE' },
|
||||
{ table_schema: 'mart', table_name: 'orders_mv', table_type: 'MATERIALIZED VIEW' },
|
||||
],
|
||||
undefined,
|
||||
{
|
||||
schema: {
|
||||
fields: [
|
||||
{ name: 'table_schema', type: 'STRING' },
|
||||
{ name: 'table_name', type: 'STRING' },
|
||||
{ name: 'table_type', type: 'STRING' },
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
);
|
||||
const clientFactory: KtxBigQueryClientFactory = {
|
||||
createClient: vi.fn(() => ({
|
||||
getDatasets: vi.fn(async () => [[{ id: 'analytics' }, { id: 'mart' }]] as [{ id: string }[]]),
|
||||
dataset: vi.fn((datasetId: string) => ({
|
||||
get: vi.fn(async () => [{ id: datasetId }]),
|
||||
getTables: vi.fn(async () => [[]] as [never[]]),
|
||||
})),
|
||||
createQueryJob,
|
||||
})),
|
||||
};
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'bigquery',
|
||||
credentials_json: JSON.stringify({ project_id: 'project-1' }),
|
||||
location: 'US',
|
||||
},
|
||||
clientFactory,
|
||||
});
|
||||
|
||||
await expect(connector.listTables(['analytics', 'mart'])).resolves.toEqual([
|
||||
{ schema: 'analytics', name: 'orders', kind: 'table' },
|
||||
{ schema: 'analytics', name: 'order_clone', kind: 'table' },
|
||||
{ schema: 'mart', name: 'orders_mv', kind: 'view' },
|
||||
]);
|
||||
|
||||
expect(createQueryJob).toHaveBeenCalledTimes(1);
|
||||
expect(createQueryJob).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
location: 'US',
|
||||
params: { dataset_ids: ['analytics', 'mart'] },
|
||||
}),
|
||||
);
|
||||
expect(createQueryJob.mock.calls[0]?.[0].query).toContain('`project-1`.`region-us`.INFORMATION_SCHEMA.TABLES');
|
||||
expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'CLONE'");
|
||||
expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'SNAPSHOT'");
|
||||
});
|
||||
|
||||
it('keeps scan paths requiring dataset scope', async () => {
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'bigquery',
|
||||
credentials_json: JSON.stringify({ project_id: 'project-1' }),
|
||||
location: 'US',
|
||||
},
|
||||
clientFactory: fakeClientFactory(),
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.introspect(
|
||||
{ connectionId: 'warehouse', driver: 'bigquery' },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).rejects.toThrow('Native BigQuery scan requires connections.warehouse.dataset_ids or dataset_id');
|
||||
});
|
||||
|
||||
it('applies maximumBytesBilled to read-only queries when configured', async () => {
|
||||
const clientFactory = fakeClientFactory();
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection,
|
||||
clientFactory,
|
||||
maxBytesBilled: 123456789,
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly(
|
||||
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
|
||||
|
||||
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
|
||||
expect(client.createQueryJob).toHaveBeenLastCalledWith(
|
||||
expect.objectContaining({
|
||||
maximumBytesBilled: '123456789',
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('applies canonical BigQuery YAML scan limits to query jobs', async () => {
|
||||
const clientFactory = fakeClientFactory();
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: { ...connection, max_bytes_billed: '987654321', job_timeout_ms: 30_000 },
|
||||
clientFactory,
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly(
|
||||
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
|
||||
|
||||
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
|
||||
expect(client.createQueryJob).toHaveBeenLastCalledWith(
|
||||
expect.objectContaining({
|
||||
maximumBytesBilled: '987654321',
|
||||
jobTimeoutMs: 30_000,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('adapts native snapshots to live-database introspection snapshots', async () => {
|
||||
const introspection = createBigQueryLiveDatabaseIntrospection({
|
||||
connections: { warehouse: connection },
|
||||
clientFactory: fakeClientFactory(),
|
||||
now: () => new Date('2026-04-29T17:00:00.000Z'),
|
||||
});
|
||||
|
||||
await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
metadata: { project_id: 'project-1' },
|
||||
tables: expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
catalog: 'project-1',
|
||||
db: 'analytics',
|
||||
name: 'orders',
|
||||
columns: expect.arrayContaining([
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'INT64',
|
||||
normalizedType: 'BIGINT',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: 'Order id',
|
||||
},
|
||||
]),
|
||||
}),
|
||||
]),
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
import { BigQuery, type TableField } from '@google-cloud/bigquery';
|
||||
import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from '../../context/connections/bigquery-identifiers.js';
|
||||
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
|
||||
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
||||
|
|
@ -26,7 +27,6 @@ import {
|
|||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { KtxBigQueryDialect } from './dialect.js';
|
||||
|
||||
export interface KtxBigQueryConnectionConfig {
|
||||
driver?: string;
|
||||
|
|
@ -235,6 +235,23 @@ function normalizeValue(value: unknown): unknown {
|
|||
return value;
|
||||
}
|
||||
|
||||
/**
 * Rewrites `:name` placeholders in `sql` to BigQuery's `@name` named-parameter form.
 *
 * Only placeholders whose name is a key of `params` are rewritten, and a placeholder must
 * end on a word boundary, so `:id` does not touch `:id_2`. Parameter names are matched
 * literally: regex metacharacters in a key are escaped before the pattern is built, and the
 * replacement is produced by a function so `$` sequences in a key are never interpreted as
 * replacement patterns.
 *
 * @param sql - SQL text that may contain `:name` placeholders.
 * @param params - Parameter values keyed by placeholder name.
 * @returns The rewritten SQL and a shallow copy of `params`; `params` is `undefined` when no
 *   parameters were supplied or the supplied object is empty.
 * @internal
 */
export function prepareBigQueryReadOnlyQuery(
  sql: string,
  params?: Record<string, unknown>,
): { sql: string; params?: Record<string, unknown> } {
  if (!params) {
    return { sql, params: undefined };
  }

  let processedSql = sql;
  const processedParams: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(params)) {
    // Escape the key so characters such as `.` or `$` match themselves instead of acting as regex syntax.
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const placeholder = new RegExp(`:${escapedKey}\\b`, 'g');
    // A replacer function keeps `$&`, `$1`, ... inside a key from being expanded.
    processedSql = processedSql.replace(placeholder, () => `@${key}`);
    processedParams[key] = value;
  }

  return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
}
|
||||
|
||||
export function isKtxBigQueryConnectionConfig(
|
||||
connection: KtxBigQueryConnectionConfig | undefined,
|
||||
): connection is KtxBigQueryConnectionConfig {
|
||||
|
|
@ -286,7 +303,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
|
|||
private readonly now: () => Date;
|
||||
private readonly maxBytesBilled?: number | string;
|
||||
private readonly queryTimeoutMs?: number;
|
||||
private readonly dialect = new KtxBigQueryDialect();
|
||||
private readonly dialect = getDialectForDriver('bigquery');
|
||||
private client: KtxBigQueryClient | null = null;
|
||||
|
||||
constructor(options: KtxBigQueryScanConnectorOptions) {
|
||||
|
|
@ -364,7 +381,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
|
|||
async executeReadOnly(input: KtxBigQueryReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
|
||||
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
|
||||
const prepared = prepareBigQueryReadOnlyQuery(limitedSql, input.params);
|
||||
const result = await this.query(prepared.sql, prepared.params);
|
||||
return { ...result, rowCount: result.rows.length };
|
||||
}
|
||||
|
|
@ -411,7 +428,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
|
|||
return this.dialect.quoteIdentifier(identifier);
|
||||
}
|
||||
|
||||
async listDatasets(): Promise<string[]> {
|
||||
async listSchemas(): Promise<string[]> {
|
||||
const [datasets] = await this.getClient().getDatasets();
|
||||
return datasets.map((dataset) => dataset.id).filter((id): id is string => Boolean(id));
|
||||
}
|
||||
|
|
@ -437,6 +454,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
|
|||
params,
|
||||
);
|
||||
return rows.map((row) => ({
|
||||
catalog: this.resolved.projectId,
|
||||
schema: row.table_schema,
|
||||
name: row.table_name,
|
||||
kind:
|
||||
|
|
|
|||
|
|
@ -1,52 +0,0 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxBigQueryDialect } from './dialect.js';
|
||||
|
||||
describe('KtxBigQueryDialect', () => {
|
||||
const dialect = new KtxBigQueryDialect();
|
||||
|
||||
it('quotes identifiers and formats project.dataset.table names', () => {
|
||||
expect(dialect.quoteIdentifier('order`items')).toBe('`order\\`items`');
|
||||
expect(dialect.formatTableName({ catalog: 'project-1', db: 'analytics', name: 'orders' })).toBe(
|
||||
'`project-1`.`analytics`.`orders`',
|
||||
);
|
||||
expect(dialect.formatTableName({ db: 'analytics', name: 'orders' })).toBe('`analytics`.`orders`');
|
||||
expect(dialect.formatTableName({ name: 'orders' })).toBe('`orders`');
|
||||
});
|
||||
|
||||
it('maps native BigQuery types to normalized types and scan dimensions', () => {
|
||||
expect(dialect.mapDataType('INT64')).toBe('BIGINT');
|
||||
expect(dialect.mapDataType('STRUCT')).toBe('JSON');
|
||||
expect(dialect.mapDataType('GEOGRAPHY')).toBe('GEOGRAPHY');
|
||||
expect(dialect.mapToDimensionType('TIMESTAMP')).toBe('time');
|
||||
expect(dialect.mapToDimensionType('NUMERIC')).toBe('number');
|
||||
expect(dialect.mapToDimensionType('BOOL')).toBe('boolean');
|
||||
expect(dialect.mapToDimensionType('JSON')).toBe('string');
|
||||
});
|
||||
|
||||
it('generates sampling, cardinality, and distinct-value SQL', () => {
|
||||
expect(dialect.generateSampleQuery('`p`.`d`.`orders`', 5, ['id', 'status'])).toBe(
|
||||
'SELECT `id`, `status` FROM `p`.`d`.`orders` ORDER BY RAND() LIMIT 5',
|
||||
);
|
||||
expect(dialect.generateColumnSampleQuery('`p`.`d`.`orders`', 'status', 10)).toBe(
|
||||
"SELECT `status` FROM `p`.`d`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS STRING)) != '' ORDER BY RAND() LIMIT 10",
|
||||
);
|
||||
expect(dialect.generateCardinalitySampleQuery('`p`.`d`.`orders`', '`status`', 100)).toContain(
|
||||
'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
|
||||
);
|
||||
expect(dialect.generateDistinctValuesQuery('`p`.`d`.`orders`', '`status`', 20)).toContain(
|
||||
'SELECT DISTINCT CAST(`status` AS STRING) AS val',
|
||||
);
|
||||
});
|
||||
|
||||
it('rewrites colon parameters to BigQuery named parameters', () => {
|
||||
expect(dialect.prepareQuery('SELECT * FROM orders WHERE id = :id AND id_2 = :id_2', { id: 1, id_2: 2 })).toEqual({
|
||||
sql: 'SELECT * FROM orders WHERE id = @id AND id_2 = @id_2',
|
||||
params: { id: 1, id_2: 2 },
|
||||
});
|
||||
expect(dialect.prepareQuery('SELECT * FROM orders')).toEqual({ sql: 'SELECT * FROM orders', params: undefined });
|
||||
});
|
||||
|
||||
it('keeps unsupported statistics explicit', () => {
|
||||
expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
|
@ -1,9 +1,18 @@
|
|||
import type { KtxDialect } from '../../context/connections/dialects.js';
|
||||
import {
|
||||
columnDisplayPartCount,
|
||||
formatDialectDisplayRef,
|
||||
formatDialectTableName,
|
||||
limitOffsetClause,
|
||||
parseDialectDisplayRef,
|
||||
} from '../../context/connections/dialect-helpers.js';
|
||||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
export class KtxBigQueryDialect {
|
||||
readonly type = 'bigquery';
|
||||
/** @internal */
|
||||
export class KtxBigQueryDialect implements KtxDialect {
|
||||
readonly type = 'bigquery' as const;
|
||||
|
||||
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
|
||||
TIMESTAMP: 'time',
|
||||
|
|
@ -27,13 +36,19 @@ export class KtxBigQueryDialect {
|
|||
}
|
||||
|
||||
formatTableName(table: BigQueryTableNameRef): string {
|
||||
if (table.catalog && table.db) {
|
||||
return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
|
||||
}
|
||||
if (table.db) {
|
||||
return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
|
||||
}
|
||||
return this.quoteIdentifier(table.name);
|
||||
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
|
||||
}
|
||||
|
||||
formatDisplayRef(table: BigQueryTableNameRef): string {
|
||||
return formatDialectDisplayRef(table, 'three-part');
|
||||
}
|
||||
|
||||
parseDisplayRef(display: string): KtxTableRef | null {
|
||||
return parseDialectDisplayRef(display, 'three-part');
|
||||
}
|
||||
|
||||
columnDisplayTablePartCount(): 1 | 2 | 3 {
|
||||
return columnDisplayPartCount('three-part');
|
||||
}
|
||||
|
||||
mapDataType(nativeType: string): string {
|
||||
|
|
@ -93,19 +108,6 @@ export class KtxBigQueryDialect {
|
|||
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
|
||||
}
|
||||
|
||||
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
|
||||
if (!params) {
|
||||
return { sql, params: undefined };
|
||||
}
|
||||
let processedSql = sql;
|
||||
const processedParams: Record<string, unknown> = {};
|
||||
for (const [key, value] of Object.entries(params)) {
|
||||
processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
|
||||
processedParams[key] = value;
|
||||
}
|
||||
return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
|
||||
}
|
||||
|
||||
getRandomSampleFilter(samplePct: number): string {
|
||||
if (samplePct <= 0 || samplePct >= 1) {
|
||||
return '';
|
||||
|
|
@ -121,7 +123,11 @@ export class KtxBigQueryDialect {
|
|||
}
|
||||
|
||||
getLimitOffsetClause(limit: number, offset?: number): string {
|
||||
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
|
||||
return limitOffsetClause(limit, offset);
|
||||
}
|
||||
|
||||
getTopClause(_limit: number): string {
|
||||
return '';
|
||||
}
|
||||
|
||||
getNullCountExpression(column: string): string {
|
||||
|
|
@ -132,6 +138,18 @@ export class KtxBigQueryDialect {
|
|||
return `APPROX_COUNT_DISTINCT(${column})`;
|
||||
}
|
||||
|
||||
textLengthExpression(columnSql: string): string {
|
||||
return `LENGTH(CAST(${columnSql} AS STRING))`;
|
||||
}
|
||||
|
||||
castToText(columnSql: string): string {
|
||||
return `CAST(${columnSql} AS STRING)`;
|
||||
}
|
||||
|
||||
getSampleValueAggregation(innerSql: string): string {
|
||||
return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`;
|
||||
}
|
||||
|
||||
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
return `
|
||||
WITH sampled AS (
|
||||
|
|
@ -172,36 +190,4 @@ export class KtxBigQueryDialect {
|
|||
FROM sampled
|
||||
`;
|
||||
}
|
||||
|
||||
getTimeTruncExpression(
|
||||
column: string,
|
||||
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
|
||||
timezone?: string,
|
||||
): string {
|
||||
const bigQueryGranularity = granularity.toUpperCase();
|
||||
if (timezone) {
|
||||
return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`;
|
||||
}
|
||||
return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
|
||||
}
|
||||
|
||||
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
|
||||
const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;
|
||||
const [rawAmount, rawUnit] = interval.split(' ');
|
||||
let diffUnit = rawUnit!.toUpperCase();
|
||||
let amount = Number(rawAmount);
|
||||
let addUnit = diffUnit;
|
||||
if (diffUnit === 'WEEK') {
|
||||
diffUnit = 'DAY';
|
||||
amount = amount * 7;
|
||||
addUnit = 'DAY';
|
||||
}
|
||||
const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
|
||||
return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${diffUnit}) / ${amount}) * ${amount} AS INT64) ${addUnit})`;
|
||||
}
|
||||
|
||||
parseIntervalToSql(interval: string): string {
|
||||
const [amount, unit] = interval.split(' ');
|
||||
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue