mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-01 08:59:39 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
307
packages/connector-bigquery/src/connector.test.ts
Normal file
307
packages/connector-bigquery/src/connector.test.ts
Normal file
|
|
@ -0,0 +1,307 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
bigQueryConnectionConfigFromConfig,
|
||||
createBigQueryLiveDatabaseIntrospection,
|
||||
isKloBigQueryConnectionConfig,
|
||||
type KloBigQueryClient,
|
||||
KloBigQueryScanConnector,
|
||||
type KloBigQueryClientFactory,
|
||||
type KloBigQueryDataset,
|
||||
type KloBigQueryQueryJob,
|
||||
type KloBigQueryTableRef,
|
||||
} from './index.js';
|
||||
|
||||
/**
 * Builds an in-memory BigQuery client factory for the connector tests.
 *
 * The fake client exposes two datasets, a single `orders` table with three columns, and a
 * `createQueryJob` mock that picks a canned result set by looking for a marker substring in the
 * submitted SQL. Queries that match no marker get a default `id`/`status` row.
 */
function fakeClientFactory(): KloBigQueryClientFactory {
  type CannedRows = Array<Record<string, unknown>>;
  type CannedField = { name: string; type: string };

  // Wraps rows and their schema in a job-shaped object.
  const jobReturning = (rows: CannedRows, fields: CannedField[]): KloBigQueryQueryJob => ({
    getQueryResults: async (): ReturnType<KloBigQueryQueryJob['getQueryResults']> => [
      rows,
      undefined,
      { schema: { fields } },
    ],
  });

  // Checked in order: the first marker contained in the SQL wins.
  const cannedResponses: Array<{ marker: string; rows: CannedRows; fields: CannedField[] }> = [
    {
      marker: 'INFORMATION_SCHEMA.TABLE_CONSTRAINTS',
      rows: [{ table_name: 'orders', column_name: 'id' }],
      fields: [
        { name: 'table_name', type: 'STRING' },
        { name: 'column_name', type: 'STRING' },
      ],
    },
    {
      marker: 'APPROX_COUNT_DISTINCT',
      rows: [{ cardinality: 2 }],
      fields: [{ name: 'cardinality', type: 'INT64' }],
    },
    {
      marker: 'SELECT DISTINCT CAST',
      rows: [{ val: 'open' }, { val: 'paid' }],
      fields: [{ name: 'val', type: 'STRING' }],
    },
    {
      marker: 'SELECT `status`',
      rows: [{ status: 'paid' }],
      fields: [{ name: 'status', type: 'STRING' }],
    },
  ];

  // Fallback result for any SQL that matches none of the markers above.
  const defaultResults = vi.fn(async (): ReturnType<KloBigQueryQueryJob['getQueryResults']> => [
    [{ id: 1, status: 'paid' }],
    undefined,
    { schema: { fields: [{ name: 'id', type: 'INT64' }, { name: 'status', type: 'STRING' }] } },
  ]);

  const createQueryJob = vi.fn(async (input: { query: string }): ReturnType<KloBigQueryClient['createQueryJob']> => {
    const canned = cannedResponses.find((candidate) => input.query.includes(candidate.marker));
    if (canned) {
      return [jobReturning(canned.rows, canned.fields)];
    }
    return [{ getQueryResults: defaultResults }];
  });

  // Metadata returned for the only table in every dataset.
  const loadOrdersTable = vi.fn(async (): ReturnType<KloBigQueryTableRef['get']> => [
    {
      metadata: {
        type: 'TABLE',
        numRows: '12',
        description: 'Orders table',
        schema: {
          fields: [
            { name: 'id', type: 'INT64', mode: 'REQUIRED', description: 'Order id' },
            { name: 'status', type: 'STRING', mode: 'NULLABLE' },
            { name: 'payload', type: 'RECORD', mode: 'NULLABLE' },
          ],
        },
      },
    },
  ]);
  const ordersTableRef: KloBigQueryTableRef = { id: 'orders', get: loadOrdersTable };

  const buildDataset = (datasetId: string): KloBigQueryDataset => ({
    get: vi.fn(async () => [{ id: datasetId }]),
    getTables: vi.fn(async (): ReturnType<KloBigQueryDataset['getTables']> => [[ordersTableRef]]),
  });

  return {
    createClient: vi.fn(() => ({
      getDatasets: vi.fn(async (): ReturnType<KloBigQueryClient['getDatasets']> => [[{ id: 'analytics' }, { id: 'staging' }]]),
      dataset: vi.fn(buildDataset),
      createQueryJob,
    })),
  };
}
|
||||
|
||||
// Read-only BigQuery connection entry shared by every test case in this file.
// The credentials are an inline JSON document carrying the project id the connector resolves.
const connection = {
  driver: 'bigquery',
  dataset_id: 'analytics',
  credentials_json: JSON.stringify({
    project_id: 'project-1',
    client_email: 'reader@example.test',
  }),
  location: 'US',
  readonly: true,
};
|
||||
|
||||
describe('KloBigQueryScanConnector', () => {
  // Fixture factories: each call returns a fresh object so no test shares mutable state.
  const scanCtx = () => ({ runId: 'scan-run-1' });
  const ordersRef = () => ({ catalog: 'project-1', db: 'analytics', name: 'orders' });
  const frozenNow = () => new Date('2026-04-29T17:00:00.000Z');
  const selectOrdersSql = 'select id, status from `project-1`.`analytics`.`orders`';
  const idColumnSnapshot = () => ({
    name: 'id',
    nativeType: 'INT64',
    normalizedType: 'BIGINT',
    dimensionType: 'number',
    nullable: false,
    primaryKey: true,
    comment: 'Order id',
  });

  it('resolves configuration safely', () => {
    expect(isKloBigQueryConnectionConfig(connection)).toBe(true);
    expect(isKloBigQueryConnectionConfig({ driver: 'mysql' })).toBe(false);

    const resolved = bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection });
    expect(resolved).toMatchObject({
      projectId: 'project-1',
      datasetIds: ['analytics'],
      location: 'US',
    });

    // A connection that is not explicitly read-only must be rejected.
    const writableConnection = { ...connection, readonly: false };
    const resolveWritable = () =>
      bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection: writableConnection });
    expect(resolveWritable).toThrow('Native BigQuery connector requires connections.warehouse.readonly: true');
  });

  it('introspects datasets, table metadata, primary keys, and normalized types', async () => {
    const connector = new KloBigQueryScanConnector({
      connectionId: 'warehouse',
      connection,
      clientFactory: fakeClientFactory(),
      now: frozenNow,
    });

    const snapshot = await connector.introspect({ connectionId: 'warehouse', driver: 'bigquery' }, scanCtx());

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'bigquery',
      extractedAt: '2026-04-29T17:00:00.000Z',
      scope: { catalogs: ['project-1'], datasets: ['analytics'] },
      metadata: {
        project_id: 'project-1',
        datasets: ['analytics'],
        table_count: 1,
        total_columns: 3,
      },
    });

    const [ordersTable] = snapshot.tables;
    expect(ordersTable).toMatchObject({
      catalog: 'project-1',
      db: 'analytics',
      name: 'orders',
      kind: 'table',
      comment: 'Orders table',
      estimatedRows: 12,
      foreignKeys: [],
    });
    expect(ordersTable?.columns).toEqual([
      idColumnSnapshot(),
      {
        name: 'status',
        nativeType: 'STRING',
        normalizedType: 'VARCHAR',
        dimensionType: 'string',
        nullable: true,
        primaryKey: false,
        comment: null,
      },
      {
        name: 'payload',
        nativeType: 'RECORD',
        normalizedType: 'JSON',
        dimensionType: 'string',
        nullable: true,
        primaryKey: false,
        comment: null,
      },
    ]);
  });

  it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => {
    const connector = new KloBigQueryScanConnector({
      connectionId: 'warehouse',
      connection,
      clientFactory: fakeClientFactory(),
    });

    // Table sample: headers and types come from the job's result schema.
    const tableSample = connector.sampleTable(
      { connectionId: 'warehouse', table: ordersRef(), columns: ['id', 'status'], limit: 1 },
      scanCtx(),
    );
    await expect(tableSample).resolves.toEqual({
      headers: ['id', 'status'],
      headerTypes: ['INT64', 'STRING'],
      rows: [[1, 'paid']],
      totalRows: 1,
    });

    // Column sample: only values are reported; counts stay null.
    const columnSample = connector.sampleColumn(
      { connectionId: 'warehouse', table: ordersRef(), column: 'status', limit: 5 },
      scanCtx(),
    );
    await expect(columnSample).resolves.toMatchObject({ values: ['paid'], nullCount: null, distinctCount: null });

    // Read-only SQL is executed; a mutating statement is rejected.
    const readOnly = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: selectOrdersSql, maxRows: 1 },
      scanCtx(),
    );
    await expect(readOnly).resolves.toMatchObject({
      headers: ['id', 'status'],
      rows: [[1, 'paid']],
      totalRows: 1,
      rowCount: 1,
    });

    const mutating = connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, scanCtx());
    await expect(mutating).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

    const distinctValues = connector.getColumnDistinctValues(ordersRef(), 'status', {
      maxCardinality: 5,
      limit: 10,
      sampleSize: 100,
    });
    await expect(distinctValues).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    await expect(connector.getTableRowCount('orders')).resolves.toBe(12);
    await expect(connector.listDatasets()).resolves.toEqual(['analytics', 'staging']);

    const stats = connector.columnStats(
      { connectionId: 'warehouse', table: ordersRef(), column: 'status' },
      scanCtx(),
    );
    await expect(stats).resolves.toBeNull();

    await connector.cleanup();
  });

  it('applies maximumBytesBilled to read-only queries when configured', async () => {
    const clientFactory = fakeClientFactory();
    const connector = new KloBigQueryScanConnector({
      connectionId: 'warehouse',
      connection,
      clientFactory,
      maxBytesBilled: 123456789,
    });

    const readOnly = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: selectOrdersSql, maxRows: 1 },
      scanCtx(),
    );
    await expect(readOnly).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });

    // The numeric limit must reach the client as a string.
    const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KloBigQueryClient;
    expect(client.createQueryJob).toHaveBeenLastCalledWith(
      expect.objectContaining({ maximumBytesBilled: '123456789' }),
    );
  });

  it('adapts native snapshots to live-database introspection snapshots', async () => {
    const introspection = createBigQueryLiveDatabaseIntrospection({
      connections: { warehouse: connection },
      clientFactory: fakeClientFactory(),
      now: frozenNow,
    });

    const expectedOrdersTable = expect.objectContaining({
      catalog: 'project-1',
      db: 'analytics',
      name: 'orders',
      columns: expect.arrayContaining([idColumnSnapshot()]),
    });

    await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
      connectionId: 'warehouse',
      metadata: { project_id: 'project-1' },
      tables: expect.arrayContaining([expectedOrdersTable]),
    });
  });
});
|
||||
492
packages/connector-bigquery/src/connector.ts
Normal file
492
packages/connector-bigquery/src/connector.ts
Normal file
|
|
@ -0,0 +1,492 @@
|
|||
import { BigQuery, type TableField } from '@google-cloud/bigquery';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '@klo/context/connections';
|
||||
import {
|
||||
createKloConnectorCapabilities,
|
||||
type KloColumnSampleInput,
|
||||
type KloColumnSampleResult,
|
||||
type KloColumnStatsInput,
|
||||
type KloColumnStatsResult,
|
||||
type KloQueryResult,
|
||||
type KloReadOnlyQueryInput,
|
||||
type KloScanConnector,
|
||||
type KloScanContext,
|
||||
type KloScanInput,
|
||||
type KloSchemaColumn,
|
||||
type KloSchemaSnapshot,
|
||||
type KloSchemaTable,
|
||||
type KloTableRef,
|
||||
type KloTableSampleInput,
|
||||
type KloTableSampleResult,
|
||||
} from '@klo/context/scan';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { KloBigQueryDialect } from './dialect.js';
|
||||
|
||||
/**
 * Raw, unresolved BigQuery connection entry as supplied by configuration.
 *
 * Every field is optional at the type level; `bigQueryConnectionConfigFromConfig` enforces which
 * ones are actually required. Unknown extra keys are tolerated.
 */
export interface KloBigQueryConnectionConfig {
  /** Driver name; compared case-insensitively against `bigquery`. */
  driver?: string;
  /** Must be exactly `true` for the native connector to accept the connection. */
  readonly?: boolean;
  /** Service-account credentials as a JSON document (or a string reference to one). */
  credentials_json?: string;
  /** Single dataset to scan; only consulted when `dataset_ids` is absent or empty. */
  dataset_id?: string;
  /** Datasets to scan; takes precedence over `dataset_id` when non-empty. */
  dataset_ids?: Array<string>;
  /** Optional job location forwarded with every query. */
  location?: string;
  [key: string]: unknown;
}
|
||||
|
||||
/** Validated connection settings produced by `bigQueryConnectionConfigFromConfig`. */
export interface KloBigQueryResolvedConnectionConfig {
  /** Parsed contents of `credentials_json`. */
  credentials: Record<string, unknown>;
  /** Project id taken from `credentials.project_id`. */
  projectId: string;
  /** Datasets to scan; never empty once resolution succeeds. */
  datasetIds: Array<string>;
  /** Job location, present only when configured. */
  location?: string;
}
|
||||
|
||||
/** Read-only query input extended with optional named parameters for the SQL text. */
export interface KloBigQueryReadOnlyQueryInput extends KloReadOnlyQueryInput {
  /** Parameter values keyed by name; handed to the dialect's `prepareQuery` before execution. */
  params?: { [name: string]: unknown };
}
|
||||
|
||||
/** Tuning knobs for `KloBigQueryScanConnector.getColumnDistinctValues`. */
export interface KloBigQueryColumnDistinctValuesOptions {
  /** Sample size passed to the cardinality query; the connector falls back to 10000 when omitted. */
  sampleSize?: number;
  /** Values are only fetched when the estimated cardinality does not exceed this bound. */
  maxCardinality: number;
  /** Limit passed to the distinct-values query. */
  limit: number;
}
|
||||
|
||||
/** Outcome of a distinct-values lookup for one column. */
export interface KloBigQueryColumnDistinctValuesResult {
  /** Estimated number of distinct values reported by the cardinality query. */
  cardinality: number;
  /** Distinct values as strings, or `null` when the cardinality exceeded `maxCardinality`. */
  values: Array<string> | null;
}
|
||||
|
||||
/** The slice of a BigQuery query job that the connector depends on. */
export interface KloBigQueryQueryJob {
  /**
   * Resolves to a tuple: the result rows keyed by column name, a second element the connector
   * ignores, and an optional response object whose `schema.fields` describes the result columns.
   */
  getQueryResults(): Promise<
    [
      rows: Record<string, unknown>[],
      next: unknown,
      response?: { schema?: { fields?: TableField[] } },
      ...rest: unknown[],
    ]
  >;
}
|
||||
|
||||
/** Handle to a single table inside a dataset. */
export interface KloBigQueryTableRef {
  /** Table name; the connector substitutes an empty string when it is missing. */
  id?: string;
  /**
   * Loads the table; the first tuple element carries the metadata that introspection reads
   * (kind, row count, description, and column schema).
   */
  get(): Promise<
    [
      table: {
        metadata: {
          type?: string;
          description?: string;
          numRows?: number | string;
          schema?: { fields?: TableField[] };
        };
      },
      ...rest: unknown[],
    ]
  >;
}
|
||||
|
||||
/** Handle to a dataset, limited to the calls the connector makes. */
export interface KloBigQueryDataset {
  /** Lists the dataset's tables; the first tuple element is the list of table handles. */
  getTables(): Promise<[tables: KloBigQueryTableRef[], ...rest: unknown[]]>;
  /** Fetches the dataset itself; `testConnection` awaits it and ignores the result. */
  get(): Promise<unknown>;
}
|
||||
|
||||
/** Narrow client surface the connector needs; implemented by the real SDK wrapper and by test fakes. */
export interface KloBigQueryClient {
  /** Returns a handle for the named dataset. */
  dataset(datasetId: string): KloBigQueryDataset;
  /** Lists datasets; the first tuple element holds entries with an optional `id`. */
  getDatasets(input?: { maxResults?: number }): Promise<[datasets: { id?: string }[], ...rest: unknown[]]>;
  /** Submits a query; the first tuple element is the job to read results from. */
  createQueryJob(input: {
    query: string;
    params?: Record<string, unknown>;
    location?: string;
    jobTimeoutMs?: number;
    maximumBytesBilled?: string;
  }): Promise<[job: KloBigQueryQueryJob, ...rest: unknown[]]>;
}
|
||||
|
||||
/** Creates clients for a project; injectable so tests can substitute an in-memory client. */
export interface KloBigQueryClientFactory {
  /** Builds a client bound to the given project id and parsed credentials. */
  createClient(input: { credentials: Record<string, unknown>; projectId: string }): KloBigQueryClient;
}
|
||||
|
||||
/** Constructor options for `KloBigQueryScanConnector`. */
export interface KloBigQueryScanConnectorOptions {
  /** Identifier of the connection; scan inputs naming a different id are rejected. */
  connectionId: string;
  /** Raw connection entry; validated and resolved in the constructor. */
  connection: KloBigQueryConnectionConfig | undefined;
  /** Environment used to resolve `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Client factory override; a factory wrapping the real SDK is used when omitted. */
  clientFactory?: KloBigQueryClientFactory;
  /** Clock used for `extractedAt`; defaults to the current time. */
  now?: () => Date;
  /** When set, forwarded (as a string) as `maximumBytesBilled` on every query job. */
  maxBytesBilled?: string | number;
  /** When set, forwarded as `jobTimeoutMs` on every query job. */
  queryTimeoutMs?: number;
}
|
||||
|
||||
/**
 * Default factory: wraps the official `BigQuery` SDK client in the narrow
 * `KloBigQueryClient` surface the connector uses.
 */
class DefaultBigQueryClientFactory implements KloBigQueryClientFactory {
  /** Creates an SDK client from the project id and credentials and adapts it. */
  createClient(input: { projectId: string; credentials: Record<string, unknown> }): KloBigQueryClient {
    const sdk = new BigQuery(input);

    // Adapts an SDK dataset handle; the casts narrow the SDK's return types to the local ones.
    const adaptDataset = (datasetId: string): KloBigQueryDataset => {
      const handle = sdk.dataset(datasetId);
      return {
        get: () => handle.get() as Promise<unknown>,
        getTables: () => handle.getTables() as Promise<[KloBigQueryTableRef[], ...unknown[]]>,
      };
    };

    return {
      getDatasets: (options) => sdk.getDatasets(options) as Promise<[Array<{ id?: string }>, ...unknown[]]>,
      dataset: (datasetId) => adaptDataset(datasetId),
      createQueryJob: (options) => sdk.createQueryJob(options) as Promise<[KloBigQueryQueryJob, ...unknown[]]>,
    };
  }
}
|
||||
|
||||
/**
 * Resolves a configuration string that may be an indirect reference.
 *
 * - `env:NAME` returns `env[NAME]`, or an empty string when the variable is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file; a leading `~` in PATH is
 *   expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param value Raw configuration value.
 * @param env Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a `file:` target cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  const envPrefix = 'env:';
  const filePrefix = 'file:';

  if (value.startsWith(envPrefix)) {
    return env[value.slice(envPrefix.length)] ?? '';
  }

  if (value.startsWith(filePrefix)) {
    const rawPath = value.slice(filePrefix.length);
    // path.resolve() discards earlier segments once a later one is absolute, so the separator
    // after `~` must be stripped; otherwise `~/key.json` resolves to `/key.json`, not `$HOME/key.json`.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }

  return value;
}
|
||||
|
||||
/**
 * Reads one key of the connection entry as a resolved string.
 *
 * @returns `undefined` when the connection is missing, the value is not a string, or the value
 *   is blank; otherwise the trimmed value after `env:`/`file:` reference resolution.
 */
function stringConfigValue(
  connection: KloBigQueryConnectionConfig | undefined,
  key: keyof KloBigQueryConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Collects the dataset ids to scan from a connection entry.
 *
 * A non-empty `dataset_ids` array takes precedence; otherwise the single `dataset_id` is used.
 * Entries are trimmed before reference resolution (matching `stringConfigValue`), so a padded
 * `env:`/`file:` reference is still recognised. Non-string entries, blank entries, and
 * references that resolve to an empty string are dropped rather than passed on as dataset ids.
 *
 * @returns The resolved dataset ids; may be empty, which the caller treats as a config error.
 */
function datasetIds(connection: KloBigQueryConnectionConfig, env: NodeJS.ProcessEnv): string[] {
  const configured: unknown = connection.dataset_ids;
  if (Array.isArray(configured) && configured.length > 0) {
    return configured
      // Configuration is untyped at runtime; skip anything that is not a string instead of throwing.
      .filter((dataset): dataset is string => typeof dataset === 'string')
      .map((dataset) => dataset.trim())
      .filter((dataset) => dataset.length > 0)
      .map((dataset) => resolveStringReference(dataset, env))
      // An unset env reference resolves to '' and must not become a dataset id.
      .filter((dataset) => dataset.length > 0);
  }
  const datasetId = stringConfigValue(connection, 'dataset_id', env);
  return datasetId ? [datasetId] : [];
}
|
||||
|
||||
/**
 * Maps a BigQuery table metadata `type` to the schema snapshot's table kind.
 * The comparison is case-insensitive; unknown or missing types are reported as `'table'`.
 */
function tableKind(metadataType: string | undefined): KloSchemaTable['kind'] {
  switch (String(metadataType ?? '').toUpperCase()) {
    case 'VIEW':
    case 'MATERIALIZED_VIEW':
      return 'view';
    case 'EXTERNAL':
    case 'EXTERNAL_TABLE':
      return 'external';
    default:
      return 'table';
  }
}
|
||||
|
||||
/**
 * Converts a loosely-typed value to a finite number.
 *
 * Only numbers, bigints, and non-blank strings are converted. Everything else — notably `null`,
 * `undefined`, booleans, and blank strings, which `Number()` would coerce to `0` or `1` — yields
 * `null`, so a missing value is never mistaken for a real zero.
 *
 * @returns The finite numeric value, or `null` when the input is not numeric.
 */
function firstNumber(value: unknown): number | null {
  if (typeof value === 'string') {
    // Number('') and Number('   ') are 0; treat blank strings as "no value".
    if (value.trim().length === 0) {
      return null;
    }
  } else if (typeof value !== 'number' && typeof value !== 'bigint') {
    return null;
  }
  const numberValue = Number(value);
  return Number.isFinite(numberValue) ? numberValue : null;
}
|
||||
|
||||
/**
 * Flattens a BigQuery result cell into a value suitable for a tabular result.
 *
 * - `null`/`undefined` become `null`.
 * - Arrays become a `', '`-joined string. Object elements are normalized first so arrays of
 *   records or wrapper objects do not collapse to `[object Object]`; primitive elements are
 *   stringified directly.
 * - Objects exposing both `toNumber()` and `toFixed()` are converted with `toNumber()`.
 * - Objects whose only key is a non-object `value` are unwrapped to that value.
 * - Any other object is JSON-serialized.
 * - Primitives pass through unchanged.
 */
function normalizeValue(value: unknown): unknown {
  if (value === null || value === undefined) {
    return null;
  }
  if (Array.isArray(value)) {
    return value
      .map((item) => (typeof item === 'object' && item !== null ? String(normalizeValue(item)) : String(item)))
      .join(', ');
  }
  if (typeof value === 'object') {
    // Big-number style wrapper: detected by duck typing on toNumber/toFixed.
    if ('toNumber' in value && typeof value.toNumber === 'function' && 'toFixed' in value && typeof value.toFixed === 'function') {
      return value.toNumber();
    }
    // Single-field `{ value }` wrapper: unwrap to the primitive payload.
    if ('value' in value && Object.keys(value).length === 1 && typeof value.value !== 'object') {
      return value.value;
    }
    return JSON.stringify(value);
  }
  return value;
}
|
||||
|
||||
/**
 * Reports whether a connection entry targets the BigQuery driver.
 * The driver name is compared case-insensitively; a missing connection or driver yields `false`.
 */
export function isKloBigQueryConnectionConfig(connection: KloBigQueryConnectionConfig | undefined): boolean {
  const driverName = String(connection?.driver ?? '');
  return driverName.toLowerCase() === 'bigquery';
}
|
||||
|
||||
/**
 * Validates a raw connection entry and resolves it into the settings the connector needs.
 *
 * @param input.connectionId Connection name, used only in error messages.
 * @param input.connection Raw connection entry.
 * @param input.env Environment for `env:` references; defaults to `process.env`.
 * @returns Project id, parsed credentials, dataset ids, and the location when configured.
 * @throws Error when the driver is not BigQuery, `readonly` is not `true`, `credentials_json` is
 *   missing, not valid JSON, or not a JSON object, `project_id` is absent from the credentials,
 *   or no dataset id is configured.
 */
export function bigQueryConnectionConfigFromConfig(input: {
  connectionId: string;
  connection: KloBigQueryConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KloBigQueryResolvedConnectionConfig {
  const { connectionId, connection } = input;

  if (!connection || !isKloBigQueryConnectionConfig(connection)) {
    throw new Error(`Native BigQuery connector cannot run driver "${connection?.driver ?? 'unknown'}"`);
  }
  if (connection.readonly !== true) {
    throw new Error(`Native BigQuery connector requires connections.${connectionId}.readonly: true`);
  }

  const env = input.env ?? process.env;
  const credentialsJson = stringConfigValue(connection, 'credentials_json', env);
  if (!credentialsJson) {
    throw new Error(`Native BigQuery connector requires connections.${connectionId}.credentials_json`);
  }

  let parsedCredentials: unknown;
  try {
    parsedCredentials = JSON.parse(credentialsJson);
  } catch {
    // The parser's own message is deliberately dropped: it can quote part of the input,
    // which here is secret credential material.
    throw new Error(`Native BigQuery connector requires connections.${connectionId}.credentials_json to be valid JSON`);
  }
  // JSON such as `null`, `"text"` or `[]` parses successfully but is not a credentials document.
  if (typeof parsedCredentials !== 'object' || parsedCredentials === null || Array.isArray(parsedCredentials)) {
    throw new Error(`Native BigQuery connector requires connections.${connectionId}.credentials_json to be a JSON object`);
  }
  const credentials = parsedCredentials as Record<string, unknown>;

  const projectId = typeof credentials.project_id === 'string' ? credentials.project_id : undefined;
  if (!projectId) {
    throw new Error(`Native BigQuery connector requires credentials_json.project_id for connections.${connectionId}`);
  }

  const resolvedDatasetIds = datasetIds(connection, env);
  if (resolvedDatasetIds.length === 0) {
    throw new Error(`Native BigQuery connector requires connections.${connectionId}.dataset_id or dataset_ids`);
  }

  const location = stringConfigValue(connection, 'location', env);
  return { projectId, credentials, datasetIds: resolvedDatasetIds, ...(location ? { location } : {}) };
}
|
||||
|
||||
/**
 * Read-only scan connector for BigQuery.
 *
 * The connection entry is validated in the constructor. The client is created lazily on first
 * use and dropped by `cleanup()`. Every scan entry point that receives a `connectionId` rejects
 * ids other than the one the connector was built for.
 */
export class KloBigQueryScanConnector implements KloScanConnector {
  readonly id: string;
  readonly driver = 'bigquery' as const;
  readonly capabilities = createKloConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: false,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly resolved: KloBigQueryResolvedConnectionConfig;
  private readonly clientFactory: KloBigQueryClientFactory;
  private readonly now: () => Date;
  private readonly maxBytesBilled?: number | string;
  private readonly queryTimeoutMs?: number;
  private readonly dialect = new KloBigQueryDialect();
  private client: KloBigQueryClient | null = null;

  /**
   * @throws Error when the connection entry fails validation (see `bigQueryConnectionConfigFromConfig`).
   */
  constructor(options: KloBigQueryScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.resolved = bigQueryConnectionConfigFromConfig({
      connectionId: options.connectionId,
      connection: options.connection,
      env: options.env,
    });
    this.clientFactory = options.clientFactory ?? new DefaultBigQueryClientFactory();
    this.now = options.now ?? (() => new Date());
    this.maxBytesBilled = options.maxBytesBilled;
    this.queryTimeoutMs = options.queryTimeoutMs;
    this.id = `bigquery:${options.connectionId}`;
  }

  /**
   * Checks connectivity by listing at most one dataset and fetching every configured dataset.
   * Failures are reported in the result rather than thrown.
   */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      const client = this.getClient();
      await client.getDatasets({ maxResults: 1 });
      for (const datasetId of this.resolved.datasetIds) {
        await client.dataset(datasetId).get();
      }
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /** Builds a schema snapshot covering every table of every configured dataset. */
  async introspect(input: KloScanInput, _ctx: KloScanContext): Promise<KloSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const tables: KloSchemaTable[] = [];
    for (const datasetId of this.resolved.datasetIds) {
      tables.push(...(await this.introspectDataset(datasetId)));
    }
    return {
      connectionId: this.connectionId,
      driver: 'bigquery',
      extractedAt: this.now().toISOString(),
      scope: { catalogs: [this.resolved.projectId], datasets: this.resolved.datasetIds },
      metadata: {
        project_id: this.resolved.projectId,
        datasets: this.resolved.datasetIds,
        table_count: tables.length,
        total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables,
    };
  }

  /** Runs the dialect's sample query for a table and returns headers, header types, and rows. */
  async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise<KloTableSampleResult & { headerTypes?: string[] }> {
    this.assertConnection(input.connectionId);
    const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
    return { headers: result.headers, headerTypes: result.headerTypes, rows: result.rows, totalRows: result.totalRows };
  }

  /** Samples values of one column; null cells are dropped and no counts are computed. */
  async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise<KloColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    return {
      values: result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]),
      nullCount: null,
      distinctCount: null,
    };
  }

  /** Column statistics are not provided by this connector (`capabilities.columnStats` is false). */
  async columnStats(_input: KloColumnStatsInput, _ctx: KloScanContext): Promise<KloColumnStatsResult | null> {
    return null;
  }

  /**
   * Executes caller-supplied SQL after `assertReadOnlySql` has accepted it and
   * `limitSqlForExecution` has applied `maxRows`.
   */
  async executeReadOnly(input: KloBigQueryReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
    this.assertConnection(input.connectionId);
    const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
    const prepared = this.dialect.prepareQuery(limitedSql, input.params);
    const result = await this.query(prepared.sql, prepared.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Estimates a column's cardinality and, when it is small enough, fetches its distinct values.
   *
   * @returns `null` when no cardinality could be read; `values: null` when the cardinality
   *   exceeds `options.maxCardinality`; otherwise the distinct non-null values as strings.
   */
  async getColumnDistinctValues(
    table: KloTableRef,
    columnName: string,
    options: KloBigQueryColumnDistinctValuesOptions,
  ): Promise<KloBigQueryColumnDistinctValuesResult | null> {
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinality = await this.singleNumber(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, options.sampleSize ?? 10000),
      'cardinality',
    );
    if (cardinality === null) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valueRows = await this.queryRaw<{ val: unknown }>(
      this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit),
    );
    return { values: valueRows.filter((row) => row.val !== null).map((row) => String(row.val)), cardinality };
  }

  /**
   * Returns the row count reported in a table's metadata.
   *
   * Only the requested table's metadata is fetched; the rest of the dataset is not introspected
   * and no primary-key query is issued.
   *
   * @param tableName Table to look up.
   * @param datasetId Dataset containing the table; defaults to the first configured dataset.
   * @returns The row count, or `0` when the dataset or table is missing or the count is not numeric.
   */
  async getTableRowCount(tableName: string, datasetId = this.resolved.datasetIds[0]): Promise<number> {
    if (!datasetId) {
      return 0;
    }
    const [tableRefs] = await this.getClient().dataset(datasetId).getTables();
    // Mirror introspection, which names a table `id || ''`.
    const tableRef = tableRefs.find((candidate) => (candidate.id || '') === tableName);
    if (!tableRef) {
      return 0;
    }
    const [table] = await tableRef.get();
    return firstNumber(table.metadata.numRows) ?? 0;
  }

  /** Formats a table reference with the dialect's table-name formatter. */
  qTableName(table: Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes a single identifier with the dialect's quoting rules. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Lists the ids of the datasets returned by the client; entries without an id are skipped. */
  async listDatasets(): Promise<string[]> {
    const [datasets] = await this.getClient().getDatasets();
    return datasets.map((dataset) => dataset.id).filter((id): id is string => Boolean(id));
  }

  /** Drops the cached client; the next call creates a new one. */
  async cleanup(): Promise<void> {
    this.client = null;
  }

  /** Returns the cached client, creating it from the resolved project id and credentials on first use. */
  private getClient(): KloBigQueryClient {
    if (!this.client) {
      this.client = this.clientFactory.createClient({
        projectId: this.resolved.projectId,
        credentials: this.resolved.credentials,
      });
    }
    return this.client;
  }

  /**
   * Runs a query job and converts the result into a header/row matrix.
   * Location, parameters, byte limit, and timeout are attached only when set. Headers come from
   * the result schema, falling back to the keys of the first row when no schema is returned.
   */
  private async query(sql: string, params?: Record<string, unknown>): Promise<KloQueryResult> {
    const [job] = await this.getClient().createQueryJob({
      query: sql,
      ...(this.resolved.location ? { location: this.resolved.location } : {}),
      ...(params && Object.keys(params).length > 0 ? { params } : {}),
      ...(this.maxBytesBilled ? { maximumBytesBilled: String(this.maxBytesBilled) } : {}),
      ...(this.queryTimeoutMs ? { jobTimeoutMs: this.queryTimeoutMs } : {}),
    });
    const [rows, , response] = await job.getQueryResults();
    let headers = response?.schema?.fields?.map((field) => field.name || '') ?? [];
    const headerTypes = response?.schema?.fields?.map((field) => String(field.type || 'STRING')) ?? [];
    if (headers.length === 0 && rows.length > 0) {
      headers = Object.keys(rows[0]!);
    }
    return {
      headers,
      headerTypes: headerTypes.length > 0 ? headerTypes : undefined,
      rows: rows.map((row) => headers.map((header) => normalizeValue(row[header]))),
      totalRows: rows.length,
      rowCount: rows.length,
    };
  }

  /** Runs a query and returns each row as an object keyed by header. */
  private async queryRaw<T extends Record<string, unknown>>(sql: string, params?: Record<string, unknown>): Promise<T[]> {
    const result = await this.query(sql, params);
    return result.rows.map((row) => Object.fromEntries(result.headers.map((header, index) => [header, row[index]])) as T);
  }

  /** Runs a query and reads one numeric column from its first row; `null` when unavailable. */
  private async singleNumber(sql: string, header: string): Promise<number | null> {
    const rows = await this.queryRaw<Record<string, unknown>>(sql);
    return firstNumber(rows[0]?.[header]);
  }

  /** Loads metadata for every table in a dataset and converts it to schema tables. */
  private async introspectDataset(datasetId: string): Promise<KloSchemaTable[]> {
    const dataset = this.getClient().dataset(datasetId);
    const [tableRefs] = await dataset.getTables();
    const primaryKeys = await this.primaryKeys(datasetId);
    const tables: KloSchemaTable[] = [];
    for (const tableRef of tableRefs) {
      const tableName = tableRef.id || '';
      const [table] = await tableRef.get();
      const fields = table.metadata.schema?.fields ?? [];
      tables.push({
        catalog: this.resolved.projectId,
        db: datasetId,
        name: tableName,
        kind: tableKind(table.metadata.type),
        comment: table.metadata.description || null,
        estimatedRows: firstNumber(table.metadata.numRows) ?? 0,
        columns: fields.map((field) => this.toSchemaColumn(tableName, field, primaryKeys)),
        foreignKeys: [],
      });
    }
    return tables;
  }

  /**
   * Reads declared primary-key columns for a dataset from INFORMATION_SCHEMA.
   * Columns whose names start with `stacksync_record_id_` or `sync_primary_key_` are excluded by
   * the query itself.
   *
   * @returns Map from table name to the set of its primary-key column names.
   */
  private async primaryKeys(datasetId: string): Promise<Map<string, Set<string>>> {
    const schemaPath = `${this.resolved.projectId}.${datasetId}.INFORMATION_SCHEMA`;
    // The dataset id is bound as a query parameter rather than spliced into a string literal.
    // The backtick-quoted view paths still embed the project and dataset ids, because
    // identifiers cannot be parameterized.
    const sql =
      'SELECT tc.table_name, kcu.column_name ' +
      `FROM \`${schemaPath}.TABLE_CONSTRAINTS\` tc ` +
      `JOIN \`${schemaPath}.KEY_COLUMN_USAGE\` kcu ` +
      'ON tc.constraint_name = kcu.constraint_name ' +
      'AND tc.table_schema = kcu.table_schema ' +
      'AND tc.table_name = kcu.table_name ' +
      "WHERE tc.constraint_type = 'PRIMARY KEY' " +
      'AND tc.table_schema = @dataset_id ' +
      "AND NOT REGEXP_CONTAINS(kcu.column_name, r'^(stacksync_record_id|sync_primary_key)_') " +
      'ORDER BY tc.table_name, kcu.ordinal_position';
    const rows = await this.queryRaw<{ table_name: string; column_name: string }>(sql, { dataset_id: datasetId });

    const grouped = new Map<string, Set<string>>();
    for (const row of rows) {
      const columns = grouped.get(row.table_name) ?? new Set<string>();
      columns.add(row.column_name);
      grouped.set(row.table_name, columns);
    }
    return grouped;
  }

  /** Converts one BigQuery field into a schema column, flagging primary-key membership. */
  private toSchemaColumn(tableName: string, field: TableField, primaryKeys: Map<string, Set<string>>): KloSchemaColumn {
    const nativeType = String(field.type || 'STRING').toUpperCase();
    return {
      name: field.name || '',
      nativeType,
      normalizedType: this.dialect.mapDataType(nativeType),
      dimensionType: this.dialect.mapToDimensionType(nativeType),
      // Only REQUIRED mode is non-nullable; every other mode counts as nullable.
      nullable: field.mode !== 'REQUIRED',
      primaryKey: primaryKeys.get(tableName)?.has(field.name || '') ?? false,
      comment: field.description || null,
    };
  }

  /** Rejects scan inputs addressed to a different connection. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`BigQuery connector ${this.connectionId} cannot scan connection ${connectionId}`);
    }
  }
}
|
||||
52
packages/connector-bigquery/src/dialect.test.ts
Normal file
52
packages/connector-bigquery/src/dialect.test.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KloBigQueryDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the SQL text produced by KloBigQueryDialect; no BigQuery client is involved.
describe('KloBigQueryDialect', () => {
  const dialect = new KloBigQueryDialect();
  const ordersTable = '`p`.`d`.`orders`';

  it('quotes identifiers and formats project.dataset.table names', () => {
    expect(dialect.quoteIdentifier('order`items')).toBe('`order\\`items`');

    // Fully qualified, dataset-qualified, and bare table references.
    const fullyQualified = dialect.formatTableName({ catalog: 'project-1', db: 'analytics', name: 'orders' });
    const datasetQualified = dialect.formatTableName({ db: 'analytics', name: 'orders' });
    const bare = dialect.formatTableName({ name: 'orders' });

    expect(fullyQualified).toBe('`project-1`.`analytics`.`orders`');
    expect(datasetQualified).toBe('`analytics`.`orders`');
    expect(bare).toBe('`orders`');
  });

  it('maps native BigQuery types to normalized types and scan dimensions', () => {
    const normalizedTypeCases: Array<[string, string]> = [
      ['INT64', 'BIGINT'],
      ['STRUCT', 'JSON'],
      ['GEOGRAPHY', 'GEOGRAPHY'],
    ];
    for (const [nativeType, expected] of normalizedTypeCases) {
      expect(dialect.mapDataType(nativeType)).toBe(expected);
    }

    const dimensionTypeCases: Array<[string, string]> = [
      ['TIMESTAMP', 'time'],
      ['NUMERIC', 'number'],
      ['BOOL', 'boolean'],
      ['JSON', 'string'],
    ];
    for (const [nativeType, expected] of dimensionTypeCases) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(expected);
    }
  });

  it('generates sampling, cardinality, and distinct-value SQL', () => {
    const tableSample = dialect.generateSampleQuery(ordersTable, 5, ['id', 'status']);
    expect(tableSample).toBe('SELECT `id`, `status` FROM `p`.`d`.`orders` ORDER BY RAND() LIMIT 5');

    const columnSample = dialect.generateColumnSampleQuery(ordersTable, 'status', 10);
    expect(columnSample).toBe(
      "SELECT `status` FROM `p`.`d`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS STRING)) != '' ORDER BY RAND() LIMIT 10",
    );

    // These two generators receive an already-quoted column expression.
    const cardinalitySql = dialect.generateCardinalitySampleQuery(ordersTable, '`status`', 100);
    expect(cardinalitySql).toContain('SELECT APPROX_COUNT_DISTINCT(val) AS cardinality');

    const distinctSql = dialect.generateDistinctValuesQuery(ordersTable, '`status`', 20);
    expect(distinctSql).toContain('SELECT DISTINCT CAST(`status` AS STRING) AS val');
  });

  it('rewrites colon parameters to BigQuery named parameters', () => {
    // `:id` must not be rewritten inside `:id_2`.
    const withParams = dialect.prepareQuery('SELECT * FROM orders WHERE id = :id AND id_2 = :id_2', {
      id: 1,
      id_2: 2,
    });
    expect(withParams).toEqual({
      sql: 'SELECT * FROM orders WHERE id = @id AND id_2 = @id_2',
      params: { id: 1, id_2: 2 },
    });

    const withoutParams = dialect.prepareQuery('SELECT * FROM orders');
    expect(withoutParams).toEqual({ sql: 'SELECT * FROM orders', params: undefined });
  });

  it('keeps unsupported statistics explicit', () => {
    const statisticsSql = dialect.generateColumnStatisticsQuery('analytics', 'orders');
    expect(statisticsSql).toBeNull();
  });
});
|
||||
207
packages/connector-bigquery/src/dialect.ts
Normal file
207
packages/connector-bigquery/src/dialect.ts
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
import type { KloSchemaDimensionType, KloTableRef } from '@klo/context/scan';
|
||||
|
||||
/** Table reference whose `name` is required while `catalog` and `db` are optional. */
type BigQueryTableNameRef = Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL text helpers for BigQuery: identifier quoting, type mapping, sampling and
 * cardinality queries, named-parameter rewriting and time bucketing.
 *
 * Every method returns SQL text (or a mapped type name); nothing here talks to BigQuery.
 */
export class KloBigQueryDialect {
  readonly type = 'bigquery';

  /** Exact native-type lookups for `mapToDimensionType`; unlisted types fall back to substring heuristics. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    TIMESTAMP: 'time',
    DATETIME: 'time',
    DATE: 'time',
    TIME: 'time',
    INT64: 'number',
    INTEGER: 'number',
    FLOAT64: 'number',
    FLOAT: 'number',
    NUMERIC: 'number',
    BIGNUMERIC: 'number',
    STRING: 'string',
    BYTES: 'string',
    BOOL: 'boolean',
    BOOLEAN: 'boolean',
    // "INTERVAL" contains the substring "INT"; pin it so the numeric heuristic does not claim it.
    INTERVAL: 'string',
  };

  /** Native-type to normalized-type lookups for `mapDataType`, built once per instance instead of per call. */
  private readonly dataTypeMappings: Record<string, string> = {
    RECORD: 'JSON',
    STRUCT: 'JSON',
    STRING: 'VARCHAR',
    BYTES: 'VARBINARY',
    INTEGER: 'BIGINT',
    INT64: 'BIGINT',
    FLOAT: 'DOUBLE',
    FLOAT64: 'DOUBLE',
    NUMERIC: 'DECIMAL',
    BIGNUMERIC: 'DECIMAL',
    BOOLEAN: 'BOOLEAN',
    BOOL: 'BOOLEAN',
    TIMESTAMP: 'TIMESTAMP',
    DATE: 'DATE',
    TIME: 'TIME',
    DATETIME: 'DATETIME',
    GEOGRAPHY: 'GEOGRAPHY',
    JSON: 'JSON',
  };

  /**
   * Wraps an identifier in backticks.
   *
   * Backslashes are escaped as well as backticks: otherwise an identifier ending in `\`
   * would escape the closing backtick and leave the quoted identifier unterminated.
   *
   * @param identifier - Raw, unquoted identifier.
   * @returns The backtick-quoted identifier.
   */
  quoteIdentifier(identifier: string): string {
    return `\`${identifier.replace(/[\\`]/g, '\\$&')}\``;
  }

  /**
   * Formats a table reference as `catalog`.`db`.`name`, `db`.`name` or `name`,
   * depending on which parts are present. `catalog` is only used when `db` is also set.
   *
   * @param table - Table reference; only `name` is required.
   * @returns The quoted, dot-separated table name.
   */
  formatTableName(table: BigQueryTableNameRef): string {
    if (table.catalog && table.db) {
      return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
    }
    if (table.db) {
      return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
    }
    return this.quoteIdentifier(table.name);
  }

  /**
   * Maps a native BigQuery type name to the normalized type name.
   *
   * @param nativeType - Native type name; matched case-insensitively after trimming.
   * @returns The normalized type, or the upper-cased, trimmed input when no mapping exists.
   */
  mapDataType(nativeType: string): string {
    const fieldType = nativeType.toUpperCase().trim();
    return this.dataTypeMappings[fieldType] ?? fieldType;
  }

  /**
   * Classifies a native type into a scan dimension type.
   *
   * Exact matches are looked up first; unknown types fall back to substring heuristics
   * (TIME/DATE, then INT/NUM/FLOAT, then BOOL) and finally to `'string'`.
   *
   * @param nativeType - Native type name; an empty value yields `'string'`.
   * @returns The dimension type for the column.
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const normalizedType = nativeType.toUpperCase().trim();
    const exactMatch = this.typeMappings[normalizedType];
    if (exactMatch) {
      return exactMatch;
    }
    if (normalizedType.includes('TIME') || normalizedType.includes('DATE')) {
      return 'time';
    }
    if (normalizedType.includes('INT') || normalizedType.includes('NUM') || normalizedType.includes('FLOAT')) {
      return 'number';
    }
    if (normalizedType.includes('BOOL')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds a random row-sample query.
   *
   * @param tableName - Table expression inserted verbatim (expected to be quoted already).
   * @param limit - Maximum number of rows.
   * @param columns - Unquoted column names; quoted here. Omitted or empty selects `*`.
   * @returns The sample query.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} ORDER BY RAND() LIMIT ${limit}`;
  }

  /**
   * Builds a random sample of one column's non-null, non-blank values.
   *
   * @param tableName - Table expression inserted verbatim.
   * @param columnName - Unquoted column name; quoted here.
   * @param limit - Maximum number of rows.
   * @returns The sample query.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to BigQuery `@name` named parameters.
   *
   * Only names present in `params` are rewritten, and only where the placeholder ends at a
   * word boundary, so `:id` is left alone inside `:id_2`. Parameter names are matched
   * literally: regex metacharacters in a name are escaped, and the replacement is supplied
   * through a function so `$` sequences in a name are not interpreted.
   *
   * @param sql - SQL text with `:name` placeholders.
   * @param params - Parameter values keyed by name.
   * @returns The rewritten SQL and a copy of the params, or `params: undefined` when none were given.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }
    const names = Object.keys(params);
    if (names.length === 0) {
      return { sql, params: undefined };
    }
    let processedSql = sql;
    for (const name of names) {
      const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      processedSql = processedSql.replace(new RegExp(`:${literalName}\\b`, 'g'), () => `@${name}`);
    }
    return { sql: processedSql, params: { ...params } };
  }

  /**
   * Returns a row-level random filter predicate.
   *
   * @param samplePct - Sampling fraction; only values strictly between 0 and 1 produce a filter.
   * @returns `RAND() < fraction`, or `''` when the fraction is out of range or NaN.
   */
  getRandomSampleFilter(samplePct: number): string {
    // Written as a positive range check so NaN also yields no filter.
    if (!(samplePct > 0 && samplePct < 1)) {
      return '';
    }
    return `RAND() < ${samplePct}`;
  }

  /**
   * Returns a `TABLESAMPLE SYSTEM` clause.
   *
   * @param samplePct - Sampling fraction; only values strictly between 0 and 1 produce a clause.
   * @returns The clause, or `''` when the fraction is out of range or NaN.
   */
  getTableSampleClause(samplePct: number): string {
    if (!(samplePct > 0 && samplePct < 1)) {
      return '';
    }
    // Strip binary floating-point noise, e.g. 0.07 * 100 === 7.000000000000001.
    const percent = Number((samplePct * 100).toPrecision(12));
    return `TABLESAMPLE SYSTEM (${percent} PERCENT)`;
  }

  /**
   * Returns a `LIMIT` clause, with `OFFSET` only when a positive offset is given.
   *
   * @param limit - Maximum number of rows.
   * @param offset - Rows to skip; omitted, zero or negative adds no `OFFSET`.
   * @returns The clause text.
   */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /**
   * Returns an aggregate expression counting NULLs.
   *
   * @param column - Column expression inserted verbatim.
   * @returns The `COUNTIF` expression.
   */
  getNullCountExpression(column: string): string {
    return `COUNTIF(${column} IS NULL)`;
  }

  /**
   * Returns an approximate distinct-count aggregate expression.
   *
   * @param column - Column expression inserted verbatim.
   * @returns The `APPROX_COUNT_DISTINCT` expression.
   */
  getDistinctCountExpression(column: string): string {
    return `APPROX_COUNT_DISTINCT(${column})`;
  }

  /**
   * Builds a query estimating cardinality over the first `sampleSize` non-null values.
   *
   * @param tableName - Table expression inserted verbatim.
   * @param columnName - Column expression inserted verbatim; unlike
   *   `generateColumnSampleQuery`, this method does not quote it.
   * @param sampleSize - Number of non-null rows to sample.
   * @returns A query returning a single `cardinality` column.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
      SELECT APPROX_COUNT_DISTINCT(val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Builds a query listing distinct non-null values, cast to STRING and sorted.
   *
   * @param tableName - Table expression inserted verbatim.
   * @param columnName - Column expression inserted verbatim; not quoted here.
   * @param limit - Maximum number of distinct values.
   * @returns A query returning a single `val` column.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT CAST(${columnName} AS STRING) AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /**
   * Always returns `null`: this dialect does not generate a column-statistics query.
   *
   * @param _schemaName - Unused.
   * @param _tableName - Unused.
   * @returns Always `null`.
   */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /**
   * Same as `generateCardinalitySampleQuery`, but samples rows in random order.
   *
   * @param tableName - Table expression inserted verbatim.
   * @param columnName - Column expression inserted verbatim; not quoted here.
   * @param sampleSize - Number of non-null rows to sample.
   * @returns A query returning a single `cardinality` column.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RAND()
        LIMIT ${sampleSize}
      )
      SELECT APPROX_COUNT_DISTINCT(val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Builds a `DATE_TRUNC` expression for a calendar granularity.
   *
   * @param column - Column expression inserted verbatim.
   * @param granularity - Bucket size.
   * @param timezone - When set, the column is first converted with `DATETIME(column, timezone)`;
   *   the value is escaped as a string literal.
   * @returns The truncation expression.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const bigQueryGranularity = granularity.toUpperCase();
    if (timezone) {
      return `DATE_TRUNC(DATETIME(${column}, ${this.quoteStringLiteral(timezone)}), ${bigQueryGranularity})`;
    }
    return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
  }

  /**
   * Builds an expression that floors a timestamp to multiples of a custom interval,
   * counted from `origin` (default `1970-01-01`).
   *
   * Week intervals are converted to days (amount x 7) before being handed to
   * `TIMESTAMP_DIFF` / `TIMESTAMP_ADD`.
   *
   * NOTE(review): BigQuery documents TIMESTAMP_DIFF/TIMESTAMP_ADD date parts only up to DAY,
   * and here they receive `DATETIME(column, timezone)` when a timezone is given. MONTH,
   * QUARTER or YEAR intervals, and the timezone path, may be rejected at query time.
   * Confirm against real queries.
   *
   * @param column - Column expression inserted verbatim.
   * @param interval - `"<amount> <unit>"`, e.g. `"2 week"`; the amount must be positive.
   * @param origin - Bucket origin timestamp text; escaped as a string literal.
   * @param timezone - Optional timezone; escaped as a string literal.
   * @returns The bucketing expression.
   * @throws Error when `interval` is malformed or its amount is not positive.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `DATETIME(${column}, ${this.quoteStringLiteral(timezone)})` : column;
    const parsed = this.parseInterval(interval);
    let amount = Number(parsed.amount);
    if (amount <= 0) {
      throw new Error(`Invalid interval "${interval}": amount must be positive`);
    }
    let unit = parsed.unit;
    if (unit === 'WEEK') {
      unit = 'DAY';
      amount = amount * 7;
    }
    const originExpr = `TIMESTAMP ${this.quoteStringLiteral(origin ? origin : '1970-01-01')}`;
    return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${unit}) / ${amount}) * ${amount} AS INT64) ${unit})`;
  }

  /**
   * Converts `"<amount> <unit>"` into a BigQuery `INTERVAL <amount> <UNIT>` expression.
   *
   * @param interval - Interval text, e.g. `"7 day"`; surrounding and repeated whitespace is tolerated.
   * @returns The interval expression with the unit upper-cased.
   * @throws Error when `interval` is malformed.
   */
  parseIntervalToSql(interval: string): string {
    const { amount, unit } = this.parseInterval(interval);
    return `INTERVAL ${amount} ${unit}`;
  }

  /** Splits `"<amount> <unit>"`, validating both parts so nothing unexpected reaches the SQL text. */
  private parseInterval(interval: string): { amount: string; unit: string } {
    const [amount, unit] = interval.trim().split(/\s+/);
    if (!amount || !unit || !Number.isFinite(Number(amount)) || !/^[A-Za-z_]+$/.test(unit)) {
      throw new Error(`Invalid interval "${interval}": expected "<amount> <unit>"`);
    }
    return { amount, unit: unit.toUpperCase() };
  }

  /** Renders a value as a single-quoted string literal, escaping backslashes and single quotes. */
  private quoteStringLiteral(value: string): string {
    return `'${value.replace(/[\\']/g, '\\$&')}'`;
  }
}
|
||||
18
packages/connector-bigquery/src/index.ts
Normal file
18
packages/connector-bigquery/src/index.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
export { KloBigQueryDialect } from './dialect.js';
|
||||
export {
|
||||
bigQueryConnectionConfigFromConfig,
|
||||
isKloBigQueryConnectionConfig,
|
||||
KloBigQueryScanConnector,
|
||||
type KloBigQueryClient,
|
||||
type KloBigQueryClientFactory,
|
||||
type KloBigQueryColumnDistinctValuesOptions,
|
||||
type KloBigQueryColumnDistinctValuesResult,
|
||||
type KloBigQueryConnectionConfig,
|
||||
type KloBigQueryDataset,
|
||||
type KloBigQueryQueryJob,
|
||||
type KloBigQueryReadOnlyQueryInput,
|
||||
type KloBigQueryResolvedConnectionConfig,
|
||||
type KloBigQueryScanConnectorOptions,
|
||||
type KloBigQueryTableRef,
|
||||
} from './connector.js';
|
||||
export { createBigQueryLiveDatabaseIntrospection } from './live-database-introspection.js';
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '@klo/context/ingest';
|
||||
import type { KloProjectConnectionConfig } from '@klo/context/project';
|
||||
import {
|
||||
KloBigQueryScanConnector,
|
||||
type KloBigQueryClientFactory,
|
||||
type KloBigQueryConnectionConfig,
|
||||
} from './connector.js';
|
||||
|
||||
/** Options accepted by `createBigQueryLiveDatabaseIntrospection`. */
interface CreateBigQueryLiveDatabaseIntrospectionOptions {
  /** Project connection configs, looked up by connection id. */
  connections: Record<string, KloProjectConnectionConfig>;

  /** Optional client factory passed through to each `KloBigQueryScanConnector`. */
  clientFactory?: KloBigQueryClientFactory;

  /** Optional clock passed through to each `KloBigQueryScanConnector`. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Creates a live-introspection port backed by BigQuery scan connectors.
 *
 * Each `extractSchema` call builds a fresh `KloBigQueryScanConnector` for the requested
 * connection, runs `introspect`, and calls `cleanup` whether or not introspection succeeds.
 *
 * @param options - Connection configs plus the optional client factory and clock handed to each connector.
 * @returns A port whose `extractSchema(connectionId)` resolves to the connector's introspection result.
 */
export function createBigQueryLiveDatabaseIntrospection(
  options: CreateBigQueryLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  return {
    async extractSchema(connectionId: string) {
      // Options are read on every call, not captured when the port is created.
      const { connections, clientFactory, now } = options;
      const connection = connections[connectionId] as KloBigQueryConnectionConfig | undefined;
      const scanner = new KloBigQueryScanConnector({ connectionId, connection, clientFactory, now });

      const target = { connectionId, driver: 'bigquery' as const };
      const run = { runId: `bigquery-${connectionId}` };
      try {
        return await scanner.introspect(target, run);
      } finally {
        await scanner.cleanup();
      }
    },
  };
}
|
||||
11
packages/connector-bigquery/src/package-exports.test.ts
Normal file
11
packages/connector-bigquery/src/package-exports.test.ts
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import * as connector from './index.js';
|
||||
|
||||
describe('@klo/connector-bigquery exports', () => {
|
||||
it('exports public connector, dialect, and introspection APIs', () => {
|
||||
expect(connector.KloBigQueryDialect).toBeTypeOf('function');
|
||||
expect(connector.KloBigQueryScanConnector).toBeTypeOf('function');
|
||||
expect(connector.bigQueryConnectionConfigFromConfig).toBeTypeOf('function');
|
||||
expect(connector.createBigQueryLiveDatabaseIntrospection).toBeTypeOf('function');
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue