test: split cli tests from source tree (#216)

* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name } shape, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
This commit is contained in:
Andrey Avtomonov 2026-05-26 08:49:05 +02:00 committed by GitHub
parent 924868841d
commit 56985b7e09
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
548 changed files with 5048 additions and 2228 deletions

View file

@ -1,483 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { bigQueryConnectionConfigFromConfig, isKtxBigQueryConnectionConfig, type KtxBigQueryClient, KtxBigQueryScanConnector, type KtxBigQueryClientFactory, type KtxBigQueryDataset, type KtxBigQueryQueryJob, type KtxBigQueryTableRef } from '../../connectors/bigquery/connector.js';
import { createBigQueryLiveDatabaseIntrospection } from '../../connectors/bigquery/live-database-introspection.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
/**
 * Builds a stub BigQuery client factory for the connector tests.
 *
 * The stubbed `createQueryJob` picks a canned result by substring-matching the
 * submitted SQL; the branches are checked in order and the first match wins.
 * A query that matches none of them receives the default `id`/`status` row.
 * Every dataset returned by `dataset()` exposes a single `orders` table.
 *
 * @param options.primaryKeyError When provided, the
 * INFORMATION_SCHEMA.TABLE_CONSTRAINTS query throws this error instead of
 * returning primary-key rows.
 * @returns A factory whose `createClient` is a vitest mock, so tests can
 * inspect the client it produced.
 */
function fakeClientFactory(options: { primaryKeyError?: Error } = {}): KtxBigQueryClientFactory {
// Default job result: one row plus schema metadata for the `id` and `status` columns.
const queryResults = vi.fn(async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ id: 1, status: 'paid' }],
undefined,
{ schema: { fields: [{ name: 'id', type: 'INT64' }, { name: 'status', type: 'STRING' }] } },
]);
const createQueryJob = vi.fn(async (input: { query: string }): ReturnType<KtxBigQueryClient['createQueryJob']> => {
// Primary-key discovery: either fail with the injected error or report `orders.id`.
if (input.query.includes('INFORMATION_SCHEMA.TABLE_CONSTRAINTS')) {
if (options.primaryKeyError) {
throw options.primaryKeyError;
}
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ table_name: 'orders', column_name: 'id' }],
undefined,
{ schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
],
},
];
}
// Cardinality probe: always reports two distinct values.
if (input.query.includes('APPROX_COUNT_DISTINCT')) {
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ cardinality: 2 }],
undefined,
{ schema: { fields: [{ name: 'cardinality', type: 'INT64' }] } },
],
},
];
}
// Distinct-values query: returns the two values as `val` rows.
if (input.query.includes('SELECT DISTINCT CAST')) {
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ val: 'open' }, { val: 'paid' }],
undefined,
{ schema: { fields: [{ name: 'val', type: 'STRING' }] } },
],
},
];
}
// Single-column sample of `status`.
if (input.query.includes('SELECT `status`')) {
return [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[{ status: 'paid' }],
undefined,
{ schema: { fields: [{ name: 'status', type: 'STRING' }] } },
],
},
];
}
// Anything else falls through to the default `id`/`status` result.
return [{ getQueryResults: queryResults }];
});
// Table metadata for `orders`: 12 rows (as a string) and three columns, one of them a RECORD.
const getTable = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{
metadata: {
type: 'TABLE',
numRows: '12',
description: 'Orders table',
schema: {
fields: [
{ name: 'id', type: 'INT64', mode: 'REQUIRED', description: 'Order id' },
{ name: 'status', type: 'STRING', mode: 'NULLABLE' },
{ name: 'payload', type: 'RECORD', mode: 'NULLABLE' },
],
},
},
},
]);
const tableRef: KtxBigQueryTableRef = { id: 'orders', get: getTable };
return {
createClient: vi.fn(() => ({
// The project lists two datasets regardless of the configured scope.
getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }, { id: 'staging' }]]),
dataset: vi.fn(
(datasetId: string): KtxBigQueryDataset => ({
get: vi.fn(async () => [{ id: datasetId }]),
getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [[tableRef]]),
}),
),
createQueryJob,
})),
};
}
// Shared BigQuery connection fixture scoped to the `analytics` dataset.
// The project id is carried inside the serialized credentials JSON.
const connection = {
driver: 'bigquery',
dataset_id: 'analytics',
credentials_json: JSON.stringify({ project_id: 'project-1', client_email: 'reader@example.test' }),
location: 'US',
} as const;
describe('KtxBigQueryScanConnector', () => {
it('resolves configuration safely', () => {
  // The type guard accepts the shared BigQuery fixture...
  const acceptsBigQuery = isKtxBigQueryConnectionConfig(connection);
  expect(acceptsBigQuery).toBe(true);

  // ...and rejects a connection that declares another driver.
  const acceptsMysql = isKtxBigQueryConnectionConfig({ driver: 'mysql' });
  expect(acceptsMysql).toBe(false);

  // Resolution yields the project id from the credentials plus the dataset scope and location.
  const resolved = bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection });
  const expectedShape = {
    projectId: 'project-1',
    datasetIds: ['analytics'],
    location: 'US',
  };
  expect(resolved).toMatchObject(expectedShape);
});
// Full introspection against the stub client: checks snapshot-level metadata,
// the `orders` table entry, and the per-column type normalization.
it('introspects datasets, table metadata, primary keys, and normalized types', async () => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory: fakeClientFactory(),
// Fixed clock so `extractedAt` can be asserted exactly.
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'bigquery',
extractedAt: '2026-04-29T17:00:00.000Z',
scope: { catalogs: ['project-1'], datasets: ['analytics'] },
metadata: {
project_id: 'project-1',
datasets: ['analytics'],
table_count: 1,
total_columns: 3,
},
});
// The stub reports numRows as the string '12'; the snapshot exposes it as the number 12.
expect(snapshot.tables[0]).toMatchObject({
catalog: 'project-1',
db: 'analytics',
name: 'orders',
kind: 'table',
comment: 'Orders table',
estimatedRows: 12,
foreignKeys: [],
});
// REQUIRED mode maps to nullable: false; `id` is the primary key reported by the stubbed constraint query.
expect(snapshot.tables[0]?.columns).toEqual([
{
name: 'id',
nativeType: 'INT64',
normalizedType: 'BIGINT',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
{
name: 'status',
nativeType: 'STRING',
normalizedType: 'VARCHAR',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
{
name: 'payload',
nativeType: 'RECORD',
normalizedType: 'JSON',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
]);
});
// Runs once per error shape: an error carrying `code: 403` and an error whose
// `errors` list has reason `notFound`. Both must be downgraded to a scan warning.
it.each([
Object.assign(new Error('Access Denied'), { code: 403 }),
Object.assign(new Error('Not found'), { errors: [{ reason: 'notFound' }] }),
])('soft-fails denied BigQuery primary-key discovery with a scan warning', async (primaryKeyError) => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
// The stub throws this error from the TABLE_CONSTRAINTS query.
clientFactory: fakeClientFactory({ primaryKeyError }),
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery' },
{ runId: 'scan-run-bigquery-denied-pk' },
);
// Introspection still resolves; the failure is reported as exactly one recoverable warning.
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in analytics (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'analytics', kind: 'primary_key' },
},
]);
// Without constraint data no column may be flagged as a primary key.
expect(snapshot.tables[0]?.foreignKeys).toEqual([]);
expect(snapshot.tables[0]?.columns.every((column) => column.primaryKey === false)).toBe(true);
});
// Exercises the connector's query-style entry points one after another against
// the stub client, then calls cleanup().
it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => {
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory: fakeClientFactory(),
});
// Table sample: served by the stub's default `id`/`status` result.
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
columns: ['id', 'status'],
limit: 1,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({
headers: ['id', 'status'],
headerTypes: ['INT64', 'STRING'],
rows: [[1, 'paid']],
totalRows: 1,
});
// Column sample: values come back without null or distinct counts.
await expect(
connector.sampleColumn(
{
connectionId: 'warehouse',
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
column: 'status',
limit: 5,
},
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid'], nullCount: null, distinctCount: null });
// Read-only execution: a SELECT succeeds and reports rowCount alongside totalRows.
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 });
// A DELETE statement must be rejected before it reaches the client.
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
// Distinct values: the stub reports cardinality 2 and the values 'open' and 'paid'.
await expect(
connector.getColumnDistinctValues(
{ catalog: 'project-1', db: 'analytics', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
// 12 matches numRows in the stub's table metadata.
await expect(connector.getTableRowCount('orders')).resolves.toBe(12);
await expect(connector.listDatasets()).resolves.toEqual(['analytics', 'staging']);
// Column statistics resolve to null for this connector.
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
await connector.cleanup();
});
// The dataset lists two tables, but the scope names only `orders`; metadata for
// the out-of-scope `customers` table must never be fetched.
it('limits introspection to tables in tableScope', async () => {
const ordersGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{
metadata: {
type: 'TABLE',
numRows: '12',
schema: { fields: [{ name: 'id', type: 'INT64', mode: 'REQUIRED' }] },
},
},
]);
// Spy for the table that the scope excludes.
const skippedGet = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
{ metadata: { type: 'TABLE', numRows: '1', schema: { fields: [] } } },
]);
const clientFactory: KtxBigQueryClientFactory = {
createClient: vi.fn(() => ({
getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }]]),
dataset: vi.fn(
(): KtxBigQueryDataset => ({
get: vi.fn(async () => [{ id: 'analytics' }]),
getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [
[
{ id: 'orders', get: ordersGet },
{ id: 'customers', get: skippedGet },
],
]),
}),
),
// Every query job returns zero rows, so no primary keys are reported.
createQueryJob: vi.fn(async (): ReturnType<KtxBigQueryClient['createQueryJob']> => [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[],
undefined,
{ schema: { fields: [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }] } },
],
},
]),
})),
};
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory,
});
// The scope uses the full three-part reference (catalog, db, name).
const scope = tableRefSet([{ catalog: 'project-1', db: 'analytics', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'bigquery', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
expect(ordersGet).toHaveBeenCalledTimes(1);
expect(skippedGet).not.toHaveBeenCalled();
});
// The connection here has no dataset_id, yet construction and listTables must
// still work, and the listing must be done with a single query job.
it('constructs for discovery without dataset scope and lists tables through one region information schema query', async () => {
// One canned INFORMATION_SCHEMA result covering three table types across two datasets.
const createQueryJob = vi.fn(
async (
input: { query: string; params?: Record<string, unknown>; location?: string },
): ReturnType<KtxBigQueryClient['createQueryJob']> => [
{
getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
[
{ table_schema: 'analytics', table_name: 'orders', table_type: 'BASE TABLE' },
{ table_schema: 'analytics', table_name: 'order_clone', table_type: 'CLONE' },
{ table_schema: 'mart', table_name: 'orders_mv', table_type: 'MATERIALIZED VIEW' },
],
undefined,
{
schema: {
fields: [
{ name: 'table_schema', type: 'STRING' },
{ name: 'table_name', type: 'STRING' },
{ name: 'table_type', type: 'STRING' },
],
},
},
],
},
],
);
const clientFactory: KtxBigQueryClientFactory = {
createClient: vi.fn(() => ({
getDatasets: vi.fn(async () => [[{ id: 'analytics' }, { id: 'mart' }]] as [{ id: string }[]]),
dataset: vi.fn((datasetId: string) => ({
get: vi.fn(async () => [{ id: datasetId }]),
// The per-dataset listing returns nothing, so results can only come from the query job.
getTables: vi.fn(async () => [[]] as [never[]]),
})),
createQueryJob,
})),
};
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'bigquery',
credentials_json: JSON.stringify({ project_id: 'project-1' }),
location: 'US',
},
clientFactory,
});
// BASE TABLE and CLONE are reported as kind 'table'; MATERIALIZED VIEW as kind 'view'.
await expect(connector.listTables(['analytics', 'mart'])).resolves.toEqual([
{ schema: 'analytics', name: 'orders', kind: 'table' },
{ schema: 'analytics', name: 'order_clone', kind: 'table' },
{ schema: 'mart', name: 'orders_mv', kind: 'view' },
]);
// Exactly one job, run in the configured location, with the dataset ids passed as a parameter.
expect(createQueryJob).toHaveBeenCalledTimes(1);
expect(createQueryJob).toHaveBeenCalledWith(
expect.objectContaining({
location: 'US',
params: { dataset_ids: ['analytics', 'mart'] },
}),
);
// The SQL targets the region-qualified INFORMATION_SCHEMA and mentions the CLONE and SNAPSHOT table types.
expect(createQueryJob.mock.calls[0]?.[0].query).toContain('`project-1`.`region-us`.INFORMATION_SCHEMA.TABLES');
expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'CLONE'");
expect(createQueryJob.mock.calls[0]?.[0].query).toContain("'SNAPSHOT'");
});
it('keeps scan paths requiring dataset scope', async () => {
  // Build a connector from a connection that carries neither dataset_id nor dataset_ids.
  const connector = new KtxBigQueryScanConnector({
    connectionId: 'warehouse',
    connection: {
      driver: 'bigquery',
      credentials_json: JSON.stringify({ project_id: 'project-1' }),
      location: 'US',
    },
    clientFactory: fakeClientFactory(),
  });

  // Introspection is a scan path, so it must reject with the configuration hint.
  const pendingSnapshot = connector.introspect(
    { connectionId: 'warehouse', driver: 'bigquery' },
    { runId: 'scan-run-1' },
  );
  const expectedMessage = 'Native BigQuery scan requires connections.warehouse.dataset_ids or dataset_id';
  await expect(pendingSnapshot).rejects.toThrow(expectedMessage);
});
// A numeric `maxBytesBilled` constructor option must reach the query job as
// the string-valued `maximumBytesBilled` field.
it('applies maximumBytesBilled to read-only queries when configured', async () => {
const clientFactory = fakeClientFactory();
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection,
clientFactory,
maxBytesBilled: 123456789,
});
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
// Recover the client the connector created so its createQueryJob mock can be inspected.
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
expect(client.createQueryJob).toHaveBeenLastCalledWith(
expect.objectContaining({
maximumBytesBilled: '123456789',
}),
);
});
// Limits given on the connection itself (`max_bytes_billed`, `job_timeout_ms`)
// must be forwarded to the query job as `maximumBytesBilled` and `jobTimeoutMs`.
it('applies canonical BigQuery YAML scan limits to query jobs', async () => {
const clientFactory = fakeClientFactory();
const connector = new KtxBigQueryScanConnector({
connectionId: 'warehouse',
connection: { ...connection, max_bytes_billed: '987654321', job_timeout_ms: 30_000 },
clientFactory,
});
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
// Recover the client the connector created so its createQueryJob mock can be inspected.
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
expect(client.createQueryJob).toHaveBeenLastCalledWith(
expect.objectContaining({
maximumBytesBilled: '987654321',
jobTimeoutMs: 30_000,
}),
);
});
// The live-database introspection adapter, given the same fixture and stub
// client, must expose the `orders` table with its normalized `id` column.
it('adapts native snapshots to live-database introspection snapshots', async () => {
const introspection = createBigQueryLiveDatabaseIntrospection({
// Connections are keyed by connection id.
connections: { warehouse: connection },
clientFactory: fakeClientFactory(),
now: () => new Date('2026-04-29T17:00:00.000Z'),
});
// arrayContaining / objectContaining keep the assertion tolerant of extra tables, fields, and columns.
await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
connectionId: 'warehouse',
metadata: { project_id: 'project-1' },
tables: expect.arrayContaining([
expect.objectContaining({
catalog: 'project-1',
db: 'analytics',
name: 'orders',
columns: expect.arrayContaining([
{
name: 'id',
nativeType: 'INT64',
normalizedType: 'BIGINT',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
]),
}),
]),
});
});
});

View file

@ -1,5 +1,6 @@
import { BigQuery, type TableField } from '@google-cloud/bigquery';
import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from '../../context/connections/bigquery-identifiers.js';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -26,7 +27,6 @@ import {
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { KtxBigQueryDialect } from './dialect.js';
export interface KtxBigQueryConnectionConfig {
driver?: string;
@ -235,6 +235,23 @@ function normalizeValue(value: unknown): unknown {
return value;
}
/**
 * Rewrites `:name` placeholders to BigQuery's `@name` named-parameter syntax.
 *
 * Only placeholders whose name is a key of `params` are rewritten; all other
 * SQL text is left untouched. Parameter values are passed through unchanged.
 *
 * @param sql SQL text that may contain `:name` placeholders.
 * @param params Named parameter values keyed by placeholder name.
 * @returns The rewritten SQL together with a shallow copy of `params`, or
 *   `params: undefined` when no parameters were supplied (missing or empty).
 * @internal
 */
export function prepareBigQueryReadOnlyQuery(
  sql: string,
  params?: Record<string, unknown>,
): { sql: string; params?: Record<string, unknown> } {
  if (!params) {
    return { sql, params: undefined };
  }
  const names = Object.keys(params);
  if (names.length === 0) {
    return { sql, params: undefined };
  }
  let processedSql = sql;
  for (const name of names) {
    // Escape regex metacharacters so the key is matched literally: an unescaped
    // `a.b` would also match `:aXb`, and an unescaped `a(` would throw.
    const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // `(?!\w)` stops `:id` from matching the prefix of `:id_2`.
    const placeholder = new RegExp(`:${literalName}(?!\\w)`, 'g');
    // A replacer function keeps `$&`-style sequences in the name from being expanded.
    processedSql = processedSql.replace(placeholder, () => `@${name}`);
  }
  return { sql: processedSql, params: { ...params } };
}
export function isKtxBigQueryConnectionConfig(
connection: KtxBigQueryConnectionConfig | undefined,
): connection is KtxBigQueryConnectionConfig {
@ -286,7 +303,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
private readonly now: () => Date;
private readonly maxBytesBilled?: number | string;
private readonly queryTimeoutMs?: number;
private readonly dialect = new KtxBigQueryDialect();
private readonly dialect = getDialectForDriver('bigquery');
private client: KtxBigQueryClient | null = null;
constructor(options: KtxBigQueryScanConnectorOptions) {
@ -364,7 +381,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxBigQueryReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const prepared = prepareBigQueryReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -411,7 +428,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
return this.dialect.quoteIdentifier(identifier);
}
async listDatasets(): Promise<string[]> {
async listSchemas(): Promise<string[]> {
const [datasets] = await this.getClient().getDatasets();
return datasets.map((dataset) => dataset.id).filter((id): id is string => Boolean(id));
}
@ -437,6 +454,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector {
params,
);
return rows.map((row) => ({
catalog: this.resolved.projectId,
schema: row.table_schema,
name: row.table_name,
kind:

View file

@ -1,52 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxBigQueryDialect } from './dialect.js';
// Unit tests for the BigQuery dialect's pure string-building helpers.
describe('KtxBigQueryDialect', () => {
// One shared instance is reused by every test in this suite.
const dialect = new KtxBigQueryDialect();
// Backticks inside an identifier are backslash-escaped; missing catalog/db parts are simply omitted.
it('quotes identifiers and formats project.dataset.table names', () => {
expect(dialect.quoteIdentifier('order`items')).toBe('`order\\`items`');
expect(dialect.formatTableName({ catalog: 'project-1', db: 'analytics', name: 'orders' })).toBe(
'`project-1`.`analytics`.`orders`',
);
expect(dialect.formatTableName({ db: 'analytics', name: 'orders' })).toBe('`analytics`.`orders`');
expect(dialect.formatTableName({ name: 'orders' })).toBe('`orders`');
});
// mapDataType returns the normalized type name; mapToDimensionType returns the scan dimension.
it('maps native BigQuery types to normalized types and scan dimensions', () => {
expect(dialect.mapDataType('INT64')).toBe('BIGINT');
expect(dialect.mapDataType('STRUCT')).toBe('JSON');
expect(dialect.mapDataType('GEOGRAPHY')).toBe('GEOGRAPHY');
expect(dialect.mapToDimensionType('TIMESTAMP')).toBe('time');
expect(dialect.mapToDimensionType('NUMERIC')).toBe('number');
expect(dialect.mapToDimensionType('BOOL')).toBe('boolean');
expect(dialect.mapToDimensionType('JSON')).toBe('string');
});
// The sample queries are asserted exactly; the cardinality and distinct-value queries only by their opening SELECT.
it('generates sampling, cardinality, and distinct-value SQL', () => {
expect(dialect.generateSampleQuery('`p`.`d`.`orders`', 5, ['id', 'status'])).toBe(
'SELECT `id`, `status` FROM `p`.`d`.`orders` ORDER BY RAND() LIMIT 5',
);
expect(dialect.generateColumnSampleQuery('`p`.`d`.`orders`', 'status', 10)).toBe(
"SELECT `status` FROM `p`.`d`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS STRING)) != '' ORDER BY RAND() LIMIT 10",
);
expect(dialect.generateCardinalitySampleQuery('`p`.`d`.`orders`', '`status`', 100)).toContain(
'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
);
expect(dialect.generateDistinctValuesQuery('`p`.`d`.`orders`', '`status`', 20)).toContain(
'SELECT DISTINCT CAST(`status` AS STRING) AS val',
);
});
// `:id` must not be rewritten inside `:id_2`; without params the SQL is returned untouched.
it('rewrites colon parameters to BigQuery named parameters', () => {
expect(dialect.prepareQuery('SELECT * FROM orders WHERE id = :id AND id_2 = :id_2', { id: 1, id_2: 2 })).toEqual({
sql: 'SELECT * FROM orders WHERE id = @id AND id_2 = @id_2',
params: { id: 1, id_2: 2 },
});
expect(dialect.prepareQuery('SELECT * FROM orders')).toEqual({ sql: 'SELECT * FROM orders', params: undefined });
});
// Column statistics are reported as unsupported by returning null.
it('keeps unsupported statistics explicit', () => {
expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull();
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxBigQueryDialect {
readonly type = 'bigquery';
/** @internal */
export class KtxBigQueryDialect implements KtxDialect {
readonly type = 'bigquery' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
TIMESTAMP: 'time',
@ -27,13 +36,19 @@ export class KtxBigQueryDialect {
}
formatTableName(table: BigQueryTableNameRef): string {
if (table.catalog && table.db) {
return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
if (table.db) {
return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
/**
 * Formats a table reference for display by delegating to
 * `formatDialectDisplayRef` in 'three-part' mode.
 * NOTE(review): 'three-part' presumably means catalog.db.name — confirm in dialect-helpers.
 */
formatDisplayRef(table: BigQueryTableNameRef): string {
return formatDialectDisplayRef(table, 'three-part');
}
/**
 * Parses a display string into a table reference by delegating to
 * `parseDialectDisplayRef` in 'three-part' mode.
 *
 * @returns The parsed reference, or null (per the declared return type;
 * the conditions for null are decided by the helper).
 */
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'three-part');
}
/**
 * Returns the part count that `columnDisplayPartCount` reports for the
 * 'three-part' naming mode.
 */
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('three-part');
}
mapDataType(nativeType: string): string {
@ -93,19 +108,6 @@ export class KtxBigQueryDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
}
/**
 * Rewrites `:name` placeholders to BigQuery's `@name` named-parameter syntax.
 *
 * Only placeholders whose name is a key of `params` are rewritten; all other
 * SQL text is left untouched. Parameter values are passed through unchanged.
 *
 * @param sql SQL text that may contain `:name` placeholders.
 * @param params Named parameter values keyed by placeholder name.
 * @returns The rewritten SQL together with a shallow copy of `params`, or
 *   `params: undefined` when no parameters were supplied (missing or empty).
 */
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
  if (!params) {
    return { sql, params: undefined };
  }
  const names = Object.keys(params);
  if (names.length === 0) {
    return { sql, params: undefined };
  }
  let processedSql = sql;
  for (const name of names) {
    // Escape regex metacharacters so the key is matched literally: an unescaped
    // `a.b` would also match `:aXb`, and an unescaped `a(` would throw.
    const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // `(?!\w)` stops `:id` from matching the prefix of `:id_2`.
    const placeholder = new RegExp(`:${literalName}(?!\\w)`, 'g');
    // A replacer function keeps `$&`-style sequences in the name from being expanded.
    processedSql = processedSql.replace(placeholder, () => `@${name}`);
  }
  return { sql: processedSql, params: { ...params } };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -121,7 +123,11 @@ export class KtxBigQueryDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
/**
 * Always returns an empty string; the limit argument is ignored.
 * NOTE(review): presumably because this dialect limits rows with LIMIT rather than TOP — confirm against the KtxDialect contract.
 */
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -132,6 +138,18 @@ export class KtxBigQueryDialect {
return `APPROX_COUNT_DISTINCT(${column})`;
}
/**
 * Builds a SQL expression for the length of a column's text form:
 * the column is cast to STRING and passed to LENGTH.
 *
 * @param columnSql SQL for the column, inserted verbatim (no quoting is applied here).
 */
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS STRING))`;
}
/**
 * Builds a SQL expression that casts a column to STRING.
 *
 * @param columnSql SQL for the column, inserted verbatim (no quoting is applied here).
 */
castToText(columnSql: string): string {
return `CAST(${columnSql} AS STRING)`;
}
/**
 * Wraps `innerSql` as a subquery aliased `relationship_profile_values` and
 * aggregates its `value` column (cast to STRING) into one string with STRING_AGG.
 *
 * The separator is emitted into the SQL as the six-character escape sequence
 * backslash-u001F rather than as a raw control character.
 * NOTE(review): presumably BigQuery decodes that escape to the unit-separator character — confirm.
 *
 * @param innerSql A query that must expose a column named `value`.
 */
getSampleValueAggregation(innerSql: string): string {
return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -172,36 +190,4 @@ export class KtxBigQueryDialect {
FROM sampled
`;
}
/**
 * Builds a DATE_TRUNC expression for `column` at the given granularity.
 *
 * @param column SQL for the time column, inserted verbatim.
 * @param granularity Truncation unit; upper-cased to form the date part.
 * @param timezone When set, the column is first wrapped in DATETIME(column, 'timezone').
 * @returns The DATE_TRUNC(...) expression string.
 */
getTimeTruncExpression(
  column: string,
  granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
  timezone?: string,
): string {
  const datePart = granularity.toUpperCase();
  const operand = timezone ? `DATETIME(${column}, '${timezone}')` : column;
  return `DATE_TRUNC(${operand}, ${datePart})`;
}
/**
 * Builds an expression that floors `column` to buckets of a custom width,
 * counted from `origin`.
 *
 * @param column SQL for the time column, inserted verbatim.
 * @param interval Bucket width written as "<amount> <unit>", e.g. "15 minute".
 *   A WEEK unit is converted to multiples of 7 days before use.
 * @param origin Timestamp literal the buckets are anchored to; defaults to 1970-01-01.
 * @param timezone When set, the column is first wrapped in DATETIME(column, 'timezone').
 * @returns A TIMESTAMP_ADD(...) expression string.
 * @throws TypeError If `interval` has no unit, or its amount is not a positive finite number.
 */
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
  // NOTE(review): with a timezone the operand becomes a DATETIME while the diff below
  // uses TIMESTAMP_DIFF against a TIMESTAMP origin — confirm BigQuery accepts that mix.
  const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;

  // Tolerate surrounding or repeated whitespace instead of producing an empty unit.
  const [rawAmount, rawUnit] = interval.trim().split(/\s+/);
  let amount = Number(rawAmount);
  if (!rawUnit || !Number.isFinite(amount) || amount <= 0) {
    // Fail loudly: a missing unit used to crash on `undefined.toUpperCase()`, and a
    // NaN or zero amount used to be written straight into the generated SQL.
    throw new TypeError(`Invalid interval "${interval}": expected "<positive number> <unit>"`);
  }

  // The same date part is used for both the diff and the add.
  let unit = rawUnit.toUpperCase();
  if (unit === 'WEEK') {
    unit = 'DAY';
    amount = amount * 7;
  }

  const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
  return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${unit}) / ${amount}) * ${amount} AS INT64) ${unit})`;
}
/**
 * Converts an interval written as "<amount> <unit>" into an
 * `INTERVAL <amount> <UNIT>` clause, upper-casing the unit.
 *
 * @param interval Interval text such as "7 day".
 * @returns The INTERVAL clause string.
 * @throws TypeError If `interval` does not contain both an amount and a unit.
 */
parseIntervalToSql(interval: string): string {
  // Tolerate surrounding or repeated whitespace instead of mis-assigning the parts.
  const [amount, unit] = interval.trim().split(/\s+/);
  if (!amount || !unit) {
    // Same error type as the former `undefined.toUpperCase()` crash, with a usable message.
    throw new TypeError(`Invalid interval "${interval}": expected "<amount> <unit>"`);
  }
  return `INTERVAL ${amount} ${unit.toUpperCase()}`;
}
}

View file

@ -1,405 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { clickHouseClientConfigFromConfig, isKtxClickHouseConnectionConfig, KtxClickHouseScanConnector, type KtxClickHouseClientFactory } from '../../connectors/clickhouse/connector.js';
import { createClickHouseLiveDatabaseIntrospection } from '../../connectors/clickhouse/live-database-introspection.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
/**
 * Wraps a canned payload in an object with an async `json()` accessor that
 * resolves to that payload.
 */
function result<T>(payload: T) {
  const json = async (): Promise<T> => payload;
  return { json };
}
/**
 * Builds a stub ClickHouse client factory for the connector tests.
 *
 * The stubbed `query` picks a canned response by substring-matching the SQL;
 * branches are checked in order and the first match wins. SQL that matches no
 * branch throws, so an unexpected query fails the test instead of passing silently.
 *
 * @returns A factory whose client exposes the `query` mock and a no-op `close` mock.
 */
function fakeClientFactory(): KtxClickHouseClientFactory {
const query = vi.fn(async (input: { query: string; format: string; query_params?: Record<string, unknown> }) => {
// Table listing: one MergeTree table and one view.
if (input.query.includes('FROM system.tables')) {
return result([
{ name: 'events', engine: 'MergeTree', comment: 'Event stream' },
{ name: 'event_summary', engine: 'View', comment: '' },
]);
}
// Column listing for both tables; `events.id` is flagged as part of the primary key.
if (input.query.includes('FROM system.columns')) {
return result([
{ table: 'events', name: 'id', type: 'UInt64', comment: 'PK', is_in_primary_key: 1 },
{ table: 'events', name: 'event_name', type: 'LowCardinality(String)', comment: '', is_in_primary_key: 0 },
{ table: 'event_summary', name: 'event_name', type: 'String', comment: '', is_in_primary_key: 0 },
]);
}
// Grouped per-table row counts; the count is returned as a string.
if (input.query.includes('FROM system.parts') && input.query.includes('GROUP BY')) {
return result([{ table: 'events', row_count: '2' }]);
}
// Table sample limited to one row.
if (input.query.includes('SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 1')) {
return result({
meta: [
{ name: 'id', type: 'UInt64' },
{ name: 'event_name', type: 'String' },
],
data: [[10, 'signup']],
rows: 1,
});
}
// Single-column sample of `event_name`.
if (input.query.includes('SELECT `event_name` FROM `analytics`.`events`')) {
return result({
meta: [{ name: 'event_name', type: 'String' }],
data: [['signup'], ['purchase']],
rows: 2,
});
}
// Cardinality probe: two distinct values.
if (input.query.includes('COUNT(DISTINCT val)')) {
return result({
meta: [{ name: 'cardinality', type: 'UInt64' }],
data: [[2]],
rows: 1,
});
}
// Distinct values of `event_name`.
if (input.query.includes('SELECT DISTINCT toString(`event_name`) AS val')) {
return result({
meta: [{ name: 'val', type: 'String' }],
data: [['purchase'], ['signup']],
rows: 2,
});
}
// Row-count query for a single table.
if (input.query.includes('sum(rows) AS count')) {
return result({
meta: [{ name: 'count', type: 'UInt64' }],
data: [[2]],
rows: 1,
});
}
// Database listing.
if (input.query.includes('FROM system.databases')) {
return result([{ name: 'analytics' }, { name: 'warehouse' }]);
}
// `SELECT 1` is matched exactly after trimming.
if (input.query.trim() === 'SELECT 1') {
return result({ meta: [{ name: '1', type: 'UInt8' }], data: [[1]], rows: 1 });
}
// Read-only execution: the user SQL arrives wrapped in a limiting outer SELECT.
if (input.query.includes('select * from (select id, event_name from analytics.events) as ktx_query_result limit 1')) {
return result({
meta: [
{ name: 'id', type: 'UInt64' },
{ name: 'event_name', type: 'String' },
],
data: [[10, 'signup']],
rows: 1,
});
}
throw new Error(`Unexpected SQL: ${input.query}`);
});
const close = vi.fn(async () => undefined);
return {
createClient: vi.fn(() => ({ query, close })),
};
}
/**
 * Builds a stub ClickHouse client factory for multi-database scans.
 *
 * Each recognised system-table query asserts that it was parameterised with
 * both databases (`analytics` and `mart`) and returns rows tagged with their
 * database. Any other SQL throws.
 */
function multiDatabaseClickHouseClientFactory(): KtxClickHouseClientFactory {
const query = vi.fn(async (input: { query: string; format: string; query_params?: Record<string, unknown> }) => {
// Table listing: one table per database.
if (input.query.includes('FROM system.tables')) {
expect(input.query_params).toEqual({ databases: ['analytics', 'mart'] });
return result([
{ database: 'analytics', name: 'events', engine: 'MergeTree', comment: 'Event stream' },
{ database: 'mart', name: 'order_events', engine: 'MergeTree', comment: '' },
]);
}
// Column listing: each table has a single `id` column that is part of the primary key.
if (input.query.includes('FROM system.columns')) {
expect(input.query_params).toEqual({ databases: ['analytics', 'mart'] });
return result([
{
database: 'analytics',
table: 'events',
name: 'id',
type: 'UInt64',
comment: '',
is_in_primary_key: 1,
},
{
database: 'mart',
table: 'order_events',
name: 'id',
type: 'UInt64',
comment: '',
is_in_primary_key: 1,
},
]);
}
// Grouped row counts per database and table; counts are returned as strings.
if (input.query.includes('FROM system.parts') && input.query.includes('GROUP BY')) {
expect(input.query_params).toEqual({ databases: ['analytics', 'mart'] });
return result([
{ database: 'analytics', table: 'events', row_count: '2' },
{ database: 'mart', table: 'order_events', row_count: '5' },
]);
}
throw new Error(`Unexpected SQL: ${input.query}`);
});
return {
createClient: vi.fn(() => ({ query, close: vi.fn(async () => undefined) })),
};
}
// Suite for the ClickHouse scan connector. Tests that issue queries inject a
// vi.fn-based client factory, so the assertions below are against canned rows.
describe('KtxClickHouseScanConnector', () => {
// Checks the driver type guard and that connection settings are carried over
// into the client config unchanged.
it('resolves ClickHouse connection configuration safely', () => {
expect(isKtxClickHouseConnectionConfig({ driver: 'clickhouse', host: 'localhost', database: 'analytics' })).toBe(
true,
);
expect(isKtxClickHouseConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(false);
expect(
clickHouseClientConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
port: 9440,
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
ssl: true,
},
}),
).toMatchObject({
host: 'ch.example.test',
port: 9440,
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
ssl: true,
});
});
// Single-database introspection: snapshot metadata, per-table kind/row
// estimate/comment, and the first column of `events`.
it('introspects schema, primary keys, comments, row counts, and views', async () => {
const connector = new KtxClickHouseScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
clientFactory: fakeClientFactory(),
// Fixed clock so `extractedAt` can be asserted exactly.
now: () => new Date('2026-04-29T14:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'clickhouse' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'clickhouse',
extractedAt: '2026-04-29T14:00:00.000Z',
scope: { schemas: ['analytics'] },
metadata: {
database: 'analytics',
host: 'ch.example.test',
table_count: 2,
total_columns: 3,
},
});
// The view is expected to surface with a null row estimate and a null comment.
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([
['events', 'table', 2, 'Event stream'],
['event_summary', 'view', null, null],
]);
expect(snapshot.tables.find((table) => table.name === 'events')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'UInt64',
normalizedType: 'UInt64',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
});
// No foreign keys are expected on the ClickHouse table.
expect(snapshot.tables.find((table) => table.name === 'events')?.foreignKeys).toEqual([]);
});
// Multi-database scope: `databases` lists two entries while `database` stays
// the default; both must appear in scope/metadata and in the table list.
it('introspects every configured ClickHouse database scope while preserving the default database', async () => {
const connector = new KtxClickHouseScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
databases: ['analytics', 'mart'],
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
clientFactory: multiDatabaseClickHouseClientFactory(),
now: () => new Date('2026-05-21T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'clickhouse' },
{ runId: 'scan-run-1' },
);
expect(snapshot.scope).toEqual({ schemas: ['analytics', 'mart'] });
expect(snapshot.metadata).toMatchObject({ database: 'analytics', databases: ['analytics', 'mart'] });
expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual([
'analytics.events',
'mart.order_events',
]);
});
// tableScope: records every issued query so the test can assert that the
// table-name filter is part of the system.tables SQL and its parameters.
it('limits introspection to tables in tableScope', async () => {
const queries: Array<{ query: string; query_params?: Record<string, unknown> }> = [];
const clientFactory: KtxClickHouseClientFactory = {
createClient: vi.fn(() => ({
query: vi.fn(async (input: { query: string; format: string; query_params?: Record<string, unknown> }) => {
queries.push({ query: input.query, query_params: input.query_params });
if (input.query.includes('FROM system.tables')) {
return result([{ database: 'analytics', name: 'events', engine: 'MergeTree', comment: '' }]);
}
if (input.query.includes('FROM system.columns')) {
return result([
{
database: 'analytics',
table: 'events',
name: 'id',
type: 'UInt64',
comment: '',
is_in_primary_key: 1,
},
]);
}
if (input.query.includes('FROM system.parts')) {
return result([{ database: 'analytics', table: 'events', row_count: '2' }]);
}
throw new Error(`Unexpected SQL: ${input.query}`);
}),
close: vi.fn(async () => undefined),
})),
};
const connector = new KtxClickHouseScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
clientFactory,
});
const scope = tableRefSet([{ catalog: null, db: 'analytics', name: 'events' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'clickhouse', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['events']);
const tablesQuery = queries.find((query) => query.query.includes('FROM system.tables'));
expect(tablesQuery?.query).toContain('AND name IN {table_names:Array(String)}');
expect(tablesQuery?.query_params).toEqual({ databases: ['analytics'], table_names: ['events'] });
});
// Exercises the remaining connector operations one after another against the
// shared fake client: table sample, column sample, distinct values, read-only
// SQL (accepted and rejected), row count, schema list, column stats, cleanup.
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
const clientFactory = fakeClientFactory();
const connector = new KtxClickHouseScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
clientFactory,
});
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: null, db: 'analytics', name: 'events' },
columns: ['id', 'event_name'],
limit: 1,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id', 'event_name'], rows: [[10, 'signup']], totalRows: 1 });
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'events' }, column: 'event_name', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['signup', 'purchase'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: null, db: 'analytics', name: 'events' },
'event_name',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['purchase', 'signup'], cardinality: 2 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, event_name from analytics.events', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'event_name'], rows: [[10, 'signup']], totalRows: 1, rowCount: 1 });
// A non-SELECT statement must be rejected with the read-only error message.
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from events' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(connector.getTableRowCount('events')).resolves.toBe(2);
await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']);
// columnStats is expected to resolve to null for this connector.
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'events' }, column: 'event_name' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
await connector.cleanup();
});
// Live-database adapter: extractSchema('warehouse') must yield the same table
// and column shape as the native snapshot, with empty comments mapped to null.
it('adapts native ClickHouse snapshots to live-database introspection for local ingest', async () => {
const introspection = createClickHouseLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'clickhouse',
host: 'ch.example.test',
database: 'analytics',
username: 'reader',
password: 'test-pass', // pragma: allowlist secret
},
},
clientFactory: fakeClientFactory(),
now: () => new Date('2026-04-29T14:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T14:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'events')).toMatchObject({
name: 'events',
catalog: null,
db: 'analytics',
columns: [
{
name: 'id',
nativeType: 'UInt64',
normalizedType: 'UInt64',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
},
{
name: 'event_name',
nativeType: 'LowCardinality(String)',
normalizedType: 'LowCardinality(String)',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
});
});

View file

@ -1,4 +1,5 @@
import { createClient } from '@clickhouse/client';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -6,7 +7,6 @@ import { readFileSync } from 'node:fs';
import { Agent as HttpsAgent } from 'node:https';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { KtxClickHouseDialect } from './dialect.js';
export interface KtxClickHouseConnectionConfig {
driver?: string;
@ -198,6 +198,49 @@ function clickHouseTableKey(database: string, table: string): string {
return `${database}.${table}`;
}
/**
 * Picks the ClickHouse type name used inside a `{name:Type}` query placeholder
 * for a JavaScript parameter value.
 *
 * Booleans map to `Bool`, integral numbers to `Int64`, every other number
 * (including `NaN` and infinities) to `Float64`, `Date` instances to
 * `DateTime`, and everything else (strings, `null`, `undefined`, other
 * objects) to `String`.
 */
function inferClickHouseQueryParamType(value: unknown): string {
  switch (typeof value) {
    case 'boolean':
      return 'Bool';
    case 'number':
      return Number.isInteger(value) ? 'Int64' : 'Float64';
    case 'object':
      // `typeof null` is 'object'; `null instanceof Date` is false, so null lands on 'String'.
      return value instanceof Date ? 'DateTime' : 'String';
    default:
      return 'String';
  }
}
/**
 * Rewrites `:name` placeholders in `sql` into ClickHouse typed placeholders
 * (`{name:Type}`) and collects the parameters that were actually referenced.
 *
 * The rewrite is a single pass over the original SQL text:
 * - keys are regex-escaped, so a key containing metacharacters can neither
 *   throw while building the pattern nor match unrelated text;
 * - a replacer function is used, so `$`-sequences in a key are not interpreted
 *   as replacement patterns;
 * - text inserted for one parameter (for example the `:Int64` inside
 *   `{id:Int64}`) is never rescanned, so a parameter that happens to be named
 *   like a type cannot corrupt an earlier substitution.
 *
 * Longer keys are listed first in the alternation so `:id2` is preferred over
 * `:id`; the trailing `\b` keeps `:id` from matching inside `:id2`.
 *
 * @param sql SQL text that may contain `:name` placeholders.
 * @param params Named parameter values; when omitted the SQL is returned as-is.
 * @returns The rewritten SQL and the referenced parameters, or `params:
 * undefined` when no placeholder was substituted.
 * @internal
 */
export function prepareClickHouseReadOnlyQuery(
  sql: string,
  params?: Record<string, unknown>,
): { sql: string; params?: Record<string, unknown> } {
  if (!params) {
    return { sql, params: undefined };
  }

  const sortedKeys = Object.keys(params).sort((a, b) => b.length - a.length);
  if (sortedKeys.length === 0) {
    // Nothing to substitute; an empty alternation would match every ':' before a word character.
    return { sql, params: undefined };
  }

  const alternation = sortedKeys.map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|');
  const placeholderPattern = new RegExp(`:(${alternation})\\b`, 'g');

  const queryParams: Record<string, unknown> = {};
  const parameterizedQuery = sql.replace(placeholderPattern, (_match: string, key: string) => {
    queryParams[key] = params[key];
    return `{${key}:${inferClickHouseQueryParamType(params[key])}}`;
  });

  return { sql: parameterizedQuery, params: Object.keys(queryParams).length > 0 ? queryParams : undefined };
}
export function isKtxClickHouseConnectionConfig(
connection: KtxClickHouseConnectionConfig | undefined,
): connection is KtxClickHouseConnectionConfig {
@ -256,7 +299,7 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
private readonly clientFactory: KtxClickHouseClientFactory;
private readonly endpointResolver?: KtxClickHouseEndpointResolver;
private readonly now: () => Date;
private readonly dialect = new KtxClickHouseDialect();
private readonly dialect = getDialectForDriver('clickhouse');
private client: KtxClickHouseClient | null = null;
private resolvedEndpoint: KtxClickHouseResolvedEndpoint | null = null;
@ -408,7 +451,7 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
/**
 * Runs a caller-supplied SQL statement after read-only validation.
 *
 * The statement is passed through `assertReadOnlySql`, wrapped by
 * `limitSqlForExecution` with `input.maxRows`, has its named parameters
 * prepared, and is then executed via `this.query`.
 *
 * @param input Connection id, SQL text, optional `maxRows` and `params`.
 * @param _ctx Scan context; unused here.
 * @returns The query result with `rowCount` set to the number of returned rows.
 */
async executeReadOnly(input: KtxClickHouseReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
// Rejects when the requested connection id is not the one this connector serves (see assertConnection).
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
// NOTE(review): `prepared` is declared twice on consecutive lines. This looks like the
// pre-change and post-change lines of a diff shown together; presumably only the
// prepareClickHouseReadOnlyQuery call is meant to remain — confirm before merging.
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const prepared = prepareClickHouseReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -488,6 +531,7 @@ export class KtxClickHouseScanConnector implements KtxScanConnector {
{ schemas: filterSchemas },
);
return rows.map((row) => ({
catalog: null,
schema: row.database,
name: row.name,
kind: row.engine === 'View' || row.engine === 'MaterializedView' ? ('view' as const) : ('table' as const),

View file

@ -1,49 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxClickHouseDialect } from './dialect.js';
// Unit tests for the SQL-text helpers of the ClickHouse dialect: identifier
// quoting, type mapping, generated queries, and named-parameter preparation.
describe('KtxClickHouseDialect', () => {
  const clickhouse = new KtxClickHouseDialect();
  const quotedEventsTable = '`analytics`.`events`';

  it('quotes identifiers and formats database-qualified table names', () => {
    expect(clickhouse.quoteIdentifier('events')).toBe('`events`');
    // Backticks inside an identifier are doubled.
    expect(clickhouse.quoteIdentifier('odd`name')).toBe('`odd``name`');

    const qualified = clickhouse.formatTableName({ catalog: null, db: 'analytics', name: 'events' });
    expect(qualified).toBe('`analytics`.`events`');

    const unqualified = clickhouse.formatTableName({ catalog: null, db: null, name: 'events' });
    expect(unqualified).toBe('`events`');
  });

  it('maps nullable and low-cardinality ClickHouse types to KTX dimension types', () => {
    const expectations: Array<[string, string]> = [
      ['Nullable(DateTime64(3))', 'time'],
      ['LowCardinality(Nullable(String))', 'string'],
      ['UInt64', 'number'],
      ['Decimal(18, 4)', 'number'],
      ['Bool', 'boolean'],
      ['IPv4', 'string'],
      ['', 'string'],
    ];
    for (const [nativeType, dimensionType] of expectations) {
      expect(clickhouse.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('builds sampling, distinct-value, pagination, and time SQL', () => {
    const tableSample = clickhouse.generateSampleQuery(quotedEventsTable, 25, ['id', 'event_name']);
    expect(tableSample).toBe('SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 25');

    const columnSample = clickhouse.generateColumnSampleQuery(quotedEventsTable, 'event_name', 10);
    expect(columnSample).toBe(
      "SELECT `event_name` FROM `analytics`.`events` WHERE `event_name` IS NOT NULL AND trim(toString(`event_name`)) != '' LIMIT 10",
    );

    const distinctValues = clickhouse.generateDistinctValuesQuery(quotedEventsTable, '`event_name`', 5);
    expect(distinctValues).toContain('SELECT DISTINCT toString(`event_name`) AS val');

    expect(clickhouse.getLimitOffsetClause(10, 20)).toBe('LIMIT 10 OFFSET 20');
    expect(clickhouse.getTimeTruncExpression('created_at', 'week')).toBe('toStartOfWeek(created_at, 1)');
  });

  it('prepares named parameters using ClickHouse typed placeholders', () => {
    const namedParams = { id: 10, name: 'signup' };
    const prepared = clickhouse.prepareQuery(
      'select * from events where id = :id and event_name = :name',
      namedParams,
    );
    expect(prepared).toEqual({
      sql: 'select * from events where id = {id:Int64} and event_name = {name:String}',
      params: { id: 10, name: 'signup' },
    });
  });
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type ClickHouseTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxClickHouseDialect {
readonly type = 'clickhouse';
/** @internal */
export class KtxClickHouseDialect implements KtxDialect {
readonly type = 'clickhouse' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
date: 'time',
@ -45,9 +54,19 @@ export class KtxClickHouseDialect {
}
/**
 * Formats a table reference as quoted SQL, qualifying it with the database
 * name when `table.db` is set.
 */
formatTableName(table: ClickHouseTableNameRef): string {
// NOTE(review): two return statements appear back to back, so the second one is
// unreachable as written. This looks like the old and new lines of a diff shown
// together; presumably only the formatDialectTableName call is meant to remain — confirm.
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi');
}
/** Builds the display form of a table reference by delegating to `formatDialectDisplayRef` with the `'ansi'` style. */
formatDisplayRef(table: ClickHouseTableNameRef): string {
  const displayRef = formatDialectDisplayRef(table, 'ansi');
  return displayRef;
}
/**
 * Parses a display string back into a table reference by delegating to
 * `parseDialectDisplayRef` with the `'ansi'` style; the helper's result
 * (a reference or `null`) is returned unchanged.
 */
parseDisplayRef(display: string): KtxTableRef | null {
  const parsedRef = parseDialectDisplayRef(display, 'ansi');
  return parsedRef;
}
/** Returns the part count reported by `columnDisplayPartCount` for the `'ansi'` style. */
columnDisplayTablePartCount(): 1 | 2 | 3 {
  const partCount = columnDisplayPartCount('ansi');
  return partCount;
}
mapDataType(nativeType: string): string {
@ -97,29 +116,6 @@ export class KtxClickHouseDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND trim(toString(${quotedColumn})) != '' LIMIT ${limit}`;
}
/**
 * Rewrites `:name` placeholders in `sql` into ClickHouse typed placeholders
 * (`{name:Type}`) and collects the parameters that were actually referenced.
 *
 * The rewrite is a single pass over the original SQL text: keys are
 * regex-escaped, a replacer function is used so `$`-sequences in a key are not
 * treated as replacement patterns, and text inserted for one parameter (for
 * example the `:Int64` inside `{id:Int64}`) is never rescanned for other
 * parameter names. Longer keys come first in the alternation so `:id2` wins
 * over `:id`, and the trailing `\b` keeps `:id` from matching inside `:id2`.
 *
 * @param sql SQL text that may contain `:name` placeholders.
 * @param params Named parameter values; when omitted the SQL is returned as-is
 * with `params: undefined`.
 * @returns The rewritten SQL and an object holding the referenced parameters
 * (possibly empty).
 */
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
  if (!params) {
    return { sql, params: undefined };
  }

  const queryParams: Record<string, unknown> = {};
  const sortedKeys = Object.keys(params).sort((a, b) => b.length - a.length);
  if (sortedKeys.length === 0) {
    // Nothing to substitute; an empty alternation would match every ':' before a word character.
    return { sql, params: queryParams };
  }

  const alternation = sortedKeys.map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|');
  const placeholderPattern = new RegExp(`:(${alternation})\\b`, 'g');

  const parameterizedQuery = sql.replace(placeholderPattern, (_match: string, key: string) => {
    queryParams[key] = params[key];
    return `{${key}:${this.inferClickHouseType(params[key])}}`;
  });

  return { sql: parameterizedQuery, params: queryParams };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -132,7 +128,11 @@ export class KtxClickHouseDialect {
}
/**
 * Builds the pagination clause: `LIMIT n OFFSET m` when a positive offset is
 * given, otherwise `LIMIT n`.
 */
getLimitOffsetClause(limit: number, offset?: number): string {
// NOTE(review): two return statements appear back to back, so the second one is
// unreachable as written. This looks like the old and new lines of a diff shown
// together; presumably only the limitOffsetClause call is meant to remain — confirm.
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
/** Always returns an empty string; the `_limit` argument is ignored. */
getTopClause(_limit: number): string {
  const noTopClause = '';
  return noTopClause;
}
getNullCountExpression(column: string): string {
@ -143,6 +143,18 @@ export class KtxClickHouseDialect {
return `COUNT(DISTINCT ${column})`;
}
/** Builds a SQL expression that applies `length(...)` to the `toString(...)` form of `columnSql`. */
textLengthExpression(columnSql: string): string {
  const lengthSql = `length(toString(${columnSql}))`;
  return lengthSql;
}
/** Builds a SQL expression that wraps `columnSql` in `toString(...)`. */
castToText(columnSql: string): string {
  const textSql = `toString(${columnSql})`;
  return textSql;
}
/**
 * Wraps `innerSql` in a scalar subquery (aliased `relationship_profile_values`)
 * that collects `toString(value)` with `groupArray` and joins the entries with
 * the `'\x1F'` separator literal via `arrayStringConcat`.
 *
 * The generated SQL reads a column named `value` from `innerSql`.
 */
getSampleValueAggregation(innerSql: string): string {
  const aggregationSql = `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`;
  return aggregationSql;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
SELECT COUNT(DISTINCT val) AS cardinality
@ -181,99 +193,9 @@ export class KtxClickHouseDialect {
)
`;
}
/**
 * Builds a `toStartOf*` expression that truncates `column` to the given
 * granularity.
 *
 * Weeks pass `1` as the mode argument to `toStartOfWeek`. When `timezone` is
 * set it is appended as a trailing quoted argument.
 *
 * @param column SQL expression for the timestamp column.
 * @param granularity Bucket size.
 * @param timezone Optional timezone name, interpolated verbatim inside single quotes.
 */
getTimeTruncExpression(
  column: string,
  granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
  timezone?: string,
): string {
  const zoneArg = timezone ? `, '${timezone}'` : '';
  const truncateWith = (fn: string, extraArgs = ''): string => `${fn}(${column}${extraArgs}${zoneArg})`;

  switch (granularity) {
    case 'day':
      return truncateWith('toStartOfDay');
    case 'week':
      return truncateWith('toStartOfWeek', ', 1');
    case 'month':
      return truncateWith('toStartOfMonth');
    case 'quarter':
      return truncateWith('toStartOfQuarter');
    case 'year':
      return truncateWith('toStartOfYear');
  }
}
/**
 * Builds an expression that buckets `column` into fixed-size intervals counted
 * from an origin timestamp.
 *
 * `interval` is read as `"<amount> <unit>"` split on a single space. Month,
 * quarter and year units use `dateDiff`/`dateAdd` with that unit; every other
 * unit is converted to seconds and bucketed with `addSeconds`.
 *
 * @param column SQL expression for the timestamp column.
 * @param interval Interval text such as `"15 minutes"`; a missing unit part throws.
 * @param origin Optional origin timestamp; `'1970-01-01'` is used when absent.
 * @param timezone Optional timezone; when set the column is wrapped in `toTimezone`.
 */
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
  const source = timezone ? `toTimezone(${column}, '${timezone}')` : column;

  const parts = interval.split(' ');
  const step = Number(parts[0]);
  const unit = parts[1]!.toLowerCase();

  const anchor = origin ? `toDateTime('${origin}')` : "toDateTime('1970-01-01')";

  const calendarUnit = this.toClickHouseDateDiffUnit(unit);
  if (!calendarUnit) {
    // Fixed-length units: bucket on elapsed seconds since the anchor.
    const stepSeconds = this.intervalToSeconds(step, unit);
    return `addSeconds(${anchor}, intDiv(toUInt64(dateDiff('second', ${anchor}, ${source})), ${stepSeconds}) * ${stepSeconds})`;
  }

  // Calendar units: bucket on whole units elapsed since the anchor.
  return `dateAdd(${calendarUnit}, intDiv(dateDiff(${calendarUnit}, ${anchor}, ${source}), ${step}) * ${step}, ${anchor})`;
}
/**
 * Converts `"<amount> <unit>"` (split on a single space) into
 * `INTERVAL <amount> <UNIT>` with the unit upper-cased. A missing unit part throws.
 */
parseIntervalToSql(interval: string): string {
  const pieces = interval.split(' ');
  const quantity = pieces[0];
  const unitKeyword = pieces[1]!.toUpperCase();
  return `INTERVAL ${quantity} ${unitKeyword}`;
}
/**
 * Strips one `wrapper(...)` layer from a type string, e.g. `Nullable(String)`
 * with wrapper `Nullable` becomes `String`. Values that do not start with
 * `wrapper(` or do not end with `)` are returned unchanged.
 */
private unwrapClickHouseType(value: string, wrapper: string): string {
  const opening = `${wrapper}(`;
  if (!value.startsWith(opening) || !value.endsWith(')')) {
    return value;
  }
  return value.slice(opening.length, -1);
}
/**
 * Picks the ClickHouse type name for a JavaScript parameter value: `Bool` for
 * booleans, `Int64` for integral numbers, `Float64` for other numbers,
 * `DateTime` for `Date` instances, and `String` for everything else
 * (including `null` and `undefined`).
 */
private inferClickHouseType(value: unknown): string {
  switch (typeof value) {
    case 'boolean':
      return 'Bool';
    case 'number':
      return Number.isInteger(value) ? 'Int64' : 'Float64';
    case 'object':
      // `typeof null` is 'object'; `null instanceof Date` is false, so null lands on 'String'.
      return value instanceof Date ? 'DateTime' : 'String';
    default:
      return 'String';
  }
}
/**
 * Maps a month/quarter/year unit name (singular or plural) to the quoted unit
 * literal used in `dateDiff`/`dateAdd`; returns `null` for any other unit.
 */
private toClickHouseDateDiffUnit(unit: string): string | null {
  switch (unit) {
    case 'month':
    case 'months':
      return "'month'";
    case 'quarter':
    case 'quarters':
      return "'quarter'";
    case 'year':
    case 'years':
      return "'year'";
    default:
      return null;
  }
}
/**
 * Converts an amount of second/minute/hour/day/week units (singular or plural)
 * into seconds. Unrecognised units are treated as days.
 */
private intervalToSeconds(amount: number, unit: string): number {
  // A Map (rather than a plain object) so names like 'constructor' cannot hit inherited keys.
  const secondsPerUnit = new Map<string, number>([
    ['minute', 60],
    ['minutes', 60],
    ['hour', 3600],
    ['hours', 3600],
    ['day', 86400],
    ['days', 86400],
    ['week', 604800],
    ['weeks', 604800],
  ]);

  if (unit === 'second' || unit === 'seconds') {
    return amount;
  }
  const factor = secondsPerUnit.get(unit);
  return factor === undefined ? amount * 86400 : amount * factor;
}
}

View file

@ -1,556 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import type { FieldPacket, RowDataPacket } from 'mysql2/promise';
import { createMysqlLiveDatabaseIntrospection } from '../../connectors/mysql/live-database-introspection.js';
import { isKtxMysqlConnectionConfig, KtxMysqlScanConnector, mysqlConnectionPoolConfigFromConfig, type KtxMysqlConnectionConfig, type KtxMysqlPoolFactory } from '../../connectors/mysql/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
/**
 * Packs plain row objects and field descriptors into the `[rows, fields]`
 * tuple shape returned by the stubbed `query` functions in this file.
 * The casts only change the static types; the arrays are passed through as-is.
 */
function mysqlResult(rows: Record<string, unknown>[], fields: Array<{ name: string; type?: number }>): [RowDataPacket[], FieldPacket[]] {
  const packetRows = rows as RowDataPacket[];
  const packetFields = fields as FieldPacket[];
  return [packetRows, packetFields];
}
/**
 * Stub MySQL pool factory for the single-schema connector tests.
 *
 * The shared `query` mock dispatches on substrings of the SQL text and returns
 * canned `[rows, fields]` tuples for the INFORMATION_SCHEMA lookups, the
 * sample/distinct/count queries, and `SELECT 1`. Any other SQL throws so an
 * unexpected query fails the test.
 */
function fakePoolFactory(): KtxMysqlPoolFactory {
const query = vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => {
// Table catalogue: two base tables and one view (the view has a null TABLE_ROWS).
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return mysqlResult(
[
{ TABLE_NAME: 'customers', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'Customer table', TABLE_ROWS: 2 },
{ TABLE_NAME: 'orders', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'InnoDB free: 1 kB; Order table', TABLE_ROWS: 2 },
{ TABLE_NAME: 'order_summary', TABLE_TYPE: 'VIEW', TABLE_COMMENT: '', TABLE_ROWS: null },
],
[{ name: 'TABLE_NAME' }, { name: 'TABLE_TYPE' }, { name: 'TABLE_COMMENT' }, { name: 'TABLE_ROWS' }],
);
}
// Column catalogue for all three relations (six columns in total).
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return mysqlResult(
[
{ TABLE_NAME: 'customers', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: 'PK' },
{ TABLE_NAME: 'customers', COLUMN_NAME: 'name', DATA_TYPE: 'varchar', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
{ TABLE_NAME: 'orders', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
{ TABLE_NAME: 'orders', COLUMN_NAME: 'customer_id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
{ TABLE_NAME: 'orders', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' },
{ TABLE_NAME: 'order_summary', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' },
],
[{ name: 'TABLE_NAME' }, { name: 'COLUMN_NAME' }, { name: 'DATA_TYPE' }, { name: 'IS_NULLABLE' }],
);
}
// The two KEY_COLUMN_USAGE lookups are told apart by their second substring:
// primary keys first, then foreign keys.
if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes("CONSTRAINT_NAME = 'PRIMARY'")) {
return mysqlResult([{ TABLE_NAME: 'customers', COLUMN_NAME: 'id' }, { TABLE_NAME: 'orders', COLUMN_NAME: 'id' }], []);
}
if (sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes('REFERENCED_TABLE_NAME IS NOT NULL')) {
return mysqlResult(
[
{
TABLE_NAME: 'orders',
COLUMN_NAME: 'customer_id',
REFERENCED_TABLE_NAME: 'customers',
REFERENCED_COLUMN_NAME: 'id',
CONSTRAINT_NAME: 'orders_customer_id_fk',
},
],
[],
);
}
// Table sample query.
if (sql.includes('SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 1')) {
return mysqlResult([{ id: 10, status: 'paid' }], [{ name: 'id', type: 3 }, { name: 'status', type: 253 }]);
}
// Read-only query after it has been wrapped with a row limit.
if (sql.includes('select * from (select id, status from analytics.orders) as ktx_query_result limit 1')) {
return mysqlResult([{ id: 10, status: 'paid' }], [{ name: 'id', type: 3 }, { name: 'status', type: 253 }]);
}
// Column sample query.
if (sql.includes('SELECT `status` FROM `analytics`.`orders`')) {
return mysqlResult([{ status: 'paid' }, { status: 'open' }], [{ name: 'status', type: 253 }]);
}
// Cardinality probe, then the distinct-values listing.
if (sql.includes('COUNT(DISTINCT val)')) {
return mysqlResult([{ cardinality: 2 }], [{ name: 'cardinality', type: 8 }]);
}
if (sql.includes('SELECT DISTINCT CAST(`status` AS CHAR) AS val')) {
return mysqlResult([{ val: 'open' }, { val: 'paid' }], [{ name: 'val', type: 253 }]);
}
// Row count.
if (sql.includes('COUNT(*) AS count')) {
return mysqlResult([{ count: 2 }], [{ name: 'count', type: 8 }]);
}
// Schema listing.
if (sql.includes('INFORMATION_SCHEMA.SCHEMATA')) {
return mysqlResult([{ SCHEMA_NAME: 'analytics' }, { SCHEMA_NAME: 'warehouse' }], [{ name: 'SCHEMA_NAME' }]);
}
// Connectivity probe.
if (sql.trim() === 'SELECT 1') {
return mysqlResult([{ '1': 1 }], [{ name: '1', type: 8 }]);
}
throw new Error(`Unexpected SQL: ${sql} params=${JSON.stringify(params)}`);
});
// `release` and `end` are shared across every pool/connection this factory hands out.
const release = vi.fn();
const end = vi.fn(async () => undefined);
return {
createPool: vi.fn(() => ({
getConnection: vi.fn(async () => ({ query, release })),
end,
})),
};
}
/**
 * Stub MySQL pool factory for the multi-schema tests.
 *
 * Each INFORMATION_SCHEMA lookup asserts that it is parameterised with both
 * schemas (`analytics`, `mart`) and answers with one table per schema. The
 * optional errors make the primary-key / foreign-key lookups throw instead,
 * which lets a test simulate denied constraint discovery. The error is thrown
 * before the parameter assertion runs. Any other SQL throws.
 *
 * @param options Optional errors to throw from the primary-key and foreign-key lookups.
 */
function multiSchemaMysqlPoolFactory(
  options: { primaryKeyError?: Error; foreignKeyError?: Error } = {},
): KtxMysqlPoolFactory {
  const expectedSchemas = ['analytics', 'mart'];

  const runQuery = vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => {
    if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
      expect(params).toEqual(expectedSchemas);
      const tableRows = [
        {
          TABLE_SCHEMA: 'analytics',
          TABLE_NAME: 'customers',
          TABLE_TYPE: 'BASE TABLE',
          TABLE_COMMENT: '',
          TABLE_ROWS: 2,
        },
        {
          TABLE_SCHEMA: 'mart',
          TABLE_NAME: 'orders',
          TABLE_TYPE: 'BASE TABLE',
          TABLE_COMMENT: '',
          TABLE_ROWS: 3,
        },
      ];
      const tableFields = [
        { name: 'TABLE_SCHEMA' },
        { name: 'TABLE_NAME' },
        { name: 'TABLE_TYPE' },
        { name: 'TABLE_COMMENT' },
        { name: 'TABLE_ROWS' },
      ];
      return mysqlResult(tableRows, tableFields);
    }

    if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
      expect(params).toEqual(expectedSchemas);
      const columnRows = [
        {
          TABLE_SCHEMA: 'analytics',
          TABLE_NAME: 'customers',
          COLUMN_NAME: 'id',
          DATA_TYPE: 'int',
          IS_NULLABLE: 'NO',
          COLUMN_COMMENT: '',
        },
        {
          TABLE_SCHEMA: 'mart',
          TABLE_NAME: 'orders',
          COLUMN_NAME: 'id',
          DATA_TYPE: 'int',
          IS_NULLABLE: 'NO',
          COLUMN_COMMENT: '',
        },
      ];
      return mysqlResult(columnRows, []);
    }

    const isPrimaryKeyLookup =
      sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes("CONSTRAINT_NAME = 'PRIMARY'");
    if (isPrimaryKeyLookup) {
      if (options.primaryKeyError) {
        throw options.primaryKeyError;
      }
      expect(params).toEqual(expectedSchemas);
      const primaryKeyRows = [
        { TABLE_SCHEMA: 'analytics', TABLE_NAME: 'customers', COLUMN_NAME: 'id' },
        { TABLE_SCHEMA: 'mart', TABLE_NAME: 'orders', COLUMN_NAME: 'id' },
      ];
      return mysqlResult(primaryKeyRows, []);
    }

    const isForeignKeyLookup =
      sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes('REFERENCED_TABLE_NAME IS NOT NULL');
    if (isForeignKeyLookup) {
      if (options.foreignKeyError) {
        throw options.foreignKeyError;
      }
      expect(params).toEqual(expectedSchemas);
      return mysqlResult([], []);
    }

    throw new Error(`Unexpected SQL: ${sql} params=${JSON.stringify(params)}`);
  });

  return {
    // Each pool and each connection gets fresh `end` / `release` mocks; the query stub is shared.
    createPool: vi.fn(() => ({
      getConnection: vi.fn(async () => ({ query: runQuery, release: vi.fn() })),
      end: vi.fn(async () => undefined),
    })),
  };
}
describe('KtxMysqlScanConnector', () => {
it('resolves MySQL connection configuration safely', () => {
expect(isKtxMysqlConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(true);
expect(isKtxMysqlConnectionConfig({ driver: 'postgres', host: 'localhost', database: 'analytics' })).toBe(false);
expect(
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
port: 3307,
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
ssl: true,
},
}),
).toMatchObject({
host: 'db.example.test',
port: 3307,
database: 'analytics',
user: 'reader',
password: 'secret', // pragma: allowlist secret
ssl: { rejectUnauthorized: false },
});
});
it('defaults and validates MySQL maxConnections', () => {
const baseConnection: KtxMysqlConnectionConfig = {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
};
expect(
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: baseConnection,
}),
).toMatchObject({ connectionLimit: 10 });
expect(
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: 25 },
}),
).toMatchObject({ connectionLimit: 25 });
expect(
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: '12' as never },
}),
).toMatchObject({ connectionLimit: 12 });
for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) {
expect(() =>
mysqlConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections },
}),
).toThrow('connections.warehouse.maxConnections must be a positive integer');
}
});
it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T12:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'mysql' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'mysql',
extractedAt: '2026-04-29T12:00:00.000Z',
scope: { schemas: ['analytics'] },
metadata: {
database: 'analytics',
host: 'db.example.test',
table_count: 3,
total_columns: 6,
},
});
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([
['customers', 'table', 2, 'Customer table'],
['orders', 'table', 2, 'Order table'],
['order_summary', 'view', null, null],
]);
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
});
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: 'analytics',
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fk',
},
]);
});
it('introspects every configured MySQL schema scope', async () => {
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
schemas: ['analytics', 'mart'],
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory: multiSchemaMysqlPoolFactory(),
now: () => new Date('2026-05-21T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'mysql' },
{ runId: 'scan-run-1' },
);
expect(snapshot.scope).toEqual({ schemas: ['analytics', 'mart'] });
expect(snapshot.metadata).toMatchObject({ database: 'analytics', schemas: ['analytics', 'mart'] });
expect(snapshot.tables.map((table) => `${table.db}.${table.name}`)).toEqual([
'analytics.customers',
'mart.orders',
]);
});
// Both constraint lookups are made to fail with MySQL access-denied errors
// (ER_TABLEACCESS_DENIED_ERROR / ER_DBACCESS_DENIED_ERROR). The scan must still
// resolve, reporting one recoverable warning per schema and constraint kind and
// leaving every table without primary or foreign keys.
it('soft-fails denied MySQL constraint discovery with one warning per schema and kind', async () => {
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
schemas: ['analytics', 'mart'],
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory: multiSchemaMysqlPoolFactory({
primaryKeyError: Object.assign(new Error('select command denied'), {
code: 'ER_TABLEACCESS_DENIED_ERROR',
errno: 1142,
}),
foreignKeyError: Object.assign(new Error('database access denied'), {
code: 'ER_DBACCESS_DENIED_ERROR',
errno: 1044,
}),
}),
now: () => new Date('2026-04-29T12:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'mysql' },
{ runId: 'scan-run-mysql-denied-constraints' },
);
// Expected order: primary-key warnings for each schema first, then foreign-key warnings.
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in analytics (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'analytics', kind: 'primary_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in mart (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'mart', kind: 'primary_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped foreign-key discovery in analytics (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'analytics', kind: 'foreign_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped foreign-key discovery in mart (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'mart', kind: 'foreign_key' },
},
]);
// With constraint discovery skipped, no column may be flagged as a primary key
// and no table may carry foreign keys.
expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true);
expect(snapshot.tables.every((table) => table.foreignKeys.length === 0)).toBe(true);
});
// Passes a tableScope containing only analytics.orders and checks that the
// INFORMATION_SCHEMA.TABLES query is narrowed with a `TABLE_NAME IN (?)`
// predicate bound to the schema and the scoped table name.
it('limits introspection to tables in tableScope', async () => {
// Every statement issued through the fake connection is recorded here.
const queries: Array<{ sql: string; params?: unknown }> = [];
const poolFactory: KtxMysqlPoolFactory = {
createPool: vi.fn(() => ({
getConnection: vi.fn(async () => ({
query: vi.fn(async (sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]> => {
queries.push({ sql, params });
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return mysqlResult(
[
{
TABLE_SCHEMA: 'analytics',
TABLE_NAME: 'orders',
TABLE_TYPE: 'BASE TABLE',
TABLE_COMMENT: '',
TABLE_ROWS: 2,
},
],
[],
);
}
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return mysqlResult(
[
{
TABLE_SCHEMA: 'analytics',
TABLE_NAME: 'orders',
COLUMN_NAME: 'id',
DATA_TYPE: 'int',
IS_NULLABLE: 'NO',
COLUMN_COMMENT: '',
},
],
[],
);
}
// Any other statement gets an empty result set.
return mysqlResult([], []);
}),
release: vi.fn(),
})),
end: vi.fn(async () => undefined),
})),
};
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory,
});
const scope = tableRefSet([{ catalog: null, db: 'analytics', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'mysql', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
const tablesQuery = queries.find((query) => query.sql.includes('INFORMATION_SCHEMA.TABLES'));
expect(tablesQuery?.sql).toMatch(/TABLE_NAME IN \(\?\)/);
expect(tablesQuery?.params).toEqual(['analytics', 'orders']);
});
// Exercises the connector's query-style operations against the shared fake
// pool: table and column sampling, distinct values, read-only execution
// (including rejection of a DELETE), row count, schema listing, column
// statistics, and finally cleanup.
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
const poolFactory = fakePoolFactory();
const connector = new KtxMysqlScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
poolFactory,
});
await expect(
connector.sampleTable(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'orders' }, columns: ['id', 'status'], limit: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1 });
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'orders' }, column: 'status', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: null, db: 'analytics', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from analytics.orders', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
// Non-SELECT statements must be rejected before reaching the database.
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']);
// columnStats is expected to resolve to null for this connector.
await expect(connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: null, db: 'analytics', name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
)).resolves.toBeNull();
await connector.cleanup();
});
// Builds the live-database introspection adapter over the fake pool and checks
// that extractSchema('warehouse') exposes the `customers` table with its two
// columns (types, nullability, primary-key flag, comments) and no foreign keys.
it('adapts native MySQL snapshots to live-database introspection for local ingest', async () => {
const introspection = createMysqlLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'mysql',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'secret', // pragma: allowlist secret
},
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T12:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
// extractedAt must reflect the injected clock.
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T12:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: null,
db: 'analytics',
columns: [
{
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
},
{
name: 'name',
nativeType: 'varchar',
normalizedType: 'varchar',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
});
});

View file

@ -2,6 +2,7 @@ import mysql, { type FieldPacket, type Pool, type RowDataPacket } from 'mysql2/p
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import {
constraintDiscoveryWarning,
@ -30,7 +31,6 @@ import {
type KtxTableSampleInput,
type KtxTableSampleResult,
} from '../../context/scan/types.js';
import { KtxMysqlDialect } from './dialect.js';
export interface KtxMysqlConnectionConfig {
driver?: string;
@ -303,6 +303,25 @@ function queryParams(params: Record<string, unknown> | unknown[] | undefined): u
return Array.isArray(params) ? params : Object.values(params);
}
/**
 * Converts `:name` placeholders into positional `?` markers for mysql2.
 *
 * Placeholders are replaced in the order they appear in `sql`, and the
 * returned `params` array lists the matching values in that same order, so a
 * name used twice contributes its value twice. Placeholders whose name is not
 * an own key of `params` are left in the SQL untouched.
 *
 * @param sql - SQL text that may contain `:identifier` placeholders.
 * @param params - Named parameter values; when omitted the SQL is returned as-is.
 * @returns The rewritten SQL and its positional values (`params` is
 *   `undefined` when no parameter object was supplied).
 * @internal
 */
export function prepareMysqlReadOnlyQuery(
  sql: string,
  params?: Record<string, unknown>,
): { sql: string; params?: unknown[] } {
  if (!params) {
    return { sql, params: undefined };
  }
  const values: unknown[] = [];
  const parameterizedQuery = sql.replace(/:([A-Za-z_][A-Za-z0-9_]*)\b/g, (placeholder, key: string) => {
    // Own-property check only: `in` would also match inherited members such as
    // `toString` or `constructor` and bind a function as the parameter value.
    if (!Object.prototype.hasOwnProperty.call(params, key)) {
      return placeholder;
    }
    values.push(params[key]);
    return '?';
  });
  return { sql: parameterizedQuery, params: values };
}
export function isKtxMysqlConnectionConfig(
connection: KtxMysqlConnectionConfig | undefined,
): connection is KtxMysqlConnectionConfig {
@ -376,7 +395,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
private readonly poolFactory: KtxMysqlPoolFactory;
private readonly endpointResolver?: KtxMysqlEndpointResolver;
private readonly now: () => Date;
private readonly dialect = new KtxMysqlDialect();
private readonly dialect = getDialectForDriver('mysql');
private pool: KtxMysqlPool | null = null;
private resolvedEndpoint: KtxMysqlResolvedEndpoint | null = null;
@ -550,7 +569,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = Array.isArray(input.params)
? { sql: limitedSql, params: input.params }
: this.dialect.prepareQuery(limitedSql, input.params);
: prepareMysqlReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -625,6 +644,7 @@ export class KtxMysqlScanConnector implements KtxScanConnector {
filterSchemas,
);
return rows.map((row) => ({
catalog: null,
schema: row.TABLE_SCHEMA,
name: row.TABLE_NAME,
kind: row.TABLE_TYPE === 'VIEW' ? ('view' as const) : ('table' as const),

View file

@ -1,49 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxMysqlDialect } from './dialect.js';
// Unit tests for the MySQL dialect: identifier quoting, native-type mapping,
// generated SQL snippets, and named-parameter preparation.
describe('KtxMysqlDialect', () => {
const dialect = new KtxMysqlDialect();
// Backticks inside an identifier are doubled; a missing db yields an unqualified name.
it('quotes identifiers and formats database-qualified table names', () => {
expect(dialect.quoteIdentifier('orders')).toBe('`orders`');
expect(dialect.quoteIdentifier('odd`name')).toBe('`odd``name`');
expect(dialect.formatTableName({ catalog: null, db: 'analytics', name: 'orders' })).toBe(
'`analytics`.`orders`',
);
expect(dialect.formatTableName({ catalog: null, db: null, name: 'orders' })).toBe('`orders`');
});
// tinyint(1) is treated as boolean; unknown or empty types fall back to 'string'.
it('maps native MySQL types to KTX dimension types', () => {
expect(dialect.mapToDimensionType('tinyint(1)')).toBe('boolean');
expect(dialect.mapToDimensionType('int')).toBe('number');
expect(dialect.mapToDimensionType('decimal(10,2)')).toBe('number');
expect(dialect.mapToDimensionType('timestamp')).toBe('time');
expect(dialect.mapToDimensionType('varchar(255)')).toBe('string');
expect(dialect.mapToDimensionType('json')).toBe('string');
expect(dialect.mapToDimensionType('')).toBe('string');
});
it('builds sampling, distinct-value, pagination, and time SQL', () => {
expect(dialect.generateSampleQuery('`analytics`.`orders`', 25, ['id', 'status'])).toBe(
'SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 25',
);
expect(dialect.generateColumnSampleQuery('`analytics`.`orders`', 'status', 10)).toBe(
"SELECT `status` FROM `analytics`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS CHAR)) != '' LIMIT 10",
);
expect(dialect.generateDistinctValuesQuery('`analytics`.`orders`', '`status`', 5)).toContain(
'SELECT DISTINCT CAST(`status` AS CHAR) AS val',
);
expect(dialect.getLimitOffsetClause(10, 20)).toBe('LIMIT 10 OFFSET 20');
expect(dialect.getTimeTruncExpression('created_at', 'month')).toBe("DATE_FORMAT(created_at, '%Y-%m-01')");
});
// Values follow placeholder order in the SQL (id, status), not key order in the params object.
it('prepares named parameters in deterministic SQL placeholder order', () => {
expect(dialect.prepareQuery('select * from orders where id = :id and status = :status', {
status: 'paid',
id: 10,
})).toEqual({
sql: 'select * from orders where id = ? and status = ?',
params: [10, 'paid'],
});
});
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type MysqlTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxMysqlDialect {
readonly type = 'mysql';
/** @internal */
export class KtxMysqlDialect implements KtxDialect {
readonly type = 'mysql' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
datetime: 'time',
@ -41,9 +50,19 @@ export class KtxMysqlDialect {
}
formatTableName(table: MysqlTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi');
}
/** Formats a table reference for display by delegating to `formatDialectDisplayRef` with the 'ansi' style. */
formatDisplayRef(table: MysqlTableNameRef): string {
return formatDialectDisplayRef(table, 'ansi');
}
/** Parses a display string into a table reference via `parseDialectDisplayRef` ('ansi' style); may return null. */
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'ansi');
}
/** Returns the table-part count reported by `columnDisplayPartCount` for the 'ansi' style. */
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('ansi');
}
mapDataType(nativeType: string): string {
@ -91,21 +110,6 @@ export class KtxMysqlDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS CHAR)) != '' LIMIT ${limit}`;
}
/**
 * Converts `:name` placeholders into positional `?` markers.
 *
 * Values are emitted in the order their placeholders appear in `sql`.
 * Placeholders whose name is not an own key of `params` are left untouched.
 *
 * @param sql - SQL text that may contain `:identifier` placeholders.
 * @param params - Named parameter values; when omitted the SQL is returned as-is.
 * @returns The rewritten SQL and its positional values.
 */
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
  if (!params) {
    return { sql, params: undefined };
  }
  const values: unknown[] = [];
  const parameterizedQuery = sql.replace(/:([A-Za-z_][A-Za-z0-9_]*)\b/g, (placeholder, key: string) => {
    // Own-property check only: `in` would also match inherited members such as
    // `toString` or `constructor` and bind a function as the parameter value.
    if (!Object.prototype.hasOwnProperty.call(params, key)) {
      return placeholder;
    }
    values.push(params[key]);
    return '?';
  });
  return { sql: parameterizedQuery, params: values };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -118,7 +122,11 @@ export class KtxMysqlDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
/** Always returns an empty string; the limit argument is ignored by this dialect. */
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -129,6 +137,18 @@ export class KtxMysqlDialect {
return `COUNT(DISTINCT ${column})`;
}
/** Returns SQL for the character length of the given column expression after casting it to CHAR. */
textLengthExpression(columnSql: string): string {
return `CHAR_LENGTH(CAST(${columnSql} AS CHAR))`;
}
/** Returns SQL that casts the given column expression to CHAR. */
castToText(columnSql: string): string {
return `CAST(${columnSql} AS CHAR)`;
}
/**
 * Wraps `innerSql` in a scalar subquery that concatenates its `value` column
 * (cast to CHAR) with GROUP_CONCAT, using CHAR(31) as the separator.
 */
getSampleValueAggregation(innerSql: string): string {
return `(SELECT GROUP_CONCAT(CAST(value AS CHAR) SEPARATOR CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
SELECT COUNT(DISTINCT val) AS cardinality
@ -167,36 +187,4 @@ export class KtxMysqlDialect {
) AS sampled
`;
}
/**
 * Builds a MySQL expression that truncates `column` to the start of the
 * requested granularity. When `timezone` is given, the column is first
 * converted from '+00:00' to that zone with CONVERT_TZ.
 */
getTimeTruncExpression(
  column: string,
  granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
  timezone?: string,
): string {
  const source = timezone ? `CONVERT_TZ(${column}, '+00:00', '${timezone}')` : column;
  const byGranularity: Record<typeof granularity, string> = {
    day: `DATE(${source})`,
    week: `DATE(${source} - INTERVAL WEEKDAY(${source}) DAY)`,
    month: `DATE_FORMAT(${source}, '%Y-%m-01')`,
    quarter: `MAKEDATE(YEAR(${source}), 1) + INTERVAL (QUARTER(${source}) - 1) QUARTER`,
    year: `DATE_FORMAT(${source}, '%Y-01-01')`,
  };
  return byGranularity[granularity];
}
/**
 * Builds a DATE_ADD/TIMESTAMPDIFF expression that buckets `column` into
 * fixed-size intervals counted from `origin` (defaults to '1970-01-01').
 * `interval` is split on a single space into an amount and a unit; the unit is
 * upper-cased. When `timezone` is given, the column is first converted from
 * '+00:00' with CONVERT_TZ.
 */
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `CONVERT_TZ(${column}, '+00:00', '${timezone}')` : column;
// NOTE(review): an interval without a space leaves `unit` undefined, so the
// `unit!.toUpperCase()` calls below would throw; inputs are not validated here.
const [amount, unit] = interval.split(' ');
const originExpr = origin ? `'${origin}'` : `'1970-01-01'`;
return `DATE_ADD(${originExpr}, INTERVAL FLOOR(TIMESTAMPDIFF(${unit!.toUpperCase()}, ${originExpr}, ${col}) / ${amount}) * ${amount} ${unit!.toUpperCase()})`;
}
/**
 * Converts an "<amount> <unit>" string into `INTERVAL <amount> <UNIT>`.
 * NOTE(review): the input is split on a single space and not validated; a
 * value without a unit would throw at `unit!.toUpperCase()`.
 */
parseIntervalToSql(interval: string): string {
const [amount, unit] = interval.split(' ');
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
}
}

View file

@ -1,559 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createPostgresLiveDatabaseIntrospection } from '../../connectors/postgres/live-database-introspection.js';
import { isKtxPostgresConnectionConfig, KtxPostgresScanConnector, postgresPoolConfigFromConfig, type KtxPostgresConnectionConfig, type KtxPostgresPoolFactory } from '../../connectors/postgres/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
interface FakeQueryResult {
rows: Record<string, unknown>[];
fields?: Array<{ name: string; dataTypeID: number }>;
}
type FakeQueryResponse = FakeQueryResult | Error;
/**
 * Creates a pool factory whose connections answer queries from `results`.
 *
 * Incoming SQL is whitespace-normalized, then matched against the map keys in
 * insertion order; the first key contained in the SQL wins. An `Error` entry
 * is thrown, any other entry is returned, and unmatched SQL raises an
 * "Unexpected SQL" error that includes the bound parameters.
 */
function fakePoolFactory(results: Map<string, FakeQueryResponse>): KtxPostgresPoolFactory {
  const query = vi.fn(async (sql: string, params?: unknown[]) => {
    const flattened = sql.replace(/\s+/g, ' ').trim();
    const match = [...results.entries()].find(([fragment]) => flattened.includes(fragment));
    if (match === undefined) {
      throw new Error(`Unexpected SQL: ${flattened} params=${JSON.stringify(params ?? [])}`);
    }
    const response = match[1];
    if (response instanceof Error) {
      throw response;
    }
    return response;
  });
  return {
    createPool: () => ({
      connect: async () => ({ query, release: vi.fn() }),
      end: vi.fn(async () => undefined),
    }),
  };
}
/**
 * Returns a fresh map of canned query responses for the Postgres connector tests.
 *
 * Each key is a SQL fragment and each value the rows (and optional field
 * descriptors) to return for a statement containing that fragment. A new Map
 * is built on every call, so tests can override entries without affecting
 * each other.
 */
function metadataResults(): Map<string, FakeQueryResponse> {
return new Map<string, FakeQueryResponse>([
// Table listing: two base tables ('r') and one view ('v').
[
'FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n',
{
rows: [
{ table_name: 'customers', table_kind: 'r', row_count: '2', table_comment: 'Customers' },
{ table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null },
{ table_name: 'recent_orders', table_kind: 'v', row_count: '0', table_comment: 'Recent orders' },
],
},
],
// Column listing: six columns across the three relations.
[
'FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_class c',
{
rows: [
{ table_name: 'customers', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
{ table_name: 'customers', column_name: 'name', data_type: 'text', is_nullable: false, column_comment: 'Name' },
{ table_name: 'orders', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
{ table_name: 'orders', column_name: 'customer_id', data_type: 'integer', is_nullable: false, column_comment: null },
{ table_name: 'orders', column_name: 'status', data_type: 'text', is_nullable: true, column_comment: null },
{ table_name: 'recent_orders', column_name: 'id', data_type: 'integer', is_nullable: true, column_comment: null },
],
},
],
// Foreign keys: orders.customer_id -> public.customers.id.
[
"tc.constraint_type = 'FOREIGN KEY'",
{
rows: [
{
table_name: 'orders',
column_name: 'customer_id',
foreign_table_schema: 'public',
foreign_table_name: 'customers',
foreign_column_name: 'id',
constraint_name: 'orders_customer_id_fkey',
},
],
},
],
// Primary keys: `id` on customers and orders.
[
"tc.constraint_type = 'PRIMARY KEY'",
{
rows: [
{ table_name: 'customers', column_name: 'id' },
{ table_name: 'orders', column_name: 'id' },
],
},
],
// Sampling, distinct-value, row-count, statistics, ping, and schema-list responses.
['SELECT "id" FROM "public"."orders" LIMIT 1', { rows: [{ id: 10 }], fields: [{ name: 'id', dataTypeID: 23 }] }],
[
'SELECT "status" FROM "public"."orders" WHERE "status" IS NOT NULL',
{ rows: [{ status: 'paid' }, { status: 'open' }], fields: [{ name: 'status', dataTypeID: 25 }] },
],
['COUNT(DISTINCT val) AS cardinality', { rows: [{ cardinality: '2' }] }],
['SELECT DISTINCT "status"::text AS val', { rows: [{ val: 'open' }, { val: 'paid' }] }],
['SELECT COUNT(*) AS count FROM "public"."orders"', { rows: [{ count: '3' }] }],
['FROM pg_stats s', { rows: [{ column_name: 'status', estimated_cardinality: '2' }] }],
['SELECT 1', { rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] }],
['SELECT schema_name FROM information_schema.schemata', { rows: [{ schema_name: 'public' }] }],
]);
}
describe('KtxPostgresScanConnector', () => {
// Covers the config type guard and the translation of a connection config
// into pool options (field-based, URL-from-env, and password-less variants).
it('resolves configuration safely', () => {
expect(isKtxPostgresConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL' })).toBe(true);
expect(isKtxPostgresConnectionConfig({ driver: 'postgresql', host: 'db', database: 'analytics' })).toBe(false);
expect(isKtxPostgresConnectionConfig({ driver: 'mysql', host: 'db' })).toBe(false);
// Field-based config: default port 5432, schemas become a search_path option,
// and rejectUnauthorized is carried into the ssl settings.
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schemas: ['analytics', 'public'],
ssl: true,
rejectUnauthorized: false,
},
}),
).toMatchObject({
host: 'db.example.test',
port: 5432,
database: 'analytics',
user: 'reader',
password: 'test-password', // pragma: allowlist secret
options: '-c search_path=analytics,public',
ssl: { rejectUnauthorized: false },
});
// URL resolved from the supplied env with sslmode=prefer: expanded into
// discrete fields, with neither `connectionString` nor `ssl` on the result.
const libpqPreferConfig = postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
url: 'env:DEMO_DATABASE_URL',
},
env: {
DEMO_DATABASE_URL: 'postgresql://reader@demo.example.test:5432/demo?sslmode=prefer',
},
});
expect(libpqPreferConfig).toMatchObject({
host: 'demo.example.test',
port: 5432,
database: 'demo',
user: 'reader',
});
expect(libpqPreferConfig).not.toHaveProperty('connectionString');
expect(libpqPreferConfig).not.toHaveProperty('ssl');
// A config without a password is still accepted.
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { driver: 'postgres', host: 'db.example.test', database: 'analytics', username: 'reader' },
}),
).toMatchObject({
host: 'db.example.test',
database: 'analytics',
user: 'reader',
});
});
// maxConnections maps to the pool's `max`: defaults to 10, accepts numbers and
// numeric strings, and rejects anything that is not a positive integer.
it('defaults and validates Postgres maxConnections', () => {
const baseConnection: KtxPostgresConnectionConfig = {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
};
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: baseConnection,
}),
).toMatchObject({ max: 10 });
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: 50 },
}),
).toMatchObject({ max: 50 });
// `as never` bypasses the declared type to simulate a string coming from untyped config.
expect(
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: '12' as never },
}),
).toMatchObject({ max: 12 });
for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) {
expect(() =>
postgresPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections },
}),
).toThrow('connections.warehouse.maxConnections must be a positive integer');
}
});
// Full introspection against the canned metadata: checks snapshot header
// fields, table kinds and row estimates, column typing, and foreign keys.
it('introspects schemas, tables, views, primary keys, comments, row counts, and foreign keys', async () => {
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory: fakePoolFactory(metadataResults()),
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'postgres' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'postgres',
extractedAt: '2026-04-29T10:00:00.000Z',
scope: { schemas: ['public'] },
metadata: {
database: 'analytics',
schemas: ['public'],
host: 'db.example.test',
table_count: 3,
total_columns: 6,
},
});
// The view is expected to report a null row estimate even though the fixture returns '0'.
expect(snapshot.tables.map((table) => [table.db, table.name, table.kind, table.estimatedRows])).toEqual([
['public', 'customers', 'table', 2],
['public', 'orders', 'table', 3],
['public', 'recent_orders', 'view', null],
]);
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
});
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: 'public',
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fkey',
},
]);
});
// Replaces both constraint responses with errors carrying SQLSTATE 42501 and
// 42P01. The scan must still resolve, emitting one recoverable warning per
// constraint kind and leaving tables without keys.
it('soft-fails denied Postgres constraint discovery with scan warnings', async () => {
const results = metadataResults();
results.set(
"tc.constraint_type = 'PRIMARY KEY'",
Object.assign(new Error('permission denied for information_schema'), { code: '42501' }),
);
results.set(
"tc.constraint_type = 'FOREIGN KEY'",
Object.assign(new Error('relation information_schema.key_column_usage does not exist'), { code: '42P01' }),
);
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory: fakePoolFactory(results),
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'postgres' },
{ runId: 'scan-run-denied-constraints' },
);
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'public', kind: 'primary_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped foreign-key discovery in public (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'public', kind: 'foreign_key' },
},
]);
expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true);
expect(snapshot.tables.every((table) => table.foreignKeys.length === 0)).toBe(true);
});
// Errors that are not permission-related (here a connection reset) must not be
// downgraded to warnings: introspect has to reject with the very same error.
it('propagates non-denial Postgres constraint discovery errors', async () => {
  const connectionReset = Object.assign(new Error('connection reset'), { code: 'ECONNRESET' });
  const responses = metadataResults();
  responses.set("tc.constraint_type = 'PRIMARY KEY'", connectionReset);

  const scanner = new KtxPostgresScanConnector({
    connectionId: 'warehouse',
    connection: {
      driver: 'postgres',
      host: 'db.example.test',
      database: 'analytics',
      username: 'reader',
      password: 'test-password', // pragma: allowlist secret
      schema: 'public',
    },
    poolFactory: fakePoolFactory(responses),
  });

  const pendingScan = scanner.introspect(
    { connectionId: 'warehouse', driver: 'postgres' },
    { runId: 'scan-run-network-error' },
  );
  await expect(pendingScan).rejects.toBe(connectionReset);
});
// Exercises the connector's query-style operations against the canned
// responses: sampling, distinct values, column statistics, row count, schema
// listing, connection test, and rejection of a non-read-only statement.
it('runs samples, distinct values, statistics, read-only SQL, and schema listing', async () => {
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory: fakePoolFactory(metadataResults()),
});
// headerTypes is expected to be 'integer' for the fixture's dataTypeID 23.
await expect(
connector.sampleTable(
{ connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, columns: ['id'], limit: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id'], headerTypes: ['integer'], rows: [[10]], totalRows: 1 });
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, column: 'status', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: null, db: 'public', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(connector.getColumnStatistics({ catalog: null, db: 'public', name: 'orders' })).resolves.toEqual({
cardinalityByColumn: new Map([['status', 2]]),
});
await expect(connector.getTableRowCount({ db: 'public', name: 'orders' })).resolves.toBe(3);
await expect(connector.listSchemas()).resolves.toEqual(['public']);
await expect(connector.testConnection()).resolves.toEqual({ success: true });
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
});
// Passes a tableScope containing only public.orders and checks that the table
// listing query filters with `c.relname = ANY($2)` bound to ['orders'].
it('limits introspection to tables in tableScope', async () => {
// Every statement issued through the fake client is recorded here.
const queries: Array<{ sql: string; params?: unknown[] }> = [];
const poolFactory: KtxPostgresPoolFactory = {
createPool() {
return {
async connect() {
return {
query: vi.fn(async (sql: string, params?: unknown[]) => {
queries.push({ sql, params });
if (sql.includes('FROM pg_catalog.pg_class c')) {
return { rows: [{ table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null }] };
}
if (sql.includes('FROM pg_catalog.pg_attribute a')) {
return {
rows: [
{
table_name: 'orders',
column_name: 'id',
data_type: 'integer',
is_nullable: false,
column_comment: null,
},
],
};
}
// Any other statement gets an empty result set.
return { rows: [] };
}),
release: vi.fn(),
};
},
end: vi.fn(async () => undefined),
};
},
};
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
poolFactory,
});
const scope = tableRefSet([{ catalog: null, db: 'public', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'postgres', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
const tablesQuery = queries.find((query) => query.sql.includes('FROM pg_catalog.pg_class c'));
expect(tablesQuery?.sql).toMatch(/c\.relname = ANY\(\$2\)/);
expect(tablesQuery?.params).toEqual(['public', ['orders']]);
});
// Builds the live-database introspection adapter over the canned metadata and
// checks that extractSchema('warehouse') exposes the `customers` table with
// its two columns (types, nullability, primary-key flag, comments) and no
// foreign keys.
it('adapts native PostgreSQL snapshots to live-database introspection for local ingest', async () => {
const introspection = createPostgresLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
},
poolFactory: fakePoolFactory(metadataResults()),
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
// extractedAt must reflect the injected clock.
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T10:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: null,
db: 'public',
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
{
name: 'name',
nativeType: 'text',
normalizedType: 'text',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: 'Name',
},
],
foreignKeys: [],
});
});
// Guards against closing the pool too early: the wrapper pool throws from
// connect() once end() has been called, so extractSchema can only succeed if
// all connections are acquired before the pool is ended.
it('does not end the pool before introspection completes', async () => {
let endCalled = false;
const endAwarePoolFactory: KtxPostgresPoolFactory = {
createPool() {
// Delegate to the regular fake pool; only connect/end are wrapped.
const inner = fakePoolFactory(metadataResults()).createPool({
max: 1,
idleTimeoutMillis: 1,
connectionTimeoutMillis: 1,
});
return {
async connect() {
if (endCalled) {
throw new Error('Cannot use a pool after calling end on the pool');
}
return inner.connect();
},
async end() {
endCalled = true;
return inner.end();
},
};
},
};
const introspection = createPostgresLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
schema: 'public',
},
},
poolFactory: endAwarePoolFactory,
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot.tables.length).toBeGreaterThan(0);
// The pool must nevertheless have been ended by the time extractSchema resolves.
expect(endCalled).toBe(true);
});
// The connector is expected to register an 'error' handler on the pool it
// creates; the fake pool exposes a spy `on` method to observe that call.
it('attaches an error listener to the pg pool', async () => {
const on = vi.fn();
const poolFactory: KtxPostgresPoolFactory = {
createPool() {
return {
on,
async connect() {
return {
query: vi.fn(async () => ({ rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] })),
release: vi.fn(),
};
},
end: vi.fn(async () => undefined),
};
},
};
const connector = new KtxPostgresScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'postgres',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
password: 'test-password', // pragma: allowlist secret
},
poolFactory,
});
// testConnection forces the pool to be created.
await expect(connector.testConnection()).resolves.toEqual({ success: true });
expect(on).toHaveBeenCalledWith('error', expect.any(Function));
});
});

View file

@ -1,6 +1,7 @@
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -26,7 +27,6 @@ import {
type KtxTableSampleResult,
} from '../../context/scan/types.js';
import { Pool } from 'pg';
import { KtxPostgresDialect } from './dialect.js';
const PG_OID_TYPE_MAP: Record<number, string> = {
16: 'boolean',
@ -219,6 +219,29 @@ function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string,
return grouped;
}
/**
 * Rewrites `:name` placeholders in `sql` into PostgreSQL positional parameters (`$1`, `$2`, …).
 *
 * Positions follow the key order of `params`; the returned `params` array holds the matching
 * values in that same order. When `params` is omitted the SQL is returned untouched with
 * `params: undefined`.
 *
 * A placeholder is only recognised when its colon is not itself preceded by a colon, so a
 * PostgreSQL cast such as `value::text` is never mistaken for a parameter named `text`.
 * Parameter names are matched literally: regex metacharacters in a name are escaped.
 * Placeholders are not distinguished from identical text inside string literals or comments.
 *
 * @param sql SQL text that may contain `:name` placeholders.
 * @param params Named parameter values keyed by placeholder name.
 * @returns The rewritten SQL plus the positional value array.
 * @internal
 */
export function preparePostgresReadOnlyQuery(
  sql: string,
  params?: Record<string, unknown>,
): { sql: string; params?: unknown[] } {
  if (!params) {
    return { sql, params: undefined };
  }

  const paramNames = Object.keys(params);
  const values: unknown[] = paramNames.map((name) => params[name]);
  if (paramNames.length === 0) {
    // Nothing to substitute; avoid building an empty alternation.
    return { sql, params: values };
  }

  const positions = new Map<string, number>();
  paramNames.forEach((name, index) => {
    positions.set(name, index + 1);
  });

  // Longest names first so that a name is never shadowed by one of its own prefixes.
  const alternation = [...paramNames]
    .sort((a, b) => b.length - a.length)
    .map((name) => name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');

  // `(?<!:)` keeps the second colon of a `::type` cast from starting a placeholder.
  const placeholder = new RegExp(`(?<!:):(${alternation})\\b`, 'g');

  // A function replacement is used so `$` in the output is never read as a replacement pattern.
  const parameterizedQuery = sql.replace(
    placeholder,
    (_match: string, name: string) => `$${positions.get(name)}`,
  );
  return { sql: parameterizedQuery, params: values };
}
function primaryKeyMap(rows: PostgresPrimaryKeyRow[]): Map<string, Set<string>> {
const grouped = new Map<string, Set<string>>();
for (const row of rows) {
@ -400,7 +423,7 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
private readonly poolFactory: KtxPostgresPoolFactory;
private readonly endpointResolver?: KtxPostgresEndpointResolver;
private readonly now: () => Date;
private readonly dialect = new KtxPostgresDialect();
private readonly dialect = getDialectForDriver('postgres');
private pool: KtxPostgresPool | null = null;
private lastIdlePoolError: Error | null = null;
private resolvedEndpoint: KtxPostgresResolvedEndpoint | null = null;
@ -489,7 +512,7 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = Array.isArray(input.params)
? { sql: limitedSql, params: input.params }
: this.dialect.prepareQuery(limitedSql, input.params);
: preparePostgresReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -584,6 +607,7 @@ export class KtxPostgresScanConnector implements KtxScanConnector {
[filterSchemas],
);
return rows.map((row) => ({
catalog: null,
schema: row.schema_name,
name: row.table_name,
kind: row.table_kind === 'v' ? ('view' as const) : ('table' as const),

View file

@ -1,52 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxPostgresDialect } from './dialect.js';
describe('KtxPostgresDialect', () => {
  // Single dialect instance reused by every case in this suite.
  const postgres = new KtxPostgresDialect();

  it('quotes identifiers and formats schema-qualified tables', () => {
    const quoted = postgres.quoteIdentifier('order"items');
    expect(quoted).toBe('"order""items"');

    const withSchema = postgres.formatTableName({ catalog: null, db: 'public', name: 'orders' });
    expect(withSchema).toBe('"public"."orders"');

    const bareTable = postgres.formatTableName({ catalog: null, db: null, name: 'orders' });
    expect(bareTable).toBe('"orders"');
  });

  it('maps native PostgreSQL types to KTX dimension types', () => {
    // [native type, expected dimension type] pairs, checked in order.
    const cases: ReadonlyArray<readonly [string, string]> = [
      ['timestamp with time zone', 'time'],
      ['numeric(12,2)', 'number'],
      ['uuid', 'string'],
      ['boolean', 'boolean'],
      ['jsonb', 'string'],
    ];
    for (const [nativeType, dimensionType] of cases) {
      expect(postgres.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('generates sample, distinct-value, statistics, and time SQL', () => {
    const sampleSql = postgres.generateSampleQuery('"public"."orders"', 5, ['id', 'status']);
    expect(sampleSql).toBe('SELECT "id", "status" FROM "public"."orders" LIMIT 5');

    const columnSampleSql = postgres.generateColumnSampleQuery('"public"."orders"', 'status', 10);
    expect(columnSampleSql).toContain('TRIM(CAST("status" AS TEXT)) != \'\'');

    const distinctSql = postgres.generateDistinctValuesQuery('"public"."orders"', '"status"', 20);
    expect(distinctSql).toContain('SELECT DISTINCT "status"::text AS val');

    const statisticsSql = postgres.generateColumnStatisticsQuery('public', 'orders');
    expect(statisticsSql).toContain('FROM pg_stats s');

    const truncSql = postgres.getTimeTruncExpression('"created_at"', 'month');
    expect(truncSql).toBe('DATE_TRUNC(\'month\', "created_at")');
  });

  it('prepares named parameters with PostgreSQL positional parameters', () => {
    const simple = postgres.prepareQuery('select * from orders where id = :id and status = :status', {
      id: 1,
      status: 'paid',
    });
    expect(simple).toEqual({
      sql: 'select * from orders where id = $1 and status = $2',
      params: [1, 'paid'],
    });

    // One name is a prefix of the other; each placeholder must still map to its own position.
    const prefixed = postgres.prepareQuery('select :Client_Name_10, :Client_Name_1', {
      Client_Name_1: 'short',
      Client_Name_10: 'long',
    });
    expect(prefixed).toEqual({
      sql: 'select $2, $1',
      params: ['short', 'long'],
    });
  });
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type PostgresTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxPostgresDialect {
readonly type = 'postgresql';
/** @internal */
export class KtxPostgresDialect implements KtxDialect {
readonly type = 'postgres' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
timestamp: 'time',
@ -45,9 +54,19 @@ export class KtxPostgresDialect {
}
formatTableName(table: PostgresTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi');
}
/** Builds the display reference for `table` by delegating to `formatDialectDisplayRef` with the 'ansi' style. */
formatDisplayRef(table: PostgresTableNameRef): string {
  const displayRef = formatDialectDisplayRef(table, 'ansi');
  return displayRef;
}
/** Parses a display string into a table ref (or `null`) by delegating to `parseDialectDisplayRef` with the 'ansi' style. */
parseDisplayRef(display: string): KtxTableRef | null {
  const parsed = parseDialectDisplayRef(display, 'ansi');
  return parsed;
}
/** Returns the table part count reported by `columnDisplayPartCount` for the 'ansi' style. */
columnDisplayTablePartCount(): 1 | 2 | 3 {
  const partCount = columnDisplayPartCount('ansi');
  return partCount;
}
mapDataType(nativeType: string): string {
@ -92,25 +111,6 @@ export class KtxPostgresDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS TEXT)) != '' LIMIT ${limit}`;
}
/**
 * Rewrites `:name` placeholders in `sql` into PostgreSQL positional parameters (`$1`, `$2`, …).
 *
 * Positions follow the key order of `params`; the returned `params` array holds the matching
 * values in that same order. When `params` is omitted the SQL is returned untouched with
 * `params: undefined`.
 *
 * A placeholder is only recognised when its colon is not itself preceded by a colon, so a
 * PostgreSQL cast such as `value::text` is never mistaken for a parameter named `text`.
 * Parameter names are matched literally: regex metacharacters in a name are escaped.
 *
 * @param sql SQL text that may contain `:name` placeholders.
 * @param params Named parameter values keyed by placeholder name.
 * @returns The rewritten SQL plus the positional value array.
 */
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
  if (!params) {
    return { sql, params: undefined };
  }

  const paramNames = Object.keys(params);
  const values: unknown[] = paramNames.map((name) => params[name]);
  if (paramNames.length === 0) {
    // Nothing to substitute; avoid building an empty alternation.
    return { sql, params: values };
  }

  const positions = new Map<string, number>();
  paramNames.forEach((name, index) => {
    positions.set(name, index + 1);
  });

  // Longest names first so that a name is never shadowed by one of its own prefixes.
  const alternation = [...paramNames]
    .sort((a, b) => b.length - a.length)
    .map((name) => name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');

  // `(?<!:)` keeps the second colon of a `::type` cast from starting a placeholder.
  const placeholder = new RegExp(`(?<!:):(${alternation})\\b`, 'g');

  // A function replacement is used so `$` in the output is never read as a replacement pattern.
  const parameterizedQuery = sql.replace(
    placeholder,
    (_match: string, name: string) => `$${positions.get(name)}`,
  );
  return { sql: parameterizedQuery, params: values };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -126,7 +126,11 @@ export class KtxPostgresDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
/** Always returns an empty string; the `_limit` argument is not used. */
getTopClause(_limit: number): string {
  const noTopClause = '';
  return noTopClause;
}
getNullCountExpression(column: string): string {
@ -137,6 +141,18 @@ export class KtxPostgresDialect {
return `COUNT(DISTINCT ${column})`;
}
/** Returns SQL that measures the length of `columnSql` after casting it to TEXT. */
textLengthExpression(columnSql: string): string {
  const asText = `CAST(${columnSql} AS TEXT)`;
  return `LENGTH(${asText})`;
}
/** Wraps `columnSql` in a `CAST(... AS TEXT)` expression. */
castToText(columnSql: string): string {
  return ['CAST(', columnSql, ' AS TEXT)'].join('');
}
/**
 * Wraps `innerSql` in a scalar subquery that string-aggregates its `value` column
 * (cast to TEXT), using `CHR(31)` as the separator.
 */
getSampleValueAggregation(innerSql: string): string {
  const aggregate = 'STRING_AGG(CAST(value AS TEXT), CHR(31))';
  const source = `(${innerSql}) AS relationship_profile_values`;
  return `(SELECT ${aggregate} FROM ${source})`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -191,23 +207,4 @@ export class KtxPostgresDialect {
FROM sampled
`;
}
/**
 * Builds a `DATE_TRUNC` expression for `column` at the given granularity.
 *
 * When `timezone` is provided the column is first wrapped in `AT TIME ZONE`, with single
 * quotes in the timezone doubled before it is embedded in the SQL literal.
 */
getTimeTruncExpression(
  column: string,
  granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
  timezone?: string,
): string {
  let target = column;
  if (timezone) {
    const escapedZone = timezone.replace(/'/g, "''");
    target = `(${column} AT TIME ZONE '${escapedZone}')`;
  }
  return `DATE_TRUNC('${granularity}', ${target})`;
}
/**
 * Builds SQL that buckets `column` into steps of `interval` counted from an origin timestamp.
 *
 * The origin defaults to `TIMESTAMP '1970-01-01'` when `origin` is not given. When `timezone`
 * is given the column is wrapped in `AT TIME ZONE`. Single quotes in `interval`, `origin`,
 * and `timezone` are doubled before being embedded in SQL literals.
 */
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
  const escapeQuotes = (text: string): string => text.replace(/'/g, "''");
  const source = timezone ? `(${column} AT TIME ZONE '${escapeQuotes(timezone)}')` : column;
  const anchor = origin ? `TIMESTAMP '${escapeQuotes(origin)}'` : "TIMESTAMP '1970-01-01'";
  const step = `INTERVAL '${escapeQuotes(interval)}'`;
  // Whole number of steps elapsed between the anchor and the source value.
  const elapsedSteps = `FLOOR(EXTRACT(EPOCH FROM (${source} - ${anchor})) / EXTRACT(EPOCH FROM ${step}))`;
  return `${anchor} + ${elapsedSteps} * ${step}`;
}
/** Renders `interval` as an `INTERVAL '...'` SQL literal, doubling any single quotes in it. */
parseIntervalToSql(interval: string): string {
  const escaped = interval.replace(/'/g, "''");
  return "INTERVAL '" + escaped + "'";
}
}

View file

@ -1,49 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { KtxPostgresHistoricSqlQueryClient } from './historic-sql-query-client.js';
import type { KtxPostgresPoolConfig, KtxPostgresPoolFactory } from './connector.js';
describe('KtxPostgresHistoricSqlQueryClient', () => {
  it('executes parameterized read-only SQL through the native Postgres connector pool', async () => {
    // Every query issued through the fake pool is captured here for the final assertion.
    const recordedQueries: Array<{ sql: string; params?: unknown[] }> = [];
    const releaseSpy = vi.fn();
    const endSpy = vi.fn(async () => {});

    // Hands out a fresh fake client on each checkout; all of them share the same release spy.
    const checkoutClient = async () => ({
      query: async (sql: string, params?: unknown[]) => {
        recordedQueries.push({ sql, params });
        return {
          fields: [{ name: 'answer', dataTypeID: 23 }],
          rows: [{ answer: 42 }],
        };
      },
      release: releaseSpy,
    });

    const stubPoolFactory: KtxPostgresPoolFactory = {
      createPool(_config: KtxPostgresPoolConfig) {
        return { connect: checkoutClient, end: endSpy };
      },
    };

    const client = new KtxPostgresHistoricSqlQueryClient({
      connectionId: 'warehouse',
      connection: {
        driver: 'postgres',
        url: 'postgresql://readonly:secret@pg.example.test/warehouse', // pragma: allowlist secret
      },
      poolFactory: stubPoolFactory,
    });

    const pending = client.executeQuery('SELECT $1::int AS answer', [42]);
    await expect(pending).resolves.toEqual({
      headers: ['answer'],
      rows: [[42]],
      totalRows: 1,
    });
    expect(recordedQueries).toEqual([{ sql: 'SELECT $1::int AS answer', params: [42] }]);

    await client.cleanup();
    expect(releaseSpy).toHaveBeenCalledTimes(1);
    expect(endSpy).toHaveBeenCalledTimes(1);
  });
});

View file

@ -1,620 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
const createPool = vi.hoisted(() => vi.fn());
vi.mock('snowflake-sdk', () => ({
default: { createPool },
createPool,
}));
import { createSnowflakeLiveDatabaseIntrospection } from '../../connectors/snowflake/live-database-introspection.js';
import { isKtxSnowflakeConnectionConfig, KtxSnowflakeScanConnector, snowflakeConnectionConfigFromConfig, type KtxSnowflakeConnectionConfig, type KtxSnowflakeDriver, type KtxSnowflakeDriverFactory } from '../../connectors/snowflake/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
/**
 * Builds a driver factory whose single fake driver answers a fixed set of SQL snippets with
 * canned results and throws `Unexpected SQL: …` for anything else.
 */
function fakeDriverFactory(): KtxSnowflakeDriverFactory {
  // Ordered lookup table: the first entry whose needle occurs in the SQL supplies the response.
  // Responses are produced by thunks so each call gets a fresh result object.
  const cannedResponses = [
    {
      needle: 'TABLE_CONSTRAINTS',
      respond: () => ({ headers: ['TABLE_NAME', 'COLUMN_NAME'], rows: [['ORDERS', 'ID']], totalRows: 1, rowCount: 1 }),
    },
    {
      needle: 'SELECT "ID", "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"',
      respond: () => ({
        headers: ['ID', 'STATUS'],
        headerTypes: ['NUMBER', 'VARCHAR'],
        rows: [[1, 'paid']],
        totalRows: 1,
        rowCount: 1,
      }),
    },
    {
      needle: 'select * from (select ID, STATUS from ORDERS) as ktx_query_result limit 1',
      respond: () => ({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 }),
    },
    {
      needle: 'SELECT "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"',
      respond: () => ({ headers: ['STATUS'], rows: [['paid'], ['open']], totalRows: 2, rowCount: 2 }),
    },
    {
      needle: 'COUNT(DISTINCT val)',
      respond: () => ({ headers: ['CARDINALITY'], rows: [[2]], totalRows: 1, rowCount: 1 }),
    },
    {
      needle: 'SELECT DISTINCT "STATUS"::VARCHAR AS val',
      respond: () => ({ headers: ['VAL'], rows: [['open'], ['paid']], totalRows: 2, rowCount: 2 }),
    },
  ];

  const driver: KtxSnowflakeDriver = {
    test: vi.fn(async () => ({ success: true })),
    query: vi.fn(async (sql: string) => {
      const canned = cannedResponses.find(({ needle }) => sql.includes(needle));
      if (!canned) {
        throw new Error(`Unexpected SQL: ${sql}`);
      }
      return canned.respond();
    }),
    getSchemaMetadata: vi.fn(async () => [
      {
        name: 'ORDERS',
        catalog: 'ANALYTICS',
        db: 'PUBLIC',
        rowCount: 12,
        comment: 'Orders',
        columns: [
          { name: 'ID', type: 'NUMBER(38,0)', nullable: false, comment: 'Primary key' },
          { name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null },
        ],
      },
      {
        name: 'ORDER_SUMMARY',
        catalog: 'ANALYTICS',
        db: 'PUBLIC',
        rowCount: 3,
        comment: null,
        columns: [{ name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null }],
      },
    ]),
    listSchemas: vi.fn(async () => ['PUBLIC', 'MART']),
    listTables: vi.fn(async () => [
      { schema: 'PUBLIC', name: 'ORDERS', kind: 'table' as const },
      { schema: 'PUBLIC', name: 'ORDER_SUMMARY', kind: 'view' as const },
    ]),
    cleanup: vi.fn(async () => undefined),
  };

  // Every createDriver() call returns the same driver instance.
  const createDriver = vi.fn(() => driver);
  return { createDriver };
}
/**
 * Creates a minimal statement stand-in whose `getColumns()` reports one column per header.
 * Each column exposes `getName()` (the header) and `getType()` (always 'TEXT').
 *
 * @param headers Column names to report; defaults to a single `ONE` column.
 */
function fakeSnowflakeStatement(headers: string[] = ['ONE']) {
  const describeColumn = (columnName: string) => ({
    getName: () => columnName,
    getType: () => 'TEXT',
  });
  return {
    // Columns are rebuilt from `headers` on every call.
    getColumns: () => headers.map((header) => describeColumn(header)),
  };
}
/**
 * Makes the mocked `createPool` return a fake pool and exposes the pieces for assertions.
 *
 * The fake connection records each executed SQL text and immediately completes with a
 * single `{ ONE: 1 }` row; the fake pool runs callbacks against that one connection.
 */
function installSnowflakePoolMock() {
  type ExecuteInput = {
    sqlText: string;
    complete: (
      error: Error | null,
      statement: ReturnType<typeof fakeSnowflakeStatement>,
      rows: Array<Record<string, unknown>>,
    ) => void;
  };

  const executedSql: string[] = [];
  const execute = vi.fn((input: ExecuteInput) => {
    executedSql.push(input.sqlText);
    input.complete(null, fakeSnowflakeStatement(), [{ ONE: 1 }]);
  });
  const connection = { execute };

  const use = vi.fn(async (fn: (conn: typeof connection) => Promise<unknown>) => fn(connection));
  const drain = vi.fn(async () => undefined);
  const clear = vi.fn(async () => undefined);
  const pool = { use, drain, clear };

  createPool.mockReturnValue(pool);
  return { connection, pool, executedSql };
}
describe('KtxSnowflakeScanConnector', () => {
it('resolves Snowflake connection configuration safely', () => {
  // A Snowflake-shaped config is recognised by the guard…
  const recognisesSnowflake = isKtxSnowflakeConnectionConfig({
    driver: 'snowflake',
    account: 'acct',
    warehouse: 'WH',
    database: 'ANALYTICS',
    username: 'reader',
  });
  expect(recognisesSnowflake).toBe(true);

  // …while a config for another driver is not.
  const recognisesBigQuery = isKtxSnowflakeConnectionConfig({ driver: 'bigquery' });
  expect(recognisesBigQuery).toBe(false);

  const resolved = snowflakeConnectionConfigFromConfig({
    connectionId: 'warehouse',
    connection: {
      driver: 'snowflake',
      authMethod: 'password',
      account: 'acct',
      warehouse: 'WH',
      database: 'ANALYTICS',
      schema_name: 'PUBLIC',
      username: 'reader',
      password: 'fixture-pass', // pragma: allowlist secret
    },
  });
  expect(resolved).toMatchObject({
    account: 'acct',
    warehouse: 'WH',
    database: 'ANALYTICS',
    schemas: ['PUBLIC'],
    username: 'reader',
    authMethod: 'password',
  });
});
it('defaults and validates Snowflake maxConnections', () => {
  const baseConnection: KtxSnowflakeConnectionConfig = {
    driver: 'snowflake',
    authMethod: 'password',
    account: 'acct',
    warehouse: 'WH',
    database: 'ANALYTICS',
    schema_name: 'PUBLIC',
    username: 'reader',
    password: 'fixture-pass', // pragma: allowlist secret
  };
  // Resolves a connection under the fixed 'warehouse' id used throughout this case.
  const resolveWarehouse = (connection: KtxSnowflakeConnectionConfig) =>
    snowflakeConnectionConfigFromConfig({ connectionId: 'warehouse', connection });

  // Omitted → 4; a number is kept; a numeric string resolves to its number.
  expect(resolveWarehouse(baseConnection)).toMatchObject({ maxConnections: 4 });
  expect(resolveWarehouse({ ...baseConnection, maxConnections: 8 })).toMatchObject({ maxConnections: 8 });
  expect(resolveWarehouse({ ...baseConnection, maxConnections: '12' as never })).toMatchObject({
    maxConnections: 12,
  });

  const invalidValues = [0, -1, 1.5, Number.NaN, 'abc' as never];
  invalidValues.forEach((maxConnections) => {
    expect(() => resolveWarehouse({ ...baseConnection, maxConnections })).toThrow(
      'connections.warehouse.maxConnections must be a positive integer',
    );
  });
});
it('rejects stale Snowflake pool config key', () => {
  const baseConnection: KtxSnowflakeConnectionConfig = {
    driver: 'snowflake',
    authMethod: 'password',
    account: 'acct',
    warehouse: 'WH',
    database: 'ANALYTICS',
    schema_name: 'PUBLIC',
    username: 'reader',
    password: 'fixture-pass', // pragma: allowlist secret
  };
  // Resolving a config that still carries `maxSessions` must fail with the rename message.
  const resolveWithStaleKey = () =>
    snowflakeConnectionConfigFromConfig({
      connectionId: 'warehouse',
      connection: { ...baseConnection, maxSessions: 8 },
    });
  expect(resolveWithStaleKey).toThrow(/renamed to maxConnections/);
});
it('uses one lazy Snowflake pool and drains it during cleanup', async () => {
const { pool, executedSql } = installSnowflakePoolMock();
const close = vi.fn(async () => undefined);
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
role: 'ANALYST',
maxConnections: 3,
},
sdkOptionsProvider: {
resolve: vi.fn(async () => ({ sdkOptions: { application: 'ktx-test' }, close })),
},
});
expect(createPool).not.toHaveBeenCalled();
await connector.executeReadOnly({ connectionId: 'warehouse', sql: 'select 1', maxRows: 1 }, { runId: 'run-1' });
await connector.executeReadOnly({ connectionId: 'warehouse', sql: 'select 1', maxRows: 1 }, { runId: 'run-1' });
expect(createPool).toHaveBeenCalledTimes(1);
expect(createPool).toHaveBeenCalledWith(
expect.objectContaining({
account: 'acct',
username: 'reader',
warehouse: 'WH',
database: 'ANALYTICS',
schema: 'PUBLIC',
role: 'ANALYST',
password: 'fixture-pass', // pragma: allowlist secret
clientSessionKeepAlive: true,
clientSessionKeepAliveHeartbeatFrequency: 900,
application: 'ktx-test',
}),
expect.objectContaining({
min: 0,
max: 3,
evictionRunIntervalMillis: 30_000,
acquireTimeoutMillis: 60_000,
}),
);
expect(pool.use).toHaveBeenCalledTimes(2);
expect(executedSql.some((sql) => /^USE\s+/i.test(sql.trim()))).toBe(false);
await connector.cleanup();
expect(pool.drain).toHaveBeenCalledBefore(pool.clear);
expect(pool.clear).toHaveBeenCalledTimes(1);
expect(close).toHaveBeenCalledTimes(1);
});
it('introspects schema, primary keys, comments, row counts, and dimensions', async () => {
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory: fakeDriverFactory(),
now: () => new Date('2026-04-29T18:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'snowflake' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'snowflake',
extractedAt: '2026-04-29T18:00:00.000Z',
scope: { catalogs: ['ANALYTICS'], schemas: ['PUBLIC'] },
metadata: {
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schemas: ['PUBLIC'],
table_count: 2,
total_columns: 3,
},
});
expect(snapshot.tables.find((table) => table.name === 'ORDERS')?.columns).toEqual([
{
name: 'ID',
nativeType: 'NUMBER(38,0)',
normalizedType: 'NUMBER(38,0)',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Primary key',
},
{
name: 'STATUS',
nativeType: 'VARCHAR',
normalizedType: 'VARCHAR',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
]);
});
it('continues introspection when primary-key discovery is not authorized', async () => {
const driverFactory = fakeDriverFactory();
const driver = (driverFactory.createDriver as ReturnType<typeof vi.fn>).getMockImplementation() as
| (() => KtxSnowflakeDriver)
| undefined;
if (!driver) throw new Error('driver mock missing');
const built = driver();
(built.query as ReturnType<typeof vi.fn>).mockImplementation(async (sql: string) => {
if (sql.includes('TABLE_CONSTRAINTS')) {
throw new Error(
"SQL compilation error: Object 'ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE' does not exist or not authorized.",
);
}
throw new Error(`Unexpected SQL: ${sql}`);
});
(driverFactory.createDriver as ReturnType<typeof vi.fn>).mockReturnValue(built);
const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined);
try {
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'snowflake' },
{ runId: 'scan-run-pk-skip' },
);
expect(snapshot.tables.map((table) => table.name).sort()).toEqual(['ORDERS', 'ORDER_SUMMARY']);
expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true);
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in PUBLIC (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'PUBLIC', kind: 'primary_key' },
},
]);
expect(warn).not.toHaveBeenCalled();
} finally {
warn.mockRestore();
}
});
it('propagates non-denial Snowflake primary-key discovery errors', async () => {
const driverFactory = fakeDriverFactory();
const driver = (driverFactory.createDriver as ReturnType<typeof vi.fn>).getMockImplementation() as
| (() => KtxSnowflakeDriver)
| undefined;
if (!driver) throw new Error('driver mock missing');
const built = driver();
const networkError = new Error('network unavailable');
(built.query as ReturnType<typeof vi.fn>).mockImplementation(async (sql: string) => {
if (sql.includes('TABLE_CONSTRAINTS')) {
throw networkError;
}
throw new Error(`Unexpected SQL: ${sql}`);
});
(driverFactory.createDriver as ReturnType<typeof vi.fn>).mockReturnValue(built);
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
await expect(
connector.introspect({ connectionId: 'warehouse', driver: 'snowflake' }, { runId: 'scan-run-snowflake-network' }),
).rejects.toBe(networkError);
});
it('limits introspection to tables in tableScope', async () => {
const queries: Array<{ sql: string; params?: unknown }> = [];
const getSchemaMetadata = vi.fn(async (_schemaName?: string, scopedNames?: readonly string[] | null) =>
scopedNames?.includes('ORDERS')
? [
{
name: 'ORDERS',
catalog: 'ANALYTICS',
db: 'MARTS',
rowCount: 10,
comment: null,
columns: [{ name: 'ID', type: 'NUMBER', nullable: false, comment: null }],
},
]
: [],
);
const driverFactory: KtxSnowflakeDriverFactory = {
createDriver: vi.fn(() => ({
test: vi.fn(async () => ({ success: true })),
query: vi.fn(async (sql: string, params?: unknown) => {
queries.push({ sql, params });
return { headers: [], rows: [], totalRows: 0, rowCount: 0 };
}),
getSchemaMetadata,
listSchemas: vi.fn(async () => []),
listTables: vi.fn(async () => []),
cleanup: vi.fn(async () => undefined),
})),
};
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'MARTS',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
const scope = tableRefSet([{ catalog: 'ANALYTICS', db: 'MARTS', name: 'ORDERS' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'snowflake', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['ORDERS']);
expect(getSchemaMetadata).toHaveBeenCalledWith('MARTS', ['ORDERS']);
const primaryKeysQuery = queries.find((query) => query.sql.includes('TABLE_CONSTRAINTS'));
expect(primaryKeysQuery?.sql).toMatch(/AND tc\.TABLE_NAME IN \(\?\)/);
expect(primaryKeysQuery?.params).toEqual(['MARTS', 'ANALYTICS', 'ORDERS']);
});
it('supports read-only query, sampling, distinct values, row counts, schema listing, and cleanup', async () => {
const driverFactory = fakeDriverFactory();
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' },
limit: 1,
columns: ['ID', 'STATUS'],
},
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select ID, STATUS from ORDERS', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], rowCount: 1 });
await expect(
connector.sampleColumn(
{
connectionId: 'warehouse',
table: { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' },
column: 'STATUS',
limit: 2,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' }, 'STATUS', {
maxCardinality: 10,
limit: 5,
}),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(connector.getTableRowCount('ORDERS')).resolves.toBe(12);
await expect(connector.listSchemas()).resolves.toEqual(['PUBLIC', 'MART']);
await connector.cleanup();
const driver = (driverFactory.createDriver as ReturnType<typeof vi.fn>).mock.results[0]?.value as KtxSnowflakeDriver;
expect(driver.cleanup).toHaveBeenCalledTimes(1);
});
it('lists tables across schemas with one information schema query', async () => {
const queries: Array<{ sql: string; params?: unknown }> = [];
const driverFactory: KtxSnowflakeDriverFactory = {
createDriver: vi.fn(() => ({
test: vi.fn(async () => ({ success: true })),
query: vi.fn(async (sql: string, params?: unknown) => {
queries.push({ sql, params });
return {
headers: ['TABLE_SCHEMA', 'TABLE_NAME', 'TABLE_TYPE'],
rows: [
['MART', 'ORDERS', 'BASE TABLE'],
['PUBLIC', 'ORDER_SUMMARY', 'VIEW'],
],
totalRows: 2,
rowCount: 2,
};
}),
getSchemaMetadata: vi.fn(async () => []),
listSchemas: vi.fn(async () => []),
listTables: vi.fn(async () => []),
cleanup: vi.fn(async () => undefined),
})),
};
const connector = new KtxSnowflakeScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'snowflake',
authMethod: 'password',
account: 'acct',
warehouse: 'WH',
database: 'ANALYTICS',
schema_name: 'PUBLIC',
username: 'reader',
password: 'fixture-pass', // pragma: allowlist secret
},
driverFactory,
});
await expect(connector.listTables(['MART', 'PUBLIC'])).resolves.toEqual([
{ schema: 'MART', name: 'ORDERS', kind: 'table' },
{ schema: 'PUBLIC', name: 'ORDER_SUMMARY', kind: 'view' },
]);
expect(queries).toHaveLength(1);
expect(queries[0]?.sql).toContain('FROM "ANALYTICS".INFORMATION_SCHEMA.TABLES');
expect(queries[0]?.sql).toContain('AND TABLE_SCHEMA IN (?, ?)');
expect(queries[0]?.params).toEqual(['ANALYTICS', 'MART', 'PUBLIC']);
});
it('rejects unsafe Snowflake identifiers before driver creation', () => {
  // Construction alone must throw for a warehouse name containing `;`.
  const constructConnector = () =>
    new KtxSnowflakeScanConnector({
      connectionId: 'warehouse',
      connection: {
        driver: 'snowflake',
        authMethod: 'password',
        account: 'acct',
        warehouse: 'WH;DROP',
        database: 'ANALYTICS',
        schema_name: 'PUBLIC',
        username: 'reader',
        password: 'fixture-pass', // pragma: allowlist secret
      },
      driverFactory: fakeDriverFactory(),
    });
  expect(constructConnector).toThrow('Invalid Snowflake warehouse identifier "WH;DROP"');
});
it('converts a native snapshot into a live-database introspection snapshot', async () => {
  const liveIntrospection = createSnowflakeLiveDatabaseIntrospection({
    connections: {
      warehouse: {
        driver: 'snowflake',
        authMethod: 'password',
        account: 'acct',
        warehouse: 'WH',
        database: 'ANALYTICS',
        schema_name: 'PUBLIC',
        username: 'reader',
        password: 'fixture-pass', // pragma: allowlist secret
      },
    },
    driverFactory: fakeDriverFactory(),
    now: () => new Date('2026-04-29T18:00:00.000Z'),
  });

  // The ORDERS table from the fake driver metadata must appear among the extracted tables.
  const ordersTable = expect.objectContaining({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' });
  const extraction = liveIntrospection.extractSchema('warehouse');
  await expect(extraction).resolves.toMatchObject({
    connectionId: 'warehouse',
    metadata: { database: 'ANALYTICS', schemas: ['PUBLIC'] },
    tables: expect.arrayContaining([ordersTable]),
  });
});
});

View file

@ -2,6 +2,7 @@ import { createPrivateKey } from 'node:crypto';
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
@ -27,7 +28,6 @@ import {
} from '../../context/scan/types.js';
import snowflake from 'snowflake-sdk';
import type { Bind, Binds, Connection, ConnectionOptions } from 'snowflake-sdk';
import { KtxSnowflakeDialect } from './dialect.js';
import { assertSafeSnowflakeIdentifier, quoteSnowflakeIdentifier } from './identifiers.js';
import { configureSnowflakeSdkLogger } from './sdk-logger.js';
@ -229,6 +229,14 @@ function toSnowflakeBinds(params: unknown[] | undefined): Binds | undefined {
return params?.map((value) => toSnowflakeBind(value));
}
/**
 * Pairs `sql` with a positional bind array built from the values of `params`.
 *
 * The SQL text is returned unchanged. Values are taken in `Object.values` order, and
 * `params` is `undefined` in the result when no parameter object is supplied.
 *
 * @internal
 */
export function prepareSnowflakeReadOnlyQuery(
  sql: string,
  params?: Record<string, unknown>,
): { sql: string; params?: unknown[] } {
  if (!params) {
    return { sql, params: undefined };
  }
  const binds = Object.values(params);
  return { sql, params: binds };
}
export function isKtxSnowflakeConnectionConfig(
connection: KtxSnowflakeConnectionConfig | undefined,
): connection is KtxSnowflakeConnectionConfig {
@ -430,6 +438,7 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver {
[this.resolved.database, ...(schemas ?? [])],
);
return result.rows.map((row) => ({
catalog: this.resolved.database,
schema: String(row[0]),
name: String(row[1]),
kind: String(row[2]) === 'VIEW' ? ('view' as const) : ('table' as const),
@ -550,7 +559,7 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
private readonly resolved: KtxSnowflakeResolvedConnectionConfig;
private readonly driverFactory: KtxSnowflakeDriverFactory;
private readonly dialect = new KtxSnowflakeDialect();
private readonly dialect = getDialectForDriver('snowflake');
private readonly now: () => Date;
private driverInstance: KtxSnowflakeDriver | null = null;
@ -635,7 +644,7 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxSnowflakeReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const prepared = prepareSnowflakeReadOnlyQuery(limitedSql, input.params);
return this.getDriver().query(prepared.sql, prepared.params);
}
@ -696,6 +705,7 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector {
[this.resolved.database, ...(schemas ?? [])],
);
return result.rows.map((row) => ({
catalog: this.resolved.database,
schema: String(row[0]),
name: String(row[1]),
kind: String(row[2]) === 'VIEW' ? ('view' as const) : ('table' as const),

View file

@ -1,50 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxSnowflakeDialect } from './dialect.js';
// Contract tests pinning the SQL text and type mappings produced by the Snowflake dialect.
describe('KtxSnowflakeDialect', () => {
  const snowflake = new KtxSnowflakeDialect();
  const ordersTable = '"PUBLIC"."ORDERS"';

  it('quotes identifiers and formats database.schema.table names', () => {
    // An embedded double quote is doubled inside the quoted identifier.
    const quoted = snowflake.quoteIdentifier('order"items');
    expect(quoted).toBe('"order""items"');

    const threePart = snowflake.formatTableName({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' });
    expect(threePart).toBe('"ANALYTICS"."PUBLIC"."ORDERS"');

    const twoPart = snowflake.formatTableName({ db: 'PUBLIC', name: 'ORDERS' });
    expect(twoPart).toBe('"PUBLIC"."ORDERS"');

    const onePart = snowflake.formatTableName({ name: 'ORDERS' });
    expect(onePart).toBe('"ORDERS"');
  });

  it('maps native Snowflake types to scan dimensions', () => {
    expect(snowflake.mapDataType('NUMBER(38,0)')).toBe('NUMBER(38,0)');

    // Native type -> expected scan dimension, checked in the listed order.
    const expectedDimensions = [
      ['TIMESTAMP_NTZ', 'time'],
      ['NUMBER(38,0)', 'number'],
      ['BOOLEAN', 'boolean'],
      ['VARIANT', 'string'],
    ] as const;
    for (const [nativeType, dimension] of expectedDimensions) {
      expect(snowflake.mapToDimensionType(nativeType)).toBe(dimension);
    }
  });

  it('generates sampling and dictionary SQL', () => {
    const tableSample = snowflake.generateSampleQuery(ordersTable, 5, ['ID', 'STATUS']);
    expect(tableSample).toBe('SELECT "ID", "STATUS" FROM "PUBLIC"."ORDERS" SAMPLE ROW (5 ROWS)');

    const columnSample = snowflake.generateColumnSampleQuery(ordersTable, 'STATUS', 10);
    expect(columnSample).toBe(
      'SELECT "STATUS" FROM "PUBLIC"."ORDERS" WHERE "STATUS" IS NOT NULL AND TRIM(CAST("STATUS" AS STRING)) != \'\' LIMIT 10',
    );

    const cardinalitySql = snowflake.generateCardinalitySampleQuery(ordersTable, '"STATUS"', 100);
    expect(cardinalitySql).toContain('SELECT COUNT(DISTINCT val) AS cardinality');

    const distinctSql = snowflake.generateDistinctValuesQuery(ordersTable, '"STATUS"', 20);
    expect(distinctSql).toContain('SELECT DISTINCT "STATUS"::VARCHAR AS val');
  });

  it('passes Snowflake positional parameters as bind arrays', () => {
    const withBinds = snowflake.prepareQuery('SELECT * FROM ORDERS WHERE ID = ? AND STATUS = ?', {
      id: 1,
      status: 'paid',
    });
    expect(withBinds).toEqual({
      sql: 'SELECT * FROM ORDERS WHERE ID = ? AND STATUS = ?',
      params: [1, 'paid'],
    });

    const withoutBinds = snowflake.prepareQuery('SELECT * FROM ORDERS');
    expect(withoutBinds).toEqual({ sql: 'SELECT * FROM ORDERS', params: undefined });
  });

  it('keeps unsupported statistics explicit', () => {
    const statisticsSql = snowflake.generateColumnStatisticsQuery('PUBLIC', 'ORDERS');
    expect(statisticsSql).toBeNull();
  });
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type SnowflakeTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxSnowflakeDialect {
readonly type = 'snowflake';
/** @internal */
export class KtxSnowflakeDialect implements KtxDialect {
readonly type = 'snowflake' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
TIMESTAMP_NTZ: 'time',
@ -45,13 +54,19 @@ export class KtxSnowflakeDialect {
}
formatTableName(table: SnowflakeTableNameRef): string {
if (table.catalog && table.db) {
return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
if (table.db) {
return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
/** Formats `table` for display by delegating to `formatDialectDisplayRef` with the 'three-part' mode argument. */
formatDisplayRef(table: SnowflakeTableNameRef): string {
return formatDialectDisplayRef(table, 'three-part');
}
/**
 * Parses a display string into a table reference by delegating to `parseDialectDisplayRef`
 * with the 'three-part' mode argument. The declared return type allows `null`.
 */
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'three-part');
}
/** Returns whatever `columnDisplayPartCount('three-part')` yields; the return type restricts it to 1, 2 or 3. */
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('three-part');
}
mapDataType(nativeType: string): string {
@ -96,10 +111,6 @@ export class KtxSnowflakeDialect {
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' LIMIT ${limit}`;
}
/**
 * Pairs the SQL text with a positional bind array.
 *
 * @param sql - Statement text, returned unchanged.
 * @param params - Optional named values; only the values are kept, in the object's own property order.
 * @returns The SQL plus the bind array, or `params: undefined` when no params were given.
 */
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
  if (!params) {
    return { sql, params: undefined };
  }
  const bindValues = Object.values(params);
  return { sql, params: bindValues };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -115,7 +126,11 @@ export class KtxSnowflakeDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
/**
 * Always returns an empty string; the `_limit` argument is ignored.
 * NOTE(review): presumably because this dialect limits rows with a trailing LIMIT clause rather than TOP — confirm against the KtxDialect contract.
 */
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -126,6 +141,18 @@ export class KtxSnowflakeDialect {
return `APPROX_COUNT_DISTINCT(${column})`;
}
/** Wraps `columnSql` as `LENGTH(CAST(<columnSql> AS TEXT))`; the input is interpolated verbatim. */
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS TEXT))`;
}
/** Wraps `columnSql` as `CAST(<columnSql> AS VARCHAR)`; the input is interpolated verbatim. */
castToText(columnSql: string): string {
return `CAST(${columnSql} AS VARCHAR)`;
}
/**
 * Builds a scalar subquery that concatenates the `value` column of `innerSql` with LISTAGG.
 * The subquery is aliased `relationship_profile_values`.
 */
getSampleValueAggregation(innerSql: string): string {
// The doubled backslash emits the four characters \x1f into the SQL text.
// NOTE(review): presumably Snowflake reads that as the unit-separator escape — confirm.
return `(SELECT LISTAGG(CAST(value AS VARCHAR), '\\x1f') FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -164,24 +191,4 @@ export class KtxSnowflakeDialect {
FROM sampled
`;
}
/**
 * Builds a `DATE_TRUNC` expression for the given granularity.
 *
 * @param column - SQL for the timestamp being truncated, interpolated verbatim.
 * @param granularity - Truncation unit, placed inside a quoted literal.
 * @param timezone - When truthy, the column is first wrapped in `CONVERT_TIMEZONE('UTC', <timezone>, ...)`.
 * @returns The truncation expression as SQL text.
 */
getTimeTruncExpression(
  column: string,
  granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
  timezone?: string,
): string {
  let truncTarget = column;
  if (timezone) {
    truncTarget = `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})`;
  }
  return `DATE_TRUNC('${granularity}', ${truncTarget})`;
}
/**
 * Builds an expression that buckets `column` into fixed-size intervals counted from an origin timestamp.
 *
 * @param column - SQL for the timestamp being bucketed, interpolated verbatim.
 * @param interval - Text of the form "<amount> <unit>", split on a single space.
 * @param origin - Bucket anchor; '1970-01-01' is used when falsy.
 * @param timezone - When truthy, the column is first wrapped in `CONVERT_TIMEZONE('UTC', <timezone>, ...)`.
 * @returns A `DATEADD(...)` expression as SQL text.
 */
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
  const intervalParts = interval.split(' ');
  const amount = intervalParts[0];
  const unit = intervalParts[1];

  const anchor = origin ? `'${origin}'::TIMESTAMP` : `'1970-01-01'::TIMESTAMP`;

  let source = column;
  if (timezone) {
    source = `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})`;
  }

  // Whole number of intervals elapsed between the anchor and the source timestamp.
  const elapsedIntervals = `FLOOR(DATEDIFF(${unit}, ${anchor}, ${source}) / ${amount})`;
  return `DATEADD(${unit}, ${elapsedIntervals} * ${amount}, ${anchor})`;
}
/** Wraps the interval text as `INTERVAL '<interval>'`; the text is interpolated verbatim with no escaping. */
parseIntervalToSql(interval: string): string {
return `INTERVAL '${interval}'`;
}
}

View file

@ -1,18 +0,0 @@
import { describe, expect, it } from 'vitest';
import { assertSafeSnowflakeIdentifier, quoteSnowflakeIdentifier } from './identifiers.js';
// Tests for the identifier helpers: quoting of accepted names and the exact rejection messages.
describe('Snowflake identifier guards', () => {
  it('quotes simple Snowflake identifiers', () => {
    const quotedDatabase = quoteSnowflakeIdentifier('ANALYTICS_DB', 'database');
    const quotedRole = quoteSnowflakeIdentifier('ROLE_1$', 'role');

    expect(quotedDatabase).toBe('"ANALYTICS_DB"');
    expect(quotedRole).toBe('"ROLE_1$"');
  });

  it('rejects configured identifiers with field and value in the error', () => {
    // A dotted name is rejected and echoed back in the message.
    const rejectDottedDatabase = (): void => {
      assertSafeSnowflakeIdentifier('bad.db', 'database');
    };
    expect(rejectDottedDatabase).toThrow(
      'Invalid Snowflake database identifier "bad.db"; use a simple unquoted identifier matching /^[A-Za-z_][A-Za-z0-9_$]*$/',
    );

    // An embedded double quote is rejected; the message shows it backslash-escaped.
    const rejectQuotedWarehouse = (): void => {
      assertSafeSnowflakeIdentifier('WH"DROP', 'warehouse');
    };
    expect(rejectQuotedWarehouse).toThrow(
      'Invalid Snowflake warehouse identifier "WH\\"DROP"; use a simple unquoted identifier matching /^[A-Za-z_][A-Za-z0-9_$]*$/',
    );
  });
});

View file

@ -1,57 +0,0 @@
import { mkdtempSync, rmSync, statSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
const { configure } = vi.hoisted(() => ({ configure: vi.fn() }));
vi.mock('snowflake-sdk', () => ({
default: { configure },
}));
import {
configureSnowflakeSdkLogger,
resetSnowflakeSdkLoggerConfigurationForTests,
} from './sdk-logger.js';
// Tests for the SDK logger setup, run against the mocked `snowflake-sdk` `configure` function.
describe('configureSnowflakeSdkLogger', () => {
  let projectDir: string;

  // Deletes a temporary project directory; `force` tolerates a directory that is already gone.
  const removeDir = (dir: string): void => {
    rmSync(dir, { recursive: true, force: true });
  };

  beforeEach(() => {
    // Start every test with a clean mock and cleared logger configuration state.
    configure.mockReset();
    resetSnowflakeSdkLoggerConfigurationForTests();
    projectDir = mkdtempSync(join(tmpdir(), 'ktx-snowflake-logger-'));
  });

  afterEach(() => {
    removeDir(projectDir);
  });

  it('routes logs to <projectDir>/.ktx/logs/snowflake.log with console output disabled', () => {
    const logsDir = resolve(projectDir, '.ktx', 'logs');
    const logFilePath = resolve(projectDir, '.ktx', 'logs', 'snowflake.log');

    const returnedPath = configureSnowflakeSdkLogger(projectDir);

    expect(returnedPath).toBe(logFilePath);
    expect(configure).toHaveBeenCalledTimes(1);
    expect(configure).toHaveBeenCalledWith({
      logFilePath: logFilePath,
      additionalLogToConsole: false,
    });
    // The logs directory must exist after configuration.
    expect(statSync(logsDir).isDirectory()).toBe(true);
  });

  it('is idempotent for the same projectDir', () => {
    for (let attempt = 0; attempt < 2; attempt += 1) {
      configureSnowflakeSdkLogger(projectDir);
    }
    expect(configure).toHaveBeenCalledTimes(1);
  });

  it('reconfigures when projectDir changes', () => {
    const secondProjectDir = mkdtempSync(join(tmpdir(), 'ktx-snowflake-logger-other-'));
    try {
      configureSnowflakeSdkLogger(projectDir);
      configureSnowflakeSdkLogger(secondProjectDir);
      expect(configure).toHaveBeenCalledTimes(2);
    } finally {
      removeDir(secondProjectDir);
    }
  });
});

View file

@ -1,270 +0,0 @@
import Database from 'better-sqlite3';
import { writeFileSync } from 'node:fs';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { createSqliteLiveDatabaseIntrospection } from '../../connectors/sqlite/live-database-introspection.js';
import { isKtxSqliteConnectionConfig, KtxSqliteScanConnector, sqliteDatabasePathFromConfig } from '../../connectors/sqlite/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
describe('KtxSqliteScanConnector', () => {
let tempDir: string;
let dbPath: string;
// Creates a fresh temp directory and seeds `warehouse.db` with two tables
// (orders references customers through a foreign key), one view, and two rows per table.
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-connector-sqlite-'));
dbPath = join(tempDir, 'warehouse.db');
const db = new Database(dbPath);
db.exec(`
PRAGMA foreign_keys = ON;
CREATE TABLE customers (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
tier TEXT
);
CREATE TABLE orders (
id INTEGER PRIMARY KEY,
customer_id INTEGER NOT NULL,
status TEXT,
total NUMERIC,
created_at TEXT,
FOREIGN KEY(customer_id) REFERENCES customers(id)
);
CREATE VIEW recent_orders AS SELECT id, customer_id, status FROM orders;
INSERT INTO customers (id, name, tier) VALUES (1, 'Ada', 'enterprise'), (2, 'Grace', 'growth');
INSERT INTO orders (id, customer_id, status, total, created_at)
VALUES (10, 1, 'paid', 42.5, '2026-04-28'), (11, 2, 'open', 9.5, '2026-04-29');
`);
// Close the seeding handle; the tests reach the file through dbPath.
db.close();
});
// Removes the per-test temp directory; `force` tolerates a directory that is already gone.
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Exercises each accepted way of pointing at the database file, plus rejection of an unsupported key.
it('resolves SQLite path configuration safely', () => {
// Remember any pre-existing value so the finally block can restore the environment.
const originalDatabaseUrl = process.env.KTX_SQLITE_TEST_URL;
// Pointer file whose contents are the real database path (used by the `file:` path case below).
const pointerPath = join(tempDir, 'sqlite-path.txt');
process.env.KTX_SQLITE_TEST_URL = `sqlite:${dbPath}`;
writeFileSync(pointerPath, dbPath, 'utf-8');
try {
// Type guard: accepts the sqlite driver, rejects another driver.
expect(isKtxSqliteConnectionConfig({ driver: 'sqlite', path: 'warehouse.db' })).toBe(true);
expect(isKtxSqliteConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL' })).toBe(false);
// Relative `path` resolves to the database inside projectDir.
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: 'warehouse.db' },
}),
).toBe(dbPath);
// `url` given as an `env:` reference to a variable holding a `sqlite:` URL.
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', url: 'env:KTX_SQLITE_TEST_URL' },
}),
).toBe(dbPath);
// `url` given as a `file://` URL.
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', url: `file://${dbPath}` },
}),
).toBe(dbPath);
// `path` given as `file:<pointer>`: the expected result is the pointer file's contents, not the pointer path.
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: `file:${pointerPath}` },
}),
).toBe(dbPath);
// NOTE(review): this assertion is identical to the first relative-path case above — confirm whether a different input was intended.
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: 'warehouse.db' },
}),
).toBe(dbPath);
// A config with only `file_path` (neither `path` nor `url`) is rejected.
expect(() =>
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', file_path: 'warehouse.db' },
}),
).toThrow('Native SQLite connector requires connections.warehouse.path or url');
} finally {
// Restore the environment exactly: delete the variable if it was unset before the test.
if (originalDatabaseUrl === undefined) {
delete process.env.KTX_SQLITE_TEST_URL;
} else {
process.env.KTX_SQLITE_TEST_URL = originalDatabaseUrl;
}
}
});
// Full introspection of the seeded database with a pinned clock so `extractedAt` is deterministic.
it('introspects schema, primary keys, row counts, views, and foreign keys', async () => {
  const fixedNow = (): Date => new Date('2026-04-29T10:00:00.000Z');
  const scanner = new KtxSqliteScanConnector({
    connectionId: 'warehouse',
    connection: { driver: 'sqlite', path: dbPath },
    now: fixedNow,
  });

  const schema = await scanner.introspect({ connectionId: 'warehouse', driver: 'sqlite' }, { runId: 'scan-run-1' });

  expect(schema).toMatchObject({
    connectionId: 'warehouse',
    driver: 'sqlite',
    extractedAt: '2026-04-29T10:00:00.000Z',
    metadata: {
      file_path: dbPath,
      table_count: 3,
      total_columns: 11,
    },
  });

  // Name, kind and estimated row count per table; the view reports no estimate.
  const tableSummaries = schema.tables.map((entry) => [entry.name, entry.kind, entry.estimatedRows]);
  expect(tableSummaries).toEqual([
    ['customers', 'table', 2],
    ['orders', 'table', 2],
    ['recent_orders', 'view', null],
  ]);

  const customersTable = schema.tables.find((entry) => entry.name === 'customers');
  expect(customersTable?.columns[0]).toMatchObject({
    name: 'id',
    nativeType: 'INTEGER',
    normalizedType: 'INTEGER',
    dimensionType: 'number',
    nullable: false,
    primaryKey: true,
  });

  const ordersTable = schema.tables.find((entry) => entry.name === 'orders');
  expect(ordersTable?.foreignKeys).toEqual([
    {
      fromColumn: 'customer_id',
      toCatalog: null,
      toDb: null,
      toTable: 'customers',
      toColumn: 'id',
      constraintName: null,
    },
  ]);
});
// Walks the connector's query-style entry points against the seeded `orders` table.
it('runs samples, distinct values, statistics, and read-only SQL', async () => {
  const scanner = new KtxSqliteScanConnector({
    connectionId: 'warehouse',
    connection: { driver: 'sqlite', path: dbPath },
  });
  const ordersRef = { catalog: null, db: null, name: 'orders' };
  const scanCtx = { runId: 'scan-run-1' };

  const tableSample = scanner.sampleTable(
    { connectionId: 'warehouse', table: ordersRef, columns: ['id'], limit: 1 },
    scanCtx,
  );
  await expect(tableSample).resolves.toEqual({ headers: ['id'], rows: [[10]], totalRows: 1 });

  const columnSample = scanner.sampleColumn(
    { connectionId: 'warehouse', table: ordersRef, column: 'status', limit: 5 },
    scanCtx,
  );
  await expect(columnSample).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

  const distinctValues = scanner.getColumnDistinctValues(ordersRef, 'status', {
    maxCardinality: 5,
    limit: 10,
    sampleSize: 100,
  });
  await expect(distinctValues).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

  // maxRows caps the result to a single row.
  const limitedQuery = scanner.executeReadOnly(
    { connectionId: 'warehouse', sql: 'select id, status from orders order by id', maxRows: 1 },
    scanCtx,
  );
  await expect(limitedQuery).resolves.toEqual({
    headers: ['id', 'status'],
    rows: [[10, 'paid']],
    totalRows: 1,
    rowCount: 1,
  });

  // A mutating statement is refused.
  const mutatingQuery = scanner.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, scanCtx);
  await expect(mutatingQuery).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

  const stats = scanner.columnStats({ connectionId: 'warehouse', table: ordersRef, column: 'status' }, scanCtx);
  await expect(stats).resolves.toBeNull();
});
// With a tableScope containing only `orders`, the snapshot must list that table alone.
it('limits introspection to tables in tableScope', async () => {
  const scanner = new KtxSqliteScanConnector({
    connectionId: 'warehouse',
    connection: { driver: 'sqlite', path: dbPath },
  });
  const ordersOnly = tableRefSet([{ catalog: null, db: null, name: 'orders' }]);

  const scoped = await scanner.introspect(
    { connectionId: 'warehouse', driver: 'sqlite', tableScope: ordersOnly },
    { runId: 'scope-test' },
  );

  const tableNames = scoped.tables.map((entry) => entry.name);
  expect(tableNames).toEqual(['orders']);
});
// Checks the live-database introspection adapter: schema extracted by connection id
// from a project-relative path, with a pinned clock for `extractedAt`.
it('adapts native SQLite snapshots to live-database introspection for local ingest', async () => {
const introspection = createSqliteLiveDatabaseIntrospection({
projectDir: tempDir,
connections: {
warehouse: { driver: 'sqlite', path: 'warehouse.db' },
},
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T10:00:00.000Z',
});
// customers: all three columns in declaration order, no foreign keys.
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: null,
db: null,
columns: [
{
name: 'id',
nativeType: 'INTEGER',
normalizedType: 'INTEGER',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
{
name: 'name',
nativeType: 'TEXT',
normalizedType: 'TEXT',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
{
name: 'tier',
nativeType: 'TEXT',
normalizedType: 'TEXT',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
// orders: only the foreign key back to customers is asserted (partial match).
expect(snapshot.tables.find((table) => table.name === 'orders')).toMatchObject({
name: 'orders',
catalog: null,
db: null,
foreignKeys: [{ fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }],
});
});
});

View file

@ -3,11 +3,11 @@ import { existsSync, readFileSync, statSync } from 'node:fs';
import { homedir } from 'node:os';
import { isAbsolute, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { normalizeQueryRows } from '../../context/connections/query-executor.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import { KtxSqliteDialect } from './dialect.js';
export interface KtxSqliteConnectionConfig {
driver?: string;
@ -157,7 +157,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
private readonly connectionId: string;
private readonly dbPath: string;
private readonly now: () => Date;
private readonly dialect = new KtxSqliteDialect();
private readonly dialect = getDialectForDriver('sqlite');
private db: Database.Database | null = null;
constructor(options: KtxSqliteScanConnectorOptions) {
@ -209,6 +209,31 @@ export class KtxSqliteScanConnector implements KtxScanConnector {
};
}
/** Always resolves to an empty list; this connector reports no schemas. */
async listSchemas(): Promise<string[]> {
return [];
}
/**
 * Lists the user tables and views recorded in `sqlite_master`, ordered by name.
 *
 * @param _schemas - Ignored; the query has no schema filter.
 * @returns One entry per table or view, with `catalog: null` and an empty `schema`.
 */
async listTables(_schemas?: string[]): Promise<KtxTableListEntry[]> {
  // `_` is a single-character wildcard in LIKE, so the unescaped pattern 'sqlite_%' also hid
  // user tables such as `sqliters`. Escaping it limits the exclusion to the `sqlite_` prefix.
  // The doubled backslashes emit a single backslash into the SQL text.
  const rows = this.database()
    .prepare(
      `
SELECT name, type
FROM sqlite_master
WHERE type IN ('table', 'view')
AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\'
ORDER BY name
`,
    )
    .all() as SqliteMasterRow[];
  return rows.map((row) => ({
    catalog: null,
    schema: '',
    name: row.name,
    // Anything that is not a view is reported as a table (the query only returns those two types).
    kind: row.type === 'view' ? ('view' as const) : ('table' as const),
  }));
}
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
this.assertConnection(input.connectionId);
const result = this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));

View file

@ -1,33 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxSqliteDialect } from './dialect.js';
// Contract tests pinning identifier quoting, type mapping and generated SQL for the SQLite dialect.
describe('KtxSqliteDialect', () => {
  const sqlite = new KtxSqliteDialect();
  const ordersTable = '"orders"';

  it('quotes identifiers and formats single-file SQLite table names', () => {
    const plain = sqlite.quoteIdentifier('orders');
    const withQuote = sqlite.quoteIdentifier('weird"name');
    expect(plain).toBe('"orders"');
    expect(withQuote).toBe('"weird""name"');

    // Catalog and db are dropped; only the table name is emitted.
    const formatted = sqlite.formatTableName({ catalog: 'ignored', db: 'ignored', name: 'orders' });
    expect(formatted).toBe('"orders"');
  });

  it('maps native SQLite types to KTX dimension types', () => {
    // Native type -> expected dimension, checked in the listed order.
    const expectedDimensions = [
      ['INTEGER', 'number'],
      ['numeric(10,2)', 'number'],
      ['timestamp', 'time'],
      ['VARCHAR(255)', 'string'],
      ['bool', 'boolean'],
      ['', 'string'],
    ] as const;
    for (const [nativeType, dimension] of expectedDimensions) {
      expect(sqlite.mapToDimensionType(nativeType)).toBe(dimension);
    }
  });

  it('builds sampling and distinct-value SQL without host-specific state', () => {
    const tableSample = sqlite.generateSampleQuery(ordersTable, 25, ['id', 'status']);
    expect(tableSample).toBe('SELECT "id", "status" FROM "orders" LIMIT 25');

    const columnSample = sqlite.generateColumnSampleQuery(ordersTable, 'status', 10);
    expect(columnSample).toBe(
      'SELECT "status" FROM "orders" WHERE "status" IS NOT NULL AND TRIM(CAST("status" AS TEXT)) != \'\' LIMIT 10',
    );

    const distinctSql = sqlite.generateDistinctValuesQuery(ordersTable, '"status"', 5);
    expect(distinctSql).toContain('SELECT DISTINCT CAST("status" AS TEXT) AS val');
  });
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type SqliteTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxSqliteDialect {
readonly type = 'sqlite';
/** @internal */
export class KtxSqliteDialect implements KtxDialect {
readonly type = 'sqlite' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
DATETIME: 'time',
@ -29,7 +38,19 @@ export class KtxSqliteDialect {
}
formatTableName(table: SqliteTableNameRef): string {
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'sqlite');
}
/** Formats `table` for display by delegating to `formatDialectDisplayRef` with the 'sqlite' mode argument. */
formatDisplayRef(table: SqliteTableNameRef): string {
return formatDialectDisplayRef(table, 'sqlite');
}
/**
 * Parses a display string into a table reference by delegating to `parseDialectDisplayRef`
 * with the 'sqlite' mode argument. The declared return type allows `null`.
 */
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'sqlite');
}
/** Returns whatever `columnDisplayPartCount('sqlite')` yields; the return type restricts it to 1, 2 or 3. */
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('sqlite');
}
mapDataType(nativeType: string): string {
@ -76,10 +97,6 @@ export class KtxSqliteDialect {
return `SELECT ${quoted} FROM ${tableName} WHERE ${quoted} IS NOT NULL AND TRIM(CAST(${quoted} AS TEXT)) != '' LIMIT ${limit}`;
}
/**
 * Pairs the SQL text with its parameters.
 *
 * @param sql - Statement text, returned unchanged.
 * @param params - Optional parameter object, passed through as-is.
 * @returns `{ sql, params }` when params are given; otherwise `{ sql }` with no `params` key at all.
 */
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown } {
  if (!params) {
    return { sql };
  }
  return { sql, params };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -92,7 +109,11 @@ export class KtxSqliteDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
/**
 * Always returns an empty string; the `_limit` argument is ignored.
 * NOTE(review): presumably because this dialect limits rows with a trailing LIMIT clause rather than TOP — confirm against the KtxDialect contract.
 */
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -103,6 +124,18 @@ export class KtxSqliteDialect {
return `COUNT(DISTINCT ${column})`;
}
/** Wraps `columnSql` as `LENGTH(CAST(<columnSql> AS TEXT))`; the input is interpolated verbatim. */
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS TEXT))`;
}
/** Wraps `columnSql` as `CAST(<columnSql> AS TEXT)`; the input is interpolated verbatim. */
castToText(columnSql: string): string {
return `CAST(${columnSql} AS TEXT)`;
}
/**
 * Builds a scalar subquery that concatenates the `value` column of `innerSql` with GROUP_CONCAT,
 * using `char(31)` (the ASCII unit separator) as the delimiter.
 * The subquery is aliased `relationship_profile_values`.
 */
getSampleValueAggregation(innerSql: string): string {
return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -143,35 +176,4 @@ export class KtxSqliteDialect {
FROM sampled
`;
}
/**
 * Builds a SQLite `DATE(...)` expression that truncates `column` to the given granularity.
 *
 * @param column - SQL for the value being truncated, interpolated verbatim.
 * @param granularity - Truncation unit.
 * @param _timezone - Accepted for signature compatibility; not used.
 * @returns The truncation expression as SQL text.
 */
getTimeTruncExpression(
  column: string,
  granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
  _timezone?: string,
): string {
  // One expression per granularity; the lookup replaces a switch over the same cases.
  const expressionByGranularity: Record<typeof granularity, string> = {
    day: `DATE(${column})`,
    week: `DATE(${column}, 'weekday 0', '-6 days')`,
    month: `DATE(${column}, 'start of month')`,
    quarter: `DATE(${column}, 'start of month', '-' || ((CAST(STRFTIME('%m', ${column}) AS INTEGER) - 1) % 3) || ' months')`,
    year: `DATE(${column}, 'start of year')`,
  };
  return expressionByGranularity[granularity];
}
/**
 * Builds a SQLite expression that buckets `column` into fixed-width intervals anchored at `origin`.
 *
 * @param column - SQL for the value being bucketed, interpolated verbatim.
 * @param interval - Text of the form "<amount> <unit>", split on a single space.
 * @param origin - Bucket anchor date; '1970-01-01' is used when falsy.
 * @param _timezone - Accepted for signature compatibility; not used.
 * @returns A `DATE(...)` expression yielding the start of the bucket that contains `column`.
 */
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, _timezone?: string): string {
  const [amount, unit] = interval.split(' ');
  const originExpr = origin ? `julianday('${origin}')` : `julianday('1970-01-01')`;
  // Only 'day' and 'week' have exact widths; every other unit falls back to 30 days.
  const unitDays = unit === 'day' ? 1 : unit === 'week' ? 7 : 30;
  const intervalDays = Number(amount) * unitDays;
  // The bucket start must be rebuilt from the same anchor the offset was measured from.
  // Adding the offset to the fixed 1970-01-01 epoch shifted every result whenever a
  // custom origin was supplied. With the default origin the emitted SQL is unchanged.
  return `DATE(${originExpr} + (CAST((julianday(${column}) - ${originExpr}) / ${intervalDays} AS INTEGER) * ${intervalDays}))`;
}
/** Wraps the interval text in single quotes; the text is interpolated verbatim with no escaping. */
parseIntervalToSql(interval: string): string {
return `'${interval}'`;
}
}

View file

@ -1,476 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createSqlServerLiveDatabaseIntrospection } from '../../connectors/sqlserver/live-database-introspection.js';
import { isKtxSqlServerConnectionConfig, KtxSqlServerScanConnector, sqlServerConnectionPoolConfigFromConfig, type KtxSqlServerConnectionConfig, type KtxSqlServerPoolFactory, type KtxSqlServerQueryResult } from '../../connectors/sqlserver/connector.js';
import { tableRefSet } from '../../context/scan/table-ref.js';
/**
 * Attaches a `columns` map to `rows` and returns that same array.
 *
 * @param rows - Result rows; mutated in place by the added `columns` property.
 * @param columnNames - Column names to describe; each is declared as 'nvarchar'.
 * @returns `rows` itself, typed as carrying the `columns` metadata.
 */
function recordset<T extends Record<string, unknown>>(
  rows: T[],
  columnNames: string[],
): T[] & { columns: Record<string, { type: { declaration: string } }> } {
  const columnEntries: Array<[string, { type: { declaration: string } }]> = [];
  for (const columnName of columnNames) {
    columnEntries.push([columnName, { type: { declaration: 'nvarchar' } }]);
  }
  return Object.assign(rows, { columns: Object.fromEntries(columnEntries) });
}
/** Wraps `rows` in a query-result object whose `recordset` is built by `recordset(rows, columnNames)`. */
function result<T extends Record<string, unknown>>(rows: T[], columnNames: string[]): KtxSqlServerQueryResult {
return { recordset: recordset(rows, columnNames) };
}
/**
 * Builds a fake SQL Server pool factory for tests.
 *
 * The fake `query` inspects the SQL text with substring checks, evaluated top to bottom,
 * and returns the canned result of the first branch that matches. Unrecognised SQL throws.
 *
 * @param options.primaryKeyError - When set, thrown by the primary-key discovery branch.
 * @param options.foreignKeyError - When set, thrown by the foreign-key discovery branch.
 * @returns A factory whose `createPool` resolves to an object exposing `request()` and `close()`.
 */
function fakePoolFactory(options: { primaryKeyError?: Error; foreignKeyError?: Error } = {}): KtxSqlServerPoolFactory {
const query = vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
// Table listing: two base tables and one view.
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return result(
[
{ table_name: 'customers', table_type: 'BASE TABLE' },
{ table_name: 'orders', table_type: 'BASE TABLE' },
{ table_name: 'order_summary', table_type: 'VIEW' },
],
['table_name', 'table_type'],
);
}
// Table-level MS_Description comments (minor_id = 0).
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = 0')) {
return result([{ table_name: 'customers', table_comment: 'Customer table' }], [
'table_name',
'table_comment',
]);
}
// Column-level MS_Description comments (minor_id joined to column_id).
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = c.column_id')) {
return result([{ table_name: 'customers', column_name: 'id', column_comment: 'PK' }], [
'table_name',
'column_name',
'column_comment',
]);
}
// Column metadata for all three relations (six columns in total).
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return result(
[
{ table_name: 'customers', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'customers', column_name: 'name', data_type: 'nvarchar', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'customer_id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
{ table_name: 'order_summary', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
],
['table_name', 'column_name', 'data_type', 'is_nullable'],
);
}
// Primary-key discovery; fails with the injected error when one is configured.
if (sql.includes("CONSTRAINT_TYPE = 'PRIMARY KEY'")) {
if (options.primaryKeyError) {
throw options.primaryKeyError;
}
return result(
[
{ table_name: 'customers', column_name: 'id' },
{ table_name: 'orders', column_name: 'id' },
],
['table_name', 'column_name'],
);
}
// Foreign-key discovery; fails with the injected error when one is configured.
if (sql.includes('REFERENTIAL_CONSTRAINTS')) {
if (options.foreignKeyError) {
throw options.foreignKeyError;
}
return result(
[
{
table_name: 'orders',
column_name: 'customer_id',
referenced_table_schema: 'dbo',
referenced_table_name: 'customers',
referenced_column_name: 'id',
constraint_name: 'orders_customer_id_fk',
},
],
[
'table_name',
'column_name',
'referenced_table_schema',
'referenced_table_name',
'referenced_column_name',
'constraint_name',
],
);
}
// Bulk row counts grouped by table name.
if (sql.includes('sys.partitions') && sql.includes('GROUP BY t.name')) {
return result(
[
{ table_name: 'customers', row_count: 2 },
{ table_name: 'orders', row_count: 2 },
],
['table_name', 'row_count'],
);
}
// Table sample of two columns.
if (sql.includes('SELECT TOP 1 [id], [status] FROM [dbo].[orders]')) {
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
}
// Read-only query wrapped in a TOP-limited subquery.
if (sql.includes('SELECT TOP 1 * FROM (select id, status from dbo.orders) AS ktx_query_result')) {
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
}
// Single-column sample.
if (sql.includes('SELECT TOP 5 [status] FROM [dbo].[orders]')) {
return result([{ status: 'paid' }, { status: 'open' }], ['status']);
}
// Cardinality probe.
if (sql.includes('COUNT(DISTINCT val)')) {
return result([{ cardinality: 2 }], ['cardinality']);
}
// Distinct-values listing.
if (sql.includes('SELECT TOP 10 val')) {
return result([{ val: 'open' }, { val: 'paid' }], ['val']);
}
// Row count for a single table selected through the @tableName parameter.
if (sql.includes('SUM(p.rows) AS row_count') && sql.includes('t.name = @tableName')) {
return result([{ row_count: 2 }], ['row_count']);
}
// Schema listing.
if (sql.includes('SELECT s.name AS schema_name')) {
return result([{ schema_name: 'dbo' }, { schema_name: 'sales' }], ['schema_name']);
}
// Connectivity probe; matched on the whole trimmed statement rather than a substring.
if (sql.trim() === 'SELECT 1') {
return result([{ ok: 1 }], ['ok']);
}
// Fail loudly so a test notices any query it did not anticipate.
throw new Error(`Unexpected SQL: ${sql}`);
});
// `input` returns the request itself so calls can be chained; bound values are not recorded beyond the mock's call log.
const request: { input(name: string, value: unknown): typeof request; query: typeof query } = {
input: vi.fn((_key: string, _value: unknown) => request),
query,
};
const close = vi.fn(async () => undefined);
return {
// Every pool shares the single `request` object and `close` mock created above.
createPool: vi.fn(async () => ({
request: () => request,
close,
})),
};
}
describe('KtxSqlServerScanConnector', () => {
it('resolves SQL Server connection configuration safely', () => {
expect(
isKtxSqlServerConnectionConfig({
driver: 'sqlserver',
host: 'localhost',
database: 'analytics',
}),
).toBe(true);
expect(isKtxSqlServerConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(false);
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
port: 14330,
database: 'analytics',
username: 'reader',
trustServerCertificate: false,
},
}),
).toMatchObject({
server: 'db.example.test',
port: 14330,
database: 'analytics',
user: 'reader',
options: { encrypt: true, trustServerCertificate: false },
});
});
it('defaults and validates SQL Server maxConnections', () => {
const baseConnection: KtxSqlServerConnectionConfig = {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
};
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: baseConnection,
}),
).toMatchObject({ pool: { max: 10 } });
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: 15 },
}),
).toMatchObject({ pool: { max: 15 } });
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: '12' as never },
}),
).toMatchObject({ pool: { max: 12 } });
for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) {
expect(() =>
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections },
}),
).toThrow('connections.warehouse.maxConnections must be a positive integer');
}
});
it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
  // Fixed clock so the snapshot's extractedAt can be asserted exactly.
  const extractedAt = '2026-04-29T16:00:00.000Z';
  const connector = new KtxSqlServerScanConnector({
    connectionId: 'warehouse',
    connection: {
      driver: 'sqlserver',
      host: 'db.example.test',
      database: 'analytics',
      username: 'reader',
      schema: 'dbo',
    },
    poolFactory: fakePoolFactory(),
    now: () => new Date(extractedAt),
  });

  const snapshot = await connector.introspect(
    { connectionId: 'warehouse', driver: 'sqlserver' },
    { runId: 'scan-run-1' },
  );
  const tableNamed = (name: string) => snapshot.tables.find((table) => table.name === name);

  // Snapshot-level identity, scope, and summary metadata.
  expect(snapshot).toMatchObject({
    connectionId: 'warehouse',
    driver: 'sqlserver',
    extractedAt,
    scope: { catalogs: ['analytics'], schemas: ['dbo'] },
    metadata: {
      database: 'analytics',
      host: 'db.example.test',
      schemas: ['dbo'],
      table_count: 3,
      total_columns: 6,
    },
  });

  // Per-table kind, row estimate, and comment, in the order the snapshot lists them.
  const tableSummaries = snapshot.tables.map((table) => [
    table.name,
    table.kind,
    table.estimatedRows,
    table.comment,
  ]);
  expect(tableSummaries).toEqual([
    ['customers', 'table', 2, 'Customer table'],
    ['orders', 'table', 2, null],
    ['order_summary', 'view', null, null],
  ]);

  // First customers column carries type mapping, primary-key flag, and comment.
  expect(tableNamed('customers')?.columns[0]).toMatchObject({
    name: 'id',
    nativeType: 'int',
    normalizedType: 'int',
    dimensionType: 'number',
    nullable: false,
    primaryKey: true,
    comment: 'PK',
  });

  // orders references customers through a single foreign key.
  expect(tableNamed('orders')?.foreignKeys).toEqual([
    {
      fromColumn: 'customer_id',
      toCatalog: 'analytics',
      toDb: 'dbo',
      toTable: 'customers',
      toColumn: 'id',
      constraintName: 'orders_customer_id_fk',
    },
  ]);
});
it('soft-fails denied SQL Server constraint discovery with scan warnings', async () => {
  // Both constraint queries fail with driver errors carrying a numeric `number` field.
  const primaryKeyError = Object.assign(new Error('SELECT permission denied'), { number: 229 });
  const foreignKeyError = Object.assign(new Error('EXECUTE permission denied'), { number: 230 });
  const connector = new KtxSqlServerScanConnector({
    connectionId: 'warehouse',
    connection: {
      driver: 'sqlserver',
      host: 'db.example.test',
      database: 'analytics',
      username: 'reader',
      schema: 'dbo',
    },
    poolFactory: fakePoolFactory({ primaryKeyError, foreignKeyError }),
    now: () => new Date('2026-04-29T16:00:00.000Z'),
  });

  const snapshot = await connector.introspect(
    { connectionId: 'warehouse', driver: 'sqlserver' },
    { runId: 'scan-run-sqlserver-denied-constraints' },
  );

  // Introspection still succeeds; each denied query surfaces as a recoverable warning.
  const expectedWarnings = [
    {
      code: 'constraint_discovery_unauthorized',
      message: 'Skipped primary-key discovery in dbo (insufficient grants on system catalogs)',
      recoverable: true,
      metadata: { schema: 'dbo', kind: 'primary_key' },
    },
    {
      code: 'constraint_discovery_unauthorized',
      message: 'Skipped foreign-key discovery in dbo (insufficient grants on system catalogs)',
      recoverable: true,
      metadata: { schema: 'dbo', kind: 'foreign_key' },
    },
  ];
  expect(snapshot.warnings).toEqual(expectedWarnings);

  // With discovery skipped, no column is flagged as a primary key and no table has foreign keys.
  const noPrimaryKeys = snapshot.tables.every(
    (table) => !table.columns.some((column) => column.primaryKey !== false),
  );
  const noForeignKeys = !snapshot.tables.some((table) => table.foreignKeys.length !== 0);
  expect(noPrimaryKeys).toBe(true);
  expect(noForeignKeys).toBe(true);
});
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
  const poolFactory = fakePoolFactory();
  const connector = new KtxSqlServerScanConnector({
    connectionId: 'warehouse',
    connection: {
      driver: 'sqlserver',
      host: 'db.example.test',
      database: 'analytics',
      username: 'reader',
      schema: 'dbo',
    },
    poolFactory,
  });
  // Shared fixtures: every call below targets analytics.dbo.orders under the same run id.
  const connectionId = 'warehouse';
  const ordersTable = { catalog: 'analytics', db: 'dbo', name: 'orders' };
  const scanContext = { runId: 'scan-run-1' };

  // Table sample limited to one row of the requested columns.
  await expect(
    connector.sampleTable({ connectionId, table: ordersTable, columns: ['id', 'status'], limit: 1 }, scanContext),
  ).resolves.toEqual({
    headers: ['id', 'status'],
    headerTypes: ['nvarchar', 'nvarchar'],
    rows: [[10, 'paid']],
    totalRows: 1,
  });

  // Column sample returns values only; null and distinct counts are reported as null.
  await expect(
    connector.sampleColumn({ connectionId, table: ordersTable, column: 'status', limit: 5 }, scanContext),
  ).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

  // Distinct values together with their cardinality.
  await expect(
    connector.getColumnDistinctValues(ordersTable, 'status', { maxCardinality: 5, limit: 10, sampleSize: 100 }),
  ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

  // Read-only SELECT is executed and capped by maxRows.
  await expect(
    connector.executeReadOnly(
      { connectionId, sql: 'select id, status from dbo.orders', maxRows: 1 },
      scanContext,
    ),
  ).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });

  // A mutating statement is rejected before execution.
  await expect(
    connector.executeReadOnly({ connectionId, sql: 'delete from orders' }, scanContext),
  ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

  await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
  await expect(connector.listSchemas()).resolves.toEqual(['dbo', 'sales']);

  // columnStats resolves to null for this connector.
  await expect(
    connector.columnStats({ connectionId, table: ordersTable, column: 'status' }, scanContext),
  ).resolves.toBeNull();

  await connector.cleanup();
});
it('limits introspection to tables in tableScope', async () => {
  // Record every SQL string and bound input so the scoped query can be inspected afterwards.
  const executedSql: string[] = [];
  const boundInputs: Array<{ name: string; value: unknown }> = [];

  const request = {
    // `input` returns the request itself so calls can be chained.
    input: vi.fn((name: string, value: unknown) => {
      boundInputs.push({ name, value });
      return request;
    }),
    query: vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
      executedSql.push(sql);
      const isTablesQuery = sql.includes('INFORMATION_SCHEMA.TABLES');
      if (isTablesQuery) {
        return result([{ table_name: 'orders', table_type: 'BASE TABLE' }], ['table_name', 'table_type']);
      }
      const isColumnsQuery = sql.includes('INFORMATION_SCHEMA.COLUMNS');
      if (isColumnsQuery) {
        return result(
          [{ table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' }],
          ['table_name', 'column_name', 'data_type', 'is_nullable'],
        );
      }
      // Any other query (constraints, comments, ...) yields an empty result.
      return result([], []);
    }),
  };
  const poolFactory: KtxSqlServerPoolFactory = {
    createPool: vi.fn(async () => ({
      request: () => request,
      close: vi.fn(async () => undefined),
    })),
  };

  const connector = new KtxSqlServerScanConnector({
    connectionId: 'warehouse',
    connection: {
      driver: 'sqlserver',
      host: 'db.example.test',
      database: 'analytics',
      username: 'reader',
      schema: 'dbo',
    },
    poolFactory,
  });

  const tableScope = tableRefSet([{ catalog: 'analytics', db: 'dbo', name: 'orders' }]);
  const snapshot = await connector.introspect(
    { connectionId: 'warehouse', driver: 'sqlserver', tableScope },
    { runId: 'scope-test' },
  );

  const tableNames = snapshot.tables.map((table) => table.name);
  expect(tableNames).toEqual(['orders']);

  // The tables query must filter by a bound parameter rather than an inlined table name.
  const tablesQuery = executedSql.find((query) => query.includes('INFORMATION_SCHEMA.TABLES'));
  expect(tablesQuery).toMatch(/TABLE_NAME IN \(@table_0\)/);
  expect(boundInputs).toEqual(expect.arrayContaining([{ name: 'table_0', value: 'orders' }]));
});
it('adapts native SQL Server snapshots to live-database introspection for local ingest', async () => {
  // Fixed clock so extractedAt is deterministic.
  const extractedAt = '2026-04-29T16:00:00.000Z';
  const introspection = createSqlServerLiveDatabaseIntrospection({
    connections: {
      warehouse: {
        driver: 'sqlserver',
        host: 'db.example.test',
        database: 'analytics',
        username: 'reader',
        schema: 'dbo',
      },
    },
    poolFactory: fakePoolFactory(),
    now: () => new Date(extractedAt),
  });

  const snapshot = await introspection.extractSchema('warehouse');
  expect(snapshot).toMatchObject({ connectionId: 'warehouse', extractedAt });

  // The customers table keeps its catalog/schema location and full column metadata.
  const customers = snapshot.tables.find((table) => table.name === 'customers');
  const idColumn = {
    name: 'id',
    nativeType: 'int',
    normalizedType: 'int',
    dimensionType: 'number',
    nullable: false,
    primaryKey: true,
    comment: 'PK',
  };
  const nameColumn = {
    name: 'name',
    nativeType: 'nvarchar',
    normalizedType: 'nvarchar',
    dimensionType: 'string',
    nullable: false,
    primaryKey: false,
    comment: null,
  };
  expect(customers).toMatchObject({
    name: 'customers',
    catalog: 'analytics',
    db: 'dbo',
    columns: [idColumn, nameColumn],
    foreignKeys: [],
  });
});
});

View file

@ -1,4 +1,5 @@
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js';
import { getDialectForDriver } from '../../context/connections/dialects.js';
import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import {
@ -26,7 +27,6 @@ import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import sql from 'mssql';
import { KtxSqlServerDialect } from './dialect.js';
export interface KtxSqlServerConnectionConfig {
driver?: string;
@ -158,6 +158,21 @@ function tableScopeSql(
return { clause: `AND ${columnExpression} IN (${placeholders.join(', ')})`, params };
}
/**
 * Rewrites `:name` placeholders in `sql` to SQL Server `@name` parameters for
 * every key present in `params`.
 *
 * Placeholders whose name is not a key of `params` are left untouched. When no
 * `params` object is supplied the SQL is returned unchanged.
 *
 * @param sql - Query text that may contain `:name` placeholders.
 * @param params - Named parameter values; only the keys are used for rewriting.
 * @returns The rewritten SQL together with the same `params` object.
 * @internal
 */
export function prepareSqlServerReadOnlyQuery(
  sql: string,
  params?: Record<string, unknown>,
): { sql: string; params?: Record<string, unknown> } {
  if (!params) {
    return { sql, params: undefined };
  }
  let parameterizedQuery = sql;
  for (const key of Object.keys(params)) {
    // Escape regex metacharacters so the key is matched literally: an unescaped
    // key such as `a.b` would also match `:axb`, and `a(` would throw a SyntaxError.
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // The trailing \b keeps `:id` from matching the prefix of `:id2`.
    const placeholder = new RegExp(`:${escapedKey}\\b`, 'g');
    // A replacer function avoids `$&`/`$1`-style sequences in the key being
    // interpreted as replacement patterns.
    parameterizedQuery = parameterizedQuery.replace(placeholder, () => `@${key}`);
  }
  return { sql: parameterizedQuery, params };
}
class DefaultSqlServerPoolFactory implements KtxSqlServerPoolFactory {
async createPool(config: KtxSqlServerPoolConfig): Promise<KtxSqlServerPool> {
const pool = await new sql.ConnectionPool(config as sql.config).connect();
@ -349,7 +364,7 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
private readonly poolFactory: KtxSqlServerPoolFactory;
private readonly endpointResolver?: KtxSqlServerEndpointResolver;
private readonly now: () => Date;
private readonly dialect = new KtxSqlServerDialect();
private readonly dialect = getDialectForDriver('sqlserver');
private pool: KtxSqlServerPool | null = null;
private resolvedEndpoint: KtxSqlServerResolvedEndpoint | null = null;
@ -427,7 +442,7 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
async executeReadOnly(input: KtxSqlServerReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForSqlServerExecution(input.sql, input.maxRows);
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const prepared = prepareSqlServerReadOnlyQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
@ -517,6 +532,7 @@ export class KtxSqlServerScanConnector implements KtxScanConnector {
params,
);
return rows.map((row) => ({
catalog: this.poolConfig.database,
schema: row.schema_name,
name: row.table_name,
kind: row.table_type === 'VIEW' ? ('view' as const) : ('table' as const),

View file

@ -1,49 +0,0 @@
import { describe, expect, it } from 'vitest';
import { KtxSqlServerDialect } from './dialect.js';
describe('KtxSqlServerDialect', () => {
  // One shared instance is used by every case below.
  const dialect = new KtxSqlServerDialect();

  it('quotes identifiers and formats schema-qualified table names', () => {
    // Closing brackets inside an identifier are doubled.
    const plainIdentifier = dialect.quoteIdentifier('events');
    const bracketIdentifier = dialect.quoteIdentifier('odd]name');
    expect(plainIdentifier).toBe('[events]');
    expect(bracketIdentifier).toBe('[odd]]name]');

    // The catalog is not part of the formatted name; a missing schema yields the bare table.
    const qualified = dialect.formatTableName({ catalog: 'warehouse', db: 'dbo', name: 'events' });
    const unqualified = dialect.formatTableName({ catalog: null, db: null, name: 'events' });
    expect(qualified).toBe('[dbo].[events]');
    expect(unqualified).toBe('[events]');
  });

  it('maps SQL Server types to KTX dimension types', () => {
    // [native type, expected dimension type]; the empty string maps to 'string'.
    const expectations = [
      ['datetime2', 'time'],
      ['decimal(18, 2)', 'number'],
      ['bigint', 'number'],
      ['bit', 'boolean'],
      ['uniqueidentifier', 'string'],
      ['', 'string'],
    ] as const;
    for (const [nativeType, dimensionType] of expectations) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('builds sampling, distinct-value, pagination, and time SQL', () => {
    const tableSql = '[dbo].[events]';

    const sampleSql = dialect.generateSampleQuery(tableSql, 25, ['id', 'event_name']);
    expect(sampleSql).toBe('SELECT TOP 25 [id], [event_name] FROM [dbo].[events]');

    const columnSampleSql = dialect.generateColumnSampleQuery(tableSql, 'event_name', 10);
    expect(columnSampleSql).toBe(
      "SELECT TOP 10 [event_name] FROM [dbo].[events] WHERE [event_name] IS NOT NULL AND LTRIM(RTRIM(CAST([event_name] AS NVARCHAR(MAX)))) != ''",
    );

    const distinctSql = dialect.generateDistinctValuesQuery(tableSql, '[event_name]', 5);
    expect(distinctSql).toContain('SELECT TOP 5 val');

    expect(dialect.getTopClause(10)).toBe('TOP 10');
    expect(dialect.getLimitOffsetClause(10, 20)).toBe('OFFSET 20 ROWS FETCH NEXT 10 ROWS ONLY');

    const monthTrunc = dialect.getTimeTruncExpression('created_at', 'month');
    expect(monthTrunc).toBe('DATEFROMPARTS(YEAR(created_at), MONTH(created_at), 1)');
  });

  it('prepares named parameters using SQL Server @ parameters', () => {
    const params = { id: 10, name: 'signup' };
    const prepared = dialect.prepareQuery('select * from events where id = :id and name = :name', {
      id: 10,
      name: 'signup',
    });
    expect(prepared).toEqual({
      sql: 'select * from events where id = @id and name = @name',
      params,
    });
  });
});

View file

@ -1,9 +1,18 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
parseDialectDisplayRef,
safeSqlLimit,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type SqlServerTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxSqlServerDialect {
readonly type = 'sqlserver';
/** @internal */
export class KtxSqlServerDialect implements KtxDialect {
readonly type = 'sqlserver' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
datetime: 'time',
@ -39,9 +48,19 @@ export class KtxSqlServerDialect {
}
formatTableName(table: SqlServerTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
/**
 * Builds the display form of a table reference by delegating to the shared
 * dialect helper with the 'three-part' mode.
 */
formatDisplayRef(table: SqlServerTableNameRef): string {
  const displayRef = formatDialectDisplayRef(table, 'three-part');
  return displayRef;
}
/**
 * Parses a display string back into a table reference by delegating to the
 * shared dialect helper with the 'three-part' mode.
 *
 * @returns The parsed reference, or `null` when the helper returns `null`.
 */
parseDisplayRef(display: string): KtxTableRef | null {
  const parsedRef = parseDialectDisplayRef(display, 'three-part');
  return parsedRef;
}
/**
 * Reports the part count the shared helper returns for the 'three-part' mode.
 */
columnDisplayTablePartCount(): 1 | 2 | 3 {
  const partCount = columnDisplayPartCount('three-part');
  return partCount;
}
mapDataType(nativeType: string): string {
@ -86,17 +105,6 @@ export class KtxSqlServerDialect {
return `SELECT TOP ${limit} ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND LTRIM(RTRIM(CAST(${quotedColumn} AS NVARCHAR(MAX)))) != ''`;
}
/**
 * Rewrites `:name` placeholders in `sql` to SQL Server `@name` parameters for
 * every key present in `params`.
 *
 * Placeholders whose name is not a key of `params` are left untouched. When no
 * `params` object is supplied the SQL is returned unchanged.
 *
 * @param sql - Query text that may contain `:name` placeholders.
 * @param params - Named parameter values; only the keys are used for rewriting.
 * @returns The rewritten SQL together with the same `params` object.
 */
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
  if (!params) {
    return { sql, params: undefined };
  }
  let parameterizedQuery = sql;
  for (const key of Object.keys(params)) {
    // Escape regex metacharacters so the key is matched literally: an unescaped
    // key such as `a.b` would also match `:axb`, and `a(` would throw a SyntaxError.
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // The trailing \b keeps `:id` from matching the prefix of `:id2`.
    const placeholder = new RegExp(`:${escapedKey}\\b`, 'g');
    // A replacer function avoids `$&`/`$1`-style sequences in the key being
    // interpreted as replacement patterns.
    parameterizedQuery = parameterizedQuery.replace(placeholder, () => `@${key}`);
  }
  return { sql: parameterizedQuery, params };
}
getRandomSampleFilter(samplePct: number): string {
if (samplePct <= 0 || samplePct >= 1) {
return '';
@ -111,12 +119,12 @@ export class KtxSqlServerDialect {
return `TABLESAMPLE (${samplePct * 100} PERCENT)`;
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `OFFSET ${offset} ROWS FETCH NEXT ${limit} ROWS ONLY` : '';
getLimitOffsetClause(_limit: number, _offset?: number): string {
return '';
}
getTopClause(limit: number): string {
return `TOP ${limit}`;
return `TOP (${safeSqlLimit(limit)})`;
}
getNullCountExpression(column: string): string {
@ -127,6 +135,18 @@ export class KtxSqlServerDialect {
return `COUNT(DISTINCT ${column})`;
}
/**
 * Returns a SQL expression that applies LEN to `columnSql` after casting it
 * to NVARCHAR(MAX).
 *
 * @param columnSql - SQL fragment for the column; interpolated as-is.
 */
textLengthExpression(columnSql: string): string {
  const asText = `CAST(${columnSql} AS NVARCHAR(MAX))`;
  return `LEN(${asText})`;
}
/**
 * Returns a SQL expression casting `columnSql` to NVARCHAR(MAX).
 *
 * @param columnSql - SQL fragment for the column; interpolated as-is.
 */
castToText(columnSql: string): string {
  return 'CAST(' + columnSql + ' AS NVARCHAR(MAX))';
}
/**
 * Wraps `innerSql` in a parenthesised scalar subquery that concatenates its
 * `value` column (cast to NVARCHAR(MAX)) with STRING_AGG, using CHAR(31) as
 * the separator.
 *
 * @param innerSql - Subquery text; it is referenced through a `value` column.
 */
getSampleValueAggregation(innerSql: string): string {
  const aggregate = 'SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31))';
  const derivedTable = `(${innerSql}) AS relationship_profile_values`;
  return `(${aggregate} FROM ${derivedTable})`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -167,35 +187,4 @@ export class KtxSqlServerDialect {
FROM sampled
`;
}
/**
 * Builds a SQL expression that truncates `column` to the start of the given
 * calendar granularity.
 *
 * @param column - SQL fragment for the time column; interpolated as-is.
 * @param granularity - Calendar unit to truncate to.
 * @param timezone - Optional zone name; when set, the column is first wrapped
 *   in `AT TIME ZONE 'UTC' AT TIME ZONE '<timezone>'`.
 */
getTimeTruncExpression(
  column: string,
  granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
  timezone?: string,
): string {
  const source = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column;
  // Lookup table in place of a switch; an unknown key yields undefined, as the switch would.
  const expressions: Record<'day' | 'week' | 'month' | 'quarter' | 'year', string> = {
    day: `CAST(${source} AS DATE)`,
    week: `DATEADD(WEEK, DATEDIFF(WEEK, 0, ${source}), 0)`,
    month: `DATEFROMPARTS(YEAR(${source}), MONTH(${source}), 1)`,
    quarter: `DATEFROMPARTS(YEAR(${source}), (DATEPART(QUARTER, ${source}) - 1) * 3 + 1, 1)`,
    year: `DATEFROMPARTS(YEAR(${source}), 1, 1)`,
  };
  return expressions[granularity];
}
/**
 * Builds a SQL expression that buckets `column` into fixed-size intervals
 * counted from an origin.
 *
 * @param column - SQL fragment for the time column; interpolated as-is.
 * @param interval - Space-separated "<amount> <unit>" text; the first token is
 *   used as the bucket size and the second as the DATEADD/DATEDIFF unit.
 * @param origin - Optional origin literal; defaults to '1970-01-01'.
 * @param timezone - Optional zone name; when set, the column is first wrapped
 *   in `AT TIME ZONE 'UTC' AT TIME ZONE '<timezone>'`.
 */
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
  const source = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column;
  const intervalParts = interval.split(' ');
  const amount = intervalParts[0];
  const unit = intervalParts[1];
  const anchor = origin ? `'${origin}'` : `'1970-01-01'`;
  // Whole intervals elapsed since the anchor, via SQL integer division.
  const elapsedBuckets = `(DATEDIFF(${unit}, ${anchor}, ${source}) / ${amount})`;
  return `DATEADD(${unit}, ${elapsedBuckets} * ${amount}, ${anchor})`;
}
/**
 * Wraps `interval` in single quotes. The text is not escaped or validated.
 */
parseIntervalToSql(interval: string): string {
  const quote = "'";
  return quote + interval + quote;
}
}