mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
feat(cli): define full warehouse dialect contract
This commit is contained in:
parent
78b8a0c025
commit
95a2b5daf1
10 changed files with 705 additions and 379 deletions
87
packages/cli/src/context/connections/dialect-helpers.ts
Normal file
87
packages/cli/src/context/connections/dialect-helpers.ts
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
import type { KtxTableRef } from '../scan/types.js';
|
||||
|
||||
/**
 * How a dialect addresses tables:
 * - `'sqlite'`: the bare table name only.
 * - `'ansi'`: display refs with two or three dot-separated parts.
 * - `'three-part'`: display refs with exactly three dot-separated parts.
 */
export type KtxDialectIdentifierShape = 'ansi' | 'sqlite' | 'three-part';
|
||||
|
||||
/** Table reference accepted by the formatting helpers: `name` is required, `catalog` and `db` may be omitted. */
export type KtxDialectTableRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * Normalizes a caller-supplied row limit into a positive integer that is safe
 * to interpolate into a SQL `LIMIT` clause.
 *
 * @param limit Requested row count; may be fractional, non-positive, or non-finite.
 * @returns The floored limit, clamped to the range `[1, Number.MAX_SAFE_INTEGER]`.
 *   `NaN` falls back to `1`.
 */
export function safeSqlLimit(limit: number): number {
  // NaN would otherwise survive Math.max/Math.floor and be rendered as "LIMIT NaN".
  if (Number.isNaN(limit)) {
    return 1;
  }
  // The upper clamp keeps Infinity and huge values from rendering as
  // "Infinity" or exponent notation, neither of which is valid SQL.
  return Math.min(Number.MAX_SAFE_INTEGER, Math.max(1, Math.floor(limit)));
}
|
||||
|
||||
/**
 * Normalizes an optional row offset for use in a SQL `OFFSET` clause.
 *
 * @param offset Requested number of rows to skip, if any.
 * @returns The floored offset (capped at `Number.MAX_SAFE_INTEGER`) when it is
 *   strictly positive; `null` when no `OFFSET` clause should be emitted
 *   (undefined, `NaN`, zero, or negative input).
 */
function safeSqlOffset(offset: number | undefined): number | null {
  if (offset === undefined || Number.isNaN(offset)) {
    return null;
  }
  // Cap so that Infinity / very large values never render as "Infinity" or "1e+21".
  const normalized = Math.min(Number.MAX_SAFE_INTEGER, Math.floor(offset));
  return normalized > 0 ? normalized : null;
}
|
||||
|
||||
/**
 * Trims one identifier segment and strips a single leading and/or trailing
 * quote character (`"`, `'`, backtick, or square bracket).
 *
 * The leading and trailing characters are removed independently; they are not
 * required to form a matching pair.
 */
function cleanIdentifierPart(part: string): string {
  const trimmed = part.trim();
  return trimmed.replace(/^["'`\[]|["'`\]]$/g, '');
}
|
||||
|
||||
/**
 * Splits a dotted display reference into its cleaned identifier segments.
 *
 * Each segment is passed through `cleanIdentifierPart`; segments that end up
 * empty are dropped.
 */
function splitDisplay(display: string): string[] {
  return display
    .trim()
    .split('.')
    .map((segment) => cleanIdentifierPart(segment))
    .filter(Boolean);
}
|
||||
|
||||
/**
 * Returns the ordered identifier segments used to address `table`.
 *
 * For the `'sqlite'` shape only the table name is used. For every other shape
 * the result is `[catalog, db, name]` with missing or empty segments omitted.
 */
function tableParts(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string[] {
  if (shape === 'sqlite') {
    return [table.name];
  }
  const candidates = [table.catalog ?? null, table.db ?? null, table.name];
  return candidates.filter((part): part is string => Boolean(part));
}
|
||||
|
||||
/**
 * Lists how many dot-separated segments a display reference may have for the
 * given shape: one for `'sqlite'`, three for `'three-part'`, and two or three
 * otherwise.
 */
function acceptedDisplayPartCounts(shape: KtxDialectIdentifierShape): readonly number[] {
  switch (shape) {
    case 'sqlite':
      return [1];
    case 'three-part':
      return [3];
    default:
      return [2, 3];
  }
}
|
||||
|
||||
/**
 * Builds the quoted, dot-joined SQL name for `table`.
 *
 * @param table Table reference to format.
 * @param quoteIdentifier Dialect-specific quoting applied to every segment.
 * @param shape Identifier shape that decides which segments are included.
 * @returns The quoted segments joined with `.`.
 */
export function formatDialectTableName(
  table: KtxDialectTableRef,
  quoteIdentifier: (identifier: string) => string,
  shape: KtxDialectIdentifierShape,
): string {
  return tableParts(table, shape).map(quoteIdentifier).join('.');
}
|
||||
|
||||
/**
 * Builds the unquoted, dot-joined display reference for `table`, using the
 * same segment selection as `formatDialectTableName`.
 */
export function formatDialectDisplayRef(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string {
  return tableParts(table, shape).join('.');
}
|
||||
|
||||
/**
 * Parses a dotted display reference into a table reference.
 *
 * @param display Display string such as `db.table` or `catalog.db.table`;
 *   surrounding quote characters on each segment are stripped.
 * @param shape Identifier shape that decides how many segments are acceptable.
 * @returns The parsed reference (segments that are absent are `null`), or
 *   `null` when the segment count is not accepted for `shape`.
 */
export function parseDialectDisplayRef(display: string, shape: KtxDialectIdentifierShape): KtxTableRef | null {
  const parts = splitDisplay(display);
  if (!acceptedDisplayPartCounts(shape).includes(parts.length)) {
    return null;
  }
  if (parts.length === 1) {
    return { catalog: null, db: null, name: parts[0]! };
  }
  if (parts.length === 2) {
    return { catalog: null, db: parts[0]!, name: parts[1]! };
  }
  if (parts.length === 3) {
    return { catalog: parts[0]!, db: parts[1]!, name: parts[2]! };
  }
  // Defensive fallback for any segment count not handled above.
  return null;
}
|
||||
|
||||
/**
 * Returns the fixed part count associated with a shape: 1 for `'sqlite'`,
 * 3 for `'three-part'`, and 2 otherwise.
 */
export function columnDisplayPartCount(shape: KtxDialectIdentifierShape): 1 | 2 | 3 {
  switch (shape) {
    case 'sqlite':
      return 1;
    case 'three-part':
      return 3;
    default:
      return 2;
  }
}
|
||||
|
||||
/**
 * Renders a `LIMIT n` or `LIMIT n OFFSET m` clause.
 *
 * Both values are normalized first (`safeSqlLimit` / `safeSqlOffset`); the
 * `OFFSET` part is omitted when the normalized offset is `null`.
 */
export function limitOffsetClause(limit: number, offset?: number): string {
  const safeLimit = safeSqlLimit(limit);
  const safeOffset = safeSqlOffset(offset);
  if (safeOffset === null) {
    return `LIMIT ${safeLimit}`;
  }
  return `LIMIT ${safeLimit} OFFSET ${safeOffset}`;
}
|
||||
|
|
@ -1,24 +1,306 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { getDialectForDriver } from './dialects.js';
|
||||
import type { KtxConnectionDriver, KtxTableRef } from '../scan/types.js';
|
||||
|
||||
/**
 * Per-driver inputs and expected outputs for the dialect contract test.
 * Fields named `*Contains` hold an expected substring rather than an exact value.
 */
interface DialectFixture {
  driver: KtxConnectionDriver;

  // Identifier quoting and table reference formatting/parsing.
  table: KtxTableRef;
  quoteInput: string;
  quotedIdentifier: string;
  formattedTable: string;
  display: string;
  invalidDisplay: string;
  columnDisplayTablePartCount: 1 | 2 | 3;

  // Paging and sampling fragments.
  limitClause: string;
  topClause: string;
  randomFilter: string;
  tableSampleClause: string;
  sampleQuery: string;
  columnSampleContains: string;

  // Column-level expressions.
  nullCountExpression: string;
  distinctCountExpression: string;
  textLengthExpression: string;
  castToText: string;
  sampleValueAggregation: string;

  // Profiling queries.
  cardinalityContains: string;
  randomizedCardinalityContains: string;
  distinctValuesContains: string;
  /** `null` when the statistics query is expected to be `null`. */
  statisticsContains: string | null;

  // Type mapping.
  dimensionInput: string;
  dimensionType: 'time' | 'string' | 'number' | 'boolean';
  nativeTypeInput: string;
  normalizedType: string;
}
|
||||
|
||||
// Inner query handed to `getSampleValueAggregation`; every fixture's `sampleValueAggregation` embeds this text.
const innerSampleSql = 'SELECT status AS value FROM orders';
|
||||
|
||||
// One fixture per supported driver; each entry pins the exact SQL fragments the dialect is expected to emit.
const fixtures: DialectFixture[] = [
  {
    driver: 'postgres',
    table: { catalog: null, db: 'public', name: 'orders' },
    quoteInput: 'order"items',
    quotedIdentifier: '"order""items"',
    formattedTable: '"public"."orders"',
    display: 'public.orders',
    invalidDisplay: 'orders',
    columnDisplayTablePartCount: 2,
    limitClause: 'LIMIT 25 OFFSET 5',
    topClause: '',
    randomFilter: 'RANDOM() < 0.25',
    tableSampleClause: 'TABLESAMPLE SYSTEM (25)',
    sampleQuery: 'SELECT "id", "status" FROM "public"."orders" LIMIT 5',
    columnSampleContains: 'TRIM(CAST("status" AS TEXT)) != \'\'',
    nullCountExpression: 'COUNT(*) FILTER (WHERE "status" IS NULL)',
    distinctCountExpression: 'COUNT(DISTINCT "status")',
    textLengthExpression: 'LENGTH(CAST("status" AS TEXT))',
    castToText: 'CAST("status" AS TEXT)',
    sampleValueAggregation:
      '(SELECT STRING_AGG(CAST(value AS TEXT), CHR(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
    cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
    randomizedCardinalityContains: 'ORDER BY RANDOM()',
    distinctValuesContains: 'SELECT DISTINCT "status"::text AS val',
    statisticsContains: 'FROM pg_stats s',
    dimensionInput: 'timestamp with time zone',
    dimensionType: 'time',
    nativeTypeInput: 'numeric(12,2)',
    normalizedType: 'numeric(12,2)',
  },
  {
    driver: 'mysql',
    table: { catalog: null, db: 'analytics', name: 'orders' },
    quoteInput: 'order`items',
    quotedIdentifier: '`order``items`',
    formattedTable: '`analytics`.`orders`',
    display: 'analytics.orders',
    invalidDisplay: 'orders',
    columnDisplayTablePartCount: 2,
    limitClause: 'LIMIT 25 OFFSET 5',
    topClause: '',
    randomFilter: 'RAND() < 0.25',
    tableSampleClause: '',
    sampleQuery: 'SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 5',
    columnSampleContains: 'TRIM(CAST(`status` AS CHAR)) != \'\'',
    nullCountExpression: 'SUM(CASE WHEN `status` IS NULL THEN 1 ELSE 0 END)',
    distinctCountExpression: 'COUNT(DISTINCT `status`)',
    textLengthExpression: 'CHAR_LENGTH(CAST(`status` AS CHAR))',
    castToText: 'CAST(`status` AS CHAR)',
    sampleValueAggregation:
      '(SELECT GROUP_CONCAT(CAST(value AS CHAR) SEPARATOR CHAR(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
    cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
    randomizedCardinalityContains: 'ORDER BY RAND()',
    distinctValuesContains: 'SELECT DISTINCT CAST(`status` AS CHAR) AS val',
    statisticsContains: null,
    dimensionInput: 'tinyint(1)',
    dimensionType: 'boolean',
    nativeTypeInput: 'varchar(255)',
    normalizedType: 'varchar(255)',
  },
  {
    driver: 'clickhouse',
    table: { catalog: null, db: 'analytics', name: 'events' },
    quoteInput: 'order`items',
    quotedIdentifier: '`order``items`',
    formattedTable: '`analytics`.`events`',
    display: 'analytics.events',
    invalidDisplay: 'events',
    columnDisplayTablePartCount: 2,
    limitClause: 'LIMIT 25 OFFSET 5',
    topClause: '',
    randomFilter: 'rand() / 4294967295.0 < 0.25',
    tableSampleClause: '',
    sampleQuery: 'SELECT `id`, `status` FROM `analytics`.`events` LIMIT 5',
    columnSampleContains: 'trim(toString(`status`)) != \'\'',
    nullCountExpression: 'countIf(`status` IS NULL)',
    distinctCountExpression: 'COUNT(DISTINCT `status`)',
    textLengthExpression: 'length(toString(`status`))',
    castToText: 'toString(`status`)',
    sampleValueAggregation:
      '(SELECT arrayStringConcat(groupArray(toString(value)), \'\\x1F\') FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
    cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
    randomizedCardinalityContains: 'ORDER BY rand()',
    distinctValuesContains: 'SELECT DISTINCT toString(`status`) AS val',
    statisticsContains: null,
    dimensionInput: 'Nullable(DateTime64(3))',
    dimensionType: 'time',
    nativeTypeInput: 'LowCardinality(String)',
    normalizedType: 'LowCardinality(String)',
  },
  {
    driver: 'sqlite',
    table: { catalog: null, db: null, name: 'orders' },
    quoteInput: 'order"items',
    quotedIdentifier: '"order""items"',
    formattedTable: '"orders"',
    display: 'orders',
    invalidDisplay: 'public.orders',
    columnDisplayTablePartCount: 1,
    limitClause: 'LIMIT 25 OFFSET 5',
    topClause: '',
    randomFilter: '(RANDOM() % 100) < 25',
    tableSampleClause: '',
    sampleQuery: 'SELECT "id", "status" FROM "orders" LIMIT 5',
    columnSampleContains: 'TRIM(CAST("status" AS TEXT)) != \'\'',
    nullCountExpression: 'SUM(CASE WHEN "status" IS NULL THEN 1 ELSE 0 END)',
    distinctCountExpression: 'COUNT(DISTINCT "status")',
    textLengthExpression: 'LENGTH(CAST("status" AS TEXT))',
    castToText: 'CAST("status" AS TEXT)',
    sampleValueAggregation:
      '(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
    cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
    randomizedCardinalityContains: 'ORDER BY RANDOM()',
    distinctValuesContains: 'SELECT DISTINCT CAST("status" AS TEXT) AS val',
    statisticsContains: null,
    dimensionInput: 'INTEGER',
    dimensionType: 'number',
    nativeTypeInput: 'VARCHAR(255)',
    normalizedType: 'VARCHAR(255)',
  },
  {
    driver: 'snowflake',
    table: { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' },
    quoteInput: 'order"items',
    quotedIdentifier: '"order""items"',
    formattedTable: '"ANALYTICS"."PUBLIC"."ORDERS"',
    display: 'ANALYTICS.PUBLIC.ORDERS',
    invalidDisplay: 'PUBLIC.ORDERS',
    columnDisplayTablePartCount: 3,
    limitClause: 'LIMIT 25 OFFSET 5',
    topClause: '',
    randomFilter: 'UNIFORM(0::FLOAT, 1::FLOAT, RANDOM()) < 0.25',
    tableSampleClause: 'SAMPLE (25)',
    sampleQuery: 'SELECT "id", "status" FROM "ANALYTICS"."PUBLIC"."ORDERS" SAMPLE ROW (5 ROWS)',
    columnSampleContains: 'TRIM(CAST("status" AS STRING)) != \'\'',
    nullCountExpression: 'COUNT_IF("status" IS NULL)',
    distinctCountExpression: 'APPROX_COUNT_DISTINCT("status")',
    textLengthExpression: 'LENGTH(CAST("status" AS TEXT))',
    castToText: 'CAST("status" AS VARCHAR)',
    sampleValueAggregation:
      '(SELECT LISTAGG(CAST(value AS VARCHAR), \'\\x1f\') FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
    cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
    randomizedCardinalityContains: 'SAMPLE ROW (100 ROWS)',
    distinctValuesContains: 'SELECT DISTINCT "status"::VARCHAR AS val',
    statisticsContains: null,
    dimensionInput: 'TIMESTAMP_NTZ',
    dimensionType: 'time',
    nativeTypeInput: 'NUMBER(38,0)',
    normalizedType: 'NUMBER(38,0)',
  },
  {
    driver: 'bigquery',
    table: { catalog: 'analytics-project', db: 'warehouse', name: 'orders' },
    quoteInput: 'order`items',
    quotedIdentifier: '`order\\`items`',
    formattedTable: '`analytics-project`.`warehouse`.`orders`',
    display: 'analytics-project.warehouse.orders',
    invalidDisplay: 'warehouse.orders',
    columnDisplayTablePartCount: 3,
    limitClause: 'LIMIT 25 OFFSET 5',
    topClause: '',
    randomFilter: 'RAND() < 0.25',
    tableSampleClause: 'TABLESAMPLE SYSTEM (25 PERCENT)',
    sampleQuery: 'SELECT `id`, `status` FROM `analytics-project`.`warehouse`.`orders` ORDER BY RAND() LIMIT 5',
    columnSampleContains: 'TRIM(CAST(`status` AS STRING)) != \'\'',
    nullCountExpression: 'COUNTIF(`status` IS NULL)',
    distinctCountExpression: 'APPROX_COUNT_DISTINCT(`status`)',
    textLengthExpression: 'LENGTH(CAST(`status` AS STRING))',
    castToText: 'CAST(`status` AS STRING)',
    sampleValueAggregation:
      '(SELECT STRING_AGG(CAST(value AS STRING), \'\\u001F\') FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
    cardinalityContains: 'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
    randomizedCardinalityContains: 'ORDER BY RAND()',
    distinctValuesContains: 'SELECT DISTINCT CAST(`status` AS STRING) AS val',
    statisticsContains: null,
    dimensionInput: 'INT64',
    dimensionType: 'number',
    nativeTypeInput: 'INT64',
    normalizedType: 'BIGINT',
  },
  {
    driver: 'sqlserver',
    table: { catalog: 'warehouse', db: 'dbo', name: 'events' },
    quoteInput: 'odd]name',
    quotedIdentifier: '[odd]]name]',
    formattedTable: '[warehouse].[dbo].[events]',
    display: 'warehouse.dbo.events',
    invalidDisplay: 'dbo.events',
    columnDisplayTablePartCount: 3,
    limitClause: '',
    topClause: 'TOP (25)',
    randomFilter: 'ABS(CHECKSUM(NEWID())) % 100 < 25',
    tableSampleClause: 'TABLESAMPLE (25 PERCENT)',
    sampleQuery: 'SELECT TOP 5 [id], [status] FROM [warehouse].[dbo].[events]',
    columnSampleContains: 'LTRIM(RTRIM(CAST([status] AS NVARCHAR(MAX)))) != \'\'',
    nullCountExpression: 'SUM(CASE WHEN [status] IS NULL THEN 1 ELSE 0 END)',
    distinctCountExpression: 'COUNT(DISTINCT [status])',
    textLengthExpression: 'LEN(CAST([status] AS NVARCHAR(MAX)))',
    castToText: 'CAST([status] AS NVARCHAR(MAX))',
    sampleValueAggregation:
      '(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
    cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
    randomizedCardinalityContains: 'ORDER BY NEWID()',
    distinctValuesContains: 'SELECT TOP 20 val',
    statisticsContains: null,
    dimensionInput: 'datetime2',
    dimensionType: 'time',
    nativeTypeInput: 'uniqueidentifier',
    normalizedType: 'uniqueidentifier',
  },
];
|
||||
|
||||
describe('getDialectForDriver', () => {
|
||||
it.each([
|
||||
['postgres', '"public"."orders"'],
|
||||
['mysql', '`public`.`orders`'],
|
||||
['clickhouse', '`public`.`orders`'],
|
||||
['sqlite', '"orders"'],
|
||||
['snowflake', '"analytics"."public"."orders"'],
|
||||
['bigquery', '`analytics`.`public`.`orders`'],
|
||||
['sqlserver', '[analytics].[public].[orders]'],
|
||||
] as const)('formats table names for %s', (driver, expected) => {
|
||||
const dialect = getDialectForDriver(driver);
|
||||
expect(
|
||||
dialect.formatTableName({
|
||||
catalog: driver === 'snowflake' || driver === 'bigquery' || driver === 'sqlserver' ? 'analytics' : null,
|
||||
db: driver === 'sqlite' ? null : 'public',
|
||||
it.each(fixtures)('returns a full KtxDialect for $driver', (fixture) => {
|
||||
const dialect = getDialectForDriver(fixture.driver);
|
||||
const column = dialect.quoteIdentifier('status');
|
||||
|
||||
expect(dialect.type).toBe(fixture.driver);
|
||||
expect(dialect.quoteIdentifier(fixture.quoteInput)).toBe(fixture.quotedIdentifier);
|
||||
expect(dialect.formatTableName(fixture.table)).toBe(fixture.formattedTable);
|
||||
expect(dialect.formatDisplayRef(fixture.table)).toBe(fixture.display);
|
||||
expect(dialect.parseDisplayRef(fixture.display)).toEqual(fixture.table);
|
||||
expect(dialect.parseDisplayRef(fixture.invalidDisplay)).toBeNull();
|
||||
expect(dialect.columnDisplayTablePartCount()).toBe(fixture.columnDisplayTablePartCount);
|
||||
expect(dialect.getLimitOffsetClause(25, 5)).toBe(fixture.limitClause);
|
||||
expect(dialect.getTopClause(25)).toBe(fixture.topClause);
|
||||
expect(dialect.getRandomSampleFilter(0.25)).toBe(fixture.randomFilter);
|
||||
expect(dialect.getTableSampleClause(0.25)).toBe(fixture.tableSampleClause);
|
||||
expect(dialect.generateSampleQuery(fixture.formattedTable, 5, ['id', 'status'])).toBe(fixture.sampleQuery);
|
||||
expect(dialect.generateColumnSampleQuery(fixture.formattedTable, 'status', 10)).toContain(
|
||||
fixture.columnSampleContains,
|
||||
);
|
||||
expect(dialect.getNullCountExpression(column)).toBe(fixture.nullCountExpression);
|
||||
expect(dialect.getDistinctCountExpression(column)).toBe(fixture.distinctCountExpression);
|
||||
expect(dialect.textLengthExpression(column)).toBe(fixture.textLengthExpression);
|
||||
expect(dialect.castToText(column)).toBe(fixture.castToText);
|
||||
expect(dialect.getSampleValueAggregation(innerSampleSql)).toBe(fixture.sampleValueAggregation);
|
||||
expect(dialect.generateCardinalitySampleQuery(fixture.formattedTable, column, 100)).toContain(
|
||||
fixture.cardinalityContains,
|
||||
);
|
||||
expect(dialect.generateRandomizedCardinalitySampleQuery(fixture.formattedTable, column, 100)).toContain(
|
||||
fixture.randomizedCardinalityContains,
|
||||
);
|
||||
expect(dialect.generateDistinctValuesQuery(fixture.formattedTable, column, 20)).toContain(
|
||||
fixture.distinctValuesContains,
|
||||
);
|
||||
const statistics = dialect.generateColumnStatisticsQuery(fixture.table.db ?? '', fixture.table.name);
|
||||
if (fixture.statisticsContains) {
|
||||
expect(statistics).toContain(fixture.statisticsContains);
|
||||
} else {
|
||||
expect(statistics).toBeNull();
|
||||
}
|
||||
expect(dialect.mapToDimensionType(fixture.dimensionInput)).toBe(fixture.dimensionType);
|
||||
expect(dialect.mapDataType(fixture.nativeTypeInput)).toBe(fixture.normalizedType);
|
||||
});
|
||||
|
||||
it('accepts three-part ANSI display refs while keeping one-part names caller-owned', () => {
|
||||
for (const driver of ['postgres', 'mysql', 'clickhouse'] as const) {
|
||||
const dialect = getDialectForDriver(driver);
|
||||
expect(dialect.parseDisplayRef('warehouse.public.orders')).toEqual({
|
||||
catalog: 'warehouse',
|
||||
db: 'public',
|
||||
name: 'orders',
|
||||
}),
|
||||
).toBe(expected);
|
||||
});
|
||||
expect(dialect.parseDisplayRef('orders')).toBeNull();
|
||||
}
|
||||
});
|
||||
|
||||
it('throws with a supported-driver list for unknown drivers', () => {
|
||||
|
|
|
|||
|
|
@ -1,22 +1,40 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
|
||||
|
||||
/** Lower-case driver names that have a registered dialect. */
type SupportedDriver =
  | 'postgres'
  | 'mysql'
  | 'sqlserver'
  | 'snowflake'
  | 'bigquery'
  | 'clickhouse'
  | 'sqlite';
|
||||
import { KtxBigQueryDialect } from '../../connectors/bigquery/dialect.js';
|
||||
import { KtxClickHouseDialect } from '../../connectors/clickhouse/dialect.js';
|
||||
import { KtxMysqlDialect } from '../../connectors/mysql/dialect.js';
|
||||
import { KtxPostgresDialect } from '../../connectors/postgres/dialect.js';
|
||||
import { KtxSqliteDialect } from '../../connectors/sqlite/dialect.js';
|
||||
import { KtxSnowflakeDialect } from '../../connectors/snowflake/dialect.js';
|
||||
import { KtxSqlServerDialect } from '../../connectors/sqlserver/dialect.js';
|
||||
import type { KtxConnectionDriver, KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
|
||||
import type { KtxDialectTableRef } from './dialect-helpers.js';
|
||||
|
||||
/**
 * Full warehouse dialect contract: identifier quoting, table reference
 * formatting/parsing, paging and sampling fragments, profiling queries, and
 * native type mapping.
 */
export interface KtxDialect {
  /** Driver this dialect belongs to. */
  readonly type: KtxConnectionDriver;

  // Identifiers and table references.
  quoteIdentifier(identifier: string): string;
  formatTableName(table: KtxDialectTableRef): string;
  formatDisplayRef(table: KtxDialectTableRef): string;
  /** Returns `null` when `display` cannot be parsed for this dialect. */
  parseDisplayRef(display: string): KtxTableRef | null;
  columnDisplayTablePartCount(): 1 | 2 | 3;

  // Paging and sampling.
  getLimitOffsetClause(limit: number, offset?: number): string;
  getTopClause(limit: number): string;
  getRandomSampleFilter(samplePct: number): string;
  getTableSampleClause(samplePct: number): string;
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string;
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string;
  getSampleValueAggregation(innerSql: string): string;

  // Profiling queries.
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string;
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string;
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string;
  /** May return `null` instead of a query. */
  generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null;

  // Column expressions.
  getNullCountExpression(column: string): string;
  getDistinctCountExpression(column: string): string;
  textLengthExpression(columnSql: string): string;
  castToText(columnSql: string): string;

  // Type mapping.
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType;
  mapDataType(nativeType: string): string;
}
|
||||
|
||||
const supportedDrivers: SupportedDriver[] = [
|
||||
const supportedDrivers: KtxConnectionDriver[] = [
|
||||
'bigquery',
|
||||
'clickhouse',
|
||||
'mysql',
|
||||
|
|
@ -26,71 +44,21 @@ const supportedDrivers: SupportedDriver[] = [
|
|||
'sqlserver',
|
||||
];
|
||||
|
||||
/** Wraps an identifier in double quotes, doubling any embedded double quote. */
function doubleQuoted(identifier: string): string {
  const escaped = identifier.replace(/"/g, '""');
  return `"${escaped}"`;
}
|
||||
|
||||
/** Wraps an identifier in backticks, doubling any embedded backtick. */
function backtickQuoted(identifier: string): string {
  const escaped = identifier.replace(/`/g, '``');
  return '`' + escaped + '`';
}
|
||||
|
||||
/**
 * Wraps an identifier in backticks using backslash escapes.
 *
 * Both backticks and backslashes are escaped. Escaping only the backtick would
 * let a trailing backslash in the input escape the closing quote.
 */
function bigQueryQuoted(identifier: string): string {
  // `$&` re-inserts the matched character after the added backslash.
  const escaped = identifier.replace(/[\\`]/g, '\\$&');
  return '`' + escaped + '`';
}
|
||||
|
||||
/** Wraps an identifier in square brackets, doubling any embedded closing bracket. */
function bracketQuoted(identifier: string): string {
  const escaped = identifier.replace(/\]/g, ']]');
  return `[${escaped}]`;
}
|
||||
|
||||
/**
 * Classifies a native column type name into a coarse dimension type using
 * case-insensitive substring matching.
 *
 * Order matters: date/time markers are checked first, then numeric markers,
 * then boolean markers; anything else is `'string'`.
 */
function inferDimensionType(nativeType: string): KtxSchemaDimensionType {
  const normalized = nativeType.toLowerCase().trim();
  if (normalized.includes('date') || normalized.includes('time')) {
    return 'time';
  }
  // "enum" contains "num" and "point" contains "int"; without this guard the
  // substring checks below would report those types as numeric.
  if (/\b(?:enum\d*|(?:multi)?point)\b/.test(normalized)) {
    return 'string';
  }
  const numericMarkers = ['int', 'num', 'dec', 'float', 'double', 'real'];
  if (numericMarkers.some((marker) => normalized.includes(marker))) {
    return 'number';
  }
  if (normalized.includes('bool') || normalized === 'bit') {
    return 'boolean';
  }
  return 'string';
}
|
||||
|
||||
/**
 * Quotes and dot-joins the segments of a table reference.
 *
 * @param table Table reference to format.
 * @param quote Quoting function applied to each segment.
 * @param sqlite When true, only the table name is used.
 * @returns The quoted segments joined with `.`; empty or null segments are skipped.
 */
function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string {
  const parts = sqlite
    ? [table.name]
    : [table.catalog, table.db, table.name].filter((part): part is string => !!part);
  return parts.map(quote).join('.');
}
|
||||
|
||||
/**
 * Builds a dialect object from a driver name and its quoting function.
 *
 * @param type Driver the dialect is registered under.
 * @param quote Identifier quoting function, also used to format table names.
 * @param sqlite When true, table names are formatted from the bare name only.
 */
function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect {
  return {
    type,
    quoteIdentifier: quote,
    formatTableName: (table) => formatWithParts(table, quote, sqlite),
    mapToDimensionType: inferDimensionType,
  };
}
|
||||
|
||||
// Dialects keyed by driver name; each pairs the driver with its identifier quoting style.
const dialects: Record<SupportedDriver, KtxDialect> = {
  postgres: createDialect('postgres', doubleQuoted),
  mysql: createDialect('mysql', backtickQuoted),
  clickhouse: createDialect('clickhouse', backtickQuoted),
  sqlite: createDialect('sqlite', doubleQuoted, true),
  snowflake: createDialect('snowflake', doubleQuoted),
  bigquery: createDialect('bigquery', bigQueryQuoted),
  sqlserver: createDialect('sqlserver', bracketQuoted),
};
|
||||
// Factory per driver; each call constructs a new instance of the connector's dialect class.
const dialectFactories: Record<KtxConnectionDriver, () => KtxDialect> = {
  bigquery: () => new KtxBigQueryDialect(),
  clickhouse: () => new KtxClickHouseDialect(),
  mysql: () => new KtxMysqlDialect(),
  postgres: () => new KtxPostgresDialect(),
  sqlite: () => new KtxSqliteDialect(),
  snowflake: () => new KtxSnowflakeDialect(),
  sqlserver: () => new KtxSqlServerDialect(),
};
|
||||
|
||||
/**
 * Resolves the dialect for a driver name.
 *
 * @param driver Driver name; matched case-insensitively after trimming.
 * @returns A dialect produced by the factory registered for the driver.
 * @throws Error listing the supported drivers when the name is not registered.
 */
export function getDialectForDriver(driver: string): KtxDialect {
  const normalized = driver.toLowerCase().trim();
  // Own-property check: a plain index lookup would also resolve inherited keys
  // such as "constructor" or "tostring"-like names on Object.prototype and
  // invoke them as if they were dialect factories.
  if (Object.prototype.hasOwnProperty.call(dialectFactories, normalized)) {
    return dialectFactories[normalized as KtxConnectionDriver]();
  }
  throw new Error(`Unsupported warehouse driver "${driver}". Supported drivers: ${supportedDrivers.join(', ')}`);
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue