feat(cli): define full warehouse dialect contract

This commit is contained in:
Andrey Avtomonov 2026-05-24 23:59:38 +02:00
parent 78b8a0c025
commit 95a2b5daf1
10 changed files with 705 additions and 379 deletions

View file

@ -1,9 +1,17 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxBigQueryDialect {
readonly type = 'bigquery';
export class KtxBigQueryDialect implements KtxDialect {
readonly type = 'bigquery' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
TIMESTAMP: 'time',
@ -27,13 +35,19 @@ export class KtxBigQueryDialect {
}
formatTableName(table: BigQueryTableNameRef): string {
if (table.catalog && table.db) {
return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
if (table.db) {
return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
formatDisplayRef(table: BigQueryTableNameRef): string {
return formatDialectDisplayRef(table, 'three-part');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'three-part');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('three-part');
}
mapDataType(nativeType: string): string {
@ -121,7 +135,11 @@ export class KtxBigQueryDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -132,6 +150,18 @@ export class KtxBigQueryDialect {
return `APPROX_COUNT_DISTINCT(${column})`;
}
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS STRING))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS STRING)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -172,36 +202,4 @@ export class KtxBigQueryDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const bigQueryGranularity = granularity.toUpperCase();
if (timezone) {
return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`;
}
return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;
const [rawAmount, rawUnit] = interval.split(' ');
let diffUnit = rawUnit!.toUpperCase();
let amount = Number(rawAmount);
let addUnit = diffUnit;
if (diffUnit === 'WEEK') {
diffUnit = 'DAY';
amount = amount * 7;
addUnit = 'DAY';
}
const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${diffUnit}) / ${amount}) * ${amount} AS INT64) ${addUnit})`;
}
parseIntervalToSql(interval: string): string {
const [amount, unit] = interval.split(' ');
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
}
}

View file

@ -1,9 +1,17 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type ClickHouseTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxClickHouseDialect {
readonly type = 'clickhouse';
export class KtxClickHouseDialect implements KtxDialect {
readonly type = 'clickhouse' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
date: 'time',
@ -45,9 +53,19 @@ export class KtxClickHouseDialect {
}
formatTableName(table: ClickHouseTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi');
}
formatDisplayRef(table: ClickHouseTableNameRef): string {
return formatDialectDisplayRef(table, 'ansi');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'ansi');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('ansi');
}
mapDataType(nativeType: string): string {
@ -132,7 +150,11 @@ export class KtxClickHouseDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -143,6 +165,18 @@ export class KtxClickHouseDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `length(toString(${columnSql}))`;
}
castToText(columnSql: string): string {
return `toString(${columnSql})`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
SELECT COUNT(DISTINCT val) AS cardinality
@ -181,46 +215,6 @@ export class KtxClickHouseDialect {
)
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const tz = timezone ? `, '${timezone}'` : '';
switch (granularity) {
case 'day':
return `toStartOfDay(${column}${tz})`;
case 'week':
return `toStartOfWeek(${column}, 1${tz})`;
case 'month':
return `toStartOfMonth(${column}${tz})`;
case 'quarter':
return `toStartOfQuarter(${column}${tz})`;
case 'year':
return `toStartOfYear(${column}${tz})`;
}
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `toTimezone(${column}, '${timezone}')` : column;
const [rawAmount, rawUnit] = interval.split(' ');
const amount = Number(rawAmount);
const unit = rawUnit!.toLowerCase();
const originExpr = origin ? `toDateTime('${origin}')` : "toDateTime('1970-01-01')";
const calendarUnit = this.toClickHouseDateDiffUnit(unit);
if (calendarUnit) {
return `dateAdd(${calendarUnit}, intDiv(dateDiff(${calendarUnit}, ${originExpr}, ${col}), ${amount}) * ${amount}, ${originExpr})`;
}
const seconds = this.intervalToSeconds(amount, unit);
return `addSeconds(${originExpr}, intDiv(toUInt64(dateDiff('second', ${originExpr}, ${col})), ${seconds}) * ${seconds})`;
}
parseIntervalToSql(interval: string): string {
const [amount, unit] = interval.split(' ');
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
}
private unwrapClickHouseType(value: string, wrapper: string): string {
const prefix = `${wrapper}(`;
return value.startsWith(prefix) && value.endsWith(')') ? value.slice(prefix.length, -1) : value;
@ -242,38 +236,4 @@ export class KtxClickHouseDialect {
return 'String';
}
private toClickHouseDateDiffUnit(unit: string): string | null {
if (unit === 'month' || unit === 'months') {
return "'month'";
}
if (unit === 'quarter' || unit === 'quarters') {
return "'quarter'";
}
if (unit === 'year' || unit === 'years') {
return "'year'";
}
return null;
}
private intervalToSeconds(amount: number, unit: string): number {
switch (unit) {
case 'second':
case 'seconds':
return amount;
case 'minute':
case 'minutes':
return amount * 60;
case 'hour':
case 'hours':
return amount * 3600;
case 'day':
case 'days':
return amount * 86400;
case 'week':
case 'weeks':
return amount * 604800;
default:
return amount * 86400;
}
}
}

View file

@ -1,9 +1,17 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type MysqlTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxMysqlDialect {
readonly type = 'mysql';
export class KtxMysqlDialect implements KtxDialect {
readonly type = 'mysql' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
datetime: 'time',
@ -41,9 +49,19 @@ export class KtxMysqlDialect {
}
formatTableName(table: MysqlTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi');
}
formatDisplayRef(table: MysqlTableNameRef): string {
return formatDialectDisplayRef(table, 'ansi');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'ansi');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('ansi');
}
mapDataType(nativeType: string): string {
@ -118,7 +136,11 @@ export class KtxMysqlDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -129,6 +151,18 @@ export class KtxMysqlDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `CHAR_LENGTH(CAST(${columnSql} AS CHAR))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS CHAR)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT GROUP_CONCAT(CAST(value AS CHAR) SEPARATOR CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
SELECT COUNT(DISTINCT val) AS cardinality
@ -167,36 +201,4 @@ export class KtxMysqlDialect {
) AS sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const col = timezone ? `CONVERT_TZ(${column}, '+00:00', '${timezone}')` : column;
switch (granularity) {
case 'day':
return `DATE(${col})`;
case 'week':
return `DATE(${col} - INTERVAL WEEKDAY(${col}) DAY)`;
case 'month':
return `DATE_FORMAT(${col}, '%Y-%m-01')`;
case 'quarter':
return `MAKEDATE(YEAR(${col}), 1) + INTERVAL (QUARTER(${col}) - 1) QUARTER`;
case 'year':
return `DATE_FORMAT(${col}, '%Y-01-01')`;
}
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `CONVERT_TZ(${column}, '+00:00', '${timezone}')` : column;
const [amount, unit] = interval.split(' ');
const originExpr = origin ? `'${origin}'` : `'1970-01-01'`;
return `DATE_ADD(${originExpr}, INTERVAL FLOOR(TIMESTAMPDIFF(${unit!.toUpperCase()}, ${originExpr}, ${col}) / ${amount}) * ${amount} ${unit!.toUpperCase()})`;
}
parseIntervalToSql(interval: string): string {
const [amount, unit] = interval.split(' ');
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
}
}

View file

@ -1,9 +1,17 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type PostgresTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxPostgresDialect {
readonly type = 'postgresql';
export class KtxPostgresDialect implements KtxDialect {
readonly type = 'postgres' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
timestamp: 'time',
@ -45,9 +53,19 @@ export class KtxPostgresDialect {
}
formatTableName(table: PostgresTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi');
}
formatDisplayRef(table: PostgresTableNameRef): string {
return formatDialectDisplayRef(table, 'ansi');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'ansi');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('ansi');
}
mapDataType(nativeType: string): string {
@ -126,7 +144,11 @@ export class KtxPostgresDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -137,6 +159,18 @@ export class KtxPostgresDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS TEXT))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS TEXT)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT STRING_AGG(CAST(value AS TEXT), CHR(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -191,23 +225,4 @@ export class KtxPostgresDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
return `DATE_TRUNC('${granularity}', ${col})`;
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
const originExpr = origin ? `TIMESTAMP '${origin.replace(/'/g, "''")}'` : "TIMESTAMP '1970-01-01'";
return `${originExpr} + FLOOR(EXTRACT(EPOCH FROM (${col} - ${originExpr})) / EXTRACT(EPOCH FROM INTERVAL '${interval.replace(/'/g, "''")}')) * INTERVAL '${interval.replace(/'/g, "''")}'`;
}
parseIntervalToSql(interval: string): string {
return `INTERVAL '${interval.replace(/'/g, "''")}'`;
}
}

View file

@ -1,9 +1,17 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type SnowflakeTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxSnowflakeDialect {
readonly type = 'snowflake';
export class KtxSnowflakeDialect implements KtxDialect {
readonly type = 'snowflake' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
TIMESTAMP_NTZ: 'time',
@ -45,13 +53,19 @@ export class KtxSnowflakeDialect {
}
formatTableName(table: SnowflakeTableNameRef): string {
if (table.catalog && table.db) {
return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
if (table.db) {
return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
}
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
formatDisplayRef(table: SnowflakeTableNameRef): string {
return formatDialectDisplayRef(table, 'three-part');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'three-part');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('three-part');
}
mapDataType(nativeType: string): string {
@ -115,7 +129,11 @@ export class KtxSnowflakeDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -126,6 +144,18 @@ export class KtxSnowflakeDialect {
return `APPROX_COUNT_DISTINCT(${column})`;
}
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS TEXT))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS VARCHAR)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT LISTAGG(CAST(value AS VARCHAR), '\\x1f') FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -164,24 +194,4 @@ export class KtxSnowflakeDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const target = timezone ? `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})` : column;
return `DATE_TRUNC('${granularity}', ${target})`;
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const target = timezone ? `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})` : column;
const [amount, unit] = interval.split(' ');
const originExpr = origin ? `'${origin}'::TIMESTAMP` : `'1970-01-01'::TIMESTAMP`;
return `DATEADD(${unit}, FLOOR(DATEDIFF(${unit}, ${originExpr}, ${target}) / ${amount}) * ${amount}, ${originExpr})`;
}
parseIntervalToSql(interval: string): string {
return `INTERVAL '${interval}'`;
}
}

View file

@ -1,9 +1,17 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
limitOffsetClause,
parseDialectDisplayRef,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type SqliteTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxSqliteDialect {
readonly type = 'sqlite';
export class KtxSqliteDialect implements KtxDialect {
readonly type = 'sqlite' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
DATETIME: 'time',
@ -29,7 +37,19 @@ export class KtxSqliteDialect {
}
formatTableName(table: SqliteTableNameRef): string {
return this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'sqlite');
}
formatDisplayRef(table: SqliteTableNameRef): string {
return formatDialectDisplayRef(table, 'sqlite');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'sqlite');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('sqlite');
}
mapDataType(nativeType: string): string {
@ -92,7 +112,11 @@ export class KtxSqliteDialect {
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
return limitOffsetClause(limit, offset);
}
getTopClause(_limit: number): string {
return '';
}
getNullCountExpression(column: string): string {
@ -103,6 +127,18 @@ export class KtxSqliteDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `LENGTH(CAST(${columnSql} AS TEXT))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS TEXT)`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -143,35 +179,4 @@ export class KtxSqliteDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
_timezone?: string,
): string {
switch (granularity) {
case 'day':
return `DATE(${column})`;
case 'week':
return `DATE(${column}, 'weekday 0', '-6 days')`;
case 'month':
return `DATE(${column}, 'start of month')`;
case 'quarter':
return `DATE(${column}, 'start of month', '-' || ((CAST(STRFTIME('%m', ${column}) AS INTEGER) - 1) % 3) || ' months')`;
case 'year':
return `DATE(${column}, 'start of year')`;
}
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, _timezone?: string): string {
const [amount, unit] = interval.split(' ');
const originExpr = origin ? `julianday('${origin}')` : `julianday('1970-01-01')`;
const unitDays = unit === 'day' ? 1 : unit === 'week' ? 7 : 30;
const intervalDays = Number(amount) * unitDays;
return `DATE(julianday('1970-01-01') + (CAST((julianday(${column}) - ${originExpr}) / ${intervalDays} AS INTEGER) * ${intervalDays}))`;
}
parseIntervalToSql(interval: string): string {
return `'${interval}'`;
}
}

View file

@ -1,9 +1,17 @@
import type { KtxDialect } from '../../context/connections/dialects.js';
import {
columnDisplayPartCount,
formatDialectDisplayRef,
formatDialectTableName,
parseDialectDisplayRef,
safeSqlLimit,
} from '../../context/connections/dialect-helpers.js';
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
type SqlServerTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
export class KtxSqlServerDialect {
readonly type = 'sqlserver';
export class KtxSqlServerDialect implements KtxDialect {
readonly type = 'sqlserver' as const;
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
datetime: 'time',
@ -39,9 +47,19 @@ export class KtxSqlServerDialect {
}
formatTableName(table: SqlServerTableNameRef): string {
return table.db
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
: this.quoteIdentifier(table.name);
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
}
formatDisplayRef(table: SqlServerTableNameRef): string {
return formatDialectDisplayRef(table, 'three-part');
}
parseDisplayRef(display: string): KtxTableRef | null {
return parseDialectDisplayRef(display, 'three-part');
}
columnDisplayTablePartCount(): 1 | 2 | 3 {
return columnDisplayPartCount('three-part');
}
mapDataType(nativeType: string): string {
@ -111,12 +129,12 @@ export class KtxSqlServerDialect {
return `TABLESAMPLE (${samplePct * 100} PERCENT)`;
}
getLimitOffsetClause(limit: number, offset?: number): string {
return offset !== undefined && offset > 0 ? `OFFSET ${offset} ROWS FETCH NEXT ${limit} ROWS ONLY` : '';
getLimitOffsetClause(_limit: number, _offset?: number): string {
return '';
}
getTopClause(limit: number): string {
return `TOP ${limit}`;
return `TOP (${safeSqlLimit(limit)})`;
}
getNullCountExpression(column: string): string {
@ -127,6 +145,18 @@ export class KtxSqlServerDialect {
return `COUNT(DISTINCT ${column})`;
}
textLengthExpression(columnSql: string): string {
return `LEN(CAST(${columnSql} AS NVARCHAR(MAX)))`;
}
castToText(columnSql: string): string {
return `CAST(${columnSql} AS NVARCHAR(MAX))`;
}
getSampleValueAggregation(innerSql: string): string {
return `(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`;
}
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
return `
WITH sampled AS (
@ -167,35 +197,4 @@ export class KtxSqlServerDialect {
FROM sampled
`;
}
getTimeTruncExpression(
column: string,
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
timezone?: string,
): string {
const col = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column;
switch (granularity) {
case 'day':
return `CAST(${col} AS DATE)`;
case 'week':
return `DATEADD(WEEK, DATEDIFF(WEEK, 0, ${col}), 0)`;
case 'month':
return `DATEFROMPARTS(YEAR(${col}), MONTH(${col}), 1)`;
case 'quarter':
return `DATEFROMPARTS(YEAR(${col}), (DATEPART(QUARTER, ${col}) - 1) * 3 + 1, 1)`;
case 'year':
return `DATEFROMPARTS(YEAR(${col}), 1, 1)`;
}
}
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
const col = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column;
const [amount, unit] = interval.split(' ');
const originExpr = origin ? `'${origin}'` : `'1970-01-01'`;
return `DATEADD(${unit}, (DATEDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount}, ${originExpr})`;
}
parseIntervalToSql(interval: string): string {
return `'${interval}'`;
}
}

View file

@ -0,0 +1,87 @@
import type { KtxTableRef } from '../scan/types.js';
/**
 * How many identifier parts a dialect uses when the helpers in this module
 * format or parse a table reference:
 * - `'sqlite'`: the table name only; display refs must have exactly one part.
 * - `'three-part'`: catalog, db and name (missing parts are dropped when
 *   formatting); display refs must have exactly three parts.
 * - `'ansi'`: same formatting as `'three-part'`; display refs may have two or
 *   three parts, and column display refs use a two-part table prefix.
 */
export type KtxDialectIdentifierShape = 'ansi' | 'sqlite' | 'three-part';
/** A table reference whose `name` is required while `catalog` and `db` may be omitted. */
export type KtxDialectTableRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
/**
 * Normalizes a caller-supplied row limit into a positive integer that is safe
 * to interpolate into SQL text.
 *
 * Fractions are floored and values below 1 become 1. `NaN` falls back to 1 and
 * infinite or very large values are capped at `Number.MAX_SAFE_INTEGER`, so the
 * result never stringifies as `NaN`, `Infinity` or exponent notation.
 *
 * @param limit - Requested maximum number of rows.
 * @returns An integer in the range `[1, Number.MAX_SAFE_INTEGER]`.
 */
export function safeSqlLimit(limit: number): number {
  if (Number.isNaN(limit)) {
    // Math.max(1, NaN) is NaN, which would otherwise render as `LIMIT NaN`.
    return 1;
  }
  return Math.min(Number.MAX_SAFE_INTEGER, Math.max(1, Math.floor(limit)));
}
/**
 * Normalizes an optional row offset for interpolation into SQL text.
 *
 * @param offset - Requested number of rows to skip, if any.
 * @returns The floored offset capped at `Number.MAX_SAFE_INTEGER`, or `null`
 *   when no OFFSET clause should be emitted (offset omitted, `NaN`, or floored
 *   value not greater than zero).
 */
function safeSqlOffset(offset: number | undefined): number | null {
  if (offset === undefined) {
    return null;
  }
  const normalized = Math.floor(offset);
  // Written as a negated `>` so that NaN (which fails every comparison) is rejected too.
  if (!(normalized > 0)) {
    return null;
  }
  // Cap so Infinity / huge values never render as `OFFSET Infinity` or `OFFSET 1e+21`.
  return Math.min(Number.MAX_SAFE_INTEGER, normalized);
}
/**
 * Trims one dotted-name segment and removes at most one opening quote
 * character (`"`, `'`, backtick or `[`) from its start and at most one closing
 * quote character (`"`, `'`, backtick or `]`) from its end.
 */
function cleanIdentifierPart(part: string): string {
  const trimmed = part.trim();
  const withoutOpeningQuote = trimmed.replace(/^["'`\[]/, '');
  return withoutOpeningQuote.replace(/["'`\]]$/, '');
}
/**
 * Breaks a dotted display reference into its cleaned, non-empty segments.
 * Each segment is passed through `cleanIdentifierPart`; segments that end up
 * empty are discarded.
 */
function splitDisplay(display: string): string[] {
  const segments: string[] = [];
  for (const rawSegment of display.trim().split('.')) {
    const cleaned = cleanIdentifierPart(rawSegment);
    if (cleaned) {
      segments.push(cleaned);
    }
  }
  return segments;
}
/**
 * Lists the identifier parts of `table` in catalog, db, name order.
 * The `'sqlite'` shape always yields just the table name; every other shape
 * keeps whichever of the three parts are non-empty.
 */
function tableParts(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string[] {
  if (shape === 'sqlite') {
    return [table.name];
  }
  const present: string[] = [];
  for (const candidate of [table.catalog, table.db, table.name]) {
    // Skips null, undefined and empty strings alike.
    if (candidate) {
      present.push(candidate);
    }
  }
  return present;
}
/**
 * Returns the segment counts a display reference may have for the given shape:
 * one for `'sqlite'`, three for `'three-part'`, two or three otherwise.
 */
function acceptedDisplayPartCounts(shape: KtxDialectIdentifierShape): readonly number[] {
  switch (shape) {
    case 'sqlite':
      return [1];
    case 'three-part':
      return [3];
    default:
      return [2, 3];
  }
}
/**
 * Builds the quoted, dot-joined SQL name for a table.
 *
 * @param table - Table reference to format.
 * @param quoteIdentifier - Dialect-specific quoting applied to every part.
 * @param shape - Identifier shape that decides which parts are included.
 * @returns The quoted parts joined with `.`.
 */
export function formatDialectTableName(
  table: KtxDialectTableRef,
  quoteIdentifier: (identifier: string) => string,
  shape: KtxDialectIdentifierShape,
): string {
  const parts = tableParts(table, shape);
  const quotedParts = parts.map(quoteIdentifier);
  return quotedParts.join('.');
}
/**
 * Builds the unquoted, dot-joined display form of a table reference using the
 * parts selected for `shape`.
 */
export function formatDialectDisplayRef(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string {
  const parts = tableParts(table, shape);
  return parts.join('.');
}
/**
 * Parses a dotted display reference back into a table reference.
 *
 * @param display - Display text such as `db.table` or `catalog.db.table`.
 * @param shape - Identifier shape that decides how many segments are valid.
 * @returns The parsed reference (absent leading parts are `null`), or `null`
 *   when the segment count is not accepted for `shape`.
 */
export function parseDialectDisplayRef(display: string, shape: KtxDialectIdentifierShape): KtxTableRef | null {
  const parts = splitDisplay(display);
  const allowedCounts = acceptedDisplayPartCounts(shape);
  if (!allowedCounts.includes(parts.length)) {
    return null;
  }
  switch (parts.length) {
    case 1:
      return { catalog: null, db: null, name: parts[0]! };
    case 2:
      return { catalog: null, db: parts[0]!, name: parts[1]! };
    case 3:
      return { catalog: parts[0]!, db: parts[1]!, name: parts[2]! };
    default:
      return null;
  }
}
/**
 * Returns the part count associated with `shape` for column display refs:
 * 1 for `'sqlite'`, 3 for `'three-part'`, 2 for anything else.
 */
export function columnDisplayPartCount(shape: KtxDialectIdentifierShape): 1 | 2 | 3 {
  switch (shape) {
    case 'sqlite':
      return 1;
    case 'three-part':
      return 3;
    default:
      return 2;
  }
}
/**
 * Renders a `LIMIT n` clause, appending `OFFSET m` only when the normalized
 * offset is usable.
 *
 * @param limit - Requested row limit; normalized by `safeSqlLimit`.
 * @param offset - Optional rows to skip; normalized by `safeSqlOffset`.
 * @returns The clause text without leading or trailing whitespace.
 */
export function limitOffsetClause(limit: number, offset?: number): string {
  const limitSql = `LIMIT ${safeSqlLimit(limit)}`;
  const rowsToSkip = safeSqlOffset(offset);
  if (rowsToSkip === null) {
    return limitSql;
  }
  return `${limitSql} OFFSET ${rowsToSkip}`;
}

View file

@ -1,24 +1,306 @@
import { describe, expect, it } from 'vitest';
import { getDialectForDriver } from './dialects.js';
import type { KtxConnectionDriver, KtxTableRef } from '../scan/types.js';
/**
 * Element type of the `fixtures` table in this file: one driver plus sample
 * inputs and the strings paired with them.
 * NOTE(review): the assertions that read these fields are outside this view;
 * which dialect method each field is compared against is presumed from the
 * matching names — confirm against the test bodies.
 */
interface DialectFixture {
// Driver the row belongs to and the table reference used as sample input.
driver: KtxConnectionDriver;
table: KtxTableRef;
// Identifier quoting: raw input and its quoted form.
quoteInput: string;
quotedIdentifier: string;
// Table naming: quoted SQL name, unquoted display form, and a display string
// paired with it as the invalid case (presumably expected to fail parsing).
formattedTable: string;
display: string;
invalidDisplay: string;
columnDisplayTablePartCount: 1 | 2 | 3;
// Row limiting and sampling SQL fragments.
limitClause: string;
topClause: string;
randomFilter: string;
tableSampleClause: string;
sampleQuery: string;
// `*Contains` fields hold a substring rather than a full statement
// (presumably matched with a contains-style assertion).
columnSampleContains: string;
nullCountExpression: string;
distinctCountExpression: string;
textLengthExpression: string;
castToText: string;
sampleValueAggregation: string;
cardinalityContains: string;
randomizedCardinalityContains: string;
distinctValuesContains: string;
// Nullable; presumably null for drivers without a statistics query — TODO confirm.
statisticsContains: string | null;
// Type mapping: a native type and its dimension type, then a native type and
// its normalized form.
dimensionInput: string;
dimensionType: 'time' | 'string' | 'number' | 'boolean';
nativeTypeInput: string;
normalizedType: string;
}
// Inner SELECT handed to getSampleValueAggregation; every fixture's
// `sampleValueAggregation` embeds this exact text.
const innerSampleSql = 'SELECT status AS value FROM orders';
// One fixture per driver (postgres, mysql, clickhouse, sqlite, snowflake,
// bigquery, sqlserver). Each entry pins the SQL fragments and type mappings
// that the dialect returned by getDialectForDriver(driver) is expected to produce.
const fixtures: DialectFixture[] = [
{
driver: 'postgres',
table: { catalog: null, db: 'public', name: 'orders' },
quoteInput: 'order"items',
quotedIdentifier: '"order""items"',
formattedTable: '"public"."orders"',
display: 'public.orders',
invalidDisplay: 'orders',
columnDisplayTablePartCount: 2,
limitClause: 'LIMIT 25 OFFSET 5',
topClause: '',
randomFilter: 'RANDOM() < 0.25',
tableSampleClause: 'TABLESAMPLE SYSTEM (25)',
sampleQuery: 'SELECT "id", "status" FROM "public"."orders" LIMIT 5',
columnSampleContains: 'TRIM(CAST("status" AS TEXT)) != \'\'',
nullCountExpression: 'COUNT(*) FILTER (WHERE "status" IS NULL)',
distinctCountExpression: 'COUNT(DISTINCT "status")',
textLengthExpression: 'LENGTH(CAST("status" AS TEXT))',
castToText: 'CAST("status" AS TEXT)',
sampleValueAggregation:
'(SELECT STRING_AGG(CAST(value AS TEXT), CHR(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
randomizedCardinalityContains: 'ORDER BY RANDOM()',
distinctValuesContains: 'SELECT DISTINCT "status"::text AS val',
statisticsContains: 'FROM pg_stats s',
dimensionInput: 'timestamp with time zone',
dimensionType: 'time',
nativeTypeInput: 'numeric(12,2)',
normalizedType: 'numeric(12,2)',
},
{
driver: 'mysql',
table: { catalog: null, db: 'analytics', name: 'orders' },
quoteInput: 'order`items',
quotedIdentifier: '`order``items`',
formattedTable: '`analytics`.`orders`',
display: 'analytics.orders',
invalidDisplay: 'orders',
columnDisplayTablePartCount: 2,
limitClause: 'LIMIT 25 OFFSET 5',
topClause: '',
randomFilter: 'RAND() < 0.25',
tableSampleClause: '',
sampleQuery: 'SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 5',
columnSampleContains: 'TRIM(CAST(`status` AS CHAR)) != \'\'',
nullCountExpression: 'SUM(CASE WHEN `status` IS NULL THEN 1 ELSE 0 END)',
distinctCountExpression: 'COUNT(DISTINCT `status`)',
textLengthExpression: 'CHAR_LENGTH(CAST(`status` AS CHAR))',
castToText: 'CAST(`status` AS CHAR)',
sampleValueAggregation:
'(SELECT GROUP_CONCAT(CAST(value AS CHAR) SEPARATOR CHAR(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
randomizedCardinalityContains: 'ORDER BY RAND()',
distinctValuesContains: 'SELECT DISTINCT CAST(`status` AS CHAR) AS val',
statisticsContains: null,
dimensionInput: 'tinyint(1)',
dimensionType: 'boolean',
nativeTypeInput: 'varchar(255)',
normalizedType: 'varchar(255)',
},
{
driver: 'clickhouse',
table: { catalog: null, db: 'analytics', name: 'events' },
quoteInput: 'order`items',
quotedIdentifier: '`order``items`',
formattedTable: '`analytics`.`events`',
display: 'analytics.events',
invalidDisplay: 'events',
columnDisplayTablePartCount: 2,
limitClause: 'LIMIT 25 OFFSET 5',
topClause: '',
randomFilter: 'rand() / 4294967295.0 < 0.25',
tableSampleClause: '',
sampleQuery: 'SELECT `id`, `status` FROM `analytics`.`events` LIMIT 5',
columnSampleContains: 'trim(toString(`status`)) != \'\'',
nullCountExpression: 'countIf(`status` IS NULL)',
distinctCountExpression: 'COUNT(DISTINCT `status`)',
textLengthExpression: 'length(toString(`status`))',
castToText: 'toString(`status`)',
sampleValueAggregation:
'(SELECT arrayStringConcat(groupArray(toString(value)), \'\\x1F\') FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
randomizedCardinalityContains: 'ORDER BY rand()',
distinctValuesContains: 'SELECT DISTINCT toString(`status`) AS val',
statisticsContains: null,
dimensionInput: 'Nullable(DateTime64(3))',
dimensionType: 'time',
nativeTypeInput: 'LowCardinality(String)',
normalizedType: 'LowCardinality(String)',
},
{
driver: 'sqlite',
table: { catalog: null, db: null, name: 'orders' },
quoteInput: 'order"items',
quotedIdentifier: '"order""items"',
formattedTable: '"orders"',
display: 'orders',
invalidDisplay: 'public.orders',
columnDisplayTablePartCount: 1,
limitClause: 'LIMIT 25 OFFSET 5',
topClause: '',
randomFilter: '(RANDOM() % 100) < 25',
tableSampleClause: '',
sampleQuery: 'SELECT "id", "status" FROM "orders" LIMIT 5',
columnSampleContains: 'TRIM(CAST("status" AS TEXT)) != \'\'',
nullCountExpression: 'SUM(CASE WHEN "status" IS NULL THEN 1 ELSE 0 END)',
distinctCountExpression: 'COUNT(DISTINCT "status")',
textLengthExpression: 'LENGTH(CAST("status" AS TEXT))',
castToText: 'CAST("status" AS TEXT)',
sampleValueAggregation:
'(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
randomizedCardinalityContains: 'ORDER BY RANDOM()',
distinctValuesContains: 'SELECT DISTINCT CAST("status" AS TEXT) AS val',
statisticsContains: null,
dimensionInput: 'INTEGER',
dimensionType: 'number',
nativeTypeInput: 'VARCHAR(255)',
normalizedType: 'VARCHAR(255)',
},
{
driver: 'snowflake',
table: { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' },
quoteInput: 'order"items',
quotedIdentifier: '"order""items"',
formattedTable: '"ANALYTICS"."PUBLIC"."ORDERS"',
display: 'ANALYTICS.PUBLIC.ORDERS',
invalidDisplay: 'PUBLIC.ORDERS',
columnDisplayTablePartCount: 3,
limitClause: 'LIMIT 25 OFFSET 5',
topClause: '',
randomFilter: 'UNIFORM(0::FLOAT, 1::FLOAT, RANDOM()) < 0.25',
tableSampleClause: 'SAMPLE (25)',
sampleQuery: 'SELECT "id", "status" FROM "ANALYTICS"."PUBLIC"."ORDERS" SAMPLE ROW (5 ROWS)',
columnSampleContains: 'TRIM(CAST("status" AS STRING)) != \'\'',
nullCountExpression: 'COUNT_IF("status" IS NULL)',
distinctCountExpression: 'APPROX_COUNT_DISTINCT("status")',
textLengthExpression: 'LENGTH(CAST("status" AS TEXT))',
castToText: 'CAST("status" AS VARCHAR)',
sampleValueAggregation:
'(SELECT LISTAGG(CAST(value AS VARCHAR), \'\\x1f\') FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
randomizedCardinalityContains: 'SAMPLE ROW (100 ROWS)',
distinctValuesContains: 'SELECT DISTINCT "status"::VARCHAR AS val',
statisticsContains: null,
dimensionInput: 'TIMESTAMP_NTZ',
dimensionType: 'time',
nativeTypeInput: 'NUMBER(38,0)',
normalizedType: 'NUMBER(38,0)',
},
{
driver: 'bigquery',
table: { catalog: 'analytics-project', db: 'warehouse', name: 'orders' },
quoteInput: 'order`items',
quotedIdentifier: '`order\\`items`',
formattedTable: '`analytics-project`.`warehouse`.`orders`',
display: 'analytics-project.warehouse.orders',
invalidDisplay: 'warehouse.orders',
columnDisplayTablePartCount: 3,
limitClause: 'LIMIT 25 OFFSET 5',
topClause: '',
randomFilter: 'RAND() < 0.25',
tableSampleClause: 'TABLESAMPLE SYSTEM (25 PERCENT)',
sampleQuery: 'SELECT `id`, `status` FROM `analytics-project`.`warehouse`.`orders` ORDER BY RAND() LIMIT 5',
columnSampleContains: 'TRIM(CAST(`status` AS STRING)) != \'\'',
nullCountExpression: 'COUNTIF(`status` IS NULL)',
distinctCountExpression: 'APPROX_COUNT_DISTINCT(`status`)',
textLengthExpression: 'LENGTH(CAST(`status` AS STRING))',
castToText: 'CAST(`status` AS STRING)',
sampleValueAggregation:
'(SELECT STRING_AGG(CAST(value AS STRING), \'\\u001F\') FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
cardinalityContains: 'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
randomizedCardinalityContains: 'ORDER BY RAND()',
distinctValuesContains: 'SELECT DISTINCT CAST(`status` AS STRING) AS val',
statisticsContains: null,
dimensionInput: 'INT64',
dimensionType: 'number',
nativeTypeInput: 'INT64',
normalizedType: 'BIGINT',
},
{
driver: 'sqlserver',
table: { catalog: 'warehouse', db: 'dbo', name: 'events' },
quoteInput: 'odd]name',
quotedIdentifier: '[odd]]name]',
formattedTable: '[warehouse].[dbo].[events]',
display: 'warehouse.dbo.events',
invalidDisplay: 'dbo.events',
columnDisplayTablePartCount: 3,
limitClause: '',
topClause: 'TOP (25)',
randomFilter: 'ABS(CHECKSUM(NEWID())) % 100 < 25',
tableSampleClause: 'TABLESAMPLE (25 PERCENT)',
sampleQuery: 'SELECT TOP 5 [id], [status] FROM [warehouse].[dbo].[events]',
columnSampleContains: 'LTRIM(RTRIM(CAST([status] AS NVARCHAR(MAX)))) != \'\'',
nullCountExpression: 'SUM(CASE WHEN [status] IS NULL THEN 1 ELSE 0 END)',
distinctCountExpression: 'COUNT(DISTINCT [status])',
textLengthExpression: 'LEN(CAST([status] AS NVARCHAR(MAX)))',
castToText: 'CAST([status] AS NVARCHAR(MAX))',
sampleValueAggregation:
'(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)',
cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality',
randomizedCardinalityContains: 'ORDER BY NEWID()',
distinctValuesContains: 'SELECT TOP 20 val',
statisticsContains: null,
dimensionInput: 'datetime2',
dimensionType: 'time',
nativeTypeInput: 'uniqueidentifier',
normalizedType: 'uniqueidentifier',
},
];
describe('getDialectForDriver', () => {
it.each([
['postgres', '"public"."orders"'],
['mysql', '`public`.`orders`'],
['clickhouse', '`public`.`orders`'],
['sqlite', '"orders"'],
['snowflake', '"analytics"."public"."orders"'],
['bigquery', '`analytics`.`public`.`orders`'],
['sqlserver', '[analytics].[public].[orders]'],
] as const)('formats table names for %s', (driver, expected) => {
const dialect = getDialectForDriver(driver);
expect(
dialect.formatTableName({
catalog: driver === 'snowflake' || driver === 'bigquery' || driver === 'sqlserver' ? 'analytics' : null,
db: driver === 'sqlite' ? null : 'public',
it.each(fixtures)('returns a full KtxDialect for $driver', (fixture) => {
const dialect = getDialectForDriver(fixture.driver);
const column = dialect.quoteIdentifier('status');
expect(dialect.type).toBe(fixture.driver);
expect(dialect.quoteIdentifier(fixture.quoteInput)).toBe(fixture.quotedIdentifier);
expect(dialect.formatTableName(fixture.table)).toBe(fixture.formattedTable);
expect(dialect.formatDisplayRef(fixture.table)).toBe(fixture.display);
expect(dialect.parseDisplayRef(fixture.display)).toEqual(fixture.table);
expect(dialect.parseDisplayRef(fixture.invalidDisplay)).toBeNull();
expect(dialect.columnDisplayTablePartCount()).toBe(fixture.columnDisplayTablePartCount);
expect(dialect.getLimitOffsetClause(25, 5)).toBe(fixture.limitClause);
expect(dialect.getTopClause(25)).toBe(fixture.topClause);
expect(dialect.getRandomSampleFilter(0.25)).toBe(fixture.randomFilter);
expect(dialect.getTableSampleClause(0.25)).toBe(fixture.tableSampleClause);
expect(dialect.generateSampleQuery(fixture.formattedTable, 5, ['id', 'status'])).toBe(fixture.sampleQuery);
expect(dialect.generateColumnSampleQuery(fixture.formattedTable, 'status', 10)).toContain(
fixture.columnSampleContains,
);
expect(dialect.getNullCountExpression(column)).toBe(fixture.nullCountExpression);
expect(dialect.getDistinctCountExpression(column)).toBe(fixture.distinctCountExpression);
expect(dialect.textLengthExpression(column)).toBe(fixture.textLengthExpression);
expect(dialect.castToText(column)).toBe(fixture.castToText);
expect(dialect.getSampleValueAggregation(innerSampleSql)).toBe(fixture.sampleValueAggregation);
expect(dialect.generateCardinalitySampleQuery(fixture.formattedTable, column, 100)).toContain(
fixture.cardinalityContains,
);
expect(dialect.generateRandomizedCardinalitySampleQuery(fixture.formattedTable, column, 100)).toContain(
fixture.randomizedCardinalityContains,
);
expect(dialect.generateDistinctValuesQuery(fixture.formattedTable, column, 20)).toContain(
fixture.distinctValuesContains,
);
const statistics = dialect.generateColumnStatisticsQuery(fixture.table.db ?? '', fixture.table.name);
if (fixture.statisticsContains) {
expect(statistics).toContain(fixture.statisticsContains);
} else {
expect(statistics).toBeNull();
}
expect(dialect.mapToDimensionType(fixture.dimensionInput)).toBe(fixture.dimensionType);
expect(dialect.mapDataType(fixture.nativeTypeInput)).toBe(fixture.normalizedType);
});
it('accepts three-part ANSI display refs while keeping one-part names caller-owned', () => {
for (const driver of ['postgres', 'mysql', 'clickhouse'] as const) {
const dialect = getDialectForDriver(driver);
expect(dialect.parseDisplayRef('warehouse.public.orders')).toEqual({
catalog: 'warehouse',
db: 'public',
name: 'orders',
}),
).toBe(expected);
});
expect(dialect.parseDisplayRef('orders')).toBeNull();
}
});
it('throws with a supported-driver list for unknown drivers', () => {

View file

@ -1,22 +1,40 @@
import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
/** Driver identifiers accepted as a dialect `type` in this module. */
type SupportedDriver =
| 'postgres'
| 'mysql'
| 'sqlserver'
| 'snowflake'
| 'bigquery'
| 'clickhouse'
| 'sqlite';
import { KtxBigQueryDialect } from '../../connectors/bigquery/dialect.js';
import { KtxClickHouseDialect } from '../../connectors/clickhouse/dialect.js';
import { KtxMysqlDialect } from '../../connectors/mysql/dialect.js';
import { KtxPostgresDialect } from '../../connectors/postgres/dialect.js';
import { KtxSqliteDialect } from '../../connectors/sqlite/dialect.js';
import { KtxSnowflakeDialect } from '../../connectors/snowflake/dialect.js';
import { KtxSqlServerDialect } from '../../connectors/sqlserver/dialect.js';
import type { KtxConnectionDriver, KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
import type { KtxDialectTableRef } from './dialect-helpers.js';
/**
 * Contract implemented by every warehouse dialect that `getDialectForDriver`
 * hands out: identifier quoting, table-reference formatting/parsing, SQL
 * fragment and query generation, and native-type mapping.
 */
export interface KtxDialect {
// Driver identifier this dialect serves.
readonly type: SupportedDriver;
readonly type: KtxConnectionDriver;
// Quotes a single identifier for use in SQL.
quoteIdentifier(identifier: string): string;
// Quoted, dot-joined table name for use in SQL.
formatTableName(table: KtxTableRef): string;
formatTableName(table: KtxDialectTableRef): string;
// Display form of a table ref, and its inverse (null when the display string is not accepted).
formatDisplayRef(table: KtxDialectTableRef): string;
parseDisplayRef(display: string): KtxTableRef | null;
columnDisplayTablePartCount(): 1 | 2 | 3;
// Row-limiting and sampling SQL fragments.
getLimitOffsetClause(limit: number, offset?: number): string;
getTopClause(limit: number): string;
getRandomSampleFilter(samplePct: number): string;
getTableSampleClause(samplePct: number): string;
// Query generators; `tableName` is a string that is embedded in the generated SQL.
generateSampleQuery(tableName: string, limit: number, columns?: string[]): string;
generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string;
getSampleValueAggregation(innerSql: string): string;
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string;
generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string;
generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string;
// May return null instead of a query.
generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null;
// Column-level SQL expressions.
getNullCountExpression(column: string): string;
getDistinctCountExpression(column: string): string;
textLengthExpression(columnSql: string): string;
castToText(columnSql: string): string;
// Native-type mapping.
mapToDimensionType(nativeType: string): KtxSchemaDimensionType;
mapDataType(nativeType: string): string;
}
const supportedDrivers: SupportedDriver[] = [
const supportedDrivers: KtxConnectionDriver[] = [
'bigquery',
'clickhouse',
'mysql',
@ -26,71 +44,21 @@ const supportedDrivers: SupportedDriver[] = [
'sqlserver',
];
/**
 * Wraps an identifier in double quotes, doubling any embedded `"`.
 *
 * @param identifier - Raw identifier text.
 * @returns The double-quoted identifier.
 */
function doubleQuoted(identifier: string): string {
  const escaped = identifier.split('"').join('""');
  return '"' + escaped + '"';
}
/**
 * Wraps an identifier in backticks, doubling any embedded backtick.
 *
 * @param identifier - Raw identifier text.
 * @returns The backtick-quoted identifier.
 */
function backtickQuoted(identifier: string): string {
  const escaped = identifier.split('`').join('``');
  return '`' + escaped + '`';
}
/**
 * Wraps an identifier in backticks using BigQuery's backslash escaping.
 *
 * Both the backtick and the backslash itself are escaped. Escaping only the
 * backtick would let a trailing `\` in the identifier swallow the closing
 * backtick and produce a malformed (or injectable) quoted identifier.
 *
 * @param identifier - Raw identifier text.
 * @returns The backtick-quoted identifier with `\` and backticks backslash-escaped.
 */
function bigQueryQuoted(identifier: string): string {
  // `$&` re-inserts the matched character after the added backslash.
  const escaped = identifier.replace(/[\\`]/g, '\\$&');
  return '`' + escaped + '`';
}
/**
 * Wraps an identifier in square brackets, doubling any embedded `]`.
 *
 * @param identifier - Raw identifier text.
 * @returns The bracket-quoted identifier.
 */
function bracketQuoted(identifier: string): string {
  const escaped = identifier.split(']').join(']]');
  return '[' + escaped + ']';
}
/**
 * Guesses a schema dimension type from a native column type name by substring
 * matching on the lower-cased, trimmed name.
 *
 * Checks run in order: time markers, then numeric markers, then boolean
 * markers; anything else is treated as a string.
 *
 * @param nativeType - Native type name as reported by the warehouse.
 * @returns `'time'`, `'number'`, `'boolean'` or `'string'`.
 */
function inferDimensionType(nativeType: string): KtxSchemaDimensionType {
  const lowered = nativeType.toLowerCase().trim();
  const mentions = (fragment: string): boolean => lowered.includes(fragment);

  const timeMarkers = ['date', 'time'];
  if (timeMarkers.some((marker) => mentions(marker))) {
    return 'time';
  }

  const numberMarkers = ['int', 'num', 'dec', 'float', 'double', 'real'];
  if (numberMarkers.some((marker) => mentions(marker))) {
    return 'number';
  }

  // `bit` must match exactly; `bool` may appear anywhere in the name.
  const looksBoolean = mentions('bool') || lowered === 'bit';
  return looksBoolean ? 'boolean' : 'string';
}
/**
 * Quotes and dot-joins the parts of a table reference.
 *
 * @param table - Table reference to format.
 * @param quote - Quoting function applied to each emitted part.
 * @param sqlite - When true, only `table.name` is emitted; otherwise the
 *   non-empty parts among catalog, db and name are emitted in that order.
 * @returns The quoted parts joined with `.`.
 */
function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string {
  let segments: string[];
  if (sqlite) {
    segments = [table.name];
  } else {
    segments = [];
    for (const candidate of [table.catalog, table.db, table.name]) {
      // Skip null/empty qualifiers.
      if (candidate) {
        segments.push(candidate);
      }
    }
  }
  return segments.map(quote).join('.');
}
/**
 * Assembles a dialect object from a driver name and a quoting function.
 *
 * @param type - Driver identifier stored on the dialect.
 * @param quote - Identifier-quoting function; exposed as `quoteIdentifier` and
 *   used by `formatTableName`.
 * @param sqlite - Forwarded to `formatWithParts` for table-name formatting.
 * @returns A dialect whose `mapToDimensionType` is `inferDimensionType`.
 */
function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect {
  const dialect: KtxDialect = {
    type,
    quoteIdentifier: quote,
    formatTableName(table) {
      return formatWithParts(table, quote, sqlite);
    },
    mapToDimensionType: inferDimensionType,
  };
  return dialect;
}
const dialects: Record<SupportedDriver, KtxDialect> = {
postgres: createDialect('postgres', doubleQuoted),
mysql: createDialect('mysql', backtickQuoted),
clickhouse: createDialect('clickhouse', backtickQuoted),
sqlite: createDialect('sqlite', doubleQuoted, true),
snowflake: createDialect('snowflake', doubleQuoted),
bigquery: createDialect('bigquery', bigQueryQuoted),
sqlserver: createDialect('sqlserver', bracketQuoted),
const dialectFactories: Record<KtxConnectionDriver, () => KtxDialect> = {
bigquery: () => new KtxBigQueryDialect(),
clickhouse: () => new KtxClickHouseDialect(),
mysql: () => new KtxMysqlDialect(),
postgres: () => new KtxPostgresDialect(),
sqlite: () => new KtxSqliteDialect(),
snowflake: () => new KtxSnowflakeDialect(),
sqlserver: () => new KtxSqlServerDialect(),
};
export function getDialectForDriver(driver: string): KtxDialect {
const normalized = driver.toLowerCase().trim();
if (normalized in dialects) {
return dialects[normalized as SupportedDriver];
const factory = dialectFactories[normalized as KtxConnectionDriver];
if (factory) {
return factory();
}
throw new Error(`Unsupported warehouse driver "${driver}". Supported drivers: ${supportedDrivers.join(', ')}`);
}