From 95a2b5daf1429bf4fd87e22ac2cbbb3e0a1fbda4 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sun, 24 May 2026 23:59:38 +0200 Subject: [PATCH] feat(cli): define full warehouse dialect contract --- .../cli/src/connectors/bigquery/dialect.ts | 82 +++-- .../cli/src/connectors/clickhouse/dialect.ts | 120 +++---- packages/cli/src/connectors/mysql/dialect.ts | 78 ++--- .../cli/src/connectors/postgres/dialect.ts | 65 ++-- .../cli/src/connectors/snowflake/dialect.ts | 70 ++-- packages/cli/src/connectors/sqlite/dialect.ts | 75 +++-- .../cli/src/connectors/sqlserver/dialect.ts | 77 +++-- .../context/connections/dialect-helpers.ts | 87 +++++ .../src/context/connections/dialects.test.ts | 314 +++++++++++++++++- .../cli/src/context/connections/dialects.ts | 116 +++---- 10 files changed, 705 insertions(+), 379 deletions(-) create mode 100644 packages/cli/src/context/connections/dialect-helpers.ts diff --git a/packages/cli/src/connectors/bigquery/dialect.ts b/packages/cli/src/connectors/bigquery/dialect.ts index 02d904ed..3a46f416 100644 --- a/packages/cli/src/connectors/bigquery/dialect.ts +++ b/packages/cli/src/connectors/bigquery/dialect.ts @@ -1,9 +1,17 @@ +import type { KtxDialect } from '../../context/connections/dialects.js'; +import { + columnDisplayPartCount, + formatDialectDisplayRef, + formatDialectTableName, + limitOffsetClause, + parseDialectDisplayRef, +} from '../../context/connections/dialect-helpers.js'; import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js'; type BigQueryTableNameRef = Pick & Partial>; -export class KtxBigQueryDialect { - readonly type = 'bigquery'; +export class KtxBigQueryDialect implements KtxDialect { + readonly type = 'bigquery' as const; private readonly typeMappings: Record = { TIMESTAMP: 'time', @@ -27,13 +35,19 @@ export class KtxBigQueryDialect { } formatTableName(table: BigQueryTableNameRef): string { - if (table.catalog && table.db) { - return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`; - } - if (table.db) { - return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`; - } - return this.quoteIdentifier(table.name); + return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part'); + } + + formatDisplayRef(table: BigQueryTableNameRef): string { + return formatDialectDisplayRef(table, 'three-part'); + } + + parseDisplayRef(display: string): KtxTableRef | null { + return parseDialectDisplayRef(display, 'three-part'); + } + + columnDisplayTablePartCount(): 1 | 2 | 3 { + return columnDisplayPartCount('three-part'); } mapDataType(nativeType: string): string { @@ -121,7 +135,11 @@ export class KtxBigQueryDialect { } getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; + return limitOffsetClause(limit, offset); + } + + getTopClause(_limit: number): string { + return ''; } getNullCountExpression(column: string): string { @@ -132,6 +150,18 @@ export class KtxBigQueryDialect { return `APPROX_COUNT_DISTINCT(${column})`; } + textLengthExpression(columnSql: string): string { + return `LENGTH(CAST(${columnSql} AS STRING))`; + } + + castToText(columnSql: string): string { + return `CAST(${columnSql} AS STRING)`; + } + + getSampleValueAggregation(innerSql: string): string { + return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`; + } + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { return ` WITH sampled AS ( @@ -172,36 +202,4 @@ export class KtxBigQueryDialect { FROM sampled `; } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - timezone?: string, - ): string { - const bigQueryGranularity = granularity.toUpperCase(); - if (timezone) { - return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`; - } - return `DATE_TRUNC(${column}, ${bigQueryGranularity})`; - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { - const col = timezone ? `DATETIME(${column}, '${timezone}')` : column; - const [rawAmount, rawUnit] = interval.split(' '); - let diffUnit = rawUnit!.toUpperCase(); - let amount = Number(rawAmount); - let addUnit = diffUnit; - if (diffUnit === 'WEEK') { - diffUnit = 'DAY'; - amount = amount * 7; - addUnit = 'DAY'; - } - const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`; - return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${diffUnit}) / ${amount}) * ${amount} AS INT64) ${addUnit})`; - } - - parseIntervalToSql(interval: string): string { - const [amount, unit] = interval.split(' '); - return `INTERVAL ${amount} ${unit!.toUpperCase()}`; - } } diff --git a/packages/cli/src/connectors/clickhouse/dialect.ts b/packages/cli/src/connectors/clickhouse/dialect.ts index 48452ea6..5a43c8fa 100644 --- a/packages/cli/src/connectors/clickhouse/dialect.ts +++ b/packages/cli/src/connectors/clickhouse/dialect.ts @@ -1,9 +1,17 @@ +import type { KtxDialect } from '../../context/connections/dialects.js'; +import { + columnDisplayPartCount, + formatDialectDisplayRef, + formatDialectTableName, + limitOffsetClause, + parseDialectDisplayRef, +} from '../../context/connections/dialect-helpers.js'; import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js'; type ClickHouseTableNameRef = Pick & Partial>; -export class KtxClickHouseDialect { - readonly type = 'clickhouse'; +export class KtxClickHouseDialect implements KtxDialect { + readonly type = 'clickhouse' as const; private readonly typeMappings: Record = { date: 'time', @@ -45,9 +53,19 @@ export class KtxClickHouseDialect { } formatTableName(table: ClickHouseTableNameRef): string { - return table.db - ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}` - : this.quoteIdentifier(table.name); + return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi'); + } + + formatDisplayRef(table: ClickHouseTableNameRef): string { + return formatDialectDisplayRef(table, 'ansi'); + } + + parseDisplayRef(display: string): KtxTableRef | null { + return parseDialectDisplayRef(display, 'ansi'); + } + + columnDisplayTablePartCount(): 1 | 2 | 3 { + return columnDisplayPartCount('ansi'); } mapDataType(nativeType: string): string { @@ -132,7 +150,11 @@ export class KtxClickHouseDialect { } getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; + return limitOffsetClause(limit, offset); + } + + getTopClause(_limit: number): string { + return ''; } getNullCountExpression(column: string): string { @@ -143,6 +165,18 @@ export class KtxClickHouseDialect { return `COUNT(DISTINCT ${column})`; } + textLengthExpression(columnSql: string): string { + return `length(toString(${columnSql}))`; + } + + castToText(columnSql: string): string { + return `toString(${columnSql})`; + } + + getSampleValueAggregation(innerSql: string): string { + return `(SELECT arrayStringConcat(groupArray(toString(value)), '\\x1F') FROM (${innerSql}) AS relationship_profile_values)`; + } + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { return ` SELECT COUNT(DISTINCT val) AS cardinality @@ -181,46 +215,6 @@ export class KtxClickHouseDialect { ) `; } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - timezone?: string, - ): string { - const tz = timezone ? `, '${timezone}'` : ''; - switch (granularity) { - case 'day': - return `toStartOfDay(${column}${tz})`; - case 'week': - return `toStartOfWeek(${column}, 1${tz})`; - case 'month': - return `toStartOfMonth(${column}${tz})`; - case 'quarter': - return `toStartOfQuarter(${column}${tz})`; - case 'year': - return `toStartOfYear(${column}${tz})`; - } - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { - const col = timezone ? `toTimezone(${column}, '${timezone}')` : column; - const [rawAmount, rawUnit] = interval.split(' '); - const amount = Number(rawAmount); - const unit = rawUnit!.toLowerCase(); - const originExpr = origin ? `toDateTime('${origin}')` : "toDateTime('1970-01-01')"; - const calendarUnit = this.toClickHouseDateDiffUnit(unit); - if (calendarUnit) { - return `dateAdd(${calendarUnit}, intDiv(dateDiff(${calendarUnit}, ${originExpr}, ${col}), ${amount}) * ${amount}, ${originExpr})`; - } - const seconds = this.intervalToSeconds(amount, unit); - return `addSeconds(${originExpr}, intDiv(toUInt64(dateDiff('second', ${originExpr}, ${col})), ${seconds}) * ${seconds})`; - } - - parseIntervalToSql(interval: string): string { - const [amount, unit] = interval.split(' '); - return `INTERVAL ${amount} ${unit!.toUpperCase()}`; - } - private unwrapClickHouseType(value: string, wrapper: string): string { const prefix = `${wrapper}(`; return value.startsWith(prefix) && value.endsWith(')') ? value.slice(prefix.length, -1) : value; @@ -242,38 +236,4 @@ export class KtxClickHouseDialect { return 'String'; } - private toClickHouseDateDiffUnit(unit: string): string | null { - if (unit === 'month' || unit === 'months') { - return "'month'"; - } - if (unit === 'quarter' || unit === 'quarters') { - return "'quarter'"; - } - if (unit === 'year' || unit === 'years') { - return "'year'"; - } - return null; - } - - private intervalToSeconds(amount: number, unit: string): number { - switch (unit) { - case 'second': - case 'seconds': - return amount; - case 'minute': - case 'minutes': - return amount * 60; - case 'hour': - case 'hours': - return amount * 3600; - case 'day': - case 'days': - return amount * 86400; - case 'week': - case 'weeks': - return amount * 604800; - default: - return amount * 86400; - } - } } diff --git a/packages/cli/src/connectors/mysql/dialect.ts b/packages/cli/src/connectors/mysql/dialect.ts index d61db36c..6e2f1e63 100644 --- a/packages/cli/src/connectors/mysql/dialect.ts +++ b/packages/cli/src/connectors/mysql/dialect.ts @@ -1,9 +1,17 @@ +import type { KtxDialect } from '../../context/connections/dialects.js'; +import { + columnDisplayPartCount, + formatDialectDisplayRef, + formatDialectTableName, + limitOffsetClause, + parseDialectDisplayRef, +} from '../../context/connections/dialect-helpers.js'; import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js'; type MysqlTableNameRef = Pick & Partial>; -export class KtxMysqlDialect { - readonly type = 'mysql'; +export class KtxMysqlDialect implements KtxDialect { + readonly type = 'mysql' as const; private readonly typeMappings: Record = { datetime: 'time', @@ -41,9 +49,19 @@ export class KtxMysqlDialect { } formatTableName(table: MysqlTableNameRef): string { - return table.db - ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}` - : this.quoteIdentifier(table.name); + return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi'); + } + + formatDisplayRef(table: MysqlTableNameRef): string { + return formatDialectDisplayRef(table, 'ansi'); + } + + parseDisplayRef(display: string): KtxTableRef | null { + return parseDialectDisplayRef(display, 'ansi'); + } + + columnDisplayTablePartCount(): 1 | 2 | 3 { + return columnDisplayPartCount('ansi'); } mapDataType(nativeType: string): string { @@ -118,7 +136,11 @@ export class KtxMysqlDialect { } getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; + return limitOffsetClause(limit, offset); + } + + getTopClause(_limit: number): string { + return ''; } getNullCountExpression(column: string): string { @@ -129,6 +151,18 @@ export class KtxMysqlDialect { return `COUNT(DISTINCT ${column})`; } + textLengthExpression(columnSql: string): string { + return `CHAR_LENGTH(CAST(${columnSql} AS CHAR))`; + } + + castToText(columnSql: string): string { + return `CAST(${columnSql} AS CHAR)`; + } + + getSampleValueAggregation(innerSql: string): string { + return `(SELECT GROUP_CONCAT(CAST(value AS CHAR) SEPARATOR CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`; + } + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { return ` SELECT COUNT(DISTINCT val) AS cardinality @@ -167,36 +201,4 @@ export class KtxMysqlDialect { ) AS sampled `; } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - timezone?: string, - ): string { - const col = timezone ? `CONVERT_TZ(${column}, '+00:00', '${timezone}')` : column; - switch (granularity) { - case 'day': - return `DATE(${col})`; - case 'week': - return `DATE(${col} - INTERVAL WEEKDAY(${col}) DAY)`; - case 'month': - return `DATE_FORMAT(${col}, '%Y-%m-01')`; - case 'quarter': - return `MAKEDATE(YEAR(${col}), 1) + INTERVAL (QUARTER(${col}) - 1) QUARTER`; - case 'year': - return `DATE_FORMAT(${col}, '%Y-01-01')`; - } - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { - const col = timezone ? `CONVERT_TZ(${column}, '+00:00', '${timezone}')` : column; - const [amount, unit] = interval.split(' '); - const originExpr = origin ? `'${origin}'` : `'1970-01-01'`; - return `DATE_ADD(${originExpr}, INTERVAL FLOOR(TIMESTAMPDIFF(${unit!.toUpperCase()}, ${originExpr}, ${col}) / ${amount}) * ${amount} ${unit!.toUpperCase()})`; - } - - parseIntervalToSql(interval: string): string { - const [amount, unit] = interval.split(' '); - return `INTERVAL ${amount} ${unit!.toUpperCase()}`; - } } diff --git a/packages/cli/src/connectors/postgres/dialect.ts b/packages/cli/src/connectors/postgres/dialect.ts index ea0590b8..c3d6b77a 100644 --- a/packages/cli/src/connectors/postgres/dialect.ts +++ b/packages/cli/src/connectors/postgres/dialect.ts @@ -1,9 +1,17 @@ +import type { KtxDialect } from '../../context/connections/dialects.js'; +import { + columnDisplayPartCount, + formatDialectDisplayRef, + formatDialectTableName, + limitOffsetClause, + parseDialectDisplayRef, +} from '../../context/connections/dialect-helpers.js'; import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js'; type PostgresTableNameRef = Pick & Partial>; -export class KtxPostgresDialect { - readonly type = 'postgresql'; +export class KtxPostgresDialect implements KtxDialect { + readonly type = 'postgres' as const; private readonly typeMappings: Record = { timestamp: 'time', @@ -45,9 +53,19 @@ export class KtxPostgresDialect { } formatTableName(table: PostgresTableNameRef): string { - return table.db - ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}` - : this.quoteIdentifier(table.name); + return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'ansi'); + } + + formatDisplayRef(table: PostgresTableNameRef): string { + return formatDialectDisplayRef(table, 'ansi'); + } + + parseDisplayRef(display: string): KtxTableRef | null { + return parseDialectDisplayRef(display, 'ansi'); + } + + columnDisplayTablePartCount(): 1 | 2 | 3 { + return columnDisplayPartCount('ansi'); } mapDataType(nativeType: string): string { @@ -126,7 +144,11 @@ export class KtxPostgresDialect { } getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; + return limitOffsetClause(limit, offset); + } + + getTopClause(_limit: number): string { + return ''; } getNullCountExpression(column: string): string { @@ -137,6 +159,18 @@ export class KtxPostgresDialect { return `COUNT(DISTINCT ${column})`; } + textLengthExpression(columnSql: string): string { + return `LENGTH(CAST(${columnSql} AS TEXT))`; + } + + castToText(columnSql: string): string { + return `CAST(${columnSql} AS TEXT)`; + } + + getSampleValueAggregation(innerSql: string): string { + return `(SELECT STRING_AGG(CAST(value AS TEXT), CHR(31)) FROM (${innerSql}) AS relationship_profile_values)`; + } + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { return ` WITH sampled AS ( @@ -191,23 +225,4 @@ export class KtxPostgresDialect { FROM sampled `; } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - timezone?: string, - ): string { - const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column; - return `DATE_TRUNC('${granularity}', ${col})`; - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { - const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column; - const originExpr = origin ? `TIMESTAMP '${origin.replace(/'/g, "''")}'` : "TIMESTAMP '1970-01-01'"; - return `${originExpr} + FLOOR(EXTRACT(EPOCH FROM (${col} - ${originExpr})) / EXTRACT(EPOCH FROM INTERVAL '${interval.replace(/'/g, "''")}')) * INTERVAL '${interval.replace(/'/g, "''")}'`; - } - - parseIntervalToSql(interval: string): string { - return `INTERVAL '${interval.replace(/'/g, "''")}'`; - } } diff --git a/packages/cli/src/connectors/snowflake/dialect.ts b/packages/cli/src/connectors/snowflake/dialect.ts index db508134..322aeeb7 100644 --- a/packages/cli/src/connectors/snowflake/dialect.ts +++ b/packages/cli/src/connectors/snowflake/dialect.ts @@ -1,9 +1,17 @@ +import type { KtxDialect } from '../../context/connections/dialects.js'; +import { + columnDisplayPartCount, + formatDialectDisplayRef, + formatDialectTableName, + limitOffsetClause, + parseDialectDisplayRef, +} from '../../context/connections/dialect-helpers.js'; import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js'; type SnowflakeTableNameRef = Pick & Partial>; -export class KtxSnowflakeDialect { - readonly type = 'snowflake'; +export class KtxSnowflakeDialect implements KtxDialect { + readonly type = 'snowflake' as const; private readonly typeMappings: Record = { TIMESTAMP_NTZ: 'time', @@ -45,13 +53,19 @@ export class KtxSnowflakeDialect { } formatTableName(table: SnowflakeTableNameRef): string { - if (table.catalog && table.db) { - return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`; - } - if (table.db) { - return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`; - } - return this.quoteIdentifier(table.name); + return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part'); + } + + formatDisplayRef(table: SnowflakeTableNameRef): string { + return formatDialectDisplayRef(table, 'three-part'); + } + + parseDisplayRef(display: string): KtxTableRef | null { + return parseDialectDisplayRef(display, 'three-part'); + } + + columnDisplayTablePartCount(): 1 | 2 | 3 { + return columnDisplayPartCount('three-part'); } mapDataType(nativeType: string): string { @@ -115,7 +129,11 @@ export class KtxSnowflakeDialect { } getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; + return limitOffsetClause(limit, offset); + } + + getTopClause(_limit: number): string { + return ''; } getNullCountExpression(column: string): string { @@ -126,6 +144,18 @@ export class KtxSnowflakeDialect { return `APPROX_COUNT_DISTINCT(${column})`; } + textLengthExpression(columnSql: string): string { + return `LENGTH(CAST(${columnSql} AS TEXT))`; + } + + castToText(columnSql: string): string { + return `CAST(${columnSql} AS VARCHAR)`; + } + + getSampleValueAggregation(innerSql: string): string { + return `(SELECT LISTAGG(CAST(value AS VARCHAR), '\\x1f') FROM (${innerSql}) AS relationship_profile_values)`; + } + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { return ` WITH sampled AS ( @@ -164,24 +194,4 @@ export class KtxSnowflakeDialect { FROM sampled `; } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - timezone?: string, - ): string { - const target = timezone ? `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})` : column; - return `DATE_TRUNC('${granularity}', ${target})`; - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { - const target = timezone ? `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})` : column; - const [amount, unit] = interval.split(' '); - const originExpr = origin ? `'${origin}'::TIMESTAMP` : `'1970-01-01'::TIMESTAMP`; - return `DATEADD(${unit}, FLOOR(DATEDIFF(${unit}, ${originExpr}, ${target}) / ${amount}) * ${amount}, ${originExpr})`; - } - - parseIntervalToSql(interval: string): string { - return `INTERVAL '${interval}'`; - } } diff --git a/packages/cli/src/connectors/sqlite/dialect.ts b/packages/cli/src/connectors/sqlite/dialect.ts index b5771b62..fa626cc3 100644 --- a/packages/cli/src/connectors/sqlite/dialect.ts +++ b/packages/cli/src/connectors/sqlite/dialect.ts @@ -1,9 +1,17 @@ +import type { KtxDialect } from '../../context/connections/dialects.js'; +import { + columnDisplayPartCount, + formatDialectDisplayRef, + formatDialectTableName, + limitOffsetClause, + parseDialectDisplayRef, +} from '../../context/connections/dialect-helpers.js'; import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js'; type SqliteTableNameRef = Pick & Partial>; -export class KtxSqliteDialect { - readonly type = 'sqlite'; +export class KtxSqliteDialect implements KtxDialect { + readonly type = 'sqlite' as const; private readonly typeMappings: Record = { DATETIME: 'time', @@ -29,7 +37,19 @@ export class KtxSqliteDialect { } formatTableName(table: SqliteTableNameRef): string { - return this.quoteIdentifier(table.name); + return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'sqlite'); + } + + formatDisplayRef(table: SqliteTableNameRef): string { + return formatDialectDisplayRef(table, 'sqlite'); + } + + parseDisplayRef(display: string): KtxTableRef | null { + return parseDialectDisplayRef(display, 'sqlite'); + } + + columnDisplayTablePartCount(): 1 | 2 | 3 { + return columnDisplayPartCount('sqlite'); } mapDataType(nativeType: string): string { @@ -92,7 +112,11 @@ export class KtxSqliteDialect { } getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`; + return limitOffsetClause(limit, offset); + } + + getTopClause(_limit: number): string { + return ''; } getNullCountExpression(column: string): string { @@ -103,6 +127,18 @@ export class KtxSqliteDialect { return `COUNT(DISTINCT ${column})`; } + textLengthExpression(columnSql: string): string { + return `LENGTH(CAST(${columnSql} AS TEXT))`; + } + + castToText(columnSql: string): string { + return `CAST(${columnSql} AS TEXT)`; + } + + getSampleValueAggregation(innerSql: string): string { + return `(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (${innerSql}) AS relationship_profile_values)`; + } + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { return ` WITH sampled AS ( @@ -143,35 +179,4 @@ export class KtxSqliteDialect { FROM sampled `; } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - _timezone?: string, - ): string { - switch (granularity) { - case 'day': - return `DATE(${column})`; - case 'week': - return `DATE(${column}, 'weekday 0', '-6 days')`; - case 'month': - return `DATE(${column}, 'start of month')`; - case 'quarter': - return `DATE(${column}, 'start of month', '-' || ((CAST(STRFTIME('%m', ${column}) AS INTEGER) - 1) % 3) || ' months')`; - case 'year': - return `DATE(${column}, 'start of year')`; - } - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, _timezone?: string): string { - const [amount, unit] = interval.split(' '); - const originExpr = origin ? `julianday('${origin}')` : `julianday('1970-01-01')`; - const unitDays = unit === 'day' ? 1 : unit === 'week' ? 7 : 30; - const intervalDays = Number(amount) * unitDays; - return `DATE(julianday('1970-01-01') + (CAST((julianday(${column}) - ${originExpr}) / ${intervalDays} AS INTEGER) * ${intervalDays}))`; - } - - parseIntervalToSql(interval: string): string { - return `'${interval}'`; - } } diff --git a/packages/cli/src/connectors/sqlserver/dialect.ts b/packages/cli/src/connectors/sqlserver/dialect.ts index 8444317d..00b04fe8 100644 --- a/packages/cli/src/connectors/sqlserver/dialect.ts +++ b/packages/cli/src/connectors/sqlserver/dialect.ts @@ -1,9 +1,17 @@ +import type { KtxDialect } from '../../context/connections/dialects.js'; +import { + columnDisplayPartCount, + formatDialectDisplayRef, + formatDialectTableName, + parseDialectDisplayRef, + safeSqlLimit, +} from '../../context/connections/dialect-helpers.js'; import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js'; type SqlServerTableNameRef = Pick & Partial>; -export class KtxSqlServerDialect { - readonly type = 'sqlserver'; +export class KtxSqlServerDialect implements KtxDialect { + readonly type = 'sqlserver' as const; private readonly typeMappings: Record = { datetime: 'time', @@ -39,9 +47,19 @@ export class KtxSqlServerDialect { } formatTableName(table: SqlServerTableNameRef): string { - return table.db - ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}` - : this.quoteIdentifier(table.name); + return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part'); + } + + formatDisplayRef(table: SqlServerTableNameRef): string { + return formatDialectDisplayRef(table, 'three-part'); + } + + parseDisplayRef(display: string): KtxTableRef | null { + return parseDialectDisplayRef(display, 'three-part'); + } + + columnDisplayTablePartCount(): 1 | 2 | 3 { + return columnDisplayPartCount('three-part'); } mapDataType(nativeType: string): string { @@ -111,12 +129,12 @@ export class KtxSqlServerDialect { return `TABLESAMPLE (${samplePct * 100} PERCENT)`; } - getLimitOffsetClause(limit: number, offset?: number): string { - return offset !== undefined && offset > 0 ? `OFFSET ${offset} ROWS FETCH NEXT ${limit} ROWS ONLY` : ''; + getLimitOffsetClause(_limit: number, _offset?: number): string { + return ''; } getTopClause(limit: number): string { - return `TOP ${limit}`; + return `TOP (${safeSqlLimit(limit)})`; } getNullCountExpression(column: string): string { @@ -127,6 +145,18 @@ export class KtxSqlServerDialect { return `COUNT(DISTINCT ${column})`; } + textLengthExpression(columnSql: string): string { + return `LEN(CAST(${columnSql} AS NVARCHAR(MAX)))`; + } + + castToText(columnSql: string): string { + return `CAST(${columnSql} AS NVARCHAR(MAX))`; + } + + getSampleValueAggregation(innerSql: string): string { + return `(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (${innerSql}) AS relationship_profile_values)`; + } + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string { return ` WITH sampled AS ( @@ -167,35 +197,4 @@ export class KtxSqlServerDialect { FROM sampled `; } - - getTimeTruncExpression( - column: string, - granularity: 'day' | 'week' | 'month' | 'quarter' | 'year', - timezone?: string, - ): string { - const col = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column; - switch (granularity) { - case 'day': - return `CAST(${col} AS DATE)`; - case 'week': - return `DATEADD(WEEK, DATEDIFF(WEEK, 0, ${col}), 0)`; - case 'month': - return `DATEFROMPARTS(YEAR(${col}), MONTH(${col}), 1)`; - case 'quarter': - return `DATEFROMPARTS(YEAR(${col}), (DATEPART(QUARTER, ${col}) - 1) * 3 + 1, 1)`; - case 'year': - return `DATEFROMPARTS(YEAR(${col}), 1, 1)`; - } - } - - getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string { - const col = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column; - const [amount, unit] = interval.split(' '); - const originExpr = origin ? `'${origin}'` : `'1970-01-01'`; - return `DATEADD(${unit}, (DATEDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount}, ${originExpr})`; - } - - parseIntervalToSql(interval: string): string { - return `'${interval}'`; - } } diff --git a/packages/cli/src/context/connections/dialect-helpers.ts b/packages/cli/src/context/connections/dialect-helpers.ts new file mode 100644 index 00000000..04ed569b --- /dev/null +++ b/packages/cli/src/context/connections/dialect-helpers.ts @@ -0,0 +1,87 @@ +import type { KtxTableRef } from '../scan/types.js'; + +export type KtxDialectIdentifierShape = 'ansi' | 'sqlite' | 'three-part'; + +export type KtxDialectTableRef = Pick & Partial>; + +export function safeSqlLimit(limit: number): number { + return Math.max(1, Math.floor(limit)); +} + +function safeSqlOffset(offset: number | undefined): number | null { + if (offset === undefined) { + return null; + } + const normalized = Math.floor(offset); + return normalized > 0 ? normalized : null; +} + +function cleanIdentifierPart(part: string): string { + return part.trim().replace(/^["'`\[]|["'`\]]$/g, ''); +} + +function splitDisplay(display: string): string[] { + return display.trim().split('.').map(cleanIdentifierPart).filter(Boolean); +} + +function tableParts(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string[] { + if (shape === 'sqlite') { + return [table.name]; + } + return [table.catalog ?? null, table.db ?? null, table.name].filter((part): part is string => Boolean(part)); +} + +function acceptedDisplayPartCounts(shape: KtxDialectIdentifierShape): readonly number[] { + if (shape === 'sqlite') { + return [1]; + } + if (shape === 'three-part') { + return [3]; + } + return [2, 3]; +} + +export function formatDialectTableName( + table: KtxDialectTableRef, + quoteIdentifier: (identifier: string) => string, + shape: KtxDialectIdentifierShape, +): string { + return tableParts(table, shape).map(quoteIdentifier).join('.'); +} + +export function formatDialectDisplayRef(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string { + return tableParts(table, shape).join('.'); +} + +export function parseDialectDisplayRef(display: string, shape: KtxDialectIdentifierShape): KtxTableRef | null { + const parts = splitDisplay(display); + if (!acceptedDisplayPartCounts(shape).includes(parts.length)) { + return null; + } + if (parts.length === 1) { + return { catalog: null, db: null, name: parts[0]! }; + } + if (parts.length === 2) { + return { catalog: null, db: parts[0]!, name: parts[1]! }; + } + if (parts.length === 3) { + return { catalog: parts[0]!, db: parts[1]!, name: parts[2]! }; + } + return null; +} + +export function columnDisplayPartCount(shape: KtxDialectIdentifierShape): 1 | 2 | 3 { + if (shape === 'sqlite') { + return 1; + } + if (shape === 'three-part') { + return 3; + } + return 2; +} + +export function limitOffsetClause(limit: number, offset?: number): string { + const safeLimit = safeSqlLimit(limit); + const safeOffset = safeSqlOffset(offset); + return safeOffset === null ? `LIMIT ${safeLimit}` : `LIMIT ${safeLimit} OFFSET ${safeOffset}`; +} diff --git a/packages/cli/src/context/connections/dialects.test.ts b/packages/cli/src/context/connections/dialects.test.ts index d4f77997..414a6e8f 100644 --- a/packages/cli/src/context/connections/dialects.test.ts +++ b/packages/cli/src/context/connections/dialects.test.ts @@ -1,24 +1,306 @@ import { describe, expect, it } from 'vitest'; import { getDialectForDriver } from './dialects.js'; +import type { KtxConnectionDriver, KtxTableRef } from '../scan/types.js'; + +interface DialectFixture { + driver: KtxConnectionDriver; + table: KtxTableRef; + quoteInput: string; + quotedIdentifier: string; + formattedTable: string; + display: string; + invalidDisplay: string; + columnDisplayTablePartCount: 1 | 2 | 3; + limitClause: string; + topClause: string; + randomFilter: string; + tableSampleClause: string; + sampleQuery: string; + columnSampleContains: string; + nullCountExpression: string; + distinctCountExpression: string; + textLengthExpression: string; + castToText: string; + sampleValueAggregation: string; + cardinalityContains: string; + randomizedCardinalityContains: string; + distinctValuesContains: string; + statisticsContains: string | null; + dimensionInput: string; + dimensionType: 'time' | 'string' | 'number' | 'boolean'; + nativeTypeInput: string; + normalizedType: string; +} + +const innerSampleSql = 'SELECT status AS value FROM orders'; + +const fixtures: DialectFixture[] = [ + { + driver: 'postgres', + table: { catalog: null, db: 'public', name: 'orders' }, + quoteInput: 'order"items', + quotedIdentifier: '"order""items"', + formattedTable: '"public"."orders"', + display: 'public.orders', + invalidDisplay: 'orders', + columnDisplayTablePartCount: 2, + limitClause: 'LIMIT 25 OFFSET 5', + topClause: '', + randomFilter: 'RANDOM() < 0.25', + tableSampleClause: 'TABLESAMPLE SYSTEM (25)', + sampleQuery: 'SELECT "id", "status" FROM "public"."orders" LIMIT 5', + columnSampleContains: 'TRIM(CAST("status" AS TEXT)) != \'\'', + nullCountExpression: 'COUNT(*) FILTER (WHERE "status" IS NULL)', + distinctCountExpression: 'COUNT(DISTINCT "status")', + textLengthExpression: 'LENGTH(CAST("status" AS TEXT))', + castToText: 'CAST("status" AS TEXT)', + sampleValueAggregation: + '(SELECT STRING_AGG(CAST(value AS TEXT), CHR(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)', + cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality', + randomizedCardinalityContains: 'ORDER BY RANDOM()', + distinctValuesContains: 'SELECT DISTINCT "status"::text AS val', + statisticsContains: 'FROM pg_stats s', + dimensionInput: 'timestamp with time zone', + dimensionType: 'time', + nativeTypeInput: 'numeric(12,2)', + normalizedType: 'numeric(12,2)', + }, + { + driver: 'mysql', + table: { catalog: null, db: 'analytics', name: 'orders' }, + quoteInput: 'order`items', + quotedIdentifier: '`order``items`', + formattedTable: '`analytics`.`orders`', + display: 'analytics.orders', + invalidDisplay: 'orders', + columnDisplayTablePartCount: 2, + limitClause: 'LIMIT 25 OFFSET 5', + topClause: '', + randomFilter: 'RAND() < 0.25', + tableSampleClause: '', + sampleQuery: 'SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 5', + columnSampleContains: 'TRIM(CAST(`status` AS CHAR)) != \'\'', + nullCountExpression: 'SUM(CASE WHEN `status` IS NULL THEN 1 ELSE 0 END)', + distinctCountExpression: 'COUNT(DISTINCT `status`)', + textLengthExpression: 'CHAR_LENGTH(CAST(`status` AS CHAR))', + castToText: 'CAST(`status` AS CHAR)', + sampleValueAggregation: + '(SELECT GROUP_CONCAT(CAST(value AS CHAR) SEPARATOR CHAR(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)', + cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality', + randomizedCardinalityContains: 'ORDER BY RAND()', + distinctValuesContains: 'SELECT DISTINCT CAST(`status` AS CHAR) AS val', + statisticsContains: null, + dimensionInput: 'tinyint(1)', + dimensionType: 'boolean', + nativeTypeInput: 'varchar(255)', + normalizedType: 'varchar(255)', + }, + { + driver: 'clickhouse', + table: { catalog: null, db: 'analytics', name: 'events' }, + quoteInput: 'order`items', + quotedIdentifier: '`order``items`', + formattedTable: '`analytics`.`events`', + display: 'analytics.events', + invalidDisplay: 'events', + columnDisplayTablePartCount: 2, + limitClause: 'LIMIT 25 OFFSET 5', + topClause: '', + randomFilter: 'rand() / 4294967295.0 < 0.25', + tableSampleClause: '', + sampleQuery: 'SELECT `id`, `status` FROM `analytics`.`events` LIMIT 5', + columnSampleContains: 'trim(toString(`status`)) != \'\'', + nullCountExpression: 'countIf(`status` IS NULL)', + distinctCountExpression: 'COUNT(DISTINCT `status`)', + textLengthExpression: 'length(toString(`status`))', + castToText: 'toString(`status`)', + sampleValueAggregation: + '(SELECT arrayStringConcat(groupArray(toString(value)), \'\\x1F\') FROM (SELECT status AS value FROM orders) AS relationship_profile_values)', + cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality', + randomizedCardinalityContains: 'ORDER BY rand()', + distinctValuesContains: 'SELECT DISTINCT toString(`status`) AS val', + statisticsContains: null, + dimensionInput: 'Nullable(DateTime64(3))', + dimensionType: 'time', + nativeTypeInput: 'LowCardinality(String)', + normalizedType: 'LowCardinality(String)', + }, + { + driver: 'sqlite', + table: { catalog: null, db: null, name: 'orders' }, + quoteInput: 'order"items', + quotedIdentifier: '"order""items"', + formattedTable: '"orders"', + display: 'orders', + invalidDisplay: 'public.orders', + columnDisplayTablePartCount: 1, + limitClause: 'LIMIT 25 OFFSET 5', + topClause: '', + randomFilter: '(RANDOM() % 100) < 25', + tableSampleClause: '', + sampleQuery: 'SELECT "id", "status" FROM "orders" LIMIT 5', + columnSampleContains: 'TRIM(CAST("status" AS TEXT)) != \'\'', + nullCountExpression: 'SUM(CASE WHEN "status" IS NULL THEN 1 ELSE 0 END)', + distinctCountExpression: 'COUNT(DISTINCT "status")', + textLengthExpression: 'LENGTH(CAST("status" AS TEXT))', + castToText: 'CAST("status" AS TEXT)', + sampleValueAggregation: + '(SELECT GROUP_CONCAT(CAST(value AS TEXT), char(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)', + cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality', + randomizedCardinalityContains: 'ORDER BY RANDOM()', + distinctValuesContains: 'SELECT DISTINCT CAST("status" AS TEXT) AS val', + statisticsContains: null, + dimensionInput: 'INTEGER', + dimensionType: 'number', + nativeTypeInput: 'VARCHAR(255)', + normalizedType: 'VARCHAR(255)', + }, + { + driver: 'snowflake', + table: { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' }, + quoteInput: 'order"items', + quotedIdentifier: '"order""items"', + formattedTable: '"ANALYTICS"."PUBLIC"."ORDERS"', + display: 'ANALYTICS.PUBLIC.ORDERS', + invalidDisplay: 'PUBLIC.ORDERS', + columnDisplayTablePartCount: 3, + limitClause: 'LIMIT 25 OFFSET 5', + topClause: '', + randomFilter: 'UNIFORM(0::FLOAT, 1::FLOAT, RANDOM()) < 0.25', + tableSampleClause: 'SAMPLE (25)', + sampleQuery: 'SELECT "id", "status" FROM "ANALYTICS"."PUBLIC"."ORDERS" SAMPLE ROW (5 ROWS)', + columnSampleContains: 'TRIM(CAST("status" AS STRING)) != \'\'', + nullCountExpression: 'COUNT_IF("status" IS NULL)', + distinctCountExpression: 'APPROX_COUNT_DISTINCT("status")', + textLengthExpression: 'LENGTH(CAST("status" AS TEXT))', + castToText: 'CAST("status" AS VARCHAR)', + sampleValueAggregation: + '(SELECT LISTAGG(CAST(value AS VARCHAR), \'\\x1f\') FROM (SELECT status AS value FROM orders) AS relationship_profile_values)', + cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality', + randomizedCardinalityContains: 'SAMPLE ROW (100 ROWS)', + distinctValuesContains: 'SELECT DISTINCT "status"::VARCHAR AS val', + statisticsContains: null, + dimensionInput: 'TIMESTAMP_NTZ', + dimensionType: 'time', + nativeTypeInput: 'NUMBER(38,0)', + normalizedType: 'NUMBER(38,0)', + }, + { + driver: 'bigquery', + table: { catalog: 'analytics-project', db: 'warehouse', name: 'orders' }, + quoteInput: 'order`items', + quotedIdentifier: '`order\\`items`', + formattedTable: '`analytics-project`.`warehouse`.`orders`', + display: 'analytics-project.warehouse.orders', + invalidDisplay: 'warehouse.orders', + columnDisplayTablePartCount: 3, + limitClause: 'LIMIT 25 OFFSET 5', + topClause: '', + randomFilter: 'RAND() < 0.25', + tableSampleClause: 'TABLESAMPLE SYSTEM (25 PERCENT)', + sampleQuery: 'SELECT `id`, `status` FROM `analytics-project`.`warehouse`.`orders` ORDER BY RAND() LIMIT 5', + columnSampleContains: 'TRIM(CAST(`status` AS STRING)) != \'\'', + nullCountExpression: 'COUNTIF(`status` IS NULL)', + distinctCountExpression: 'APPROX_COUNT_DISTINCT(`status`)', + textLengthExpression: 'LENGTH(CAST(`status` AS STRING))', + castToText: 'CAST(`status` AS STRING)', + sampleValueAggregation: + '(SELECT STRING_AGG(CAST(value AS STRING), \'\\u001F\') FROM (SELECT status AS value FROM orders) AS relationship_profile_values)', + cardinalityContains: 'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality', + randomizedCardinalityContains: 'ORDER BY RAND()', + distinctValuesContains: 'SELECT DISTINCT CAST(`status` AS STRING) AS val', + statisticsContains: null, + dimensionInput: 'INT64', + dimensionType: 'number', + nativeTypeInput: 'INT64', + normalizedType: 'BIGINT', + }, + { + driver: 'sqlserver', + table: { catalog: 'warehouse', db: 'dbo', name: 'events' }, + quoteInput: 'odd]name', + quotedIdentifier: '[odd]]name]', + formattedTable: '[warehouse].[dbo].[events]', + display: 'warehouse.dbo.events', + invalidDisplay: 'dbo.events', + columnDisplayTablePartCount: 3, + limitClause: '', + topClause: 'TOP (25)', + randomFilter: 'ABS(CHECKSUM(NEWID())) % 100 < 25', + tableSampleClause: 'TABLESAMPLE (25 PERCENT)', + sampleQuery: 'SELECT TOP 5 [id], [status] FROM [warehouse].[dbo].[events]', + columnSampleContains: 'LTRIM(RTRIM(CAST([status] AS NVARCHAR(MAX)))) != \'\'', + nullCountExpression: 'SUM(CASE WHEN [status] IS NULL THEN 1 ELSE 0 END)', + distinctCountExpression: 'COUNT(DISTINCT [status])', + textLengthExpression: 'LEN(CAST([status] AS NVARCHAR(MAX)))', + castToText: 'CAST([status] AS NVARCHAR(MAX))', + sampleValueAggregation: + '(SELECT STRING_AGG(CAST(value AS NVARCHAR(MAX)), CHAR(31)) FROM (SELECT status AS value FROM orders) AS relationship_profile_values)', + cardinalityContains: 'SELECT COUNT(DISTINCT val) AS cardinality', + randomizedCardinalityContains: 'ORDER BY NEWID()', + distinctValuesContains: 'SELECT TOP 20 val', + statisticsContains: null, + dimensionInput: 'datetime2', + dimensionType: 'time', + nativeTypeInput: 'uniqueidentifier', + normalizedType: 'uniqueidentifier', + }, +]; describe('getDialectForDriver', () => { - it.each([ - ['postgres', '"public"."orders"'], - ['mysql', '`public`.`orders`'], - ['clickhouse', '`public`.`orders`'], - ['sqlite', '"orders"'], - ['snowflake', '"analytics"."public"."orders"'], - ['bigquery', '`analytics`.`public`.`orders`'], - ['sqlserver', '[analytics].[public].[orders]'], - ] as const)('formats table names for %s', (driver, expected) => { - const dialect = getDialectForDriver(driver); - expect( - dialect.formatTableName({ - catalog: driver === 'snowflake' || driver === 'bigquery' || driver === 'sqlserver' ? 'analytics' : null, - db: driver === 'sqlite' ? null : 'public', + it.each(fixtures)('returns a full KtxDialect for $driver', (fixture) => { + const dialect = getDialectForDriver(fixture.driver); + const column = dialect.quoteIdentifier('status'); + + expect(dialect.type).toBe(fixture.driver); + expect(dialect.quoteIdentifier(fixture.quoteInput)).toBe(fixture.quotedIdentifier); + expect(dialect.formatTableName(fixture.table)).toBe(fixture.formattedTable); + expect(dialect.formatDisplayRef(fixture.table)).toBe(fixture.display); + expect(dialect.parseDisplayRef(fixture.display)).toEqual(fixture.table); + expect(dialect.parseDisplayRef(fixture.invalidDisplay)).toBeNull(); + expect(dialect.columnDisplayTablePartCount()).toBe(fixture.columnDisplayTablePartCount); + expect(dialect.getLimitOffsetClause(25, 5)).toBe(fixture.limitClause); + expect(dialect.getTopClause(25)).toBe(fixture.topClause); + expect(dialect.getRandomSampleFilter(0.25)).toBe(fixture.randomFilter); + expect(dialect.getTableSampleClause(0.25)).toBe(fixture.tableSampleClause); + expect(dialect.generateSampleQuery(fixture.formattedTable, 5, ['id', 'status'])).toBe(fixture.sampleQuery); + expect(dialect.generateColumnSampleQuery(fixture.formattedTable, 'status', 10)).toContain( + fixture.columnSampleContains, + ); + expect(dialect.getNullCountExpression(column)).toBe(fixture.nullCountExpression); + expect(dialect.getDistinctCountExpression(column)).toBe(fixture.distinctCountExpression); + expect(dialect.textLengthExpression(column)).toBe(fixture.textLengthExpression); + expect(dialect.castToText(column)).toBe(fixture.castToText); + expect(dialect.getSampleValueAggregation(innerSampleSql)).toBe(fixture.sampleValueAggregation); + expect(dialect.generateCardinalitySampleQuery(fixture.formattedTable, column, 100)).toContain( + fixture.cardinalityContains, + ); + expect(dialect.generateRandomizedCardinalitySampleQuery(fixture.formattedTable, column, 100)).toContain( + fixture.randomizedCardinalityContains, + ); + expect(dialect.generateDistinctValuesQuery(fixture.formattedTable, column, 20)).toContain( + fixture.distinctValuesContains, + ); + const statistics = dialect.generateColumnStatisticsQuery(fixture.table.db ?? '', fixture.table.name); + if (fixture.statisticsContains) { + expect(statistics).toContain(fixture.statisticsContains); + } else { + expect(statistics).toBeNull(); + } + expect(dialect.mapToDimensionType(fixture.dimensionInput)).toBe(fixture.dimensionType); + expect(dialect.mapDataType(fixture.nativeTypeInput)).toBe(fixture.normalizedType); + }); + + it('accepts three-part ANSI display refs while keeping one-part names caller-owned', () => { + for (const driver of ['postgres', 'mysql', 'clickhouse'] as const) { + const dialect = getDialectForDriver(driver); + expect(dialect.parseDisplayRef('warehouse.public.orders')).toEqual({ + catalog: 'warehouse', + db: 'public', name: 'orders', - }), - ).toBe(expected); + }); + expect(dialect.parseDisplayRef('orders')).toBeNull(); + } }); it('throws with a supported-driver list for unknown drivers', () => { diff --git a/packages/cli/src/context/connections/dialects.ts b/packages/cli/src/context/connections/dialects.ts index 5c6cc27f..c7929cea 100644 --- a/packages/cli/src/context/connections/dialects.ts +++ b/packages/cli/src/context/connections/dialects.ts @@ -1,22 +1,40 @@ -import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js'; - -type SupportedDriver = - | 'postgres' - | 'mysql' - | 'sqlserver' - | 'snowflake' - | 'bigquery' - | 'clickhouse' - | 'sqlite'; +import { KtxBigQueryDialect } from '../../connectors/bigquery/dialect.js'; +import { KtxClickHouseDialect } from '../../connectors/clickhouse/dialect.js'; +import { KtxMysqlDialect } from '../../connectors/mysql/dialect.js'; +import { KtxPostgresDialect } from '../../connectors/postgres/dialect.js'; +import { KtxSqliteDialect } from '../../connectors/sqlite/dialect.js'; +import { KtxSnowflakeDialect } from '../../connectors/snowflake/dialect.js'; +import { KtxSqlServerDialect } from '../../connectors/sqlserver/dialect.js'; +import type { KtxConnectionDriver, KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js'; +import type { KtxDialectTableRef } from './dialect-helpers.js'; export interface KtxDialect { - readonly type: SupportedDriver; + readonly type: KtxConnectionDriver; quoteIdentifier(identifier: string): string; - formatTableName(table: KtxTableRef): string; + formatTableName(table: KtxDialectTableRef): string; + formatDisplayRef(table: KtxDialectTableRef): string; + parseDisplayRef(display: string): KtxTableRef | null; + columnDisplayTablePartCount(): 1 | 2 | 3; + getLimitOffsetClause(limit: number, offset?: number): string; + getTopClause(limit: number): string; + getRandomSampleFilter(samplePct: number): string; + getTableSampleClause(samplePct: number): string; + generateSampleQuery(tableName: string, limit: number, columns?: string[]): string; + generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string; + getSampleValueAggregation(innerSql: string): string; + generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string; + generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string; + generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string; + generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null; + getNullCountExpression(column: string): string; + getDistinctCountExpression(column: string): string; + textLengthExpression(columnSql: string): string; + castToText(columnSql: string): string; mapToDimensionType(nativeType: string): KtxSchemaDimensionType; + mapDataType(nativeType: string): string; } -const supportedDrivers: SupportedDriver[] = [ +const supportedDrivers: KtxConnectionDriver[] = [ 'bigquery', 'clickhouse', 'mysql', @@ -26,71 +44,21 @@ const supportedDrivers: SupportedDriver[] = [ 'sqlserver', ]; -function doubleQuoted(identifier: string): string { - return `"${identifier.replace(/"/g, '""')}"`; -} - -function backtickQuoted(identifier: string): string { - return `\`${identifier.replace(/`/g, '``')}\``; -} - -function bigQueryQuoted(identifier: string): string { - return `\`${identifier.replace(/`/g, '\\`')}\``; -} - -function bracketQuoted(identifier: string): string { - return `[${identifier.replace(/\]/g, ']]')}]`; -} - -function inferDimensionType(nativeType: string): KtxSchemaDimensionType { - const normalized = nativeType.toLowerCase().trim(); - if (normalized.includes('date') || normalized.includes('time')) { - return 'time'; - } - if ( - normalized.includes('int') || - normalized.includes('num') || - normalized.includes('dec') || - normalized.includes('float') || - normalized.includes('double') || - normalized.includes('real') - ) { - return 'number'; - } - if (normalized.includes('bool') || normalized === 'bit') { - return 'boolean'; - } - return 'string'; -} - -function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string { - const parts = sqlite ? [table.name] : [table.catalog, table.db, table.name].filter((part): part is string => !!part); - return parts.map(quote).join('.'); -} - -function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect { - return { - type, - quoteIdentifier: quote, - formatTableName: (table) => formatWithParts(table, quote, sqlite), - mapToDimensionType: inferDimensionType, - }; -} - -const dialects: Record = { - postgres: createDialect('postgres', doubleQuoted), - mysql: createDialect('mysql', backtickQuoted), - clickhouse: createDialect('clickhouse', backtickQuoted), - sqlite: createDialect('sqlite', doubleQuoted, true), - snowflake: createDialect('snowflake', doubleQuoted), - bigquery: createDialect('bigquery', bigQueryQuoted), - sqlserver: createDialect('sqlserver', bracketQuoted), +const dialectFactories: Record KtxDialect> = { + bigquery: () => new KtxBigQueryDialect(), + clickhouse: () => new KtxClickHouseDialect(), + mysql: () => new KtxMysqlDialect(), + postgres: () => new KtxPostgresDialect(), + sqlite: () => new KtxSqliteDialect(), + snowflake: () => new KtxSnowflakeDialect(), + sqlserver: () => new KtxSqlServerDialect(), }; export function getDialectForDriver(driver: string): KtxDialect { const normalized = driver.toLowerCase().trim(); - if (normalized in dialects) { - return dialects[normalized as SupportedDriver]; + const factory = dialectFactories[normalized as KtxConnectionDriver]; + if (factory) { + return factory(); } throw new Error(`Unsupported warehouse driver "${driver}". Supported drivers: ${supportedDrivers.join(', ')}`); }