mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-01 08:59:39 +02:00
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name } shape, so
qualifiedTableId always wrote a 2-part db.name entry into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape), and
schemasFromEnabledTables now goes through parseDottedTableEntry, so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
This commit is contained in:
parent
924868841d
commit
56985b7e09
548 changed files with 5048 additions and 2228 deletions
|
|
@ -1,9 +1,18 @@
|
|||
import type { KtxDialect } from '../../context/connections/dialects.js';
|
||||
import {
|
||||
columnDisplayPartCount,
|
||||
formatDialectDisplayRef,
|
||||
formatDialectTableName,
|
||||
limitOffsetClause,
|
||||
parseDialectDisplayRef,
|
||||
} from '../../context/connections/dialect-helpers.js';
|
||||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
export class KtxBigQueryDialect {
|
||||
readonly type = 'bigquery';
|
||||
/** @internal */
|
||||
export class KtxBigQueryDialect implements KtxDialect {
|
||||
readonly type = 'bigquery' as const;
|
||||
|
||||
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
|
||||
TIMESTAMP: 'time',
|
||||
|
|
@ -27,13 +36,19 @@ export class KtxBigQueryDialect {
|
|||
}
|
||||
|
||||
formatTableName(table: BigQueryTableNameRef): string {
|
||||
if (table.catalog && table.db) {
|
||||
return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
|
||||
}
|
||||
if (table.db) {
|
||||
return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
|
||||
}
|
||||
return this.quoteIdentifier(table.name);
|
||||
return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
|
||||
}
|
||||
|
||||
formatDisplayRef(table: BigQueryTableNameRef): string {
|
||||
return formatDialectDisplayRef(table, 'three-part');
|
||||
}
|
||||
|
||||
parseDisplayRef(display: string): KtxTableRef | null {
|
||||
return parseDialectDisplayRef(display, 'three-part');
|
||||
}
|
||||
|
||||
columnDisplayTablePartCount(): 1 | 2 | 3 {
|
||||
return columnDisplayPartCount('three-part');
|
||||
}
|
||||
|
||||
mapDataType(nativeType: string): string {
|
||||
|
|
@ -93,19 +108,6 @@ export class KtxBigQueryDialect {
|
|||
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
|
||||
}
|
||||
|
||||
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
|
||||
if (!params) {
|
||||
return { sql, params: undefined };
|
||||
}
|
||||
let processedSql = sql;
|
||||
const processedParams: Record<string, unknown> = {};
|
||||
for (const [key, value] of Object.entries(params)) {
|
||||
processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
|
||||
processedParams[key] = value;
|
||||
}
|
||||
return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
|
||||
}
|
||||
|
||||
getRandomSampleFilter(samplePct: number): string {
|
||||
if (samplePct <= 0 || samplePct >= 1) {
|
||||
return '';
|
||||
|
|
@ -121,7 +123,11 @@ export class KtxBigQueryDialect {
|
|||
}
|
||||
|
||||
getLimitOffsetClause(limit: number, offset?: number): string {
|
||||
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
|
||||
return limitOffsetClause(limit, offset);
|
||||
}
|
||||
|
||||
getTopClause(_limit: number): string {
|
||||
return '';
|
||||
}
|
||||
|
||||
getNullCountExpression(column: string): string {
|
||||
|
|
@ -132,6 +138,18 @@ export class KtxBigQueryDialect {
|
|||
return `APPROX_COUNT_DISTINCT(${column})`;
|
||||
}
|
||||
|
||||
textLengthExpression(columnSql: string): string {
|
||||
return `LENGTH(CAST(${columnSql} AS STRING))`;
|
||||
}
|
||||
|
||||
castToText(columnSql: string): string {
|
||||
return `CAST(${columnSql} AS STRING)`;
|
||||
}
|
||||
|
||||
getSampleValueAggregation(innerSql: string): string {
|
||||
return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`;
|
||||
}
|
||||
|
||||
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
return `
|
||||
WITH sampled AS (
|
||||
|
|
@ -172,36 +190,4 @@ export class KtxBigQueryDialect {
|
|||
FROM sampled
|
||||
`;
|
||||
}
|
||||
|
||||
getTimeTruncExpression(
|
||||
column: string,
|
||||
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
|
||||
timezone?: string,
|
||||
): string {
|
||||
const bigQueryGranularity = granularity.toUpperCase();
|
||||
if (timezone) {
|
||||
return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`;
|
||||
}
|
||||
return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
|
||||
}
|
||||
|
||||
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
|
||||
const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;
|
||||
const [rawAmount, rawUnit] = interval.split(' ');
|
||||
let diffUnit = rawUnit!.toUpperCase();
|
||||
let amount = Number(rawAmount);
|
||||
let addUnit = diffUnit;
|
||||
if (diffUnit === 'WEEK') {
|
||||
diffUnit = 'DAY';
|
||||
amount = amount * 7;
|
||||
addUnit = 'DAY';
|
||||
}
|
||||
const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
|
||||
return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${diffUnit}) / ${amount}) * ${amount} AS INT64) ${addUnit})`;
|
||||
}
|
||||
|
||||
parseIntervalToSql(interval: string): string {
|
||||
const [amount, unit] = interval.split(' ');
|
||||
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue