test: split cli tests from source tree (#216)

* feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. 
When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-07-01 08:59:39 +02:00 · 2026-05-26 08:49:05 +02:00 · 2026-05-26 08:49:05 +02:00 · 56985b7e09
commit 56985b7e09
parent 924868841d
548 changed files with 5048 additions and 2228 deletions
--- a/packages/cli/src/connectors/bigquery/dialect.ts
+++ b/packages/cli/src/connectors/bigquery/dialect.ts
@@ -1,9 +1,18 @@
+import type { KtxDialect } from '../../context/connections/dialects.js';
+import {
+  columnDisplayPartCount,
+  formatDialectDisplayRef,
+  formatDialectTableName,
+  limitOffsetClause,
+  parseDialectDisplayRef,
+} from '../../context/connections/dialect-helpers.js';
 import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';

 type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;

-export class KtxBigQueryDialect {
-  readonly type = 'bigquery';
+/** @internal */
+export class KtxBigQueryDialect implements KtxDialect {
+  readonly type = 'bigquery' as const;

  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    TIMESTAMP: 'time',
@@ -27,13 +36,19 @@ export class KtxBigQueryDialect {
  }

  formatTableName(table: BigQueryTableNameRef): string {
-    if (table.catalog && table.db) {
-      return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
-    }
-    if (table.db) {
-      return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
-    }
-    return this.quoteIdentifier(table.name);
+    return formatDialectTableName(table, this.quoteIdentifier.bind(this), 'three-part');
+  }
+
+  formatDisplayRef(table: BigQueryTableNameRef): string {
+    return formatDialectDisplayRef(table, 'three-part');
+  }
+
+  parseDisplayRef(display: string): KtxTableRef | null {
+    return parseDialectDisplayRef(display, 'three-part');
+  }
+
+  columnDisplayTablePartCount(): 1 | 2 | 3 {
+    return columnDisplayPartCount('three-part');
  }

  mapDataType(nativeType: string): string {
@@ -93,19 +108,6 @@ export class KtxBigQueryDialect {
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
  }

-  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
-    if (!params) {
-      return { sql, params: undefined };
-    }
-    let processedSql = sql;
-    const processedParams: Record<string, unknown> = {};
-    for (const [key, value] of Object.entries(params)) {
-      processedSql = processedSql.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
-      processedParams[key] = value;
-    }
-    return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
-  }
-
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
@@ -121,7 +123,11 @@ export class KtxBigQueryDialect {
  }

  getLimitOffsetClause(limit: number, offset?: number): string {
-    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
+    return limitOffsetClause(limit, offset);
+  }
+
+  getTopClause(_limit: number): string {
+    return '';
  }

  getNullCountExpression(column: string): string {
@@ -132,6 +138,18 @@ export class KtxBigQueryDialect {
    return `APPROX_COUNT_DISTINCT(${column})`;
  }

+  textLengthExpression(columnSql: string): string {
+    return `LENGTH(CAST(${columnSql} AS STRING))`;
+  }
+
+  castToText(columnSql: string): string {
+    return `CAST(${columnSql} AS STRING)`;
+  }
+
+  getSampleValueAggregation(innerSql: string): string {
+    return `(SELECT STRING_AGG(CAST(value AS STRING), '\\u001F') FROM (${innerSql}) AS relationship_profile_values)`;
+  }
+
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
@@ -172,36 +190,4 @@ export class KtxBigQueryDialect {
      FROM sampled
    `;
  }
-
-  getTimeTruncExpression(
-    column: string,
-    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
-    timezone?: string,
-  ): string {
-    const bigQueryGranularity = granularity.toUpperCase();
-    if (timezone) {
-      return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`;
-    }
-    return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
-  }
-
-  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
-    const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;
-    const [rawAmount, rawUnit] = interval.split(' ');
-    let diffUnit = rawUnit!.toUpperCase();
-    let amount = Number(rawAmount);
-    let addUnit = diffUnit;
-    if (diffUnit === 'WEEK') {
-      diffUnit = 'DAY';
-      amount = amount * 7;
-      addUnit = 'DAY';
-    }
-    const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
-    return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${diffUnit}) / ${amount}) * ${amount} AS INT64) ${addUnit})`;
-  }
-
-  parseIntervalToSql(interval: string): string {
-    const [amount, unit] = interval.split(' ');
-    return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
-  }
 }