test: split cli tests from source tree (#216)

* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name } shape, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
This commit is contained in:
Andrey Avtomonov 2026-05-26 08:49:05 +02:00 committed by GitHub
parent 924868841d
commit 56985b7e09
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
548 changed files with 5048 additions and 2228 deletions

View file

@ -1,19 +0,0 @@
import { describe, expect, it } from 'vitest';
import { normalizeBigQueryProjectId, normalizeBigQueryRegion } from './bigquery-identifiers.js';
// Exercises the accepting and the rejecting path of the BigQuery identifier normalizers.
describe('BigQuery identifier normalization', () => {
  it('normalizes project ids and regions for information schema paths', () => {
    const normalizedProject = normalizeBigQueryProjectId('project-1');
    expect(normalizedProject).toBe('project-1');

    const upperCaseRegion = normalizeBigQueryRegion('US');
    expect(upperCaseRegion).toBe('us');

    const prefixedRegion = normalizeBigQueryRegion('region-eu');
    expect(prefixedRegion).toBe('eu');
  });

  it('rejects malformed project ids and regions with caller-specific context', () => {
    // The second argument is the caller context that must show up in the error text.
    const normalizeBadProject = () => normalizeBigQueryProjectId('project`1', 'table discovery');
    expect(normalizeBadProject).toThrow('Invalid BigQuery project id for table discovery: project`1');

    const normalizeBadRegion = () => normalizeBigQueryRegion('US;DROP', 'table discovery');
    expect(normalizeBadRegion).toThrow('Invalid BigQuery region for table discovery: US;DROP');
  });
});

View file

@ -0,0 +1,87 @@
import type { KtxTableRef } from '../scan/types.js';
/**
 * Identifier layout used by the helpers in this file: 'sqlite' formats only the table name and
 * accepts one-part display refs, 'three-part' accepts exactly three-part display refs, and
 * 'ansi' accepts two- or three-part display refs.
 */
export type KtxDialectIdentifierShape = 'ansi' | 'sqlite' | 'three-part';
/** Table reference with a required `name`; `catalog` and `db` may be omitted. */
export type KtxDialectTableRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
/**
 * Normalizes a caller-supplied row limit into a positive integer that is safe to interpolate
 * into a SQL `LIMIT` clause.
 *
 * Fractions are floored and anything below 1 is raised to 1. NaN falls back to 1 and values
 * above Number.MAX_SAFE_INTEGER (including Infinity) are capped, so the result never
 * stringifies as `NaN`, `Infinity`, or exponent notation.
 *
 * @param limit Requested maximum number of rows.
 * @returns An integer in the range [1, Number.MAX_SAFE_INTEGER].
 */
export function safeSqlLimit(limit: number): number {
  if (Number.isNaN(limit)) {
    // Math.max(1, NaN) is NaN, which would otherwise render as "LIMIT NaN".
    return 1;
  }
  return Math.min(Number.MAX_SAFE_INTEGER, Math.max(1, Math.floor(limit)));
}
/**
 * Floors the requested offset and reports it only when it is strictly positive.
 *
 * @returns The floored offset, or null when no offset was given or the floored value is not > 0.
 */
function safeSqlOffset(offset: number | undefined): number | null {
  if (offset !== undefined) {
    const floored = Math.floor(offset);
    if (floored > 0) {
      return floored;
    }
  }
  return null;
}
/**
 * Trims one identifier segment and removes a single quoting character (`"`, `'`, backtick,
 * or a square bracket) from its start and from its end.
 */
function cleanIdentifierPart(part: string): string {
  const quoteAtEitherEnd = /^["'`\[]|["'`\]]$/g;
  const trimmed = part.trim();
  return trimmed.replace(quoteAtEitherEnd, '');
}
/**
 * Splits a dotted display reference into cleaned segments, dropping segments that end up empty.
 */
function splitDisplay(display: string): string[] {
  const segments: string[] = [];
  for (const rawSegment of display.trim().split('.')) {
    const segment = cleanIdentifierPart(rawSegment);
    if (segment) {
      segments.push(segment);
    }
  }
  return segments;
}
/**
 * Lists the identifier segments to render for a table under the given shape.
 *
 * The 'sqlite' shape always yields just the table name; every other shape yields catalog, db,
 * and name in that order, skipping any that are missing or empty.
 */
function tableParts(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string[] {
  if (shape === 'sqlite') {
    return [table.name];
  }
  const present: string[] = [];
  for (const candidate of [table.catalog, table.db, table.name]) {
    if (candidate) {
      present.push(candidate);
    }
  }
  return present;
}
/** Returns how many dotted segments a display reference may have for the given shape. */
function acceptedDisplayPartCounts(shape: KtxDialectIdentifierShape): readonly number[] {
  switch (shape) {
    case 'sqlite':
      return [1];
    case 'three-part':
      return [3];
    default:
      return [2, 3];
  }
}
/**
 * Renders a table reference as a dot-joined list of quoted identifier segments.
 *
 * @param table Table reference to render.
 * @param quoteIdentifier Quoting function applied to each segment.
 * @param shape Identifier shape that decides which segments are rendered.
 * @returns The quoted segments joined with '.'.
 */
export function formatDialectTableName(
  table: KtxDialectTableRef,
  quoteIdentifier: (identifier: string) => string,
  shape: KtxDialectIdentifierShape,
): string {
  return tableParts(table, shape)
    // Call with exactly one argument: passing the callback straight to map() would also hand it
    // the index and the array, which a quoting function with optional parameters could misread.
    .map((part) => quoteIdentifier(part))
    .join('.');
}
/** Renders a table reference as unquoted, dot-joined segments for display. */
export function formatDialectDisplayRef(table: KtxDialectTableRef, shape: KtxDialectIdentifierShape): string {
  const segments = tableParts(table, shape);
  return segments.join('.');
}
/**
 * Parses a dotted display reference back into a table reference.
 *
 * @returns null when the number of segments is not accepted for the shape; otherwise the
 *   segments mapped right-to-left onto name, db, and catalog, with missing levels set to null.
 */
export function parseDialectDisplayRef(display: string, shape: KtxDialectIdentifierShape): KtxTableRef | null {
  const segments = splitDisplay(display);
  const allowedCounts = acceptedDisplayPartCounts(shape);
  if (!allowedCounts.includes(segments.length)) {
    return null;
  }
  switch (segments.length) {
    case 1:
      return { catalog: null, db: null, name: segments[0]! };
    case 2:
      return { catalog: null, db: segments[0]!, name: segments[1]! };
    case 3:
      return { catalog: segments[0]!, db: segments[1]!, name: segments[2]! };
    default:
      return null;
  }
}
/** Returns the part count for the given shape: 1 for 'sqlite', 3 for 'three-part', otherwise 2. */
export function columnDisplayPartCount(shape: KtxDialectIdentifierShape): 1 | 2 | 3 {
  switch (shape) {
    case 'sqlite':
      return 1;
    case 'three-part':
      return 3;
    default:
      return 2;
  }
}
/**
 * Builds a `LIMIT n` clause, appending `OFFSET m` only when the normalized offset is positive.
 */
export function limitOffsetClause(limit: number, offset?: number): string {
  const limitSql = `LIMIT ${safeSqlLimit(limit)}`;
  const normalizedOffset = safeSqlOffset(offset);
  if (normalizedOffset === null) {
    return limitSql;
  }
  return `${limitSql} OFFSET ${normalizedOffset}`;
}

View file

@ -1,34 +0,0 @@
import { describe, expect, it } from 'vitest';
import { getDialectForDriver } from './dialects.js';
// Checks per-driver table-name quoting and the errors raised for unknown driver names.
describe('getDialectForDriver', () => {
  // Drivers whose expected output below includes a catalog segment.
  const catalogDrivers: readonly string[] = ['snowflake', 'bigquery', 'sqlserver'];

  it.each([
    ['postgres', '"public"."orders"'],
    ['mysql', '`public`.`orders`'],
    ['clickhouse', '`public`.`orders`'],
    ['sqlite', '"orders"'],
    ['snowflake', '"analytics"."public"."orders"'],
    ['bigquery', '`analytics`.`public`.`orders`'],
    ['sqlserver', '[analytics].[public].[orders]'],
  ] as const)('formats table names for %s', (driver, expected) => {
    const dialect = getDialectForDriver(driver);
    const catalog = catalogDrivers.includes(driver) ? 'analytics' : null;
    const db = driver === 'sqlite' ? null : 'public';

    const formatted = dialect.formatTableName({ catalog, db, name: 'orders' });

    expect(formatted).toBe(expected);
  });

  it('throws with a supported-driver list for unknown drivers', () => {
    const resolveOracle = () => getDialectForDriver('oracle');
    expect(resolveOracle).toThrow(
      'Unsupported warehouse driver "oracle". Supported drivers: bigquery, clickhouse, mysql, postgres, sqlite, snowflake, sqlserver',
    );
  });

  it('rejects legacy driver aliases', () => {
    const resolvePostgresql = () => getDialectForDriver('postgresql');
    expect(resolvePostgresql).toThrow('Unsupported warehouse driver "postgresql"');

    const resolveSqlite3 = () => getDialectForDriver('sqlite3');
    expect(resolveSqlite3).toThrow('Unsupported warehouse driver "sqlite3"');
  });
});

View file

@ -1,22 +1,40 @@
import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
type SupportedDriver =
| 'postgres'
| 'mysql'
| 'sqlserver'
| 'snowflake'
| 'bigquery'
| 'clickhouse'
| 'sqlite';
import { KtxBigQueryDialect } from '../../connectors/bigquery/dialect.js';
import { KtxClickHouseDialect } from '../../connectors/clickhouse/dialect.js';
import { KtxMysqlDialect } from '../../connectors/mysql/dialect.js';
import { KtxPostgresDialect } from '../../connectors/postgres/dialect.js';
import { KtxSqliteDialect } from '../../connectors/sqlite/dialect.js';
import { KtxSnowflakeDialect } from '../../connectors/snowflake/dialect.js';
import { KtxSqlServerDialect } from '../../connectors/sqlserver/dialect.js';
import type { KtxConnectionDriver, KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
import type { KtxDialectTableRef } from './dialect-helpers.js';
/**
 * Contract a warehouse SQL dialect exposes: identifier quoting, table-reference formatting and
 * parsing, SQL fragment and query builders, and native-type mapping.
 */
export interface KtxDialect {
// Driver the dialect belongs to.
readonly type: SupportedDriver;
readonly type: KtxConnectionDriver;
quoteIdentifier(identifier: string): string;
// Table reference rendering and parsing.
formatTableName(table: KtxTableRef): string;
formatTableName(table: KtxDialectTableRef): string;
formatDisplayRef(table: KtxDialectTableRef): string;
// Returns null when the display string cannot be parsed into a table reference.
parseDisplayRef(display: string): KtxTableRef | null;
columnDisplayTablePartCount(): 1 | 2 | 3;
// Row limiting and sampling fragments.
getLimitOffsetClause(limit: number, offset?: number): string;
getTopClause(limit: number): string;
getRandomSampleFilter(samplePct: number): string;
getTableSampleClause(samplePct: number): string;
// Whole-query builders; each takes a table name (and column name where relevant).
generateSampleQuery(tableName: string, limit: number, columns?: string[]): string;
generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string;
getSampleValueAggregation(innerSql: string): string;
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string;
generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string;
generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string;
// May return null instead of a query.
generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null;
// Column-level SQL expressions.
getNullCountExpression(column: string): string;
getDistinctCountExpression(column: string): string;
textLengthExpression(columnSql: string): string;
castToText(columnSql: string): string;
// Native type mapping.
mapToDimensionType(nativeType: string): KtxSchemaDimensionType;
mapDataType(nativeType: string): string;
}
const supportedDrivers: SupportedDriver[] = [
const supportedDrivers: KtxConnectionDriver[] = [
'bigquery',
'clickhouse',
'mysql',
@ -26,71 +44,21 @@ const supportedDrivers: SupportedDriver[] = [
'sqlserver',
];
/** Wraps an identifier in double quotes, doubling any embedded double quote. */
function doubleQuoted(identifier: string): string {
  const escaped = identifier.split('"').join('""');
  return '"' + escaped + '"';
}
/** Wraps an identifier in backticks, doubling any embedded backtick. */
function backtickQuoted(identifier: string): string {
  const escaped = identifier.split('`').join('``');
  return '`' + escaped + '`';
}
/**
 * Wraps an identifier in backticks using backslash escapes.
 *
 * Backslashes are escaped before backticks. Otherwise an identifier that already contains a
 * backslash directly before a backtick would produce `\\` followed by a bare backtick, which
 * ends the quoted identifier early.
 *
 * @param identifier Raw identifier text.
 * @returns The identifier enclosed in backticks with `\` and backtick escaped.
 */
function bigQueryQuoted(identifier: string): string {
  const escaped = identifier.replace(/\\/g, '\\\\').replace(/`/g, '\\`');
  return `\`${escaped}\``;
}
/** Wraps an identifier in square brackets, doubling any embedded closing bracket. */
function bracketQuoted(identifier: string): string {
  const escaped = identifier.split(']').join(']]');
  return '[' + escaped + ']';
}
/**
 * Classifies a native column type by substring matching on its lower-cased, trimmed name.
 *
 * Checks run in order: date/time markers, then numeric markers, then boolean markers
 * (including an exact 'bit'); anything else is treated as 'string'.
 */
function inferDimensionType(nativeType: string): KtxSchemaDimensionType {
  const typeName = nativeType.toLowerCase().trim();
  const mentionsAny = (markers: readonly string[]): boolean => markers.some((marker) => typeName.includes(marker));

  if (mentionsAny(['date', 'time'])) {
    return 'time';
  }
  if (mentionsAny(['int', 'num', 'dec', 'float', 'double', 'real'])) {
    return 'number';
  }
  if (mentionsAny(['bool']) || typeName === 'bit') {
    return 'boolean';
  }
  return 'string';
}
/**
 * Quotes and dot-joins the segments of a table reference.
 *
 * With `sqlite` set only the table name is used; otherwise catalog, db, and name are used in
 * that order with missing or empty segments skipped.
 */
function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string {
  let segments: string[];
  if (sqlite) {
    segments = [table.name];
  } else {
    segments = [];
    for (const candidate of [table.catalog, table.db, table.name]) {
      if (candidate) {
        segments.push(candidate);
      }
    }
  }
  return segments.map(quote).join('.');
}
/**
 * Assembles a dialect object from a driver name and a quoting function.
 *
 * Table names are formatted through formatWithParts with the same `quote` and `sqlite` flag,
 * and dimension types are inferred by inferDimensionType.
 */
function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect {
  const dialect: KtxDialect = {
    type,
    quoteIdentifier: quote,
    formatTableName: (table) => formatWithParts(table, quote, sqlite),
    mapToDimensionType: inferDimensionType,
  };
  return dialect;
}
const dialects: Record<SupportedDriver, KtxDialect> = {
postgres: createDialect('postgres', doubleQuoted),
mysql: createDialect('mysql', backtickQuoted),
clickhouse: createDialect('clickhouse', backtickQuoted),
sqlite: createDialect('sqlite', doubleQuoted, true),
snowflake: createDialect('snowflake', doubleQuoted),
bigquery: createDialect('bigquery', bigQueryQuoted),
sqlserver: createDialect('sqlserver', bracketQuoted),
const dialectFactories: Record<KtxConnectionDriver, () => KtxDialect> = {
bigquery: () => new KtxBigQueryDialect(),
clickhouse: () => new KtxClickHouseDialect(),
mysql: () => new KtxMysqlDialect(),
postgres: () => new KtxPostgresDialect(),
sqlite: () => new KtxSqliteDialect(),
snowflake: () => new KtxSnowflakeDialect(),
sqlserver: () => new KtxSqlServerDialect(),
};
/**
 * Resolves the dialect for a warehouse driver name.
 *
 * The name is lower-cased and trimmed before lookup, and a new dialect is built by the
 * matching factory on every call.
 *
 * @param driver Driver name as written in the connection config.
 * @returns The dialect produced by the registered factory.
 * @throws Error naming the supported drivers when the driver is not registered.
 */
export function getDialectForDriver(driver: string): KtxDialect {
  const normalized = driver.toLowerCase().trim();
  // Own-property check: a plain index lookup also matches inherited keys, so "constructor"
  // would resolve to Object and "__proto__" would fail with a TypeError instead of the
  // unsupported-driver error below.
  if (Object.prototype.hasOwnProperty.call(dialectFactories, normalized)) {
    return dialectFactories[normalized as KtxConnectionDriver]();
  }
  throw new Error(`Unsupported warehouse driver "${driver}". Supported drivers: ${supportedDrivers.join(', ')}`);
}

View file

@ -0,0 +1,199 @@
import type { KtxConnectionDriver, KtxScanConnector } from '../scan/types.js';
/** @internal */
export type KtxScopeConfigKey = 'dataset_ids' | 'databases' | 'schemas' | 'schema_names';
/**
 * Driver-agnostic view of a connector module, as returned by a registration's `load()`.
 *
 * @internal
 */
export interface KtxDriverConnectorModule {
// Reports whether `connection` is a valid config for this driver.
isConnectionConfig(connection: unknown): boolean;
// Builds the driver's scan connector from an untyped connection config.
createScanConnector(args: {
connectionId: string;
connection: unknown;
projectDir: string;
}): KtxScanConnector;
}
/** Static description of one warehouse driver plus a lazy loader for its connector module. */
export interface KtxDriverRegistration {
// Driver name the registration is stored under.
readonly driver: KtxConnectionDriver;
// Scope config key for this driver, or null when it has none (sqlite in the registry).
readonly scopeConfigKey: KtxScopeConfigKey | null;
// Capability flags set per driver in the registry.
readonly hasHistoricSqlReader: boolean;
readonly hasLocalQueryExecutor: boolean;
// Resolves the connector module; the registry entries do this with a dynamic import.
load(): Promise<KtxDriverConnectorModule>;
}
/** Builds the error reported when a connection config fails the given driver's config guard. */
function invalidConnectionConfig(driver: KtxConnectionDriver): Error {
  const message = `Connection config does not match warehouse driver "${driver}".`;
  return new Error(message);
}
/**
 * Registration table keyed by warehouse driver name.
 *
 * Each entry records the driver's scope config key and capability flags, plus a `load()` that
 * dynamically imports the driver's connector module and adapts it to KtxDriverConnectorModule:
 * `isConnectionConfig` delegates to the connector's config guard, and `createScanConnector`
 * throws the invalidConnectionConfig error when that guard rejects the connection.
 *
 * @internal
 */
export const driverRegistrations: Record<KtxConnectionDriver, KtxDriverRegistration> = {
bigquery: {
driver: 'bigquery',
scopeConfigKey: 'dataset_ids',
hasHistoricSqlReader: true,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/bigquery/connector.js');
return {
isConnectionConfig: (connection) => {
// The cast only satisfies the guard's parameter type; the guard does the runtime check.
const typedConnection = connection as Parameters<typeof m.isKtxBigQueryConnectionConfig>[0];
return m.isKtxBigQueryConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxBigQueryConnectionConfig>[0];
if (!m.isKtxBigQueryConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('bigquery');
}
return new m.KtxBigQueryScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
clickhouse: {
driver: 'clickhouse',
scopeConfigKey: 'databases',
hasHistoricSqlReader: false,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/clickhouse/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxClickHouseConnectionConfig>[0];
return m.isKtxClickHouseConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxClickHouseConnectionConfig>[0];
if (!m.isKtxClickHouseConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('clickhouse');
}
return new m.KtxClickHouseScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
mysql: {
driver: 'mysql',
scopeConfigKey: 'schemas',
hasHistoricSqlReader: false,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/mysql/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxMysqlConnectionConfig>[0];
return m.isKtxMysqlConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxMysqlConnectionConfig>[0];
if (!m.isKtxMysqlConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('mysql');
}
return new m.KtxMysqlScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
postgres: {
driver: 'postgres',
scopeConfigKey: 'schemas',
hasHistoricSqlReader: true,
hasLocalQueryExecutor: true,
load: async () => {
const m = await import('../../connectors/postgres/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxPostgresConnectionConfig>[0];
return m.isKtxPostgresConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxPostgresConnectionConfig>[0];
if (!m.isKtxPostgresConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('postgres');
}
return new m.KtxPostgresScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
sqlite: {
driver: 'sqlite',
scopeConfigKey: null,
hasHistoricSqlReader: false,
hasLocalQueryExecutor: true,
load: async () => {
const m = await import('../../connectors/sqlite/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxSqliteConnectionConfig>[0];
return m.isKtxSqliteConnectionConfig(typedConnection);
},
// Unlike most drivers, the sqlite connector also receives projectDir.
createScanConnector: ({ connectionId, connection, projectDir }) => {
const typedConnection = connection as Parameters<typeof m.isKtxSqliteConnectionConfig>[0];
if (!m.isKtxSqliteConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('sqlite');
}
return new m.KtxSqliteScanConnector({ connectionId, connection: typedConnection, projectDir });
},
};
},
},
snowflake: {
driver: 'snowflake',
scopeConfigKey: 'schema_names',
hasHistoricSqlReader: true,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/snowflake/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxSnowflakeConnectionConfig>[0];
return m.isKtxSnowflakeConnectionConfig(typedConnection);
},
// The snowflake connector also receives projectDir.
createScanConnector: ({ connectionId, connection, projectDir }) => {
const typedConnection = connection as Parameters<typeof m.isKtxSnowflakeConnectionConfig>[0];
if (!m.isKtxSnowflakeConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('snowflake');
}
return new m.KtxSnowflakeScanConnector({ connectionId, connection: typedConnection, projectDir });
},
};
},
},
sqlserver: {
driver: 'sqlserver',
scopeConfigKey: 'schemas',
hasHistoricSqlReader: false,
hasLocalQueryExecutor: false,
load: async () => {
const m = await import('../../connectors/sqlserver/connector.js');
return {
isConnectionConfig: (connection) => {
const typedConnection = connection as Parameters<typeof m.isKtxSqlServerConnectionConfig>[0];
return m.isKtxSqlServerConnectionConfig(typedConnection);
},
createScanConnector: ({ connectionId, connection }) => {
const typedConnection = connection as Parameters<typeof m.isKtxSqlServerConnectionConfig>[0];
if (!m.isKtxSqlServerConnectionConfig(typedConnection)) {
throw invalidConnectionConfig('sqlserver');
}
return new m.KtxSqlServerScanConnector({ connectionId, connection: typedConnection });
},
};
},
},
};
// Registered driver names in default sort order, computed once when the module is evaluated.
const supportedDrivers = Object.keys(driverRegistrations).sort() as KtxConnectionDriver[];
/** Type guard: true only when `driver` is an own key of the registration table. */
function isRegisteredDriver(driver: string): driver is KtxConnectionDriver {
  const ownsKey = Object.prototype.hasOwnProperty;
  return ownsKey.call(driverRegistrations, driver);
}
/**
 * Looks up the registration for a driver name after lower-casing and trimming it.
 *
 * @returns The registration, or undefined when the driver is not registered.
 */
export function getDriverRegistration(driver: string): KtxDriverRegistration | undefined {
  const key = driver.toLowerCase().trim();
  if (!isRegisteredDriver(key)) {
    return undefined;
  }
  return driverRegistrations[key];
}
/** Returns a fresh copy of the supported driver list so callers cannot mutate the shared array. */
export function listSupportedDrivers(): KtxConnectionDriver[] {
  return supportedDrivers.slice();
}

View file

@ -1,59 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createDefaultLocalQueryExecutor } from './local-query-executor.js';
// Verifies that the default local query executor dispatches on the connection's driver,
// using stub executors in place of real database clients.
describe('createDefaultLocalQueryExecutor', () => {
it('dispatches postgres and sqlite drivers to their executors', async () => {
// Each stub returns a distinct header so the assertions can tell which executor ran.
const postgres = {
execute: vi.fn(async () => ({
headers: ['pg'],
rows: [[1]],
totalRows: 1,
command: 'SELECT',
rowCount: 1,
})),
};
const sqlite = {
execute: vi.fn(async () => ({
headers: ['sqlite'],
rows: [[2]],
totalRows: 1,
command: 'SELECT',
rowCount: 1,
})),
};
const executor = createDefaultLocalQueryExecutor({ postgres, sqlite });
await expect(
executor.execute({
connectionId: 'pg',
connection: { driver: 'postgres' },
sql: 'select 1',
}),
).resolves.toMatchObject({ headers: ['pg'] });
await expect(
executor.execute({
connectionId: 'local',
connection: { driver: 'sqlite' },
sql: 'select 1',
}),
).resolves.toMatchObject({ headers: ['sqlite'] });
// Each stub must have been used exactly once, i.e. no cross-dispatch.
expect(postgres.execute).toHaveBeenCalledTimes(1);
expect(sqlite.execute).toHaveBeenCalledTimes(1);
});
it('rejects unsupported local execution drivers', async () => {
const executor = createDefaultLocalQueryExecutor({
postgres: { execute: vi.fn() },
sqlite: { execute: vi.fn() },
});
// snowflake has no local executor, so execution must reject with the configured message.
await expect(
executor.execute({
connectionId: 'warehouse',
connection: { driver: 'snowflake' },
sql: 'select 1',
}),
).rejects.toThrow('No local query executor is configured for driver "snowflake".');
});
});

View file

@ -1,3 +1,4 @@
import { driverRegistrations, getDriverRegistration } from './drivers.js';
import { createPostgresQueryExecutor } from './postgres-query-executor.js';
import type {
KtxSqlQueryExecutionInput,
@ -5,6 +6,7 @@ import type {
KtxSqlQueryExecutorPort,
} from './query-executor.js';
import { createSqliteQueryExecutor } from './sqlite-query-executor.js';
import type { KtxConnectionDriver } from '../scan/types.js';
export interface DefaultLocalQueryExecutorOptions {
postgres?: KtxSqlQueryExecutorPort;
@ -15,20 +17,43 @@ function driverFor(input: KtxSqlQueryExecutionInput): string {
return String(input.connection?.driver ?? '').toLowerCase();
}
/**
 * Builds the driver-to-executor map used for local query execution.
 *
 * Executors come from `options` when provided and from the default factories otherwise; only
 * drivers whose registration has `hasLocalQueryExecutor` set are included.
 */
function localExecutorMap(
  options: DefaultLocalQueryExecutorOptions,
): Partial<Record<KtxConnectionDriver, KtxSqlQueryExecutorPort>> {
  const available: Partial<Record<KtxConnectionDriver, KtxSqlQueryExecutorPort>> = {
    postgres: options.postgres ?? createPostgresQueryExecutor(),
    sqlite: options.sqlite ?? createSqliteQueryExecutor(),
  };
  const byDriver: Partial<Record<KtxConnectionDriver, KtxSqlQueryExecutorPort>> = {};
  for (const { driver, hasLocalQueryExecutor } of Object.values(driverRegistrations)) {
    const candidate = hasLocalQueryExecutor ? available[driver] : undefined;
    if (candidate) {
      byDriver[driver] = candidate;
    }
  }
  return byDriver;
}
/**
 * Creates the default local SQL executor port, which selects an executor from the connection's
 * driver. `execute` throws when no local query executor is configured for that driver.
 */
export function createDefaultLocalQueryExecutor(options: DefaultLocalQueryExecutorOptions = {}): KtxSqlQueryExecutorPort {
const postgres = options.postgres ?? createPostgresQueryExecutor();
const sqlite = options.sqlite ?? createSqliteQueryExecutor();
const executors = localExecutorMap(options);
return {
async execute(input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> {
const driver = driverFor(input);
if (driver === 'postgres') {
return postgres.execute(input);
const registration = getDriverRegistration(driver);
if (!registration?.hasLocalQueryExecutor) {
throw new Error(`No local query executor is configured for driver "${input.connection?.driver ?? 'unknown'}".`);
}
if (driver === 'sqlite') {
return sqlite.execute(input);
const executor = executors[registration.driver];
// The registration advertises a local executor but none was wired into the map.
if (!executor) {
throw new Error(
`Local query executor flag is enabled for driver "${registration.driver}", but no executor factory is wired.`,
);
}
throw new Error(`No local query executor is configured for driver "${input.connection?.driver ?? 'unknown'}".`);
return executor.execute(input);
},
};
}

View file

@ -1,76 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
localConnectionInfoFromConfig,
localConnectionToWarehouseDescriptor,
localConnectionTypeForConfig,
} from './local-warehouse-descriptor.js';
// Checks how local connection configs are turned into warehouse descriptors.
describe('localConnectionToWarehouseDescriptor', () => {
  it('maps local Postgres URLs to canonical warehouse descriptors', () => {
    const descriptor = localConnectionToWarehouseDescriptor('warehouse', {
      driver: 'postgres',
      url: 'postgresql://readonly@db.example.test/analytics',
    });

    expect(descriptor).toMatchObject({
      id: 'warehouse',
      connection_type: 'POSTGRESQL',
      host: 'db.example.test',
      database: 'analytics',
    });
  });

  it('maps BigQuery project and dataset from explicit fields', () => {
    const descriptor = localConnectionToWarehouseDescriptor('bq', {
      driver: 'bigquery',
      project_id: 'acme',
      dataset_id: 'warehouse',
    });

    expect(descriptor).toMatchObject({
      id: 'bq',
      connection_type: 'BIGQUERY',
      project_id: 'acme',
      dataset_id: 'warehouse',
    });
  });

  it('returns null for non-warehouse adapters', () => {
    const descriptor = localConnectionToWarehouseDescriptor('looker', {
      driver: 'looker',
      base_url: 'https://looker.example.com',
      client_id: 'client',
    });

    expect(descriptor).toBeNull();
  });
});
// Checks the connection-type labels and info records derived from local connection configs.
describe('local connection info helpers', () => {
  it('returns canonical warehouse connection types for local catalogs', () => {
    const postgresType = localConnectionTypeForConfig('warehouse', { driver: 'postgres' });
    expect(postgresType).toBe('POSTGRESQL');

    const bigQueryType = localConnectionTypeForConfig('bq', { driver: 'bigquery', project_id: 'acme' });
    expect(bigQueryType).toBe('BIGQUERY');

    const snowflakeType = localConnectionTypeForConfig('snowflake', { driver: 'snowflake' });
    expect(snowflakeType).toBe('SNOWFLAKE');
  });

  it('keeps removed driver aliases as display-only labels', () => {
    const postgresqlLabel = localConnectionTypeForConfig('warehouse', { driver: 'postgresql' } as never);
    expect(postgresqlLabel).toBe('postgresql');

    const mssqlLabel = localConnectionTypeForConfig('warehouse', { driver: 'mssql' } as never);
    expect(mssqlLabel).toBe('mssql');
  });

  it('keeps non-warehouse adapter labels for display-only local connection surfaces', () => {
    const metabaseLabel = localConnectionTypeForConfig('prod-metabase', {
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
    });
    expect(metabaseLabel).toBe('metabase');

    const missingDriverLabel = localConnectionTypeForConfig('missing-driver', {} as never);
    expect(missingDriverLabel).toBe('unknown');
  });

  it('builds nullable local connection info records', () => {
    const info = localConnectionInfoFromConfig('warehouse', { driver: 'postgres' });
    expect(info).toEqual({
      id: 'warehouse',
      name: 'warehouse',
      connectionType: 'POSTGRESQL',
    });

    const missingInfo = localConnectionInfoFromConfig('missing', undefined);
    expect(missingInfo).toBeNull();
  });
});

View file

@ -1,157 +0,0 @@
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import {
notionConnectionToPullConfig,
parseNotionConnectionConfig,
redactNotionConnectionConfig,
resolveNotionAuthToken,
} from './notion-config.js';
// Covers parsing, redaction, token resolution, and pull-config conversion for standalone
// Notion connection configs.
describe('standalone Notion connection config', () => {
// Fresh temp directory per test; used by the file-based token test and removed afterwards.
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-notion-config-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('parses selected-root Notion config with safe defaults', () => {
const parsed = parseNotionConnectionConfig({
driver: 'notion',
auth_token_ref: 'env:NOTION_TOKEN',
crawl_mode: 'selected_roots',
root_page_ids: ['page-1'],
});
// Fields not supplied above are expected to come back with these defaults.
expect(parsed).toEqual({
driver: 'notion',
auth_token: null,
auth_token_ref: 'env:NOTION_TOKEN',
crawl_mode: 'selected_roots',
root_page_ids: ['page-1'],
root_database_ids: [],
root_data_source_ids: [],
max_pages_per_run: 1000,
max_knowledge_creates_per_run: 25,
max_knowledge_updates_per_run: 20,
});
expect(parsed).not.toHaveProperty('last_successful_cursor');
});
it('parses inline Notion auth tokens without requiring auth_token_ref', () => {
// The inline token carries surrounding whitespace; the expectation below is the trimmed value.
const parsed = parseNotionConnectionConfig({
driver: 'notion',
auth_token: ' ntn_inline_token ',
crawl_mode: 'selected_roots',
root_page_ids: ['page-1'],
});
expect(parsed).toMatchObject({
driver: 'notion',
auth_token: 'ntn_inline_token',
auth_token_ref: null,
crawl_mode: 'selected_roots',
root_page_ids: ['page-1'],
});
});
it('redacts token references from display output', () => {
// The redacted view exposes only hasAuthToken, never the reference itself.
expect(
redactNotionConnectionConfig(
parseNotionConnectionConfig({
driver: 'notion',
auth_token_ref: 'file:/Users/example/.config/notion-token',
crawl_mode: 'all_accessible',
max_pages_per_run: 80,
}),
),
).toEqual({
driver: 'notion',
hasAuthToken: true,
crawlMode: 'all_accessible',
rootPageIds: [],
rootDatabaseIds: [],
rootDataSourceIds: [],
maxPagesPerRun: 80,
maxKnowledgeCreatesPerRun: 25,
maxKnowledgeUpdatesPerRun: 20,
warning: 'Anything accessible to this Notion integration can become organization knowledge.',
});
});
it('requires at least one selected root in selected_roots mode', () => {
expect(() =>
parseNotionConnectionConfig({
driver: 'notion',
auth_token_ref: 'env:NOTION_TOKEN',
crawl_mode: 'selected_roots',
}),
).toThrow('selected_roots requires at least one root page, database, or data source id');
});
it('resolves env and file token references without exposing the reference in errors', async () => {
// The token file ends with a newline; the resolved value is expected without it.
const tokenPath = join(tempDir, 'notion-token.txt');
await writeFile(tokenPath, 'ntn_file_token\n', 'utf-8');
await expect(
resolveNotionAuthToken('env:NOTION_TOKEN', {
env: { NOTION_TOKEN: 'ntn_env_token' },
}),
).resolves.toBe('ntn_env_token');
await expect(resolveNotionAuthToken(`file:${tokenPath}`)).resolves.toBe('ntn_file_token');
await expect(resolveNotionAuthToken('env:MISSING_NOTION_TOKEN', { env: {} })).rejects.toThrow(
'Notion token environment variable MISSING_NOTION_TOKEN is not set',
);
});
it('converts standalone config into adapter pull config', async () => {
// A last_successful_cursor is passed in, yet the pull config is expected to carry null.
const pullConfig = await notionConnectionToPullConfig(
parseNotionConnectionConfig({
driver: 'notion',
auth_token_ref: 'env:NOTION_TOKEN',
crawl_mode: 'all_accessible',
max_pages_per_run: 12,
max_knowledge_creates_per_run: 2,
max_knowledge_updates_per_run: 7,
last_successful_cursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}',
}),
{ env: { NOTION_TOKEN: 'ntn_env_token' } },
);
expect(pullConfig).toEqual({
authToken: 'ntn_env_token',
crawlMode: 'all_accessible',
rootPageIds: [],
rootDatabaseIds: [],
rootDataSourceIds: [],
maxPagesPerRun: 12,
maxKnowledgeCreatesPerRun: 2,
maxKnowledgeUpdatesPerRun: 7,
lastSuccessfulCursor: null,
});
});
it('uses inline Notion auth_token when building adapter pull config', async () => {
// Both an inline token and a stale reference are set; the inline token must win, and the
// env/file lookups must not be consulted.
const pullConfig = await notionConnectionToPullConfig(
parseNotionConnectionConfig({
driver: 'notion',
auth_token: 'ntn_inline_token',
auth_token_ref: 'env:STALE_NOTION_TOKEN',
crawl_mode: 'all_accessible',
}),
{
env: {},
readTextFile: async () => {
throw new Error('readTextFile should not be called for inline auth_token');
},
},
);
expect(pullConfig.authToken).toBe('ntn_inline_token');
});
});

View file

@ -1,103 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createPostgresQueryExecutor } from './postgres-query-executor.js';
/**
 * Creates a stub Postgres client that records every query input in `calls`.
 *
 * 'BEGIN READ ONLY' and 'COMMIT' get empty transaction-control results; any other input gets a
 * fixed two-row SELECT result.
 */
function makeClient() {
  const calls: unknown[] = [];
  const controlResult = (command: string) => ({ rows: [], fields: [], rowCount: null, command });

  const connect = vi.fn(async () => undefined);
  const query = vi.fn(async (input: unknown) => {
    calls.push(input);
    switch (input) {
      case 'BEGIN READ ONLY':
        return controlResult('BEGIN');
      case 'COMMIT':
        return controlResult('COMMIT');
      default:
        return {
          rows: [
            ['paid', 2],
            ['open', 1],
          ],
          fields: [{ name: 'status' }, { name: 'order_count' }],
          rowCount: 2,
          command: 'SELECT',
        };
    }
  });
  const end = vi.fn(async () => undefined);

  return { client: { connect, query, end }, calls };
}
// Verifies the Postgres query executor's transaction handling against stub clients.
describe('createPostgresQueryExecutor', () => {
it('runs a read-only transaction in array row mode and closes the client', async () => {
const { client, calls } = makeClient();
const executor = createPostgresQueryExecutor({
clientFactory: vi.fn(() => client),
});
const result = await executor.execute({
connectionId: 'warehouse',
connection: { driver: 'postgres', url: 'postgres://example/db' },
sql: 'select status, count(*) as order_count from public.orders group by status',
maxRows: 50,
});
expect(client.connect).toHaveBeenCalledTimes(1);
// Expected query order: BEGIN READ ONLY, the user SQL wrapped with the maxRows limit, COMMIT.
expect(calls[0]).toBe('BEGIN READ ONLY');
expect(calls[1]).toEqual({
text: 'select * from (select status, count(*) as order_count from public.orders group by status) as ktx_query_result limit 50',
rowMode: 'array',
});
expect(calls[2]).toBe('COMMIT');
expect(client.end).toHaveBeenCalledTimes(1);
expect(result).toEqual({
headers: ['status', 'order_count'],
rows: [
['paid', 2],
['open', 1],
],
totalRows: 2,
command: 'SELECT',
rowCount: 2,
});
});
it('rolls back and closes the client when query execution fails', async () => {
// This stub accepts only the transaction-control statements; any other query throws.
const client = {
connect: vi.fn(async () => undefined),
query: vi.fn(async (input: unknown) => {
if (input === 'BEGIN READ ONLY' || input === 'ROLLBACK') {
return { rows: [], fields: [], rowCount: null, command: String(input) };
}
throw new Error('syntax error');
}),
end: vi.fn(async () => undefined),
};
const executor = createPostgresQueryExecutor({
clientFactory: vi.fn(() => client),
});
await expect(
executor.execute({
connectionId: 'warehouse',
connection: { driver: 'postgres', url: 'postgres://example/db' },
sql: 'select * from broken',
maxRows: 10,
}),
).rejects.toThrow('syntax error');
expect(client.query).toHaveBeenCalledWith('ROLLBACK');
expect(client.end).toHaveBeenCalledTimes(1);
});
it('requires a Postgres url', async () => {
// No url on the connection: execution must reject with the message below.
const executor = createPostgresQueryExecutor({ clientFactory: vi.fn() });
await expect(
executor.execute({
connectionId: 'warehouse',
connection: { driver: 'postgres' },
sql: 'select 1',
}),
).rejects.toThrow('Local Postgres execution requires connections.warehouse.url');
});
});

View file

@ -1,30 +0,0 @@
import { describe, expect, it } from 'vitest';
import { assertReadOnlySql, limitSqlForExecution } from './read-only-sql.js';
// Guards the allow-list: SELECT/WITH statements pass through, other statements throw.
describe('assertReadOnlySql', () => {
  it('allows select and with queries', () => {
    const plainSelect = 'select * from orders';
    expect(assertReadOnlySql(plainSelect)).toBe(plainSelect);

    const cteResult = assertReadOnlySql('with paid as (select * from orders) select * from paid');
    expect(cteResult).toContain('with paid');
  });

  it('rejects mutating statements before opening a database connection', () => {
    const expectedMessage = 'Only read-only SELECT/WITH queries can be executed locally';
    for (const statement of ['delete from orders', 'create table x(id int)']) {
      expect(() => assertReadOnlySql(statement)).toThrow(expectedMessage);
    }
  });
});
// Covers both outcomes: wrapping with a row limit, and plain trimming when no limit is given.
describe('limitSqlForExecution', () => {
  it('wraps compiled SQL and strips trailing semicolons', () => {
    const limited = limitSqlForExecution('select * from public.orders; ', 25);
    expect(limited).toBe('select * from (select * from public.orders) as ktx_query_result limit 25');
  });

  it('returns the trimmed SQL when no maxRows value is provided', () => {
    const unlimited = limitSqlForExecution('select * from orders; ', undefined);
    expect(unlimited).toBe('select * from orders');
  });
});

View file

@ -1,139 +0,0 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import Database from 'better-sqlite3';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { createSqliteQueryExecutor, sqliteDatabasePathFromConnection } from './sqlite-query-executor.js';
// Every case runs against a real SQLite file created in a fresh temp directory.
describe('createSqliteQueryExecutor', () => {
  let tempDir: string;
  let dbPath: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-sqlite-query-'));
    dbPath = join(tempDir, 'warehouse.db');

    // Seed three orders (two paid, one open) and release the handle before the test body runs.
    const seedDb = new Database(dbPath);
    seedDb.exec(`
CREATE TABLE orders (
id INTEGER PRIMARY KEY,
status TEXT NOT NULL,
amount INTEGER NOT NULL
);
INSERT INTO orders (status, amount) VALUES
('paid', 20),
('paid', 30),
('open', 10);
`);
    seedDb.close();
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('executes read-only SELECT SQL against a relative SQLite path', async () => {
    const executor = createSqliteQueryExecutor();

    const outcome = await executor.execute({
      connectionId: 'warehouse',
      projectDir: tempDir,
      connection: { driver: 'sqlite', path: 'warehouse.db' },
      sql: 'select status, count(*) as order_count from orders group by status order by status',
      maxRows: 10,
    });

    expect(outcome).toEqual({
      headers: ['status', 'order_count'],
      rows: [
        ['open', 1],
        ['paid', 2],
      ],
      totalRows: 2,
      command: 'SELECT',
      rowCount: 2,
    });
  });

  it('supports file urls for SQLite database paths', async () => {
    const resolved = sqliteDatabasePathFromConnection({
      connectionId: 'warehouse',
      projectDir: tempDir,
      connection: { driver: 'sqlite', url: `file://${dbPath}` },
      sql: 'select 1',
    });

    expect(resolved).toBe(dbPath);
  });

  it('resolves file references for SQLite path fields', async () => {
    // The `path` field points at a text file whose content is the real database path.
    const pointerPath = join(tempDir, 'sqlite-path.txt');
    writeFileSync(pointerPath, dbPath, 'utf-8');

    const resolved = sqliteDatabasePathFromConnection({
      connectionId: 'warehouse',
      projectDir: tempDir,
      connection: { driver: 'sqlite', path: `file:${pointerPath}` },
      sql: 'select 1',
    });

    expect(resolved).toBe(dbPath);
  });

  it('resolves env references for SQLite database urls', async () => {
    const savedUrl = process.env.KTX_SQLITE_TEST_URL;
    process.env.KTX_SQLITE_TEST_URL = `sqlite:${dbPath}`;
    try {
      const resolved = sqliteDatabasePathFromConnection({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite', url: 'env:KTX_SQLITE_TEST_URL' },
        sql: 'select 1',
      });
      expect(resolved).toBe(dbPath);
    } finally {
      // Put the process environment back exactly as it was found.
      if (savedUrl !== undefined) {
        process.env.KTX_SQLITE_TEST_URL = savedUrl;
      } else {
        delete process.env.KTX_SQLITE_TEST_URL;
      }
    }
  });

  it('rejects mutating SQL before opening the database', async () => {
    const executor = createSqliteQueryExecutor();

    const pending = executor.execute({
      connectionId: 'warehouse',
      projectDir: tempDir,
      connection: { driver: 'sqlite', path: 'warehouse.db' },
      sql: 'delete from orders',
    });

    await expect(pending).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
  });

  it('requires a SQLite driver and a database path', async () => {
    const executor = createSqliteQueryExecutor();

    const wrongDriver = executor.execute({
      connectionId: 'warehouse',
      projectDir: tempDir,
      connection: { driver: 'postgres', path: 'warehouse.db' },
      sql: 'select 1',
    });
    await expect(wrongDriver).rejects.toThrow('Local SQLite execution cannot run driver "postgres"');

    const missingLocation = executor.execute({
      connectionId: 'warehouse',
      projectDir: tempDir,
      connection: { driver: 'sqlite' },
      sql: 'select 1',
    });
    await expect(missingLocation).rejects.toThrow(
      'Local SQLite execution requires connections.warehouse.path or connections.warehouse.url',
    );
  });
});

View file

@ -1,34 +0,0 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { resolveKtxConfigReference, resolveKtxHomePath } from './config-reference.js';
// Covers the three reference forms accepted by resolveKtxConfigReference (env:, file:, literal)
// plus home-directory expansion in resolveKtxHomePath.
describe('KTX config references', () => {
  it('resolves env references without returning empty values', () => {
    expect(resolveKtxConfigReference('env:AI_GATEWAY_API_KEY', { AI_GATEWAY_API_KEY: ' gateway-key ' })).toBe(
      'gateway-key',
    );
    expect(resolveKtxConfigReference('env:AI_GATEWAY_API_KEY', { AI_GATEWAY_API_KEY: ' ' })).toBeUndefined();
    expect(resolveKtxConfigReference('env:AI_GATEWAY_API_KEY', {})).toBeUndefined();
  });

  it('resolves file references and trims file content', async () => {
    // NOTE(review): this directory is created under the OS temp dir and never removed by the test.
    const dir = join(tmpdir(), `ktx-config-reference-${process.pid}`);
    await mkdir(dir, { recursive: true });
    const keyPath = join(dir, 'gateway-key.txt');
    await writeFile(keyPath, 'file-gateway-key\n', 'utf8');
    expect(resolveKtxConfigReference(`file:${keyPath}`, {})).toBe('file-gateway-key');
  });

  it('returns literal values unchanged after trimming blank-only values', () => {
    expect(resolveKtxConfigReference('provider/model', {})).toBe('provider/model');
    expect(resolveKtxConfigReference(' ', {})).toBeUndefined();
    expect(resolveKtxConfigReference(undefined, {})).toBeUndefined();
  });

  it('resolves home-prefixed paths', () => {
    const resolved = resolveKtxHomePath('~/ktx/key.txt');
    expect(resolved).toContain('/ktx/key.txt');
    // The input itself already contains '/ktx/key.txt', so the check above passes even when
    // nothing is expanded. Also require that the leading '~' is gone.
    expect(resolved).not.toMatch(/^~/);
  });
});

View file

@ -1,75 +0,0 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { SimpleGit } from 'simple-git';
import type { KtxCoreConfig } from './config.js';
import { createSimpleGit } from './git-env.js';
import { GitService } from './git.service.js';
// Drives GitService.assertWorktreeClean against a real repository in a temp directory.
// Each test puts the repository into one in-progress state and expects a rejection.
describe('GitService.assertWorktreeClean', () => {
  let workdir: string;
  let git: SimpleGit;
  let gitService: GitService;

  beforeEach(async () => {
    workdir = await mkdtemp(join(tmpdir(), 'gitsvc-clean-'));
    git = createSimpleGit(workdir);
    await git.init();
    await git.addConfig('user.email', 't@test');
    await git.addConfig('user.name', 'Test');
    await writeFile(join(workdir, 'init'), 'init');
    await git.add('.');
    await git.commit('init');

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: workdir, homeDir: workdir },
      git: { userName: 'Test', userEmail: 't@test' },
    };
    gitService = new GitService(coreConfig);
    // Point the service at the repository prepared above by overwriting its `git` and
    // `configDir` fields. Object.assign performs the same property writes without `as any`.
    Object.assign(gitService, { git, configDir: workdir });
  });

  afterEach(async () => rm(workdir, { recursive: true, force: true }));

  it('does not throw on a clean worktree', async () => {
    await expect(gitService.assertWorktreeClean()).resolves.toBeUndefined();
  });

  it('throws when MERGE_HEAD exists', async () => {
    await writeFile(join(workdir, '.git', 'MERGE_HEAD'), 'deadbeef\n');
    await expect(gitService.assertWorktreeClean()).rejects.toThrow(/MERGE_HEAD/);
  });

  it('throws when CHERRY_PICK_HEAD exists', async () => {
    await writeFile(join(workdir, '.git', 'CHERRY_PICK_HEAD'), 'deadbeef\n');
    await expect(gitService.assertWorktreeClean()).rejects.toThrow(/CHERRY_PICK_HEAD/);
  });

  it('throws when REVERT_HEAD exists', async () => {
    await writeFile(join(workdir, '.git', 'REVERT_HEAD'), 'deadbeef\n');
    await expect(gitService.assertWorktreeClean()).rejects.toThrow(/REVERT_HEAD/);
  });

  it('throws when sequencer/todo exists (interrupted multi-commit revert/cherry-pick)', async () => {
    await mkdir(join(workdir, '.git', 'sequencer'), { recursive: true });
    await writeFile(join(workdir, '.git', 'sequencer', 'todo'), 'pick deadbeef foo\n');
    await expect(gitService.assertWorktreeClean()).rejects.toThrow(/sequencer/);
  });

  it('throws when the index has unmerged paths', async () => {
    // Record the branch `git init` created, so the test works with any init.defaultBranch
    // instead of guessing between 'master' and 'main'.
    const baseBranch = (await git.revparse(['--abbrev-ref', 'HEAD'])).trim();

    await git.checkoutLocalBranch('a');
    await writeFile(join(workdir, 'shared'), 'A version');
    await git.add('.');
    await git.commit('a');

    await git.checkout(baseBranch);
    await git.checkoutLocalBranch('b');
    await writeFile(join(workdir, 'shared'), 'B version');
    await git.add('.');
    await git.commit('b');

    // Both branches add `shared` with different content. The merge error is swallowed on
    // purpose; the repository state it leaves behind is what the assertion inspects.
    await git.raw(['merge', 'a']).catch(() => undefined);
    await expect(gitService.assertWorktreeClean()).rejects.toThrow();
  });
});

View file

@ -1,78 +0,0 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { mkdir, mkdtemp, readdir, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { SimpleGit } from 'simple-git';
import type { KtxCoreConfig } from './config.js';
import { createSimpleGit } from './git-env.js';
import { GitService } from './git.service.js';
// Drives GitService.deleteDirectories against a real repository in a temp directory.
describe('GitService.deleteDirectories', () => {
  let workdir: string;
  let git: SimpleGit;
  let gitService: GitService;

  beforeEach(async () => {
    workdir = await mkdtemp(join(tmpdir(), 'gitsvc-dd-'));
    git = createSimpleGit(workdir);
    await git.init();
    await git.addConfig('user.email', 't@test');
    await git.addConfig('user.name', 'Test');
    await writeFile(join(workdir, 'keep'), 'k');
    await git.add('.');
    await git.commit('init');

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: workdir, homeDir: workdir },
      git: { userName: 'Test', userEmail: 't@test' },
    };
    gitService = new GitService(coreConfig);
    // Point the service at the repository prepared above by overwriting its `git` and
    // `configDir` fields. Object.assign performs the same property writes without `as any`.
    Object.assign(gitService, { git, configDir: workdir });
  });

  afterEach(async () => rm(workdir, { recursive: true, force: true }));

  it('removes multiple directories in a single commit', async () => {
    for (const name of ['a', 'b', 'c']) {
      await mkdir(join(workdir, name), { recursive: true });
      await writeFile(join(workdir, name, 'f.txt'), name);
    }
    await git.add('.');
    await git.commit('seed 3 dirs');
    const beforeCommits = (await git.log()).total;

    const result = await gitService.deleteDirectories(['a', 'b'], 'gc: drop a+b', 'System User', 'system@example.com');

    expect(result.commitHash).toBeTruthy();
    const entries = await readdir(workdir);
    expect(entries).not.toContain('a');
    expect(entries).not.toContain('b');
    expect(entries).toContain('c');
    // Exactly one new commit covers both removals.
    const afterCommits = (await git.log()).total;
    expect(afterCommits).toBe(beforeCommits + 1);
  });

  // The title previously said "null hash", but the asserted value is the empty string.
  it('no-ops and returns an empty hash when the input list is empty', async () => {
    const result = await gitService.deleteDirectories([], 'empty', 'X', 'x@example.com');
    expect(result.commitHash).toBe('');
    expect(result.created).toBe(false);
  });

  it('ignores paths that have already been deleted — commits only the remaining ones', async () => {
    await mkdir(join(workdir, 'stale'), { recursive: true });
    await writeFile(join(workdir, 'stale', 'x'), 'x');
    await git.add('.');
    await git.commit('seed stale');

    // 'missing' never existed in the repository; only 'stale' can actually be removed.
    const result = await gitService.deleteDirectories(
      ['stale', 'missing'],
      'gc: drop stale + missing',
      'System User',
      'system@example.com',
    );

    expect(result.commitHash).toBeTruthy();
    const entries = await readdir(workdir);
    expect(entries).not.toContain('stale');
  });
});

View file

@ -1,45 +0,0 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { GitService } from './git.service.js';
/**
 * Creates a GitService whose home directory is a fresh temp directory and whose config
 * directory is `<home>/config`, then runs `onModuleInit()` on it.
 *
 * @returns the temp home directory, the config directory, and the initialized service.
 */
async function makeGit() {
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-'));
  const configDir = join(homeDir, 'config');
  const systemEmail = 'system@example.com';

  const git = new GitService({
    storage: { configDir, homeDir },
    git: {
      userName: 'System User',
      userEmail: systemEmail,
      bootstrapMessage: 'init',
      bootstrapAuthor: 'system',
      bootstrapAuthorEmail: systemEmail,
    },
  });
  await git.onModuleInit();

  return { homeDir, configDir, git };
}
// Round-trips a change through a patch file: write the patch from one history, apply it in a
// worktree checked out at the base commit, and confirm the file content matches.
describe('GitService patch helpers', () => {
  it('collects binary-safe no-rename patches and applies them with --3way --index', async () => {
    const { homeDir, configDir, git } = await makeGit();
    try {
      const pagePath = 'wiki/global/page.md';
      await mkdir(join(configDir, 'wiki/global'), { recursive: true });

      // Two commits: `base` holds the old content, HEAD holds the new content.
      await writeFile(join(configDir, pagePath), 'old\n');
      await git.commitFiles([pagePath], 'add page', 'System User', 'system@example.com');
      const base = await git.revParseHead();
      await writeFile(join(configDir, pagePath), 'new\n');
      await git.commitFiles([pagePath], 'edit page', 'System User', 'system@example.com');

      const patchPath = join(homeDir, 'proposal.patch');
      await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath);

      // Apply the patch inside a worktree that starts at `base`.
      const targetDir = join(homeDir, 'target');
      await git.addWorktree(targetDir, 'target', base);
      const targetGit = git.forWorktree(targetDir);
      await targetGit.applyPatchFile3WayIndex(patchPath);
      await targetGit.commitStaged('apply proposal', 'System User', 'system@example.com');

      await expect(readFile(join(targetDir, pagePath), 'utf-8')).resolves.toBe('new\n');
    } finally {
      // The repository, the patch file and the worktree all live under homeDir, so one
      // recursive removal cleans up everything whether the test passed or failed.
      await rm(homeDir, { recursive: true, force: true });
    }
  });
});

View file

@ -1,56 +0,0 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { SimpleGit } from 'simple-git';
import type { KtxCoreConfig } from './config.js';
import { createSimpleGit } from './git-env.js';
import { GitService } from './git.service.js';
// Drives GitService.resetHardTo against a real repository in a temp directory.
describe('GitService.resetHardTo', () => {
  let workdir: string;
  let git: SimpleGit;
  let gitService: GitService;

  /** Reads the current HEAD SHA straight from the underlying repository. */
  const headSha = async (): Promise<string> => (await git.revparse(['HEAD'])).trim();

  /** Writes `content` to `name` in the workdir and commits it with `name` as the message. */
  const commitNewFile = async (name: string, content: string): Promise<void> => {
    await writeFile(join(workdir, name), content);
    await git.add('.');
    await git.commit(name);
  };

  beforeEach(async () => {
    workdir = await mkdtemp(join(tmpdir(), 'gitsvc-reset-'));
    git = createSimpleGit(workdir);
    await git.init();
    await git.addConfig('user.email', 't@test');
    await git.addConfig('user.name', 'Test');
    await commitNewFile('init', 'init');

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: workdir, homeDir: workdir },
      git: { userName: 'Test', userEmail: 't@test' },
    };
    gitService = new GitService(coreConfig);
    // Point the service at the repository prepared above.
    Object.assign(gitService, { git, configDir: workdir });
  });

  afterEach(async () => rm(workdir, { recursive: true, force: true }));

  it('rewinds HEAD to the target SHA, removing later commits and their files', async () => {
    const baseSha = await headSha();
    await commitNewFile('a', 'a1');
    await commitNewFile('b', 'b1');

    await gitService.resetHardTo(baseSha);

    expect(await headSha()).toBe(baseSha);
    // Files introduced after baseSha can no longer be read from the working tree.
    for (const name of ['a', 'b']) {
      expect(await readFile(join(workdir, name), 'utf-8').catch(() => null)).toBeNull();
    }
  });

  it('is a no-op when target SHA equals current HEAD', async () => {
    const sha = await headSha();
    await gitService.resetHardTo(sha);
    expect(await headSha()).toBe(sha);
  });
});

View file

@ -1,450 +0,0 @@
import { mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxCoreConfig } from './config.js';
import { GitService } from './git.service.js';
// These tests drive a real git repo inside a temp directory — simple-git shells out to the
// system `git` binary. They are fast enough to run as unit tests and catch real issues that
// would be invisible with mocked git.
describe('GitService', () => {
let service: GitService;
let tempDir: string;

// Every test gets its own repository: a brand-new temp directory serves as both the
// config dir and the home dir, and onModuleInit() is run before the test body starts.
beforeEach(async () => {
  tempDir = await mkdtemp(join(tmpdir(), 'git-service-spec-'));
  const storage = { configDir: tempDir, homeDir: tempDir };
  const coreConfig: KtxCoreConfig = {
    storage,
    git: {
      userName: 'Test User',
      userEmail: 'test@example.com',
      bootstrapMessage: 'Initialize test config repo',
      bootstrapAuthor: 'test-system',
      bootstrapAuthorEmail: 'system@example.com',
    },
  };
  service = new GitService(coreConfig);
  await service.onModuleInit();
});

afterEach(() => rm(tempDir, { recursive: true, force: true }));

/**
 * Writes `content` to `filePath` (relative to the temp repository) and commits that file
 * as author 'Test' <test@example.com>.
 */
async function writeAndCommit(filePath: string, content: string, message = 'msg') {
  const absolutePath = join(tempDir, filePath);
  await writeFile(absolutePath, content, 'utf-8');
  return service.commitFile(filePath, message, 'Test', 'test@example.com');
}
// Checks what onModuleInit() leaves behind in a previously empty directory.
describe('cold-start bootstrap commit', () => {
  it('writes an empty commit on init so HEAD always resolves', async () => {
    // The suite-level beforeEach has already run onModuleInit() on an empty temp dir.
    const headSha = await service.revParseHead();
    expect(headSha).toMatch(/^[0-9a-f]{40}$/);
  });

  it('does not double-commit when re-initialized', async () => {
    const headBefore = await service.revParseHead();
    await service.onModuleInit();
    const headAfter = await service.revParseHead();
    expect(headAfter).toBe(headBefore);
  });

  it('keeps git auto-maintenance attached for deterministic cleanup', async () => {
    const gitConfigText = await readFile(join(tempDir, '.git', 'config'), 'utf-8');
    expect(gitConfigText).toMatch(/\[gc]\n\s+autoDetach = false/);
    expect(gitConfigText).toMatch(/\[maintenance]\n\s+autoDetach = false/);
  });

  it('initializes when release automation sets GIT_ASKPASS', async () => {
    const releaseEnvDir = await mkdtemp(join(tmpdir(), 'git-service-release-env-'));
    const savedAskPass = process.env.GIT_ASKPASS;
    process.env.GIT_ASKPASS = 'echo';
    try {
      const releaseEnvService = new GitService({
        storage: { configDir: releaseEnvDir, homeDir: releaseEnvDir },
        git: {
          userName: 'Test User',
          userEmail: 'test@example.com',
          bootstrapMessage: 'Initialize test config repo',
          bootstrapAuthor: 'test-system',
          bootstrapAuthorEmail: 'system@example.com',
        },
      });
      await expect(releaseEnvService.onModuleInit()).resolves.toBeUndefined();
    } finally {
      // Restore the environment variable exactly as found, then drop the extra repository.
      if (savedAskPass !== undefined) {
        process.env.GIT_ASKPASS = savedAskPass;
      } else {
        delete process.env.GIT_ASKPASS;
      }
      await rm(releaseEnvDir, { recursive: true, force: true });
    }
  });
});
// `created` tells callers whether commitFile actually produced a new commit.
describe('commitFile `created` flag', () => {
  it('is true for a real commit', async () => {
    const { created } = await writeAndCommit('a.md', '# Hello');
    expect(created).toBe(true);
  });

  it('is false on a no-op write (content unchanged)', async () => {
    await writeAndCommit('a.md', '# Hello');
    // Same path, same content: nothing new to commit.
    const { created } = await writeAndCommit('a.md', '# Hello', 'unused');
    expect(created).toBe(false);
  });
});
// Round-trips notes attached to commits through addNote and getNote.
describe('addNote / getNote', () => {
  it('attaches a note and reads it back', async () => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    await service.addNote(commitHash, 'Rich message from LLM');
    const note = await service.getNote(commitHash);
    expect(note).toBe('Rich message from LLM');
  });

  it('returns undefined when no note exists', async () => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    const note = await service.getNote(commitHash);
    expect(note).toBeUndefined();
  });

  it('overwrites an existing note (idempotent retries)', async () => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    for (const text of ['First', 'Second']) {
      await service.addNote(commitHash, text);
    }
    const note = await service.getNote(commitHash);
    expect(note).toBe('Second');
  });

  it('skips empty/whitespace messages silently', async () => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    await service.addNote(commitHash, ' ');
    const note = await service.getNote(commitHash);
    expect(note).toBeUndefined();
  });
});
// The first history entry exposes the commit's note, when there is one, as `enhancedMessage`.
describe('getFileHistory', () => {
  it('surfaces enhancedMessage when a note is present', async () => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    await service.addNote(commitHash, 'Note body');
    const [latest] = await service.getFileHistory('a.md');
    expect(latest?.enhancedMessage).toBe('Note body');
  });

  it('leaves enhancedMessage undefined when no note is attached', async () => {
    await writeAndCommit('a.md', '# Hello');
    const [latest] = await service.getFileHistory('a.md');
    expect(latest?.enhancedMessage).toBeUndefined();
  });
});
// getCommitDiff returns patch text for one commit, limited to one path.
describe('getCommitDiff', () => {
  it('returns the patch scoped to the requested path', async () => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    const patchText = await service.getCommitDiff(commitHash, 'a.md');
    expect(patchText).toContain('diff --git');
    expect(patchText).toContain('Hello');
  });

  // NOTE(review): onModuleInit() appears to create a bootstrap commit before this one, so this
  // commit may not be the repository's root commit — confirm the intended coverage.
  it('handles the repository initial commit without throwing', async () => {
    const { commitHash } = await writeAndCommit('first.md', 'first');
    const pendingDiff = service.getCommitDiff(commitHash, 'first.md');
    await expect(pendingDiff).resolves.toBeDefined();
  });
});
// squashTo collapses the commits made after `preHead` into one commit, provided they were
// all written by the given author.
describe('squashTo', () => {
  // Author identity shared by the "ours" commits and by every squash request below.
  const systemIdentity = { author: 'System User', authorEmail: 'system@example.com' };

  /** Same as the suite-level writeAndCommit, but committing as the system author. */
  async function writeAsSystem(filePath: string, content: string, message = 'msg') {
    await writeFile(join(tempDir, filePath), content, 'utf-8');
    return service.commitFile(filePath, message, systemIdentity.author, systemIdentity.authorEmail);
  }

  it('collapses 3 commits after preHead into a single commit', async () => {
    const { commitHash: preHead } = await writeAsSystem('a.md', 'v1');
    await writeAsSystem('b.md', 'b', 'add b');
    await writeAsSystem('c.md', 'c', 'add c');
    await writeAsSystem('a.md', 'v2', 'update a');

    const result = await service.squashTo(preHead, {
      message: 'Ingest: bundle 3 writes',
      ...systemIdentity,
    });

    expect(result.squashed).toBe(true);
    expect(result.squashedCount).toBe(3);
    expect(result.commitHash).toBeTruthy();
    expect(result.commitHash).not.toBe(preHead);
    const squashedHash = result.commitHash;
    if (!squashedHash) {
      throw new Error('Expected squash commit hash');
    }

    // The squashed commit should preserve the final tree state.
    expect(await service.getFileAtCommit('a.md', squashedHash)).toBe('v2');
    expect(await service.getFileAtCommit('b.md', squashedHash)).toBe('b');
  });

  it('is a no-op when preHead equals HEAD', async () => {
    const { commitHash: preHead } = await writeAsSystem('a.md', 'v1');

    const result = await service.squashTo(preHead, {
      message: 'nothing to squash',
      ...systemIdentity,
    });

    expect(result.squashed).toBe(false);
    expect(result.commitHash).toBe(preHead);
  });

  it('skips squash when a foreign-author commit sits between preHead and HEAD', async () => {
    const { commitHash: preHead } = await writeAsSystem('a.md', 'v1');
    await writeAsSystem('b.md', 'from us', 'ours');
    // Foreign commit: written through the suite helper, which uses a different author.
    await writeAndCommit('c.md', 'from someone else', 'foreign');
    await writeAsSystem('d.md', 'ours again', 'ours 2');

    const result = await service.squashTo(preHead, {
      message: 'should be skipped',
      ...systemIdentity,
    });

    expect(result.squashed).toBe(false);
    expect(result.reason).toContain('foreign');
    expect(result.squashedCount).toBe(3);
  });

  it('returns cleanly when preHead is empty (no starting commit)', async () => {
    const result = await service.squashTo('', {
      message: 'would have squashed',
      ...systemIdentity,
    });

    expect(result.squashed).toBe(false);
    expect(result.commitHash).toBeNull();
  });
});
// Covers addWorktree / removeWorktree / deleteBranch. Worktrees are created as siblings of the
// suite's temp repository, so the suite-level afterEach does not remove them. Every test
// therefore cleans up in `finally`, so a failing assertion cannot leave a directory behind.
describe('worktree lifecycle', () => {
  // macOS canonicalizes tmp paths (/var/folders → /private/var/folders) when git
  // returns them from `worktree list`. Resolve through realpath() before comparing.
  const canonicalSiblingPath = async (suffix: string): Promise<string> => {
    const parent = await realpath(join(tempDir, '..'));
    return join(parent, `wt-${Date.now()}-${suffix}`);
  };

  // Best-effort teardown: both steps tolerate a worktree that is already gone.
  const cleanupWorktree = async (wtDir: string): Promise<void> => {
    await service.removeWorktree(wtDir).catch(() => undefined);
    await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
  };

  it('addWorktree creates a branch + directory at the given startSha', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    const wtDir = await canonicalSiblingPath('add');
    try {
      await service.addWorktree(wtDir, 'session/alpha', commitHash);
      const list = await service.listWorktrees();
      expect(list.find((e) => e.path === wtDir && e.branch === 'refs/heads/session/alpha')).toBeTruthy();
    } finally {
      await cleanupWorktree(wtDir);
    }
  });

  it('removeWorktree detaches the worktree entry', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    const wtDir = await canonicalSiblingPath('rm');
    try {
      await service.addWorktree(wtDir, 'session/beta', commitHash);
      await service.removeWorktree(wtDir);
      const list = await service.listWorktrees();
      expect(list.find((e) => e.path === wtDir)).toBeFalsy();
    } finally {
      // Covers the case where removeWorktree itself failed and left the directory on disk.
      await cleanupWorktree(wtDir);
    }
  });

  it('deleteBranch removes a branch ref', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    const wtDir = await canonicalSiblingPath('br');
    try {
      await service.addWorktree(wtDir, 'session/gamma', commitHash);
      await service.removeWorktree(wtDir);
      await service.deleteBranch('session/gamma', true);
      // Reach into the service's simple-git instance to list local branches directly.
      const branches = await (service as unknown as { git: import('simple-git').SimpleGit }).git.branchLocal();
      expect(branches.all).not.toContain('session/gamma');
    } finally {
      await cleanupWorktree(wtDir);
    }
  });
});
// forWorktree returns a GitService scoped to a worktree directory. Worktrees are created as
// siblings of the suite's temp repository, so they are cleaned up in `finally` to avoid
// leaving directories behind when an assertion fails.
describe('forWorktree', () => {
  /**
   * Creates a worktree next to the temp repository, runs `body` with its path, and always
   * removes the worktree afterwards (best effort), whether `body` passed or threw.
   */
  const withWorktree = async (
    suffix: string,
    branch: string,
    startSha: Parameters<GitService['addWorktree']>[2],
    body: (wtDir: string) => Promise<void>,
  ): Promise<void> => {
    // realpath() so the path matches what git reports on platforms that canonicalize tmp dirs.
    const parent = await realpath(join(tempDir, '..'));
    const wtDir = join(parent, `wt-${Date.now()}-${suffix}`);
    try {
      await service.addWorktree(wtDir, branch, startSha);
      await body(wtDir);
    } finally {
      await service.removeWorktree(wtDir).catch(() => undefined);
      await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
    }
  };

  it('returns a GitService whose operations run inside the given worktree', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    await withWorktree('fw', 'session/delta', commitHash, async (wtDir) => {
      const scoped = service.forWorktree(wtDir);
      expect(await scoped.revParseHead()).toBe(commitHash);
    });
  });

  it('serializes concurrent commits from scoped services targeting the same worktree', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    await withWorktree('fw-concurrent', 'session/concurrent', commitHash, async (wtDir) => {
      // Two independent scoped services pointing at the same worktree directory.
      const first = service.forWorktree(wtDir);
      const second = service.forWorktree(wtDir);
      await writeFile(join(wtDir, 'a.md'), 'a\n', 'utf-8');
      await writeFile(join(wtDir, 'b.md'), 'b\n', 'utf-8');

      // Start both commits at the same time; each must produce a valid commit.
      const [a, b] = await Promise.all([
        first.commitFile('a.md', 'add a', 'System User', 'system@example.com'),
        second.commitFile('b.md', 'add b', 'System User', 'system@example.com'),
      ]);

      expect(a.commitHash).toMatch(/^[0-9a-f]{40}$/);
      expect(b.commitHash).toMatch(/^[0-9a-f]{40}$/);
      await expect(first.getFileAtCommit('a.md', a.commitHash)).resolves.toBe('a\n');
      await expect(second.getFileAtCommit('b.md', b.commitHash)).resolves.toBe('b\n');
    });
  });
});
describe('squashMergeIntoMain', () => {
it('merges a session branch as one commit on main, returning the new SHA + touched paths', async () => {
const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-sm`);
await service.addWorktree(wtDir, 'session/happy', baseSha);
const scoped = service.forWorktree(wtDir);
await writeFile(join(wtDir, 'a.yaml'), 'one: 1\n', 'utf-8');
await scoped.commitFile('a.yaml', 'wip a', 'System User', 'system@example.com');
await writeFile(join(wtDir, 'b.yaml'), 'two: 2\n', 'utf-8');
await scoped.commitFile('b.yaml', 'wip b', 'System User', 'system@example.com');
const result = await service.squashMergeIntoMain(
'session/happy',
'System User',
'system@example.com',
'Memory capture: 2 files [chat=abcd1234]',
);
expect(result.ok).toBe(true);
if (!result.ok) {
throw new Error('unreachable');
}
expect(result.squashSha).toMatch(/^[0-9a-f]{40}$/);
expect(result.touchedPaths.sort()).toEqual(['a.yaml', 'b.yaml']);
const mainHead = await service.revParseHead();
expect(mainHead).toBe(result.squashSha);
expect(mainHead).not.toBe(baseSha);
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
it('returns ok with empty touchedPaths when the session branch has no diff vs main', async () => {
const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
const parent = await realpath(join(tempDir, '..'));
const wtDir = join(parent, `wt-${Date.now()}-sm-empty`);
await service.addWorktree(wtDir, 'session/empty', baseSha);
const result = await service.squashMergeIntoMain(
'session/empty',
'System User',
'system@example.com',
'should be a no-op',
);
expect(result.ok).toBe(true);
if (!result.ok) {
throw new Error('unreachable');
}
expect(result.touchedPaths).toEqual([]);
expect(result.squashSha).toBe(baseSha);
await service.removeWorktree(wtDir).catch(() => undefined);
await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
});
// The session branch and main both rewrite shared.yaml after the branch point.
// The squash merge must report a conflict listing that path and leave main's status clean.
it('returns conflict=true and leaves main clean when session+main touched same file differently', async () => {
  await writeAndCommit('shared.yaml', 'base\n');
  const base = await service.revParseHead();
  if (!base) {
    throw new Error('no base head');
  }
  // The worktree directory is a sibling of tempDir, so this test removes it itself.
  const parent = await realpath(join(tempDir, '..'));
  const wtDir = join(parent, `wt-${Date.now()}-conf`);
  try {
    await service.addWorktree(wtDir, 'session/conf', base);
    const scoped = service.forWorktree(wtDir);
    await writeFile(join(wtDir, 'shared.yaml'), 'session-edit\n', 'utf-8');
    await scoped.commitFile('shared.yaml', 'session edit', 'System User', 'system@example.com');
    // Main edits the same file a different way, after the session branched.
    await writeAndCommit('shared.yaml', 'main-edit\n');
    const result = await service.squashMergeIntoMain(
      'session/conf',
      'System User',
      'system@example.com',
      'Memory capture: 1 file [chat=dead1234]',
    );
    expect(result.ok).toBe(false);
    if (result.ok) {
      // Narrows the result type to the failure shape.
      throw new Error('unreachable');
    }
    expect(result.conflict).toBe(true);
    expect(result.conflictPaths).toContain('shared.yaml');
    // Reads the service's `git` member through a cast to inspect the main checkout's status.
    const status = await (service as unknown as { git: import('simple-git').SimpleGit }).git.status();
    expect(status.isClean()).toBe(true);
  } finally {
    // Best-effort teardown; runs even when an expectation above throws.
    await service.removeWorktree(wtDir).catch(() => undefined);
    await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
  }
});
// The session branch commits knowledge.md while tempDir holds a file of the same name that
// was only written, never committed. The merge must report that path as the sole conflict,
// and the local file must still show up in status.not_added afterwards.
it('reports untracked files that would be overwritten by the squash merge', async () => {
  const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
  // The worktree directory is a sibling of tempDir, so this test removes it itself.
  const parent = await realpath(join(tempDir, '..'));
  const wtDir = join(parent, `wt-${Date.now()}-untracked`);
  try {
    await service.addWorktree(wtDir, 'session/untracked', baseSha);
    const scoped = service.forWorktree(wtDir);
    await writeFile(join(wtDir, 'knowledge.md'), 'session version\n', 'utf-8');
    await scoped.commitFile('knowledge.md', 'session write', 'System User', 'system@example.com');
    // Same relative path in the main checkout, written but not committed.
    await writeFile(join(tempDir, 'knowledge.md'), 'untracked local version\n', 'utf-8');
    const result = await service.squashMergeIntoMain(
      'session/untracked',
      'System User',
      'system@example.com',
      'Memory capture: 1 file [chat=untracked]',
    );
    expect(result.ok).toBe(false);
    if (result.ok) {
      // Narrows the result type to the failure shape.
      throw new Error('unreachable');
    }
    expect(result.conflict).toBe(true);
    expect(result.conflictPaths).toEqual(['knowledge.md']);
    // Reads the service's `git` member through a cast to inspect the main checkout's status.
    const status = await (service as unknown as { git: import('simple-git').SimpleGit }).git.status();
    expect(status.not_added).toContain('knowledge.md');
  } finally {
    // Best-effort teardown; runs even when an expectation above throws.
    await service.removeWorktree(wtDir).catch(() => undefined);
    await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
  }
});
});
});

View file

@ -1,124 +0,0 @@
import { mkdtemp, realpath, rm, stat } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { KtxCoreConfig } from './config.js';
import { GitService } from './git.service.js';
import { SessionWorktreeService, type WorktreeConfigPort } from './session-worktree.service.js';
// Config stub used by this spec: the worktree config port, parameterised over itself,
// plus the workdir a scoped instance was created for.
interface TestWorktreeConfig extends WorktreeConfigPort<TestWorktreeConfig> {
// Populated on objects returned by the stubbed forWorktree(); the root stub omits it.
workdir?: string;
}
// Runs SessionWorktreeService against a real GitService rooted in a fresh temp directory,
// paired with a stubbed scoped-config adapter.
describe('SessionWorktreeService', () => {
  let homeDir: string;
  let gitService: GitService;
  let sessionService: SessionWorktreeService<TestWorktreeConfig>;

  // Returns the repository HEAD, failing the calling test when there is none.
  const requireBaseSha = async () => {
    const head = await gitService.revParseHead();
    if (!head) {
      throw new Error('no base sha');
    }
    return head;
  };

  beforeEach(async () => {
    homeDir = await mkdtemp(join(tmpdir(), 'sws-spec-'));
    // Resolve symlinks so path comparisons against worktree paths are exact.
    homeDir = await realpath(homeDir);

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: homeDir, homeDir },
      git: {
        userName: 'System User',
        userEmail: 'system@example.com',
        bootstrapMessage: 'Initialize test config repo',
        bootstrapAuthor: 'test-system',
        bootstrapAuthorEmail: 'system@example.com',
      },
    };

    gitService = new GitService(coreConfig);
    await gitService.onModuleInit();

    // forWorktree() hands back a config carrying the requested workdir and the same stub.
    const configService: TestWorktreeConfig = {
      forWorktree: vi.fn(
        (workdir: string): TestWorktreeConfig => ({ workdir, forWorktree: configService.forWorktree }),
      ),
    };

    sessionService = new SessionWorktreeService({ coreConfig, gitService, configService });
  });

  afterEach(async () => {
    await rm(homeDir, { recursive: true, force: true });
  });

  describe('create', () => {
    it('creates a worktree + branch and returns scoped services', async () => {
      const baseSha = await requireBaseSha();

      const session = await sessionService.create('chat-abc', baseSha);

      expect(session.workdir).toBe(join(homeDir, '.worktrees', 'session-chat-abc'));
      expect(session.branch).toBe('session/chat-abc');
      expect(session.baseSha).toBe(baseSha);

      const dirStats = await stat(session.workdir);
      expect(dirStats.isDirectory()).toBe(true);

      // The scoped git instance sees the worktree's HEAD, which is baseSha right after creation.
      expect(await session.git.revParseHead()).toBe(baseSha);

      const worktrees = await gitService.listWorktrees();
      expect(worktrees.find((entry) => entry.path === session.workdir)).toBeTruthy();
    });

    it('appends a timestamp suffix when the primary dir already exists', async () => {
      const baseSha = await requireBaseSha();

      const first = await sessionService.create('chat-dup', baseSha);
      const second = await sessionService.create('chat-dup', baseSha);

      expect(first.workdir).not.toBe(second.workdir);
      expect(second.branch).toMatch(/^session\/chat-dup-\d+$/);
    });
  });

  describe('cleanup', () => {
    it('success removes the worktree dir and deletes the branch', async () => {
      const baseSha = await requireBaseSha();
      const session = await sessionService.create('chat-cleanup-ok', baseSha);

      await sessionService.cleanup(session, 'success');

      const worktrees = await gitService.listWorktrees();
      expect(worktrees.find((entry) => entry.path === session.workdir)).toBeFalsy();
      await expect(stat(session.workdir)).rejects.toThrow();
    });

    it('conflict keeps the worktree and writes a sentinel file', async () => {
      const baseSha = await requireBaseSha();
      const session = await sessionService.create('chat-cleanup-conflict', baseSha);

      await sessionService.cleanup(session, 'conflict', { conflictPaths: ['shared.yaml'] });

      // The worktree directory must survive a conflict outcome.
      await expect(stat(session.workdir)).resolves.toBeTruthy();

      // NOTE(review): readFile could join the static node:fs/promises import instead.
      const { readFile } = await import('node:fs/promises');
      const sentinelPath = join(session.workdir, '.ktx-outcome');
      const parsed = JSON.parse(await readFile(sentinelPath, 'utf-8'));

      expect(parsed.outcome).toBe('conflict');
      expect(parsed.chatId).toBe('chat-cleanup-conflict');
      expect(parsed.conflictPaths).toEqual(['shared.yaml']);
      expect(typeof parsed.at).toBe('string');
    });
  });
});

View file

@ -1,343 +0,0 @@
import { once } from 'node:events';
import { createServer } from 'node:http';
import { describe, expect, it, vi } from 'vitest';
import { createHttpSemanticLayerComputePort, createPythonSemanticLayerComputePort } from './semantic-layer-compute.js';
// Single semantic-layer source shared by the query and validate specs below.
const source = {
  name: 'orders',
  table: 'public.orders',
  grain: ['id'],
  columns: [
    {
      name: 'id',
      type: 'number',
    },
  ],
  joins: [],
  measures: [
    {
      name: 'order_count',
      expr: 'count(*)',
    },
  ],
};
// Camel-cased source-generation input as handed to port.generateSources():
// two tables in the `public` db and one many-to-one link between them.
const sourceGenerationInput = {
  tables: [
    {
      name: 'orders',
      db: 'public',
      comment: 'Orders table',
      columns: [
        {
          name: 'id',
          type: 'integer',
          primaryKey: true,
          nullable: false,
          comment: 'Order ID',
        },
        {
          name: 'customer_id',
          type: 'integer',
        },
        {
          name: 'amount',
          type: 'decimal',
          comment: 'Order amount',
        },
      ],
    },
    {
      name: 'customers',
      db: 'public',
      columns: [
        {
          name: 'id',
          type: 'integer',
          primaryKey: true,
        },
        {
          name: 'email',
          type: 'varchar',
        },
      ],
    },
  ],
  links: [
    {
      fromTable: 'orders',
      fromColumn: 'customer_id',
      toTable: 'customers',
      toColumn: 'id',
      relationshipType: 'MANY_TO_ONE',
    },
  ],
  dialect: 'postgres',
};
// Snake-cased counterpart of the generation input: the payload the specs expect the
// port to pass to the daemon runner (primary_key, from_table, relationship_type, ...).
const sourceGenerationDaemonPayload = {
  tables: [
    {
      name: 'orders',
      db: 'public',
      comment: 'Orders table',
      columns: [
        {
          name: 'id',
          type: 'integer',
          primary_key: true,
          nullable: false,
          comment: 'Order ID',
        },
        {
          name: 'customer_id',
          type: 'integer',
        },
        {
          name: 'amount',
          type: 'decimal',
          comment: 'Order amount',
        },
      ],
    },
    {
      name: 'customers',
      db: 'public',
      columns: [
        {
          name: 'id',
          type: 'integer',
          primary_key: true,
        },
        {
          name: 'email',
          type: 'varchar',
        },
      ],
    },
  ],
  links: [
    {
      from_table: 'orders',
      from_column: 'customer_id',
      to_table: 'customers',
      to_column: 'id',
      relationship_type: 'MANY_TO_ONE',
    },
  ],
  dialect: 'postgres',
};
// Canned daemon reply for source generation, in the daemon's snake_case shape.
// NOTE(review): source_count is 2 while only one source is listed; the specs only
// check that the count is passed through, so confirm whether the mismatch is intended.
const sourceGenerationDaemonResponse = {
  source_count: 2,
  sources: [
    {
      name: 'orders',
      table: 'public.orders',
      grain: ['id'],
      columns: [
        {
          name: 'id',
          type: 'number',
        },
      ],
      joins: [
        {
          to: 'customers',
          on: 'customer_id = customers.id',
          relationship: 'many_to_one',
        },
      ],
      measures: [
        {
          name: 'record_count',
          expr: 'count(id)',
        },
      ],
    },
  ],
};
// Covers the stdio-backed port: each method must invoke the matching daemon command
// through the injected runJson and translate the reply into the port's camelCase shape.
describe('createPythonSemanticLayerComputePort', () => {
  // Fresh copy of the compiled-query reply per use, so no object is shared between
  // the stubbed runner and the expectation.
  const compiledQuery = () => ({
    sql: 'select count(*) from public.orders',
    dialect: 'postgres',
    columns: [{ name: 'orders.order_count' }],
    plan: { sources_used: ['orders'] },
  });

  it('calls the semantic-query stdio command', async () => {
    const runJson = vi.fn(async () => compiledQuery());
    const port = createPythonSemanticLayerComputePort({
      runJson,
      projectId: 'hashed-project-id',
    });

    const pending = port.query({
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });

    await expect(pending).resolves.toEqual(compiledQuery());
    expect(runJson).toHaveBeenCalledWith('semantic-query', {
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
      projectId: 'hashed-project-id',
    });
  });

  it('calls the semantic-validate stdio command', async () => {
    const runJson = vi.fn(async () => ({
      valid: true,
      errors: [],
      warnings: [],
      per_source_warnings: {},
    }));
    const port = createPythonSemanticLayerComputePort({ runJson });

    const pending = port.validateSources({
      sources: [source],
      dialect: 'postgres',
      recentlyTouched: ['orders'],
    });

    // per_source_warnings comes back as perSourceWarnings.
    await expect(pending).resolves.toEqual({
      valid: true,
      errors: [],
      warnings: [],
      perSourceWarnings: {},
    });
    // recentlyTouched goes out as recently_touched.
    expect(runJson).toHaveBeenCalledWith('semantic-validate', {
      sources: [source],
      dialect: 'postgres',
      recently_touched: ['orders'],
    });
  });

  it('calls the semantic-generate-sources stdio command', async () => {
    const runJson = vi.fn(async () => sourceGenerationDaemonResponse);
    const port = createPythonSemanticLayerComputePort({ runJson });

    const pending = port.generateSources(sourceGenerationInput);

    await expect(pending).resolves.toEqual({
      sourceCount: 2,
      sources: sourceGenerationDaemonResponse.sources,
    });
    expect(runJson).toHaveBeenCalledWith('semantic-generate-sources', sourceGenerationDaemonPayload);
  });
});
// Covers the HTTP-backed port twice over: once with an injected requestJson runner,
// and once against a real node:http server listening on an ephemeral loopback port.
describe('createHttpSemanticLayerComputePort', () => {
  type RecordedRequest = { url: string | undefined; body: unknown };

  // Fresh copy of the compiled-query reply per use.
  const compiledQuery = () => ({
    sql: 'select count(*) from public.orders',
    dialect: 'postgres',
    columns: [{ name: 'orders.order_count' }],
    plan: { sources_used: ['orders'] },
  });

  // Starts a loopback server that records every request (url + parsed JSON body) and
  // answers 200 with `reply` as JSON, runs `exercise` against it, then closes the server.
  async function withRecordingDaemon(
    reply: unknown,
    exercise: (baseUrl: string, requests: RecordedRequest[]) => Promise<void>,
  ): Promise<void> {
    const requests: RecordedRequest[] = [];
    const server = createServer((request, response) => {
      const chunks: Buffer[] = [];
      request.on('data', (chunk: Buffer) => chunks.push(chunk));
      request.on('end', () => {
        requests.push({
          url: request.url,
          body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
        });
        response.writeHead(200, { 'content-type': 'application/json' });
        response.end(JSON.stringify(reply));
      });
    });
    // Port 0 lets the OS pick a free port.
    server.listen(0, '127.0.0.1');
    await once(server, 'listening');
    try {
      const address = server.address();
      if (!address || typeof address === 'string') {
        throw new Error('expected TCP server address');
      }
      await exercise(`http://127.0.0.1:${address.port}`, requests);
    } finally {
      server.close();
    }
  }

  it('calls semantic query and validate HTTP endpoints through an injected runner', async () => {
    // Anything other than the query path gets the validation reply.
    const requestJson = vi.fn(async (path: string) =>
      path === '/semantic-layer/query'
        ? compiledQuery()
        : {
            valid: true,
            errors: [],
            warnings: [],
            per_source_warnings: {},
          },
    );
    const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson });

    const queried = port.query({
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });
    await expect(queried).resolves.toEqual(compiledQuery());

    const validated = port.validateSources({
      sources: [source],
      dialect: 'postgres',
      recentlyTouched: ['orders'],
    });
    await expect(validated).resolves.toEqual({
      valid: true,
      errors: [],
      warnings: [],
      perSourceWarnings: {},
    });

    expect(requestJson).toHaveBeenNthCalledWith(1, '/semantic-layer/query', {
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });
    expect(requestJson).toHaveBeenNthCalledWith(2, '/semantic-layer/validate', {
      sources: [source],
      dialect: 'postgres',
      recently_touched: ['orders'],
    });
  });

  it('calls the semantic source-generation HTTP endpoint through an injected runner', async () => {
    const requestJson = vi.fn(async () => sourceGenerationDaemonResponse);
    const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson });

    await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
      sourceCount: 2,
      sources: sourceGenerationDaemonResponse.sources,
    });
    expect(requestJson).toHaveBeenCalledWith('/semantic-layer/generate-sources', sourceGenerationDaemonPayload);
  });

  it('posts JSON to a running HTTP daemon endpoint', async () => {
    await withRecordingDaemon(compiledQuery(), async (baseUrl, requests) => {
      const port = createHttpSemanticLayerComputePort({ baseUrl });

      const queried = port.query({
        sources: [source],
        dialect: 'postgres',
        query: { measures: ['orders.order_count'], dimensions: [] },
      });

      await expect(queried).resolves.toMatchObject({
        sql: 'select count(*) from public.orders',
        dialect: 'postgres',
      });
      expect(requests).toEqual([
        {
          url: '/semantic-layer/query',
          body: {
            sources: [source],
            dialect: 'postgres',
            query: { measures: ['orders.order_count'], dimensions: [] },
          },
        },
      ]);
    });
  });

  it('posts source-generation JSON to a running HTTP daemon endpoint', async () => {
    await withRecordingDaemon(sourceGenerationDaemonResponse, async (baseUrl, requests) => {
      const port = createHttpSemanticLayerComputePort({ baseUrl });

      await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
        sourceCount: 2,
        sources: sourceGenerationDaemonResponse.sources,
      });
      expect(requests).toEqual([
        {
          url: '/semantic-layer/generate-sources',
          body: sourceGenerationDaemonPayload,
        },
      ]);
    });
  });
});

View file

@ -1,196 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../../context/project/project.js';
import { SqliteKnowledgeIndex } from '../wiki/sqlite-knowledge-index.js';
import { reindexLocalIndexes } from './reindex.js';
// Deterministic embedding stub: every text maps to the two-element vector [text.length, 1].
class FakeEmbeddingPort implements KtxEmbeddingPort {
  readonly maxBatchSize = 8;

  // Single place that defines the fake vector for one text.
  private static vectorFor(text: string): number[] {
    return [text.length, 1];
  }

  // Embeds one text.
  async computeEmbedding(text: string): Promise<number[]> {
    return FakeEmbeddingPort.vectorFor(text);
  }

  // Embeds each text independently, preserving input order.
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const vectors: number[][] = [];
    for (const text of texts) {
      vectors.push(FakeEmbeddingPort.vectorFor(text));
    }
    return vectors;
  }
}
// Initializes a KTX project in `tempDir` (passing force: true) and returns it as loaded from disk.
async function createProject(tempDir: string): Promise<KtxLocalProject> {
  const projectDir = tempDir;
  await initKtxProject({ projectDir, force: true });
  const project = await loadKtxProject({ projectDir });
  return project;
}
// Exercises reindexLocalIndexes against a freshly initialised project in a temp dir:
// scope discovery, totals, repeat runs, force rebuilds, and per-scope error reporting.
describe('reindexLocalIndexes', () => {
  // Wiki page with front matter, written to wiki/global/revenue.md by several tests.
  const revenuePage = '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n';

  // Minimal semantic-layer source. The column entry is nested under `columns:` with
  // `type` aligned to `name`; without that nesting the document is not valid YAML.
  const ordersSourceYaml =
    'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n  - name: id\n    type: number\njoins: []\nmeasures: []\n';

  let tempDir: string;

  // Writes the shared wiki page into the project's wiki/global directory.
  const writeRevenuePage = (project: KtxLocalProject): Promise<void> =>
    writeFile(join(project.projectDir, 'wiki/global/revenue.md'), revenuePage, 'utf-8');

  // Creates semantic-layer/warehouse if needed and writes one file into it.
  const writeWarehouseFile = async (project: KtxLocalProject, fileName: string, content: string): Promise<void> => {
    const warehouseDir = join(project.projectDir, 'semantic-layer/warehouse');
    await mkdir(warehouseDir, { recursive: true });
    await writeFile(join(warehouseDir, fileName), content, 'utf-8');
  };

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-reindex-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('returns an empty summary when no wiki or semantic-layer directories exist', async () => {
    const project = await createProject(tempDir);
    await rm(join(project.projectDir, 'wiki'), { recursive: true, force: true });
    await rm(join(project.projectDir, 'semantic-layer'), { recursive: true, force: true });

    await expect(reindexLocalIndexes(project, { force: false, embeddingService: null })).resolves.toMatchObject({
      scopes: [],
      totals: { scanned: 0, updated: 0, deleted: 0, embeddingsRecomputed: 0, embeddingsFailed: 0 },
      force: false,
      embeddingsAvailable: false,
    });
  });

  it('discovers empty directories as zero-row scopes', async () => {
    const project = await createProject(tempDir);
    await mkdir(join(project.projectDir, 'wiki/user/local'), { recursive: true });
    await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });

    expect(summary.scopes.map((scope) => scope.label)).toEqual(['global', 'user/local', 'warehouse']);
    expect(summary.totals.scanned).toBe(0);
  });

  it('indexes mixed wiki and SL sources and reports totals', async () => {
    const project = await createProject(tempDir);
    await writeRevenuePage(project);
    await writeWarehouseFile(project, 'orders.yaml', ordersSourceYaml);

    const summary = await reindexLocalIndexes(project, {
      force: false,
      embeddingService: new FakeEmbeddingPort(),
    });

    expect(summary.scopes).toHaveLength(2);
    expect(summary.totals).toMatchObject({ scanned: 2, updated: 2, deleted: 0, embeddingsRecomputed: 2 });
    expect(summary.embeddingsAvailable).toBe(true);
  });

  it('does not report unchanged lexical-only rows as updated on repeated runs', async () => {
    const project = await createProject(tempDir);
    await writeRevenuePage(project);
    await writeWarehouseFile(project, 'orders.yaml', ordersSourceYaml);

    const first = await reindexLocalIndexes(project, { force: false, embeddingService: null });
    expect(first.totals).toMatchObject({
      scanned: 2,
      updated: 2,
      deleted: 0,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });

    // Nothing changed on disk between the two runs, so the second pass updates nothing.
    const second = await reindexLocalIndexes(project, { force: false, embeddingService: null });
    expect(second.totals).toMatchObject({
      scanned: 2,
      updated: 0,
      deleted: 0,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });
    expect(second.scopes.map((scope) => [scope.label, scope.updated])).toEqual([
      ['global', 0],
      ['warehouse', 0],
    ]);
  });

  it('force clears stale rows before rebuilding each discovered scope', async () => {
    const project = await createProject(tempDir);
    // Seed the index with a row that has no backing file on disk.
    const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') });
    wikiIndex.sync([
      {
        path: 'wiki/global/stale.md',
        key: 'stale',
        scope: 'GLOBAL',
        scopeId: null,
        summary: 'Stale',
        content: 'Stale content',
        tags: [],
        embedding: [1, 0],
      },
    ]);
    await writeRevenuePage(project);

    const summary = await reindexLocalIndexes(project, {
      force: true,
      embeddingService: new FakeEmbeddingPort(),
    });

    expect(summary.force).toBe(true);
    expect(summary.totals).toMatchObject({ scanned: 1, updated: 1, deleted: 0 });
    expect(wikiIndex.search('Stale', 10)).toEqual([]);
  });

  it('captures a per-scope error and continues other scopes', async () => {
    const project = await createProject(tempDir);
    await writeRevenuePage(project);
    // Deliberately unterminated flow sequence: this file must fail to parse.
    await writeWarehouseFile(project, 'broken.yaml', 'not: [valid');

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });

    expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
    expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('YAML');
  });

  it('marks a scope errored when configured embeddings fail', async () => {
    const project = await createProject(tempDir);
    await writeRevenuePage(project);
    // Embedding port whose every call rejects.
    const embeddingService: KtxEmbeddingPort = {
      maxBatchSize: 8,
      async computeEmbedding() {
        throw new Error('embedding provider unavailable');
      },
      async computeEmbeddingsBulk() {
        throw new Error('embedding provider unavailable');
      },
    };

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService });

    expect(summary.scopes[0]).toMatchObject({
      label: 'global',
      embeddingsFailed: 1,
      error: '1 embedding recomputation failed',
    });
  });
});

View file

@ -1,42 +0,0 @@
import { describe, expect, it } from 'vitest';
import { actionTargetConnectionId, memoryActionIdentity } from './action-identity.js';
// Pins how memory actions are keyed: semantic-layer ('sl') actions use their own
// targetConnectionId when present, everything else uses the run's connection id.
describe('memory action target identity', () => {
  it('keys SL actions by target connection and wiki actions by run connection', () => {
    // SL action carrying an explicit target connection.
    const slWithTarget = memoryActionIdentity(
      { target: 'sl', type: 'created', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
      'looker-run',
    );
    expect(slWithTarget).toBe('sl:warehouse-b:orders');

    // SL action without one falls back to the run connection.
    const slWithoutTarget = memoryActionIdentity(
      { target: 'sl', type: 'created', key: 'orders', detail: '' },
      'warehouse-a',
    );
    expect(slWithoutTarget).toBe('sl:warehouse-a:orders');

    // Wiki action: its targetConnectionId is not used for the identity.
    const wikiWithTarget = memoryActionIdentity(
      {
        target: 'wiki',
        type: 'created',
        key: 'wiki/global/orders.md',
        detail: '',
        targetConnectionId: 'ignored',
      },
      'looker-run',
    );
    expect(wikiWithTarget).toBe('wiki:looker-run:wiki/global/orders.md');
  });

  it('resolves action target connection only for SL actions', () => {
    const slConnection = actionTargetConnectionId(
      { target: 'sl', type: 'updated', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
      'looker-run',
    );
    expect(slConnection).toBe('warehouse-b');

    const wikiConnection = actionTargetConnectionId(
      { target: 'wiki', type: 'updated', key: 'orders', detail: '' },
      'looker-run',
    );
    expect(wikiConnection).toBe('looker-run');
  });
});

View file

@ -1,214 +0,0 @@
import { describe, expect, it } from 'vitest';
import { parseDbtSchemaFile, parseDbtSchemaFiles } from './parse-schema.js';
// Specs for parseDbtSchemaFile / parseDbtSchemaFiles.
// The YAML fixtures are whitespace-sensitive: list items and their keys must stay nested
// exactly as written, so each template literal starts its YAML at column 0.
describe('dbt descriptions schema parser', () => {
  it('resolves shared dbt vars and defaults before parsing schema YAML', () => {
    // `database` and `model_schema` are supplied; `schema` and `model_name` use their defaults.
    const result = parseDbtSchemaFile(
      `
version: 2
sources:
  - name: raw
    database: "{{ var('database') }}"
    schema: "{{ var('schema', 'fallback_schema') }}"
    tables:
      - name: orders
        identifier: fct_orders
        description: "Orders from {{ var('database') }}"
        columns:
          - name: customer_id
            description: "Customer id"
            tests:
              - relationships:
                  to: ref('customers')
                  field: id
models:
  - name: "{{ var('model_name', 'orders_model') }}"
    schema: "{{ var('model_schema') }}"
    columns:
      - name: id
        description: "Order id"
`,
      { path: 'models/schema.yml', variables: new Map([['database', 'analytics'], ['model_schema', 'mart']]) },
    );

    expect(result.tables).toEqual([
      {
        name: 'fct_orders',
        description: 'Orders from analytics',
        database: 'analytics',
        schema: 'fallback_schema',
        columns: [
          {
            name: 'customer_id',
            description: 'Customer id',
            dataType: null,
            dataTests: [{ name: 'relationships', package: 'dbt', kwargs: { to: "ref('customers')", field: 'id' } }],
          },
        ],
        resourceType: 'source',
      },
      {
        name: 'orders_model',
        description: null,
        database: null,
        schema: 'mart',
        columns: [{ name: 'id', description: 'Order id', dataType: null }],
        resourceType: 'model',
      },
    ]);
    expect(result.relationships).toEqual([
      {
        fromTable: 'fct_orders',
        fromColumn: 'customer_id',
        toTable: 'customers',
        toColumn: 'id',
        fromSchema: 'fallback_schema',
      },
    ]);
  });

  it('deduplicates tables by database schema and name while merging columns', () => {
    // Both files describe the same `orders` model; the second adds a column and a data type.
    const result = parseDbtSchemaFiles([
      {
        path: 'models/a.yml',
        content: `
version: 2
models:
  - name: orders
    description: Orders
    columns:
      - name: id
        description: Primary key
`,
      },
      {
        path: 'models/b.yml',
        content: `
version: 2
models:
  - name: orders
    columns:
      - name: status
        description: Status
      - name: id
        data_type: integer
`,
      },
    ]);

    expect(result.tables).toEqual([
      {
        name: 'orders',
        description: 'Orders',
        database: null,
        schema: null,
        resourceType: 'model',
        columns: [
          { name: 'id', description: 'Primary key', dataType: 'integer' },
          { name: 'status', description: 'Status', dataType: null },
        ],
      },
    ]);
  });

  it('returns an empty result for malformed YAML and preserves unresolved Jinja text', () => {
    expect(parseDbtSchemaFile('{{{{ invalid yaml', { path: 'broken.yml' })).toEqual({
      projectName: null,
      dbtVersion: null,
      tables: [],
      relationships: [],
    });

    // No variable named `missing_model` is supplied, so the Jinja expression stays verbatim.
    const unresolved = parseDbtSchemaFile(
      `
version: 2
models:
  - name: "{{ var('missing_model') }}"
`,
      { variables: new Map() },
    );
    expect(unresolved.tables[0]?.name).toBe("{{ var('missing_model') }}");
  });

  it('extracts data tests, constraints, enum values, tags, and freshness', () => {
    const result = parseDbtSchemaFile(`
version: 2
sources:
  - name: raw
    schema: jaffle
    tags: ["raw"]
    tables:
      - name: customers
        tags: ["core"]
        loaded_at_field: updated_at
        freshness:
          warn_after: { count: 12, period: hour }
        columns:
          - name: id
            tests:
              - not_null
              - unique
          - name: status
            data_tests:
              - accepted_values:
                  values: ['active', 'inactive']
models:
  - name: orders
    tags: ["finance"]
    loaded_at_field: run_at
    columns:
      - name: status
        data_tests:
          - dbt_utils.expression_is_true:
              expression: "status is not null"
          - accepted_values: ['placed', 'shipped']
`);

    // Source table: tags from the source and the table are both expected.
    const customers = result.tables.find((table) => table.name === 'customers');
    expect(customers?.tagsDbt).toEqual(['raw', 'core']);
    expect(customers?.freshnessDbt?.loadedAtField).toBe('updated_at');
    expect(customers?.freshnessDbt?.raw).toBeDefined();

    const id = customers?.columns.find((column) => column.name === 'id');
    expect(id?.constraints?.dbt).toEqual({ not_null: true, unique: true });

    const status = customers?.columns.find((column) => column.name === 'status');
    expect(status?.enumValuesDbt).toEqual(['active', 'inactive']);

    // Model: accepted_values is given directly as a list rather than under `values:`.
    const orders = result.tables.find((table) => table.name === 'orders');
    expect(orders?.tagsDbt).toEqual(['finance']);
    expect(orders?.freshnessDbt?.loadedAtField).toBe('run_at');

    const ordersStatus = orders?.columns.find((column) => column.name === 'status');
    expect(ordersStatus?.enumValuesDbt).toEqual(['placed', 'shipped']);
    expect(ordersStatus?.dataTests).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ package: 'dbt_utils', name: 'expression_is_true' }),
        expect.objectContaining({ package: 'dbt', name: 'accepted_values' }),
      ]),
    );
  });

  it('parses relationships from model column data tests', () => {
    // Relationship arguments are nested under an `arguments:` key here.
    const result = parseDbtSchemaFile(`
version: 2
models:
  - name: orders
    schema: public
    columns:
      - name: customer_id
        data_tests:
          - relationships:
              arguments:
                to: "ref('customers')"
                field: id
`);

    expect(result.relationships).toEqual([
      {
        fromTable: 'orders',
        fromColumn: 'customer_id',
        toTable: 'customers',
        toColumn: 'id',
        fromSchema: 'public',
      },
    ]);
  });
});

View file

@ -1,36 +0,0 @@
import { describe, expect, it } from 'vitest';
import { chunkDbtProject } from './chunk.js';
// Specs for chunkDbtProject: the peer-file index cap and how a diff set affects
// work units in projects with many model YAML files.
describe('chunkDbtProject', () => {
  // Diff set in which only `modified` is populated.
  function diffSet(modified: string[]) {
    return { added: [], modified, deleted: [], unchanged: [] };
  }

  // models/m0.yml .. models/m{count-1}.yml
  const modelYamlPaths = (count: number) => Array.from({ length: count }, (_, i) => `models/m${i}.yml`);

  it('caps peerFileIndex when the project has very many yaml files', () => {
    const allPaths = ['dbt_project.yml', ...modelYamlPaths(201)].sort();

    const chunked = chunkDbtProject({ allPaths });
    const first = chunked.workUnits[0];

    expect(first).toBeDefined();
    expect(first?.peerFileIndex).toHaveLength(200);
    expect(first?.notes).toMatch(/capped at 200/);
  });

  it('keeps large-project model work units when dbt_project.yml changes', () => {
    const allPaths = ['dbt_project.yml', ...modelYamlPaths(30)].sort();
    const changed = diffSet(['dbt_project.yml']);

    const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: changed });

    expect(workUnits).toHaveLength(30);
    expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']);
    expect(workUnits[0]?.dependencyPaths).toContain('dbt_project.yml');
  });

  it('keeps large-project model work units when non-model yaml peers change', () => {
    const allPaths = ['dbt_project.yml', 'seeds/seed_properties.yml', ...modelYamlPaths(30)].sort();
    const changed = diffSet(['seeds/seed_properties.yml']);

    const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: changed });

    expect(workUnits).toHaveLength(30);
    expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']);
    expect(workUnits[0]?.dependencyPaths).toContain('seeds/seed_properties.yml');
  });
});

View file

@ -1,57 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { SourceAdapter } from '../../types.js';
import { DbtSourceAdapter } from './dbt.adapter.js';
// Specs for DbtSourceAdapter, run against a staged directory created per test.
describe('DbtSourceAdapter', () => {
  let stagedDir: string;
  let adapter: SourceAdapter;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'dbt-adapter-'));
    adapter = new DbtSourceAdapter();
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('declares the expected source key and skill list', () => {
    expect(adapter.source).toBe('dbt');
    expect(adapter.skillNames).toEqual(['dbt_ingest']);
  });

  it('detects a staged dbt project root (dbt_project.yml)', async () => {
    await writeFile(join(stagedDir, 'dbt_project.yml'), "name: 'jaffle'\nversion: '1.0.0'\n", 'utf-8');
    expect(await adapter.detect(stagedDir)).toBe(true);
  });

  it('chunk: dbt_project.yml + models/a.yml yields one WU (≤25 files)', async () => {
    await writeFile(join(stagedDir, 'dbt_project.yml'), "name: 'jaffle'\n", 'utf-8');
    await mkdir(join(stagedDir, 'models'), { recursive: true });
    // The model entry is nested under `models:` and `description` is aligned with `name`;
    // without that nesting the schema file is not valid YAML.
    await writeFile(
      join(stagedDir, 'models/a.yml'),
      'version: 2\nmodels:\n  - name: orders\n    description: Orders\n',
      'utf-8',
    );

    const result = await adapter.chunk(stagedDir);

    expect(result.workUnits).toHaveLength(1);
    // Optional chaining keeps the access valid under noUncheckedIndexedAccess.
    expect(result.workUnits[0]?.unitKey).toBe('dbt-all');
    expect(result.parseArtifacts).toMatchObject({
      projectName: 'jaffle',
      tables: [{ name: 'orders', description: 'Orders' }],
    });
  });

  it('implements fetch() for git-backed dbt source setup', () => {
    expect(adapter.fetch).toBeTypeOf('function');
  });

  it('reports mapped warehouse targets for bundle SL discovery', async () => {
    // The same id is configured twice; it is expected back once.
    adapter = new DbtSourceAdapter({ targetConnectionIds: ['postgres-warehouse', 'postgres-warehouse'] });
    await expect(adapter.listTargetConnectionIds?.(stagedDir)).resolves.toEqual(['postgres-warehouse']);
  });
});

View file

@ -1,38 +0,0 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { fetchDbtRepo } from './fetch.js';
// Spec for fetchDbtRepo with the clone step stubbed: the "fetched" repo is prepared
// directly in the cache directory and only the copy into the staged dir is observed.
describe('fetchDbtRepo', () => {
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-dbt-fetch-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('copies dbt yaml files from a fetched repo subpath into staged dir', async () => {
    const cacheDir = join(tempDir, 'cache');
    const stagedDir = join(tempDir, 'staged');

    // Lay out <cache>/analytics with a project file and one model schema file.
    const projectRoot = join(cacheDir, 'analytics');
    await mkdir(join(projectRoot, 'models'), { recursive: true });
    await writeFile(join(projectRoot, 'dbt_project.yml'), 'name: analytics\n', 'utf-8');
    await writeFile(join(projectRoot, 'models', 'orders.yml'), 'models: []\n', 'utf-8');

    const cloneOrPull = vi.fn(async () => ({ commitHash: 'abc123' }));

    const fetched = fetchDbtRepo({
      config: { repoUrl: 'https://github.com/acme/dbt.git', path: 'analytics' },
      cacheDir,
      stagedDir,
      deps: { cloneOrPull },
    });

    await expect(fetched).resolves.toEqual({ commitHash: 'abc123', filesCopied: 2 });

    // Files land relative to the configured subpath, not the repo root.
    const stagedProject = readFile(join(stagedDir, 'dbt_project.yml'), 'utf-8');
    await expect(stagedProject).resolves.toContain('analytics');
    const stagedModel = readFile(join(stagedDir, 'models', 'orders.yml'), 'utf-8');
    await expect(stagedModel).resolves.toContain('models');
  });
});

View file

@ -1,8 +0,0 @@
import { describe, expect, it } from 'vitest';
import { normalizeDbtPath } from './parse.js';
// Checks that normalizeDbtPath rewrites backslash separators as forward slashes.
describe('normalizeDbtPath', () => {
  it('normalizes Windows separators to POSIX separators', () => {
    const windowsStylePath = 'models\\marts\\orders.yml';
    const normalized = normalizeDbtPath(windowsStylePath);
    expect(normalized).toBe('models/marts/orders.yml');
  });
});

View file

@ -1,158 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { BigQueryHistoricSqlQueryHistoryReader } from './bigquery-query-history-reader.js';
import { HistoricSqlGrantsMissingError } from './errors.js';
/**
 * Canned result handed back by the fake query client used in this test file.
 * `error` is optional; one test sets it to simulate a failed probe query.
 */
interface FakeQueryResult {
headers: string[];
rows: unknown[][];
totalRows: number;
error?: string;
}
/**
 * Builds a fake query client whose mocked `executeQuery` replays `results`
 * in order, consuming one entry from the front of the array per call.
 * Calling it after the queue is empty throws `unexpected query`.
 */
function queryClient(results: FakeQueryResult[]) {
  return {
    executeQuery: vi.fn(async (_query: string) => {
      // Remove the head of the queue in place, exactly like a shift().
      const [next] = results.splice(0, 1);
      if (!next) {
        throw new Error('unexpected query');
      }
      return next;
    }),
  };
}
/**
 * Returns the SQL text passed to the first recorded `executeQuery` call.
 * Throws when the mocked client has not been called at all.
 */
function firstQuery(client: ReturnType<typeof queryClient>): string {
  const [firstCall] = client.executeQuery.mock.calls;
  if (!firstCall) {
    throw new Error('expected query client to be called');
  }
  const [sql] = firstCall;
  return sql;
}
// Covers probing, aggregated fetches, and constructor validation of the BigQuery query-history reader,
// always against a fake query client.
describe('BigQueryHistoricSqlQueryHistoryReader', () => {
it('probes region-qualified INFORMATION_SCHEMA.JOBS_BY_PROJECT', async () => {
const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]);
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
await expect(reader.probe(client)).resolves.toEqual({ warnings: [], info: [] });
// The configured region 'US' appears lower-cased as `region-us` in the expected SQL.
expect(client.executeQuery).toHaveBeenCalledWith(
'SELECT 1 FROM `project-1.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT` LIMIT 1',
);
});
it('turns probe result errors into HistoricSqlGrantsMissingError', async () => {
// Here the query resolves, but the result object carries an `error` string.
const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Access Denied: jobs.listAll' }]);
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'us-central1' });
await expect(reader.probe(client)).rejects.toMatchObject({
name: 'HistoricSqlGrantsMissingError',
dialect: 'bigquery',
remediation:
'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.',
});
});
it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => {
// Here executeQuery itself rejects instead of returning an error result.
const client = {
executeQuery: vi.fn(async () => {
throw new Error('permission denied');
}),
};
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
});
it('fetches aggregated BigQuery query templates', async () => {
// One canned result row; the headers name the columns the row values line up with.
const client = queryClient([
{
headers: [
'template_id',
'canonical_sql',
'executions',
'distinct_users',
'first_seen',
'last_seen',
'p50_ms',
'p95_ms',
'error_rate',
'rows_produced',
'top_users',
],
rows: [
[
'hash-1',
'select status from orders',
42,
3,
'2026-05-01T00:00:00.000Z',
'2026-05-11T00:00:00.000Z',
12,
40,
0.05,
null,
JSON.stringify([{ user: 'analyst@example.test', executions: 1 }]),
],
],
totalRows: 1,
},
]);
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'demo', region: 'us' });
const rows = [];
// Drain the async iterator so the query is actually issued.
for await (const row of reader.fetchAggregated(
client,
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
{ dialect: 'bigquery', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
)) {
rows.push(row);
}
// Inspect the generated SQL; the HAVING threshold matches minExecutions: 5 from the config above.
const sql = firstQuery(client);
expect(sql).toContain('COUNT(*) AS executions');
expect(sql).toContain('COUNT(DISTINCT user_email) AS distinct_users');
expect(sql).toContain('GROUP BY query_hash');
expect(sql).toContain('HAVING COUNT(*) >= 5');
// The JSON-encoded top_users column is expected back as a parsed array.
expect(rows).toMatchObject([
{
templateId: 'hash-1',
stats: {
executions: 42,
errorRate: 0.05,
},
topUsers: [{ user: 'analyst@example.test', executions: 1 }],
},
]);
});
it('throws a clear error when the query client cannot execute SQL', async () => {
const reader = new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US' });
await expect(async () => {
// `{}` stands in for a client without executeQuery; iteration must fail before yielding anything.
for await (const _row of reader.fetchAggregated(
{},
{ start: new Date(), end: new Date() },
{
dialect: 'bigquery',
minExecutions: 5,
windowDays: 90,
enabledTables: [],
filters: { dropTrivialProbes: true },
redactionPatterns: [],
staleArchiveAfterDays: 90,
},
)) {
throw new Error('unreachable');
}
}).rejects.toThrow('Historic SQL BigQuery reader requires a query client with executeQuery(query)');
});
it('rejects unsafe project and region identifiers before building SQL', () => {
// Both checks happen in the constructor: a backtick in the project id and a `;` in the region.
expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project`1', region: 'US' })).toThrow(
'Invalid BigQuery project id for historic-SQL ingest: project`1',
);
expect(() => new BigQueryHistoricSqlQueryHistoryReader({ projectId: 'project-1', region: 'US;DROP' })).toThrow(
'Invalid BigQuery region for historic-SQL ingest: US;DROP',
);
});
});

View file

@ -1,59 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
bucketDistinctUsers,
bucketErrorRate,
bucketExecutions,
bucketFrequency,
bucketP95Runtime,
bucketRecency,
} from './buckets.js';
// Pins the label returned by each bucket helper at and around its boundaries.
describe('historic-sql bucket helpers', () => {
it('uses stable execution buckets', () => {
// Inputs come in pairs around each boundary: 9|10, 99|100, 999|1000, 4999|5000, 49999|50000.
expect([0, 9, 10, 99, 100, 999, 1000, 4999, 5000, 49999, 50000].map(bucketExecutions)).toEqual([
'<10',
'<10',
'10-100',
'10-100',
'100-1k',
'100-1k',
'1k-5k',
'1k-5k',
'5k-50k',
'5k-50k',
'>50k',
]);
});
it('uses stable distinct-user, error-rate, runtime, and recency buckets', () => {
// Per the expectations below, 5 maps to '2-5' while 6 and 10 map to '5-10'.
expect([0, 1, 2, 5, 6, 10, 11].map(bucketDistinctUsers)).toEqual([
'0',
'1',
'2-5',
'2-5',
'5-10',
'5-10',
'>10',
]);
expect([0, 0.01, 0.05, 0.2].map(bucketErrorRate)).toEqual(['none', 'low', 'low', 'high']);
// A null p95 runtime is reported as 'unknown'.
expect([null, 99, 100, 999, 1000, 9999, 10000].map(bucketP95Runtime)).toEqual([
'unknown',
'<100ms',
'100ms-1s',
'100ms-1s',
'1s-10s',
'1s-10s',
'>10s',
]);
// Recency is evaluated against an explicit reference date passed as the second argument.
expect(bucketRecency('2026-05-11T00:00:00.000Z', new Date('2026-05-11T12:00:00.000Z'))).toBe('current');
expect(bucketRecency('2026-04-20T00:00:00.000Z', new Date('2026-05-11T12:00:00.000Z'))).toBe('recent');
expect(bucketRecency('2026-01-01T00:00:00.000Z', new Date('2026-05-11T12:00:00.000Z'))).toBe('stale');
});
it('maps frequency counts to high, mid, and low labels', () => {
expect(bucketFrequency(80, 100)).toBe('high');
expect(bucketFrequency(20, 100)).toBe('mid');
expect(bucketFrequency(1, 100)).toBe('low');
// Both arguments zero must still produce a label ('low').
expect(bucketFrequency(0, 0)).toBe('low');
});
});

View file

@ -1,182 +0,0 @@
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { chunkHistoricSqlUnifiedStagedDir, describeHistoricSqlUnifiedScope } from './chunk-unified.js';
/** Creates a uniquely named temp directory for a chunking test and resolves to its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-unified-chunk-');
  return mkdtemp(prefix);
}
/**
 * Writes `value` as 2-space-indented JSON followed by a newline to `relPath`
 * under `root`, creating the parent directory first.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const filePath = join(root, relPath);
  const parentDir = join(filePath, '..');
  await mkdir(parentDir, { recursive: true });
  const serialized = `${JSON.stringify(value, null, 2)}\n`;
  await writeFile(filePath, serialized, 'utf-8');
}
/**
 * Populates `root` with a minimal unified historic-SQL staged layout:
 * a manifest, one table digest, the patterns audit input, and one patterns shard.
 * The audit input and the shard receive the same payload.
 */
async function writeUnifiedStagedDir(root: string): Promise<void> {
  const manifest = {
    source: 'historic-sql',
    connectionId: 'warehouse',
    dialect: 'postgres',
    fetchedAt: '2026-05-11T00:00:00.000Z',
    windowStart: '2026-02-10T00:00:00.000Z',
    windowEnd: '2026-05-11T00:00:00.000Z',
    snapshotRowCount: 1,
    touchedTableCount: 1,
    parseFailures: 0,
    warnings: [],
    probeWarnings: [],
  };
  const ordersDigest = {
    table: 'public.orders',
    stats: {
      executionsBucket: '10-100',
      distinctUsersBucket: '2-5',
      errorRateBucket: 'none',
      p95RuntimeBucket: '<100ms',
      recencyBucket: 'current',
    },
    columnsByClause: { select: [['status', 'high']] },
    observedJoins: [],
    topTemplates: [{ id: 'orders', canonicalSql: 'select * from public.orders', topUsers: [{ user: 'analyst' }] }],
  };
  const patternsInput = {
    templates: [
      {
        id: 'orders',
        canonicalSql: 'select * from public.orders join public.customers on true',
        tablesTouched: ['public.orders', 'public.customers'],
        executionsBucket: '10-100',
        distinctUsersBucket: '2-5',
        dialect: 'postgres',
      },
    ],
  };

  // Files are written one after another, in the same order as before.
  await writeJson(root, 'manifest.json', manifest);
  await writeJson(root, 'tables/public.orders.json', ordersDigest);
  await writeJson(root, 'patterns-input.json', patternsInput);
  await writeJson(root, 'patterns-input/part-0001.json', patternsInput);
}
// Covers WorkUnit emission, diff-set filtering, and scope description for the unified staged layout.
describe('chunkHistoricSqlUnifiedStagedDir', () => {
it('emits one table WorkUnit plus one patterns WorkUnit', async () => {
const stagedDir = await tempDir();
await writeUnifiedStagedDir(stagedDir);
// No diff set is passed here, so both the table file and the patterns shard produce a unit.
const result = await chunkHistoricSqlUnifiedStagedDir(stagedDir);
expect(result.workUnits).toEqual([
expect.objectContaining({
unitKey: 'historic-sql-table-public-orders',
displayLabel: 'Historic SQL usage: public.orders',
rawFiles: ['tables/public.orders.json'],
dependencyPaths: ['manifest.json'],
notes: expect.stringContaining('historic_sql_table_digest'),
}),
expect.objectContaining({
unitKey: 'historic-sql-patterns-part-0001',
displayLabel: 'Historic SQL cross-table patterns: part-0001',
rawFiles: ['patterns-input/part-0001.json'],
dependencyPaths: ['manifest.json'],
notes: expect.stringContaining('patterns-input/part-0001.json'),
}),
]);
// Both units' notes must mention the evidence-emitting tool by name.
expect(result.workUnits[0]?.notes).toContain('emit_historic_sql_evidence');
expect(result.workUnits[1]?.notes).toContain('emit_historic_sql_evidence');
expect(result.reconcileNotes).toEqual(['Historic-SQL touched tables=1 parseFailures=0']);
});
it('respects diff sets for unchanged table and patterns files', async () => {
const stagedDir = await tempDir();
await writeUnifiedStagedDir(stagedDir);
// Only the table file changed -> only the table unit.
await expect(
chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: [],
modified: ['tables/public.orders.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input.json', 'patterns-input/part-0001.json'],
}),
).resolves.toMatchObject({
workUnits: [expect.objectContaining({ unitKey: 'historic-sql-table-public-orders' })],
});
// Only the patterns shard changed -> only the patterns unit.
await expect(
chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: [],
modified: ['patterns-input/part-0001.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input.json', 'tables/public.orders.json'],
}),
).resolves.toMatchObject({
workUnits: [expect.objectContaining({ unitKey: 'historic-sql-patterns-part-0001' })],
});
// Only the top-level patterns-input.json changed -> no units at all.
await expect(
chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: [],
modified: ['patterns-input.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input/part-0001.json', 'tables/public.orders.json'],
}),
).resolves.toMatchObject({
workUnits: [],
});
});
it('describes unified staged scope', async () => {
const stagedDir = await tempDir();
await writeUnifiedStagedDir(stagedDir);
const scope = await describeHistoricSqlUnifiedScope(stagedDir);
expect(scope.isPathInScope('manifest.json')).toBe(true);
expect(scope.isPathInScope('patterns-input.json')).toBe(true);
expect(scope.isPathInScope('patterns-input/part-0001.json')).toBe(true);
// A shard name without the four-digit padding is rejected.
expect(scope.isPathInScope('patterns-input/part-1.json')).toBe(false);
expect(scope.isPathInScope('tables/public.orders.json')).toBe(true);
expect(scope.isPathInScope('templates/old/page.md')).toBe(false);
});
it('emits one patterns WorkUnit per changed shard', async () => {
const stagedDir = await tempDir();
await writeUnifiedStagedDir(stagedDir);
// Add a second shard on top of the default fixture.
await writeJson(stagedDir, 'patterns-input/part-0002.json', {
templates: [
{
id: 'line-items',
canonicalSql: 'select * from public.orders join public.line_items on true',
tablesTouched: ['public.orders', 'public.line_items'],
executionsBucket: '10-100',
distinctUsersBucket: '2-5',
dialect: 'postgres',
},
],
});
// One shard is added and one modified; the table file is unchanged and must not produce a unit.
const result = await chunkHistoricSqlUnifiedStagedDir(stagedDir, {
added: ['patterns-input/part-0002.json'],
modified: ['patterns-input/part-0001.json'],
deleted: [],
unchanged: ['manifest.json', 'patterns-input.json', 'tables/public.orders.json'],
});
expect(result.workUnits.map((unit) => unit.unitKey)).toEqual([
'historic-sql-patterns-part-0001',
'historic-sql-patterns-part-0002',
]);
expect(result.workUnits.map((unit) => unit.rawFiles)).toEqual([
['patterns-input/part-0001.json'],
['patterns-input/part-0002.json'],
]);
});
});

View file

@ -1,5 +1,9 @@
import { getDriverRegistration } from '../../../connections/drivers.js';
import type { KtxConnectionDriver } from '../../../scan/types.js';
import type { HistoricSqlDialect } from './types.js';
// Dialect names that historicSqlDialectForDriver matches a driver name against.
const historicSqlDialects: readonly HistoricSqlDialect[] = ['postgres', 'bigquery', 'snowflake'];
/**
 * Narrows an unknown value to a plain key/value record.
 * Returns `null` for primitives, `null`/`undefined`, functions, and arrays.
 */
function recordOrNull(value: unknown): Record<string, unknown> | null {
  if (value === null || typeof value !== 'object') {
    return null;
  }
  if (Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
@ -10,6 +14,14 @@ function queryHistoryRecord(connection: unknown): Record<string, unknown> | null
return context ? recordOrNull(context.queryHistory) : null;
}
/**
 * Maps a driver name onto the historic-SQL dialect with the same name.
 *
 * @throws Error when no entry of `historicSqlDialects` equals `driver`.
 */
function historicSqlDialectForDriver(driver: KtxConnectionDriver): HistoricSqlDialect {
  for (const candidate of historicSqlDialects) {
    if (candidate === driver) {
      return candidate;
    }
  }
  throw new Error(`Driver "${driver}" is marked as historic-SQL capable but has no HistoricSqlDialect mapping.`);
}
/** True only when the connection's query-history record has `enabled` set to exactly `true`. */
export function isQueryHistoryEnabled(connection: unknown): boolean {
  const enabledFlag = queryHistoryRecord(connection)?.enabled;
  return enabledFlag === true;
}
@ -25,8 +37,6 @@ export function queryHistoryDialectForConnection(connection: unknown): HistoricS
}
const conn = recordOrNull(connection);
const driver = String(conn?.driver ?? '').toLowerCase();
if (driver === 'postgres') return 'postgres';
if (driver === 'bigquery') return 'bigquery';
if (driver === 'snowflake') return 'snowflake';
return null;
const registration = getDriverRegistration(driver);
return registration?.hasHistoricSqlReader ? historicSqlDialectForDriver(registration.driver) : null;
}

View file

@ -1,57 +0,0 @@
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { detectHistoricSqlStagedDir } from './detect.js';
import { HISTORIC_SQL_SOURCE_KEY, stagedManifestSchema } from './types.js';
/** Creates a uniquely named temp directory for a detection test and resolves to its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-detect-');
  return mkdtemp(prefix);
}
/**
 * Serializes `value` as 2-space-indented JSON plus a trailing newline and writes
 * it to `relPath` below `root`, creating the parent directory beforehand.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const destination = join(root, relPath);
  const parent = join(destination, '..');
  await mkdir(parent, { recursive: true });
  const body = `${JSON.stringify(value, null, 2)}\n`;
  await writeFile(destination, body, 'utf-8');
}
/** Builds a zero-row historic-SQL manifest and runs it through `stagedManifestSchema.parse`. */
function manifest() {
  const rawManifest = {
    source: HISTORIC_SQL_SOURCE_KEY,
    connectionId: 'conn_1',
    dialect: 'postgres',
    fetchedAt: '2026-05-04T12:00:00.000Z',
    windowStart: '2026-02-03T12:00:00.000Z',
    windowEnd: '2026-05-04T12:00:00.000Z',
    snapshotRowCount: 0,
    touchedTableCount: 0,
    parseFailures: 0,
    warnings: [],
    probeWarnings: [],
  };
  return stagedManifestSchema.parse(rawManifest);
}
// Checks the conditions under which a staged directory is recognized as historic-SQL.
describe('historic-sql staged dir detection', () => {
it('detects manifest source', async () => {
const stagedDir = await tempDir();
await writeJson(stagedDir, 'manifest.json', manifest());
await expect(detectHistoricSqlStagedDir(stagedDir)).resolves.toBe(true);
});
it('detects unified table and patterns structure without manifest', async () => {
const stagedDir = await tempDir();
// An unrelated file sits alongside; no manifest.json is written in this case.
await writeFile(join(stagedDir, 'not-a-match.txt'), 'x', 'utf-8');
await writeJson(stagedDir, 'patterns-input.json', { templates: [] });
await writeJson(stagedDir, 'tables/public.orders.json', { table: 'public.orders' });
await expect(detectHistoricSqlStagedDir(stagedDir)).resolves.toBe(true);
});
it('does not detect unrelated directories', async () => {
const stagedDir = await tempDir();
// A manifest whose source is a different adapter must not match.
await writeJson(stagedDir, 'manifest.json', { source: 'notion' });
await expect(detectHistoricSqlStagedDir(stagedDir)).resolves.toBe(false);
});
});

View file

@ -1,89 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { asSchema } from 'ai';
import { createEmitHistoricSqlEvidenceTool } from './evidence-tool.js';
// Covers the tool's input schema, its write path for table-usage evidence, and its source-key guard.
describe('emit_historic_sql_evidence tool', () => {
it('exposes an AI SDK v6 tool input schema with top-level object type', async () => {
const tool = createEmitHistoricSqlEvidenceTool();
expect(await asSchema(tool.inputSchema).jsonSchema).toMatchObject({
type: 'object',
});
});
it('writes table usage evidence to the ignored run evidence directory', async () => {
// Stub for configService.writeFile; the assertions below inspect the arguments it received.
const writeFile = vi.fn(async () => ({ success: true, commitHash: null }));
const tool = createEmitHistoricSqlEvidenceTool();
const result = await tool.execute!(
{
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried by paid status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [],
staleSince: null,
},
},
// Second argument: tool-call options carrying the ingest session in experimental_context.
// Cast to `never` so the partial object is accepted by the execute signature.
{
toolCallId: 'call-1',
messages: [],
abortSignal: new AbortController().signal,
experimental_context: {
connectionId: 'warehouse',
session: {
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'historic-sql' },
configService: { writeFile },
},
},
} as never,
);
expect(result).toBe('Recorded historic-SQL table_usage evidence for public.orders.');
// The expected path embeds runId 'run-1' and a unit key derived from the table name.
expect(writeFile).toHaveBeenCalledWith(
'.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json',
expect.stringContaining('"kind": "table_usage"'),
'System User',
'system@example.com',
'Record historic-SQL evidence: historic-sql-table-public-orders',
{ skipLock: true },
);
});
it('rejects non-historic ingest sessions', async () => {
const tool = createEmitHistoricSqlEvidenceTool();
await expect(
tool.execute!(
{
kind: 'pattern',
rawPath: 'patterns-input.json',
pattern: {
slug: 'orders',
title: 'Orders',
narrative: 'Orders pattern.',
definitionSql: 'select * from public.orders',
tablesInvolved: ['public.orders'],
slRefs: ['orders'],
constituentTemplateIds: ['pg:1'],
},
},
{
toolCallId: 'call-1',
messages: [],
abortSignal: new AbortController().signal,
experimental_context: {
connectionId: 'warehouse',
session: {
// sourceKey is 'notion' here, which is what should trigger the rejection.
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' },
configService: { writeFile: vi.fn() },
},
},
} as never,
),
// The rejection is delivered as a resolved error string, not as a thrown error.
).resolves.toContain('Error: emit_historic_sql_evidence is only available during historic-sql ingest');
});
});

View file

@ -1,57 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
historicSqlEvidenceEnvelopeSchema,
historicSqlEvidencePath,
historicSqlPatternEvidenceSchema,
historicSqlTableUsageEvidenceSchema,
} from './evidence.js';
// Validates sample payloads against the evidence schemas and pins the evidence file path format.
describe('historic-sql evidence contracts', () => {
it('validates table usage evidence emitted by table digest WorkUnits', () => {
const parsed = historicSqlTableUsageEvidenceSchema.parse({
kind: 'table_usage',
connectionId: 'warehouse',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried for paid/refunded lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
},
});
expect(parsed.table).toBe('public.orders');
expect(parsed.usage.frequencyTier).toBe('high');
});
it('validates pattern evidence emitted by the patterns WorkUnit', () => {
// The payload is parsed by the envelope schema first and the result by the pattern schema.
const parsed = historicSqlPatternEvidenceSchema.parse(
historicSqlEvidenceEnvelopeSchema.parse({
kind: 'pattern',
connectionId: 'warehouse',
rawPath: 'patterns-input.json',
pattern: {
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Analysts compare order status changes by customer segment.',
definitionSql: 'select status, count(*) from public.orders group by status',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:1', 'pg:2'],
},
}),
);
expect(parsed.kind).toBe('pattern');
expect(parsed.pattern.slug).toBe('order-lifecycle-analysis');
});
it('builds a stable ignored evidence path from run and WorkUnit identity', () => {
// Arguments are (runId, unitKey); both appear verbatim in the expected path.
expect(historicSqlEvidencePath('run-1', 'historic-sql-table-public-orders')).toBe(
'.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json',
);
});
});

View file

@ -1,110 +0,0 @@
import { mkdtemp } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
import type { SourceAdapter } from '../../types.js';
import { HistoricSqlSourceAdapter } from './historic-sql.adapter.js';
import type { HistoricSqlReader } from './types.js';
/** Creates a uniquely named temp directory for an adapter test and resolves to its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-adapter-');
  return mkdtemp(prefix);
}
// Default SqlAnalysisPort stub: analyzeForFingerprint throws so any use of it fails the test,
// analyzeBatch returns an empty map, and validateReadOnly always reports ok.
const sqlAnalysis: SqlAnalysisPort = {
async analyzeForFingerprint() {
throw new Error('analyzeForFingerprint must not be used');
},
async analyzeBatch() {
return new Map();
},
async validateReadOnly() {
return { ok: true };
},
};
// Default reader stub: probe reports no warnings or info, and fetchAggregated yields nothing.
const reader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {},
};
// Checks the adapter's static metadata and a fetch -> detect -> chunk round trip with stubbed ports.
describe('HistoricSqlSourceAdapter', () => {
it('declares canonical adapter metadata', () => {
const adapter = new HistoricSqlSourceAdapter({ sqlAnalysis, reader, queryClient: {} });
expect(adapter.source).toBe('historic-sql');
expect(adapter.skillNames).toEqual(['historic_sql_table_digest', 'historic_sql_patterns']);
expect(adapter.reconcileSkillNames).toEqual([]);
// Viewed through the SourceAdapter interface, evidenceIndexing must not be set.
expect((adapter as SourceAdapter).evidenceIndexing).toBeUndefined();
expect(adapter.triageSupported).toBe(false);
});
it('fetches a unified aggregate snapshot and emits unified WorkUnits', async () => {
const stagedDir = await tempDir();
// Reader that yields a single template joining orders and customers.
const aggregateReader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {
yield {
templateId: 'pg:1',
canonicalSql:
'select o.status, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status',
dialect: 'postgres',
stats: {
executions: 25,
distinctUsers: 3,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 10,
p95RuntimeMs: 20,
errorRate: 0,
rowsProduced: 10,
},
topUsers: [{ user: 'analyst', executions: 25 }],
};
},
};
// Batch analysis stub keyed by the template id yielded above.
const batchSqlAnalysis: SqlAnalysisPort = {
async analyzeForFingerprint() {
throw new Error('analyzeForFingerprint must not be used');
},
async analyzeBatch() {
return new Map([
[
'pg:1',
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: { select: ['status'], join: ['customer_id', 'id'], groupBy: ['status'] },
},
],
]);
},
async validateReadOnly() {
return { ok: true };
},
};
// `now` is pinned to a fixed date so the run does not depend on the wall clock.
const adapter = new HistoricSqlSourceAdapter({
sqlAnalysis: batchSqlAnalysis,
reader: aggregateReader,
queryClient: {},
now: () => new Date('2026-05-11T00:00:00.000Z'),
});
await adapter.fetch({ dialect: 'postgres', minExecutions: 5 }, stagedDir, {
connectionId: 'warehouse',
sourceKey: 'historic-sql',
});
await expect(adapter.detect(stagedDir)).resolves.toBe(true);
// Expected order: the two table units (customers before orders), then the patterns shard.
await expect(adapter.chunk(stagedDir)).resolves.toMatchObject({
workUnits: [
{ unitKey: 'historic-sql-table-public-customers' },
{ unitKey: 'historic-sql-table-public-orders' },
{ unitKey: 'historic-sql-patterns-part-0001' },
],
});
});
});

View file

@ -1,286 +0,0 @@
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import YAML from 'yaml';
import type { AgentRunnerPort, RunLoopParams } from '../../../../context/llm/runtime-port.js';
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../../../../context/project/project.js';
import type { SqlAnalysisBatchItem, SqlAnalysisBatchResult, SqlAnalysisDialect, SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
import { searchLocalSlSources } from '../../../sl/local-sl.js';
import { searchLocalKnowledgePages } from '../../../wiki/local-knowledge.js';
import { runLocalIngest } from '../../local-ingest.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { HistoricSqlSourceAdapter } from './historic-sql.adapter.js';
import type { AggregatedTemplate, HistoricSqlReader, HistoricSqlUnifiedPullConfig } from './types.js';
/**
 * HistoricSqlReader test double for the acceptance test: the probe reports
 * nothing, and the aggregated fetch yields one fixed orders/customers template.
 */
class AcceptanceHistoricSqlReader implements HistoricSqlReader {
  /** Reports a clean probe with no warnings and no info messages. */
  async probe() {
    return { warnings: [], info: [] };
  }

  /** Yields a single canned template; all three arguments are ignored. */
  async *fetchAggregated(
    _client: unknown,
    _window: { start: Date; end: Date },
    _config: HistoricSqlUnifiedPullConfig,
  ): AsyncIterable<AggregatedTemplate> {
    const ordersLifecycle: AggregatedTemplate = {
      templateId: 'pg:orders-lifecycle',
      canonicalSql:
        'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id where o.status = $1 group by o.status, c.segment',
      dialect: 'postgres',
      stats: {
        executions: 42,
        distinctUsers: 4,
        firstSeen: '2026-05-01T00:00:00.000Z',
        lastSeen: '2026-05-11T00:00:00.000Z',
        p50RuntimeMs: 18,
        p95RuntimeMs: 84,
        errorRate: 0,
        rowsProduced: 420,
      },
      topUsers: [{ user: 'analyst@example.test', executions: 42 }],
    };
    yield ordersLifecycle;
  }
}
/**
 * AgentRunnerPort test double. For each work-unit loop it calls the
 * `emit_historic_sql_evidence` tool from the supplied tool set with canned
 * evidence chosen by the loop's `unitKey` telemetry tag.
 */
class HistoricSqlAcceptanceAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
// Loops not tagged as 'ingest-bundle-wu' end immediately without emitting evidence.
if (params.telemetryTags?.operationName !== 'ingest-bundle-wu') {
return { stopReason: 'natural' as const };
}
const emitEvidence = params.toolSet.emit_historic_sql_evidence;
// Fail loudly if the tool (or its execute function) is not in the tool set.
if (!emitEvidence?.execute) {
throw new Error('emit_historic_sql_evidence tool was not available to the historic-SQL WorkUnit');
}
// Table digest unit for public.orders.
if (params.telemetryTags.unitKey === 'historic-sql-table-public-orders') {
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Analysts repeatedly inspect paid order lifecycle by customer segment.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status', 'segment'],
commonJoins: [{ table: 'public.customers', on: ['customer_id', 'id'] }],
staleSince: null,
},
});
// The tool result is read through `.markdown` here; anything but the success text is an error.
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected orders evidence result: ${result.markdown}`);
}
}
// Table digest unit for public.customers.
if (params.telemetryTags.unitKey === 'historic-sql-table-public-customers') {
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.customers',
rawPath: 'tables/public.customers.json',
usage: {
narrative: 'Customers provide segment context for paid order lifecycle analysis.',
frequencyTier: 'mid',
commonFilters: [],
commonGroupBys: ['segment'],
commonJoins: [{ table: 'public.orders', on: ['id', 'customer_id'] }],
staleSince: null,
},
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected customers evidence result: ${result.markdown}`);
}
}
// Cross-table patterns unit for the first shard.
if (params.telemetryTags.unitKey === 'historic-sql-patterns-part-0001') {
const result = await emitEvidence.execute({
kind: 'pattern',
rawPath: 'patterns-input/part-0001.json',
pattern: {
slug: 'paid-order-lifecycle',
title: 'Paid Order Lifecycle',
narrative: 'Analysts join orders and customers to compare paid order lifecycle by segment.',
definitionSql:
'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status, c.segment',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:orders-lifecycle'],
},
});
if (!result.markdown.includes('Recorded historic-SQL pattern evidence')) {
throw new Error(`Unexpected pattern evidence result: ${result.markdown}`);
}
}
// Any other unit key falls through all three branches and ends the loop with no evidence.
return { stopReason: 'natural' as const };
});
}
/**
 * Builds the SqlAnalysisPort used by the acceptance test.
 * `analyzeForFingerprint` always throws; `analyzeBatch` and `validateReadOnly`
 * are mocks, so the test can assert on how often they were called.
 */
function acceptanceSqlAnalysis(): SqlAnalysisPort {
  return {
    analyzeForFingerprint: async () => {
      throw new Error('analyzeForFingerprint should not be used by unified historic-SQL ingest');
    },
    analyzeBatch: vi.fn(
      async (
        items: SqlAnalysisBatchItem[],
        _dialect: SqlAnalysisDialect,
      ): Promise<Map<string, SqlAnalysisBatchResult>> => {
        // Every item gets its own copy of the same orders/customers analysis, keyed by item id.
        const analysisById = new Map<string, SqlAnalysisBatchResult>();
        for (const item of items) {
          analysisById.set(item.id, {
            tablesTouched: ['public.orders', 'public.customers'],
            columnsByClause: {
              select: ['status', 'segment'],
              where: ['status'],
              join: ['customer_id', 'id'],
              groupBy: ['status', 'segment'],
            },
          });
        }
        return analysisById;
      },
    ),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };
}
/**
 * Overwrites the project's `ktx.yaml` with a historic-SQL test configuration,
 * reloads the project, seeds a schema shard for `orders` and `customers`
 * through the reloaded project's file store, and returns the reloaded project.
 */
async function writeHistoricSqlProject(project: KtxLocalProject): Promise<KtxLocalProject> {
  // NOTE(review): nested keys carry a single leading space exactly as found here;
  // confirm this matches the intended YAML nesting.
  const ktxYamlLines = [
    'connections:',
    ' warehouse:',
    ' driver: postgres',
    ' context:',
    ' queryHistory:',
    ' enabled: true',
    ' minExecutions: 2',
    'ingest:',
    ' adapters:',
    ' - historic-sql',
    ' embeddings:',
    ' backend: none',
    'storage:',
    ' state: sqlite',
    ' search: sqlite-fts5',
    ' git:',
    ' auto_commit: false',
    ' author: KTX Test <system@ktx.local>',
    '',
  ];
  const configPath = join(project.projectDir, 'ktx.yaml');
  await writeFile(configPath, ktxYamlLines.join('\n'), 'utf-8');

  // Reload so the returned project reflects the config written above.
  const loaded = await loadKtxProject({ projectDir: project.projectDir });

  const schemaShard = {
    tables: {
      orders: {
        table: 'public.orders',
        columns: [
          { name: 'id', type: 'string' },
          { name: 'status', type: 'string' },
          { name: 'customer_id', type: 'string' },
        ],
      },
      customers: {
        table: 'public.customers',
        columns: [
          { name: 'id', type: 'string' },
          { name: 'segment', type: 'string' },
        ],
      },
    },
  };
  await loaded.fileStore.writeFile(
    'semantic-layer/warehouse/_schema/public.yaml',
    YAML.stringify(schemaShard),
    'KTX Test',
    'system@ktx.local',
    'Seed schema shard',
  );
  return loaded;
}
// End-to-end acceptance: runs a local historic-SQL ingest with stubbed reader, SQL analysis, and
// agent runner, then checks the written files and both search surfaces.
describe('historic-SQL local ingest retrieval acceptance', () => {
let tempDir: string;
// Fresh temp directory per test, removed afterwards.
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-historic-sql-acceptance-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
it('projects table and pattern evidence into semantic-layer and wiki retrieval surfaces', async () => {
const initialized = await initKtxProject({ projectDir: join(tempDir, 'project') });
const project = await writeHistoricSqlProject(initialized);
const sqlAnalysis = acceptanceSqlAnalysis();
const agentRunner = new HistoricSqlAcceptanceAgentRunner();
const adapter = new HistoricSqlSourceAdapter({
reader: new AcceptanceHistoricSqlReader(),
queryClient: {},
sqlAnalysis,
now: () => new Date('2026-05-11T00:00:00.000Z'),
});
const result = await runLocalIngest({
project,
adapters: [adapter],
adapter: 'historic-sql',
connectionId: 'warehouse',
jobId: 'historic-sql-retrieval-acceptance',
agentRunner,
});
// One batch analysis call, three work units, and one agent loop per unit.
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledTimes(1);
expect(result.result.failedWorkUnits).toEqual([]);
expect(result.result.workUnitCount).toBe(3);
expect(agentRunner.runLoop).toHaveBeenCalledTimes(3);
const finalization = result.report.body.finalization;
expect(finalization).toBeDefined();
// Explicit guard so the accesses below are narrowed to a defined value.
if (!finalization) {
throw new Error('Expected historic-SQL finalization result');
}
expect(finalization).toMatchObject({
sourceKey: 'historic-sql',
status: 'success',
result: {
tableUsageMerged: 2,
patternPagesWritten: 1,
},
});
expect(finalization.declaredTouchedSources).toEqual(
expect.arrayContaining([
{ connectionId: 'warehouse', sourceName: 'customers' },
{ connectionId: 'warehouse', sourceName: 'orders' },
]),
);
// The orders narrative must appear in the schema shard, and the pattern in a wiki page.
await expect(readFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves
.toContain('Analysts repeatedly inspect paid order lifecycle by customer segment.');
await expect(readFile(join(project.projectDir, 'wiki/global/historic-sql-paid-order-lifecycle.md'), 'utf-8'))
.resolves.toContain('Paid Order Lifecycle');
// Reload the project from disk before querying the search surfaces.
const reloaded = await loadKtxProject({ projectDir: project.projectDir });
await expect(
searchLocalSlSources(reloaded, { connectionId: 'warehouse', query: 'paid order lifecycle', limit: 5 }),
).resolves.toEqual(expect.arrayContaining([
expect.objectContaining({
name: 'orders',
frequencyTier: 'high',
snippet: expect.stringContaining('<mark>'),
matchReasons: expect.arrayContaining(['lexical']),
}),
]));
// The wiki search is expected to return exactly one page.
await expect(
searchLocalKnowledgePages(reloaded, { query: 'paid order lifecycle', userId: 'local', limit: 5 }),
).resolves.toEqual([
expect.objectContaining({
key: 'historic-sql-paid-order-lifecycle',
summary: 'Paid Order Lifecycle',
matchReasons: expect.arrayContaining(['lexical']),
}),
]);
});
});

View file

@ -1,89 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES,
isHistoricSqlPatternInputShardPath,
serializedStagedPatternsInputByteLength,
splitHistoricSqlPatternInputs,
} from './pattern-inputs.js';
import type { StagedPatternsInput } from './types.js';
/** Element type of the `templates` array on `StagedPatternsInput`. */
type PatternTemplate = StagedPatternsInput['templates'][number];

/**
 * Builds a pattern-template fixture for the sharding tests.
 *
 * @param id - Template identifier.
 * @param tablesTouched - Tables the template references.
 * @param canonicalSql - SQL text for the template; defaults to `'select 1'`.
 * @returns A template whose bucket and dialect fields are fixed fixture values.
 */
function template(id: string, tablesTouched: string[], canonicalSql = 'select 1'): PatternTemplate {
  // Values every fixture shares; spread last so the key order matches the serialized layout.
  const fixedFields = {
    executionsBucket: '10-100',
    distinctUsersBucket: '2-5',
    dialect: 'postgres',
  } as const;
  return { id, canonicalSql, tablesTouched, ...fixedFields };
}
// Suite for the pattern-input helpers imported from './pattern-inputs.js':
// splitHistoricSqlPatternInputs, isHistoricSqlPatternInputShardPath,
// serializedStagedPatternsInputByteLength and HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES.
describe('historic-SQL pattern input sharding', () => {
// One single-table template plus three cross-table templates, split with maxBytes: 760.
// All four ids must appear in the audit input; only the cross-table ones may appear in shards.
it('keeps the audit input complete while sharding only cross-table pattern candidates', () => {
// Padded SQL makes each cross-table template large relative to the 760-byte budget used below.
const largeSql = `select * from public.orders join public.customers on true where marker = '${'x'.repeat(260)}'`;
const input: StagedPatternsInput = {
templates: [
template('single-table-orders', ['public.orders']),
template('orders-customers-2', ['public.orders', 'public.customers'], largeSql),
template('orders-customers-1', ['public.customers', 'public.orders'], largeSql),
template('orders-customers-payments', ['public.orders', 'public.customers', 'public.payments'], largeSql),
],
};
const result = splitHistoricSqlPatternInputs(input, { maxBytes: 760 });
expect(result.auditInput.templates.map((entry) => entry.id)).toEqual([
'orders-customers-1',
'orders-customers-2',
'orders-customers-payments',
'single-table-orders',
]);
expect(result.shards.length).toBeGreaterThan(1);
// Shard paths are expected to be numbered with a four-digit, 1-based suffix.
expect(result.shards.map((shard) => shard.path)).toEqual([
'patterns-input/part-0001.json',
'patterns-input/part-0002.json',
'patterns-input/part-0003.json',
]);
expect(result.shards.flatMap((shard) => shard.input.templates.map((entry) => entry.id))).toEqual([
'orders-customers-payments',
'orders-customers-1',
'orders-customers-2',
]);
// Every shard must respect the byte budget that was passed in.
expect(result.shards.every((shard) => shard.byteLength <= 760)).toBe(true);
expect(result.shards.flatMap((shard) => shard.input.templates).some((entry) => entry.id === 'single-table-orders')).toBe(false);
expect(result.warnings).toEqual([]);
});
// A single cross-table template with a 500-character payload and maxBytes: 240:
// it stays in the audit input, produces no shard, and yields one warning naming its id.
it('omits a single oversized template from shards and reports a manifest warning', () => {
const input: StagedPatternsInput = {
templates: [
template(
'oversized-cross-table',
['public.orders', 'public.customers'],
`select * from public.orders join public.customers on true where payload = '${'x'.repeat(500)}'`,
),
],
};
const result = splitHistoricSqlPatternInputs(input, { maxBytes: 240 });
expect(result.auditInput.templates.map((entry) => entry.id)).toEqual(['oversized-cross-table']);
expect(result.shards).toEqual([]);
expect(result.warnings).toEqual(['patterns_input_template_too_large:oversized-cross-table']);
});
// Accepts 'patterns-input/part-NNNN.json' paths; rejects the unsharded file name,
// a non-padded part number, and a non-JSON file in the same directory.
it('recognizes only generated pattern shard paths', () => {
expect(isHistoricSqlPatternInputShardPath('patterns-input/part-0001.json')).toBe(true);
expect(isHistoricSqlPatternInputShardPath('patterns-input/part-0012.json')).toBe(true);
expect(isHistoricSqlPatternInputShardPath('patterns-input.json')).toBe(false);
expect(isHistoricSqlPatternInputShardPath('patterns-input/part-1.json')).toBe(false);
expect(isHistoricSqlPatternInputShardPath('patterns-input/readme.md')).toBe(false);
});
// Guards the production constant against growing to 120_000 bytes or more, and checks
// that even an empty input serializes to a non-zero byte length.
it('uses a production byte budget below read_raw_file maximum size', () => {
expect(HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES).toBeLessThan(120_000);
expect(serializedStagedPatternsInputByteLength({ templates: [] })).toBeGreaterThan(0);
});
});

View file

@ -1,242 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import {
HistoricSqlExtensionMissingError,
HistoricSqlGrantsMissingError,
HistoricSqlVersionUnsupportedError,
} from './errors.js';
import { PostgresPgssReader } from './postgres-pgss-reader.js';
/** Canned result object handed back by the fake query client used in this test file. */
interface FakeQueryResult {
// Column names of the result set (e.g. 'server_version_num', 'has_role' in the fixtures below).
headers: string[];
// Result rows; each row is an array of cell values.
rows: unknown[][];
// Optional row count; the fixtures in this file omit it.
totalRows?: number;
// Optional error text; presumably mirrors an error reported in-band by the real client — confirm.
error?: string;
}
/**
 * Creates a fake query client whose `executeQuery` mock replays scripted outcomes in call order.
 *
 * Each call consumes the next scripted entry: a `FakeQueryResult` is returned, an `Error`
 * is thrown. Calling more often than there are scripted entries throws `'unexpected query'`.
 *
 * @param results - Scripted outcomes, in the order the queries are expected to run.
 * @returns An object exposing the `executeQuery` mock so tests can inspect recorded calls.
 */
function queryClient(results: Array<FakeQueryResult | Error>) {
  // Replay from a private copy so the caller's array is not drained by shift().
  const pending = [...results];
  const executeQuery = vi.fn(async (_query: string, _params?: unknown[]) => {
    const next = pending.shift();
    if (!next) {
      throw new Error('unexpected query');
    }
    if (next instanceof Error) {
      throw next;
    }
    return next;
  });
  return { executeQuery };
}
/**
 * Returns the SQL text passed to the `index`-th recorded `executeQuery` call.
 *
 * @param client - Fake client produced by `queryClient`.
 * @param index - Zero-based position of the recorded call.
 * @returns The first argument of that call.
 * @throws Error when no call was recorded at `index`.
 */
function executedSql(client: ReturnType<typeof queryClient>, index: number): string {
  const recordedArgs = client.executeQuery.mock.calls[index];
  if (recordedArgs) {
    const [sql] = recordedArgs;
    return sql;
  }
  throw new Error(`expected query client call ${index}`);
}
// Suite for PostgresPgssReader: probe() is driven by a scripted fake query client
// (results are consumed in call order), fetchAggregated() by an inline vi.fn stub.
describe('PostgresPgssReader aggregate path', () => {
// Happy path: five scripted results, then the exact SQL of each of the five probe queries is checked.
it('probes version, extension presence, grants, and tracking state', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4 on x86_64-apple-darwin']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[true]] },
{ headers: ['track'], rows: [['top']] },
{ headers: ['max'], rows: [['5000']] },
]);
const reader = new PostgresPgssReader();
await expect(reader.probe(client)).resolves.toEqual({
pgServerVersion: 'PostgreSQL 16.4 on x86_64-apple-darwin',
warnings: [],
info: [],
});
expect(executedSql(client, 0)).toContain("current_setting('server_version_num')::int");
expect(executedSql(client, 1)).toBe('SELECT 1 FROM pg_stat_statements LIMIT 1');
expect(executedSql(client, 2)).toBe(
"SELECT pg_has_role(current_user, 'pg_read_all_stats', 'USAGE') AS has_role",
);
expect(executedSql(client, 3)).toBe("SELECT current_setting('pg_stat_statements.track') AS track");
expect(executedSql(client, 4)).toBe("SELECT current_setting('pg_stat_statements.max') AS max");
});
// Only the version result is scripted; the single-call assertion proves no further query ran.
it('rejects PostgreSQL versions older than 14 without probing the extension', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[130012, 'PostgreSQL 13.12']],
},
]);
const reader = new PostgresPgssReader();
// The same promise is asserted twice: once for its fields, once for its class.
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlVersionUnsupportedError',
dialect: 'postgres',
detectedVersion: 'PostgreSQL 13.12',
minimumVersion: 'PostgreSQL 14',
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlVersionUnsupportedError);
expect(client.executeQuery).toHaveBeenCalledTimes(1);
});
// The second scripted entry is an Error, so the extension-presence query throws.
it('maps a missing pg_stat_statements relation to HistoricSqlExtensionMissingError', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
new Error('relation "pg_stat_statements" does not exist'),
]);
const reader = new PostgresPgssReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlExtensionMissingError',
dialect: 'postgres',
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError);
});
// Same error class as above, but the message and remediation must mention shared_preload_libraries.
it('maps pg_stat_statements preload failures to HistoricSqlExtensionMissingError with preload remediation', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
new Error('pg_stat_statements must be loaded via shared_preload_libraries'),
]);
const reader = new PostgresPgssReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlExtensionMissingError',
dialect: 'postgres',
message: 'pg_stat_statements is installed but not loaded via shared_preload_libraries.',
remediation: expect.stringContaining("shared_preload_libraries includes 'pg_stat_statements'"),
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlExtensionMissingError);
});
// The role check returns has_role = false, which must surface as a grants error with a GRANT hint.
it('maps missing pg_read_all_stats membership to HistoricSqlGrantsMissingError', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[false]] },
]);
const reader = new PostgresPgssReader();
const promise = reader.probe(client);
await expect(promise).rejects.toMatchObject({
name: 'HistoricSqlGrantsMissingError',
dialect: 'postgres',
remediation: 'GRANT pg_read_all_stats TO <connection role>;',
});
await expect(promise).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
});
// track = 'none' must resolve (not reject) and be reported through `warnings`.
it('returns a warning instead of failing when pg_stat_statements.track is none', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[true]] },
{ headers: ['track'], rows: [['none']] },
{ headers: ['max'], rows: [['5000']] },
]);
const reader = new PostgresPgssReader();
await expect(reader.probe(client)).resolves.toEqual({
pgServerVersion: 'PostgreSQL 16.4',
warnings: [
"pg_stat_statements.track is none; set it to top or all in the Postgres parameter group or config",
],
info: [],
});
});
// max = '1000' must be reported through `info` (not `warnings`) with a 5000 recommendation.
it('returns an info note when pg_stat_statements.max is below the recommended floor', async () => {
const client = queryClient([
{
headers: ['server_version_num', 'server_version'],
rows: [[160004, 'PostgreSQL 16.4']],
},
{ headers: ['?column?'], rows: [[1]] },
{ headers: ['has_role'], rows: [[true]] },
{ headers: ['track'], rows: [['top']] },
{ headers: ['max'], rows: [['1000']] },
]);
const reader = new PostgresPgssReader();
await expect(reader.probe(client)).resolves.toEqual({
pgServerVersion: 'PostgreSQL 16.4',
warnings: [],
info: [
'pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn',
],
});
});
// fetchAggregated(): the stub answers the pg_stat_statements_info query with a reset timestamp
// and checks the aggregate query's GROUP BY / HAVING clauses and its bound parameter.
it('aggregates pg_stat_statements rows by queryid and query', async () => {
const executeQuery = vi.fn(async (sql: string, params?: unknown[]) => {
if (sql.includes('pg_stat_statements_info')) {
return { headers: ['stats_reset', 'dealloc'], rows: [['2026-05-01T00:00:00.000Z', 1]] };
}
expect(sql).toContain('GROUP BY queryid, query');
expect(sql).toContain('HAVING SUM(calls) >= $1');
// The single bound parameter equals the minExecutions value (5) passed in the config below.
expect(params).toEqual([5]);
return {
headers: ['template_id', 'canonical_sql', 'executions', 'distinct_users', 'mean_ms', 'rows_produced', 'top_users'],
rows: [
[
'123',
'select status from public.orders',
'42',
'3',
'11.5',
'100',
JSON.stringify([{ user: 'analyst', executions: 40 }]),
],
],
};
});
const reader = new PostgresPgssReader();
const rows = [];
for await (const row of reader.fetchAggregated(
{ executeQuery },
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
{ dialect: 'postgres', minExecutions: 5, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
)) {
rows.push(row);
}
// Expected values mirror the fixture: string cells come back as numbers, firstSeen equals the
// stubbed stats_reset, lastSeen equals the window end, and both percentiles equal mean_ms.
expect(rows).toEqual([
{
templateId: '123',
canonicalSql: 'select status from public.orders',
dialect: 'postgres',
stats: {
executions: 42,
distinctUsers: 3,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 11.5,
p95RuntimeMs: 11.5,
errorRate: 0,
rowsProduced: 100,
},
topUsers: [{ user: 'analyst', executions: 40 }],
},
]);
});
});

View file

@ -1,457 +0,0 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import YAML from 'yaml';
import { describe, expect, it } from 'vitest';
import { projectHistoricSqlEvidence } from './projection.js';
/**
 * Creates a fresh, uniquely named working directory under the OS temp directory.
 *
 * @returns The path of the newly created directory (prefixed `historic-sql-projection-`).
 */
async function tempWorkdir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-projection-');
  const created = await mkdtemp(prefix);
  return created;
}
/**
 * Writes UTF-8 text to `relPath` beneath `root`, creating missing parent directories first.
 *
 * @param root - Base directory.
 * @param relPath - Path of the file relative to `root`.
 * @param content - Text to write.
 */
async function writeText(root: string, relPath: string, content: string): Promise<void> {
  const filePath = join(root, relPath);
  // Joining with '..' resolves to the directory that contains the file.
  const parentDir = join(filePath, '..');
  await mkdir(parentDir, { recursive: true });
  await writeFile(filePath, content, 'utf-8');
}
/**
 * Serializes `value` as two-space-indented JSON with a trailing newline and writes it
 * to `relPath` beneath `root` via `writeText`.
 *
 * @param root - Base directory.
 * @param relPath - Path of the file relative to `root`.
 * @param value - Value to serialize.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const serialized = JSON.stringify(value, null, 2);
  await writeText(root, relPath, serialized + '\n');
}
// Suite for projectHistoricSqlEvidence. Each test builds a throwaway workdir containing some of:
// a semantic-layer _schema shard, a raw-sources manifest for the sync, staged table files,
// existing wiki pages, and evidence JSON under .ktx/ingest-evidence; it then runs the projection
// and inspects the returned summary and the files on disk.
describe('projectHistoricSqlEvidence', () => {
// Existing usage has generated keys plus an extra 'ownerNote'; after merging table_usage evidence
// the generated keys are replaced and 'ownerNote' is still present.
it('merges table usage into matching _schema shards and preserves external usage keys', async () => {
const workdir = await tempWorkdir();
await writeText(
workdir,
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
usage: {
narrative: 'Old generated usage.',
frequencyTier: 'low',
commonFilters: ['old_status'],
commonJoins: [],
ownerNote: 'keep me',
},
columns: [{ name: 'id', type: 'string' }],
},
},
}),
);
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 1,
touchedTableCount: 1,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.orders.json', { table: 'public.orders' });
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/orders.json', {
kind: 'table_usage',
connectionId: 'warehouse',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried for lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
},
});
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
expect(result.actions).toEqual(
expect.arrayContaining([
expect.objectContaining({
target: 'sl',
key: 'orders',
rawPaths: ['tables/public.orders.json'],
}),
]),
);
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
expect(shard.tables.orders.usage).toEqual({
ownerNote: 'keep me',
narrative: 'Orders are repeatedly queried for lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
});
});
// Two pre-existing pattern pages: one over the same tables as the incoming pattern (its slug is
// expected to be reused) and one over an unrelated table (expected to gain a stale_since stamp).
it('writes pattern pages, reuses similar slugs, and marks missing old pattern pages stale', async () => {
const workdir = await tempWorkdir();
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 2,
touchedTableCount: 2,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.orders.json', { table: 'public.orders' });
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' });
await writeText(
workdir,
'wiki/global/historic-sql-old-order-lifecycle.md',
[
'---',
YAML.stringify({
summary: 'Old order lifecycle page',
tags: ['historic-sql', 'pattern'],
refs: [],
sl_refs: ['orders'],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.orders', 'public.customers'],
fingerprints: ['pg:1'],
}).trimEnd(),
'---',
'',
'Old body',
'',
].join('\n'),
);
await writeText(
workdir,
'wiki/global/historic-sql-retired-pattern.md',
[
'---',
YAML.stringify({
summary: 'Retired pattern',
tags: ['historic-sql', 'pattern'],
refs: [],
sl_refs: [],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.tickets'],
fingerprints: ['pg:9'],
}).trimEnd(),
'---',
'',
'Retired body',
'',
].join('\n'),
);
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/pattern.json', {
kind: 'pattern',
connectionId: 'warehouse',
rawPath: 'patterns-input.json',
pattern: {
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Analysts compare order status with customer segment.',
definitionSql: 'select * from public.orders join public.customers on customers.id = orders.customer_id',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:1', 'pg:2'],
},
});
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.patternPagesWritten).toBe(1);
// The new pattern must land on the existing page key rather than on its own slug.
expect(result.changedWikiPageKeys).toContain('historic-sql-old-order-lifecycle');
expect(result.actions).toEqual(
expect.arrayContaining([
expect.objectContaining({
target: 'wiki',
key: 'historic-sql-old-order-lifecycle',
rawPaths: ['patterns-input.json'],
}),
]),
);
await expect(readFile(join(workdir, 'wiki/global/historic-sql-old-order-lifecycle.md'), 'utf-8')).resolves.toContain(
'Order Lifecycle Analysis',
);
// The expected stale_since value equals the manifest's fetchedAt timestamp.
await expect(readFile(join(workdir, 'wiki/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain(
'stale_since: "2026-05-11T00:00:00.000Z"',
);
});
// A page tagged 'archived' with a stale_since stamp exists at the pattern's own slug; new evidence
// for that slug must overwrite it so neither the old body nor the 'archived' tag remains.
it('rewrites a reappearing archived pattern at the flat slug', async () => {
const workdir = await tempWorkdir();
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 2,
touchedTableCount: 2,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 30,
});
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.orders.json', { table: 'public.orders' });
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' });
await writeText(
workdir,
'wiki/global/historic-sql-order-lifecycle-analysis.md',
[
'---',
YAML.stringify({
summary: 'Archived order lifecycle page',
tags: ['historic-sql', 'pattern', 'archived'],
refs: [],
sl_refs: ['orders'],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.orders', 'public.customers'],
fingerprints: ['pg:1'],
stale_since: '2026-01-01T00:00:00.000Z',
}).trimEnd(),
'---',
'',
'Archived body',
'',
].join('\n'),
);
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/pattern.json', {
kind: 'pattern',
connectionId: 'warehouse',
rawPath: 'patterns-input.json',
pattern: {
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Analysts compare order status with customer segment again.',
definitionSql: 'select * from public.orders join public.customers on customers.id = orders.customer_id',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:1', 'pg:2'],
},
});
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.patternPagesWritten).toBe(1);
const page = await readFile(join(workdir, 'wiki/global/historic-sql-order-lifecycle-analysis.md'), 'utf-8');
expect(page).toContain('Analysts compare order status with customer segment again.');
expect(page).not.toContain('Archived body');
// Case-sensitive check: the lowercase 'archived' tag must be gone from the rewritten page.
expect(page).not.toContain('archived');
});
// No evidence files are written for this run; an already-archived page must be left alone
// and must not be counted as newly archived or newly stale.
it('leaves already archived pattern pages stable when they are still absent', async () => {
const workdir = await tempWorkdir();
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 30,
});
await writeText(
workdir,
'wiki/global/historic-sql-retired-pattern.md',
[
'---',
YAML.stringify({
summary: 'Retired pattern',
tags: ['historic-sql', 'pattern', 'archived'],
refs: [],
sl_refs: [],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.tickets'],
fingerprints: ['pg:9'],
stale_since: '2026-01-01T00:00:00.000Z',
}).trimEnd(),
'---',
'',
'Archived retired body',
'',
].join('\n'),
);
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.archivedPatternPages).toBe(0);
expect(result.stalePatternPagesMarked).toBe(0);
await expect(readFile(join(workdir, 'wiki/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain(
'Archived retired body',
);
});
// The shard has usage for 'orders' but this run's only evidence is for 'public.customers':
// 'orders' usage is reset to the stale placeholder (keeping 'ownerNote'), and an old
// 'query-pattern' wiki page keeps its body.
it('marks missing table usage stale without deleting old query pages', async () => {
const workdir = await tempWorkdir();
await writeText(
workdir,
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
usage: {
narrative: 'Orders were active before.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
ownerNote: 'keep analyst annotation',
},
columns: [{ name: 'id', type: 'string' }],
},
},
}),
);
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/customers.json', {
kind: 'table_usage',
connectionId: 'warehouse',
table: 'public.customers',
rawPath: 'tables/public.customers.json',
usage: {
narrative: 'Customers were queried.',
frequencyTier: 'low',
commonFilters: [],
commonJoins: [],
staleSince: null,
},
});
await writeText(
workdir,
'wiki/global/historic-sql-old-template.md',
[
'---',
YAML.stringify({
summary: 'Old template page',
tags: ['historic-sql', 'query-pattern'],
refs: [],
sl_refs: ['orders'],
usage_mode: 'auto',
source: 'historic-sql',
tables: ['public.orders'],
fingerprints: ['old:1'],
}).trimEnd(),
'---',
'',
'Old body',
'',
].join('\n'),
);
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.staleTablesMarked).toBe(1);
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
// The stale-marking action exists but carries no rawPaths.
const staleAction = result.actions.find((action) => action.target === 'sl' && action.key === 'orders');
expect(staleAction).toEqual(expect.objectContaining({ target: 'sl', key: 'orders' }));
expect(staleAction?.rawPaths).toBeUndefined();
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
// The expected staleSince value equals the manifest's fetchedAt timestamp.
expect(shard.tables.orders.usage).toEqual({
ownerNote: 'keep analyst annotation',
narrative: 'No recent historic SQL usage was observed in the latest snapshot.',
frequencyTier: 'unused',
commonFilters: [],
commonGroupBys: [],
commonJoins: [],
staleSince: '2026-05-11T00:00:00.000Z',
});
await expect(readFile(join(workdir, 'wiki/global/historic-sql-old-template.md'), 'utf-8')).resolves.toContain(
'Old body',
);
});
// With `overrideReplay` set and no evidence written for the run, every counter must be zero
// and no sources, wiki keys, or actions may be reported.
it('does not mark stale or archive pages when override replay has no current-run evidence', async () => {
const workdir = await tempWorkdir();
await writeText(
workdir,
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
usage: {
narrative: 'Orders were active before.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status'],
commonJoins: [],
},
columns: [{ name: 'id', type: 'string' }],
},
},
}),
);
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/override-sync/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
const result = await projectHistoricSqlEvidence({
workdir,
connectionId: 'warehouse',
syncId: 'override-sync',
runId: 'override-run',
overrideReplay: {
priorJobId: 'prior-job',
priorRunId: 'prior-run',
priorSyncId: 'prior-sync',
// NOTE(review): this path is 'tables/public/orders.json', while other fixtures in this suite
// use 'tables/public.orders.json' — confirm the slash-separated form is intentional.
evictionRawPaths: ['tables/public/orders.json'],
},
});
expect(result.tableUsageMerged).toBe(0);
expect(result.staleTablesMarked).toBe(0);
expect(result.patternPagesWritten).toBe(0);
expect(result.stalePatternPagesMarked).toBe(0);
expect(result.archivedPatternPages).toBe(0);
expect(result.touchedSources).toEqual([]);
expect(result.changedWikiPageKeys).toEqual([]);
expect(result.actions).toEqual([]);
});
});

View file

@ -1,36 +0,0 @@
import { describe, expect, it } from 'vitest';
import { compileHistoricSqlRedactionPatterns, redactHistoricSqlText } from './redaction.js';
// Suite for compileHistoricSqlRedactionPatterns and redactHistoricSqlText from './redaction.js'.
describe('historic-SQL redaction', () => {
// Two patterns: a case-sensitive one and one with a leading '(?i)'. The mixed-case
// 'Secret_Token_9f' must still be replaced, and every match becomes '[REDACTED]'.
it('redacts regex matches and supports the (?i) case-insensitive prefix', () => {
const redactors = compileHistoricSqlRedactionPatterns([
'sk_live_[A-Za-z0-9]+',
'(?i)secret_token_[a-z0-9]+',
]);
const sql =
"select * from public.api_events where api_key = 'sk_live_abc123' and note = 'Secret_Token_9f'"; // pragma: allowlist secret
expect(redactHistoricSqlText(sql, redactors)).toBe(
"select * from public.api_events where api_key = '[REDACTED]' and note = '[REDACTED]'",
);
});
// An empty pattern list must leave the SQL text unchanged.
it('returns the original SQL text when no redaction patterns are configured', () => {
const sql = "select * from public.orders where status = 'paid'";
expect(redactHistoricSqlText(sql, compileHistoricSqlRedactionPatterns([]))).toBe(sql);
});
// A syntactically invalid regex must fail at compile time with a message naming the config entry.
it('throws a config-focused error for invalid redaction regex patterns', () => {
expect(() => compileHistoricSqlRedactionPatterns(['[broken'])).toThrow(
'Invalid historicSql.redactionPatterns entry "[broken"',
);
});
// A whitespace-only pattern is rejected with the same style of message.
it('throws a config-focused error for empty redaction regex patterns', () => {
expect(() => compileHistoricSqlRedactionPatterns([' '])).toThrow(
'Invalid historicSql.redactionPatterns entry " "',
);
});
});

View file

@ -1,74 +0,0 @@
import { describe, expect, it } from 'vitest';
import { z } from 'zod';
import {
patternOutputSchema,
patternsArraySchema,
tableUsageOutputSchema,
} from './skill-schemas.js';
// Suite for the zod schemas exported from './skill-schemas.js'.
describe('historic-sql skill schemas', () => {
// The input carries an extra 'analystNote' key that must survive parsing.
it('accepts table usage output and preserves future keys', () => {
const parsed = tableUsageOutputSchema.parse({
narrative: 'Orders are queried for paid/refunded lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
analystNote: 'preserve me',
});
expect(parsed).toMatchObject({
narrative: 'Orders are queried for paid/refunded lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
analystNote: 'preserve me',
});
});
// 'sometimes' is not an accepted frequencyTier, so safeParse must report failure.
it('rejects invalid frequency tiers', () => {
const result = tableUsageOutputSchema.safeParse({
narrative: 'Orders are queried often.',
frequencyTier: 'sometimes',
commonFilters: [],
commonJoins: [],
});
expect(result.success).toBe(false);
});
// A one-element pattern array must parse to an object equal to its input.
it('accepts pattern outputs used for wiki projection', () => {
const parsed = patternsArraySchema.parse([
{
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Teams inspect order status by customer and month.',
definitionSql: 'select status, count(*) from public.orders group by status',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['template_1', 'template_2'],
},
]);
expect(parsed[0]).toEqual({
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Teams inspect order status by customer and month.',
definitionSql: 'select status, count(*) from public.orders group by status',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['template_1', 'template_2'],
});
});
// Both schemas must convert through z.toJSONSchema into an object-typed JSON schema.
it('exports zod schemas that can produce JSON schema for prompt prefixes', () => {
const tableUsageJsonSchema = z.toJSONSchema(tableUsageOutputSchema);
const patternJsonSchema = z.toJSONSchema(patternOutputSchema);
expect(tableUsageJsonSchema).toMatchObject({ type: 'object' });
expect(patternJsonSchema).toMatchObject({ type: 'object' });
});
});

View file

@ -1,148 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { HistoricSqlGrantsMissingError } from './errors.js';
import { SnowflakeHistoricSqlQueryHistoryReader } from './snowflake-query-history-reader.js';
/** Canned result object handed back by the fake query client used in this test file. */
interface FakeQueryResult {
// Column names of the result set.
headers: string[];
// Result rows; each row is an array of cell values.
rows: unknown[][];
// Row count reported alongside the rows (required in this file's fixtures).
totalRows: number;
// Optional in-band error text; the probe test below sets it to simulate a failed query.
error?: string;
}
/**
 * Creates a fake query client whose `executeQuery` mock replays scripted results in call order.
 *
 * Each call returns the next scripted result; calling more often than there are scripted
 * results throws `'unexpected query'`.
 *
 * @param results - Scripted results, in the order the queries are expected to run.
 * @returns An object exposing the `executeQuery` mock so tests can inspect recorded calls.
 */
function queryClient(results: FakeQueryResult[]) {
  // Replay from a private copy so the caller's array is not drained by shift().
  const pending = [...results];
  const executeQuery = vi.fn(async (_query: string) => {
    const next = pending.shift();
    if (!next) {
      throw new Error('unexpected query');
    }
    return next;
  });
  return { executeQuery };
}
/**
 * Returns the SQL text passed to the first recorded `executeQuery` call.
 *
 * @param client - Fake client produced by `queryClient`.
 * @returns The first argument of the first recorded call.
 * @throws Error when the client was never called.
 */
function firstQuery(client: ReturnType<typeof queryClient>): string {
  const [initialCall] = client.executeQuery.mock.calls;
  if (initialCall) {
    return initialCall[0];
  }
  throw new Error('expected query client to be called');
}
// Suite for SnowflakeHistoricSqlQueryHistoryReader: probe() and fetchAggregated() are driven
// by fake query clients that return scripted results or throw.
describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
// A successful probe resolves with empty warnings/info and issues exactly the SELECT asserted below.
it('probes SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY', async () => {
const client = queryClient([{ headers: ['1'], rows: [[1]], totalRows: 1 }]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(reader.probe(client)).resolves.toEqual({ warnings: [], info: [] });
expect(client.executeQuery).toHaveBeenCalledWith(
'SELECT 1 FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY LIMIT 1',
);
});
// The client returns normally but the result carries an `error` string; this must be
// surfaced as a grants error with a GRANT remediation hint.
it('turns probe result errors into HistoricSqlGrantsMissingError', async () => {
const client = queryClient([{ headers: [], rows: [], totalRows: 0, error: 'Object does not exist or not authorized' }]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(reader.probe(client)).rejects.toMatchObject({
name: 'HistoricSqlGrantsMissingError',
dialect: 'snowflake',
remediation: 'GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <connection role>;',
});
});
// Here executeQuery itself throws; the same error class is expected.
it('turns thrown probe failures into HistoricSqlGrantsMissingError', async () => {
const client = {
executeQuery: vi.fn(async () => {
throw new Error('permission denied');
}),
};
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(reader.probe(client)).rejects.toBeInstanceOf(HistoricSqlGrantsMissingError);
});
// One scripted aggregate row; the test checks fragments of the generated SQL and a subset
// of the mapped output (toMatchObject ignores fields that are not listed).
it('fetches aggregated Snowflake query templates', async () => {
const client = queryClient([
{
headers: [
'template_id',
'canonical_sql',
'executions',
'distinct_users',
'first_seen',
'last_seen',
'p50_ms',
'p95_ms',
'error_rate',
'rows_produced',
'top_users',
],
rows: [
[
'hash-1',
'select status from orders',
42,
3,
'2026-05-01T00:00:00.000Z',
'2026-05-11T00:00:00.000Z',
12,
40,
0.05,
100,
JSON.stringify([{ user: 'ANALYST', executions: 1 }]),
],
],
totalRows: 1,
},
]);
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
const rows = [];
for await (const row of reader.fetchAggregated(
client,
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
{ dialect: 'snowflake', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
)) {
rows.push(row);
}
const sql = firstQuery(client);
expect(sql).toContain('SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY');
expect(sql).toContain('COUNT(*) AS executions');
expect(sql).toContain('GROUP BY query_hash');
// The threshold 5 in the HAVING clause equals the minExecutions value passed in the config above.
expect(sql).toContain('HAVING COUNT(*) >= 5');
expect(rows).toMatchObject([
{
templateId: 'hash-1',
stats: {
executions: 42,
errorRate: 0.05,
},
topUsers: [{ user: 'ANALYST', executions: 1 }],
},
]);
});
// An empty object stands in for a client without executeQuery; iterating must reject
// before yielding anything, so the loop body is never expected to run.
it('throws a clear error when the query client cannot execute SQL', async () => {
const reader = new SnowflakeHistoricSqlQueryHistoryReader();
await expect(async () => {
for await (const _row of reader.fetchAggregated(
{},
{ start: new Date(), end: new Date() },
{
dialect: 'snowflake',
minExecutions: 5,
windowDays: 90,
enabledTables: [],
filters: { dropTrivialProbes: true },
redactionPatterns: [],
staleArchiveAfterDays: 90,
},
)) {
throw new Error('unreachable');
}
}).rejects.toThrow('Historic SQL Snowflake reader requires a query client with executeQuery(query)');
});
});

View file

@ -1,436 +0,0 @@
import { mkdtemp, readFile, readdir } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
import { stageHistoricSqlAggregatedSnapshot } from './stage-unified.js';
import type { AggregatedTemplate, HistoricSqlReader } from './types.js';
/** Creates a uniquely named scratch directory under the OS temp dir and resolves to its path. */
async function tempDir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-unified-stage-');
  return mkdtemp(prefix);
}
/**
 * Reads `relPath` (resolved against `root`) as UTF-8 and parses it as JSON.
 * The result is cast to `T` without any runtime validation.
 */
async function readJson<T>(root: string, relPath: string): Promise<T> {
  const absolutePath = join(root, relPath);
  const raw = await readFile(absolutePath, 'utf-8');
  return JSON.parse(raw) as T;
}
/**
 * Builds an AggregatedTemplate fixture. `templateId` and `canonicalSql` are required;
 * `dialect`, `stats` and `topUsers` fall back to fixed defaults when nullish.
 */
function aggregate(overrides: Partial<AggregatedTemplate> & { templateId: string; canonicalSql: string }): AggregatedTemplate {
  const { templateId, canonicalSql } = overrides;
  const fallbackStats = {
    executions: 42,
    distinctUsers: 3,
    firstSeen: '2026-05-01T00:00:00.000Z',
    lastSeen: '2026-05-11T00:00:00.000Z',
    p50RuntimeMs: 20,
    p95RuntimeMs: 80,
    errorRate: 0,
    rowsProduced: 100,
  };
  return {
    templateId,
    canonicalSql,
    dialect: overrides.dialect ?? 'postgres',
    stats: overrides.stats ?? fallbackStats,
    topUsers: overrides.topUsers ?? [{ user: 'analyst', executions: 40 }],
  };
}
describe('stageHistoricSqlAggregatedSnapshot', () => {
it('batch parses templates and writes stable table and patterns artifacts', async () => {
  const stagedDir = await tempDir();
  // Shared by the reader fixture and the analyzeBatch assertion so the two cannot drift apart.
  const ordersByStatusSql =
    'select o.status, count(*) from public.orders o join public.customers c on c.id = o.customer_id where o.created_at >= $1 group by o.status';
  const reader: HistoricSqlReader = {
    async probe() {
      return { warnings: ['pg_stat_statements.track is none; aggregation still proceeds'], info: [] };
    },
    // Three templates: a healthy join query, one whose only user matches the `^svc_`
    // service-account pattern, and one the analysis stub reports as a parse failure.
    async *fetchAggregated() {
      yield aggregate({
        templateId: 'orders-by-status',
        canonicalSql: ordersByStatusSql,
      });
      yield aggregate({
        templateId: 'service-account-only',
        canonicalSql: 'select * from public.orders where id = $1',
        stats: {
          executions: 20,
          distinctUsers: 1,
          firstSeen: '2026-05-01T00:00:00.000Z',
          lastSeen: '2026-05-11T00:00:00.000Z',
          p50RuntimeMs: 5,
          p95RuntimeMs: 10,
          errorRate: 0,
          rowsProduced: 1,
        },
        topUsers: [{ user: 'svc_loader', executions: 20 }],
      });
      yield aggregate({
        templateId: 'bad-parse',
        canonicalSql: 'select broken from',
      });
    },
  };
  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(async () => new Map([
      [
        'orders-by-status',
        {
          tablesTouched: ['public.orders', 'public.customers'],
          columnsByClause: {
            select: ['status'],
            where: ['created_at'],
            join: ['customer_id'],
            groupBy: ['status'],
          },
        },
      ],
      ['bad-parse', { tablesTouched: [], columnsByClause: {}, error: 'parse failed' }],
    ])),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: {
      dialect: 'postgres',
      filters: {
        serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
      },
    },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  // One batch call, and the service-account-only template is not part of it.
  expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledTimes(1);
  expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
    [
      {
        id: 'orders-by-status',
        sql: ordersByStatusSql,
      },
      { id: 'bad-parse', sql: 'select broken from' },
    ],
    'postgres',
  );

  // readdir() does not promise any entry order, so sort before comparing to keep
  // the assertion independent of platform and filesystem.
  const tableFiles = (await readdir(join(stagedDir, 'tables'))).sort();
  expect(tableFiles).toEqual(['public.customers.json', 'public.orders.json']);

  const manifest = await readJson<Record<string, unknown>>(stagedDir, 'manifest.json');
  expect(manifest).toMatchObject({
    source: 'historic-sql',
    connectionId: 'warehouse',
    dialect: 'postgres',
    snapshotRowCount: 3,
    touchedTableCount: 2,
    parseFailures: 1,
    warnings: ['parse_failed:bad-parse'],
    probeWarnings: ['pg_stat_statements.track is none; aggregation still proceeds'],
    staleArchiveAfterDays: 90,
  });

  const orders = await readJson<Record<string, any>>(stagedDir, 'tables/public.orders.json');
  expect(orders).toMatchObject({
    table: 'public.orders',
    stats: {
      executionsBucket: '10-100',
      distinctUsersBucket: '2-5',
      errorRateBucket: 'none',
      p95RuntimeBucket: '<100ms',
      recencyBucket: 'current',
    },
    columnsByClause: {
      select: [['status', 'high']],
      where: [['created_at', 'high']],
      join: [['customer_id', 'high']],
      groupBy: [['status', 'high']],
    },
    observedJoins: [{ withTable: 'public.customers', on: ['customer_id'], freq: 'high' }],
    topTemplates: [
      {
        id: 'orders-by-status',
        topUsers: [{ user: 'analyst' }],
      },
    ],
  });
  expect(orders.topTemplates[0].canonicalSql).toContain('group by o.status');

  // Only the successfully parsed template reaches the patterns input.
  const patterns = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
  expect(patterns.templates).toEqual([
    {
      id: 'orders-by-status',
      canonicalSql: expect.stringContaining('public.orders'),
      tablesTouched: ['public.customers', 'public.orders'],
      executionsBucket: '10-100',
      distinctUsersBucket: '2-5',
      dialect: 'postgres',
    },
  ]);
});
it('redacts configured SQL substrings in staged artifacts while analyzing original SQL', async () => {
  const stagedDir = await tempDir();
  const originalSql =
    "select * from public.api_events where api_key = 'sk_live_abc123' and note = 'Secret_Token_9f'"; // pragma: allowlist secret
  const reader: HistoricSqlReader = {
    async probe() {
      return { warnings: [], info: [] };
    },
    async *fetchAggregated() {
      yield aggregate({
        templateId: 'api-events-with-secret',
        canonicalSql: originalSql,
        stats: {
          executions: 15,
          distinctUsers: 2,
          firstSeen: '2026-05-01T00:00:00.000Z',
          lastSeen: '2026-05-11T00:00:00.000Z',
          p50RuntimeMs: 12,
          p95RuntimeMs: 25,
          errorRate: 0,
          rowsProduced: 15,
        },
      });
    },
  };
  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(async () => new Map([
      [
        'api-events-with-secret',
        {
          tablesTouched: ['public.api_events'],
          columnsByClause: {
            select: [],
            where: ['api_key', 'note'],
            join: [],
            groupBy: [],
          },
        },
      ],
    ])),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  // NOTE(review): the second pattern uses a `(?i)` prefix and must match the
  // mixed-case 'Secret_Token_9f'; presumably the stager treats it as a
  // case-insensitive flag. Confirm against the implementation.
  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: {
      dialect: 'postgres',
      redactionPatterns: ['sk_live_[A-Za-z0-9]+', '(?i)secret_token_[a-z0-9]+'],
    },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  // Analysis must receive the unredacted SQL.
  expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
    [{ id: 'api-events-with-secret', sql: originalSql }],
    'postgres',
  );

  // Both persisted artifacts are checked as raw text so no nested field can hide a secret.
  const tableJson = await readFile(join(stagedDir, 'tables/public.api_events.json'), 'utf-8');
  const patternsJson = await readFile(join(stagedDir, 'patterns-input.json'), 'utf-8');
  for (const artifact of [tableJson, patternsJson]) {
    expect(artifact).not.toContain('sk_live_abc123');
    expect(artifact).not.toContain('Secret_Token_9f');
    expect(artifact).toContain('[REDACTED]');
  }
});
it('limits staged table artifacts to configured enabled tables', async () => {
  const stagedDir = await tempDir();
  // Two templates touch enabled tables (one schema-qualified, one bare name); the third does not.
  const reader: HistoricSqlReader = {
    async probe() {
      return { warnings: [], info: [] };
    },
    async *fetchAggregated() {
      yield aggregate({
        templateId: 'selected-qualified',
        canonicalSql: 'select count(*) from orbit_analytics.int_active_contract_arr',
      });
      yield aggregate({
        templateId: 'selected-unqualified',
        canonicalSql: 'select count(*) from int_customer_health_signals',
      });
      yield aggregate({
        templateId: 'unselected',
        canonicalSql: 'select count(*) from orbit_raw.accounts',
      });
    },
  };
  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(async () => new Map([
      [
        'selected-qualified',
        {
          tablesTouched: ['orbit_analytics.int_active_contract_arr'],
          columnsByClause: { select: [], where: [], join: [], groupBy: [] },
        },
      ],
      [
        'selected-unqualified',
        {
          tablesTouched: ['int_customer_health_signals'],
          columnsByClause: { select: [], where: [], join: [], groupBy: [] },
        },
      ],
      [
        'unselected',
        {
          tablesTouched: ['orbit_raw.accounts'],
          columnsByClause: { select: [], where: [], join: [], groupBy: [] },
        },
      ],
    ])),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: {
      dialect: 'postgres',
      enabledTables: [
        'orbit_analytics.int_active_contract_arr',
        'orbit_analytics.int_customer_health_signals',
      ],
    },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  // readdir() does not promise any entry order, so sort before comparing to keep
  // the assertion independent of platform and filesystem.
  const tableFiles = (await readdir(join(stagedDir, 'tables'))).sort();
  expect(tableFiles).toEqual([
    'int_customer_health_signals.json',
    'orbit_analytics.int_active_contract_arr.json',
  ]);

  const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
  expect(manifest.touchedTableCount).toBe(2);

  // The template that only touches orbit_raw.accounts must not reach the patterns input.
  const patterns = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
  expect(patterns.templates.map((entry: any) => entry.id)).toEqual(['selected-qualified', 'selected-unqualified']);
});
// Verifies two outputs of staging: patterns-input.json keeps every template, while the
// first shard under patterns-input/ holds only the two templates that touch both tables.
it('preserves full patterns audit input and writes bounded cross-table pattern shards', async () => {
const stagedDir = await tempDir();
// An 8000-character string literal makes each cross-table template's SQL large.
const largeSql = `select * from public.orders o join public.customers c on c.id = o.customer_id where payload = '${'x'.repeat(8000)}'`;
const reader: HistoricSqlReader = {
async probe() {
return { warnings: [], info: [] };
},
async *fetchAggregated() {
yield aggregate({
templateId: 'orders-customers-a',
canonicalSql: largeSql,
stats: {
executions: 25,
distinctUsers: 4,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 15,
p95RuntimeMs: 90,
errorRate: 0,
rowsProduced: 250,
},
});
yield aggregate({
templateId: 'orders-customers-b',
// Same large SQL with the first 'payload' occurrence renamed, so the two templates differ.
canonicalSql: largeSql.replace('payload', 'payload_b'),
stats: {
executions: 22,
distinctUsers: 3,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 20,
p95RuntimeMs: 95,
errorRate: 0,
rowsProduced: 220,
},
});
yield aggregate({
templateId: 'orders-single-table',
canonicalSql: 'select count(*) from public.orders',
stats: {
executions: 30,
distinctUsers: 2,
firstSeen: '2026-05-01T00:00:00.000Z',
lastSeen: '2026-05-11T00:00:00.000Z',
p50RuntimeMs: 10,
p95RuntimeMs: 20,
errorRate: 0,
rowsProduced: 30,
},
});
},
};
// Stubbed analysis: the first two templates touch orders + customers, the third only orders.
const sqlAnalysis: SqlAnalysisPort = {
analyzeForFingerprint: vi.fn(),
analyzeBatch: vi.fn(async () => new Map([
[
'orders-customers-a',
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: {
select: [],
where: ['payload'],
join: ['customer_id', 'id'],
groupBy: [],
},
},
],
[
'orders-customers-b',
{
tablesTouched: ['public.orders', 'public.customers'],
columnsByClause: {
select: [],
where: ['payload_b'],
join: ['customer_id', 'id'],
groupBy: [],
},
},
],
[
'orders-single-table',
{
tablesTouched: ['public.orders'],
columnsByClause: {
select: [],
where: [],
join: [],
groupBy: [],
},
},
],
])),
validateReadOnly: vi.fn(async () => ({ ok: true })),
};
await stageHistoricSqlAggregatedSnapshot({
stagedDir,
connectionId: 'warehouse',
queryClient: {},
reader,
sqlAnalysis,
pullConfig: { dialect: 'postgres' },
now: new Date('2026-05-11T12:00:00.000Z'),
});
// The audit file lists all three templates.
const audit = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
expect(audit.templates.map((entry: any) => entry.id)).toEqual([
'orders-customers-a',
'orders-customers-b',
'orders-single-table',
]);
// The first shard excludes the single-table template.
const firstShard = await readJson<Record<string, any>>(stagedDir, 'patterns-input/part-0001.json');
expect(firstShard.templates.map((entry: any) => entry.id)).toEqual(['orders-customers-a', 'orders-customers-b']);
expect(firstShard.templates.some((entry: any) => entry.id === 'orders-single-table')).toBe(false);
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
expect(manifest.warnings).toEqual([]);
});
});

View file

@ -1,110 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
aggregatedTemplateSchema,
historicSqlUnifiedPullConfigSchema,
stagedManifestSchema,
stagedPatternsInputSchema,
stagedTableInputSchema,
} from './types.js';
describe('historic-sql unified contracts', () => {
it('parses minExecutions and service-account filters', () => {
  // Minimal config: defaults are filled in, but windowDays and concurrency must not appear.
  const minimal = historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minExecutions: 9 });
  expect(minimal).toMatchObject({
    dialect: 'postgres',
    minExecutions: 9,
    redactionPatterns: [],
    staleArchiveAfterDays: 90,
  });
  expect(minimal).not.toHaveProperty('windowDays');
  expect(minimal).not.toHaveProperty('concurrency');

  // Service-account filter settings survive parsing unchanged.
  const withFilters = historicSqlUnifiedPullConfigSchema.parse({
    dialect: 'postgres',
    minExecutions: 7,
    filters: {
      serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
    },
  });
  expect(withFilters.minExecutions).toBe(7);
  expect(withFilters.filters.serviceAccounts).toEqual({ patterns: ['^svc_'], mode: 'exclude' });
});
it('validates aggregate templates from warehouse readers', () => {
  // A fully populated template, including a fractional p50 runtime.
  const candidate = {
    templateId: 'pg:123',
    canonicalSql: 'select status, count(*) from public.orders group by status',
    dialect: 'postgres',
    stats: {
      executions: 42,
      distinctUsers: 3,
      firstSeen: '2026-05-01T00:00:00.000Z',
      lastSeen: '2026-05-11T00:00:00.000Z',
      p50RuntimeMs: 12.5,
      p95RuntimeMs: 40,
      errorRate: 0,
      rowsProduced: 100,
    },
    topUsers: [{ user: 'analyst', executions: 40 }],
  };
  const template = aggregatedTemplateSchema.parse(candidate);
  expect(template.templateId).toBe('pg:123');
  expect(template.topUsers).toEqual([{ user: 'analyst', executions: 40 }]);
});
it('validates staged table, patterns, and manifest artifacts', () => {
  // Per-table artifact.
  const stagedTable = stagedTableInputSchema.parse({
    table: 'public.orders',
    stats: {
      executionsBucket: '10-100',
      distinctUsersBucket: '2-5',
      errorRateBucket: 'none',
      p95RuntimeBucket: '<100ms',
      recencyBucket: 'current',
    },
    columnsByClause: {
      select: [['status', 'high']],
      where: [['created_at', 'mid']],
    },
    observedJoins: [{ withTable: 'public.customers', on: ['customer_id'], freq: 'high' }],
    topTemplates: [{ id: 'pg:123', canonicalSql: 'select * from public.orders', topUsers: [{ user: 'analyst' }] }],
  });
  expect(stagedTable.table).toBe('public.orders');

  // Cross-table patterns input.
  const stagedPatterns = stagedPatternsInputSchema.parse({
    templates: [
      {
        id: 'pg:123',
        canonicalSql: 'select * from public.orders',
        tablesTouched: ['public.orders'],
        executionsBucket: '10-100',
        distinctUsersBucket: '2-5',
        dialect: 'postgres',
      },
    ],
  });
  expect(stagedPatterns.templates).toHaveLength(1);

  // Snapshot manifest.
  const stagedManifest = stagedManifestSchema.parse({
    source: 'historic-sql',
    connectionId: 'warehouse',
    dialect: 'postgres',
    fetchedAt: '2026-05-11T00:00:00.000Z',
    windowStart: '2026-02-10T00:00:00.000Z',
    windowEnd: '2026-05-11T00:00:00.000Z',
    snapshotRowCount: 2,
    touchedTableCount: 1,
    parseFailures: 1,
    warnings: ['parse_failed:bad'],
    probeWarnings: [],
    staleArchiveAfterDays: 90,
  });
  expect(stagedManifest.staleArchiveAfterDays).toBe(90);
});
});

View file

@ -1,107 +0,0 @@
import { mkdtemp } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import type { KtxSchemaSnapshot } from '../../../scan/types.js';
import { chunkLiveDatabaseStagedDir } from './chunk.js';
import { liveDatabaseTablePath, writeLiveDatabaseSnapshot } from './stage.js';
/**
 * Returns a fresh two-table (`orders`, `customers`) postgres snapshot fixture.
 * Both tables live in `public` and have a single integer primary-key column `id`.
 */
function snapshot(): KtxSchemaSnapshot {
  type SnapshotTable = KtxSchemaSnapshot['tables'][number];

  // Builds a new object graph on every call so tests never share mutable state.
  const publicTable = (name: string): SnapshotTable => ({
    name,
    catalog: null,
    db: 'public',
    kind: 'table',
    comment: null,
    estimatedRows: null,
    columns: [
      {
        name: 'id',
        nativeType: 'integer',
        normalizedType: 'integer',
        dimensionType: 'number',
        nullable: false,
        primaryKey: true,
        comment: null,
      },
    ],
    foreignKeys: [],
  });

  return {
    connectionId: 'conn-1',
    driver: 'postgres',
    extractedAt: '2026-04-27T00:00:00.000Z',
    scope: { schemas: ['public'] },
    metadata: {},
    tables: [publicTable('orders'), publicTable('customers')],
  };
}
describe('chunkLiveDatabaseStagedDir', () => {
it('emits one work unit per table on the first run', async () => {
  const stagedDir = await mkdtemp(join(tmpdir(), 'ktx-live-db-chunk-'));
  await writeLiveDatabaseSnapshot(stagedDir, snapshot());

  // No diff argument: this is the first-run path.
  const { workUnits } = await chunkLiveDatabaseStagedDir(stagedDir);
  const unitKeys = workUnits.map((unit) => unit.unitKey);
  expect(unitKeys).toEqual([
    'live-database-public-customers',
    'live-database-public-orders',
  ]);

  // The first unit depends on the shared files and can see the orders table as a peer.
  const leadingUnit = workUnits[0];
  expect(leadingUnit?.dependencyPaths).toEqual(['connection.json', 'foreign-keys.json']);
  expect(leadingUnit?.peerFileIndex).toContain(
    liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' }),
  );
});
it('keeps only changed tables during incremental syncs and records table evictions', async () => {
  const stagedDir = await mkdtemp(join(tmpdir(), 'ktx-live-db-diff-'));
  await writeLiveDatabaseSnapshot(stagedDir, snapshot());

  const publicTablePath = (name: string) => liveDatabaseTablePath({ catalog: null, db: 'public', name });
  const ordersPath = publicTablePath('orders');
  const customersPath = publicTablePath('customers');

  // Diff: orders modified, customers deleted, shared files untouched.
  const chunked = await chunkLiveDatabaseStagedDir(stagedDir, {
    added: [],
    modified: [ordersPath],
    deleted: [customersPath],
    unchanged: ['connection.json', 'foreign-keys.json'],
  });

  const unitKeys = chunked.workUnits.map((unit) => unit.unitKey);
  expect(unitKeys).toEqual(['live-database-public-orders']);
  expect(chunked.eviction?.deletedRawPaths).toEqual([customersPath]);
});
it('fans out all table work units when the foreign-key index changes', async () => {
  const stagedDir = await mkdtemp(join(tmpdir(), 'ktx-live-db-fk-'));
  await writeLiveDatabaseSnapshot(stagedDir, snapshot());

  // Only foreign-keys.json is marked modified, yet both tables are expected back.
  const fkOnlyDiff = {
    added: [],
    modified: ['foreign-keys.json'],
    deleted: [],
    unchanged: [],
  };
  const { workUnits } = await chunkLiveDatabaseStagedDir(stagedDir, fkOnlyDiff);
  expect(workUnits).toHaveLength(2);
});
});

View file

@ -1,262 +0,0 @@
import { once } from 'node:events';
import { createServer } from 'node:http';
import { describe, expect, it, vi } from 'vitest';
import { tableRefSet } from '../../../scan/table-ref.js';
import { createDaemonLiveDatabaseIntrospection } from './daemon-introspection.js';
// Canned daemon payload shared by the tests below. Keys are snake_case. It holds two
// tables in catalog 'warehouse' / db 'public'; `orders` has a foreign key to `customers`.
const daemonResponse = {
connection_id: 'warehouse',
extracted_at: '2026-04-28T10:00:00+00:00',
metadata: { driver: 'postgres', schemas: ['public'] },
tables: [
{
catalog: 'warehouse',
db: 'public',
name: 'customers',
comment: null,
columns: [{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: null }],
foreign_keys: [],
},
{
catalog: 'warehouse',
db: 'public',
name: 'orders',
comment: 'Order facts',
columns: [
{ name: 'id', type: 'integer', nullable: false, primary_key: true, comment: 'Order id' },
{ name: 'customer_id', type: 'integer', nullable: false, primary_key: false, comment: null },
],
// The foreign key names only the target table and column, with no catalog or db.
foreign_keys: [
{
from_column: 'customer_id',
to_table: 'customers',
to_column: 'id',
constraint_name: 'orders_customer_id_fkey',
},
],
},
],
};
describe('createDaemonLiveDatabaseIntrospection', () => {
// Checks both directions of the daemon round trip: the snake_case payload is mapped to the
// camelCase snapshot shape, and the request sent through runJson carries the expected fields.
it('calls the database-introspect daemon command and maps the snapshot response', async () => {
const runJson = vi.fn(async () => daemonResponse);
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
},
},
schemas: ['public'],
runJson,
});
// Expected snapshot: `kind`, `estimatedRows`, `normalizedType` and `dimensionType` are
// absent from daemonResponse, so the mapping is expected to supply them.
await expect(introspection.extractSchema('warehouse')).resolves.toEqual({
connectionId: 'warehouse',
driver: 'postgres',
extractedAt: '2026-04-28T10:00:00+00:00',
scope: { schemas: ['public'] },
metadata: { driver: 'postgres', schemas: ['public'] },
tables: [
{
catalog: 'warehouse',
db: 'public',
name: 'customers',
kind: 'table',
comment: null,
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
{
catalog: 'warehouse',
db: 'public',
name: 'orders',
kind: 'table',
comment: 'Order facts',
estimatedRows: null,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'Order id',
},
{
name: 'customer_id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [
{
fromColumn: 'customer_id',
// The payload's foreign key has no catalog/db, so both are expected as null.
toCatalog: null,
toDb: null,
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fkey',
},
],
},
],
});
// The timeout values are not passed in above, so they are expected to be defaults.
expect(runJson).toHaveBeenCalledWith('database-introspect', {
connection_id: 'warehouse',
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
schemas: ['public'],
statement_timeout_ms: 30_000,
connection_timeout_seconds: 5,
});
});
// Starts a throwaway local HTTP server that records each request and always replies with
// daemonResponse, then checks the path and JSON body the introspection sends to it.
it('calls a running daemon HTTP endpoint when baseUrl is configured', async () => {
const requests: Array<{ url: string | undefined; body: unknown }> = [];
const server = createServer((request, response) => {
const chunks: Buffer[] = [];
request.on('data', (chunk: Buffer) => chunks.push(chunk));
// Record the request only after the whole body has arrived, then answer.
request.on('end', () => {
requests.push({
url: request.url,
body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
});
response.writeHead(200, { 'content-type': 'application/json' });
response.end(JSON.stringify(daemonResponse));
});
});
// Port 0 lets the OS choose a free port; it is read back from server.address() below.
server.listen(0, '127.0.0.1');
await once(server, 'listening');
try {
const address = server.address();
if (!address || typeof address === 'string') {
throw new Error('expected TCP server address');
}
const introspection = createDaemonLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
},
},
baseUrl: `http://127.0.0.1:${address.port}`,
});
// The stub ignores table_scope and returns both tables, so both are expected in the result.
await expect(
introspection.extractSchema('warehouse', {
tableScope: tableRefSet([{ catalog: 'warehouse', db: 'public', name: 'orders' }]),
}),
).resolves.toMatchObject({
connectionId: 'warehouse',
tables: [{ name: 'customers' }, { name: 'orders' }],
});
// `schemas` is not configured in this test, yet ['public'] is expected in the request body.
expect(requests).toEqual([
{
url: '/database/introspect',
body: {
connection_id: 'warehouse',
driver: 'postgres',
url: 'postgres://localhost:5432/warehouse',
schemas: ['public'],
statement_timeout_ms: 30_000,
connection_timeout_seconds: 5,
table_scope: [{ catalog: 'warehouse', db: 'public', name: 'orders' }],
},
},
]);
} finally {
// Stop accepting connections; the close is not awaited.
server.close();
}
});
it('requires a configured postgres connection with a url', async () => {
  // The warehouse connection deliberately has no `url`.
  const introspection = createDaemonLiveDatabaseIntrospection({
    connections: { warehouse: { driver: 'postgres' } },
    runJson: vi.fn(async () => daemonResponse),
  });

  const extraction = introspection.extractSchema('warehouse');
  await expect(extraction).rejects.toThrow(
    'Local live-database ingest requires connections.warehouse.url.',
  );
});
it('rejects unsupported local connection drivers before calling the daemon', async () => {
  const daemonCall = vi.fn(async () => daemonResponse);
  const introspection = createDaemonLiveDatabaseIntrospection({
    connections: {
      warehouse: { driver: 'snowflake', url: 'snowflake://example' },
    },
    runJson: daemonCall,
  });

  const extraction = introspection.extractSchema('warehouse');
  await expect(extraction).rejects.toThrow(
    'Local live-database ingest cannot run driver "snowflake".',
  );
  // The rejection must happen without the daemon being invoked.
  expect(daemonCall).not.toHaveBeenCalled();
});
it('does not use connection enabled_tables as a response filter', async () => {
  const daemonCall = vi.fn(async () => daemonResponse);
  const introspection = createDaemonLiveDatabaseIntrospection({
    connections: {
      warehouse: {
        driver: 'postgres',
        url: 'postgres://localhost:5432/warehouse',
        enabled_tables: ['public.orders'],
      },
    },
    schemas: ['public'],
    runJson: daemonCall,
  });

  // enabled_tables names only orders, yet customers must still be in the snapshot.
  const { tables } = await introspection.extractSchema('warehouse');
  const qualifiedNames = tables.map((table) => `${table.db}.${table.name}`);
  expect(qualifiedNames).toEqual(['public.customers', 'public.orders']);

  // enabled_tables must not be turned into a table_scope on the request either.
  const payloadWithoutScope = expect.not.objectContaining({ table_scope: expect.anything() });
  expect(daemonCall).toHaveBeenCalledWith('database-introspect', payloadWithoutScope);
});
it('passes through every table when enabled_tables is omitted or empty', async () => {
  // Exercise both halves of the title: `undefined` leaves the key out of the
  // connection entirely, `[]` supplies it empty.
  const enabledTablesVariants: Array<string[] | undefined> = [undefined, []];

  for (const enabledTables of enabledTablesVariants) {
    const runJson = vi.fn(async () => daemonResponse);
    const introspection = createDaemonLiveDatabaseIntrospection({
      connections: {
        warehouse: {
          driver: 'postgres',
          url: 'postgres://localhost:5432/warehouse',
          ...(enabledTables === undefined ? {} : { enabled_tables: enabledTables }),
        },
      },
      schemas: ['public'],
      runJson,
    });

    // Every table in the daemon payload must come back in either configuration.
    const snapshot = await introspection.extractSchema('warehouse');
    expect(snapshot.tables.map((table) => table.name)).toEqual(['customers', 'orders']);
  }
});
});

View file

@ -1,111 +0,0 @@
import { mkdtemp, readdir, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { tableRefSet, type KtxTableRefKey } from '../../../scan/table-ref.js';
import { LiveDatabaseSourceAdapter } from './live-database.adapter.js';
describe('LiveDatabaseSourceAdapter', () => {
it('fetches a schema snapshot through the introspection port', async () => {
  // Introspection stub resolving to a one-table postgres snapshot.
  const extractSchema = vi.fn().mockResolvedValue({
    connectionId: 'conn-1',
    driver: 'postgres',
    extractedAt: '2026-04-27T00:00:00.000Z',
    scope: { schemas: ['public'] },
    metadata: {},
    tables: [
      {
        name: 'orders',
        catalog: null,
        db: 'public',
        kind: 'table',
        comment: null,
        estimatedRows: null,
        columns: [
          {
            name: 'id',
            nativeType: 'integer',
            normalizedType: 'integer',
            dimensionType: 'number',
            nullable: false,
            primaryKey: true,
            comment: null,
          },
        ],
        foreignKeys: [],
      },
    ],
  });
  const adapter = new LiveDatabaseSourceAdapter({
    introspection: { extractSchema },
    now: () => new Date('2026-04-27T00:00:00.000Z'),
  });
  const dir = await mkdtemp(join(tmpdir(), 'ktx-live-db-adapter-'));
  try {
    await adapter.fetch(undefined, dir, { connectionId: 'conn-1', sourceKey: 'live-database' });
    // With no tableScope in the fetch context, the port receives an explicit undefined.
    expect(extractSchema).toHaveBeenCalledWith('conn-1', { tableScope: undefined });
    await expect(adapter.detect(dir)).resolves.toBe(true);
    const chunked = await adapter.chunk(dir);
    expect(chunked.workUnits.map((wu) => wu.unitKey)).toEqual(['live-database-public-orders']);
  } finally {
    // Remove the scratch directory even when an assertion fails, so runs do not
    // accumulate temp dirs.
    await rm(dir, { recursive: true, force: true });
  }
});
it('declares the live database source and skill', () => {
  // The introspection port is never called here; an inert mock is enough.
  const introspection = { extractSchema: vi.fn() };
  const { source, skillNames } = new LiveDatabaseSourceAdapter({ introspection });
  expect(source).toBe('live-database');
  expect(skillNames).toEqual(['live_database_ingest']);
});
it('threads tableScope from fetch context into the introspection port without post-filtering', async () => {
  // The stub ignores the requested scope and always returns both tables, so any
  // filtering done by the adapter itself would show up as a missing staged file.
  const extractSchema = vi.fn(
    async (_connectionId: string, _options?: { tableScope?: ReadonlySet<KtxTableRefKey> }) => ({
      connectionId: 'warehouse',
      driver: 'snowflake' as const,
      extractedAt: '2026-05-22T00:00:00.000Z',
      scope: {},
      metadata: {},
      tables: ['IN_SCOPE', 'OUT_OF_SCOPE'].map((name) => ({
        catalog: 'A',
        db: 'MARTS',
        name,
        kind: 'table' as const,
        comment: null,
        estimatedRows: 0,
        columns: [],
        foreignKeys: [],
      })),
    }),
  );
  const requestedScope = tableRefSet([{ catalog: 'A', db: 'MARTS', name: 'IN_SCOPE' }]);
  const adapter = new LiveDatabaseSourceAdapter({ introspection: { extractSchema } });
  const stagedDir = await mkdtemp(join(tmpdir(), 'ktx-livedb-scope-'));
  try {
    await adapter.fetch(undefined, stagedDir, {
      connectionId: 'warehouse',
      sourceKey: 'live-database',
      tableScope: requestedScope,
    });
    // The same scope set is handed through to the port.
    expect(extractSchema).toHaveBeenCalledWith('warehouse', { tableScope: requestedScope });
    // Both returned tables are staged, including the one outside the scope.
    const stagedTables = await readdir(join(stagedDir, 'tables'));
    expect(stagedTables).toHaveLength(2);
  } finally {
    await rm(stagedDir, { recursive: true, force: true });
  }
});
});

View file

@ -1,308 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
buildLiveDatabaseManifestShards,
type LiveDatabaseManifestExistingDescriptions,
type LiveDatabaseManifestJoinEntry,
type LiveDatabaseManifestShard,
} from './manifest.js';
/**
 * Converts the shard map into a plain object whose keys are inserted in
 * `localeCompare` order, giving the assertions a stable shape to compare against.
 */
function shardObject(shards: Map<string, LiveDatabaseManifestShard>): Record<string, LiveDatabaseManifestShard> {
  const entries = Array.from(shards);
  entries.sort((left, right) => left[0].localeCompare(right[0]));
  return Object.fromEntries(entries);
}
describe('buildLiveDatabaseManifestShards', () => {
// End-to-end shape check for a two-table postgres build: existing descriptions and
// preserved manual joins are supplied together with freshly extracted tables and one formal join.
it('builds shard objects with generated joins and preserved external descriptions', () => {
// Previously stored descriptions for `orders`: a `user` entry and a `db` entry at
// both table and column level.
const existingDescriptions = new Map<string, LiveDatabaseManifestExistingDescriptions>([
[
'orders',
{
table: { user: 'Pinned analyst description', db: 'Old db description' },
columns: new Map([['id', { user: 'Pinned id description', db: 'Old id description' }]]),
},
],
]);
// Two manual joins on `orders`: one to `customers`, which is among the input tables,
// and one to `missing_accounts`, which is not.
const preservedJoins = new Map<string, LiveDatabaseManifestJoinEntry[]>([
[
'orders',
[
{
to: 'customers',
on: 'orders.account_id = customers.id',
relationship: 'many_to_one',
source: 'manual',
},
{
to: 'missing_accounts',
on: 'orders.account_id = missing_accounts.id',
relationship: 'many_to_one',
source: 'manual',
},
],
],
]);
const result = buildLiveDatabaseManifestShards({
connectionType: 'POSTGRESQL',
mapColumnType: (nativeType) => nativeType.toLowerCase(),
existingDescriptions,
existingPreservedJoins: preservedJoins,
tables: [
{
name: 'orders',
catalog: null,
db: 'public',
descriptions: { db: 'Fresh db description', ai: 'Generated AI description' },
columns: [
{
name: 'id',
type: 'INTEGER',
pk: true,
nullable: false,
descriptions: { db: 'Fresh id description' },
},
{
name: 'customer_id',
type: 'INTEGER',
},
],
},
{
name: 'customers',
catalog: null,
db: 'public',
columns: [
{
name: 'id',
type: 'INTEGER',
pk: true,
nullable: false,
},
],
},
],
joins: [
{
fromTable: 'orders',
fromColumns: ['customer_id'],
toTable: 'customers',
toColumns: ['id'],
relationship: 'MANY_TO_ONE',
source: 'formal',
},
],
});
expect(result.tablesProcessed).toBe(2);
// Expected: `user` descriptions survive, `db` descriptions take the fresh values,
// column types are lower-cased, and the manual join to missing_accounts is absent.
expect(shardObject(result.shards)).toEqual({
public: {
tables: {
orders: {
table: 'public.orders',
descriptions: {
user: 'Pinned analyst description',
db: 'Fresh db description',
ai: 'Generated AI description',
},
columns: [
{
name: 'id',
type: 'integer',
pk: true,
nullable: false,
descriptions: {
user: 'Pinned id description',
db: 'Fresh id description',
},
},
{
name: 'customer_id',
type: 'integer',
},
],
joins: [
{
to: 'customers',
on: 'orders.customer_id = customers.id',
relationship: 'many_to_one',
source: 'formal',
},
{
to: 'customers',
on: 'orders.account_id = customers.id',
relationship: 'many_to_one',
source: 'manual',
},
],
},
customers: {
table: 'public.customers',
columns: [
{
name: 'id',
type: 'integer',
pk: true,
nullable: false,
},
],
// The formal join is also expected on the target table, reversed to one_to_many.
joins: [
{
to: 'orders',
on: 'customers.id = orders.customer_id',
relationship: 'one_to_many',
source: 'formal',
},
],
},
},
},
});
});
it('uses warehouse and schema shard keys for snowflake-style connections', () => {
  const { shards } = buildLiveDatabaseManifestShards({
    connectionType: 'SNOWFLAKE',
    mapColumnType: (nativeType) => nativeType.toLowerCase(),
    tables: [
      {
        name: 'accounts',
        catalog: 'ANALYTICS',
        db: 'CORE',
        columns: [{ name: 'id', type: 'NUMBER' }],
      },
    ],
    joins: [],
  });

  // The shard key and the table reference both carry the catalog as well as the db.
  const expectedShards = {
    'ANALYTICS.CORE': {
      tables: {
        accounts: {
          table: 'ANALYTICS.CORE.accounts',
          columns: [{ name: 'id', type: 'number' }],
        },
      },
    },
  };
  expect(shardObject(shards)).toEqual(expectedShards);
});
it('preserves external usage keys while replacing historic SQL managed keys', () => {
  // Stored usage for `orders`: generated keys plus an `ownerNote` key that the
  // fresh usage below does not supply.
  const priorUsage = new Map([
    [
      'orders',
      {
        narrative: 'Old generated usage narrative.',
        frequencyTier: 'low' as const,
        commonFilters: ['old_status'],
        commonJoins: [],
        ownerNote: 'Pinned analyst note',
      },
    ],
  ]);

  const { shards } = buildLiveDatabaseManifestShards({
    connectionType: 'POSTGRESQL',
    mapColumnType: (nativeType) => nativeType.toLowerCase(),
    existingUsage: priorUsage,
    tables: [
      {
        name: 'orders',
        catalog: null,
        db: 'public',
        usage: {
          narrative: 'Fresh generated usage narrative.',
          frequencyTier: 'high',
          commonFilters: ['status'],
          commonGroupBys: ['created_at'],
          commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
        },
        columns: [{ name: 'id', type: 'INTEGER' }],
      },
    ],
    joins: [],
  });

  // ownerNote is carried over; every other usage key takes the fresh value.
  expect(shardObject(shards)).toEqual({
    public: {
      tables: {
        orders: {
          table: 'public.orders',
          usage: {
            ownerNote: 'Pinned analyst note',
            narrative: 'Fresh generated usage narrative.',
            frequencyTier: 'high',
            commonFilters: ['status'],
            commonGroupBys: ['created_at'],
            commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
          },
          columns: [{ name: 'id', type: 'integer' }],
        },
      },
    },
  });
});
// A single inferred two-column join is supplied once and is expected on both tables:
// as given on the source table, and reversed (one_to_many) on the target table.
it('renders ordered multi-column joins in both directions', () => {
const result = buildLiveDatabaseManifestShards({
connectionType: 'POSTGRESQL',
// Identity mapping: column types pass through unchanged.
mapColumnType: (nativeType) => nativeType,
tables: [
{
name: 'order_lines',
catalog: null,
db: 'public',
columns: [
{ name: 'order_id', type: 'integer' },
{ name: 'line_number', type: 'integer' },
],
},
{
name: 'order_line_allocations',
catalog: null,
db: 'public',
columns: [
{ name: 'order_id', type: 'integer' },
{ name: 'line_number', type: 'integer' },
],
},
],
joins: [
{
fromTable: 'order_line_allocations',
fromColumns: ['order_id', 'line_number'],
toTable: 'order_lines',
toColumns: ['order_id', 'line_number'],
relationship: 'many_to_one',
source: 'inferred',
},
],
});
// The `on` clauses pair columns positionally (order_id first, then line_number), joined by AND.
expect(shardObject(result.shards)).toMatchObject({
public: {
tables: {
order_line_allocations: {
joins: [
{
to: 'order_lines',
on: 'order_line_allocations.order_id = order_lines.order_id AND order_line_allocations.line_number = order_lines.line_number',
relationship: 'many_to_one',
source: 'inferred',
},
],
},
order_lines: {
joins: [
{
to: 'order_line_allocations',
on: 'order_lines.order_id = order_line_allocations.order_id AND order_lines.line_number = order_line_allocations.line_number',
relationship: 'one_to_many',
source: 'inferred',
},
],
},
},
},
});
});
});

View file

@ -1,178 +0,0 @@
import { mkdtemp, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import {
detectLiveDatabaseStagedDir,
LIVE_DATABASE_FOREIGN_KEYS_FILE,
LIVE_DATABASE_META_FILE,
LIVE_DATABASE_WARNINGS_FILE,
liveDatabaseTablePath,
readLiveDatabaseTableFiles,
writeLiveDatabaseSnapshot,
} from './stage.js';
import type { KtxSchemaSnapshot } from '../../../scan/types.js';
/**
 * Builds the schema snapshot fixture shared by the tests below.
 *
 * The snapshot describes a postgres connection ("conn-1") scoped to the
 * `public` schema with two tables: `orders` (three columns and one foreign key
 * pointing at `customers.id`) and `customers` (a single primary-key column).
 * A new object is returned on every call, so tests can spread and override it.
 */
function snapshot(): KtxSchemaSnapshot {
return {
connectionId: 'conn-1',
driver: 'postgres',
extractedAt: '2026-04-27T00:00:00.000Z',
scope: { schemas: ['public'] },
metadata: { dialect: 'postgres' },
tables: [
{
// orders: referenced by the foreign-key and table-file assertions.
name: 'orders',
catalog: null,
db: 'public',
kind: 'table',
comment: 'Orders placed by customers',
estimatedRows: 200,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
{
name: 'customer_id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: false,
comment: null,
},
{
name: 'total',
nativeType: 'numeric',
normalizedType: 'numeric',
dimensionType: 'number',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [
{
// orders.customer_id -> public.customers.id, unnamed constraint.
fromColumn: 'customer_id',
toCatalog: null,
toDb: 'public',
toTable: 'customers',
toColumn: 'id',
constraintName: null,
},
],
},
{
// customers: minimal table that is the target of the foreign key above.
name: 'customers',
catalog: null,
db: 'public',
kind: 'table',
comment: null,
estimatedRows: 50,
columns: [
{
name: 'id',
nativeType: 'integer',
normalizedType: 'integer',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
],
foreignKeys: [],
},
],
};
}
describe('live-database staged snapshot files', () => {
  /**
   * Runs `run` against a freshly created temporary directory and always removes
   * the directory afterwards, even when an assertion inside `run` throws.
   *
   * @param prefix - Prefix passed to `mkdtemp` under the OS temp directory.
   * @param run - Test body that receives the absolute path of the temp directory.
   */
  async function withTempDir(prefix: string, run: (dir: string) => Promise<void>): Promise<void> {
    // `rm` is not among this file's static imports, so it is loaded locally.
    const { rm } = await import('node:fs/promises');
    const dir = await mkdtemp(join(tmpdir(), prefix));
    try {
      await run(dir);
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  }

  // Writes the fixture snapshot and checks the metadata file, the foreign-key
  // file, and the per-table files, then reads the table files back.
  it('writes deterministic metadata, table, and foreign-key files', async () => {
    await withTempDir('ktx-live-db-stage-', async (dir) => {
      await writeLiveDatabaseSnapshot(dir, snapshot());
      await expect(readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8')).resolves.toContain('"connectionId": "conn-1"');
      await expect(readFile(join(dir, LIVE_DATABASE_FOREIGN_KEYS_FILE), 'utf8')).resolves.toContain(
        '"fromTable": "orders"',
      );
      const connectionJson = await readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8');
      expect(connectionJson).toContain('"driver": "postgres"');
      expect(connectionJson).toContain('"schemas"');
      const ordersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'orders' });
      const customersPath = liveDatabaseTablePath({ catalog: null, db: 'public', name: 'customers' });
      // Table paths are expected to be three dot-separated safe segments under tables/.
      expect(ordersPath).toMatch(/^tables\/[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.json$/);
      await expect(readFile(join(dir, ordersPath), 'utf8')).resolves.toContain('"name": "orders"');
      await expect(readFile(join(dir, customersPath), 'utf8')).resolves.toContain('"name": "customers"');
      const ordersJson = await readFile(join(dir, ordersPath), 'utf8');
      expect(ordersJson).toContain('"kind": "table"');
      expect(ordersJson).toContain('"estimatedRows": 200');
      expect(ordersJson).toContain('"nativeType": "integer"');
      expect(ordersJson).toContain('"normalizedType": "integer"');
      // The staged column shape must not expose a plain "type" key.
      expect(ordersJson).not.toContain('"type": "integer"');
      const tableFiles = await readLiveDatabaseTableFiles(dir);
      // Files come back ordered by table name, not by snapshot order.
      expect(tableFiles.map((file) => file.table.name)).toEqual(['customers', 'orders']);
      expect(await detectLiveDatabaseStagedDir(dir)).toBe(true);
    });
  });

  // Sensitive metadata values (connection URL, private key) must be replaced
  // with "<redacted>" while non-sensitive values are written unchanged.
  it('redacts sensitive snapshot metadata before writing connection metadata', async () => {
    await withTempDir('ktx-live-db-redacted-stage-', async (dir) => {
      await writeLiveDatabaseSnapshot(dir, {
        ...snapshot(),
        metadata: {
          dialect: 'postgres',
          url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret
          serviceAccountJson: {
            client_email: 'reader@example.test',
            private_key: 'pem-value', // pragma: allowlist secret
          },
        },
      });
      const connectionJson = await readFile(join(dir, LIVE_DATABASE_META_FILE), 'utf8');
      expect(connectionJson).toContain('"dialect": "postgres"');
      expect(connectionJson).toContain('"client_email": "reader@example.test"');
      expect(connectionJson).toContain('"url": "<redacted>"');
      expect(connectionJson).toContain('"private_key": "<redacted>"');
      expect(connectionJson).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret
      expect(connectionJson).not.toContain('pem-value');
    });
  });

  // Warnings attached to the snapshot are written to their own file, with the
  // same redaction applied to warning metadata.
  it('writes redacted scan warnings next to live database metadata', async () => {
    await withTempDir('ktx-live-db-warning-stage-', async (dir) => {
      await writeLiveDatabaseSnapshot(dir, {
        ...snapshot(),
        warnings: [
          {
            code: 'constraint_discovery_unauthorized',
            message: 'Skipped primary-key discovery in public (insufficient grants on system catalogs)',
            recoverable: true,
            metadata: {
              schema: 'public',
              kind: 'primary_key',
              url: 'postgres://reader:secret@example.test/db', // pragma: allowlist secret
            },
          },
        ],
      });
      const warningsJson = await readFile(join(dir, LIVE_DATABASE_WARNINGS_FILE), 'utf8');
      expect(warningsJson).toContain('"constraint_discovery_unauthorized"');
      expect(warningsJson).toContain('"schema": "public"');
      expect(warningsJson).toContain('"url": "<redacted>"');
      expect(warningsJson).not.toContain('postgres://reader:secret@example.test/db'); // pragma: allowlist secret
    });
  });

  // An empty directory has no metadata file, so detection must report false.
  it('returns false for a directory that is missing live database metadata', async () => {
    await withTempDir('ktx-live-db-empty-', async (dir) => {
      expect(await detectLiveDatabaseStagedDir(dir)).toBe(false);
    });
  });
});

View file

@ -1,154 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { chunkLookerStagedDir } from './chunk.js';
import { writeLookerEvidenceDocuments } from './evidence-documents.js';
/**
 * Writes `value` as two-space-indented JSON (with a trailing newline) to
 * `relPath` under `stagedDir`, creating missing parent directories first.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const parentDir = join(stagedDir, relPath, '..');
  await mkdir(parentDir, { recursive: true });
  const target = join(stagedDir, relPath);
  await writeFile(target, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
}
/**
 * Stages a minimal Looker export under `stagedDir`: one model with one explore,
 * one dashboard, one Look, the folder tree, one user, and the four signal
 * files. Evidence documents are generated once all JSON files are in place.
 */
async function writeSmallFixture(stagedDir: string): Promise<void> {
  // Each entry is [path relative to stagedDir, JSON payload]; files are written in this order.
  const fixtureFiles: ReadonlyArray<readonly [string, unknown]> = [
    [
      'sync-config.json',
      {
        lookerConnectionId: '11111111-1111-4111-8111-111111111111',
        fetchedAt: '2026-04-30T12:30:00.000Z',
      },
    ],
    [
      'lookml_models.json',
      {
        models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
      },
    ],
    [
      'explores/b2b/sales_pipeline.json',
      {
        modelName: 'b2b',
        exploreName: 'sales_pipeline',
        label: 'Sales Pipeline',
        description: null,
        fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
        joins: [],
      },
    ],
    [
      'dashboards/10.json',
      {
        lookerId: '10',
        title: 'Sales Pipeline',
        description: null,
        folderId: '7',
        ownerId: '3',
        updatedAt: '2026-04-30T12:00:00.000Z',
        tiles: [{ id: '100', title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
      },
    ],
    [
      'looks/20.json',
      {
        lookerId: '20',
        title: 'Open Pipeline',
        description: null,
        folderId: '7',
        ownerId: '3',
        updatedAt: '2026-04-30T12:00:00.000Z',
        query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
      },
    ],
    ['folders/tree.json', { folders: [{ id: '7', name: 'Sandbox', parentId: null, path: ['Sandbox'] }] }],
    ['users/3.json', { id: '3', displayName: 'Ada Lovelace', email: null }],
    ['signals/dashboard_usage.json', [{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8 }]],
    ['signals/look_usage.json', [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5 }]],
    [
      'signals/scheduled_plans.json',
      [{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 }],
    ],
    ['signals/favorites.json', [{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }]],
  ];
  for (const [relPath, payload] of fixtureFiles) {
    await writeJson(stagedDir, relPath, payload);
  }
  await writeLookerEvidenceDocuments(stagedDir);
}
// Exercises chunkLookerStagedDir against the small staged fixture: full runs,
// incremental runs where a dependency changed, and deletions.
describe('chunkLookerStagedDir', () => {
  let stagedDir: string;

  // Every test gets its own freshly staged directory, removed afterwards.
  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'looker-chunk-'));
    await writeSmallFixture(stagedDir);
  });
  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  // Without a diff argument, one work unit is expected per explore, dashboard,
  // and Look, each carrying its raw file plus generated evidence files.
  it('emits one WU per explore, dashboard, and Look with readable dependencies', async () => {
    const result = await chunkLookerStagedDir(stagedDir);
    expect(result.reconcileNotes).toEqual([
      expect.stringContaining('emit_artifact_resolution with actionType="subsumed"'),
    ]);
    expect(result.workUnits.map((wu) => wu.unitKey).sort()).toEqual([
      'looker-dashboard-10',
      'looker-explore-b2b-sales_pipeline',
      'looker-look-20',
    ]);
    const dashboard = result.workUnits.find((wu) => wu.unitKey === 'looker-dashboard-10');
    expect(dashboard?.rawFiles).toEqual([
      'dashboards/10.json',
      'evidence/dashboards/10/metadata.json',
      'evidence/dashboards/10/page.md',
    ]);
    expect(dashboard?.notes).toContain('context_candidate_write');
    expect(dashboard?.notes).not.toContain('wiki_write');
    // Sort a copy so the assertion does not reorder the work unit's own array.
    expect([...(dashboard?.dependencyPaths ?? [])].sort()).toEqual([
      'explores/b2b/sales_pipeline.json',
      'folders/tree.json',
      'signals/dashboard_usage.json',
      'signals/favorites.json',
      'signals/scheduled_plans.json',
      'users/3.json',
    ]);
    const explore = result.workUnits.find((wu) => wu.unitKey === 'looker-explore-b2b-sales_pipeline');
    expect(explore?.rawFiles).toEqual([
      'explores/b2b/sales_pipeline.json',
      'evidence/explores/b2b/sales_pipeline/metadata.json',
      'evidence/explores/b2b/sales_pipeline/page.md',
    ]);
    expect(explore?.dependencyPaths).toEqual(['lookml_models.json']);
  });

  // Only the explore file is marked modified, yet the dashboard and Look that
  // query that explore must still be emitted.
  it('keeps downstream dashboard and Look WUs when an explore dependency changes', async () => {
    const result = await chunkLookerStagedDir(stagedDir, {
      added: [],
      modified: ['explores/b2b/sales_pipeline.json'],
      deleted: [],
      unchanged: [
        'dashboards/10.json',
        'looks/20.json',
        'lookml_models.json',
        'folders/tree.json',
        'users/3.json',
        'signals/dashboard_usage.json',
        'signals/look_usage.json',
        'signals/scheduled_plans.json',
        'signals/favorites.json',
      ],
    });
    expect(result.workUnits.map((wu) => wu.unitKey).sort()).toEqual([
      'looker-dashboard-10',
      'looker-explore-b2b-sales_pipeline',
      'looker-look-20',
    ]);
    expect(result.workUnits.find((wu) => wu.unitKey === 'looker-dashboard-10')?.rawFiles).toEqual([
      'dashboards/10.json',
      'evidence/dashboards/10/metadata.json',
      'evidence/dashboards/10/page.md',
    ]);
  });

  // A deleted Look raw path is reported through the eviction result.
  it('returns an EvictionUnit for deleted runtime entity raw paths', async () => {
    const result = await chunkLookerStagedDir(stagedDir, {
      added: [],
      modified: [],
      deleted: ['looks/20.json'],
      unchanged: ['dashboards/10.json', 'explores/b2b/sales_pipeline.json'],
    });
    expect(result.eviction).toEqual({ deletedRawPaths: ['looks/20.json'] });
  });
});

View file

@ -1,14 +0,0 @@
import { readFile } from 'node:fs/promises';
import { describe, expect, it } from 'vitest';
// Guards the module boundary by scanning the text of client.ts for references
// it must not contain.
describe('LookerClient boundary', () => {
  it('does not import server or NestJS modules', async () => {
    const clientSource = await readFile(new URL('./client.ts', import.meta.url), 'utf-8');
    // Patterns that must never appear anywhere in the client source.
    const forbiddenPatterns = [
      /@nestjs\/common/,
      /DataSourceClient/,
      /\.\.\/interfaces/,
      /\.\.\/types/,
      /server\/src/,
    ];
    for (const pattern of forbiddenPatterns) {
      expect(clientSource).not.toMatch(pattern);
    }
  });
});

View file

@ -1,473 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { LookerClient, type LookerSdkPort } from './client.js';
// The secret's key is held in a constant so object literals use a computed key.
const clientSecretParam = 'client_secret'; // pragma: allowlist secret

/** Returns a fresh set of placeholder Looker connection parameters for the tests. */
function params(): Record<string, unknown> {
  const connection: Record<string, unknown> = {
    base_url: 'https://example.looker.com',
    client_id: 'id',
  };
  connection[clientSecretParam] = 'credential'; // pragma: allowlist secret
  return connection;
}
/**
 * Builds a fake `LookerSdkPort` whose methods are vitest mocks resolving to
 * canned data: one dashboard ('10'), one Look ('30'), one folder, one user,
 * one group, one connection, and one model/explore pair.
 *
 * @param overrides - Mock implementations that replace the defaults; they are
 *   spread last, so any method supplied here wins over the canned version.
 * @returns A new port object on every call, so mocks are not shared between tests.
 */
function sdk(overrides: Partial<LookerSdkPort> = {}): LookerSdkPort {
const port: LookerSdkPort = {
me: vi.fn().mockResolvedValue({ id: '1', display_name: 'API User', email: 'api@example.com' }),
search_dashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
dashboard: vi.fn().mockResolvedValue({
id: '10',
title: 'Revenue Dashboard',
description: 'Revenue concepts',
folder_id: '20',
user_id: '1',
updated_at: '2026-04-30T00:00:00.000Z',
dashboard_elements: [
{
id: '99',
title: 'ARR',
look_id: null,
query: {
id: 'q1',
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr', 'opportunities.stage'],
filters: { 'opportunities.stage': 'open' },
sorts: ['opportunities.arr desc'],
limit: '500',
},
},
],
}),
search_looks: vi.fn().mockResolvedValue([{ id: '30' }]),
search_scheduled_plans: vi.fn().mockResolvedValue([]),
look: vi.fn().mockResolvedValue({
id: '30',
title: 'Open Pipeline ARR',
description: 'ARR for open opportunities',
folder_id: '20',
user_id: '1',
updated_at: '2026-04-30T00:00:00.000Z',
query: {
id: 'q2',
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr'],
filters: { 'opportunities.stage': 'open' },
},
}),
all_folders: vi.fn().mockResolvedValue([{ id: '20', name: 'Executive', parent_id: null }]),
all_users: vi.fn().mockResolvedValue([{ id: '1', display_name: 'API User', email: 'api@example.com' }]),
all_groups: vi.fn().mockResolvedValue([{ id: '2', name: 'Finance' }]),
all_connections: vi.fn().mockResolvedValue([
{
name: 'b2b_sandbox_bq',
host: 'warehouse.example.com',
database: 'analytics',
schema: 'public',
dialect_name: 'bigquery_standard_sql',
},
]),
all_lookml_models: vi
.fn()
.mockResolvedValue([
{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] },
]),
lookml_model_explore: vi.fn().mockResolvedValue({
name: 'sales_pipeline',
label: 'Sales Pipeline',
description: 'Opportunity pipeline',
sql_table_name: 'proj.dataset.opportunities AS opportunities',
connection_name: 'b2b_sandbox_bq',
view_name: 'opportunities',
fields: {
// The '$' + '{...}' concatenation yields the literal text "${TABLE}..." while
// keeping the "${" sequence out of the source.
dimensions: [{ name: 'opportunities.stage', label: 'Stage', type: 'string', sql: '$' + '{TABLE}.stage' }],
measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '$' + '{TABLE}.arr' }],
},
joins: [
{
name: 'accounts',
type: 'left_outer',
relationship: 'many_to_one',
sql_table_name: 'proj.dataset.accounts',
sql_on: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
from: null,
},
],
}),
// Resolves to the JSON text of an empty array.
run_inline_query: vi.fn().mockResolvedValue('[]'),
logout: vi.fn().mockResolvedValue(undefined),
// Caller-supplied mocks replace any of the defaults above.
...overrides,
};
return port;
}
describe('LookerClient', () => {
// testConnection() should succeed and report the identity returned by the fake me().
it('validates credentials with me()', async () => {
  const client = new LookerClient(params(), { sdkFactory: () => sdk() });
  const outcome = await client.testConnection();
  expect(outcome).toEqual({
    success: true,
    metadata: { userId: '1', displayName: 'API User', email: 'api@example.com' },
  });
});
// With dashboard and Look searches failing, getSignals() must still resolve
// with empty signal lists and must not write to console.warn.
it('does not warn to console when optional prioritization inputs fail by default', async () => {
  const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined);
  try {
    const fakeSdk = sdk({
      search_dashboards: vi.fn().mockRejectedValue(new Error('dashboards unavailable')),
      search_looks: vi.fn().mockRejectedValue(new Error('looks unavailable')),
    });
    const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
    await expect(client.getSignals()).resolves.toMatchObject({
      dashboardUsage: [],
      lookUsage: [],
      scheduledPlans: [],
      favorites: [],
    });
    expect(warn).not.toHaveBeenCalled();
  } finally {
    // Put the real console.warn back so the silenced spy does not leak into later tests.
    warn.mockRestore();
  }
});
// End-to-end mapping check: each client read method is called against the
// default fake SDK and its result compared with the expected staged DTO shape.
// The final assertions pin the exact `fields` selectors sent to the SDK.
it('maps dashboards, looks, folders, models, explores, users, and groups to staged DTOs', async () => {
const fakeSdk = sdk();
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
// The default search mocks return rows without updated_at, hence updatedAt: null.
await expect(client.listDashboards()).resolves.toEqual([{ id: '10', updatedAt: null }]);
await expect(client.getDashboard('10')).resolves.toMatchObject({
lookerId: '10',
title: 'Revenue Dashboard',
tiles: [{ id: '99', query: { model: 'b2b', view: 'sales_pipeline' } }],
});
await expect(client.listLooks()).resolves.toEqual([{ id: '30', updatedAt: null }]);
await expect(client.getLook('30')).resolves.toMatchObject({
lookerId: '30',
title: 'Open Pipeline ARR',
query: { model: 'b2b', view: 'sales_pipeline' },
});
// snake_case parent_id becomes parentId and a path array is added.
await expect(client.listFolders()).resolves.toEqual({
folders: [{ id: '20', name: 'Executive', parentId: null, path: ['Executive'] }],
});
await expect(client.listLookmlModels()).resolves.toEqual({
models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
});
// dialect_name from the SDK is exposed as dialect.
await expect(client.listLookerConnections()).resolves.toEqual([
{
name: 'b2b_sandbox_bq',
host: 'warehouse.example.com',
database: 'analytics',
schema: 'public',
dialect: 'bigquery_standard_sql',
},
]);
// The raw sql_table_name is kept verbatim; target table and warehouse
// connection fields are expected to be null at this stage.
await expect(client.getExplore('b2b', 'sales_pipeline')).resolves.toMatchObject({
modelName: 'b2b',
exploreName: 'sales_pipeline',
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: { dimensions: [{ name: 'opportunities.stage' }], measures: [{ name: 'opportunities.arr' }] },
joins: [
{
name: 'accounts',
rawSqlTableName: 'proj.dataset.accounts',
sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
from: null,
targetTable: null,
},
],
targetWarehouseConnectionId: null,
targetTable: null,
});
// Exact field selectors passed to the SDK for each detail request.
expect(fakeSdk.dashboard).toHaveBeenCalledWith(
'10',
'id,title,description,folder_id,user_id,updated_at,dashboard_elements(id,title,look_id,query(id,model,view,fields,filters,sorts,limit,dynamic_fields))',
);
expect(fakeSdk.look).toHaveBeenCalledWith(
'30',
'id,title,description,folder_id,user_id,updated_at,query(id,model,view,fields,filters,sorts,limit,dynamic_fields)',
);
expect(fakeSdk.lookml_model_explore).toHaveBeenCalledWith(
'b2b',
'sales_pipeline',
'name,label,description,sql_table_name,connection_name,view_name,fields,joins(name,type,relationship,sql_table_name,sql_on,from)',
);
expect(fakeSdk.all_connections).toHaveBeenCalledWith('name,host,database,schema,dialect_name');
});
// run_inline_query rejects here, so both usage lists are expected to be empty,
// while favorites are still derived from the dashboard and Look search rows.
it('returns empty usage signals when system activity access fails', async () => {
const client = new LookerClient(params(), {
sdkFactory: () =>
sdk({
run_inline_query: vi.fn().mockRejectedValue(new Error('access denied')),
search_dashboards: vi.fn().mockResolvedValue([{ id: '10', favorite_count: 4 }]),
search_looks: vi.fn().mockResolvedValue([{ id: '30', favorite_count: 2 }]),
search_scheduled_plans: vi.fn().mockResolvedValue([]),
}),
});
await expect(client.getSignals()).resolves.toEqual({
dashboardUsage: [],
lookUsage: [],
scheduledPlans: [],
// favorite_count from each search row surfaces as favoriteCount, tagged by content type.
favorites: [
{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
{ contentId: '30', contentType: 'look', favoriteCount: 2 },
],
});
});
// A first page of exactly 500 rows must trigger a second request at offset 500;
// the short second page ends pagination, giving 501 rows per listing.
it('paginates dashboard and Look searches', async () => {
  const fullPage = (firstId: number) => Array.from({ length: 500 }, (_, index) => ({ id: String(index + firstId) }));
  const fakeSdk = sdk({
    search_dashboards: vi
      .fn()
      .mockResolvedValueOnce(fullPage(1))
      .mockResolvedValueOnce([{ id: '501' }]),
    search_looks: vi
      .fn()
      .mockResolvedValueOnce(fullPage(1001))
      .mockResolvedValueOnce([{ id: '1501' }]),
  });
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });

  const dashboards = await client.listDashboards();
  expect(dashboards).toHaveLength(501);
  const looks = await client.listLooks();
  expect(looks).toHaveLength(501);

  // Both searches are expected to issue the same pair of paged requests.
  for (const search of [fakeSdk.search_dashboards, fakeSdk.search_looks]) {
    expect(search).toHaveBeenNthCalledWith(
      1,
      expect.objectContaining({
        deleted: false,
        fields: 'id,updated_at',
        limit: 500,
        offset: 0,
        sorts: 'id',
      }),
    );
    expect(search).toHaveBeenNthCalledWith(
      2,
      expect.objectContaining({
        limit: 500,
        offset: 500,
      }),
    );
  }
});
// When search rows carry updated_at, listings must expose it as updatedAt.
it('returns updatedAt cursors from dashboard and Look listing rows', async () => {
  const dashboardRows = [{ id: '10', updated_at: '2026-04-30T12:00:00.000Z' }];
  const lookRows = [{ id: '30', updated_at: '2026-04-30T11:00:00.000Z' }];
  const fakeSdk = sdk({
    search_dashboards: vi.fn().mockResolvedValue(dashboardRows),
    search_looks: vi.fn().mockResolvedValue(lookRows),
  });
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });

  const dashboards = await client.listDashboards();
  expect(dashboards).toEqual([{ id: '10', updatedAt: '2026-04-30T12:00:00.000Z' }]);
  const looks = await client.listLooks();
  expect(looks).toEqual([{ id: '30', updatedAt: '2026-04-30T11:00:00.000Z' }]);
});
// After the SDK has been used once, cleanup() must call logout exactly one time.
it('logs out the SDK session during cleanup', async () => {
  const sessionSdk = sdk();
  const client = new LookerClient(params(), { sdkFactory: () => sessionSdk });

  await client.testConnection();
  await client.cleanup();

  const { logout } = sessionSdk;
  expect(logout).toHaveBeenCalledTimes(1);
});
// Full getSignals() aggregation: history rows are folded per content id,
// scheduled plans are split by dashboard/Look, and favorites come from the
// search rows. The trailing assertions pin the request shapes sent to the SDK.
it('aggregates usage, scheduled-plan, and favorite signals', async () => {
// First call answers the dashboard history query, second call the Look history query.
const runInlineQuery = vi
.fn()
.mockResolvedValueOnce(
JSON.stringify([
{
'dashboard.id': '10',
'history.query_run_count': 3,
'history.created_date': '2026-04-30',
'user.id': 'user-1',
},
{
// Count given as a string on purpose; the expected total of 5 below
// requires it to be added numerically to the 3 above.
'dashboard.id': '10',
'history.query_run_count': '2',
'history.created_date': '2026-04-29',
'user.id': 'user-2',
},
]),
)
.mockResolvedValueOnce(
JSON.stringify([
{
'look.id': '30',
'history.query_run_count': 7,
'history.created_date': '2026-04-28',
'user.id': 'user-1',
},
]),
);
const fakeSdk = sdk({
run_inline_query: runInlineQuery,
search_dashboards: vi.fn().mockResolvedValueOnce([{ id: '10', favorite_count: 4 }]),
search_looks: vi.fn().mockResolvedValueOnce([{ id: '30', favorite_count: 2 }]),
search_scheduled_plans: vi.fn().mockResolvedValueOnce([
{
id: 'sp-dashboard',
dashboard_id: '10',
look_id: null,
enabled: true,
scheduled_plan_destination: [{ id: 'dest-1' }, { id: 'dest-2' }],
},
{
id: 'sp-look',
dashboard_id: null,
look_id: '30',
enabled: true,
scheduled_plan_destination: [{ id: 'dest-3' }],
},
]),
});
const client = new LookerClient(params(), { sdkFactory: () => fakeSdk });
await expect(client.getSignals()).resolves.toEqual({
dashboardUsage: [
{
// 3 + '2' run counts, two distinct users, most recent created_date.
contentId: '10',
queryCount30d: 5,
uniqueUsers30d: 2,
lastRunAt: '2026-04-30',
topUsers: ['user-1', 'user-2'],
},
],
lookUsage: [
{
contentId: '30',
queryCount30d: 7,
uniqueUsers30d: 1,
lastRunAt: '2026-04-28',
topUsers: ['user-1'],
},
],
scheduledPlans: [
{
// recipientCount matches the number of destinations on the plan (2 here, 1 below).
contentId: '10',
contentType: 'dashboard',
isScheduled: true,
scheduleCount: 1,
recipientCount: 2,
},
{
contentId: '30',
contentType: 'look',
isScheduled: true,
scheduleCount: 1,
recipientCount: 1,
},
],
favorites: [
{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
{ contentId: '30', contentType: 'look', favoriteCount: 2 },
],
});
// The dashboard usage query targets the system__activity model's history view.
expect(runInlineQuery).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
result_format: 'json',
body: expect.objectContaining({
model: 'system__activity',
view: 'history',
fields: ['dashboard.id', 'history.query_run_count', 'history.created_date', 'user.id'],
}),
}),
);
// Scheduled plans are requested for all users, first page of 500 sorted by id.
expect(fakeSdk.search_scheduled_plans).toHaveBeenCalledWith(
expect.objectContaining({
all_users: true,
fields: 'id,dashboard_id,look_id,enabled,scheduled_plan_destination',
limit: 500,
offset: 0,
sorts: 'id',
}),
);
});
// A 429 carrying "retry-after: 2" should cause one 2000 ms sleep followed by a
// single retry that succeeds.
it('retries a 429 response once using Retry-After seconds', async () => {
  const rateLimitError = Object.assign(new Error('rate limited'), {
    statusCode: 429,
    headers: { 'retry-after': '2' },
  });
  const searchDashboards = vi
    .fn()
    .mockRejectedValueOnce(rateLimitError)
    .mockResolvedValueOnce([{ id: '10' }]);
  const sleep = vi.fn().mockResolvedValue(undefined);
  const fakeSdk = sdk({ search_dashboards: searchDashboards });
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk, sleep });

  const dashboards = await client.listDashboards();

  expect(dashboards).toEqual([{ id: '10', updatedAt: null }]);
  expect(sleep).toHaveBeenCalledWith(2000);
  expect(fakeSdk.search_dashboards).toHaveBeenCalledTimes(2);
});
// A 500 failure must propagate immediately: no sleep and no second SDK call.
it('does not retry non-429 errors', async () => {
  const serverError = Object.assign(new Error('broken dashboard'), { statusCode: 500 });
  const sleep = vi.fn().mockResolvedValue(undefined);
  const fakeSdk = sdk({ dashboard: vi.fn().mockRejectedValue(serverError) });
  const client = new LookerClient(params(), { sdkFactory: () => fakeSdk, sleep });

  await expect(client.getDashboard('10')).rejects.toThrow('broken dashboard');

  expect(sleep).not.toHaveBeenCalled();
  expect(fakeSdk.dashboard).toHaveBeenCalledTimes(1);
});
// Regression test for SDK construction without an injected sdkFactory.
// NOTE(review): this appears to attempt a real connection to the placeholder
// example.looker.com host and relies on that attempt failing — confirm this is
// acceptable where tests run without network access.
it('initializes the real @looker/sdk-node SDK with inline credentials without throwing', async () => {
const client = new LookerClient(params());
const result = await client.testConnection();
// Without injected sdkFactory the real SDK is constructed via InlineLookerSettings.
// This used to throw "Missing required configuration values like base_url" because
// the parent NodeSettingsIniFile constructor validated config before the override
// could supply credentials. Whatever happens now (auth/network failure against the
// bogus example URL is fine) — what must NOT happen is a synchronous SDK-init throw.
expect(result.success).toBe(false);
expect(result.error).toBeDefined();
expect(result.error).not.toMatch(/Missing required configuration values/i);
// Release the SDK session regardless of how the connection attempt failed.
await client.cleanup();
});
// Constructs a client whose base_url already ends in /api/4.0.
// NOTE(review): the assertions only check that the attempt fails without a
// config-validation error; they do not observe the normalized URL directly.
it('strips trailing /api/4.0 from base_url so the SDK does not double-prefix it', async () => {
const clientWithSuffix = new LookerClient({
base_url: 'https://example.looker.com/api/4.0',
client_id: 'id',
[clientSecretParam]: 'credential', // pragma: allowlist secret
});
const result = await clientWithSuffix.testConnection();
expect(result.success).toBe(false);
// If base_url is double-prefixed the SDK would hit /api/4.0/api/4.0/login. Either
// the URL is correctly normalized (transport-level network failure) or we'd see a
// 404/HTML response — either way the stack must not be a config-validation throw.
expect(result.error).not.toMatch(/Missing required configuration values/i);
await clientWithSuffix.cleanup();
});
});

View file

@ -1,44 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import { createDaemonLookerTableIdentifierParser } from './daemon-table-identifier-parser.js';
// Covers the daemon-backed table identifier parser: request forwarding and
// rejection of malformed responses.
describe('createDaemonLookerTableIdentifierParser', () => {
  const daemonBaseUrl = 'http://127.0.0.1:8765';

  it('posts parse items to the daemon endpoint', async () => {
    // Factory so the mocked response and the expectation never share an object.
    const parsedOrders = () => ({
      ok: true,
      catalog: null,
      schema: 'public',
      name: 'orders',
      canonical_table: 'public.orders',
    });
    const requestJson = vi.fn(async () => ({
      results: {
        orders: parsedOrders(),
      },
    }));
    const parser = createDaemonLookerTableIdentifierParser({
      baseUrl: daemonBaseUrl,
      requestJson,
    });

    const parsed = await parser.parse([{ key: 'orders', sql_table_name: 'public.orders', dialect: 'postgres' }]);

    // The parser returns the daemon's `results` map.
    expect(parsed).toEqual({ orders: parsedOrders() });
    // Items are forwarded unchanged under an `items` key.
    expect(requestJson).toHaveBeenCalledWith('/sql/parse-table-identifier', {
      items: [{ key: 'orders', sql_table_name: 'public.orders', dialect: 'postgres' }],
    });
  });

  it('rejects non-object daemon responses', async () => {
    const nullResultsRequest = async () => ({ results: null });
    const parser = createDaemonLookerTableIdentifierParser({
      baseUrl: daemonBaseUrl,
      requestJson: nullResultsRequest,
    });
    await expect(parser.parse([])).rejects.toThrow('ktx-daemon table identifier parser returned invalid results');
  });
});

View file

@ -1,47 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { detectLookerStagedDir } from './detect.js';
/**
 * Creates the file `relPath` under `stagedDir` with the given `body`
 * (default: an empty JSON object), creating missing parent directories first.
 */
async function touch(stagedDir: string, relPath: string, body = '{}\n'): Promise<void> {
  const parentDir = join(stagedDir, relPath, '..');
  await mkdir(parentDir, { recursive: true });
  const filePath = join(stagedDir, relPath);
  await writeFile(filePath, body, 'utf-8');
}
// Detection needs sync-config.json plus at least one runtime entity file
// (explore, dashboard, or Look); control and signal files alone do not count.
describe('detectLookerStagedDir', () => {
  let stagedDir: string;

  // Fresh, empty staged directory per test; removed afterwards.
  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'looker-detect-'));
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('returns true when sync-config.json and at least one runtime entity are present', async () => {
    await touch(stagedDir, 'sync-config.json');
    await touch(stagedDir, 'explores/b2b/sales_pipeline.json');
    const detected = await detectLookerStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('returns true for dashboard-only staged dirs', async () => {
    await touch(stagedDir, 'sync-config.json');
    await touch(stagedDir, 'dashboards/10.json');
    const detected = await detectLookerStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('returns false without sync-config.json', async () => {
    await touch(stagedDir, 'looks/20.json');
    const detected = await detectLookerStagedDir(stagedDir);
    expect(detected).toBe(false);
  });

  it('returns false when only control files are present', async () => {
    await touch(stagedDir, 'sync-config.json');
    await touch(stagedDir, 'lookml_models.json');
    await touch(stagedDir, 'signals/dashboard_usage.json', '[]\n');
    const detected = await detectLookerStagedDir(stagedDir);
    expect(detected).toBe(false);
  });
});

View file

@ -1,188 +0,0 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { dirname, join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { getLookerTriageSignals, writeLookerEvidenceDocuments } from './evidence-documents.js';
/**
 * Writes `value` as two-space-indented JSON (with a trailing newline) to
 * `relPath` under `root`, creating missing parent directories first.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const filePath = join(root, relPath);
  const parentDir = dirname(filePath);
  await mkdir(parentDir, { recursive: true });
  await writeFile(filePath, `${JSON.stringify(value, null, 2)}\n`, 'utf-8');
}
/**
 * Reads `relPath` under `root` as UTF-8 and parses it as JSON.
 * The result is cast to `T` without validation; callers choose the shape.
 */
async function readJson<T>(root: string, relPath: string): Promise<T> {
  const filePath = join(root, relPath);
  const raw = await readFile(filePath, 'utf-8');
  const parsed: unknown = JSON.parse(raw);
  return parsed as T;
}
// Exercises writeLookerEvidenceDocuments and getLookerTriageSignals against a temporary
// staged directory seeded with one explore, one dashboard, one Look, and four signal files.
describe('Looker evidence documents', () => {
let stagedDir: string;
// Rebuild the same staged fixture before every test so cases do not depend on each other.
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'looker-evidence-docs-'));
// Explore b2b.sales_pipeline: one dimension, one measure, one join.
await writeJson(stagedDir, 'explores/b2b/sales_pipeline.json', {
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: 'Pipeline analysis explore.',
fields: {
dimensions: [
{ name: 'opportunities.stage', label: 'Stage', type: 'string', sql: '${TABLE}.stage', description: null },
],
measures: [
{
name: 'opportunities.arr',
label: 'ARR',
type: 'sum',
sql: '${TABLE}.arr',
description: 'Annual recurring revenue.',
},
],
},
joins: [{ name: 'accounts', type: 'left_outer', relationship: 'many_to_one' }],
});
// Dashboard 10 with a single tile whose inline query targets the explore above.
await writeJson(stagedDir, 'dashboards/10.json', {
lookerId: '10',
title: 'Sales Pipeline Overview',
description: 'Executive dashboard for open pipeline ARR.',
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T10:00:00.000Z',
tiles: [
{
id: '100',
title: 'Open Pipeline ARR',
lookId: null,
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr', 'opportunities.stage'],
filters: { 'opportunities.stage': 'open' },
sorts: ['opportunities.arr desc'],
limit: '500',
},
},
],
});
// Look 20 querying the same explore.
await writeJson(stagedDir, 'looks/20.json', {
lookerId: '20',
title: 'Active Opportunity Pipeline',
description: 'Saved Look for active opportunity pipeline review.',
folderId: '7',
ownerId: '3',
updatedAt: '2026-04-30T11:00:00.000Z',
query: {
model: 'b2b',
view: 'sales_pipeline',
fields: ['opportunities.arr'],
filters: { 'opportunities.stage': 'open' },
sorts: [],
limit: '500',
},
});
// Usage signals: dashboard 10 and Look 20 each get one usage row.
await writeJson(stagedDir, 'signals/dashboard_usage.json', [
{
contentId: '10',
queryCount30d: 80,
uniqueUsers30d: 12,
lastRunAt: '2026-04-30T09:00:00.000Z',
topUsers: ['3'],
},
]);
await writeJson(stagedDir, 'signals/look_usage.json', [
{
contentId: '20',
queryCount30d: 2,
uniqueUsers30d: 1,
lastRunAt: '2026-04-29T09:00:00.000Z',
topUsers: ['3'],
},
]);
// Scheduled plans and favorites exist only for dashboard 10; Look 20 has neither.
await writeJson(stagedDir, 'signals/scheduled_plans.json', [
{ contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 2, recipientCount: 5 },
]);
await writeJson(stagedDir, 'signals/favorites.json', [
{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 },
]);
});
// Remove the temporary staged directory after each test.
afterEach(async () => {
await rm(stagedDir, { recursive: true, force: true });
});
it('writes indexable metadata and markdown for explores, dashboards, and Looks', async () => {
await writeLookerEvidenceDocuments(stagedDir);
// Each entity is expected to get a metadata.json under evidence/<kind>/<id>/.
await expect(readJson(stagedDir, 'evidence/explores/b2b/sales_pipeline/metadata.json')).resolves.toMatchObject({
objectType: 'looker_explore',
id: 'looker:explore:b2b.sales_pipeline',
title: 'Sales Pipeline',
path: 'Looker / Explores / b2b.sales_pipeline',
properties: {
rawPath: 'explores/b2b/sales_pipeline.json',
modelName: 'b2b',
exploreName: 'sales_pipeline',
},
});
await expect(readJson(stagedDir, 'evidence/dashboards/10/metadata.json')).resolves.toMatchObject({
objectType: 'looker_dashboard',
id: 'looker:dashboard:10',
title: 'Sales Pipeline Overview',
path: 'Looker / Dashboards / Sales Pipeline Overview',
lastEditedAt: '2026-04-30T10:00:00.000Z',
properties: {
rawPath: 'dashboards/10.json',
lookerId: '10',
},
});
await expect(readJson(stagedDir, 'evidence/looks/20/metadata.json')).resolves.toMatchObject({
objectType: 'looker_look',
id: 'looker:look:20',
title: 'Active Opportunity Pipeline',
path: 'Looker / Looks / Active Opportunity Pipeline',
properties: {
rawPath: 'looks/20.json',
lookerId: '20',
},
});
// The dashboard page is expected to render title, description, tile heading, and query details.
const dashboardMarkdown = await readFile(join(stagedDir, 'evidence/dashboards/10/page.md'), 'utf-8');
expect(dashboardMarkdown).toContain('# Sales Pipeline Overview');
expect(dashboardMarkdown).toContain('Executive dashboard for open pipeline ARR.');
expect(dashboardMarkdown).toContain('## Tile: Open Pipeline ARR');
expect(dashboardMarkdown).toContain('- model: b2b');
expect(dashboardMarkdown).toContain('- explore: sales_pipeline');
expect(dashboardMarkdown).toContain('- opportunities.stage = open');
// Usage, schedule, favorite, and owner data must not appear in the page text.
// '80' is the seeded queryCount30d for dashboard 10; this is a plain substring check,
// so it would also trip on any other '80' in the rendered page.
expect(dashboardMarkdown).not.toContain('80');
expect(dashboardMarkdown).not.toContain('queryCount30d');
expect(dashboardMarkdown).not.toContain('recipient');
expect(dashboardMarkdown).not.toContain('favorite');
expect(dashboardMarkdown).not.toContain('owner');
});
it('returns usage-aware triage signals without exposing usage as document prose', async () => {
await writeLookerEvidenceDocuments(stagedDir);
// Property hints are expected as strings, even for numeric and boolean signals.
await expect(getLookerTriageSignals(stagedDir, 'looker:dashboard:10')).resolves.toEqual({
objectType: 'looker_dashboard',
propertyHints: {
contentType: 'dashboard',
queryCount30d: '80',
uniqueUsers30d: '12',
isScheduled: 'true',
favoriteCount: '4',
},
lastEditedAt: '2026-04-30T10:00:00.000Z',
});
// Look 20 has no scheduled-plan or favorite rows in the fixture, so the expected
// hints are 'false' and '0' for those signals.
await expect(getLookerTriageSignals(stagedDir, 'looker:look:20')).resolves.toEqual({
objectType: 'looker_look',
propertyHints: {
contentType: 'look',
queryCount30d: '2',
uniqueUsers30d: '1',
isScheduled: 'false',
favoriteCount: '0',
},
lastEditedAt: '2026-04-30T11:00:00.000Z',
});
});
});

View file

@ -1,74 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import type { FetchContext } from '../../types.js';
import type { LookerSdkPort } from './client.js';
import {
DefaultLookerClientFactory,
DefaultLookerConnectionClientFactory,
type LookerCredentialResolver,
} from './factory.js';
import type { LookerRuntimeClient } from './fetch.js';
import type { LookerPullConfig } from './types.js';
/**
 * Builds a stub `LookerSdkPort` made entirely of vitest mocks.
 *
 * `me` resolves to a fixed API user, `search_dashboards` resolves to a single
 * dashboard with id '10', list-style endpoints resolve to empty arrays,
 * `run_inline_query` resolves to the string '[]', and the detail endpoints
 * (`dashboard`, `look`, `lookml_model_explore`) are bare mocks with no preset result.
 */
function sdk(): LookerSdkPort {
  // Every call yields an independent mock so call histories never mix.
  const emptyList = () => vi.fn().mockResolvedValue([]);

  const stub: LookerSdkPort = {
    me: vi.fn().mockResolvedValue({ id: '1', display_name: 'API User', email: 'api@example.com' }),
    search_dashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
    dashboard: vi.fn(),
    search_looks: emptyList(),
    search_scheduled_plans: emptyList(),
    look: vi.fn(),
    all_folders: emptyList(),
    all_users: emptyList(),
    all_groups: emptyList(),
    all_connections: emptyList(),
    all_lookml_models: emptyList(),
    lookml_model_explore: vi.fn(),
    run_inline_query: vi.fn().mockResolvedValue('[]'),
    logout: vi.fn().mockResolvedValue(undefined),
  };
  return stub;
}
/**
 * Checks that the connection-level factory looks up credentials by connection id
 * and returns a client backed by the injected SDK.
 */
describe('DefaultLookerConnectionClientFactory', () => {
  it('resolves credentials by Looker connection id and creates a KTX Looker client', async () => {
    const stubSdk = sdk();
    const resolve = vi.fn().mockResolvedValue({
      base_url: 'https://example.looker.com',
      client_id: 'id',
      client_secret: 'credential', // pragma: allowlist secret
    });
    const credentials: LookerCredentialResolver = { resolve };
    const connectionFactory = new DefaultLookerConnectionClientFactory(credentials, {
      sdkFactory: () => stubSdk,
    });

    const lookerClient = await connectionFactory.createClient('prod-looker');
    const dashboards = await lookerClient.listDashboards();

    // The stub SDK returns one dashboard without an update timestamp.
    expect(dashboards).toEqual([{ id: '10', updatedAt: null }]);
    expect(credentials.resolve).toHaveBeenCalledWith('prod-looker');
  });
});
/**
 * Checks which connection id the pull-level factory forwards to the inner
 * connection factory: the pull config's id when set, otherwise the fetch context's.
 */
describe('DefaultLookerClientFactory', () => {
  const ctx: FetchContext = { connectionId: 'ctx-looker', sourceKey: 'looker' };

  // Fresh runtime client, inner factory mock, and factory under test for each case.
  function setup() {
    const runtimeClient = { listDashboards: vi.fn() } as unknown as LookerRuntimeClient;
    const inner = { createClient: vi.fn().mockResolvedValue(runtimeClient) };
    const factory = new DefaultLookerClientFactory(inner);
    return { runtimeClient, inner, factory };
  }

  it('uses pullConfig.lookerConnectionId when present', async () => {
    const { runtimeClient, inner, factory } = setup();
    const pullConfig = { lookerConnectionId: 'prod-looker' } as LookerPullConfig;

    const created = await factory.createClient(pullConfig, ctx);

    expect(created).toBe(runtimeClient);
    expect(inner.createClient).toHaveBeenCalledWith('prod-looker');
  });

  it('falls back to ctx.connectionId when pullConfig.lookerConnectionId is absent', async () => {
    const { runtimeClient, inner, factory } = setup();
    const pullConfig = {} as LookerPullConfig;

    const created = await factory.createClient(pullConfig, ctx);

    expect(created).toBe(runtimeClient);
    expect(inner.createClient).toHaveBeenCalledWith('ctx-looker');
  });
});

View file

@ -1,77 +0,0 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { readLookerFetchReport, writeLookerFetchReport } from './fetch-report.js';
/**
 * Round-trip coverage for the staged fetch report: a directory without a report
 * reads back as null, and a written partial report reads back unchanged.
 */
describe('Looker staged fetch report', () => {
  let reportDir: string;

  // Returns a fresh literal on every call so the written input and the expected
  // value never share object references.
  const partialReport = (): Parameters<typeof writeLookerFetchReport>[1] => ({
    status: 'partial',
    retryRecommended: true,
    skipped: [
      {
        rawPath: 'dashboards/10.json',
        entityType: 'dashboard',
        entityId: '10',
        severity: 'error',
        statusCode: 429,
        message: 'Looker API rate limit remained after retry',
        retryRecommended: true,
      },
    ],
    warnings: [
      {
        rawPath: 'signals/dashboard_usage.json',
        entityType: 'signals',
        entityId: null,
        severity: 'warning',
        statusCode: 403,
        message: 'system__activity unavailable',
        retryRecommended: false,
      },
    ],
  });

  beforeEach(async () => {
    const prefix = join(tmpdir(), 'looker-fetch-report-');
    reportDir = await mkdtemp(prefix);
  });

  afterEach(async () => {
    await rm(reportDir, { force: true, recursive: true });
  });

  it('returns null when a staged bundle has no fetch report', async () => {
    const report = await readLookerFetchReport(reportDir);

    expect(report).toBeNull();
  });

  it('round-trips partial fetch issues', async () => {
    await writeLookerFetchReport(reportDir, partialReport());

    const reloaded = await readLookerFetchReport(reportDir);

    expect(reloaded).toEqual(partialReport());
  });
});

View file

@ -1,645 +0,0 @@
import { mkdtemp, readdir, readFile, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { chunkLookerStagedDir } from './chunk.js';
import { fetchLookerRuntimeBundle, type LookerRuntimeClient } from './fetch.js';
// Fixed UUID-shaped id used in this suite both as pullConfig.lookerConnectionId and as
// the fetch context's connectionId.
const connectionId = '11111111-1111-4111-8111-111111111111';
/**
 * Builds a `LookerRuntimeClient` made of vitest mocks with a small, coherent
 * fixture: dashboard '10', Look '20', folder '7', user '3', group '4', the
 * `b2b.sales_pipeline` explore, and one row per signal kind.
 *
 * All fixture objects are created inside the function, so each call returns
 * independent mocks and data that individual tests can override freely.
 */
function makeClient(): LookerRuntimeClient {
  const dashboard = {
    lookerId: '10',
    title: 'Sales Pipeline',
    description: 'Pipeline health',
    folderId: '7',
    ownerId: '3',
    updatedAt: '2026-04-30T12:00:00.000Z',
    tiles: [{ id: '100', title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
  };

  const look = {
    lookerId: '20',
    title: 'Open Pipeline',
    description: null,
    folderId: '7',
    ownerId: '3',
    updatedAt: '2026-04-30T12:00:00.000Z',
    query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
  };

  const folderListing = { folders: [{ id: '7', name: 'Sandbox', parentId: null, path: ['Sandbox'] }] };

  const modelListing = {
    models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
  };

  const explore = {
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
    joins: [],
  };

  const signals = {
    dashboardUsage: [{ contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] }],
    lookUsage: [{ contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] }],
    scheduledPlans: [
      { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 },
    ],
    favorites: [{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }],
  };

  return {
    listDashboards: vi.fn().mockResolvedValue([{ id: '10' }]),
    getDashboard: vi.fn().mockResolvedValue(dashboard),
    listLooks: vi.fn().mockResolvedValue([{ id: '20' }]),
    getLook: vi.fn().mockResolvedValue(look),
    listFolders: vi.fn().mockResolvedValue(folderListing),
    listUsers: vi.fn().mockResolvedValue([{ id: '3', displayName: 'Ada Lovelace', email: null }]),
    listGroups: vi.fn().mockResolvedValue([{ id: '4', name: 'Sales' }]),
    listLookmlModels: vi.fn().mockResolvedValue(modelListing),
    getExplore: vi.fn().mockResolvedValue(explore),
    getSignals: vi.fn().mockResolvedValue(signals),
    cleanup: vi.fn().mockResolvedValue(undefined),
  };
}
describe('fetchLookerRuntimeBundle', () => {
let stagedDir: string;
// Every test gets its own empty staged directory under the OS temp dir.
beforeEach(async () => {
  const prefix = join(tmpdir(), 'looker-fetch-');
  stagedDir = await mkdtemp(prefix);
});

// Remove the staged directory and anything the fetch wrote into it.
afterEach(async () => {
  await rm(stagedDir, { force: true, recursive: true });
});
// Full pull with the default fixture client: checks the staged file layout, the sync
// config (null cursors on both sides), the scope file, and all four signal files.
it('writes dashboards, looks, folders, users, groups, models, explores, signals, and sync config', async () => {
  const listStaged = (relPath: string) => readdir(join(stagedDir, relPath));
  const readStaged = async (relPath: string) => JSON.parse(await readFile(join(stagedDir, relPath), 'utf-8'));
  const lookerClient = makeClient();

  await fetchLookerRuntimeBundle({
    pullConfig: { lookerConnectionId: connectionId, instanceBaseUrl: 'https://example.looker.com' },
    stagedDir,
    ctx: { connectionId, sourceKey: 'looker' },
    clientFactory: { createClient: vi.fn().mockResolvedValue(lookerClient) },
    now: () => new Date('2026-04-30T12:30:00.000Z'),
  });

  // One staged file per entity returned by the fixture client.
  expect(await listStaged('dashboards')).toEqual(['10.json']);
  expect(await listStaged('looks')).toEqual(['20.json']);
  expect(await listStaged('users')).toEqual(['3.json']);
  expect(await listStaged('groups')).toEqual(['4.json']);
  expect(await listStaged('explores/b2b')).toEqual(['sales_pipeline.json']);

  const syncConfig = await readStaged('sync-config.json');
  expect(syncConfig).toEqual({
    lookerConnectionId: connectionId,
    fetchedAt: '2026-04-30T12:30:00.000Z',
    instanceBaseUrl: 'https://example.looker.com',
    previousCursors: {
      dashboardsLastSyncedAt: null,
      looksLastSyncedAt: null,
    },
    nextCursors: {
      dashboardsLastSyncedAt: null,
      looksLastSyncedAt: null,
    },
  });

  const scope = await readStaged('looker-scope.json');
  expect(scope).toEqual({
    mode: 'full',
    knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
    fetchedRawPaths: ['dashboards/10.json', 'looks/20.json'],
  });

  // Signal files mirror what the client's getSignals mock returned.
  const dashboardUsage = await readStaged('signals/dashboard_usage.json');
  expect(dashboardUsage).toEqual([
    { contentId: '10', queryCount30d: 50, uniqueUsers30d: 8, lastRunAt: null, topUsers: ['3'] },
  ]);

  const lookUsage = await readStaged('signals/look_usage.json');
  const scheduledPlans = await readStaged('signals/scheduled_plans.json');
  const favorites = await readStaged('signals/favorites.json');
  expect(lookUsage).toEqual([
    { contentId: '20', queryCount30d: 20, uniqueUsers30d: 5, lastRunAt: null, topUsers: ['3'] },
  ]);
  expect(scheduledPlans).toEqual([
    { contentId: '10', contentType: 'dashboard', isScheduled: true, scheduleCount: 1, recipientCount: 3 },
  ]);
  expect(favorites).toEqual([{ contentId: '10', contentType: 'dashboard', favoriteCount: 4 }]);
});
// Incremental pull: with since-cursors set, only dashboard '11' (newer than the cursor)
// and Look '21' (no updatedAt) are expected to have their bodies fetched and staged.
it('stages only changed Dashboard and Look entity bodies during incremental pulls', async () => {
  const readStaged = async (relPath: string) => JSON.parse(await readFile(join(stagedDir, relPath), 'utf-8'));
  const lookerClient = makeClient();

  // Dashboard '10' sits exactly on the dashboard cursor; '11' is ten minutes newer.
  vi.mocked(lookerClient.listDashboards).mockResolvedValue([
    { id: '10', updatedAt: '2026-04-30T12:00:00.000Z' },
    { id: '11', updatedAt: '2026-04-30T12:10:00.000Z' },
  ]);
  vi.mocked(lookerClient.getDashboard).mockImplementation(async (id: string) => {
    const updatedAt = id === '11' ? '2026-04-30T12:10:00.000Z' : '2026-04-30T12:00:00.000Z';
    return {
      lookerId: id,
      title: `Dashboard ${id}`,
      description: null,
      folderId: '7',
      ownerId: '3',
      updatedAt,
      tiles: [],
    };
  });

  // Look '20' sits exactly on the Look cursor; '21' carries no timestamp at all.
  vi.mocked(lookerClient.listLooks).mockResolvedValue([
    { id: '20', updatedAt: '2026-04-30T11:00:00.000Z' },
    { id: '21', updatedAt: null },
  ]);
  vi.mocked(lookerClient.getLook).mockImplementation(async (id: string) => {
    const updatedAt = id === '21' ? null : '2026-04-30T11:00:00.000Z';
    return {
      lookerId: id,
      title: `Look ${id}`,
      description: null,
      folderId: '7',
      ownerId: '3',
      updatedAt,
      query: null,
    };
  });

  await fetchLookerRuntimeBundle({
    pullConfig: {
      lookerConnectionId: connectionId,
      dashboardUpdatedSince: '2026-04-30T12:00:00.000Z',
      lookUpdatedSince: '2026-04-30T11:00:00.000Z',
    },
    stagedDir,
    ctx: { connectionId, sourceKey: 'looker' },
    clientFactory: { createClient: vi.fn().mockResolvedValue(lookerClient) },
    now: () => new Date('2026-04-30T12:30:00.000Z'),
  });

  expect(lookerClient.getDashboard).toHaveBeenCalledTimes(1);
  expect(lookerClient.getDashboard).toHaveBeenCalledWith('11');
  expect(lookerClient.getLook).toHaveBeenCalledTimes(1);
  expect(lookerClient.getLook).toHaveBeenCalledWith('21');

  const stagedDashboards = await readdir(join(stagedDir, 'dashboards'));
  const stagedLooks = await readdir(join(stagedDir, 'looks'));
  expect(stagedDashboards).toEqual(['11.json']);
  expect(stagedLooks).toEqual(['21.json']);

  const syncConfig = await readStaged('sync-config.json');
  expect(syncConfig.previousCursors).toEqual({
    dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
    looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
  });
  expect(syncConfig.nextCursors).toEqual({
    dashboardsLastSyncedAt: '2026-04-30T12:10:00.000Z',
    looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
  });

  // The scope file lists every listed entity as known, but only the fetched ones as fetched.
  const scope = await readStaged('looker-scope.json');
  expect(scope).toEqual({
    mode: 'incremental',
    knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json', 'looks/20.json', 'looks/21.json'],
    fetchedRawPaths: ['dashboards/11.json', 'looks/21.json'],
  });
});
// A client without getSignals must still produce signal files, written as empty arrays.
it('falls back to empty signal files when the client has no signal support', async () => {
  const clientWithoutSignals = makeClient();
  delete clientWithoutSignals.getSignals;

  await fetchLookerRuntimeBundle({
    pullConfig: { lookerConnectionId: connectionId },
    stagedDir,
    ctx: { connectionId, sourceKey: 'looker' },
    clientFactory: { createClient: vi.fn().mockResolvedValue(clientWithoutSignals) },
    now: () => new Date('2026-04-30T12:30:00.000Z'),
  });

  const lookUsageRaw = await readFile(join(stagedDir, 'signals/look_usage.json'), 'utf-8');
  expect(JSON.parse(lookUsageRaw)).toEqual([]);
});
// Two explores in one model: sales_pipeline uses a Looker connection that is present in
// pullConfig.connectionMappings, marketing uses one that is not. The test checks the
// stamped targets on both staged explores and the warning written to the fetch report.
it('stamps explore warehouse targets from pull config and reports unmapped Looker connections', async () => {
const client = makeClient();
const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
vi.mocked(client.listLookmlModels).mockResolvedValue({
models: [
{
name: 'b2b',
label: 'B2B',
explores: [
{ name: 'sales_pipeline', label: 'Sales Pipeline' },
{ name: 'marketing', label: 'Marketing' },
],
},
],
});
// The client returns explores with null targets; any target in the staged output is
// therefore added by the fetch step.
vi.mocked(client.getExplore).mockImplementation(async (_modelName: string, exploreName: string) => {
if (exploreName === 'marketing') {
return {
modelName: 'b2b',
exploreName: 'marketing',
label: 'Marketing',
description: null,
rawSqlTableName: 'proj.dataset.marketing',
connectionName: 'missing_mapping',
viewName: 'marketing',
fields: {
dimensions: [{ name: 'marketing.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'marketing.spend', label: null, type: null, sql: null, description: null }],
},
joins: [],
targetWarehouseConnectionId: null,
targetTable: null,
};
}
return {
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: {
dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
},
joins: [
{
name: 'accounts',
type: 'left_outer',
relationship: 'many_to_one',
rawSqlTableName: 'proj.dataset.accounts',
// Concatenation yields the literal text "${opportunities.account_id} = ${accounts.id}".
// NOTE(review): presumably split to keep "${" out of a plain string literal for lint — confirm.
sqlOn: '$' + '{opportunities.account_id} = $' + '{accounts.id}',
from: null,
targetTable: null,
},
],
targetWarehouseConnectionId: null,
targetTable: null,
};
});
await fetchLookerRuntimeBundle({
pullConfig: {
lookerConnectionId: connectionId,
// Only b2b_sandbox_bq is mapped; missing_mapping intentionally has no entry.
connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
// Pre-parsed tables: one entry for the explore and one for its accounts join.
parsedTargetTables: {
'b2b.sales_pipeline': {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
'b2b.sales_pipeline.accounts': {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'accounts',
canonicalTable: 'proj.dataset.accounts',
},
},
},
stagedDir,
ctx: { connectionId, sourceKey: 'looker' },
clientFactory: { createClient: vi.fn().mockResolvedValue(client) },
now: () => new Date('2026-04-30T12:30:00.000Z'),
});
// Mapped explore: warehouse connection id and parsed tables are stamped on the explore and its join.
const salesPipeline = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8'));
expect(salesPipeline).toMatchObject({
connectionName: 'b2b_sandbox_bq',
targetWarehouseConnectionId: warehouseConnectionId,
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'opportunities',
canonicalTable: 'proj.dataset.opportunities',
},
joins: [
{
name: 'accounts',
targetTable: {
ok: true,
catalog: 'proj',
schema: 'dataset',
name: 'accounts',
canonicalTable: 'proj.dataset.accounts',
},
},
],
});
// Unmapped explore: no warehouse connection and a failed target with reason no_connection_mapping.
const marketing = JSON.parse(await readFile(join(stagedDir, 'explores/b2b/marketing.json'), 'utf-8'));
expect(marketing).toMatchObject({
connectionName: 'missing_mapping',
targetWarehouseConnectionId: null,
targetTable: {
ok: false,
reason: 'no_connection_mapping',
},
});
// The report is expected to be 'partial' with nothing skipped and a single warning
// naming the unmapped connection and the explore it affects.
const report = JSON.parse(await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8'));
expect(report.status).toBe('partial');
expect(report.skipped).toEqual([]);
expect(report.warnings).toEqual([
{
rawPath: 'looker_connection_mappings/missing_mapping',
entityType: 'looker_connection_mapping',
entityId: 'missing_mapping',
severity: 'warning',
statusCode: null,
message: 'Looker connection missing_mapping is not mapped to a warehouse connection; 1 explore will be wiki-only.',
retryRecommended: false,
kind: 'unmapped_looker_connection',
details: {
lookerConnectionName: 'missing_mapping',
affectedExplores: ['b2b.marketing'],
},
},
]);
});
// The explore's connection is mapped, but its pre-parsed target table is a failure
// (unresolved Looker template). The failure is expected on the staged explore and as a
// non-retryable warning in the fetch report.
it('reports parsed target table failures without retrying the Looker fetch', async () => {
  const readStaged = async (relPath: string) => JSON.parse(await readFile(join(stagedDir, relPath), 'utf-8'));
  const lookerClient = makeClient();
  const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
  // Evaluates to the literal text "${derived.SQL_TABLE_NAME}".
  const unresolvedTemplate = '$' + '{derived.SQL_TABLE_NAME}';

  vi.mocked(lookerClient.getExplore).mockResolvedValue({
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    rawSqlTableName: unresolvedTemplate,
    connectionName: 'b2b_sandbox_bq',
    viewName: 'opportunities',
    fields: {
      dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
      measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
    },
    joins: [],
    targetWarehouseConnectionId: null,
    targetTable: null,
  });

  await fetchLookerRuntimeBundle({
    pullConfig: {
      lookerConnectionId: connectionId,
      connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
      connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
      parsedTargetTables: {
        'b2b.sales_pipeline': {
          ok: false,
          reason: 'looker_template_unresolved',
          detail: 'Looker template markers cannot be resolved before parsing.',
        },
      },
    },
    stagedDir,
    ctx: { connectionId, sourceKey: 'looker' },
    clientFactory: { createClient: vi.fn().mockResolvedValue(lookerClient) },
    now: () => new Date('2026-04-30T12:30:00.000Z'),
  });

  // The warehouse connection is still stamped even though the table could not be resolved.
  const stagedExplore = await readStaged('explores/b2b/sales_pipeline.json');
  expect(stagedExplore).toMatchObject({
    targetWarehouseConnectionId: warehouseConnectionId,
    targetTable: {
      ok: false,
      reason: 'looker_template_unresolved',
    },
  });

  const fetchReport = await readStaged('looker-fetch-report.json');
  expect(fetchReport).toMatchObject({
    status: 'partial',
    retryRecommended: false,
    skipped: [],
    warnings: [
      {
        rawPath: 'looker_connection_mappings/b2b_sandbox_bq',
        entityType: 'looker_connection_mapping',
        entityId: 'b2b_sandbox_bq',
        severity: 'warning',
        statusCode: null,
        message:
          'Looker explore b2b.sales_pipeline has sql_table_name that cannot be mapped to a physical warehouse table: looker_template_unresolved.',
        retryRecommended: false,
        kind: 'looker_template_unresolved',
        details: {
          lookerConnectionName: 'b2b_sandbox_bq',
          rawSqlTableName: unresolvedTemplate,
          reason: 'looker_template_unresolved',
        },
      },
    ],
  });
});
// The default dashboard tile and Look both query b2b.sales_pipeline; once that explore
// has a resolved warehouse target, the same target is expected on their staged queries.
it('propagates parent explore warehouse targets onto Dashboard tile and Look queries', async () => {
  const readStaged = async (relPath: string) => JSON.parse(await readFile(join(stagedDir, relPath), 'utf-8'));
  const lookerClient = makeClient();
  const warehouseConnectionId = '22222222-2222-4222-8222-222222222222';
  // Fresh literal per call so the pull config and the expectations never share an object.
  const opportunitiesTable = () => ({
    ok: true as const,
    catalog: 'proj',
    schema: 'dataset',
    name: 'opportunities',
    canonicalTable: 'proj.dataset.opportunities',
  });

  vi.mocked(lookerClient.getExplore).mockResolvedValue({
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
    connectionName: 'b2b_sandbox_bq',
    viewName: 'opportunities',
    fields: {
      dimensions: [{ name: 'opportunities.id', label: null, type: null, sql: null, description: null }],
      measures: [{ name: 'opportunities.arr', label: null, type: null, sql: null, description: null }],
    },
    joins: [],
    targetWarehouseConnectionId: null,
    targetTable: null,
  });

  await fetchLookerRuntimeBundle({
    pullConfig: {
      lookerConnectionId: connectionId,
      connectionMappings: { b2b_sandbox_bq: warehouseConnectionId },
      connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
      parsedTargetTables: {
        'b2b.sales_pipeline': opportunitiesTable(),
      },
    },
    stagedDir,
    ctx: { connectionId, sourceKey: 'looker' },
    clientFactory: { createClient: vi.fn().mockResolvedValue(lookerClient) },
    now: () => new Date('2026-04-30T12:30:00.000Z'),
  });

  const stagedDashboard = await readStaged('dashboards/10.json');
  expect(stagedDashboard.tiles[0].query).toMatchObject({
    model: 'b2b',
    view: 'sales_pipeline',
    targetWarehouseConnectionId: warehouseConnectionId,
    targetTable: opportunitiesTable(),
  });

  const stagedLook = await readStaged('looks/20.json');
  expect(stagedLook.query).toMatchObject({
    model: 'b2b',
    view: 'sales_pipeline',
    targetWarehouseConnectionId: warehouseConnectionId,
    targetTable: opportunitiesTable(),
  });
});
// Dashboard '11' fails with a 429 during the detail fetch. Expected outcome: no dashboard
// is staged, the dashboard cursor stays at its previous value, the Look cursor advances,
// and the report lists the skipped dashboard with a retry recommendation.
it('records skipped detail entities and keeps cursors pinned for affected entity types', async () => {
  const readStaged = async (relPath: string) => JSON.parse(await readFile(join(stagedDir, relPath), 'utf-8'));
  const lookerClient = makeClient();

  vi.mocked(lookerClient.listDashboards).mockResolvedValue([
    { id: '10', updatedAt: '2026-04-30T12:00:00.000Z' },
    { id: '11', updatedAt: '2026-04-30T12:10:00.000Z' },
  ]);
  vi.mocked(lookerClient.getDashboard).mockImplementation(async (id: string) => {
    if (id === '11') {
      // Error carrying an HTTP-style statusCode property.
      throw Object.assign(new Error('Looker API rate limit remained after retry'), { statusCode: 429 });
    }
    return {
      lookerId: id,
      title: `Dashboard ${id}`,
      description: null,
      folderId: '7',
      ownerId: '3',
      updatedAt: '2026-04-30T12:00:00.000Z',
      tiles: [],
    };
  });
  vi.mocked(lookerClient.listLooks).mockResolvedValue([{ id: '20', updatedAt: '2026-04-30T11:15:00.000Z' }]);
  vi.mocked(lookerClient.getLook).mockResolvedValue({
    lookerId: '20',
    title: 'Look 20',
    description: null,
    folderId: '7',
    ownerId: '3',
    updatedAt: '2026-04-30T11:15:00.000Z',
    query: null,
  });

  await fetchLookerRuntimeBundle({
    pullConfig: {
      lookerConnectionId: connectionId,
      dashboardUpdatedSince: '2026-04-30T12:00:00.000Z',
      lookUpdatedSince: '2026-04-30T11:00:00.000Z',
    },
    stagedDir,
    ctx: { connectionId, sourceKey: 'looker' },
    clientFactory: { createClient: vi.fn().mockResolvedValue(lookerClient) },
    now: () => new Date('2026-04-30T12:30:00.000Z'),
  });

  // The dashboards directory is never created because the only candidate failed.
  const dashboardListing = readdir(join(stagedDir, 'dashboards'));
  await expect(dashboardListing).rejects.toMatchObject({ code: 'ENOENT' });
  const stagedLooks = await readdir(join(stagedDir, 'looks'));
  expect(stagedLooks).toEqual(['20.json']);

  const syncConfig = await readStaged('sync-config.json');
  expect(syncConfig.nextCursors).toEqual({
    dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
    looksLastSyncedAt: '2026-04-30T11:15:00.000Z',
  });

  const fetchReport = await readStaged('looker-fetch-report.json');
  expect(fetchReport).toEqual({
    status: 'partial',
    retryRecommended: true,
    skipped: [
      {
        rawPath: 'dashboards/11.json',
        entityType: 'dashboard',
        entityId: '11',
        severity: 'error',
        statusCode: 429,
        message: 'Looker API rate limit remained after retry',
        retryRecommended: true,
      },
    ],
    warnings: [],
  });
});
// A 403 from listLookmlModels must not abort the pull: dashboards and Looks are still
// staged, an empty model list is written, no explores are fetched, and the report
// carries a warning while keeping status 'success'.
it('continues without explore bootstrap when LookML model listing is denied', async () => {
  const lookerClient = makeClient();
  const denied = Object.assign(new Error('LookML model access denied'), { statusCode: 403 });
  vi.mocked(lookerClient.listLookmlModels).mockRejectedValue(denied);

  await fetchLookerRuntimeBundle({
    pullConfig: { lookerConnectionId: connectionId },
    stagedDir,
    ctx: { connectionId, sourceKey: 'looker' },
    clientFactory: { createClient: vi.fn().mockResolvedValue(lookerClient) },
    now: () => new Date('2026-04-30T12:30:00.000Z'),
  });

  const stagedDashboards = await readdir(join(stagedDir, 'dashboards'));
  const stagedLooks = await readdir(join(stagedDir, 'looks'));
  expect(stagedDashboards).toEqual(['10.json']);
  expect(stagedLooks).toEqual(['20.json']);

  const modelsFile = await readFile(join(stagedDir, 'lookml_models.json'), 'utf-8');
  expect(modelsFile).toBe('{\n "models": []\n}\n');

  // No explores directory exists because no explore was ever requested.
  const exploreListing = readdir(join(stagedDir, 'explores'));
  await expect(exploreListing).rejects.toMatchObject({ code: 'ENOENT' });
  expect(lookerClient.getExplore).not.toHaveBeenCalled();

  const reportRaw = await readFile(join(stagedDir, 'looker-fetch-report.json'), 'utf-8');
  expect(JSON.parse(reportRaw)).toEqual({
    status: 'success',
    retryRecommended: false,
    skipped: [],
    warnings: [
      {
        rawPath: 'lookml_models.json',
        entityType: 'lookml_models',
        entityId: null,
        severity: 'warning',
        statusCode: 403,
        message: 'LookML model access denied',
        retryRecommended: false,
      },
    ],
  });

  // The staged bundle must still chunk into dashboard and Look work units, none of
  // which depend on the explore file that was never written.
  const chunked = await chunkLookerStagedDir(stagedDir);
  const unitKeys = chunked.workUnits.map((wu) => wu.unitKey).sort();
  const dependencyPaths = chunked.workUnits.flatMap((wu) => wu.dependencyPaths);
  expect(unitKeys).toEqual(['looker-dashboard-10', 'looker-look-20']);
  expect(dependencyPaths).not.toContain('explores/b2b/sales_pipeline.json');
});
// Happy path: the client's cleanup hook runs exactly once after the fetch completes.
it('cleans up the Looker client after a successful fetch', async () => {
  const lookerClient = makeClient();
  const createClient = vi.fn().mockResolvedValue(lookerClient);

  await fetchLookerRuntimeBundle({
    pullConfig: { lookerConnectionId: connectionId },
    stagedDir,
    ctx: { connectionId, sourceKey: 'looker' },
    clientFactory: { createClient },
    now: () => new Date('2026-04-30T12:30:00.000Z'),
  });

  expect(lookerClient.cleanup).toHaveBeenCalledTimes(1);
});
// Failure path: when listing dashboards rejects, the error propagates to the caller and
// the client's cleanup hook still runs exactly once.
it('cleans up the Looker client when fetch throws', async () => {
  const lookerClient = makeClient();
  vi.mocked(lookerClient.listDashboards).mockRejectedValue(new Error('Looker API unavailable'));

  const pendingFetch = fetchLookerRuntimeBundle({
    pullConfig: { lookerConnectionId: connectionId },
    stagedDir,
    ctx: { connectionId, sourceKey: 'looker' },
    clientFactory: { createClient: vi.fn().mockResolvedValue(lookerClient) },
    now: () => new Date('2026-04-30T12:30:00.000Z'),
  });

  await expect(pendingFetch).rejects.toThrow('Looker API unavailable');
  expect(lookerClient.cleanup).toHaveBeenCalledTimes(1);
});
});

View file

@ -1,116 +0,0 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, describe, expect, it } from 'vitest';
import { LocalLookerRuntimeStore } from './local-runtime-store.js';
/**
 * Behavioural tests for LocalLookerRuntimeStore: cursor persistence, connection
 * mappings, refresh of discovered connection metadata, and YAML bootstrap precedence.
 *
 * Each test builds its own store backed by a SQLite file in a fresh temp directory.
 * Those directories are tracked and removed after every test so runs do not leave
 * `ktx-looker-store-*` folders behind in the OS temp dir.
 */
describe('LocalLookerRuntimeStore', () => {
  // Temp directories handed out by store() during the current test.
  const tempDirs: string[] = [];

  afterEach(async () => {
    // splice(0) empties the tracker up front, so a failed removal is not retried by later tests.
    const dirs = tempDirs.splice(0);
    // NOTE(review): if the store keeps its SQLite file open, removal may fail on platforms
    // that lock open files; close the store here first if it exposes a way to do so.
    await Promise.all(dirs.map((dir) => rm(dir, { recursive: true, force: true })));
  });

  /** Creates a store on a fresh temp database with a fixed clock. */
  async function store() {
    const dir = await mkdtemp(join(tmpdir(), 'ktx-looker-store-'));
    tempDirs.push(dir);
    return new LocalLookerRuntimeStore({
      dbPath: join(dir, 'db.sqlite'),
      now: () => new Date('2026-05-05T12:00:00.000Z'),
    });
  }

  it('stores cursors and connection mappings', async () => {
    const local = await store();
    await local.setCursors('prod-looker', {
      dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z',
      looksLastSyncedAt: null,
    });
    await local.upsertConnectionMapping({
      lookerConnectionId: 'prod-looker',
      lookerConnectionName: 'bq_reporting',
      ktxConnectionId: 'prod-warehouse',
      source: 'cli',
    });

    await expect(local.readCursors('prod-looker')).resolves.toEqual({
      dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z',
      looksLastSyncedAt: null,
    });
    // A mapping that has never been refreshed has no discovered host/database/dialect.
    await expect(local.readMappings('prod-looker')).resolves.toEqual([
      {
        lookerConnectionName: 'bq_reporting',
        ktxConnectionId: 'prod-warehouse',
        lookerHost: null,
        lookerDatabase: null,
        lookerDialect: null,
      },
    ]);
  });

  it('refreshes discovered metadata without dropping local targets', async () => {
    const local = await store();
    await local.upsertConnectionMapping({
      lookerConnectionId: 'prod-looker',
      lookerConnectionName: 'bq_reporting',
      ktxConnectionId: 'prod-warehouse',
      source: 'cli',
    });
    await local.refreshDiscoveredConnections({
      lookerConnectionId: 'prod-looker',
      discovered: [
        {
          name: 'bq_reporting',
          host: 'bigquery.googleapis.com',
          database: 'analytics',
          schema: null,
          dialect: 'bigquery_standard_sql',
        },
      ],
    });

    // The CLI-assigned ktxConnectionId survives; metadata is filled in and source becomes 'refresh'.
    await expect(local.listConnectionMappings('prod-looker')).resolves.toEqual([
      {
        lookerConnectionName: 'bq_reporting',
        ktxConnectionId: 'prod-warehouse',
        lookerHost: 'bigquery.googleapis.com',
        lookerDatabase: 'analytics',
        lookerDialect: 'bigquery_standard_sql',
        source: 'refresh',
      },
    ]);
  });

  it('applies yaml mapping intent while preserving refresh metadata and cli overrides', async () => {
    const local = await store();
    await local.refreshDiscoveredConnections({
      lookerConnectionId: 'prod-looker',
      discovered: [{ name: 'analytics', host: 'looker-db.test', database: 'warehouse', schema: null, dialect: 'postgres' }],
    });
    await local.upsertConnectionMapping({
      lookerConnectionId: 'prod-looker',
      lookerConnectionName: 'manual',
      ktxConnectionId: 'cli-warehouse',
      source: 'cli',
    });
    await local.applyYamlBootstrap({
      lookerConnectionId: 'prod-looker',
      mappings: [
        { lookerConnectionName: 'analytics', ktxConnectionId: 'yaml-warehouse' },
        { lookerConnectionName: 'manual', ktxConnectionId: 'yaml-warehouse' },
      ],
    });

    // YAML sets the target for the discovered connection but does not override the CLI mapping.
    await expect(local.listConnectionMappings('prod-looker')).resolves.toMatchObject([
      {
        lookerConnectionName: 'analytics',
        ktxConnectionId: 'yaml-warehouse',
        lookerHost: 'looker-db.test',
        lookerDatabase: 'warehouse',
        lookerDialect: 'postgres',
        source: 'ktx.yaml',
      },
      {
        lookerConnectionName: 'manual',
        ktxConnectionId: 'cli-warehouse',
        source: 'cli',
      },
    ]);
  });
});

View file

@ -1,125 +0,0 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { LookerRuntimeClient } from './fetch.js';
import { LookerSourceAdapter } from './looker.adapter.js';
const connectionId = '11111111-1111-4111-8111-111111111111';

/**
 * Builds a stubbed Looker runtime client.
 *
 * List endpoints resolve to empty collections, except the LookML model listing,
 * which exposes a single `b2b` model with one `sales_pipeline` explore, and
 * `getExplore`, which resolves to that explore with no fields or joins.
 * `getDashboard` and `getLook` are bare mocks with no configured result.
 */
function makeClient(): LookerRuntimeClient {
  const lookmlModels = {
    models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
  };
  const salesPipelineExplore = {
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    fields: { dimensions: [], measures: [] },
    joins: [],
  };

  const client: LookerRuntimeClient = {
    listDashboards: vi.fn().mockResolvedValue([]),
    getDashboard: vi.fn(),
    listLooks: vi.fn().mockResolvedValue([]),
    getLook: vi.fn(),
    listFolders: vi.fn().mockResolvedValue({ folders: [] }),
    listUsers: vi.fn().mockResolvedValue([]),
    listGroups: vi.fn().mockResolvedValue([]),
    listLookmlModels: vi.fn().mockResolvedValue(lookmlModels),
    getExplore: vi.fn().mockResolvedValue(salesPipelineExplore),
  };
  return client;
}
/**
 * Covers the composed Looker source adapter: static metadata, triage signals,
 * the fetch/detect/chunk pipeline, pull-success callbacks, and scope description.
 */
describe('LookerSourceAdapter', () => {
  let stagedDir: string;

  /** Adapter wired to a synchronous factory that returns a fresh stub client. */
  function stubAdapter(): LookerSourceAdapter {
    return new LookerSourceAdapter({ clientFactory: { createClient: () => makeClient() } });
  }

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'looker-adapter-'));
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('exposes source="looker" and skillNames=["looker_ingest"]', () => {
    const adapter = stubAdapter();

    expect(adapter.source).toBe('looker');
    expect(adapter.skillNames).toEqual(['looker_ingest']);
  });

  it('enables context evidence indexing and delegates triage signals', async () => {
    const adapter = stubAdapter();

    expect(adapter.evidenceIndexing).toBe('documents');
    expect(adapter.triageSupported).toBe(true);
    await expect(adapter.getTriageSignals?.(stagedDir, 'looker:dashboard:10')).resolves.toMatchObject({
      objectType: 'looker_dashboard',
    });
  });

  it('fetches, detects, and chunks a runtime bundle through the composed adapter', async () => {
    const createClient = vi.fn().mockResolvedValue(makeClient());
    const adapter = new LookerSourceAdapter({
      clientFactory: { createClient },
      now: () => new Date('2026-04-30T12:30:00.000Z'),
    });
    await mkdir(stagedDir, { recursive: true });

    await adapter.fetch({ lookerConnectionId: connectionId }, stagedDir, { connectionId, sourceKey: 'looker' });

    const detected = await adapter.detect(stagedDir);
    expect(detected).toBe(true);

    const stagedExplore = await readFile(join(stagedDir, 'explores/b2b/sales_pipeline.json'), 'utf-8');
    expect(stagedExplore).toContain('sales_pipeline');

    const chunked = await adapter.chunk(stagedDir);
    const unitKeys = chunked.workUnits.map((wu) => wu.unitKey);
    expect(unitKeys).toEqual(['looker-explore-b2b-sales_pipeline']);
  });

  it('passes pull success notifications to the server callback', async () => {
    const onPullSucceeded = vi.fn().mockResolvedValue(undefined);
    const completedAt = new Date('2026-04-30T12:00:00.000Z');
    const adapter = new LookerSourceAdapter({
      clientFactory: { createClient: () => makeClient() },
      onPullSucceeded,
    });

    await adapter.onPullSucceeded({
      connectionId,
      sourceKey: 'looker',
      syncId: 'sync-1',
      trigger: 'scheduled_pull',
      completedAt,
      stagedDir: '/tmp/staged',
    });

    // The callback must receive the event unchanged.
    expect(onPullSucceeded).toHaveBeenCalledWith({
      connectionId,
      sourceKey: 'looker',
      syncId: 'sync-1',
      trigger: 'scheduled_pull',
      completedAt,
      stagedDir: '/tmp/staged',
    });
  });

  it('describes incremental fetch scope from the staged scope file', async () => {
    const scopeFile = {
      mode: 'incremental',
      knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json'],
      fetchedRawPaths: ['dashboards/11.json'],
    };
    await mkdir(join(stagedDir, 'dashboards'), { recursive: true });
    await writeFile(join(stagedDir, 'looker-scope.json'), JSON.stringify(scopeFile, null, 2));

    const scope = await stubAdapter().describeScope(stagedDir);

    // Known-but-not-fetched is out of scope; fetched and previously unknown paths are in scope.
    expect(scope.isPathInScope('dashboards/10.json')).toBe(false);
    expect(scope.isPathInScope('dashboards/11.json')).toBe(true);
    expect(scope.isPathInScope('dashboards/12.json')).toBe(true);
  });
});

View file

@ -1,385 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import type { StagedExploreFile, StagedLookmlModelsFile } from './types.js';
import {
buildLookerPullConfigFromInputs,
collectExploreParseItems,
computeLookerMappingDrift,
discoverLookerConnections,
lookerDialectToConnectionType,
projectParsedIdentifier,
refreshLookerMappingPlaceholders,
sqlglotDialectForConnectionType,
suggestKtxConnectionForLookerConnection,
validateLookerMappings,
validateLookerWarehouseTarget,
} from './mapping.js';
// Discovered Looker connections shared by the suites below.
// Index 0 uses the BigQuery dialect with no schema; index 1 uses the Postgres
// dialect and has a host that includes a port. Tests index into this array.
const liveConnections = [
{
name: 'b2b_sandbox_bq',
host: 'warehouse.example.com',
database: 'analytics',
schema: null,
dialect: 'bigquery_standard_sql',
},
{
name: 'pg_runtime',
host: 'pg.internal:5432',
database: 'app',
schema: 'public',
dialect: 'postgres',
},
];
// Staged explore bound to the `b2b_sandbox_bq` connection. Both the base explore
// and its single `accounts` join carry a raw sql_table_name; target fields are
// left null.
const mappedExplore: StagedExploreFile = {
modelName: 'b2b',
exploreName: 'sales_pipeline',
label: 'Sales Pipeline',
description: null,
rawSqlTableName: 'proj.analytics.opportunities AS opportunities',
connectionName: 'b2b_sandbox_bq',
viewName: 'opportunities',
fields: { dimensions: [], measures: [] },
joins: [
{
name: 'accounts',
type: 'left_outer',
relationship: 'many_to_one',
rawSqlTableName: 'proj.analytics.accounts',
sqlOn: null,
from: null,
targetTable: null,
},
],
targetWarehouseConnectionId: null,
targetTable: null,
};
// LookML model listing with a single model/explore pair matching `mappedExplore`.
const models: StagedLookmlModelsFile = {
models: [{ name: 'b2b', label: 'B2B', explores: [{ name: 'sales_pipeline', label: 'Sales Pipeline' }] }],
};
/** Connection discovery should call straight through to the runtime client. */
describe('discoverLookerConnections', () => {
  it('delegates to the runtime client connection discovery method', async () => {
    const listLookerConnections = vi.fn().mockResolvedValue(liveConnections);

    const discovered = await discoverLookerConnections({ listLookerConnections });

    expect(discovered).toEqual(liveConnections);
    expect(listLookerConnections).toHaveBeenCalledTimes(1);
  });
});
/** Pure lookup helpers translating between Looker dialects, KTX connection types, and sqlglot dialects. */
describe('looker dialect and target validation helpers', () => {
  it('maps Looker dialect names to KTX connection types', () => {
    const bigquery = lookerDialectToConnectionType('bigquery_standard_sql');
    const postgres = lookerDialectToConnectionType('postgres');
    expect(bigquery).toBe('BIGQUERY');
    expect(postgres).toBe('POSTGRESQL');

    // These dialect names are expected to have no KTX connection type.
    expect(lookerDialectToConnectionType('mssql')).toBeNull();
    expect(lookerDialectToConnectionType('tsql')).toBeNull();
    expect(lookerDialectToConnectionType('unknown')).toBeNull();
  });

  it('maps supported warehouse connection types to sqlglot dialects', () => {
    const bigquery = sqlglotDialectForConnectionType('BIGQUERY');
    const postgres = sqlglotDialectForConnectionType('POSTGRESQL');
    const looker = sqlglotDialectForConnectionType('LOOKER');

    expect(bigquery).toBe('bigquery');
    expect(postgres).toBe('postgres');
    expect(looker).toBeNull();
  });

  it('returns a structured failure for unsupported Looker warehouse targets', () => {
    const verdict = validateLookerWarehouseTarget('LOOKER');

    expect(verdict).toEqual({
      ok: false,
      reason: 'Connection type LOOKER cannot be used as a Looker warehouse mapping target',
    });
  });
});
/** Suggestion only succeeds when exactly one candidate matches the Looker connection. */
describe('suggestKtxConnectionForLookerConnection', () => {
  it('returns the single deterministic target with matching type, host, and database', () => {
    // Only the POSTGRESQL candidate is expected to match, despite upper-case host/database.
    const suggestion = suggestKtxConnectionForLookerConnection({
      lookerConnection: liveConnections[1],
      candidateConnections: [
        {
          id: 'wrong-type',
          connection_type: 'MYSQL',
          connection_params: { host: 'pg.internal', database: 'app' },
        },
        {
          id: 'pg-target',
          connection_type: 'POSTGRESQL',
          connection_params: { host: 'PG.INTERNAL', database: 'APP' },
        },
      ],
    });

    expect(suggestion).toBe('pg-target');
  });

  it('returns null when more than one target matches', () => {
    const suggestion = suggestKtxConnectionForLookerConnection({
      lookerConnection: liveConnections[1],
      candidateConnections: [
        {
          id: 'first',
          connection_type: 'POSTGRESQL',
          connection_params: { host: 'pg.internal', database: 'app' },
        },
        {
          id: 'second',
          connection_type: 'POSTGRESQL',
          connection_params: { host: 'pg.internal:5432', database: 'APP' },
        },
      ],
    });

    expect(suggestion).toBeNull();
  });
});
/** Refreshing merges live Looker metadata into stored mappings and adds unmapped placeholders. */
describe('refreshLookerMappingPlaceholders', () => {
  it('adds newly discovered placeholders and refreshes live metadata without dropping saved targets', () => {
    const refreshed = refreshLookerMappingPlaceholders({
      stored: [
        {
          lookerConnectionName: 'b2b_sandbox_bq',
          ktxConnectionId: 'warehouse',
          lookerHost: null,
          lookerDatabase: null,
          lookerDialect: null,
        },
      ],
      live: liveConnections,
    });

    expect(refreshed).toEqual({
      changed: true,
      mappings: [
        // Existing mapping: target kept, metadata filled from the live connection.
        {
          lookerConnectionName: 'b2b_sandbox_bq',
          ktxConnectionId: 'warehouse',
          lookerHost: 'warehouse.example.com',
          lookerDatabase: 'analytics',
          lookerDialect: 'bigquery_standard_sql',
        },
        // Newly discovered connection: placeholder without a target.
        {
          lookerConnectionName: 'pg_runtime',
          ktxConnectionId: null,
          lookerHost: 'pg.internal:5432',
          lookerDatabase: 'app',
          lookerDialect: 'postgres',
        },
      ],
    });
  });
});
/** Drift reporting between stored and discovered connections, plus target validation. */
describe('computeLookerMappingDrift and validateLookerMappings', () => {
  it('reports unmapped live connections, stale stored mappings, and in-sync mappings', () => {
    const drift = computeLookerMappingDrift({
      storedMappings: [
        {
          lookerConnectionName: 'b2b_sandbox_bq',
          ktxConnectionId: 'warehouse',
          lookerHost: null,
          lookerDatabase: null,
          lookerDialect: null,
        },
        {
          lookerConnectionName: 'stale_runtime',
          ktxConnectionId: 'warehouse',
          lookerHost: null,
          lookerDatabase: null,
          lookerDialect: null,
        },
      ],
      discovered: liveConnections,
    });

    expect(drift).toEqual({
      unmappedDiscovered: [liveConnections[1]],
      staleMappings: [{ lookerConnectionName: 'stale_runtime', reason: 'looker_connection_not_found' }],
      inSync: [{ lookerConnectionName: 'b2b_sandbox_bq', ktxConnectionId: 'warehouse' }],
    });
  });

  it('validates missing and unsupported target connection ids', () => {
    // 'missing' is absent from the known ids; 'looker-target' exists but has type LOOKER.
    const validation = validateLookerMappings({
      mappings: [
        {
          lookerConnectionName: 'b2b_sandbox_bq',
          ktxConnectionId: 'missing',
          lookerHost: null,
          lookerDatabase: null,
          lookerDialect: null,
        },
        {
          lookerConnectionName: 'pg_runtime',
          ktxConnectionId: 'looker-target',
          lookerHost: null,
          lookerDatabase: null,
          lookerDialect: null,
        },
      ],
      knownKtxConnectionIds: new Set(['looker-target']),
      knownConnectionTypes: new Map([['looker-target', 'LOOKER']]),
    });

    expect(validation).toEqual({
      ok: false,
      errors: [
        { key: 'b2b_sandbox_bq', reason: 'KTX connection missing does not exist' },
        {
          key: 'pg_runtime',
          reason: 'Connection type LOOKER cannot be used as a Looker warehouse mapping target',
        },
      ],
    });
  });
});
/** Parser input collection for explores and projection of parser rows into KTX shape. */
describe('collectExploreParseItems and projectParsedIdentifier', () => {
  it('collects base explore and join parser inputs for mapped explores', () => {
    const collected = collectExploreParseItems({
      explore: mappedExplore,
      connectionMappings: { b2b_sandbox_bq: 'warehouse' },
      targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]),
    });

    // One item for the explore itself and one per join, keyed `<model>.<explore>[.<join>]`.
    expect(collected).toEqual({
      parsedTargetTables: {},
      parseItems: [
        {
          key: 'b2b.sales_pipeline',
          sql_table_name: 'proj.analytics.opportunities AS opportunities',
          dialect: 'bigquery',
        },
        {
          key: 'b2b.sales_pipeline.accounts',
          sql_table_name: 'proj.analytics.accounts',
          dialect: 'bigquery',
        },
      ],
    });
  });

  it('projects successful and failed parser rows into KTX parsed target tables', () => {
    const projectedSuccess = projectParsedIdentifier({
      ok: true,
      catalog: 'proj',
      schema: 'analytics',
      name: 'accounts',
      canonical_table: 'proj.analytics.accounts',
    });
    // snake_case `canonical_table` becomes camelCase `canonicalTable`.
    expect(projectedSuccess).toEqual({
      ok: true,
      catalog: 'proj',
      schema: 'analytics',
      name: 'accounts',
      canonicalTable: 'proj.analytics.accounts',
    });

    const projectedFailure = projectParsedIdentifier({ ok: false, reason: 'derived_table_not_supported' });
    expect(projectedFailure).toEqual({
      ok: false,
      reason: 'derived_table_not_supported',
    });
  });
});
/** End-to-end pull-config assembly from cursors, mappings, a stub client, and a stub parser. */
describe('buildLookerPullConfigFromInputs', () => {
  it('builds the hosted-equivalent Looker pull config from caller-loaded inputs', async () => {
    const parser = {
      parse: vi.fn().mockResolvedValue({
        'b2b.sales_pipeline': {
          ok: true,
          catalog: 'proj',
          schema: 'analytics',
          name: 'opportunities',
          canonical_table: 'proj.analytics.opportunities',
        },
        'b2b.sales_pipeline.accounts': {
          ok: true,
          catalog: 'proj',
          schema: 'analytics',
          name: 'accounts',
          canonical_table: 'proj.analytics.accounts',
        },
      }),
    };
    const client = {
      listLookmlModels: vi.fn().mockResolvedValue(models),
      getExplore: vi.fn().mockResolvedValue(mappedExplore),
    };

    const pullConfig = await buildLookerPullConfigFromInputs({
      lookerConnectionId: 'prod-looker',
      cursors: {
        dashboardsLastSyncedAt: '2026-05-01T00:00:00.000Z',
        looksLastSyncedAt: null,
      },
      refreshedMappings: [
        {
          lookerConnectionName: 'b2b_sandbox_bq',
          ktxConnectionId: 'warehouse',
          lookerHost: 'warehouse.example.com',
          lookerDatabase: 'analytics',
          lookerDialect: 'bigquery_standard_sql',
        },
      ],
      targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]),
      client,
      parser,
    });

    // Cursors become *UpdatedSince fields; parser rows are projected to camelCase.
    expect(pullConfig).toEqual({
      lookerConnectionId: 'prod-looker',
      dashboardUpdatedSince: '2026-05-01T00:00:00.000Z',
      lookUpdatedSince: null,
      connectionMappings: { b2b_sandbox_bq: 'warehouse' },
      connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
      parsedTargetTables: {
        'b2b.sales_pipeline': {
          ok: true,
          catalog: 'proj',
          schema: 'analytics',
          name: 'opportunities',
          canonicalTable: 'proj.analytics.opportunities',
        },
        'b2b.sales_pipeline.accounts': {
          ok: true,
          catalog: 'proj',
          schema: 'analytics',
          name: 'accounts',
          canonicalTable: 'proj.analytics.accounts',
        },
      },
    });
  });

  it('marks parser failures as parse_error without blocking pull-config construction', async () => {
    // The parser rejects outright; the builder is still expected to resolve.
    const parser = { parse: vi.fn().mockRejectedValue(new Error('python unavailable')) };
    const client = {
      listLookmlModels: vi.fn().mockResolvedValue(models),
      getExplore: vi.fn().mockResolvedValue(mappedExplore),
    };

    const { parsedTargetTables } = await buildLookerPullConfigFromInputs({
      lookerConnectionId: 'prod-looker',
      cursors: { dashboardsLastSyncedAt: null, looksLastSyncedAt: null },
      refreshedMappings: [
        {
          lookerConnectionName: 'b2b_sandbox_bq',
          ktxConnectionId: 'warehouse',
          lookerHost: null,
          lookerDatabase: null,
          lookerDialect: null,
        },
      ],
      targetConnections: new Map([['warehouse', { id: 'warehouse', connection_type: 'BIGQUERY' }]]),
      client,
      parser,
    });

    expect(parsedTargetTables).toMatchObject({
      'b2b.sales_pipeline': { ok: false, reason: 'parse_error' },
      'b2b.sales_pipeline.accounts': { ok: false, reason: 'parse_error' },
    });
  });
});

View file

@ -1,13 +0,0 @@
import { describe, expect, it } from 'vitest';
import { buildLookerReconcileNotes } from './reconcile.js';
/** The reconcile notes are a single-element list containing one space-joined instruction. */
describe('buildLookerReconcileNotes', () => {
  it('instructs reconciliation to record subsumed provenance', () => {
    const namingRule = 'Looker runtime API-derived SL sources use looker__<model>__<explore>.';
    const subsumedRule =
      'If the unprefixed file-adapter source <model>__<explore> exists, prefer it in wiki sl_refs, delete or avoid the API-derived source, and call emit_artifact_resolution with actionType="subsumed" for the API raw explore path.';

    const notes = buildLookerReconcileNotes();

    expect(notes).toEqual([`${namingRule} ${subsumedRule}`]);
  });
});

View file

@ -1,101 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { describeLookerScope, hashLookerScope, isPathInLookerScope } from './scope.js';
/**
 * Writes `value` as pretty-printed JSON (2-space indent, trailing newline) to
 * `relPath` under `stagedDir`, creating the parent directory first.
 *
 * @param stagedDir - Root directory the file is written under.
 * @param relPath - File path relative to `stagedDir`.
 * @param value - Any JSON-serializable value.
 */
async function writeJson(stagedDir: string, relPath: string, value: unknown): Promise<void> {
  const target = join(stagedDir, relPath);
  const parentDir = join(target, '..');
  await mkdir(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2) + '\n';
  await writeFile(target, serialized, 'utf-8');
}
/**
 * Scope semantics for incremental Looker pulls: which staged paths count as
 * "in scope" given the known-current and fetched raw path lists.
 */
describe('Looker runtime fetch scope', () => {
  let stagedDir: string;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'looker-scope-'));
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('keeps omitted known-current entity files out of the deletion baseline', () => {
    const scope = {
      mode: 'incremental' as const,
      knownCurrentRawPaths: ['dashboards/10.json', 'dashboards/11.json', 'looks/20.json'],
      fetchedRawPaths: ['dashboards/11.json'],
    };
    // [path, expected membership]
    const expectations: Array<[string, boolean]> = [
      ['dashboards/10.json', false],
      ['looks/20.json', false],
      ['dashboards/11.json', true],
      ['looks/21.json', true],
      ['signals/dashboard_usage.json', true],
      ['explores/b2b/sales_pipeline.json', true],
    ];

    for (const [path, inScope] of expectations) {
      expect(isPathInLookerScope(path, scope)).toBe(inScope);
    }
  });

  it('keeps omitted unchanged evidence documents out of incremental delete scope', () => {
    const scope = {
      mode: 'incremental' as const,
      knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
      fetchedRawPaths: ['dashboards/10.json'],
    };
    // Evidence paths follow the membership of the raw entity they belong to.
    const expectations: Array<[string, boolean]> = [
      ['evidence/dashboards/10/page.md', true],
      ['evidence/dashboards/10/metadata.json', true],
      ['evidence/looks/20/page.md', false],
      ['evidence/looks/20/metadata.json', false],
    ];

    for (const [path, inScope] of expectations) {
      expect(isPathInLookerScope(path, scope)).toBe(inScope);
    }
  });

  it('treats full scope as all raw paths in scope', () => {
    const scope = {
      mode: 'full' as const,
      knownCurrentRawPaths: ['dashboards/10.json'],
      fetchedRawPaths: ['dashboards/10.json'],
    };

    for (const path of ['dashboards/10.json', 'dashboards/99.json', 'looks/20.json']) {
      expect(isPathInLookerScope(path, scope)).toBe(true);
    }
  });

  it('hashes scope order-insensitively', () => {
    const looksFirst = hashLookerScope({
      mode: 'incremental',
      knownCurrentRawPaths: ['looks/20.json', 'dashboards/10.json'],
      fetchedRawPaths: ['dashboards/10.json'],
    });
    const dashboardsFirst = hashLookerScope({
      mode: 'incremental',
      knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
      fetchedRawPaths: ['dashboards/10.json'],
    });

    expect(looksFirst).toBe(dashboardsFirst);
    // 64 lowercase hex characters.
    expect(looksFirst).toMatch(/^[0-9a-f]{64}$/);
  });

  it('reads staged scope and returns a SourceAdapter ScopeDescriptor', async () => {
    await writeJson(stagedDir, 'looker-scope.json', {
      mode: 'incremental',
      knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
      fetchedRawPaths: ['dashboards/10.json'],
    });

    const { fingerprint, isPathInScope } = await describeLookerScope(stagedDir).then((descriptor) => ({
      fingerprint: descriptor.fingerprint,
      isPathInScope: (path: string) => descriptor.isPathInScope(path),
    }));

    expect(fingerprint).toMatch(/^[0-9a-f]{64}$/);
    expect(isPathInScope('dashboards/10.json')).toBe(true);
    expect(isPathInScope('looks/20.json')).toBe(false);
    expect(isPathInScope('looks/99.json')).toBe(true);
  });

  it('falls back to full scope when old fixtures do not have a scope file', async () => {
    // No looker-scope.json is written into stagedDir for this test.
    const descriptor = await describeLookerScope(stagedDir);

    for (const path of ['dashboards/10.json', 'looks/20.json']) {
      expect(descriptor.isPathInScope(path)).toBe(true);
    }
  });
});

View file

@ -1,86 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { listLookerTargetConnectionIds } from './target-connections.js';
/** Target warehouse ids are gathered from staged explores, dashboard tile queries, and Look queries. */
describe('listLookerTargetConnectionIds', () => {
  let stagedDir: string;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'looker-targets-'));
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('collects unique target warehouse IDs from explores, dashboard queries, and Look queries', async () => {
    // The explore and the Look share one id; the dashboard tile uses a second one.
    const sharedWarehouseId = '22222222-2222-4222-8222-222222222222';
    const tileWarehouseId = '33333333-3333-4333-8333-333333333333';

    const explorePayload = {
      modelName: 'b2b',
      exploreName: 'sales_pipeline',
      label: null,
      description: null,
      fields: { dimensions: [], measures: [] },
      joins: [],
      targetWarehouseConnectionId: sharedWarehouseId,
    };
    const dashboardPayload = {
      lookerId: '1',
      title: 'Pipeline',
      description: null,
      folderId: null,
      ownerId: null,
      updatedAt: null,
      tiles: [
        {
          id: '11',
          title: 'ARR',
          lookId: null,
          query: {
            model: 'b2b',
            view: 'sales_pipeline',
            fields: [],
            filters: {},
            sorts: [],
            targetWarehouseConnectionId: tileWarehouseId,
          },
        },
      ],
    };
    const lookPayload = {
      lookerId: '2',
      title: 'Customers',
      description: null,
      folderId: null,
      ownerId: null,
      updatedAt: null,
      query: {
        model: 'b2b',
        view: 'sales_pipeline',
        fields: [],
        filters: {},
        sorts: [],
        targetWarehouseConnectionId: sharedWarehouseId,
      },
    };

    await mkdir(join(stagedDir, 'explores', 'b2b'), { recursive: true });
    await mkdir(join(stagedDir, 'dashboards'), { recursive: true });
    await mkdir(join(stagedDir, 'looks'), { recursive: true });
    await writeFile(join(stagedDir, 'explores', 'b2b', 'sales_pipeline.json'), JSON.stringify(explorePayload));
    await writeFile(join(stagedDir, 'dashboards', '1.json'), JSON.stringify(dashboardPayload));
    await writeFile(join(stagedDir, 'looks', '2.json'), JSON.stringify(lookPayload));

    const targetIds = await listLookerTargetConnectionIds(stagedDir);

    expect(targetIds).toEqual([sharedWarehouseId, tileWarehouseId]);
  });
});

View file

@ -1,243 +0,0 @@
import { describe, expect, it } from 'vitest';
import type { ToolOutput } from '../../../../../context/tools/base-tool.js';
import { buildLookerSlProposal, createLookerQueryToSlTool, type LookerSlProposal } from './looker-query-to-sl.tool.js';
/**
 * Proposal building from a Looker query: decision/lane selection, measure,
 * dimension and segment suggestions, and warehouse target status reporting.
 */
describe('buildLookerSlProposal', () => {
  it('suggests a measure and segment for an aggregated filtered Looker query', () => {
    const result = buildLookerSlProposal({
      contentTitle: 'Open Pipeline ARR',
      contentType: 'look',
      usage: { queryCount30d: 42, uniqueUsers30d: 7 },
      query: {
        model: 'b2b',
        view: 'sales_pipeline',
        fields: ['opportunities.arr', 'opportunities.stage'],
        filters: { 'opportunities.stage': 'open' },
        sorts: ['opportunities.arr desc'],
        limit: '500',
      },
    });

    expect(result.sourceName).toBe('looker__b2b__sales_pipeline');
    expect(result.triageLane).toBe('full');
    expect(result.decision).toBe('measure_added');
    expect(result.measures).toEqual([
      {
        name: 'arr',
        lookerField: 'opportunities.arr',
        expr: 'sum(opportunities.arr)',
        description: 'Suggested from Looker look "Open Pipeline ARR"; verify against explore field SQL before writing.',
      },
    ]);
    expect(result.dimensions).toEqual([{ name: 'stage', lookerField: 'opportunities.stage' }]);
    expect(result.segments).toEqual([
      {
        name: 'open_pipeline_arr',
        filters: { 'opportunities.stage': 'open' },
        suggestedPredicate: "opportunities.stage = 'open'",
        description: 'Reusable filter candidate from Looker look "Open Pipeline ARR".',
      },
    ]);
    expect(result.notes).toContain(
      'Usage signals can raise priority, but query counts, users, owners, and folders must not be written as wiki narrative.',
    );
  });

  it('keeps simple saved views as wiki-only candidates', () => {
    // No usage, no filters, and only dimension-like fields.
    const result = buildLookerSlProposal({
      contentTitle: 'Accounts By Region',
      query: {
        model: 'b2b',
        view: 'accounts',
        fields: ['accounts.region', 'accounts.segment'],
        filters: {},
      },
    });

    expect(result.sourceName).toBe('looker__b2b__accounts');
    expect(result.triageLane).toBe('light');
    expect(result.decision).toBe('wiki_only');
    expect(result.measures).toEqual([]);
    expect(result.dimensions).toEqual([
      { name: 'region', lookerField: 'accounts.region' },
      { name: 'segment', lookerField: 'accounts.segment' },
    ]);
    expect(result.segments).toEqual([]);
  });

  it('promotes high-usage filter-only queries as derived-source candidates', () => {
    const result = buildLookerSlProposal({
      contentTitle: 'Active Customers',
      usage: { queryCount30d: 15, uniqueUsers30d: 4 },
      query: {
        model: 'b2b',
        view: 'customers',
        fields: ['customers.id', 'customers.name'],
        filters: { 'customers.status': 'active', 'customers.is_test': '-yes' },
      },
    });

    expect(result.sourceName).toBe('looker__b2b__customers');
    expect(result.decision).toBe('source_created');
    // The '-yes' filter value is expected to surface as a `!=` predicate.
    expect(result.segments).toEqual([
      {
        name: 'active_customers',
        filters: { 'customers.status': 'active', 'customers.is_test': '-yes' },
        suggestedPredicate: "customers.status = 'active' AND customers.is_test != 'yes'",
        description: 'Reusable filter candidate from Looker look "Active Customers".',
      },
    ]);
  });

  it('surfaces mapped warehouse target metadata for direct SL writes', () => {
    const warehouseId = '22222222-2222-4222-8222-222222222222';
    const result = buildLookerSlProposal({
      contentTitle: 'Open Pipeline ARR',
      contentType: 'dashboard_tile',
      usage: { queryCount30d: 42, uniqueUsers30d: 7 },
      query: {
        model: 'b2b',
        view: 'sales_pipeline',
        fields: ['opportunities.arr', 'opportunities.stage'],
        filters: { 'opportunities.stage': 'open' },
        targetWarehouseConnectionId: warehouseId,
        targetTable: {
          ok: true,
          catalog: 'proj',
          schema: 'dataset',
          name: 'opportunities',
          canonicalTable: 'proj.dataset.opportunities',
        },
      },
    });

    expect(result.sourceName).toBe('looker__b2b__sales_pipeline');
    expect(result.targetStatus).toBe('mapped');
    expect(result.targetWarehouseConnectionId).toBe(warehouseId);
    expect(result.sourceTable).toBe('proj.dataset.opportunities');
    expect(result.canWriteStandaloneSource).toBe(true);
    expect(result.targetTable).toEqual({
      ok: true,
      catalog: 'proj',
      schema: 'dataset',
      name: 'opportunities',
      canonicalTable: 'proj.dataset.opportunities',
    });
    expect(result.notes).toContain(
      'targetTable.ok is true: write or edit SL on targetWarehouseConnectionId using targetTable.canonicalTable as source.table.',
    );
  });

  it('surfaces unmapped and unparseable target reasons for wiki-only fallback', () => {
    // Case 1: no connection mapping at all.
    const withoutMapping = buildLookerSlProposal({
      contentTitle: 'Revenue Trend',
      query: {
        model: 'b2b',
        view: 'revenue',
        fields: ['revenue.arr'],
        filters: {},
        targetWarehouseConnectionId: null,
        targetTable: {
          ok: false,
          reason: 'no_connection_mapping',
        },
      },
    });

    expect(withoutMapping.targetStatus).toBe('unmapped');
    expect(withoutMapping.targetWarehouseConnectionId).toBeNull();
    expect(withoutMapping.sourceTable).toBeNull();
    expect(withoutMapping.canWriteStandaloneSource).toBe(false);
    expect(withoutMapping.notes).toContain(
      'targetTable.ok is false (no_connection_mapping): keep this query wiki-only and pass the reason through emit_unmapped_fallback.',
    );

    // Case 2: a mapping exists but the table name could not be parsed.
    const withUnresolvedTemplate = buildLookerSlProposal({
      contentTitle: 'Templated Source',
      query: {
        model: 'b2b',
        view: 'templated',
        fields: ['templated.count'],
        filters: {},
        targetWarehouseConnectionId: '22222222-2222-4222-8222-222222222222',
        targetTable: {
          ok: false,
          reason: 'looker_template_unresolved',
          detail: 'The sql_table_name contains ${derived.SQL_TABLE_NAME}.',
        },
      },
    });

    expect(withUnresolvedTemplate.targetStatus).toBe('unparseable');
    expect(withUnresolvedTemplate.targetWarehouseConnectionId).toBe('22222222-2222-4222-8222-222222222222');
    expect(withUnresolvedTemplate.sourceTable).toBeNull();
    expect(withUnresolvedTemplate.canWriteStandaloneSource).toBe(false);
    expect(withUnresolvedTemplate.notes).toContain(
      'targetTable.ok is false (looker_template_unresolved): keep this query wiki-only and pass the reason through emit_unmapped_fallback.',
    );
  });
});
/** The tool wrapper should return both a markdown rendering and the structured proposal. */
describe('createLookerQueryToSlTool', () => {
  it('returns markdown plus the structured proposal', async () => {
    const tool = createLookerQueryToSlTool();
    if (!tool.execute) {
      throw new Error('looker_query_to_sl tool must be executable');
    }

    const output = (await tool.execute(
      {
        contentTitle: 'Revenue Trend',
        contentType: 'dashboard_tile',
        query: {
          model: 'finance',
          view: 'orders',
          fields: ['orders.total_revenue', 'orders.created_month'],
          filters: { 'orders.status': 'paid' },
          sorts: [],
          targetWarehouseConnectionId: null,
          targetTable: null,
        },
      },
      // Minimal stand-in for the tool call context; cast because only these fields are supplied.
      { toolCallId: 'call-1', messages: [] } as never,
    )) as ToolOutput<LookerSlProposal>;
    const { markdown, structured } = output;

    expect(markdown).toContain('Looker query SL proposal');
    expect(markdown).toContain('looker__finance__orders');
    expect(structured.sourceName).toBe('looker__finance__orders');
    expect(structured.measures[0]?.name).toBe('total_revenue');
  });

  it('prints target connection and canonical table in markdown output', async () => {
    const tool = createLookerQueryToSlTool();
    if (!tool.execute) {
      throw new Error('looker_query_to_sl tool must be executable');
    }

    const output = (await tool.execute(
      {
        contentTitle: 'Revenue Trend',
        contentType: 'dashboard_tile',
        query: {
          model: 'finance',
          view: 'orders',
          fields: ['orders.total_revenue', 'orders.created_month'],
          filters: { 'orders.status': 'paid' },
          sorts: [],
          targetWarehouseConnectionId: '33333333-3333-4333-8333-333333333333',
          targetTable: {
            ok: true,
            catalog: 'proj',
            schema: 'finance',
            name: 'orders',
            canonicalTable: 'proj.finance.orders',
          },
        },
      },
      { toolCallId: 'call-1', messages: [] } as never,
    )) as ToolOutput<LookerSlProposal>;
    const { markdown, structured } = output;

    expect(markdown).toContain('- targetStatus: mapped');
    expect(markdown).toContain('- targetWarehouseConnectionId: 33333333-3333-4333-8333-333333333333');
    expect(markdown).toContain('- sourceTable: proj.finance.orders');
    expect(structured.canWriteStandaloneSource).toBe(true);
  });
});

View file

@ -1,329 +0,0 @@
import { describe, expect, it } from 'vitest';
import { parsedTargetTableSchema } from '../../parsed-target-table.js';
import {
lookerPullConfigSchema,
parseLookerPullConfig,
stagedDashboardFileSchema,
stagedExploreFileSchema,
stagedLookerFetchIssueSchema,
stagedLookerQuerySchema,
stagedLookerScopeFileSchema,
stagedLookerSignalsFileSchema,
stagedLookFileSchema,
stagedSyncConfigSchema,
} from './types.js';
describe('Looker staged runtime schemas', () => {
// Pull config parsing fills the three mapping-related records with empty defaults.
it('parses pull config and staged sync config', () => {
  const lookerConnectionId = '11111111-1111-4111-8111-111111111111';
  const instanceBaseUrl = 'https://example.looker.com';

  const pullConfig = lookerPullConfigSchema.parse({
    lookerConnectionId: '11111111-1111-4111-8111-111111111111',
    instanceBaseUrl: 'https://example.looker.com',
  });
  expect(pullConfig).toEqual({
    lookerConnectionId,
    instanceBaseUrl,
    connectionMappings: {},
    connectionTypes: {},
    parsedTargetTables: {},
  });

  const syncConfig = stagedSyncConfigSchema.parse({
    lookerConnectionId: '11111111-1111-4111-8111-111111111111',
    fetchedAt: '2026-04-30T12:00:00.000Z',
    instanceBaseUrl: 'https://example.looker.com',
  });
  expect(syncConfig).toMatchObject({
    lookerConnectionId,
    instanceBaseUrl,
  });
});
// Covers the incremental cursor fields on the pull config, the scope manifest, and staged cursors.
it('parses incremental pull cursors and scope manifests', () => {
  const pullConfig = parseLookerPullConfig({
    lookerConnectionId: '11111111-1111-4111-8111-111111111111',
    dashboardUpdatedSince: '2026-04-30T10:00:00.000Z',
    lookUpdatedSince: '2026-04-30T11:00:00.000Z',
  });
  expect(pullConfig).toEqual({
    lookerConnectionId: '11111111-1111-4111-8111-111111111111',
    dashboardUpdatedSince: '2026-04-30T10:00:00.000Z',
    lookUpdatedSince: '2026-04-30T11:00:00.000Z',
    connectionMappings: {},
    connectionTypes: {},
    parsedTargetTables: {},
  });

  // The scope manifest is expected to round-trip unchanged.
  const scopeManifest = stagedLookerScopeFileSchema.parse({
    mode: 'incremental',
    knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
    fetchedRawPaths: ['dashboards/10.json'],
  });
  expect(scopeManifest).toEqual({
    mode: 'incremental',
    knownCurrentRawPaths: ['dashboards/10.json', 'looks/20.json'],
    fetchedRawPaths: ['dashboards/10.json'],
  });

  const { nextCursors } = stagedSyncConfigSchema.parse({
    lookerConnectionId: '11111111-1111-4111-8111-111111111111',
    fetchedAt: '2026-04-30T12:30:00.000Z',
    previousCursors: {
      dashboardsLastSyncedAt: null,
      looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
    },
    nextCursors: {
      dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
      looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
    },
  });
  expect(nextCursors).toEqual({
    dashboardsLastSyncedAt: '2026-04-30T12:00:00.000Z',
    looksLastSyncedAt: '2026-04-30T11:00:00.000Z',
  });
});
// Numeric ids supplied for the dashboard, folder, owner, and tile should all come back as strings.
it('normalizes numeric Looker ids to strings', () => {
  const { lookerId, folderId, ownerId, tiles } = stagedDashboardFileSchema.parse({
    lookerId: 10,
    title: 'Sales Pipeline',
    description: null,
    folderId: 7,
    ownerId: 3,
    updatedAt: '2026-04-30T12:00:00.000Z',
    tiles: [{ id: 100, title: 'ARR', lookId: null, query: { model: 'b2b', view: 'sales_pipeline' } }],
  });

  expect(lookerId).toBe('10');
  expect(folderId).toBe('7');
  expect(ownerId).toBe('3');
  expect(tiles[0].id).toBe('100');
});
it('parses explores, looks, and signal files with defaults', () => {
  // Explore with one dimension, one measure and a single join.
  const explore = stagedExploreFileSchema.parse({
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    fields: {
      dimensions: [{ name: 'opportunities.id', label: 'Opportunity ID', type: 'number', sql: '${TABLE}.id' }],
      measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '${TABLE}.arr' }],
    },
    joins: [{ name: 'accounts', type: 'left_outer', relationship: 'many_to_one' }],
  });
  expect(explore).toMatchObject({
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    fields: { dimensions: [{ name: 'opportunities.id' }], measures: [{ name: 'opportunities.arr' }] },
  });

  // Look whose nullable metadata fields are all null.
  const look = stagedLookFileSchema.parse({
    lookerId: '20',
    title: 'Open Pipeline',
    description: null,
    folderId: null,
    ownerId: null,
    updatedAt: null,
    query: { model: 'b2b', view: 'sales_pipeline', fields: ['opportunities.arr'] },
  });
  expect(look).toMatchObject({ lookerId: '20', query: { fields: ['opportunities.arr'] } });

  // An empty signals file still yields an (empty) dashboardUsage list.
  const signals = stagedLookerSignalsFileSchema.parse({});
  expect(signals.dashboardUsage).toEqual([]);
});
it('parses warehouse SL mapping pull config and staged target table fields', () => {
  const lookerConnectionId = '11111111-1111-4111-8111-111111111111';
  const targetConnectionId = '22222222-2222-4222-8222-222222222222';
  const parsedTargetTable = {
    ok: true as const,
    catalog: 'proj',
    schema: 'dataset',
    name: 'opportunities',
    canonicalTable: 'proj.dataset.opportunities',
  };

  // A resolved target table round-trips through its own schema unchanged.
  expect(parsedTargetTableSchema.parse(parsedTargetTable)).toEqual(parsedTargetTable);

  // Explicit mapping fields on the pull config are preserved as given.
  const mappingFields = {
    connectionMappings: { b2b_sandbox_bq: targetConnectionId },
    connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
    parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable },
  };
  expect(parseLookerPullConfig({ lookerConnectionId, ...mappingFields })).toEqual({
    lookerConnectionId,
    ...mappingFields,
  });

  // Staged explore carrying warehouse mapping fields on both the explore and its join.
  const accountsJoin = {
    name: 'accounts',
    type: 'left_outer',
    relationship: 'many_to_one',
    rawSqlTableName: 'proj.dataset.accounts',
    sqlOn: '${opportunities.account_id} = ${accounts.id}',
    from: null,
    targetTable: {
      ok: true,
      catalog: 'proj',
      schema: 'dataset',
      name: 'accounts',
      canonicalTable: 'proj.dataset.accounts',
    },
  };
  const explore = stagedExploreFileSchema.parse({
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    rawSqlTableName: 'proj.dataset.opportunities AS opportunities',
    connectionName: 'b2b_sandbox_bq',
    viewName: 'opportunities',
    fields: {
      dimensions: [{ name: 'opportunities.id', label: 'Opportunity ID', type: 'number', sql: '${TABLE}.id' }],
      measures: [{ name: 'opportunities.arr', label: 'ARR', type: 'sum', sql: '${TABLE}.arr' }],
    },
    joins: [accountsJoin],
    targetWarehouseConnectionId: targetConnectionId,
    targetTable: parsedTargetTable,
  });
  expect(explore).toMatchObject({
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    connectionName: 'b2b_sandbox_bq',
    targetWarehouseConnectionId: targetConnectionId,
    targetTable: parsedTargetTable,
    joins: [{ name: 'accounts', targetTable: { ok: true, name: 'accounts' } }],
  });
});
it('parses structured Looker mapping fetch warnings', () => {
  // Warning raised for a Looker connection with no warehouse mapping; the structured
  // `kind` and `details` must be kept by the parser.
  const issue = stagedLookerFetchIssueSchema.parse({
    rawPath: 'looker_connection_mappings/b2b_sandbox_bq',
    entityType: 'looker_connection_mapping',
    entityId: 'b2b_sandbox_bq',
    severity: 'warning',
    statusCode: null,
    message: 'Looker connection b2b_sandbox_bq is not mapped to a warehouse connection.',
    retryRecommended: false,
    kind: 'unmapped_looker_connection',
    details: {
      lookerConnectionName: 'b2b_sandbox_bq',
      affectedExplores: ['b2b.sales_pipeline'],
    },
  });

  expect(issue).toMatchObject({
    entityType: 'looker_connection_mapping',
    kind: 'unmapped_looker_connection',
    details: {
      lookerConnectionName: 'b2b_sandbox_bq',
      affectedExplores: ['b2b.sales_pipeline'],
    },
  });
});
it('parses LookML model listing warnings in fetch reports', () => {
  // Plain warning without `kind`/`details`: the parsed issue must equal the input exactly.
  const parsed = stagedLookerFetchIssueSchema.parse({
    rawPath: 'lookml_models.json',
    entityType: 'lookml_models',
    entityId: null,
    severity: 'warning',
    statusCode: 403,
    message: 'LookML model access denied',
    retryRecommended: false,
  });

  expect(parsed).toEqual({
    rawPath: 'lookml_models.json',
    entityType: 'lookml_models',
    entityId: null,
    severity: 'warning',
    statusCode: 403,
    message: 'LookML model access denied',
    retryRecommended: false,
  });
});
it('accepts slug-shaped connection ids inside KTX Looker runtime schemas', () => {
  const parsedTargetTable = {
    ok: true as const,
    catalog: 'proj',
    schema: 'dataset',
    name: 'opportunities',
    canonicalTable: 'proj.dataset.opportunities',
  };
  // Warehouse linkage shared by the query and explore cases below.
  const warehouseLink = {
    targetWarehouseConnectionId: 'prod-warehouse',
    targetTable: parsedTargetTable,
  };

  // Pull config: slug ids for both the Looker connection and the mapped warehouse.
  const pullConfig = parseLookerPullConfig({
    lookerConnectionId: 'prod-looker',
    connectionMappings: { b2b_sandbox_bq: 'prod-warehouse' },
    connectionTypes: { b2b_sandbox_bq: 'BIGQUERY' },
    parsedTargetTables: { 'b2b.sales_pipeline': parsedTargetTable },
  });
  expect(pullConfig).toMatchObject({
    lookerConnectionId: 'prod-looker',
    connectionMappings: { b2b_sandbox_bq: 'prod-warehouse' },
  });

  // Staged sync config.
  const stagedSync = stagedSyncConfigSchema.parse({
    lookerConnectionId: 'prod-looker',
    fetchedAt: '2026-04-30T12:00:00.000Z',
  });
  expect(stagedSync).toMatchObject({ lookerConnectionId: 'prod-looker' });

  // Staged query.
  const query = stagedLookerQuerySchema.parse({
    model: 'b2b',
    view: 'sales_pipeline',
    ...warehouseLink,
  });
  expect(query).toMatchObject(warehouseLink);

  // Staged explore.
  const explore = stagedExploreFileSchema.parse({
    modelName: 'b2b',
    exploreName: 'sales_pipeline',
    label: 'Sales Pipeline',
    description: null,
    fields: { dimensions: [], measures: [] },
    ...warehouseLink,
  });
  expect(explore).toMatchObject(warehouseLink);
});
it('rejects unsafe KTX Looker connection ids', () => {
  // A path-traversal-shaped Looker connection id must be rejected.
  expect(() =>
    parseLookerPullConfig({
      lookerConnectionId: '../prod-looker',
    }),
  ).toThrow();

  // A mapped warehouse id containing a path separator must be rejected.
  // The Looker connection id is deliberately a valid slug here so that the mapping value is
  // the only thing that can make parsing fail. Every other call in this suite supplies
  // lookerConnectionId, so it is presumably required; omitting it would let this assertion
  // pass on the missing field alone, without ever validating the mapping.
  expect(() =>
    parseLookerPullConfig({
      lookerConnectionId: 'prod-looker',
      connectionMappings: { b2b_sandbox_bq: 'prod/warehouse' },
    }),
  ).toThrow();
});
});

View file

@ -1,230 +0,0 @@
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { chunkLookmlProject } from './chunk.js';
import { type ParsedLookmlProject, parseLookmlStagedDir } from './parse.js';
// Directory holding the LookML fixture bundles used below (`single-model`, `multi-model`,
// `extends-chain`), resolved relative to this file's directory.
const FIXTURE_ROOT = join(__dirname, '../../../../test/fixtures/lookml');
describe('chunkLookmlProject — first run', () => {
it('single-model bundle → 1 WU with model + all views in rawFiles', async () => {
  const stagedDir = join(FIXTURE_ROOT, 'single-model');
  const project = await parseLookmlStagedDir(stagedDir);
  const result = chunkLookmlProject(project);

  expect(result.workUnits).toHaveLength(1);
  const wu = result.workUnits[0];
  expect(wu.unitKey).toBe('lookml-orders');
  // Compare order-insensitively, but sort a copy: Array.prototype.sort works in place and
  // would otherwise reorder the work unit that the remaining assertions still inspect.
  expect([...wu.rawFiles].sort()).toEqual([
    'orders.model.lkml',
    'views/customers.view.lkml',
    'views/orders.view.lkml',
  ]);
  expect(wu.peerFileIndex).toEqual([]);
  expect(wu.dependencyPaths).toEqual([]);
  // A first run (no diffSet passed) reports no eviction.
  expect(result.eviction).toBeUndefined();
});
it('multi-model bundle → 1 WU per model; shared view owned by lex-first model; others see it in dependencyPaths + peerFileIndex is pathless-index', async () => {
  const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'multi-model'));
  const { workUnits } = chunkLookmlProject(project);
  expect(workUnits).toHaveLength(2);

  const findUnit = (unitKey: string) => workUnits.find((wu) => wu.unitKey === unitKey);
  const marketing = findUnit('lookml-marketing');
  const orders = findUnit('lookml-orders');
  expect(marketing).toBeDefined();
  expect(orders).toBeDefined();
  if (!marketing || !orders) {
    throw new Error('expected marketing and orders work units');
  }

  // "marketing" sorts ahead of "orders", so marketing is the owner of shared_dims.
  for (const owned of ['views/shared_dims.view.lkml', 'views/campaigns.view.lkml', 'marketing.model.lkml']) {
    expect(marketing.rawFiles).toContain(owned);
  }
  expect(marketing.rawFiles).not.toContain('views/orders.view.lkml');
  expect(marketing.dependencyPaths).toEqual([]);

  // orders is not the owner of shared_dims: it appears as a read-only upstream dependency.
  expect(orders.rawFiles).not.toContain('views/shared_dims.view.lkml');
  expect(orders.dependencyPaths).toEqual(['views/shared_dims.view.lkml']);
  for (const owned of ['views/orders.view.lkml', 'orders.model.lkml']) {
    expect(orders.rawFiles).toContain(owned);
  }

  // peerFileIndex is a paths-only listing of the other model's files.
  for (const peer of ['marketing.model.lkml', 'views/campaigns.view.lkml']) {
    expect(orders.peerFileIndex).toContain(peer);
  }
  // A path already listed as a dependency is not repeated in peerFileIndex.
  expect(orders.peerFileIndex).not.toContain('views/shared_dims.view.lkml');
});
it('extends-chain fixture: single WU contains base + orders + orders_ext; chain order visible via graph', async () => {
  const stagedDir = join(FIXTURE_ROOT, 'extends-chain');
  const project = await parseLookmlStagedDir(stagedDir);
  const result = chunkLookmlProject(project);

  // One model ("orders") includes views/*.view.lkml — so all three views land in its WU.
  expect(result.workUnits).toHaveLength(1);
  const wu = result.workUnits[0];
  expect(wu.unitKey).toBe('lookml-orders');
  // Sort a copy so the in-place sort does not reorder the work unit that is still being
  // asserted on below.
  expect([...wu.rawFiles].sort()).toEqual([
    'orders.model.lkml',
    'views/base.view.lkml',
    'views/orders.view.lkml',
    'views/orders_ext.view.lkml',
  ]);
  expect(wu.dependencyPaths).toEqual([]); // all ancestors already in rawFiles on first run
  expect(wu.notes).toMatch(/orders/);
});
it('is deterministic: two calls on the same project return structurally identical WorkUnits', async () => {
  const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'multi-model'));

  // Chunk the very same parsed project twice and compare the emitted units structurally.
  const firstRun = chunkLookmlProject(project);
  const secondRun = chunkLookmlProject(project);

  expect(firstRun.workUnits).toEqual(secondRun.workUnits);
});
it('unitKey is model-name-derived (stable across parse+chunk cycles and across re-syncs)', async () => {
  const stagedDir = join(FIXTURE_ROOT, 'multi-model');
  const project = await parseLookmlStagedDir(stagedDir);

  // Keys are "lookml-" followed by the model name for both models in the fixture.
  const unitKeys = chunkLookmlProject(project).workUnits.map((wu) => wu.unitKey);
  expect(unitKeys.sort()).toEqual(['lookml-marketing', 'lookml-orders']);
});
it('marks mismatched model WorkUnits as SL-disallowed and keeps wiki ingest enabled', () => {
  // In-memory project: one model ("b2b") including a single view.
  const project: ParsedLookmlProject = {
    models: [
      {
        path: 'b2b.model.lkml',
        name: 'b2b',
        includes: ['views/orders.view.lkml'],
        explores: ['orders'],
        connectionName: 'wrong_connection',
      },
    ],
    views: [{ path: 'views/orders.view.lkml', name: 'orders', extendsFrom: [], rawSqlTableName: 'public.orders' }],
    dashboards: [],
    allPaths: ['b2b.model.lkml', 'views/orders.view.lkml'],
  };

  const mismatchedModelNames = new Set(['b2b']);
  const { workUnits } = chunkLookmlProject(project, { mismatchedModelNames });
  const wu = workUnits[0];

  // The unit is still emitted with its files ...
  expect(wu.unitKey).toBe('lookml-b2b');
  expect(wu.rawFiles).toEqual(['b2b.model.lkml', 'views/orders.view.lkml']);

  // ... but it is flagged, and the notes spell out the restriction.
  expect(wu.slDisallowed).toBe(true);
  expect(wu.slDisallowedReason).toBe('lookml_connection_mismatch');
  const expectedNoteFragments = [
    '[LOOKML SL WRITES DISALLOWED]',
    'reason: lookml_connection_mismatch',
    'Do not call sl_write_source or sl_edit_source for this WorkUnit.',
  ];
  for (const fragment of expectedNoteFragments) {
    expect(wu.notes).toContain(fragment);
  }
});
});
describe('chunkLookmlProject — re-sync', () => {
it("modified file in one model only emits that model's WU", async () => {
  const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'multi-model'));

  // Only the campaigns view is modified; every other staged path is reported unchanged.
  const diffSet = {
    added: [],
    modified: ['views/campaigns.view.lkml'],
    deleted: [],
    unchanged: [
      'marketing.model.lkml',
      'orders.model.lkml',
      'views/orders.view.lkml',
      'views/shared_dims.view.lkml',
    ],
  };
  const { workUnits } = chunkLookmlProject(project, { diffSet });

  expect(workUnits).toHaveLength(1);
  expect(workUnits[0].unitKey).toBe('lookml-marketing');
});
it("added file under a model emits that model's WU with the new path in rawFiles", async () => {
  const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'single-model'));

  // The customers view is reported as newly added; the model and orders view are unchanged.
  const diffSet = {
    added: ['views/customers.view.lkml'],
    modified: [],
    deleted: [],
    unchanged: ['orders.model.lkml', 'views/orders.view.lkml'],
  };
  const { workUnits } = chunkLookmlProject(project, { diffSet });

  expect(workUnits).toHaveLength(1);
  expect(workUnits[0].rawFiles).toContain('views/customers.view.lkml');
});
it('widens dependencyPaths with transitive extends ancestors on re-sync', async () => {
  const project = await parseLookmlStagedDir(join(FIXTURE_ROOT, 'extends-chain'));

  // The diff touches orders_ext only; base and orders sit upstream of it in the extends chain.
  // This fixture has a single model whose WU already holds all three views in rawFiles, so
  // there is nothing left to widen into dependencyPaths. What is asserted here is that
  // baseline: the touched file is in rawFiles and dependencyPaths stays empty.
  const diffSet = {
    added: [],
    modified: ['views/orders_ext.view.lkml'],
    deleted: [],
    unchanged: ['orders.model.lkml', 'views/base.view.lkml', 'views/orders.view.lkml'],
  };
  const { workUnits } = chunkLookmlProject(project, { diffSet });

  expect(workUnits).toHaveLength(1);
  const [wu] = workUnits;
  expect(wu.rawFiles).toContain('views/orders_ext.view.lkml');
  // Ancestors that are already raw files are not repeated as dependencies.
  expect(wu.dependencyPaths).toEqual([]);
});
it('widens dependencyPaths when an ancestor is OUTSIDE the WU (synthesized cross-model case)', () => {
  // In-memory project with two models: "a" includes only base.view.lkml, while "b" includes
  // both base.view.lkml and derived.view.lkml, and `derived` extends `base`.
  // The diff modifies derived.view.lkml alone. The assertion is that b's WU lists
  // base.view.lkml in dependencyPaths.
  // NOTE(review): the original author's comment says base would already be a dependency of b
  // through b's `include:` list, so this mainly confirms that widening does not break that.
  const modelA = {
    path: 'a.model.lkml',
    name: 'a',
    includes: ['views/base.view.lkml'],
    explores: [],
    connectionName: null,
  };
  const modelB = {
    path: 'b.model.lkml',
    name: 'b',
    includes: ['views/base.view.lkml', 'views/derived.view.lkml'],
    explores: [],
    connectionName: null,
  };
  const project: ParsedLookmlProject = {
    models: [modelA, modelB],
    views: [
      { path: 'views/base.view.lkml', name: 'base', extendsFrom: [], rawSqlTableName: null },
      { path: 'views/derived.view.lkml', name: 'derived', extendsFrom: ['base'], rawSqlTableName: null },
    ],
    dashboards: [],
    allPaths: ['a.model.lkml', 'b.model.lkml', 'views/base.view.lkml', 'views/derived.view.lkml'],
  };

  const { workUnits } = chunkLookmlProject(project, {
    diffSet: {
      added: [],
      modified: ['views/derived.view.lkml'],
      deleted: [],
      unchanged: ['a.model.lkml', 'b.model.lkml', 'views/base.view.lkml'],
    },
  });

  const unitB = workUnits.find((wu) => wu.unitKey === 'lookml-b');
  expect(unitB).toBeDefined();
  if (!unitB) {
    throw new Error('expected lookml-b work unit');
  }
  expect(unitB.dependencyPaths).toContain('views/base.view.lkml');
});
it('passes through diffSet.deleted as an EvictionUnit', async () => {
  const stagedDir = join(FIXTURE_ROOT, 'single-model');
  const project = await parseLookmlStagedDir(stagedDir);

  // The only change is a deletion of a path that is not among the current files.
  const diffSet = {
    added: [],
    modified: [],
    deleted: ['views/zombie.view.lkml'],
    unchanged: ['orders.model.lkml', 'views/customers.view.lkml', 'views/orders.view.lkml'],
  };
  const { eviction, workUnits } = chunkLookmlProject(project, { diffSet });

  expect(eviction).toEqual({ deletedRawPaths: ['views/zombie.view.lkml'] });
  // Nothing current was touched, so no work unit is produced.
  expect(workUnits).toEqual([]);
});
});

View file

@ -1,46 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { detectLookmlStagedDir } from './detect.js';
describe('detectLookmlStagedDir', () => {
  // Fresh temporary directory per test, removed again afterwards.
  let stagedDir: string;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'lkml-detect-'));
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('returns true when a .model.lkml is present at root', async () => {
    await writeFile(join(stagedDir, 'orders.model.lkml'), 'include: "views/*"\n', 'utf-8');
    const detected = await detectLookmlStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('returns true when only a .view.lkml is present (no model)', async () => {
    await writeFile(join(stagedDir, 'x.view.lkml'), 'view: x {}\n', 'utf-8');
    const detected = await detectLookmlStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('returns true when .lkml files are nested under any subdirectory', async () => {
    const nestedDir = join(stagedDir, 'nested', 'deeper');
    await mkdir(nestedDir, { recursive: true });
    await writeFile(join(nestedDir, 'x.view.lkml'), 'view: x {}\n', 'utf-8');
    const detected = await detectLookmlStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('accepts the .lookml extension as well as .lkml', async () => {
    await writeFile(join(stagedDir, 'x.view.lookml'), 'view: x {}\n', 'utf-8');
    const detected = await detectLookmlStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('returns false for a bundle with no .lkml files at all', async () => {
    await writeFile(join(stagedDir, 'README.md'), '# hi\n', 'utf-8');
    await writeFile(join(stagedDir, 'config.yaml'), 'a: 1\n', 'utf-8');
    const detected = await detectLookmlStagedDir(stagedDir);
    expect(detected).toBe(false);
  });

  it('returns false for an empty directory', async () => {
    const detected = await detectLookmlStagedDir(stagedDir);
    expect(detected).toBe(false);
  });
});

View file

@ -1,113 +0,0 @@
import { mkdtemp, readFile, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { ParsedLookmlProject } from './parse.js';
import {
LOOKML_FETCH_REPORT_FILE,
LOOKML_MISMATCHED_MODELS_FILE,
buildLookmlValidationArtifacts,
readLookmlFetchReport,
readLookmlMismatchedModelNames,
writeLookmlValidationArtifacts,
} from './fetch-report.js';
/**
 * Builds a minimal parsed project from a list of models: no views, no dashboards, and
 * `allPaths` made up of the model file paths only.
 */
function project(models: ParsedLookmlProject['models']): ParsedLookmlProject {
  const allPaths = models.map(({ path }) => path);
  return { models, views: [], dashboards: [], allPaths };
}
describe('LookML validation fetch report', () => {
let stagedDir: string;
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'lookml-report-'));
});
afterEach(async () => rm(stagedDir, { recursive: true, force: true }));
// Synchronous on purpose: nothing in this test awaits, so the callback is not declared async.
it('emits partial warning artifacts for mismatched model connection names', () => {
  // Two models: "b2b" declares a different connection than expected, "finance" matches.
  const artifacts = buildLookmlValidationArtifacts(
    project([
      {
        path: 'b2b.model.lkml',
        name: 'b2b',
        includes: [],
        explores: ['orders'],
        connectionName: 'staging_pg',
      },
      {
        path: 'finance.model.lkml',
        name: 'finance',
        includes: [],
        explores: ['revenue'],
        connectionName: 'b2b_sandbox_bq',
      },
    ]),
    { expectedLookerConnectionName: 'b2b_sandbox_bq' },
  );

  // Only the mismatching model is listed, and the report is downgraded to "partial".
  expect(artifacts.mismatchedModelNames).toEqual(['b2b']);
  expect(artifacts.report.status).toBe('partial');
  expect(artifacts.report.warnings).toEqual([
    {
      rawPath: 'b2b.model.lkml',
      entityType: 'lookml_models',
      entityId: 'b2b',
      severity: 'warning',
      statusCode: null,
      message:
        'LookML model b2b declares connection staging_pg but this warehouse expects b2b_sandbox_bq; SL writes are disabled for this model.',
      retryRecommended: false,
      kind: 'lookml_connection_mismatch',
      details: { model: 'b2b', declared: 'staging_pg', expected: 'b2b_sandbox_bq' },
    },
  ]);
});
it('emits success when no expected connection is configured', () => {
  // With a null expected connection name, the model's declared connection is not flagged.
  const parsed = project([
    {
      path: 'b2b.model.lkml',
      name: 'b2b',
      includes: [],
      explores: [],
      connectionName: 'staging_pg',
    },
  ]);
  const { mismatchedModelNames, report } = buildLookmlValidationArtifacts(parsed, {
    expectedLookerConnectionName: null,
  });

  expect(mismatchedModelNames).toEqual([]);
  expect(report).toEqual({
    status: 'success',
    retryRecommended: false,
    skipped: [],
    warnings: [],
  });
});
it('round-trips the fetch report and mismatched model sidecar', async () => {
  // One model whose declared connection ("wrong") differs from the expected one.
  const parsed = project([
    {
      path: 'orders.model.lkml',
      name: 'orders',
      includes: [],
      explores: [],
      connectionName: 'wrong',
    },
  ]);
  const artifacts = buildLookmlValidationArtifacts(parsed, { expectedLookerConnectionName: 'expected' });

  await writeLookmlValidationArtifacts(stagedDir, artifacts);

  // Both files exist on disk and contain the expected markers ...
  const reportPath = join(stagedDir, LOOKML_FETCH_REPORT_FILE);
  await expect(readFile(reportPath, 'utf-8')).resolves.toContain('lookml_connection_mismatch');
  const sidecarPath = join(stagedDir, LOOKML_MISMATCHED_MODELS_FILE);
  await expect(readFile(sidecarPath, 'utf-8')).resolves.toContain('orders');

  // ... and the readers return what was written.
  await expect(readLookmlFetchReport(stagedDir)).resolves.toEqual(artifacts.report);
  await expect(readLookmlMismatchedModelNames(stagedDir)).resolves.toEqual(new Set(['orders']));
});
});

View file

@ -1,146 +0,0 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
import { fetchLookmlRepo } from './fetch.js';
import type { LookmlPullConfig } from './pull-config.js';
// Directory holding the LookML fixture bundles (the `single-model` bundle seeds the origin
// repos below), resolved relative to this file's directory.
const FIXTURE_ROOT = join(__dirname, '../../../../test/fixtures/lookml');
/**
 * Builds a pull config for a test: `repoUrl` must be supplied, every other field falls back
 * to a default (branch "main", everything else null/empty) unless overridden.
 */
function pullConfig(overrides: Partial<LookmlPullConfig> & Pick<LookmlPullConfig, 'repoUrl'>): LookmlPullConfig {
  const defaults: Omit<LookmlPullConfig, 'repoUrl'> = {
    branch: 'main',
    path: null,
    authToken: null,
    expectedLookerConnectionName: null,
    parsedTargetTables: {},
  };
  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}
describe('fetchLookmlRepo', () => {
let tmpRoot: string;
beforeEach(async () => {
tmpRoot = await mkdtemp(join(tmpdir(), 'fetch-lookml-'));
});
afterEach(async () => rm(tmpRoot, { recursive: true, force: true }));
// First fetch into an empty cache dir: the origin repo is seeded from the `single-model`
// fixture plus one extra non-LookML file, then fetched into separate cache and staged dirs.
it('clones a local file:// repo and materializes only .lkml/.lookml files into stagedDir', async () => {
const repo = await makeLocalGitRepo(join(FIXTURE_ROOT, 'single-model'), join(tmpRoot, 'origin'));
// Add a non-LookML file to prove we filter it out.
await repo.writeFile('README.md', '# readme\n');
await repo.commit('add readme');
const stagedDir = join(tmpRoot, 'staged');
const cacheDir = join(tmpRoot, 'cache', 'conn-1');
await mkdir(stagedDir, { recursive: true });
const result = await fetchLookmlRepo({
config: pullConfig({ repoUrl: repo.repoUrl }),
cacheDir,
stagedDir,
});
expect(result.filesCopied).toBe(3); // orders.model.lkml + 2 views
// The reported commit hash is a full 40-character lowercase hex id.
expect(result.commitHash).toMatch(/^[0-9a-f]{40}$/);
await expect(readFile(join(stagedDir, 'orders.model.lkml'), 'utf-8')).resolves.toMatch(/connection:/);
await expect(readFile(join(stagedDir, 'views', 'orders.view.lkml'), 'utf-8')).resolves.toMatch(/view: orders/);
// README.md is present in the cache but NOT in stagedDir.
await expect(readFile(join(stagedDir, 'README.md'), 'utf-8')).rejects.toThrow();
await expect(readFile(join(cacheDir, 'README.md'), 'utf-8')).resolves.toMatch(/readme/);
});
// Two fetches share one cache dir but use separate staged dirs; a commit is added to the
// origin between them, so the second fetch must report a different commit and new content.
it('pulls an existing cache dir (second call) and surfaces the new commit', async () => {
const repo = await makeLocalGitRepo(join(FIXTURE_ROOT, 'single-model'), join(tmpRoot, 'origin'));
const stagedDir1 = join(tmpRoot, 'staged-1');
const stagedDir2 = join(tmpRoot, 'staged-2');
const cacheDir = join(tmpRoot, 'cache', 'conn-1');
await mkdir(stagedDir1, { recursive: true });
await mkdir(stagedDir2, { recursive: true });
const r1 = await fetchLookmlRepo({
config: pullConfig({ repoUrl: repo.repoUrl }),
cacheDir,
stagedDir: stagedDir1,
});
// Commit a new revision in the origin — a modified view.
await repo.writeFile('views/orders.view.lkml', 'view: orders { sql_table_name: public.orders_v2 ;; }\n');
await repo.commit('bump');
const r2 = await fetchLookmlRepo({
config: pullConfig({ repoUrl: repo.repoUrl }),
cacheDir,
stagedDir: stagedDir2,
});
expect(r2.commitHash).not.toBe(r1.commitHash);
// The second staged dir carries the updated view body.
await expect(readFile(join(stagedDir2, 'views', 'orders.view.lkml'), 'utf-8')).resolves.toMatch(/orders_v2/);
});
// Scoping via `path: 'views'`: exactly one file is copied, and it lands at the root of
// stagedDir (the path read back below is `orders.view.lkml`, not `views/orders.view.lkml`).
it('respects config.path — only files under that subtree land in stagedDir', async () => {
// Build a multi-subdir repo: models/... + views/...
const originRoot = join(tmpRoot, 'origin');
await mkdir(originRoot, { recursive: true });
await mkdir(join(originRoot, 'fixture-src', 'models'), { recursive: true });
await mkdir(join(originRoot, 'fixture-src', 'views'), { recursive: true });
await writeFile(join(originRoot, 'fixture-src', 'models', 'orders.model.lkml'), 'connection: "c"\n', 'utf-8');
await writeFile(join(originRoot, 'fixture-src', 'views', 'orders.view.lkml'), 'view: orders {}\n', 'utf-8');
const repo = await makeLocalGitRepo(join(originRoot, 'fixture-src'), join(originRoot, 'git'));
const stagedDir = join(tmpRoot, 'staged');
const cacheDir = join(tmpRoot, 'cache', 'conn-path');
await mkdir(stagedDir, { recursive: true });
const result = await fetchLookmlRepo({
config: pullConfig({ repoUrl: repo.repoUrl, path: 'views' }),
cacheDir,
stagedDir,
});
expect(result.filesCopied).toBe(1);
await expect(readFile(join(stagedDir, 'orders.view.lkml'), 'utf-8')).resolves.toMatch(/view: orders/);
// The model under `models/` is NOT copied because we scoped to `views/`.
await expect(readFile(join(stagedDir, 'orders.model.lkml'), 'utf-8')).rejects.toThrow();
});
// The cache dir is pre-populated with a `.git` directory whose HEAD is garbage; the fetch
// must still succeed and copy at least one file.
it('falls back to fresh clone when the cache dir is corrupt', async () => {
const repo = await makeLocalGitRepo(join(FIXTURE_ROOT, 'single-model'), join(tmpRoot, 'origin'));
const stagedDir = join(tmpRoot, 'staged');
const cacheDir = join(tmpRoot, 'cache', 'conn-bad');
await mkdir(stagedDir, { recursive: true });
// Pre-create a cacheDir that looks like a git repo but is corrupt.
await mkdir(join(cacheDir, '.git'), { recursive: true });
await writeFile(join(cacheDir, '.git', 'HEAD'), 'garbage\n', 'utf-8');
const result = await fetchLookmlRepo({
config: pullConfig({ repoUrl: repo.repoUrl }),
cacheDir,
stagedDir,
});
expect(result.filesCopied).toBeGreaterThan(0);
});
// Fetching from an unresolvable host with an auth token set must reject, and the rejection's
// message must not contain the token.
it('sanitizes auth tokens out of error messages when clone fails', async () => {
const stagedDir = join(tmpRoot, 'staged');
const cacheDir = join(tmpRoot, 'cache', 'conn-bad-url');
await mkdir(stagedDir, { recursive: true });
await expect(
fetchLookmlRepo({
config: pullConfig({
repoUrl: 'http://definitely-not-a-real-host.test/r.git',
authToken: 'supersecret-token',
}),
cacheDir,
stagedDir,
}),
).rejects.toThrow(
// Error is thrown with sanitized message — the token is replaced by '***'.
// The exact message depends on simple-git's failure mode; we assert the token does NOT appear.
// The matcher is a negative check only: any message without the token satisfies it.
expect.objectContaining({ message: expect.not.stringContaining('supersecret-token') }),
);
});
});

View file

@ -1,118 +0,0 @@
import { describe, expect, it } from 'vitest';
import { buildLookmlGraph } from './graph.js';
import type { ParsedLookmlProject } from './parse.js';
/**
 * Relaxed input shape for `mkProject`: every top-level field of `ParsedLookmlProject` is
 * optional, and within `models` / `views` the `connectionName` / `rawSqlTableName` fields
 * may be left out.
 */
type LooseParsedLookmlProject = Omit<Partial<ParsedLookmlProject>, 'models' | 'views'> & {
models?: Array<Omit<ParsedLookmlProject['models'][number], 'connectionName'> & { connectionName?: string | null }>;
views?: Array<Omit<ParsedLookmlProject['views'][number], 'rawSqlTableName'> & { rawSqlTableName?: string | null }>;
};
/**
 * Expands a loose project description into a full `ParsedLookmlProject`.
 * Missing `dashboards` / `allPaths` default to empty arrays; models without a
 * `connectionName` and views without a `rawSqlTableName` get `null` for that field.
 */
const mkProject = (overrides: LooseParsedLookmlProject): ParsedLookmlProject => {
  // Defaults come first in each spread so an explicitly supplied value wins.
  const models = (overrides.models ?? []).map((model) => ({ connectionName: null, ...model }));
  const views = (overrides.views ?? []).map((view) => ({ rawSqlTableName: null, ...view }));
  return { dashboards: [], allPaths: [], ...overrides, models, views };
};
describe('buildLookmlGraph', () => {
it('assigns a single model as owner of all its included views', () => {
  // One model whose glob include covers both views.
  const ordersModel = {
    path: 'orders.model.lkml',
    name: 'orders',
    includes: ['views/*.view.lkml'],
    explores: ['orders'],
  };
  const project = mkProject({
    models: [ordersModel],
    views: [
      { path: 'views/orders.view.lkml', name: 'orders', extendsFrom: [] },
      { path: 'views/customers.view.lkml', name: 'customers', extendsFrom: [] },
    ],
    allPaths: ['orders.model.lkml', 'views/customers.view.lkml', 'views/orders.view.lkml'],
  });

  const graph = buildLookmlGraph(project);

  expect(graph.ownerByViewPath.get('views/orders.view.lkml')).toBe('orders');
  expect(graph.ownerByViewPath.get('views/customers.view.lkml')).toBe('orders');
  const includedByOrders = graph.viewsIncludedByModel.get('orders')?.sort();
  expect(includedByOrders).toEqual(['views/customers.view.lkml', 'views/orders.view.lkml']);
});
it('assigns shared views to the lexicographically-first model that includes them', () => {
  const sharedView = 'views/shared.view.lkml';
  const ordersView = 'views/orders.view.lkml';
  const project = mkProject({
    models: [
      { path: 'marketing.model.lkml', name: 'marketing', includes: [sharedView], explores: [] },
      {
        path: 'orders.model.lkml',
        name: 'orders',
        includes: [sharedView, ordersView],
        explores: [],
      },
    ],
    views: [
      { path: sharedView, name: 'shared', extendsFrom: [] },
      { path: ordersView, name: 'orders', extendsFrom: [] },
    ],
    allPaths: ['marketing.model.lkml', 'orders.model.lkml', ordersView, sharedView],
  });

  const graph = buildLookmlGraph(project);

  // Ownership of the shared view goes to "marketing", which sorts ahead of "orders".
  expect(graph.ownerByViewPath.get(sharedView)).toBe('marketing');
  expect(graph.ownerByViewPath.get(ordersView)).toBe('orders');
  // Ownership aside, both models are still recorded as including the shared view.
  const includers = graph.includersByViewPath.get(sharedView)?.sort();
  expect(includers).toEqual(['marketing', 'orders']);
});
it('resolves transitive extends chains into dependency paths', () => {
  // Chain under test: orders_ext extends orders, which extends base.
  const project = mkProject({
    models: [{ path: 'orders.model.lkml', name: 'orders', includes: ['views/*.view.lkml'], explores: [] }],
    views: [
      { path: 'views/base.view.lkml', name: 'base', extendsFrom: [] },
      { path: 'views/orders.view.lkml', name: 'orders', extendsFrom: ['base'] },
      { path: 'views/orders_ext.view.lkml', name: 'orders_ext', extendsFrom: ['orders'] },
    ],
    allPaths: ['orders.model.lkml', 'views/base.view.lkml', 'views/orders.view.lkml', 'views/orders_ext.view.lkml'],
  });

  const { extendsAncestorsByViewName } = buildLookmlGraph(project);
  const ancestorsOf = (viewName: string) => extendsAncestorsByViewName.get(viewName)?.sort();

  // Ancestors are transitive: the leaf sees both views above it, the root sees none.
  expect(ancestorsOf('orders_ext')).toEqual(['base', 'orders']);
  expect(ancestorsOf('orders')).toEqual(['base']);
  expect(ancestorsOf('base')).toEqual([]);
});
it('resolves glob-style include patterns (views/*.view.lkml) against allPaths', () => {
  const topLevelView = 'views/a.view.lkml';
  const nestedView = 'views/sub/b.view.lkml';
  const project = mkProject({
    models: [{ path: 'orders.model.lkml', name: 'orders', includes: ['views/*.view.lkml'], explores: [] }],
    views: [
      { path: topLevelView, name: 'a', extendsFrom: [] },
      { path: nestedView, name: 'b', extendsFrom: [] },
    ],
    allPaths: ['orders.model.lkml', topLevelView, nestedView],
  });

  const graph = buildLookmlGraph(project);

  // A single star spans one path segment only, so the view under views/sub/ is left out.
  const included = graph.viewsIncludedByModel.get('orders')?.sort();
  expect(included).toEqual(['views/a.view.lkml']);
});
it('resolves double-star include patterns (views/**/*.view.lkml) recursively', () => {
  const topLevelView = 'views/a.view.lkml';
  const nestedView = 'views/sub/b.view.lkml';
  const project = mkProject({
    models: [{ path: 'orders.model.lkml', name: 'orders', includes: ['views/**/*.view.lkml'], explores: [] }],
    views: [
      { path: topLevelView, name: 'a', extendsFrom: [] },
      { path: nestedView, name: 'b', extendsFrom: [] },
    ],
    allPaths: ['orders.model.lkml', topLevelView, nestedView],
  });

  const graph = buildLookmlGraph(project);

  // With a double star, both the top-level and the nested view are included.
  const included = graph.viewsIncludedByModel.get('orders')?.sort();
  expect(included).toEqual(['views/a.view.lkml', 'views/sub/b.view.lkml']);
});
it('leaves a view ownerless when no model includes it', () => {
  const includedView = 'views/included.view.lkml';
  const orphanView = 'views/orphan.view.lkml';
  const project = mkProject({
    models: [{ path: 'other.model.lkml', name: 'other', includes: [includedView], explores: [] }],
    views: [
      { path: includedView, name: 'included', extendsFrom: [] },
      { path: orphanView, name: 'orphan', extendsFrom: [] },
    ],
    allPaths: ['other.model.lkml', includedView, orphanView],
  });

  const { ownerByViewPath } = buildLookmlGraph(project);

  // The orphan has no entry at all in the owner map; the included view maps to its model.
  expect(ownerByViewPath.has(orphanView)).toBe(false);
  expect(ownerByViewPath.get(includedView)).toBe('other');
});
});

View file

@ -1,72 +0,0 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
import { LOOKML_FETCH_REPORT_FILE } from './fetch-report.js';
import { LookmlSourceAdapter } from './lookml.adapter.js';
describe('LookmlSourceAdapter validation sidecars', () => {
  // Per-test scratch directory holding the fake origin repo, the staged dir, and the adapter home.
  let scratchRoot: string;

  beforeEach(async () => {
    scratchRoot = await mkdtemp(join(tmpdir(), 'lookml-adapter-'));
  });

  afterEach(async () => {
    await rm(scratchRoot, { recursive: true, force: true });
  });

  it('returns configured target warehouse connection ids', async () => {
    const adapter = new LookmlSourceAdapter({
      homeDir: join(scratchRoot, 'home'),
      targetConnectionIds: ['warehouse', 'analytics', 'warehouse'],
    });

    // The configured list contains a duplicate and is unsorted; the expected listing is unique and sorted.
    const listed = adapter.listTargetConnectionIds?.(join(scratchRoot, 'staged'));
    await expect(listed).resolves.toEqual(['analytics', 'warehouse']);
  });

  it('writes a partial fetch report and marks mismatched chunks as SL-disallowed', async () => {
    // Source tree whose model declares a connection other than the one the pull config expects.
    const sourceTree = join(scratchRoot, 'origin-src');
    await mkdir(join(sourceTree, 'views'), { recursive: true });
    await writeFile(
      join(sourceTree, 'b2b.model.lkml'),
      'connection: "wrong_connection"\ninclude: "views/*.view.lkml"\nexplore: orders {}\n',
      'utf-8',
    );
    await writeFile(
      join(sourceTree, 'views', 'orders.view.lkml'),
      'view: orders { sql_table_name: public.orders ;; }\n',
      'utf-8',
    );
    const originRepo = await makeLocalGitRepo(sourceTree, join(scratchRoot, 'origin'));

    const stagingPath = join(scratchRoot, 'staged');
    await mkdir(stagingPath, { recursive: true });

    const adapter = new LookmlSourceAdapter({ homeDir: join(scratchRoot, 'home') });
    const pullConfig = {
      repoUrl: originRepo.repoUrl,
      branch: 'main',
      path: null,
      authToken: null,
      expectedLookerConnectionName: 'expected_connection',
    };
    await adapter.fetch(pullConfig, stagingPath, {
      connectionId: '11111111-1111-4111-8111-111111111111',
      sourceKey: 'lookml',
    });

    // The report file on disk must mention the mismatch, and the parsed report must be partial.
    const rawReport = readFile(join(stagingPath, LOOKML_FETCH_REPORT_FILE), 'utf-8');
    await expect(rawReport).resolves.toContain('lookml_connection_mismatch');
    await expect(adapter.readFetchReport(stagingPath)).resolves.toMatchObject({ status: 'partial' });

    // Chunking the same staged dir must flag the model's work unit as SL-disallowed.
    const chunked = await adapter.chunk(stagingPath);
    expect(chunked.workUnits[0]).toMatchObject({
      unitKey: 'lookml-b2b',
      slDisallowed: true,
      slDisallowedReason: 'lookml_connection_mismatch',
    });
  });
});

View file

@ -1,166 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { parseLookmlStagedDir } from './parse.js';
describe('parseLookmlStagedDir', () => {
  // Fresh temp directory per test; each test writes its own .lkml fixtures into it.
  let stagingRoot: string;

  beforeEach(async () => {
    stagingRoot = await mkdtemp(join(tmpdir(), 'lkml-parse-'));
  });

  afterEach(async () => {
    await rm(stagingRoot, { recursive: true, force: true });
  });

  it('parses a single view file and reports it under views with a relative path', async () => {
    const viewFile = join(stagingRoot, 'customers.view.lkml');
    await writeFile(
      viewFile,
      `view: customers {
dimension: id {
type: number
primary_key: yes
sql: \${TABLE}.id ;;
}
}
`,
      'utf-8',
    );

    const parsed = await parseLookmlStagedDir(stagingRoot);

    const viewPaths = parsed.views.map((view) => view.path);
    expect(viewPaths).toEqual(['customers.view.lkml']);
    expect(parsed.views[0].name).toBe('customers');
    expect(parsed.models).toEqual([]);
    expect(parsed.dashboards).toEqual([]);
  });

  it('parses a model file and extracts include globs', async () => {
    await mkdir(join(stagingRoot, 'views'), { recursive: true });
    const modelFile = join(stagingRoot, 'orders.model.lkml');
    await writeFile(
      modelFile,
      `connection: "my_bq"
include: "views/*.view.lkml"
explore: orders {}
`,
      'utf-8',
    );
    const viewFile = join(stagingRoot, 'views', 'orders.view.lkml');
    await writeFile(
      viewFile,
      `view: orders {
sql_table_name: public.orders ;;
}
`,
      'utf-8',
    );

    const parsed = await parseLookmlStagedDir(stagingRoot);

    const modelPaths = parsed.models.map((model) => model.path);
    expect(modelPaths).toEqual(['orders.model.lkml']);
    expect(parsed.models[0].name).toBe('orders');
    expect(parsed.models[0].includes).toEqual(['views/*.view.lkml']);
    expect(parsed.models[0].explores).toEqual(['orders']);
    const viewPaths = parsed.views.map((view) => view.path);
    expect(viewPaths).toEqual(['views/orders.view.lkml']);
  });

  it('extracts model connection names and raw view sql_table_name declarations', async () => {
    await mkdir(join(stagingRoot, 'views'), { recursive: true });
    const modelFile = join(stagingRoot, 'b2b.model.lkml');
    await writeFile(
      modelFile,
      `connection: "b2b_sandbox_bq"
include: "views/*.view.lkml"
explore: orders {}
`,
      'utf-8',
    );
    const viewFile = join(stagingRoot, 'views', 'orders.view.lkml');
    await writeFile(
      viewFile,
      `view: orders {
sql_table_name: analytics.orders AS o ;;
}
`,
      'utf-8',
    );

    const parsed = await parseLookmlStagedDir(stagingRoot);

    expect(parsed.models[0]).toMatchObject({
      path: 'b2b.model.lkml',
      name: 'b2b',
      connectionName: 'b2b_sandbox_bq',
    });
    // The alias ("AS o") is expected to be kept verbatim in the raw declaration.
    expect(parsed.views[0]).toMatchObject({
      path: 'views/orders.view.lkml',
      name: 'orders',
      rawSqlTableName: 'analytics.orders AS o',
    });
  });

  it('captures extends declarations on views', async () => {
    const baseFile = join(stagingRoot, 'base.view.lkml');
    await writeFile(
      baseFile,
      `view: base {
dimension: id {
type: number
primary_key: yes
sql: \${TABLE}.id ;;
}
}
`,
      'utf-8',
    );
    const ordersFile = join(stagingRoot, 'orders.view.lkml');
    await writeFile(
      ordersFile,
      `view: orders {
extends: [base]
sql_table_name: public.orders ;;
}
`,
      'utf-8',
    );

    const parsed = await parseLookmlStagedDir(stagingRoot);

    const ordersView = parsed.views.find((view) => view.name === 'orders');
    expect(ordersView).toBeDefined();
    // Explicit guard so the compiler narrows away `undefined` for the assertion below.
    if (!ordersView) {
      throw new Error('expected orders view');
    }
    expect(ordersView.extendsFrom).toEqual(['base']);
  });

  it('collects .dashboard.lkml files structurally (no deep parsing)', async () => {
    const dashboardFile = join(stagingRoot, 'overview.dashboard.lkml');
    await writeFile(dashboardFile, '- dashboard: overview\n title: Overview\n', 'utf-8');

    const parsed = await parseLookmlStagedDir(stagingRoot);

    const dashboardPaths = parsed.dashboards.map((dashboard) => dashboard.path);
    expect(dashboardPaths).toEqual(['overview.dashboard.lkml']);
    expect(parsed.dashboards[0].name).toBe('overview');
  });

  it('ignores non-.lkml files', async () => {
    await writeFile(join(stagingRoot, 'README.md'), '# readme\n', 'utf-8');
    await writeFile(join(stagingRoot, 'notes.txt'), 'note\n', 'utf-8');

    const parsed = await parseLookmlStagedDir(stagingRoot);

    expect(parsed.models).toEqual([]);
    expect(parsed.views).toEqual([]);
    expect(parsed.dashboards).toEqual([]);
  });

  it('returns a sorted deterministic order across runs', async () => {
    // Written in reverse alphabetical order so that passing depends on the parser's own ordering.
    await writeFile(
      join(stagingRoot, 'zeta.view.lkml'),
      `view: zeta {
}
`,
      'utf-8',
    );
    await writeFile(
      join(stagingRoot, 'alpha.view.lkml'),
      `view: alpha {
}
`,
      'utf-8',
    );

    const firstRun = await parseLookmlStagedDir(stagingRoot);
    const secondRun = await parseLookmlStagedDir(stagingRoot);

    expect(firstRun.views.map((view) => view.path)).toEqual(['alpha.view.lkml', 'zeta.view.lkml']);
    expect(secondRun.views.map((view) => view.path)).toEqual(firstRun.views.map((view) => view.path));
  });
});

View file

@ -1,140 +0,0 @@
import { describe, expect, it } from 'vitest';
import { parseLookmlPullConfig, pullConfigFromIntegrationConfig } from './pull-config.js';
describe('lookml pull config', () => {
  it('parses a minimal valid config with defaulted branch', () => {
    const parsed = parseLookmlPullConfig({ repoUrl: 'https://github.com/acme/r.git' });

    // Only repoUrl was supplied; every other field is expected to take its default.
    expect(parsed.repoUrl).toBe('https://github.com/acme/r.git');
    expect(parsed.branch).toBe('main');
    expect(parsed.path).toBeNull();
    expect(parsed.authToken).toBeNull();
    expect(parsed.expectedLookerConnectionName).toBeNull();
    expect(parsed.parsedTargetTables).toEqual({});
  });

  it('defaults expectedLookerConnectionName and parsedTargetTables for LookML pulls', () => {
    const parsed = parseLookmlPullConfig({ repoUrl: 'https://github.com/acme/r.git' });

    expect(parsed.expectedLookerConnectionName).toBeNull();
    expect(parsed.parsedTargetTables).toEqual({});
  });

  it('parses a fully specified config', () => {
    const parsed = parseLookmlPullConfig({
      repoUrl: 'https://gitlab.com/team/proj.git',
      branch: 'develop',
      path: 'views',
      authToken: 'glpat-xyz',
    });

    expect(parsed).toEqual({
      repoUrl: 'https://gitlab.com/team/proj.git',
      branch: 'develop',
      path: 'views',
      authToken: 'glpat-xyz',
      expectedLookerConnectionName: null,
      parsedTargetTables: {},
    });
  });

  it('parses the validation-only expected connection and parsed target table map', () => {
    // One resolved table entry (ok: true) and one rejected entry (ok: false) exercise both shapes.
    const parsed = parseLookmlPullConfig({
      repoUrl: 'https://github.com/acme/r.git',
      expectedLookerConnectionName: 'b2b_sandbox_bq',
      parsedTargetTables: {
        'b2b.orders': {
          ok: true,
          catalog: 'proj',
          schema: 'analytics',
          name: 'orders',
          canonicalTable: 'proj.analytics.orders',
        },
        'b2b.derived': {
          ok: false,
          reason: 'derived_table_not_supported',
        },
      },
    });

    expect(parsed.expectedLookerConnectionName).toBe('b2b_sandbox_bq');
    const { parsedTargetTables: tables } = parsed;
    expect(tables['b2b.orders']).toEqual({
      ok: true,
      catalog: 'proj',
      schema: 'analytics',
      name: 'orders',
      canonicalTable: 'proj.analytics.orders',
    });
    expect(tables['b2b.derived']).toEqual({
      ok: false,
      reason: 'derived_table_not_supported',
    });
  });

  it('rejects a non-URL repoUrl', () => {
    const parseInvalid = () => parseLookmlPullConfig({ repoUrl: 'not-a-url' });
    expect(parseInvalid).toThrow();
  });

  it('rejects a missing repoUrl', () => {
    const parseIncomplete = () => parseLookmlPullConfig({ branch: 'main' });
    expect(parseIncomplete).toThrow();
  });

  it('pullConfigFromIntegrationConfig extracts the adapter-visible fields', () => {
    // Scheduling/bookkeeping fields are present on the input but absent from the expected output.
    const integration = {
      pullEnabled: true,
      repoUrl: 'https://github.com/acme/r.git',
      branch: 'main',
      path: 'models',
      authToken: 'ghp_x',
      pullSchedule: 'daily' as const,
      nextPullAt: '2026-05-01T00:00:00.000Z',
      lastPulledAt: null,
      lastCommitHash: null,
    };

    const extracted = pullConfigFromIntegrationConfig(integration);

    expect(extracted).toEqual({
      repoUrl: 'https://github.com/acme/r.git',
      branch: 'main',
      path: 'models',
      authToken: 'ghp_x',
      expectedLookerConnectionName: null,
      parsedTargetTables: {},
    });
  });

  it('pullConfigFromIntegrationConfig forwards the expected connection name', () => {
    const integration = {
      pullEnabled: true,
      repoUrl: 'https://github.com/acme/r.git',
      branch: 'main',
      path: 'models',
      authToken: 'ghp_x',
      pullSchedule: 'daily' as const,
      nextPullAt: '2026-05-01T00:00:00.000Z',
      lastPulledAt: null,
      lastCommitHash: null,
      expectedLookerConnectionName: 'warehouse_bq',
    };

    const extracted = pullConfigFromIntegrationConfig(integration);

    expect(extracted).toEqual({
      repoUrl: 'https://github.com/acme/r.git',
      branch: 'main',
      path: 'models',
      authToken: 'ghp_x',
      expectedLookerConnectionName: 'warehouse_bq',
      parsedTargetTables: {},
    });
  });

  it('pullConfigFromIntegrationConfig throws when repoUrl is null', () => {
    const integration = {
      pullEnabled: false,
      repoUrl: null,
      branch: null,
      path: null,
      authToken: null,
      pullSchedule: null,
      nextPullAt: null,
      lastPulledAt: null,
      lastCommitHash: null,
    };

    // The thrown message is expected to name the offending field.
    const extract = () => pullConfigFromIntegrationConfig(integration);
    expect(extract).toThrow(/repoUrl/);
  });
});

View file

@ -1,44 +0,0 @@
import { describe, expect, it } from 'vitest';
import { CardReferenceCycleError, expandCardReferences } from './card-references.js';
describe('expandCardReferences', () => {
  // In-memory card store: 100 <- 101 <- 102 form a reference chain; 200 and 201 reference each other.
  const cardSqlById: Record<number, string> = {
    100: 'SELECT id FROM base_table',
    101: 'SELECT * FROM {{#100}}',
    102: 'SELECT * FROM {{#101}} WHERE x = 1',
    200: 'SELECT * FROM {{#201}}',
    201: 'SELECT * FROM {{#200}}',
  };

  /** Stub card lookup: resolves with the stored SQL, or rejects for ids that are not in the store. */
  const fetchCard = (id: number): Promise<{ native_query: string }> =>
    id in cardSqlById
      ? Promise.resolve({ native_query: cardSqlById[id] })
      : Promise.reject(new Error(`no card ${id}`));

  it('returns SQL unchanged when there are no references', async () => {
    const expanded = await expandCardReferences('SELECT 1', { fetchCard });
    expect(expanded).toBe('SELECT 1');
  });

  it('inlines a single card reference as a subquery', async () => {
    const expanded = await expandCardReferences('SELECT * FROM {{#100}}', { fetchCard });
    expect(expanded).toBe('SELECT * FROM (SELECT id FROM base_table)');
  });

  it('handles slugged references like {{#100-pretty-slug}}', async () => {
    const expanded = await expandCardReferences('SELECT * FROM {{#100-pretty-slug}}', { fetchCard });
    expect(expanded).toBe('SELECT * FROM (SELECT id FROM base_table)');
  });

  it('recursively resolves nested references', async () => {
    const expanded = await expandCardReferences('SELECT * FROM {{#102}}', { fetchCard });
    expect(expanded).toBe(
      'SELECT * FROM (SELECT * FROM (SELECT * FROM (SELECT id FROM base_table)) WHERE x = 1)',
    );
  });

  it('detects cycles and throws CardReferenceCycleError', async () => {
    const pending = expandCardReferences('SELECT * FROM {{#200}}', { fetchCard });
    await expect(pending).rejects.toBeInstanceOf(CardReferenceCycleError);
  });
});

View file

@ -1,319 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { chunkMetabaseStagedDir } from './chunk.js';
import { stagedSyncConfigSchema } from './types.js';
// Root of the Metabase staged-directory fixtures, resolved relative to this test file's directory.
const FIXTURES = resolve(__dirname, '../../../../test/fixtures/metabase');
// Fixture the first-run tests expect to chunk into a single work unit (collection 5, cards 1 and 2).
const SIMPLE = join(FIXTURES, 'simple');
// Fixture the first-run tests expect to chunk into two work units (collections 5 and 6).
const MULTI = join(FIXTURES, 'multi-collection');
// Fixture with cards 10 and 11, which the first-run tests expect in the same work unit.
const CARD_REF = join(FIXTURES, 'card-ref');
describe('chunkMetabaseStagedDir — first run', () => {
  it('simple fixture emits one WU for collection 5 containing cards + collection file; shared control files in dependencyPaths', async () => {
    const chunked = await chunkMetabaseStagedDir(SIMPLE);

    expect(chunked.workUnits).toHaveLength(1);
    const [unit] = chunked.workUnits;
    expect(unit.unitKey).toBe('metabase-col-5');
    expect(unit.rawFiles.sort()).toEqual(['cards/1.json', 'cards/2.json', 'collections/5.json']);
    expect(unit.dependencyPaths.sort()).toEqual(['databases/42.json', 'sync-config.json']);
    expect(unit.peerFileIndex).toEqual([]);
    expect(unit.notes).toContain('collection 5');
    expect(unit.notes).toContain('2 cards');
  });

  it('multi-collection fixture emits two WUs — one per collection — deterministic by id', async () => {
    const chunked = await chunkMetabaseStagedDir(MULTI);

    expect(chunked.workUnits).toHaveLength(2);
    const unitKeys = chunked.workUnits.map((unit) => unit.unitKey);
    expect(unitKeys).toEqual(['metabase-col-5', 'metabase-col-6']);

    const [collectionFive, collectionSix] = chunked.workUnits;
    expect(collectionFive.rawFiles).toContain('cards/1.json');
    expect(collectionFive.rawFiles).toContain('cards/2.json');
    expect(collectionFive.rawFiles).not.toContain('cards/3.json');
    expect(collectionSix.rawFiles).toContain('cards/3.json');
    expect(collectionSix.rawFiles).not.toContain('cards/1.json');
    // Each WU's peerFileIndex contains the OTHER collection's card files.
    expect(collectionFive.peerFileIndex).toContain('cards/3.json');
    expect(collectionSix.peerFileIndex).toContain('cards/1.json');
  });

  it('card-ref fixture: cross-card reference inside the same collection lands in rawFiles, NOT dependencyPaths', async () => {
    const chunked = await chunkMetabaseStagedDir(CARD_REF);

    expect(chunked.workUnits).toHaveLength(1);
    const [unit] = chunked.workUnits;
    expect(unit.rawFiles).toContain('cards/10.json');
    expect(unit.rawFiles).toContain('cards/11.json');
    expect(unit.dependencyPaths).not.toContain('cards/10.json');
    expect(unit.dependencyPaths).not.toContain('cards/11.json');
  });

  it('is deterministic: two identical invocations return structurally-equal WUs', async () => {
    const firstRun = await chunkMetabaseStagedDir(SIMPLE);
    const secondRun = await chunkMetabaseStagedDir(SIMPLE);

    // Compared via serialization so that property order is part of the comparison.
    expect(JSON.stringify(firstRun)).toBe(JSON.stringify(secondRun));
  });

  it('DiffSet re-sync keeps only WUs with a changed card; unchanged siblings land in dependencyPaths', async () => {
    const chunked = await chunkMetabaseStagedDir(SIMPLE, {
      diffSet: {
        added: [],
        modified: ['cards/1.json'],
        deleted: [],
        unchanged: ['cards/2.json', 'collections/5.json', 'databases/42.json', 'sync-config.json'],
      },
    });

    expect(chunked.workUnits).toHaveLength(1);
    const [unit] = chunked.workUnits;
    expect(unit.rawFiles).toEqual(['cards/1.json']);
    expect(unit.dependencyPaths.sort()).toEqual([
      'cards/2.json',
      'collections/5.json',
      'databases/42.json',
      'sync-config.json',
    ]);
  });

  it('DiffSet re-sync: all-unchanged yields zero WUs and no eviction', async () => {
    const chunked = await chunkMetabaseStagedDir(SIMPLE, {
      diffSet: {
        added: [],
        modified: [],
        deleted: [],
        unchanged: ['cards/1.json', 'cards/2.json', 'collections/5.json', 'databases/42.json', 'sync-config.json'],
      },
    });

    expect(chunked.workUnits).toEqual([]);
    expect(chunked.eviction).toBeUndefined();
  });

  it('DiffSet re-sync: deleted card emits an EvictionUnit', async () => {
    const chunked = await chunkMetabaseStagedDir(SIMPLE, {
      diffSet: {
        added: [],
        modified: [],
        deleted: ['cards/1.json'],
        unchanged: ['cards/2.json', 'collections/5.json', 'databases/42.json', 'sync-config.json'],
      },
    });

    expect(chunked.workUnits).toEqual([]);
    expect(chunked.eviction).toEqual({ deletedRawPaths: ['cards/1.json'] });
  });

  it('DiffSet re-sync: sync-config.json change alone does NOT trigger any WU', async () => {
    const chunked = await chunkMetabaseStagedDir(SIMPLE, {
      diffSet: {
        added: [],
        modified: ['sync-config.json'],
        deleted: [],
        unchanged: ['cards/1.json', 'cards/2.json', 'collections/5.json', 'databases/42.json'],
      },
    });

    expect(chunked.workUnits).toEqual([]);
    expect(chunked.eviction).toBeUndefined();
  });

  it('DiffSet re-sync: databases/{id}.json change alone does NOT trigger any WU', async () => {
    const chunked = await chunkMetabaseStagedDir(SIMPLE, {
      diffSet: {
        added: [],
        modified: ['databases/42.json'],
        deleted: [],
        unchanged: ['cards/1.json', 'cards/2.json', 'collections/5.json', 'sync-config.json'],
      },
    });

    expect(chunked.workUnits).toEqual([]);
    expect(chunked.eviction).toBeUndefined();
  });
});
/**
 * Serializes `body` as JSON and writes it to `rel` underneath `stagedDir`,
 * creating any missing parent directories first.
 *
 * @param stagedDir - Directory the relative path is resolved against.
 * @param rel - Relative file path (may contain sub-directories, e.g. `cards/1.json`).
 * @param body - Value written with `JSON.stringify` (no indentation).
 */
async function writeInline(stagedDir: string, rel: string, body: object): Promise<void> {
  const target = join(stagedDir, rel);
  // Joining with '..' normalizes to the directory that contains the target file.
  const parentDir = join(target, '..');
  await mkdir(parentDir, { recursive: true });
  const payload = JSON.stringify(body);
  await writeFile(target, payload, 'utf-8');
}
describe('chunkMetabaseStagedDir — selected mode filters non-matching cards', () => {
  // Temp staged directory, rebuilt for every test.
  let stagedRoot: string;

  beforeEach(async () => {
    stagedRoot = await mkdtemp(join(tmpdir(), 'mb-chunk-select-'));
  });

  afterEach(async () => {
    await rm(stagedRoot, { recursive: true, force: true });
  });

  it('cards outside selected collections are NOT in any WU', async () => {
    // Sync config selects collection 5 only.
    await writeInline(stagedRoot, 'sync-config.json', {
      metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
      metabaseDatabaseId: 42,
      syncMode: 'ONLY',
      selections: [{ selectionType: 'collection', metabaseObjectId: 5 }],
      defaultTagNames: [],
      mapping: {
        metabaseDatabaseId: 42,
        metabaseDatabaseName: 'Analytics',
        metabaseEngine: 'postgres',
        targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
      },
    });
    await writeInline(stagedRoot, 'databases/42.json', {
      metabaseDatabaseId: 42,
      metabaseDatabaseName: 'Analytics',
      metabaseEngine: 'postgres',
      targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
    });
    await writeInline(stagedRoot, 'collections/5.json', { metabaseId: 5, name: 'A', parentId: 'root' });
    await writeInline(stagedRoot, 'collections/6.json', { metabaseId: 6, name: 'B', parentId: 'root' });
    // Card 100 lives in the selected collection 5 ...
    await writeInline(stagedRoot, 'cards/100.json', {
      metabaseId: 100,
      name: 'In',
      description: null,
      type: 'model',
      databaseId: 42,
      collectionId: 5,
      archived: false,
      resolvedSql: 'SELECT 1',
      templateTags: [],
      resultMetadata: [],
      collectionPath: ['A'],
      referencedCardIds: [],
      resolutionStatus: 'resolved',
    });
    // ... while card 200 lives in the unselected collection 6.
    await writeInline(stagedRoot, 'cards/200.json', {
      metabaseId: 200,
      name: 'Out',
      description: null,
      type: 'model',
      databaseId: 42,
      collectionId: 6,
      archived: false,
      resolvedSql: 'SELECT 1',
      templateTags: [],
      resultMetadata: [],
      collectionPath: ['B'],
      referencedCardIds: [],
      resolutionStatus: 'resolved',
    });

    const chunked = await chunkMetabaseStagedDir(stagedRoot);

    expect(chunked.workUnits).toHaveLength(1);
    const [onlyUnit] = chunked.workUnits;
    expect(onlyUnit.unitKey).toBe('metabase-col-5');
    expect(onlyUnit.rawFiles).toContain('cards/100.json');
    expect(onlyUnit.rawFiles).not.toContain('cards/200.json');
  });
});
describe('chunkMetabaseStagedDir — syncMode enum coverage', () => {
  // Sync-config fields shared by every test; each test adds its own syncMode and selections.
  const BASE_SYNC = {
    metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
    metabaseDatabaseId: 42,
    defaultTagNames: [] as string[],
    mapping: {
      metabaseDatabaseId: 42,
      metabaseDatabaseName: 'Analytics',
      metabaseEngine: 'postgres',
      targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
    },
  };

  // Builds a staged card body; key order is kept stable because the body is serialized to disk.
  const stagedCard = (metabaseId: number, name: string, collectionId: number, collectionName: string) => ({
    metabaseId,
    name,
    description: null,
    type: 'model',
    databaseId: 42,
    collectionId,
    archived: false,
    resolvedSql: 'SELECT 1',
    templateTags: [],
    resultMetadata: [],
    collectionPath: [collectionName],
    referencedCardIds: [],
    resolutionStatus: 'resolved',
  });

  // Chunks the staged dir and flattens the raw files of every work unit into one list.
  const collectRawFiles = async (stagedRoot: string): Promise<string[]> => {
    const chunked = await chunkMetabaseStagedDir(stagedRoot);
    return chunked.workUnits.flatMap((unit) => unit.rawFiles);
  };

  let dir: string;

  beforeEach(async () => {
    dir = await mkdtemp(join(tmpdir(), 'mb-chunk-enum-'));
    await writeInline(dir, 'databases/42.json', {
      metabaseDatabaseId: 42,
      metabaseDatabaseName: 'Analytics',
      metabaseEngine: 'postgres',
      targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
    });
    await writeInline(dir, 'collections/5.json', { metabaseId: 5, name: 'A', parentId: 'root' });
    await writeInline(dir, 'collections/6.json', { metabaseId: 6, name: 'B', parentId: 'root' });
    // Card 100 belongs to collection 5 ("A"), card 200 to collection 6 ("B").
    await writeInline(dir, 'cards/100.json', stagedCard(100, 'In', 5, 'A'));
    await writeInline(dir, 'cards/200.json', stagedCard(200, 'Out', 6, 'B'));
  });

  afterEach(async () => {
    await rm(dir, { recursive: true, force: true });
  });

  it('ALL includes every non-archived card on the matching database', async () => {
    await writeInline(dir, 'sync-config.json', {
      ...BASE_SYNC,
      syncMode: 'ALL',
      selections: [],
    });

    const rawFiles = await collectRawFiles(dir);

    expect(rawFiles).toContain('cards/100.json');
    expect(rawFiles).toContain('cards/200.json');
  });

  it('ONLY includes cards in selected collections; excludes the rest', async () => {
    await writeInline(dir, 'sync-config.json', {
      ...BASE_SYNC,
      syncMode: 'ONLY',
      selections: [{ selectionType: 'collection', metabaseObjectId: 5 }],
    });

    const rawFiles = await collectRawFiles(dir);

    expect(rawFiles).toContain('cards/100.json');
    expect(rawFiles).not.toContain('cards/200.json');
  });

  it('ONLY with no selections includes every matching card for old generated configs', async () => {
    await writeInline(dir, 'sync-config.json', {
      ...BASE_SYNC,
      syncMode: 'ONLY',
      selections: [],
    });

    const rawFiles = await collectRawFiles(dir);

    expect(rawFiles).toContain('cards/100.json');
    expect(rawFiles).toContain('cards/200.json');
  });

  it('EXCEPT excludes cards in selected collections; includes the rest', async () => {
    await writeInline(dir, 'sync-config.json', {
      ...BASE_SYNC,
      syncMode: 'EXCEPT',
      selections: [{ selectionType: 'collection', metabaseObjectId: 5 }],
    });

    const rawFiles = await collectRawFiles(dir);

    expect(rawFiles).not.toContain('cards/100.json');
    expect(rawFiles).toContain('cards/200.json');
  });

  it('lowercase syncMode is rejected at parse time', () => {
    const outcome = stagedSyncConfigSchema.safeParse({
      ...BASE_SYNC,
      syncMode: 'all',
      selections: [],
    });

    expect(outcome.success).toBe(false);
  });
});

View file

@ -1,43 +0,0 @@
import { readFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { describe, expect, it } from 'vitest';
// Directory containing this test module, derived from its own module URL.
const metabaseDir = dirname(fileURLToPath(import.meta.url));

/**
 * Reads a file that sits next to this test module and returns its text.
 *
 * @param name - File name relative to this module's directory.
 * @returns The file content decoded as UTF-8.
 */
async function readMetabaseFile(name: string): Promise<string> {
  const target = join(metabaseDir, name);
  return readFile(target, 'utf-8');
}
describe('KTX Metabase client boundary', () => {
  it('keeps NestJS, server data-source base classes, and server-relative imports out of the KTX client', async () => {
    const clientSource = await readMetabaseFile('client.ts');

    // Several needles are assembled from template pieces rather than written as one literal.
    const forbidden = [
      `@${'nestjs'}`,
      `DataSource${'Client'}`,
      `../base/data-source-${'client'}`,
      '../types',
      '../../types/brand',
    ];
    for (const needle of forbidden) {
      expect(clientSource).not.toContain(needle);
    }
  });

  it('keeps proxy implementation code out of the KTX v1 client', async () => {
    const clientSource = await readMetabaseFile('client.ts');

    const forbidden = [
      `network-${'proxy'}`,
      `ssh${'2'}`,
      `tail${'scale'}`,
      'resolveNetworkProxy',
      'establishProxy',
      'executeProxiedRequest',
      'originalHost',
      'originalHostname',
      'servername',
    ];
    for (const needle of forbidden) {
      expect(clientSource).not.toContain(needle);
    }
  });

  it('keeps the runtime config proxy-free in v1', async () => {
    const portSource = await readMetabaseFile('client-port.ts');

    // Isolate the interface declaration text; an absent interface yields '' and fails the positive checks.
    const declaration = portSource.match(/export interface MetabaseClientRuntimeConfig \{[\s\S]*?\n\}/);
    const runtimeConfigBlock = declaration?.[0] ?? '';

    expect(runtimeConfigBlock).toContain('apiUrl: string');
    expect(runtimeConfigBlock).toContain('apiKey: string');
    expect(runtimeConfigBlock).not.toContain('proxy');
    expect(runtimeConfigBlock).not.toContain('networkProxy');
  });
});

View file

@ -1,107 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import type { FetchContext } from '../../types.js';
import {
IngestMetabaseClientFactory,
type MetabaseCard,
type MetabaseConnectionClientFactory,
type MetabaseDatasetQuery,
type MetabaseRuntimeClient,
type MetabaseTemplateTag,
type TestConnectionResult,
} from './client-port.js';
import type { MetabasePullConfig } from './types.js';
/**
 * Builds a `MetabaseRuntimeClient` test double in which every method is a fresh `vi.fn()` mock.
 *
 * @returns A new client whose mocks are independent from any previously created client.
 */
function makeRuntimeClient(): MetabaseRuntimeClient {
  const stub: MetabaseRuntimeClient = {
    // Connection / identity
    testConnection: vi.fn(),
    getCurrentUser: vi.fn(),
    // Databases
    getDatabases: vi.fn(),
    getDatabase: vi.fn(),
    // Collections
    getCollectionTree: vi.fn(),
    getCollection: vi.fn(),
    getCollectionItems: vi.fn(),
    // Cards
    getCard: vi.fn(),
    getAllCards: vi.fn(),
    // SQL helpers
    convertMbqlToNative: vi.fn(),
    getNativeSql: vi.fn(),
    getTemplateTags: vi.fn(),
    getCardSql: vi.fn(),
    getResolvedSql: vi.fn(),
    // Lifecycle
    cleanup: vi.fn(),
  };
  return stub;
}
describe('IngestMetabaseClientFactory', () => {
  // The pull config and the fetch context deliberately carry DIFFERENT connection ids.
  const pullConfig: MetabasePullConfig = {
    metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
    metabaseDatabaseId: 42,
  };
  const fetchContext: FetchContext = {
    connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
    sourceKey: 'metabase',
  };

  it('delegates to the connection-level factory with the Metabase source connection id, not ctx.connectionId', async () => {
    const stubClient = makeRuntimeClient();
    const createClient = vi.fn().mockResolvedValue(stubClient);
    const connectionFactory: MetabaseConnectionClientFactory = { createClient };

    const ingestFactory = new IngestMetabaseClientFactory(connectionFactory);
    const created = ingestFactory.createClient(pullConfig, fetchContext);

    await expect(created).resolves.toBe(stubClient);
    expect(connectionFactory.createClient).toHaveBeenCalledTimes(1);
    expect(connectionFactory.createClient).toHaveBeenCalledWith(pullConfig.metabaseConnectionId);
    expect(connectionFactory.createClient).not.toHaveBeenCalledWith(fetchContext.connectionId);
  });

  it('supports synchronous connection-level factories', async () => {
    const stubClient = makeRuntimeClient();
    // mockReturnValue (not mockResolvedValue): the inner factory returns the client directly.
    const createClient = vi.fn().mockReturnValue(stubClient);
    const connectionFactory: MetabaseConnectionClientFactory = { createClient };

    const ingestFactory = new IngestMetabaseClientFactory(connectionFactory);

    await expect(ingestFactory.createClient(pullConfig, fetchContext)).resolves.toBe(stubClient);
  });
});
it('allows the concrete client result shapes used by the relocated Metabase client', () => {
  // The typed declarations below carry most of the checking; the runtime assertions are minimal.

  // A failed connection result that still carries metadata.
  const failedProbe: TestConnectionResult = {
    success: false,
    error: 'API key is invalid',
    metadata: { databases: [] },
  };
  expect(failedProbe.success).toBe(false);

  // Template tag using the hyphenated property names.
  const createdAtTag: MetabaseTemplateTag = {
    id: 'tag-1',
    name: 'created_at',
    type: 'dimension',
    'display-name': 'Created At',
    'widget-type': 'date/range',
  };
  expect(createdAtTag['widget-type']).toBe('date/range');

  // Stage-based native dataset query embedding the tag above.
  const stagedQuery: MetabaseDatasetQuery = {
    type: 'native',
    database: 42,
    stages: [
      {
        'lib/type': 'mbql.stage/native',
        native: 'SELECT * FROM orders WHERE created_at > {{ created_at }}',
        'template-tags': { created_at: createdAtTag },
      },
    ],
  };

  const ordersCard: MetabaseCard = {
    id: 1,
    name: 'Orders',
    type: 'model',
    query_type: 'native',
    database_id: 42,
    dataset_query: stagedQuery,
  };
  expect(ordersCard.dataset_query).toBe(stagedQuery);
});

View file

@ -1,463 +0,0 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
DEFAULT_METABASE_CLIENT_CONFIG,
DefaultMetabaseConnectionClientFactory,
getDummyValueForWidgetType,
MetabaseClient,
stripOptionalClauses,
} from './client.js';
import type { MetabaseCard, MetabaseTemplateTag } from './client-port.js';
// Fake Metabase runtime credentials shared by the client tests in this file (not a real key).
const runtime = {
apiUrl: 'https://metabase.example.test/api',
apiKey: 'test-key-1234', // pragma: allowlist secret
};
// Retry settings with millisecond-scale delays and jitter disabled.
// With maxRetries: 2 the retry-exhaustion test expects 3 fetch calls in total.
const fastRetryConfig = {
maxRetries: 2,
baseDelayMs: 1,
maxDelayMs: 1,
timeoutMs: 5000,
jitter: false,
retryableStatuses: [429, 500, 502, 503, 504],
};
/**
 * Builds a native-query card fixture in the stage-based `dataset_query` shape
 * (a single `mbql.stage/native` stage).
 *
 * @param query - Native SQL placed in the stage's `native` field.
 * @param templateTags - Template tags attached to the stage; defaults to none.
 * @returns A model card with id 1 on database 6.
 */
function nativeCard(query: string, templateTags: Record<string, MetabaseTemplateTag> = {}): MetabaseCard {
  const databaseId = 6;
  const card: MetabaseCard = {
    id: 1,
    name: 'Native card',
    type: 'model',
    query_type: 'native',
    database_id: databaseId,
    dataset_query: {
      type: 'native',
      database: databaseId,
      stages: [
        {
          'lib/type': 'mbql.stage/native',
          native: query,
          'template-tags': templateTags,
        },
      ],
    },
  };
  return card;
}
/**
 * Builds a native-query card fixture in the `dataset_query.native` shape
 * (`{ query, 'template-tags' }` nested under `native`, no `stages` array).
 *
 * @param query - Native SQL placed in `dataset_query.native.query`.
 * @param templateTags - Template tags attached to the native block; defaults to none.
 * @returns A model card with id 1 on database 6.
 */
function legacyNativeCard(query: string, templateTags: Record<string, MetabaseTemplateTag> = {}): MetabaseCard {
  const databaseId = 6;
  const card: MetabaseCard = {
    id: 1,
    name: 'Legacy native card',
    type: 'model',
    query_type: 'native',
    database_id: databaseId,
    dataset_query: {
      type: 'native',
      database: databaseId,
      native: {
        query,
        'template-tags': templateTags,
      },
    },
  };
  return card;
}
describe('DefaultMetabaseConnectionClientFactory', () => {
  it('resolves runtime credentials by the explicit Metabase source connection id and merges overrides', async () => {
    const resolveCredentials = vi.fn().mockResolvedValue(runtime);
    // Factory-level config overrides two defaults ...
    const factoryConfig = {
      ...DEFAULT_METABASE_CLIENT_CONFIG,
      timeoutMs: 60000,
      maxRetries: 4,
    };
    const factory = new DefaultMetabaseConnectionClientFactory(resolveCredentials, factoryConfig);

    // ... and the per-call override replaces only timeoutMs.
    const client = await factory.createClient('metabase-source-1', { timeoutMs: 1000 });

    expect(resolveCredentials).toHaveBeenCalledWith('metabase-source-1');
    expect(client).toBeInstanceOf(MetabaseClient);
    // Reflect.get reads the client's fields by name for inspection.
    expect(Reflect.get(client, 'baseUrl')).toBe('https://metabase.example.test/api');
    expect(Reflect.get(client, 'runtime').apiKey).toBe('test-key-1234');
    expect(Reflect.get(client, 'config').timeoutMs).toBe(1000);
    expect(Reflect.get(client, 'config').maxRetries).toBe(4);
  });
});
describe('MetabaseClient retry exhaustion', () => {
  // globalThis.fetch is replaced per test; the original is restored afterwards.
  let originalFetch: typeof fetch;

  beforeEach(() => {
    originalFetch = globalThis.fetch;
  });

  afterEach(() => {
    globalThis.fetch = originalFetch;
    vi.restoreAllMocks();
  });

  /**
   * Calls `client.getDatabases()` and returns whatever it rejects with.
   * Returns `undefined` when the call resolves, so the `toBeInstanceOf(Error)` check in callers fails.
   */
  async function captureGetDatabasesFailure(client: MetabaseClient): Promise<unknown> {
    try {
      await client.getDatabases();
    } catch (err) {
      return err;
    }
    return undefined;
  }

  it('does not warn to console when retrying by default', async () => {
    const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined);
    // First call fails with a connection reset, second call succeeds with an empty list.
    globalThis.fetch = vi
      .fn<typeof fetch>()
      .mockRejectedValueOnce(Object.assign(new Error('read ECONNRESET'), { code: 'ECONNRESET' }))
      .mockResolvedValueOnce(new Response(JSON.stringify([]), { status: 200 }));
    const client = new MetabaseClient(
      { apiUrl: 'https://metabase.example.test', apiKey: 'key' }, // pragma: allowlist secret
      {
        ...DEFAULT_METABASE_CLIENT_CONFIG,
        baseDelayMs: 0,
        maxRetries: 1,
      },
    );

    await client.getDatabases();

    expect(warn).not.toHaveBeenCalled();
  });

  it('wraps an exhausted ECONNRESET retry chain with method, path, attempt count, and original cause', async () => {
    const sysErr = Object.assign(new Error('read ECONNRESET'), {
      code: 'ECONNRESET',
      errno: -104,
      syscall: 'read',
    });
    const fetchMock = vi.fn<typeof fetch>().mockRejectedValue(sysErr);
    globalThis.fetch = fetchMock;
    const client = new MetabaseClient(runtime, fastRetryConfig);

    const caught = await captureGetDatabasesFailure(client);

    expect(caught).toBeInstanceOf(Error);
    const e = caught as Error & { cause?: unknown; code?: string };
    expect(e.message).toContain('Metabase request failed (3 attempts)');
    expect(e.message).toContain('GET /api/database/');
    expect(e.message).toContain('ECONNRESET');
    expect(e.cause).toBe(sysErr);
    expect(e.code).toBe('ECONNRESET');
    // maxRetries: 2 means one initial attempt plus two retries.
    expect(fetchMock).toHaveBeenCalledTimes(3);
  });

  it('classifies undici mid-TLS-handshake error as TLS-handshake failure', async () => {
    const undiciTlsErr = new Error('Client network socket disconnected before secure TLS connection was established');
    const fetchMock = vi.fn<typeof fetch>().mockRejectedValue(undiciTlsErr);
    globalThis.fetch = fetchMock;
    // maxRetries: 0 makes this a single attempt, so no attempt count is expected in the message.
    const client = new MetabaseClient(runtime, { ...fastRetryConfig, maxRetries: 0 });

    const caught = await captureGetDatabasesFailure(client);

    expect(caught).toBeInstanceOf(Error);
    const e = caught as Error & { cause?: unknown };
    expect(e.message).toMatch(/^Metabase request failed:/);
    expect(e.message).not.toContain('attempts');
    expect(e.message).toContain('TLS handshake to metabase.example.test did not complete');
    expect(e.message).toContain('before secure TLS connection was established');
    expect(e.cause).toBeInstanceOf(Error);
    // The original undici error is expected two levels down the cause chain.
    expect(((e.cause as Error & { cause?: unknown }).cause as Error)?.message).toContain(
      'before secure TLS connection was established',
    );
  });

  it('does not wrap when a non-retryable error short-circuits the loop', async () => {
    const fetchMock = vi
      .fn<typeof fetch>()
      .mockResolvedValue(
        new Response('{"message":"unauthorized"}', { status: 401, headers: { 'content-type': 'application/json' } }),
      );
    globalThis.fetch = fetchMock;
    const client = new MetabaseClient(runtime, fastRetryConfig);

    const caught = await captureGetDatabasesFailure(client);

    expect(caught).toBeInstanceOf(Error);
    const e = caught as Error;
    // The exhausted-retry wrapper formats its count as "(3 attempts)" under fastRetryConfig
    // (see the ECONNRESET test above); a 401 must surface without that wrapper.
    expect(e.message).not.toContain('(3 attempts)');
    expect(fetchMock).toHaveBeenCalledTimes(1);
  });
});
/**
 * Verifies the requests MetabaseClient issues for session creation,
 * permission-group listing, and API-key creation, using a stubbed
 * `globalThis.fetch` that is restored after every test.
 */
describe('MetabaseClient admin auth helpers', () => {
  const apiUrl = 'https://metabase.example.test';
  const jsonHeaders = { 'Content-Type': 'application/json' };
  // Builds a 200 response whose body is the JSON encoding of `payload`.
  const okJson = (payload: unknown): Response => new Response(JSON.stringify(payload), { status: 200 });

  let savedFetch: typeof fetch;

  beforeEach(() => {
    savedFetch = globalThis.fetch;
  });

  afterEach(() => {
    globalThis.fetch = savedFetch;
    vi.restoreAllMocks();
  });

  it('creates a session without sending an auth header', async () => {
    const sessionValue = 'session-fixture';
    const adminCredential = 'admin-fixture';
    const fetchStub = vi.fn<typeof fetch>().mockResolvedValue(okJson({ id: sessionValue }));
    globalThis.fetch = fetchStub;

    // An empty apiKey: the expected headers below contain only the content type.
    const client = new MetabaseClient({ apiUrl, apiKey: '' }, fastRetryConfig);
    const created = client.createSession('admin@example.test', adminCredential);

    await expect(created).resolves.toBe(sessionValue);
    expect(fetchStub).toHaveBeenCalledWith(
      `${apiUrl}/api/session`,
      expect.objectContaining({
        method: 'POST',
        headers: jsonHeaders,
        body: JSON.stringify({ username: 'admin@example.test', password: adminCredential }),
      }),
    );
  });

  it('uses the configured auth header for permission groups and API-key creation', async () => {
    const mintedCredential = 'mb_generated';
    const sessionValue = 'session-fixture';
    const adminGroups = [{ id: 2, name: 'Administrators' }];
    const fetchStub = vi
      .fn<typeof fetch>()
      .mockResolvedValueOnce(okJson(adminGroups))
      .mockResolvedValueOnce(okJson({ unmasked_key: mintedCredential }));
    globalThis.fetch = fetchStub;

    const client = new MetabaseClient(
      { apiUrl, apiKey: sessionValue, authHeaderName: 'X-Metabase-Session' },
      fastRetryConfig,
    );

    await expect(client.getPermissionGroups()).resolves.toEqual([{ id: 2, name: 'Administrators' }]);
    await expect(client.createApiKey({ name: 'KTX CLI test', groupId: 2 })).resolves.toBe(mintedCredential);

    expect(fetchStub).toHaveBeenNthCalledWith(
      1,
      `${apiUrl}/api/permissions/group`,
      expect.objectContaining({
        method: 'GET',
        headers: { ...jsonHeaders, 'X-Metabase-Session': sessionValue },
      }),
    );
    // The camelCase groupId is expected on the wire as snake_case group_id.
    expect(fetchStub).toHaveBeenNthCalledWith(
      2,
      `${apiUrl}/api/api-key`,
      expect.objectContaining({
        method: 'POST',
        body: JSON.stringify({ name: 'KTX CLI test', group_id: 2 }),
      }),
    );
  });
});
/**
 * stripOptionalClauses: `[[ ... ]]` blocks are removed only when they contain
 * a `{{ variable }}`; everything else is returned untouched.
 */
describe('stripOptionalClauses', () => {
  it('drops optional blocks that contain Metabase template variables', () => {
    const sql = 'SELECT * FROM x WHERE 1=1 [[AND a = {{ a }} ]] [[AND b = {{ b }} ]]';
    const stripped = stripOptionalClauses(sql);
    // Both optional blocks and the space between them disappear; the space after "1=1" stays.
    expect(stripped).toBe('SELECT * FROM x WHERE 1=1 ');
  });

  it('preserves bracket sequences that contain no template variables', () => {
    const sql = "SELECT * FROM x WHERE col LIKE '[[abc]]'";
    const stripped = stripOptionalClauses(sql);
    expect(stripped).toBe(sql);
  });

  it('leaves naked template variables intact', () => {
    const sql = 'SELECT * FROM x WHERE id = {{ id }}';
    const stripped = stripOptionalClauses(sql);
    expect(stripped).toBe(sql);
  });
});
/**
 * getDummyValueForWidgetType: date and number widgets map to fixed scalar
 * strings; every other widget type (including `undefined`) maps to
 * `['placeholder']`.
 */
describe('getDummyValueForWidgetType', () => {
  it('returns widget-specific date and number values', () => {
    const scalarCases = [
      ['date/range', '2020-01-01~2020-12-31'],
      ['date/all-options', '2020-01-01~2020-12-31'],
      ['date/single', '2020-01-01'],
      ['date/relative', 'past30days'],
      ['date/month-year', '2020-01'],
      ['date/quarter-year', 'Q1-2020'],
      ['number/=', '1'],
      ['number/between', '1'],
    ] as const;

    for (const [widgetType, expected] of scalarCases) {
      expect(getDummyValueForWidgetType(widgetType)).toBe(expected);
    }
  });

  it('falls back to an array placeholder for string, identifier, and unknown widgets', () => {
    const fallbackWidgetTypes = ['string/=', 'category', undefined] as const;

    for (const widgetType of fallbackWidgetTypes) {
      expect(getDummyValueForWidgetType(widgetType)).toEqual(['placeholder']);
    }
  });
});
/**
 * Cards built by `legacyNativeCard` (fixture defined elsewhere in this file)
 * must expose their SQL and template tags through the same accessors.
 */
describe('MetabaseClient legacy native dataset query support', () => {
  it('reads SQL and template tags from dataset_query.native', async () => {
    const sql = 'SELECT * FROM orders WHERE status = {{ status }}';
    const statusTag = { name: 'status', type: 'text', default: 'paid' };
    const card = legacyNativeCard(sql, { status: statusTag });
    const client = new MetabaseClient(runtime, fastRetryConfig);

    expect(client.getNativeSql(card)).toBe(sql);
    expect(client.getTemplateTags(card)).toEqual({
      status: expect.objectContaining({ name: 'status', type: 'text' }),
    });
    await expect(client.getCardSql(card)).resolves.toBe(sql);
  });
});
/**
 * getResolvedSql: checks which template constructs are resolved locally
 * (optional clauses, saved-question references, snippets) and when the client
 * falls through to `requestWithCustomRetry`, including the error fallback.
 *
 * Client internals are replaced with stubs via `Reflect.set`; `nativeCard` is
 * a fixture defined elsewhere in this file.
 */
describe('MetabaseClient.getResolvedSql', () => {
  /** Creates a client and overwrites the named members with the given stubs. */
  function clientWithStubs(stubs: Record<string, unknown> = {}): MetabaseClient {
    const instance = new MetabaseClient({ apiUrl: 'http://test', apiKey: 'k' });
    for (const [member, stub] of Object.entries(stubs)) {
      Reflect.set(instance, member, stub);
    }
    return instance;
  }

  it('strips optional clauses locally and skips /api/dataset/native when no naked variables remain', async () => {
    const customRetryStub = vi.fn();
    const client = clientWithStubs({ requestWithCustomRetry: customRetryStub });
    const card = nativeCard('SELECT * FROM x WHERE 1=1 [[AND end > {{ auction_end }} ]]', {
      auction_end: {
        id: 'tag-1',
        name: 'auction_end',
        type: 'dimension',
        'widget-type': 'date/all-options',
        'display-name': 'Auction End',
      },
    });

    const resolved = await client.getResolvedSql(card);

    expect(customRetryStub).not.toHaveBeenCalled();
    expect(resolved?.resolutionStatus).toBe('resolved');
    expect(resolved?.resolvedSql).toBe('SELECT * FROM x WHERE 1=1 ');
    expect(resolved?.templateTags[0]).toMatchObject({ name: 'auction_end', type: 'dimension' });
  });

  it('inlines saved-question references locally and skips /api/dataset/native when no other variables remain', async () => {
    const baseCard = {
      id: 5996,
      name: 'Base card',
      type: 'model',
      query_type: 'native',
      database_id: 6,
      dataset_query: {
        type: 'native',
        database: 6,
        stages: [{ 'lib/type': 'mbql.stage/native', native: 'SELECT a, b FROM base' }],
      },
    };
    const customRetryStub = vi.fn();
    const getCardStub = vi.fn().mockResolvedValue(baseCard);
    const client = clientWithStubs({ requestWithCustomRetry: customRetryStub, getCard: getCardStub });
    const card = nativeCard('SELECT * FROM {{#5996-base}} t [[WHERE end > {{ end }}]]', {
      '#5996-base': {
        id: 't1',
        name: '#5996-base',
        type: 'card',
        'card-id': 5996,
      },
      end: {
        id: 't2',
        name: 'end',
        type: 'dimension',
        'widget-type': 'date/range',
      },
    });

    const resolved = await client.getResolvedSql(card);

    expect(customRetryStub).not.toHaveBeenCalled();
    expect(getCardStub).toHaveBeenCalledWith(5996);
    expect(resolved?.resolutionStatus).toBe('resolved');
    // The referenced card's SQL is inlined as a parenthesised subquery.
    expect(resolved?.resolvedSql).toBe('SELECT * FROM (SELECT a, b FROM base) t ');
  });

  it('inlines native-query snippets before checking for remaining variables', async () => {
    const snippetCatalog = [
      {
        id: 1,
        name: 'account_join',
        content: 'LEFT JOIN accounts a ON a.account_id = mart.account_id',
      },
    ];
    const requestStub = vi.fn().mockResolvedValue(snippetCatalog);
    const customRetryStub = vi.fn();
    const client = clientWithStubs({ request: requestStub, requestWithCustomRetry: customRetryStub });
    const card = nativeCard('SELECT a.account_name FROM mart {{snippet: account_join}}', {
      'snippet: account_join': {
        id: 'snippet-tag',
        name: 'snippet: account_join',
        type: 'snippet',
        'snippet-name': 'account_join',
        'snippet-id': 1,
      },
    });

    const resolved = await client.getResolvedSql(card);

    expect(requestStub).toHaveBeenCalledWith('GET', '/api/native-query-snippet');
    expect(customRetryStub).not.toHaveBeenCalled();
    expect(resolved?.resolutionStatus).toBe('resolved');
    expect(resolved?.resolvedSql).toBe(
      'SELECT a.account_name FROM mart LEFT JOIN accounts a ON a.account_id = mart.account_id',
    );
    expect(resolved?.resolvedSql).not.toContain('{{snippet:');
  });

  it('uses /api/dataset/native for naked variables and prepends a warning comment', async () => {
    const customRetryStub = vi.fn().mockResolvedValue({ query: "SELECT * WHERE id = 'placeholder' AND n = 1" });
    const client = clientWithStubs({ requestWithCustomRetry: customRetryStub });
    const card = nativeCard('SELECT * WHERE id = {{ id }} AND n = {{ n }}', {
      id: { id: 't1', name: 'id', type: 'text' },
      n: { id: 't2', name: 'n', type: 'number' },
    });

    const resolved = await client.getResolvedSql(card);

    expect(customRetryStub).toHaveBeenCalledTimes(1);
    expect(resolved?.resolutionStatus).toBe('resolved');
    const sqlText = resolved?.resolvedSql ?? '';
    expect(sqlText.startsWith('--')).toBe(true);
    expect(sqlText).toMatch(/KTX_PLACEHOLDER_WARNING/);
    // NOTE(review): the stubbed query already contains "id" and "n", so these two
    // matches do not by themselves prove the warning comment names the variables.
    expect(sqlText).toMatch(/\bid\b/);
    expect(sqlText).toMatch(/\bn\b/);
  });

  it('falls back to raw native SQL with truthful template tags when /api/dataset/native errors', async () => {
    const customRetryStub = vi.fn().mockRejectedValue(new Error('Metabase 500'));
    const client = clientWithStubs({ requestWithCustomRetry: customRetryStub });
    const card = nativeCard('SELECT * FROM x WHERE end > {{ auction_end }}', {
      auction_end: {
        id: 'tag-id',
        name: 'auction_end',
        type: 'dimension',
        'widget-type': 'date/range',
        'display-name': 'Auction End',
      },
    });

    const resolved = await client.getResolvedSql(card);

    expect(resolved?.resolutionStatus).toBe('fallback');
    expect(resolved?.resolvedSql).toContain('{{ auction_end }}');
    expect(resolved?.templateTags).toHaveLength(1);
    expect(resolved?.templateTags[0]).toMatchObject({
      name: 'auction_end',
      type: 'dimension',
      displayName: 'Auction End',
    });
  });
});

View file

@ -1,49 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { detectMetabaseStagedDir } from './detect.js';
/**
 * Writes `body` (UTF-8) to `relPath` under `stagedDir`, creating any missing
 * parent directories first.
 */
async function touch(stagedDir: string, relPath: string, body: string): Promise<void> {
  const target = join(stagedDir, relPath);
  // join(target, '..') normalises to the directory that contains the file.
  const parentDir = join(target, '..');
  await mkdir(parentDir, { recursive: true });
  await writeFile(target, body, 'utf-8');
}
/**
 * detectMetabaseStagedDir: a directory counts as a Metabase staged dir only
 * when it holds `sync-config.json` and at least one file under `cards/`.
 * Each test works in a fresh temp directory that is removed afterwards.
 */
describe('detectMetabaseStagedDir', () => {
  let workDir: string;

  beforeEach(async () => {
    workDir = await mkdtemp(join(tmpdir(), 'mb-detect-'));
  });

  afterEach(async () => {
    await rm(workDir, { recursive: true, force: true });
  });

  it('returns true when sync-config.json + cards/*.json are present', async () => {
    await touch(workDir, 'sync-config.json', '{}');
    await touch(workDir, 'cards/1.json', '{}');
    await expect(detectMetabaseStagedDir(workDir)).resolves.toBe(true);
  });

  it('returns false when sync-config.json is missing', async () => {
    await touch(workDir, 'cards/1.json', '{}');
    await expect(detectMetabaseStagedDir(workDir)).resolves.toBe(false);
  });

  it('returns false when cards/ is empty', async () => {
    await touch(workDir, 'sync-config.json', '{}');
    // The cards directory exists but contains no files.
    await mkdir(join(workDir, 'cards'), { recursive: true });
    await expect(detectMetabaseStagedDir(workDir)).resolves.toBe(false);
  });

  it('returns false for an empty staged dir', async () => {
    await expect(detectMetabaseStagedDir(workDir)).resolves.toBe(false);
  });

  it('returns true even when the cards dir has one file and extra non-JSON siblings', async () => {
    await touch(workDir, 'sync-config.json', '{}');
    await touch(workDir, 'cards/1.json', '{}');
    // The extra file sits next to cards/ at the root of the staged dir.
    await touch(workDir, 'README.md', '# readme');
    await expect(detectMetabaseStagedDir(workDir)).resolves.toBe(true);
  });
});

View file

@ -1,52 +0,0 @@
import { describe, expect, it } from 'vitest';
import { planMetabaseFanoutChildren } from './fanout-planner.js';
/**
 * planMetabaseFanoutChildren: only mappings that are sync-enabled AND have a
 * target connection produce child plans; invalid or empty results throw.
 */
describe('planMetabaseFanoutChildren', () => {
  const sourceConnection = 'prod-metabase';

  /** Shorthand for one mapping row in the planner input. */
  const mapping = (metabaseDatabaseId: number, targetConnectionId: string | null, syncEnabled: boolean) => ({
    metabaseDatabaseId,
    targetConnectionId,
    syncEnabled,
  });

  /** The child plan expected for a database mapped to `targetConnectionId`. */
  const childPlan = (metabaseDatabaseId: number, targetConnectionId: string) => ({
    metabaseConnectionId: sourceConnection,
    metabaseDatabaseId,
    targetConnectionId,
    pullConfig: { metabaseConnectionId: sourceConnection, metabaseDatabaseId },
  });

  it('builds ordered child plans for sync-enabled mapped Metabase databases', () => {
    const plans = planMetabaseFanoutChildren({
      metabaseConnectionId: sourceConnection,
      mappings: [
        mapping(1, 'warehouse_a', true),
        mapping(2, null, true), // no target: skipped
        mapping(3, 'warehouse_c', false), // sync disabled: skipped
        mapping(4, 'warehouse_b', true),
      ],
    });

    expect(plans).toEqual([childPlan(1, 'warehouse_a'), childPlan(4, 'warehouse_b')]);
  });

  it('rejects invalid generated pull configs before any host enqueues work', () => {
    const planWithZeroDatabaseId = () =>
      planMetabaseFanoutChildren({
        metabaseConnectionId: sourceConnection,
        mappings: [mapping(0, 'warehouse_a', true)],
      });

    expect(planWithZeroDatabaseId).toThrow(/metabaseDatabaseId/);
  });

  it('rejects source states with no sync-enabled target mappings', () => {
    const planWithNothingUsable = () =>
      planMetabaseFanoutChildren({
        metabaseConnectionId: sourceConnection,
        mappings: [mapping(1, null, true), mapping(2, 'warehouse_b', false)],
      });

    expect(planWithNothingUsable).toThrow(
      'no sync-enabled mappings with a target connection for Metabase connection prod-metabase',
    );
  });
});

View file

@ -1,140 +0,0 @@
import { describe, expect, it } from 'vitest';
import { computeFetchScope, type FetchScope, hashScope, isPathInMetabaseScope } from './fetch-scope.js';
import type { StagedSyncConfig } from './types.js';
/**
 * Fields shared by every StagedSyncConfig built in these tests. `syncMode` and
 * `selections` are deliberately left out (see the `Omit` below) so each test
 * supplies its own when spreading this object.
 */
const BASE_CONFIG = {
metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
metabaseDatabaseId: 42,
// The assertion keeps the empty literal typed as string[].
defaultTagNames: [] as string[],
mapping: {
metabaseDatabaseId: 42,
metabaseDatabaseName: 'Analytics',
metabaseEngine: 'postgres',
targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
},
// `satisfies` checks the literal against the config type without widening it.
} satisfies Omit<StagedSyncConfig, 'syncMode' | 'selections'>;
/**
 * computeFetchScope: maps a staged sync config's `syncMode` + `selections`
 * to the scope object used when fetching.
 */
describe('computeFetchScope', () => {
  const itemSelection = (metabaseObjectId: number) => ({ selectionType: 'item' as const, metabaseObjectId });
  const collectionSelection = (metabaseObjectId: number) => ({
    selectionType: 'collection' as const,
    metabaseObjectId,
  });

  /** Runs computeFetchScope on BASE_CONFIG extended with the given mode and selections. */
  function scopeFor(syncMode: StagedSyncConfig['syncMode'], selections: StagedSyncConfig['selections']) {
    return computeFetchScope({ ...BASE_CONFIG, syncMode, selections });
  }

  it('returns { kind: "all" } for syncMode ALL', () => {
    // Selections are ignored in ALL mode.
    expect(scopeFor('ALL', [itemSelection(5)])).toEqual({ kind: 'all' });
  });

  it('returns { kind: "all-except", ... } for syncMode EXCEPT', () => {
    const scope = scopeFor('EXCEPT', [itemSelection(5), collectionSelection(7)]);

    expect(scope).toEqual({
      kind: 'all-except',
      excludeCardIds: new Set([5]),
      excludeCollectionIds: new Set([7]),
    });
  });

  it('returns { kind: "explicit", ... } for syncMode ONLY', () => {
    const scope = scopeFor('ONLY', [itemSelection(5), itemSelection(11), collectionSelection(7)]);

    expect(scope).toEqual({
      kind: 'explicit',
      includeCardIds: new Set([5, 11]),
      includeCollectionIds: new Set([7]),
    });
  });

  it('treats generated ONLY with no selections as all', () => {
    expect(scopeFor('ONLY', [])).toEqual({ kind: 'all' });
  });
});
/**
 * hashScope: the fingerprint must not depend on Set insertion order, must
 * differ between distinct scopes, and is a 64-character lowercase hex string.
 */
describe('hashScope', () => {
  it('produces the same hash for identical inputs', () => {
    // Same members, different insertion order.
    const ascending = hashScope({
      kind: 'explicit',
      includeCardIds: new Set([1, 2, 3]),
      includeCollectionIds: new Set([7]),
    });
    const descending = hashScope({
      kind: 'explicit',
      includeCardIds: new Set([3, 2, 1]),
      includeCollectionIds: new Set([7]),
    });

    expect(ascending).toBe(descending);
  });

  it('produces different hashes for different scopes', () => {
    const everything = hashScope({ kind: 'all' });
    const singleCard = hashScope({
      kind: 'explicit',
      includeCardIds: new Set([1]),
      includeCollectionIds: new Set(),
    });

    expect(everything).not.toBe(singleCard);
  });

  it('produces a 64-char hex string', () => {
    const fingerprint = hashScope({ kind: 'all' });

    expect(fingerprint).toMatch(/^[0-9a-f]{64}$/);
  });
});
/**
 * isPathInMetabaseScope: decides whether a staged file path belongs to a
 * fetch scope. Only `cards/<id>.json` paths are filtered in these tests;
 * config, collections, databases, and unrecognised paths are always in scope.
 */
describe('isPathInMetabaseScope', () => {
  const scopes: Record<'all' | 'except' | 'explicit', FetchScope> = {
    all: { kind: 'all' },
    except: {
      kind: 'all-except',
      excludeCardIds: new Set([100]),
      excludeCollectionIds: new Set([5]),
    },
    explicit: {
      kind: 'explicit',
      includeCardIds: new Set([1, 2]),
      includeCollectionIds: new Set([7]),
    },
  };

  it('always includes sync-config.json', () => {
    for (const scope of [scopes.all, scopes.except, scopes.explicit]) {
      expect(isPathInMetabaseScope('sync-config.json', scope)).toBe(true);
    }
  });

  it('always includes collections/* and databases/*', () => {
    for (const path of ['collections/5.json', 'databases/42.json']) {
      expect(isPathInMetabaseScope(path, scopes.explicit)).toBe(true);
    }
  });

  it('for `all` scope, every cards/<id>.json is in scope', () => {
    for (const path of ['cards/1.json', 'cards/999.json']) {
      expect(isPathInMetabaseScope(path, scopes.all)).toBe(true);
    }
  });

  it('for `all-except` scope, excluded card ids are out of scope', () => {
    const excluded = isPathInMetabaseScope('cards/100.json', scopes.except);
    const notExcluded = isPathInMetabaseScope('cards/101.json', scopes.except);

    expect(excluded).toBe(false);
    expect(notExcluded).toBe(true);
  });

  it('for `explicit` scope, only include-set card ids are in scope', () => {
    const expectations: Array<[string, boolean]> = [
      ['cards/1.json', true],
      ['cards/2.json', true],
      ['cards/3.json', false],
    ];

    for (const [path, inScope] of expectations) {
      expect(isPathInMetabaseScope(path, scopes.explicit)).toBe(inScope);
    }
  });

  it('unknown path shapes default to in-scope (conservative)', () => {
    expect(isPathInMetabaseScope('some-new-dir/whatever.json', scopes.explicit)).toBe(true);
  });
});

View file

@ -1,610 +0,0 @@
import { mkdtemp, readdir, readFile, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { FetchContext } from '../../types.js';
import { fetchMetabaseBundle } from './fetch.js';
// Id of the Metabase source connection; used as `pullConfig.metabaseConnectionId` in the tests below.
const metabaseConnectionId = 'a1b2c3d4-e5f6-4789-9abc-def012345678';
// Id of the target connection; used as the fetch-context `connectionId` and as the mapping's target.
const targetConnectionId = 'b2c3d4e5-f6a7-4890-abcd-ef0123456789';
/**
 * Builds a stand-in Metabase client whose methods are vitest mocks.
 *
 * The card listing contains one usable card (id 1), one archived card (id 2),
 * and one card from another database (id 3). `getCard` and `getResolvedSql`
 * derive their answers from the requested card id.
 */
function makeMockClient() {
  const cardListing = [
    { id: 1, name: 'Orders', archived: false, database_id: 42, collection_id: 5 },
    { id: 2, name: 'Old orders (archived)', archived: true, database_id: 42, collection_id: 5 },
    { id: 3, name: 'Wrong DB', archived: false, database_id: 999, collection_id: 5 },
  ];
  const collectionTree = [{ id: 5, name: 'Orders Team', parent_id: null, children: [] }];
  const analyticsDatabase = {
    id: 42,
    name: 'Analytics',
    engine: 'postgres',
    details: { host: 'db.example.test', dbname: 'analytics' },
  };

  const getCard = vi.fn().mockImplementation((id: number) => {
    const cardDetail = {
      id,
      name: `Card ${id}`,
      description: null,
      type: 'model',
      database_id: 42,
      collection_id: 5,
      archived: false,
      result_metadata: [{ name: 'id', base_type: 'type/Integer' }],
    };
    return Promise.resolve(cardDetail);
  });

  const getResolvedSql = vi.fn().mockImplementation((card: { id: number }) => {
    const resolution = {
      resolvedSql: `SELECT * FROM card_${card.id}`,
      templateTags: [],
      resolutionStatus: 'resolved',
    };
    return Promise.resolve(resolution);
  });

  return {
    getAllCards: vi.fn().mockResolvedValue(cardListing),
    getCard,
    getResolvedSql,
    getCollectionTree: vi.fn().mockResolvedValue(collectionTree),
    getCollectionItems: vi.fn().mockResolvedValue([]),
    getDatabase: vi.fn().mockResolvedValue(analyticsDatabase),
    cleanup: vi.fn().mockResolvedValue(undefined),
  };
}
/**
 * fetchMetabaseBundle: end-to-end behaviour against a mocked Metabase client
 * and source-state reader, writing into a per-test temp directory.
 *
 * `runFetch` issues the standard call (database 42, default fetch context,
 * shared mocks); individual tests pass overrides or re-program the mocks.
 */
describe('fetchMetabaseBundle', () => {
  type FetchArgs = Parameters<typeof fetchMetabaseBundle>[0];

  let stagedDir: string;
  let clientFactory: ReturnType<typeof makeClientFactory>;
  let sourceStateReader: ReturnType<typeof makeSourceStateReader>;

  /** Factory whose `createClient` resolves to a fresh mock client, also exposed as `__client`. */
  function makeClientFactory() {
    const mockClient = makeMockClient();
    return {
      createClient: vi.fn().mockResolvedValue(mockClient),
      __client: mockClient,
    };
  }

  function makeFetchContext(connectionId = targetConnectionId): FetchContext {
    return { connectionId, sourceKey: 'metabase' };
  }

  /** Mapping row for database 42; `overrides` replaces individual fields. */
  function analyticsMapping(overrides: { metabaseDatabaseName?: string | null; targetConnectionId?: string } = {}) {
    return {
      metabaseDatabaseId: 42,
      metabaseDatabaseName: 'Analytics',
      metabaseEngine: 'postgres',
      targetConnectionId,
      syncEnabled: true,
      ...overrides,
    };
  }

  /** Source state in ALL mode with no selections and the given mappings. */
  function sourceState(mappings: unknown[]) {
    return {
      syncMode: 'ALL',
      selections: [],
      mappings,
      defaultTagNames: [],
    };
  }

  function makeSourceStateReader() {
    return {
      getSourceState: vi.fn().mockResolvedValue(sourceState([analyticsMapping()])),
    };
  }

  /** Calls fetchMetabaseBundle with the standard arguments, letting a test override any of them. */
  function runFetch(overrides: Partial<FetchArgs> = {}) {
    return fetchMetabaseBundle({
      pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 },
      stagedDir,
      ctx: makeFetchContext(),
      clientFactory,
      sourceStateReader,
      ...overrides,
    });
  }

  /** Reads and parses a JSON file relative to the staged directory. */
  async function readStagedJson(relPath: string) {
    return JSON.parse(await readFile(join(stagedDir, relPath), 'utf-8'));
  }

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'mb-fetch-'));
    clientFactory = makeClientFactory();
    sourceStateReader = makeSourceStateReader();
  });

  afterEach(async () => {
    vi.restoreAllMocks();
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('writes sync-config.json, one database file, one collection file, and only non-archived cards matching databaseId', async () => {
    await runFetch();

    const cardFiles = await readdir(join(stagedDir, 'cards'));
    expect(cardFiles.sort()).toEqual(['1.json']);
    const collectionFiles = await readdir(join(stagedDir, 'collections'));
    expect(collectionFiles).toEqual(['5.json']);
    const databaseFiles = await readdir(join(stagedDir, 'databases'));
    expect(databaseFiles).toEqual(['42.json']);

    const syncConfig = await readStagedJson('sync-config.json');
    expect(syncConfig.metabaseDatabaseId).toBe(42);
    expect(syncConfig.mapping.targetConnectionId).toBe(targetConnectionId);

    const card = await readStagedJson('cards/1.json');
    expect(card.metabaseId).toBe(1);
    expect(card.resolvedSql).toBe('SELECT * FROM card_1');
    expect(card.resolutionStatus).toBe('resolved');
    expect(card.collectionPath).toEqual(['Orders Team']);
    expect(card.archived).toBe(false);
  });

  it('does not write Metabase fetch progress to console by default', async () => {
    const logSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined);
    const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => undefined);

    await runFetch();

    expect(logSpy).not.toHaveBeenCalled();
    expect(warnSpy).not.toHaveBeenCalled();
  });

  it('emits memory-flow progress while fetching Metabase cards', async () => {
    const events: unknown[] = [];
    const sourceProgress = (message: string, extra: Record<string, unknown> = {}) =>
      expect.objectContaining({ type: 'stage_progress', stage: 'source', message, ...extra });

    await runFetch({
      ctx: {
        ...makeFetchContext(),
        memoryFlow: {
          emit: (event) => events.push(event),
          update: vi.fn(),
          finish: vi.fn(),
          snapshot: vi.fn(),
        },
      },
    });

    expect(events).toEqual(
      expect.arrayContaining([
        sourceProgress('Fetching Metabase database 42 metadata'),
        sourceProgress('Fetching 1 Metabase card for database 42'),
        sourceProgress('Checked 1/1 Metabase cards for database 42; wrote 1', { transient: true }),
        sourceProgress('Fetched Metabase database 42: 1 cards, 0 unresolved'),
      ]),
    );
  });

  it('routes Metabase fetch warnings through the injected logger', async () => {
    const logger = { log: vi.fn(), warn: vi.fn() };
    clientFactory.__client.getCard.mockRejectedValueOnce(new Error('card read failed'));

    await runFetch({ logger });

    expect(logger.warn).toHaveBeenCalledWith('failed to load card 1: card read failed');
  });

  it('passes the Metabase source pull config and target fetch context to the client factory', async () => {
    await runFetch();

    const { createClient } = clientFactory;
    expect(createClient).toHaveBeenCalledTimes(1);
    expect(createClient).toHaveBeenCalledWith(
      { metabaseConnectionId, metabaseDatabaseId: 42 },
      { connectionId: targetConnectionId, sourceKey: 'metabase' },
    );
  });

  it('reads source state by the Metabase source connection id, not the target fetch context connection id', async () => {
    await runFetch();

    const { getSourceState } = sourceStateReader;
    expect(getSourceState).toHaveBeenCalledTimes(1);
    expect(getSourceState).toHaveBeenCalledWith(metabaseConnectionId);
    expect(getSourceState).not.toHaveBeenCalledWith(targetConnectionId);
  });

  it('cleans up the client after a successful fetch', async () => {
    await runFetch();

    expect(clientFactory.__client.cleanup).toHaveBeenCalledTimes(1);
  });

  it('cleans up the client when fetch fails after client creation', async () => {
    clientFactory.__client.getCollectionTree.mockRejectedValueOnce(new Error('collection tree unavailable'));

    await expect(runFetch()).rejects.toThrow('collection tree unavailable');

    expect(clientFactory.__client.cleanup).toHaveBeenCalledTimes(1);
  });

  it('throws BadRequestException when the requested metabaseDatabaseId has no matching sync-enabled mapping', async () => {
    sourceStateReader.getSourceState.mockResolvedValue(sourceState([]));

    await expect(runFetch()).rejects.toThrow(/no sync-enabled mapping for database 42/);
  });

  it('throws BadRequestException when the mapping points to a different target connection than the job', async () => {
    const otherTarget = analyticsMapping({ targetConnectionId: 'c3d4e5f6-a7b8-4901-bcde-f01234567890' });
    sourceStateReader.getSourceState.mockResolvedValue(sourceState([otherTarget]));

    await expect(runFetch()).rejects.toThrow(/mapping.*does not point to connection/);
  });

  it('hydrates missing mapping metadata from Metabase instead of requiring a prior refresh', async () => {
    // The stored mapping has no database name; the mock client's getDatabase supplies 'Analytics'.
    const unnamedMapping = analyticsMapping({ metabaseDatabaseName: null });
    sourceStateReader.getSourceState.mockResolvedValue(sourceState([unnamedMapping]));

    await runFetch();

    expect(clientFactory.__client.getDatabase).toHaveBeenCalledWith(42);
    const databaseFile = await readStagedJson('databases/42.json');
    expect(databaseFile).toMatchObject({
      metabaseDatabaseId: 42,
      metabaseDatabaseName: 'Analytics',
      metabaseEngine: 'postgres',
      targetConnectionId,
    });
  });

  it('skips cards whose getResolvedSql returns null and records them in unresolved-cards.json', async () => {
    clientFactory.__client.getResolvedSql.mockResolvedValue(null);

    await runFetch();

    // A missing cards directory is treated the same as an empty one.
    const cardFiles = await readdir(join(stagedDir, 'cards')).catch(() => []);
    expect(cardFiles).toEqual([]);
    const unresolved = await readStagedJson('unresolved-cards.json');
    expect(unresolved).toEqual([expect.objectContaining({ cardId: 1, name: 'Card 1', reason: 'api_500' })]);
  });

  it('records referenced cards via `{{#N}}` in resolvedSql', async () => {
    clientFactory.__client.getResolvedSql.mockImplementation((card: { id: number }) => {
      // Only card 1 references another card (999).
      const referencesOther = card.id === 1;
      return Promise.resolve({
        resolvedSql: referencesOther ? 'SELECT * FROM {{#999}}' : `SELECT * FROM card_${card.id}`,
        templateTags: referencesOther ? [{ name: 'r', type: 'card', cardReference: 999 }] : [],
        resolutionStatus: 'resolved',
      });
    });

    await runFetch();

    const card = await readStagedJson('cards/1.json');
    expect(card.referencedCardIds).toEqual([999]);
  });
});
/* eslint-disable @typescript-eslint/require-await -- mock fixtures return constants */
describe('fetchMetabaseBundle — scoped fetch', () => {
it('ONLY scope fetches exactly the selected card ids (no reference closure)', async () => {
const staged = await mkdtemp(join(tmpdir(), 'mb-fetch-only-'));
try {
const catalog = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].map((id) => ({
id,
name: `Card ${id}`,
type: 'model',
database_id: 42,
collection_id: 5,
archived: false,
result_metadata: [],
}));
const getCardCalls: number[] = [];
const client: any = {
getAllCards: async () =>
catalog.map((c) => ({
id: c.id,
database_id: c.database_id,
archived: false,
collection_id: c.collection_id,
})),
getCard: async (id: number) => {
getCardCalls.push(id);
const c = catalog.find((x) => x.id === id);
if (!c) {
throw new Error(`no such card ${id}`);
}
return c;
},
getResolvedSql: async () => ({ resolvedSql: 'SELECT 1', templateTags: [], resolutionStatus: 'resolved' }),
getCollectionTree: async () => [{ id: 5, name: 'Col5', parent_id: null }],
getCollectionItems: async () => [],
cleanup: async () => {},
};
await fetchMetabaseBundle({
pullConfig: { metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', metabaseDatabaseId: 42 },
stagedDir: staged,
ctx: { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' },
clientFactory: { createClient: () => client },
sourceStateReader: {
getSourceState: async () => ({
syncMode: 'ONLY',
selections: [
{ selectionType: 'item', metabaseObjectId: 2 },
{ selectionType: 'item', metabaseObjectId: 5 },
{ selectionType: 'item', metabaseObjectId: 8 },
],
mappings: [
{
metabaseDatabaseId: 42,
metabaseDatabaseName: 'Analytics',
metabaseEngine: 'postgres',
targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
syncEnabled: true,
},
],
defaultTagNames: [],
}),
} as any,
});
expect([...getCardCalls].sort((a, b) => a - b)).toEqual([2, 5, 8]);
} finally {
await rm(staged, { recursive: true, force: true });
}
});
/**
 * ONLY sync mode with a single collection selection (collection 7).
 * `getAllCards` returns nothing, so the two expected cards (100 and 101) can
 * only be found through `getCollectionItems(7)`.
 */
it('ONLY scope walks collections via getCollectionItems', async () => {
  const stagedDir = await mkdtemp(join(tmpdir(), 'mb-fetch-col-'));
  try {
    // Every id handed to client.getCard, in call order.
    const requestedIds: number[] = [];
    // Contents reported for collection 7; every other collection is empty.
    const itemsInCollection7 = [
      { id: 100, model: 'card' },
      { id: 101, model: 'card' },
    ];
    const client: any = {
      async getAllCards() {
        return [];
      },
      async getCard(id: number) {
        requestedIds.push(id);
        return {
          id,
          name: `Card ${id}`,
          type: 'model',
          database_id: 42,
          collection_id: 7,
          archived: false,
          result_metadata: [],
        };
      },
      async getResolvedSql() {
        return { resolvedSql: 'SELECT 1', templateTags: [], resolutionStatus: 'resolved' };
      },
      async getCollectionTree() {
        return [{ id: 7, name: 'Col7', parent_id: null }];
      },
      async getCollectionItems(collectionId: number) {
        if (collectionId === 7) {
          return itemsInCollection7;
        }
        return [];
      },
      async cleanup() {},
    };
    // A fresh source-state snapshot is built on every read.
    const buildSourceState = () => ({
      syncMode: 'ONLY',
      selections: [{ selectionType: 'collection', metabaseObjectId: 7 }],
      mappings: [
        {
          metabaseDatabaseId: 42,
          metabaseDatabaseName: 'Analytics',
          metabaseEngine: 'postgres',
          targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
          syncEnabled: true,
        },
      ],
      defaultTagNames: [],
    });

    await fetchMetabaseBundle({
      pullConfig: { metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', metabaseDatabaseId: 42 },
      stagedDir,
      ctx: { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' },
      clientFactory: { createClient: () => client },
      sourceStateReader: { getSourceState: async () => buildSourceState() } as any,
    });

    // Compare a sorted copy so the assertion does not depend on fetch order.
    const sortedIds = [...requestedIds].sort((left, right) => left - right);
    expect(sortedIds).toEqual([100, 101]);
  } finally {
    await rm(stagedDir, { recursive: true, force: true });
  }
});
/**
 * ONLY sync mode where the single selected card (1) references card 2, which
 * in turn references card 3. The references are exposed both as `{{#N}}`
 * markers in the resolved SQL and as `card` template tags; the test passes
 * when `getCard` was requested for 1, 2 and 3.
 */
it('ONLY scope closes over {{#N}} references, bounded', async () => {
  const stagedDir = await mkdtemp(join(tmpdir(), 'mb-fetch-ref-'));
  try {
    // Every id handed to client.getCard, in call order.
    const requestedIds: number[] = [];
    // Reference chain: card id -> ids of the cards it references.
    const refs: Record<number, number[]> = { 1: [2], 2: [3], 3: [] };
    const referencesOf = (cardId: number): number[] => refs[cardId] ?? [];
    const client: any = {
      async getAllCards() {
        return [];
      },
      async getCard(id: number) {
        requestedIds.push(id);
        return {
          id,
          name: `Card ${id}`,
          type: 'model',
          database_id: 42,
          collection_id: null,
          archived: false,
          result_metadata: [],
        };
      },
      async getResolvedSql(card: any) {
        const markers = referencesOf(card.id).map((ref) => `{{#${ref}}}`);
        const tags = referencesOf(card.id).map((ref) => ({ name: `#${ref}`, type: 'card', cardReference: ref }));
        return {
          resolvedSql: `SELECT 1 ${markers.join(' ')}`,
          templateTags: tags,
          resolutionStatus: 'resolved',
        };
      },
      async getCollectionTree() {
        return [];
      },
      async getCollectionItems() {
        return [];
      },
      async cleanup() {},
    };
    // A fresh source-state snapshot is built on every read.
    const buildSourceState = () => ({
      syncMode: 'ONLY',
      selections: [{ selectionType: 'item', metabaseObjectId: 1 }],
      mappings: [
        {
          metabaseDatabaseId: 42,
          metabaseDatabaseName: 'Analytics',
          metabaseEngine: 'postgres',
          targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
          syncEnabled: true,
        },
      ],
      defaultTagNames: [],
    });

    await fetchMetabaseBundle({
      pullConfig: { metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', metabaseDatabaseId: 42 },
      stagedDir,
      ctx: { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' },
      clientFactory: { createClient: () => client },
      sourceStateReader: { getSourceState: async () => buildSourceState() } as any,
    });

    // Compare a sorted copy so the assertion does not depend on fetch order.
    const sortedIds = [...requestedIds].sort((left, right) => left - right);
    expect(sortedIds).toEqual([1, 2, 3]);
  } finally {
    await rm(stagedDir, { recursive: true, force: true });
  }
});
/**
 * ONLY sync mode where cards 1 and 2 reference each other through `card`
 * template tags (the resolved SQL itself carries no markers). The fetch must
 * finish, and `getCard` must have been requested once for each of the two cards.
 */
it('ONLY with cyclical refs does not infinite-loop', async () => {
  const stagedDir = await mkdtemp(join(tmpdir(), 'mb-fetch-cycle-'));
  try {
    // Every id handed to client.getCard, in call order.
    const requestedIds: number[] = [];
    // Two-card cycle: 1 -> 2 -> 1.
    const refs: Record<number, number[]> = { 1: [2], 2: [1] };
    const referencesOf = (cardId: number): number[] => refs[cardId] ?? [];
    const client: any = {
      async getAllCards() {
        return [];
      },
      async getCard(id: number) {
        requestedIds.push(id);
        return {
          id,
          name: `Card ${id}`,
          type: 'model',
          database_id: 42,
          collection_id: null,
          archived: false,
          result_metadata: [],
        };
      },
      async getResolvedSql(card: any) {
        const tags = referencesOf(card.id).map((ref) => ({ name: `#${ref}`, type: 'card', cardReference: ref }));
        return {
          resolvedSql: `SELECT 1`,
          templateTags: tags,
          resolutionStatus: 'resolved',
        };
      },
      async getCollectionTree() {
        return [];
      },
      async getCollectionItems() {
        return [];
      },
      async cleanup() {},
    };
    // A fresh source-state snapshot is built on every read.
    const buildSourceState = () => ({
      syncMode: 'ONLY',
      selections: [{ selectionType: 'item', metabaseObjectId: 1 }],
      mappings: [
        {
          metabaseDatabaseId: 42,
          metabaseDatabaseName: 'Analytics',
          metabaseEngine: 'postgres',
          targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
          syncEnabled: true,
        },
      ],
      defaultTagNames: [],
    });

    await fetchMetabaseBundle({
      pullConfig: { metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678', metabaseDatabaseId: 42 },
      stagedDir,
      ctx: { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' },
      clientFactory: { createClient: () => client },
      sourceStateReader: { getSourceState: async () => buildSourceState() } as any,
    });

    // Compare a sorted copy so the assertion does not depend on fetch order.
    const sortedIds = [...requestedIds].sort((left, right) => left - right);
    expect(sortedIds).toEqual([1, 2]);
  } finally {
    await rm(stagedDir, { recursive: true, force: true });
  }
});
});

View file

@ -1,74 +0,0 @@
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxProjectConnectionConfig } from '../../../../context/project/config.js';
import { metabaseRuntimeConfigFromLocalConnection } from './local-metabase.adapter.js';
/**
 * Specs for `metabaseRuntimeConfigFromLocalConnection`: resolving `api_key_ref`
 * through an `env:` reference and a `file:` reference, and the errors thrown
 * for proxy-bearing and non-Metabase connections.
 */
describe('metabaseRuntimeConfigFromLocalConnection', () => {
  // Per-test scratch directory; holds the key file used by the file-backed case.
  let scratchDir: string;

  beforeEach(async () => {
    scratchDir = await mkdtemp(join(tmpdir(), 'ktx-metabase-runtime-'));
  });

  afterEach(async () => {
    await rm(scratchDir, { recursive: true, force: true });
  });

  it('resolves api_url and env-backed api_key_ref from a flat ktx.yaml connection', () => {
    const connection: KtxProjectConnectionConfig = {
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
      api_key_ref: 'env:METABASE_API_KEY', // pragma: allowlist secret
    };

    // The third argument supplies the variables that `env:` references resolve against.
    const runtimeConfig = metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection, {
      METABASE_API_KEY: 'mb_key', // pragma: allowlist secret
    });

    expect(runtimeConfig).toEqual({
      apiUrl: 'https://metabase.example.com',
      apiKey: 'mb_key', // pragma: allowlist secret
    });
  });

  it('resolves file-backed api_key_ref from pasted setup secrets', async () => {
    const secretFile = join(scratchDir, 'metabase-main-api-key');
    // The file ends with a newline; the expected key below does not.
    await writeFile(secretFile, 'mb_file_key\n', 'utf-8'); // pragma: allowlist secret
    const connection: KtxProjectConnectionConfig = {
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
      api_key_ref: `file:${secretFile}`, // pragma: allowlist secret
    };

    const runtimeConfig = metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection);

    expect(runtimeConfig).toEqual({
      apiUrl: 'https://metabase.example.com',
      apiKey: 'mb_file_key', // pragma: allowlist secret
    });
  });

  it('rejects proxy-bearing local Metabase connections', () => {
    const connection: KtxProjectConnectionConfig = {
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
      api_key: 'literal-test-key', // pragma: allowlist secret
      networkProxy: { type: 'ssh' },
    };

    const attempt = () => metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection);

    expect(attempt).toThrow('Standalone KTX does not support proxy-bearing Metabase connections yet');
  });

  it('rejects non-Metabase source connections', () => {
    const connection: KtxProjectConnectionConfig = {
      driver: 'postgres',
      url: 'postgres://localhost/db',
    };

    const attempt = () => metabaseRuntimeConfigFromLocalConnection('warehouse', connection);

    expect(attempt).toThrow('Connection "warehouse" is not a Metabase connection');
  });
});

View file

@ -1,115 +0,0 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { buildDefaultKtxProjectConfig } from '../../../../context/project/config.js';
import { connectionConfigSchema } from '../../../project/driver-schemas.js';
import { KtxYamlMetabaseSourceStateReader, LocalMetabaseDiscoveryCache } from './local-source-state-store.js';
/**
 * Specs for `KtxYamlMetabaseSourceStateReader` backed by a
 * `LocalMetabaseDiscoveryCache`: mapping intent comes from the project config,
 * while discovered database metadata comes from the cache.
 */
describe('Metabase YAML source state and discovery cache', () => {
  // Per-test scratch directory that hosts the cache's sqlite file.
  let workDir: string;
  let discoveryCache: LocalMetabaseDiscoveryCache;

  beforeEach(async () => {
    workDir = await mkdtemp(join(tmpdir(), 'ktx-metabase-cache-'));
    const dbPath = join(workDir, '.ktx', 'db.sqlite');
    discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath });
  });

  afterEach(async () => {
    await rm(workDir, { recursive: true, force: true });
  });

  /** Builds a project whose only connection is `prod-metabase` carrying the given `mappings` block. */
  const projectWithMetabaseMappings = (mappings: Record<string, unknown>) => {
    const defaults = buildDefaultKtxProjectConfig();
    const metabaseConnection = connectionConfigSchema.parse({
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
      mappings,
    });
    return {
      config: {
        ...defaults,
        connections: { 'prod-metabase': metabaseConnection },
      },
    };
  };

  it('reads Metabase mapping intent from ktx.yaml config', async () => {
    const project = projectWithMetabaseMappings({
      databaseMappings: { '2': 'warehouse' },
      syncEnabled: { '2': true },
      syncMode: 'ONLY',
      selections: { collections: [12], items: [99] },
      defaultTagNames: ['analytics'],
    });
    const reader = new KtxYamlMetabaseSourceStateReader(project, { discoveryCache });

    // Nothing has been discovered yet, so the descriptive mapping fields are null.
    const expectedState = {
      syncMode: 'ONLY',
      defaultTagNames: ['analytics'],
      selections: [
        { selectionType: 'collection', metabaseObjectId: 12 },
        { selectionType: 'item', metabaseObjectId: 99 },
      ],
      mappings: [
        {
          metabaseDatabaseId: 2,
          metabaseDatabaseName: null,
          metabaseEngine: null,
          metabaseHost: null,
          metabaseDbName: null,
          targetConnectionId: 'warehouse',
          syncEnabled: true,
        },
      ],
    };
    await expect(reader.getSourceState('prod-metabase')).resolves.toEqual(expectedState);
  });

  it('enriches YAML mapping rows with recreatable discovery metadata', async () => {
    await discoveryCache.refreshDiscoveredDatabases({
      connectionId: 'prod-metabase',
      discovered: [{ id: 2, name: 'Analytics', engine: 'postgres', host: 'pg.internal', dbName: 'analytics' }],
    });
    const project = projectWithMetabaseMappings({
      databaseMappings: { '2': 'warehouse' },
      syncEnabled: { '2': true },
    });
    const reader = new KtxYamlMetabaseSourceStateReader(project, { discoveryCache });

    const expectedRows = [
      {
        metabaseDatabaseId: 2,
        metabaseDatabaseName: 'Analytics',
        metabaseEngine: 'postgres',
        metabaseHost: 'pg.internal',
        metabaseDbName: 'analytics',
        targetConnectionId: 'warehouse',
        syncEnabled: true,
        source: 'ktx.yaml',
      },
    ];
    await expect(reader.listDatabaseMappings('prod-metabase')).resolves.toMatchObject(expectedRows);
  });

  it('lists discovered-only rows as refresh cache data without turning them into config state', async () => {
    await discoveryCache.refreshDiscoveredDatabases({
      connectionId: 'prod-metabase',
      discovered: [{ id: 7, name: 'Unmapped', engine: 'mysql', host: 'mysql.internal', dbName: 'sales' }],
    });
    const reader = new KtxYamlMetabaseSourceStateReader(projectWithMetabaseMappings({}), { discoveryCache });

    // The discovered database must not appear in the source state...
    await expect(reader.getSourceState('prod-metabase')).resolves.toMatchObject({ mappings: [] });
    // ...but it is listed as an unmapped, sync-disabled row sourced from the refresh.
    const expectedRows = [
      {
        metabaseDatabaseId: 7,
        metabaseDatabaseName: 'Unmapped',
        targetConnectionId: null,
        syncEnabled: false,
        source: 'refresh',
      },
    ];
    await expect(reader.listDatabaseMappings('prod-metabase')).resolves.toMatchObject(expectedRows);
  });
});

View file

@ -1,295 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import type { MetabaseRuntimeClient } from './client-port.js';
import {
METABASE_ENGINE_TO_CONNECTION_TYPE,
computeMetabaseMappingDrift,
computeMetabaseMappingPhysicalMismatches,
discoverMetabaseDatabases,
findBestMatch,
refreshMetabaseMapping,
validateMappingPhysicalMatch,
validateMetabaseMappings,
} from './mapping.js';
/**
 * Specs for `discoverMetabaseDatabases`: sample databases are dropped and the
 * database name is read from either `details.dbname` or `details.db`.
 */
describe('discoverMetabaseDatabases', () => {
  it('filters sample databases and extracts host plus database names from Metabase details', async () => {
    // Only `getDatabases` is stubbed; the double cast widens the partial stub to the full client type.
    const getDatabases = vi.fn().mockResolvedValue([
      {
        id: 1,
        name: 'Sample',
        engine: 'postgres',
        details: { host: 'sample.internal', dbname: 'sample' },
        is_sample: true,
      },
      {
        id: 2,
        name: 'Analytics',
        engine: 'postgres',
        details: { host: 'pg.internal:5432', dbname: 'analytics' },
        is_sample: false,
      },
      {
        id: 3,
        name: 'Warehouse',
        engine: 'mysql',
        details: { host: 'mysql.internal', db: 'warehouse' },
        is_sample: false,
      },
    ]);
    const client = { getDatabases } as Pick<MetabaseRuntimeClient, 'getDatabases'> as MetabaseRuntimeClient;

    // The host keeps its `:5432` suffix at discovery time.
    const expectedDatabases = [
      { id: 2, name: 'Analytics', engine: 'postgres', host: 'pg.internal:5432', dbName: 'analytics' },
      { id: 3, name: 'Warehouse', engine: 'mysql', host: 'mysql.internal', dbName: 'warehouse' },
    ];
    await expect(discoverMetabaseDatabases(client)).resolves.toEqual(expectedDatabases);
  });
});
/**
 * Specs for `computeMetabaseMappingDrift`, which compares the configured
 * mappings against the databases discovered in Metabase.
 */
describe('computeMetabaseMappingDrift', () => {
  it('reports unmapped discovered databases, stale mappings, and in-sync mappings', () => {
    // Database 2 is mapped and discovered, 3 is discovered only, 9 is mapped only.
    const result = computeMetabaseMappingDrift({
      currentMappings: {
        '2': 'target-postgres',
        '9': 'target-stale',
      },
      discovered: [
        { id: 2, name: 'Analytics', engine: 'postgres', host: 'pg.internal', dbName: 'analytics' },
        { id: 3, name: 'Warehouse', engine: 'mysql', host: 'mysql.internal', dbName: 'warehouse' },
      ],
    });

    const expectedDrift = {
      unmappedDiscovered: [
        { id: 3, name: 'Warehouse', engine: 'mysql', host: 'mysql.internal', dbName: 'warehouse' },
      ],
      staleMappings: [{ id: '9', reason: 'database_not_found' }],
      inSync: [{ id: 2, ktxConnectionId: 'target-postgres' }],
    };
    expect(result).toEqual(expectedDrift);
  });
});
/**
 * Specs for `validateMetabaseMappings`, which checks each mapping's target
 * against the set of known KTX connection ids.
 */
describe('validateMetabaseMappings', () => {
  it('accepts mappings whose target connection ids exist', () => {
    const verdict = validateMetabaseMappings({
      mappings: { '2': 'target-postgres' },
      knownKtxConnectionIds: new Set(['target-postgres']),
    });

    expect(verdict).toEqual({ ok: true });
  });

  it('returns one error per missing target connection id', () => {
    // Only mapping '2' points at an unknown connection; '3' is valid.
    const verdict = validateMetabaseMappings({
      mappings: { '2': 'missing-target', '3': 'target-mysql' },
      knownKtxConnectionIds: new Set(['target-mysql']),
    });

    expect(verdict).toEqual({
      ok: false,
      errors: [{ key: '2', reason: 'KTX connection missing-target does not exist' }],
    });
  });
});
/**
 * Specs for `validateMappingPhysicalMatch`. In these cases a `null` result
 * stands for "no mismatch found" and a string result carries the mismatch reason.
 */
describe('validateMappingPhysicalMatch', () => {
  it('returns null when Snowflake mapping points at the same database', () => {
    const verdict = validateMappingPhysicalMatch(
      { metabaseEngine: 'snowflake', metabaseDbName: 'ANALYTICS', metabaseHost: null },
      { connection_type: 'SNOWFLAKE', database: 'ANALYTICS', account: 'EMOVRJS-CZ07756' },
    );

    expect(verdict).toBeNull();
  });

  it('returns a reason when Snowflake mapping points at a different database', () => {
    const mismatch = validateMappingPhysicalMatch(
      { metabaseEngine: 'snowflake', metabaseDbName: 'SNAPSHOTS', metabaseHost: null },
      { connection_type: 'SNOWFLAKE', database: 'ANALYTICS', account: 'EMOVRJS-CZ07756' },
    );

    // The reason is expected to name both databases.
    expect(mismatch).toContain('SNAPSHOTS');
    expect(mismatch).toContain('ANALYTICS');
  });

  it('returns a reason when engine type mismatches', () => {
    const mismatch = validateMappingPhysicalMatch(
      { metabaseEngine: 'snowflake', metabaseDbName: 'ANALYTICS', metabaseHost: null },
      { connection_type: 'POSTGRESQL', database: 'ANALYTICS', host: 'pg.internal' },
    );

    expect(mismatch).toContain('engine');
  });

  it('returns null when Postgres host and database both match after normalization', () => {
    // The two sides differ only in letter case and in the `:5432` port suffix.
    const verdict = validateMappingPhysicalMatch(
      { metabaseEngine: 'postgres', metabaseDbName: 'app', metabaseHost: 'PG.INTERNAL:5432' },
      { connection_type: 'POSTGRESQL', host: 'pg.internal', database: 'APP' },
    );

    expect(verdict).toBeNull();
  });

  it('returns a reason when Postgres host matches but database differs', () => {
    const mismatch = validateMappingPhysicalMatch(
      { metabaseEngine: 'postgres', metabaseDbName: 'app', metabaseHost: 'pg.internal' },
      { connection_type: 'POSTGRESQL', host: 'pg.internal', database: 'other_app' },
    );

    expect(mismatch).toContain('app');
    expect(mismatch).toContain('other_app');
  });

  it('uses BigQuery dataset_id before project_id when comparing database names', () => {
    // The Metabase name equals dataset_id and differs from project_id.
    const verdict = validateMappingPhysicalMatch(
      { metabaseEngine: 'bigquery', metabaseDbName: 'analytics_dataset', metabaseHost: null },
      { connection_type: 'BIGQUERY', dataset_id: 'analytics_dataset', project_id: 'warehouse-project' },
    );

    expect(verdict).toBeNull();
  });

  it('returns null for unknown engines because KTX cannot validate them', () => {
    const verdict = validateMappingPhysicalMatch(
      { metabaseEngine: 'unknown-engine', metabaseDbName: 'X', metabaseHost: 'host' },
      { connection_type: 'OTHER' },
    );

    expect(verdict).toBeNull();
  });
});
/**
 * Specs for `computeMetabaseMappingPhysicalMismatches`: matching mappings are
 * left out of the result and mismatching ones are reported with a reason.
 */
describe('computeMetabaseMappingPhysicalMismatches', () => {
  it('returns only mismatched physical mappings', () => {
    // Both mappings share host and engine; only the target database of the second differs.
    const mismatches = computeMetabaseMappingPhysicalMismatches([
      {
        mappingId: 'mapping-ok',
        metabase: { metabaseEngine: 'postgres', metabaseHost: 'pg.internal', metabaseDbName: 'app' },
        target: { connection_type: 'POSTGRESQL', host: 'pg.internal', database: 'app' },
      },
      {
        mappingId: 'mapping-bad',
        metabase: { metabaseEngine: 'postgres', metabaseHost: 'pg.internal', metabaseDbName: 'app' },
        target: { connection_type: 'POSTGRESQL', host: 'pg.internal', database: 'other_app' },
      },
    ]);

    const expectedMismatches = [
      {
        mappingId: 'mapping-bad',
        reason: "Metabase database 'app' does not match KTX connection database 'other_app'",
      },
    ];
    expect(mismatches).toEqual(expectedMismatches);
  });
});
/**
 * Specs for `refreshMetabaseMapping`, which returns both the discovery drift
 * and the physical mismatches for the current mappings.
 */
describe('refreshMetabaseMapping', () => {
  it('combines discovery drift and physical validation through a caller-provided target resolver', async () => {
    // Only `getDatabases` is stubbed; the double cast widens the partial stub to the full client type.
    const getDatabases = vi.fn().mockResolvedValue([
      {
        id: 2,
        name: 'Analytics',
        engine: 'postgres',
        details: { host: 'pg.internal', dbname: 'analytics' },
        is_sample: false,
      },
    ]);
    const client = { getDatabases } as Pick<MetabaseRuntimeClient, 'getDatabases'> as MetabaseRuntimeClient;

    // The resolver reports a target whose database name differs from the Metabase one.
    const refreshed = refreshMetabaseMapping({
      client,
      currentMappings: { '2': 'target-postgres' },
      resolveKtxConnectionPhysicalInfo: vi.fn().mockResolvedValue({
        connection_type: 'POSTGRESQL',
        host: 'pg.internal',
        database: 'wrong_database',
      }),
    });

    // The mapping is in sync by id yet still flagged as a physical mismatch.
    const expectedOutcome = {
      drift: {
        unmappedDiscovered: [],
        staleMappings: [],
        inSync: [{ id: 2, ktxConnectionId: 'target-postgres' }],
      },
      physicalMismatches: [
        {
          mappingId: '2',
          reason: "Metabase database 'analytics' does not match KTX connection database 'wrong_database'",
        },
      ],
    };
    await expect(refreshed).resolves.toEqual(expectedOutcome);
  });
});
/**
 * Specs for `findBestMatch`, which picks the KTX connection candidate that
 * best matches a Metabase database's engine, host and database name.
 */
describe('findBestMatch', () => {
  // One Snowflake candidate plus three Postgres candidates that match on host only,
  // database only, and both, relative to host `pg.internal` / database `app`.
  const candidates = [
    {
      id: 'snowflake-target',
      name: 'Warehouse Snowflake',
      connection_type: 'SNOWFLAKE',
      connection_params: { account: 'EMOVRJS-CZ07756', database: 'ANALYTICS' },
    },
    {
      id: 'postgres-host-only',
      name: 'Host Only Postgres',
      connection_type: 'POSTGRESQL',
      connection_params: { host: 'pg.internal', database: 'other_app' },
    },
    {
      id: 'postgres-db-only',
      name: 'Database Only Postgres',
      connection_type: 'POSTGRESQL',
      connection_params: { host: 'other.internal', database: 'app' },
    },
    {
      id: 'postgres-full',
      name: 'Full Postgres',
      connection_type: 'POSTGRESQL',
      connection_params: { host: 'pg.internal', database: 'app' },
    },
  ];

  it('chooses a host-and-database match over weaker matches', () => {
    // The Metabase side carries a port suffix and an upper-case database name.
    const best = findBestMatch(
      { metabaseEngine: 'postgres', metabaseHost: 'pg.internal:5432', metabaseDbName: 'APP' },
      candidates,
    );

    expect(best).toEqual({
      connectionId: 'postgres-full',
      connectionName: 'Full Postgres',
      reason: 'host_and_database',
    });
  });

  it('falls back to database-only matching when host does not match', () => {
    const best = findBestMatch(
      { metabaseEngine: 'postgres', metabaseHost: 'unknown.internal', metabaseDbName: 'app' },
      candidates,
    );

    expect(best).toEqual({
      connectionId: 'postgres-db-only',
      connectionName: 'Database Only Postgres',
      reason: 'database_only',
    });
  });

  it('returns null for unsupported Metabase engines', () => {
    const best = findBestMatch(
      { metabaseEngine: 'unknown-engine', metabaseHost: 'pg.internal', metabaseDbName: 'app' },
      candidates,
    );

    expect(best).toBeNull();
  });
});
/**
 * Pins the engine-to-connection-type entries listed below. `toMatchObject`
 * is a subset match, so additional entries in the table are tolerated.
 */
describe('METABASE_ENGINE_TO_CONNECTION_TYPE', () => {
  it('keeps the server-supported Metabase engine table in KTX', () => {
    const requiredEntries = {
      postgres: 'POSTGRESQL',
      bigquery: 'BIGQUERY',
      'bigquery-cloud-sdk': 'BIGQUERY',
      snowflake: 'SNOWFLAKE',
      sqlserver: 'SQLSERVER',
      mysql: 'MYSQL',
    };

    expect(METABASE_ENGINE_TO_CONNECTION_TYPE).toMatchObject(requiredEntries);
  });
});

View file

@ -1,153 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { MetabaseSourceAdapter } from './metabase.adapter.js';
/**
 * Specs for `MetabaseSourceAdapter`: its static identity, staged-directory
 * detection, and how `fetch` uses the injected client factory and
 * source-state reader.
 */
describe('MetabaseSourceAdapter', () => {
  // Per-test scratch directory standing in for a staged bundle.
  let stagedDir: string;
  let adapter: MetabaseSourceAdapter;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'mb-adapter-'));
    // The tests using this shared instance need no dependencies, so an empty object is passed.
    adapter = new MetabaseSourceAdapter({} as any);
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('declares the expected source key and skill list', () => {
    expect(adapter.source).toBe('metabase');
    expect(adapter.skillNames).toEqual(['metabase_ingest']);
  });

  it('detect: true for a valid staged dir', async () => {
    // Staged layout used here: a sync-config.json plus one card file under cards/.
    await writeFile(join(stagedDir, 'sync-config.json'), '{}', 'utf-8');
    await mkdir(join(stagedDir, 'cards'), { recursive: true });
    await writeFile(join(stagedDir, 'cards/1.json'), '{}', 'utf-8');

    const detected = await adapter.detect(stagedDir);

    expect(detected).toBe(true);
  });

  it('detect: false for a random empty dir', async () => {
    const detected = await adapter.detect(stagedDir);

    expect(detected).toBe(false);
  });

  it('exposes a fetch() method (network-bound — real calls covered by fetch.spec.ts)', () => {
    expect(typeof adapter.fetch).toBe('function');
  });

  it('forwards fetch dependencies using the source-state reader port', async () => {
    // Client stub that reports no cards and no collections.
    const client = {
      getAllCards: vi.fn().mockResolvedValue([]),
      getCollectionTree: vi.fn().mockResolvedValue([]),
      getCollectionItems: vi.fn().mockResolvedValue([]),
      cleanup: vi.fn().mockResolvedValue(undefined),
    };
    const clientFactory = {
      createClient: vi.fn().mockResolvedValue(client),
    };
    const sourceStateReader = {
      getSourceState: vi.fn().mockResolvedValue({
        syncMode: 'ALL',
        selections: [],
        defaultTagNames: [],
        mappings: [
          {
            metabaseDatabaseId: 42,
            metabaseDatabaseName: 'Analytics',
            metabaseEngine: 'postgres',
            targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
            syncEnabled: true,
          },
        ],
      }),
    };
    const wiredAdapter = new MetabaseSourceAdapter({ clientFactory, sourceStateReader });

    await wiredAdapter.fetch(
      {
        metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
        metabaseDatabaseId: 42,
      },
      stagedDir,
      { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' },
    );

    // The reader is keyed by the Metabase connection id; the factory receives the pull config and context.
    expect(sourceStateReader.getSourceState).toHaveBeenCalledWith('a1b2c3d4-e5f6-4789-9abc-def012345678');
    expect(clientFactory.createClient).toHaveBeenCalledWith(
      {
        metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
        metabaseDatabaseId: 42,
      },
      { connectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789', sourceKey: 'metabase' },
    );
  });
});
/**
 * Specs for `MetabaseSourceAdapter.describeScope`, which reads the staged
 * `sync-config.json` and returns a fingerprint plus an `isPathInScope` predicate.
 */
describe('MetabaseSourceAdapter.describeScope', () => {
  const adapter = new MetabaseSourceAdapter({} as any);
  // Per-test scratch directory that receives the sync-config.json under test.
  let dir: string;

  beforeEach(async () => {
    dir = await mkdtemp(join(tmpdir(), 'mb-scope-'));
  });

  afterEach(async () => {
    await rm(dir, { recursive: true, force: true });
  });

  // Fields shared by every staged sync config in this suite; tests add syncMode and selections.
  const BASE = {
    metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
    metabaseDatabaseId: 42,
    defaultTagNames: [],
    mapping: {
      metabaseDatabaseId: 42,
      metabaseDatabaseName: 'Analytics',
      metabaseEngine: 'postgres',
      targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
    },
  };

  /** Serializes `cfg` as the staged directory's sync-config.json, replacing any earlier one. */
  const writeSyncConfig = async (cfg: unknown): Promise<void> => {
    const target = join(dir, 'sync-config.json');
    await writeFile(target, JSON.stringify(cfg), 'utf-8');
  };

  it('returns a fingerprint + predicate for ONLY-scope staged dir', async () => {
    await writeSyncConfig({
      ...BASE,
      syncMode: 'ONLY',
      selections: [{ selectionType: 'item', metabaseObjectId: 5 }],
    });

    const scope = await adapter.describeScope(dir);

    // The fingerprint is expected to be 64 lowercase hex characters.
    expect(scope.fingerprint).toMatch(/^[0-9a-f]{64}$/);
    // Only the selected card and the sync config itself are in scope.
    expect(scope.isPathInScope('cards/5.json')).toBe(true);
    expect(scope.isPathInScope('cards/99.json')).toBe(false);
    expect(scope.isPathInScope('sync-config.json')).toBe(true);
  });

  it('fingerprint is stable across invocations', async () => {
    await writeSyncConfig({
      ...BASE,
      syncMode: 'ONLY',
      selections: [
        { selectionType: 'item', metabaseObjectId: 1 },
        { selectionType: 'item', metabaseObjectId: 2 },
      ],
    });

    const first = await adapter.describeScope(dir);
    const second = await adapter.describeScope(dir);

    expect(first.fingerprint).toBe(second.fingerprint);
  });

  it('different syncMode produces different fingerprint', async () => {
    await writeSyncConfig({ ...BASE, syncMode: 'ALL', selections: [] });
    const allScope = await adapter.describeScope(dir);

    // Overwrite the same file with an ONLY-mode config and describe again.
    await writeSyncConfig({
      ...BASE,
      syncMode: 'ONLY',
      selections: [{ selectionType: 'item', metabaseObjectId: 1 }],
    });
    const onlyScope = await adapter.describeScope(dir);

    expect(allScope.fingerprint).not.toBe(onlyScope.fingerprint);
  });
});

View file

@ -1,222 +0,0 @@
import { describe, expect, it } from 'vitest';
import { extractReferencedCardIds, serializeCard } from './serialize-card.js';
/**
 * Specs for `extractReferencedCardIds`, which collects referenced card ids
 * from template tags and from `{{#N}}` markers in the SQL text.
 */
describe('extractReferencedCardIds', () => {
  it('pulls ids out of template tags with type=card', () => {
    const templateTags = [
      { name: 'orders', type: 'card', cardReference: 42 },
      { name: 'param', type: 'text' },
    ];

    const ids = extractReferencedCardIds(templateTags, '');

    expect(ids).toEqual([42]);
  });

  it('finds `{{#N}}` references in the SQL body even when the tag list lacks cardReference', () => {
    const templateTags = [{ name: 'orders_ref', type: 'card' }];
    const body = 'SELECT * FROM ({{#42}}) UNION ALL (SELECT * FROM {{#101}})';

    // Sorted numerically so the assertion does not depend on discovery order.
    const ids = extractReferencedCardIds(templateTags, body).sort((left, right) => left - right);

    expect(ids).toEqual([42, 101]);
  });

  it('dedupes card ids across tags and SQL body', () => {
    const templateTags = [{ name: 'a', type: 'card', cardReference: 42 }];
    const body = 'SELECT * FROM {{#42}}';

    const ids = extractReferencedCardIds(templateTags, body);

    expect(ids).toEqual([42]);
  });

  it('returns [] when no references exist', () => {
    const ids = extractReferencedCardIds([], 'SELECT 1');

    expect(ids).toEqual([]);
  });
});
/**
 * Specs for `serializeCard`, which turns a Metabase card plus its resolved SQL,
 * template tags and collection path into the staged card representation.
 */
describe('serializeCard', () => {
  // Fully populated model card shared by the first group of tests.
  const baseCard = {
    id: 7,
    name: 'Daily orders',
    description: 'Orders by day',
    type: 'model',
    database_id: 42,
    collection_id: 5,
    archived: false,
    result_metadata: [
      {
        name: 'order_count',
        display_name: 'Count',
        base_type: 'type/Integer',
        semantic_type: null,
        description: null,
        fk_target_field_id: null,
      },
    ],
  } as const;

  /**
   * Minimal "question" card used by the optional-field tests. Keys in `extra`
   * are added to the minimal card and override same-named keys.
   */
  const questionCard = (extra: Record<string, unknown> = {}): any => ({
    id: 1,
    name: 'X',
    description: null,
    type: 'question',
    database_id: 6,
    collection_id: null,
    archived: false,
    result_metadata: [],
    ...extra,
  });

  it('returns a valid StagedCardFile with resolved SQL and template tags', () => {
    const result = serializeCard({
      card: baseCard as any,
      resolvedSql: 'SELECT COUNT(*) AS order_count FROM orders',
      templateTags: [],
      collectionPath: ['Data', 'Orders'],
      resolutionStatus: 'resolved',
    });

    expect(result.metabaseId).toBe(7);
    expect(result.name).toBe('Daily orders');
    expect(result.collectionPath).toEqual(['Data', 'Orders']);
    expect(result.resolvedSql).toBe('SELECT COUNT(*) AS order_count FROM orders');
    expect(result.referencedCardIds).toEqual([]);
    expect(result.resultMetadata).toHaveLength(1);
    expect(result.resultMetadata[0].name).toBe('order_count');
  });

  it('persists resolutionStatus="resolved" when caller passes it', () => {
    const result = serializeCard({
      card: baseCard as any,
      resolvedSql: 'SELECT 1',
      templateTags: [],
      collectionPath: [],
      resolutionStatus: 'resolved',
    });

    expect(result.resolutionStatus).toBe('resolved');
  });

  it('persists resolutionStatus="fallback" when caller passes it', () => {
    const result = serializeCard({
      card: baseCard as any,
      resolvedSql: 'SELECT * FROM {{#101}}',
      templateTags: [{ name: 'ref', type: 'card', cardReference: 101 }],
      collectionPath: [],
      resolutionStatus: 'fallback',
    });

    expect(result.resolutionStatus).toBe('fallback');
  });

  it('extracts referencedCardIds from template tags + SQL body', () => {
    // Card 101 is referenced by both the tag and the SQL marker; it must be listed once.
    const result = serializeCard({
      card: baseCard as any,
      resolvedSql: 'SELECT * FROM {{#101}}',
      templateTags: [{ name: 'ref', type: 'card', cardReference: 101 }],
      collectionPath: [],
      resolutionStatus: 'resolved',
    });

    expect(result.referencedCardIds).toEqual([101]);
  });

  it('null description passes through as null, not empty string', () => {
    const result = serializeCard({
      card: { ...baseCard, description: null } as any,
      resolvedSql: '',
      templateTags: [],
      collectionPath: [],
      resolutionStatus: 'resolved',
    });

    expect(result.description).toBeNull();
  });

  it('collectionId=`root` stays as the string literal "root"', () => {
    const result = serializeCard({
      card: { ...baseCard, collection_id: 'root' } as any,
      resolvedSql: '',
      templateTags: [],
      collectionPath: [],
      resolutionStatus: 'resolved',
    });

    expect(result.collectionId).toBe('root');
  });

  it('persists parameters[] from the input card', () => {
    const result = serializeCard({
      card: questionCard({
        parameters: [
          { id: 'p1', name: 'auction_end', type: 'date/range', slug: 'auction_end', default: null, sectionId: 'date' },
          { id: 'p2', name: 'status', type: 'category', slug: 'status', default: 'active', sectionId: 'string' },
        ],
      }),
      resolvedSql: 'SELECT 1',
      templateTags: [],
      collectionPath: [],
      resolutionStatus: 'resolved',
    });

    expect(result.parameters).toHaveLength(2);
    expect(result.parameters?.[0]).toMatchObject({ id: 'p1', name: 'auction_end', type: 'date/range' });
  });

  it('persists field_ref on each result-metadata column', () => {
    const result = serializeCard({
      card: questionCard({
        result_metadata: [
          {
            name: 'customer_id',
            base_type: 'type/Integer',
            semantic_type: 'type/FK',
            fk_target_field_id: 42,
            field_ref: ['field', 99, null],
          },
        ],
      }),
      resolvedSql: 'SELECT customer_id FROM x',
      templateTags: [],
      collectionPath: [],
      resolutionStatus: 'resolved',
    });

    expect(result.resultMetadata[0].field_ref).toEqual(['field', 99, null]);
  });

  it('persists lastRunAt and dashboardCount when present on the card', () => {
    const result = serializeCard({
      card: questionCard({
        last_run_at: '2026-04-27T10:00:00Z',
        dashboard_count: 3,
      }),
      resolvedSql: 'SELECT 1',
      templateTags: [],
      collectionPath: [],
      resolutionStatus: 'resolved',
    });

    expect(result.lastRunAt).toBe('2026-04-27T10:00:00Z');
    expect(result.dashboardCount).toBe(3);
  });

  it('omits the new fields gracefully when the card lacks them', () => {
    const result = serializeCard({
      card: questionCard(),
      resolvedSql: 'SELECT 1',
      templateTags: [],
      collectionPath: [],
      resolutionStatus: 'resolved',
    });

    // Missing parameters become an empty list; the two scalar fields become null.
    expect(result.parameters).toEqual([]);
    expect(result.lastRunAt).toBeNull();
    expect(result.dashboardCount).toBeNull();
  });
});

View file

@ -1,87 +0,0 @@
import { describe, expect, it } from 'vitest';
import {
metabasePullConfigSchema,
parseMetabasePullConfig,
stagedCardFileSchema,
stagedSyncConfigSchema,
} from './types.js';
/**
 * Specs for the Metabase adapter schemas: the pull config, the staged card
 * file, and the staged sync config.
 */
describe('metabase adapter types', () => {
  it('parses a valid MetabasePullConfig', () => {
    const config = parseMetabasePullConfig({
      metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
      metabaseDatabaseId: 42,
    });

    expect(config.metabaseConnectionId).toBe('a1b2c3d4-e5f6-4789-9abc-def012345678');
    expect(config.metabaseDatabaseId).toBe(42);
  });

  it('parses local-safe Metabase connection IDs for standalone projects', () => {
    // A plain slug is accepted as a connection id, not only UUID-shaped values.
    const config = parseMetabasePullConfig({ metabaseConnectionId: 'prod-metabase', metabaseDatabaseId: 42 });

    expect(config.metabaseConnectionId).toBe('prod-metabase');
  });

  it('rejects unsafe metabaseConnectionId values', () => {
    const attempt = () => parseMetabasePullConfig({ metabaseConnectionId: '../prod', metabaseDatabaseId: 42 });

    expect(attempt).toThrow();
  });

  it('rejects missing metabaseDatabaseId', () => {
    const outcome = metabasePullConfigSchema.safeParse({
      metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
    });

    expect(outcome.success).toBe(false);
  });

  it('stagedCardFileSchema accepts a minimal card', () => {
    const card = stagedCardFileSchema.parse({
      metabaseId: 1,
      name: 'Orders',
      description: null,
      type: 'model',
      databaseId: 42,
      collectionId: 5,
      archived: false,
      resolvedSql: 'SELECT * FROM orders',
      templateTags: [],
      resultMetadata: [],
      collectionPath: ['Data', 'Orders'],
      referencedCardIds: [],
      resolutionStatus: 'resolved',
    });

    expect(card.metabaseId).toBe(1);
    expect(card.collectionPath).toEqual(['Data', 'Orders']);
  });

  it('stagedSyncConfigSchema accepts selections + mappings snapshot', () => {
    const syncConfig = stagedSyncConfigSchema.parse({
      metabaseConnectionId: 'a1b2c3d4-e5f6-4789-9abc-def012345678',
      metabaseDatabaseId: 42,
      syncMode: 'ALL',
      selections: [],
      defaultTagNames: [],
      mapping: {
        metabaseDatabaseId: 42,
        metabaseDatabaseName: 'Analytics',
        metabaseEngine: 'postgres',
        targetConnectionId: 'b2c3d4e5-f6a7-4890-abcd-ef0123456789',
      },
    });

    expect(syncConfig.syncMode).toBe('ALL');
  });

  it('stagedSyncConfigSchema accepts local-safe connection IDs', () => {
    // Both the Metabase connection id and the mapping target are plain slugs here.
    const syncConfig = stagedSyncConfigSchema.parse({
      metabaseConnectionId: 'prod-metabase',
      metabaseDatabaseId: 42,
      syncMode: 'ALL',
      selections: [],
      defaultTagNames: [],
      mapping: {
        metabaseDatabaseId: 42,
        metabaseDatabaseName: 'Analytics',
        metabaseEngine: 'postgres',
        targetConnectionId: 'warehouse_a',
      },
    });

    expect(syncConfig.metabaseConnectionId).toBe('prod-metabase');
    expect(syncConfig.mapping.targetConnectionId).toBe('warehouse_a');
  });
});

View file

@ -1,124 +0,0 @@
import { join, resolve } from 'node:path';
import { describe, expect, it } from 'vitest';
import { chunkMetricFlowProject } from './chunk.js';
import { parseMetricFlowStagedDir } from './parse.js';
// Root directory of the MetricFlow fixture projects, resolved relative to this test file's directory.
const FIXTURES = resolve(__dirname, '../../../../test/fixtures/metricflow');
// One fixture project directory per scenario used by the chunking tests below.
const SINGLE = join(FIXTURES, 'single-model');
const EXTENDS_CHAIN = join(FIXTURES, 'extends-chain');
const MULTI = join(FIXTURES, 'multi-component');
const DBT_MIXED = join(FIXTURES, 'dbt-mixed');
// Chunking tests for `chunkMetricFlowProject`, run against the parsed fixture projects.
// The first group calls the chunker without options; the "DiffSet re-sync" group passes a
// `diffSet` and checks how added/modified/deleted/unchanged paths shape the work units.
//
// Order-insensitive assertions sort a *copy* of the array (`[...xs].sort()`): `Array.prototype.sort`
// sorts in place, so sorting the result directly would mutate the object under test.
describe('chunkMetricFlowProject — first run', () => {
  it('single-model fixture emits one WU with the orders model + its metric file (collapsed via metric refs)', async () => {
    const project = await parseMetricFlowStagedDir(SINGLE);
    const result = chunkMetricFlowProject(project);
    expect(result.workUnits).toHaveLength(1);
    const wu = result.workUnits[0];
    expect(wu.unitKey).toBe('metricflow-orders');
    expect(wu.rawFiles).toEqual(['models/orders.yml']);
    expect(wu.dependencyPaths).toEqual([]);
    expect(wu.peerFileIndex).toEqual([]);
  });

  it('extends-chain fixture collapses orders + orders_ext + metrics/orders_final into ONE WU', async () => {
    const project = await parseMetricFlowStagedDir(EXTENDS_CHAIN);
    const result = chunkMetricFlowProject(project);
    expect(result.workUnits).toHaveLength(1);
    const wu = result.workUnits[0];
    expect(wu.unitKey).toBe('metricflow-orders');
    // Compare a sorted copy so the work unit itself is left untouched.
    expect([...wu.rawFiles].sort()).toEqual(['metrics/orders_final.yml', 'models/orders.yml', 'models/orders_ext.yml']);
    expect(wu.notes).toContain('orders');
    expect(wu.notes).toContain('orders_ext');
    expect(wu.notes).toContain('revenue');
  });

  it('multi-component fixture emits two disjoint WUs ordered by leadName', async () => {
    const project = await parseMetricFlowStagedDir(MULTI);
    const result = chunkMetricFlowProject(project);
    expect(result.workUnits).toHaveLength(2);
    expect(result.workUnits.map((wu) => wu.unitKey)).toEqual(['metricflow-campaigns', 'metricflow-orders']);
    // Each work unit lists the other component's file as a peer, not as one of its own raw files.
    expect(result.workUnits[0].rawFiles).toEqual(['models/marketing/campaigns.yml']);
    expect(result.workUnits[0].peerFileIndex).toEqual(['models/sales/orders.yml']);
    expect(result.workUnits[1].rawFiles).toEqual(['models/sales/orders.yml']);
    expect(result.workUnits[1].peerFileIndex).toEqual(['models/marketing/campaigns.yml']);
  });

  it('dbt-mixed fixture: non-MetricFlow YAML (dbt_project.yml) lands in peerFileIndex, not in any WU', async () => {
    const project = await parseMetricFlowStagedDir(DBT_MIXED);
    const result = chunkMetricFlowProject(project);
    expect(result.workUnits).toHaveLength(1);
    expect(result.workUnits[0].rawFiles).toEqual(['models/orders.yml']);
    expect(result.workUnits[0].peerFileIndex).toEqual(['dbt_project.yml']);
  });

  it('chunk is deterministic: two identical invocations return structurally-equal WUs', async () => {
    const p1 = await parseMetricFlowStagedDir(EXTENDS_CHAIN);
    const p2 = await parseMetricFlowStagedDir(EXTENDS_CHAIN);
    const r1 = chunkMetricFlowProject(p1);
    const r2 = chunkMetricFlowProject(p2);
    // Serialised comparison also pins element ordering inside every array.
    expect(JSON.stringify(r1)).toBe(JSON.stringify(r2));
  });

  it('DiffSet re-sync: only WUs with a touched rawFile are kept', async () => {
    const project = await parseMetricFlowStagedDir(MULTI);
    const result = chunkMetricFlowProject(project, {
      diffSet: {
        added: [],
        modified: ['models/sales/orders.yml'],
        deleted: [],
        unchanged: ['models/marketing/campaigns.yml'],
      },
    });
    expect(result.workUnits).toHaveLength(1);
    expect(result.workUnits[0].unitKey).toBe('metricflow-orders');
    expect(result.workUnits[0].rawFiles).toEqual(['models/sales/orders.yml']);
    expect(result.workUnits[0].dependencyPaths).toEqual([]); // no unchanged sibling in this component
  });

  it('DiffSet re-sync: unchanged component siblings move from rawFiles into dependencyPaths', async () => {
    const project = await parseMetricFlowStagedDir(EXTENDS_CHAIN);
    const result = chunkMetricFlowProject(project, {
      diffSet: {
        added: [],
        modified: ['models/orders_ext.yml'], // only the extension file changed
        deleted: [],
        unchanged: ['models/orders.yml', 'metrics/orders_final.yml'],
      },
    });
    expect(result.workUnits).toHaveLength(1);
    const wu = result.workUnits[0];
    expect(wu.rawFiles).toEqual(['models/orders_ext.yml']);
    // Compare a sorted copy so the work unit itself is left untouched.
    expect([...wu.dependencyPaths].sort()).toEqual(['metrics/orders_final.yml', 'models/orders.yml']);
  });

  it('DiffSet re-sync: all-unchanged yields zero WUs', async () => {
    const project = await parseMetricFlowStagedDir(EXTENDS_CHAIN);
    const result = chunkMetricFlowProject(project, {
      diffSet: {
        added: [],
        modified: [],
        deleted: [],
        unchanged: ['models/orders.yml', 'models/orders_ext.yml', 'metrics/orders_final.yml'],
      },
    });
    expect(result.workUnits).toEqual([]);
    expect(result.eviction).toBeUndefined();
  });

  it('DiffSet re-sync: deleted files produce an EvictionUnit', async () => {
    const project = await parseMetricFlowStagedDir(MULTI);
    const result = chunkMetricFlowProject(project, {
      diffSet: {
        added: [],
        modified: [],
        deleted: ['models/marketing/campaigns.yml'],
        unchanged: ['models/sales/orders.yml'],
      },
    });
    expect(result.workUnits).toEqual([]);
    expect(result.eviction).toEqual({
      deletedRawPaths: ['models/marketing/campaigns.yml'],
    });
  });
});

View file

@ -1,51 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { detectMetricFlowStagedDir } from './detect.js';
/**
 * Writes `body` (empty by default) to `relPath` under `stagedDir`, creating any missing
 * parent directories first.
 */
async function touch(stagedDir: string, relPath: string, body = ''): Promise<void> {
  const target = join(stagedDir, relPath);
  const parentDir = join(target, '..');
  await mkdir(parentDir, { recursive: true });
  await writeFile(target, body, 'utf-8');
}
// Detection tests: each case populates a fresh temporary staged directory (removed again in
// afterEach) and checks the boolean returned by `detectMetricFlowStagedDir`.
describe('detectMetricFlowStagedDir', () => {
  let stagedDir: string;

  beforeEach(async () => {
    const prefix = join(tmpdir(), 'mf-detect-');
    stagedDir = await mkdtemp(prefix);
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('returns true when any YAML has top-level semantic_models:', async () => {
    await touch(stagedDir, 'models/a.yml', 'semantic_models:\n - {name: a, model: x, measures: []}\n');
    const detected = await detectMetricFlowStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('returns true when any YAML has top-level metrics:', async () => {
    await touch(stagedDir, 'metrics/m.yaml', 'metrics:\n - {name: m, type: simple, type_params: {measure: x}}\n');
    const detected = await detectMetricFlowStagedDir(stagedDir);
    expect(detected).toBe(true);
  });

  it('returns false for a directory with only dbt_project.yml', async () => {
    await touch(stagedDir, 'dbt_project.yml', 'name: my_proj\nversion: "1.0.0"\n');
    const detected = await detectMetricFlowStagedDir(stagedDir);
    expect(detected).toBe(false);
  });

  it('returns false for an empty directory', async () => {
    const detected = await detectMetricFlowStagedDir(stagedDir);
    expect(detected).toBe(false);
  });

  it('returns false for only broken YAML', async () => {
    await touch(stagedDir, 'broken.yml', '{ not: valid :::');
    const detected = await detectMetricFlowStagedDir(stagedDir);
    expect(detected).toBe(false);
  });

  it('ignores non-YAML files and returns false when no YAML qualifies', async () => {
    await touch(stagedDir, 'readme.md', '# readme');
    await touch(stagedDir, 'script.py', 'print("hi")');
    const detected = await detectMetricFlowStagedDir(stagedDir);
    expect(detected).toBe(false);
  });
});

View file

@ -1,110 +0,0 @@
import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
import { fetchMetricflowRepo } from './fetch.js';
/** Resolves to `true` when `access(path)` succeeds and to `false` when it rejects for any reason. */
async function exists(path: string): Promise<boolean> {
  return access(path).then(
    () => true,
    () => false,
  );
}
/**
 * Writes `files` (relative path -> content) under `<tmpRoot>/fixture-src`, one file at a time,
 * then passes that directory and `<tmpRoot>/origin` to `makeLocalGitRepo` and returns its result.
 */
async function makeRepo(tmpRoot: string, files: Record<string, string>) {
  const sourceDir = join(tmpRoot, 'fixture-src');
  const originDir = join(tmpRoot, 'origin');
  for (const [relPath, body] of Object.entries(files)) {
    const target = join(sourceDir, relPath);
    const parentDir = join(target, '..');
    await mkdir(parentDir, { recursive: true });
    await writeFile(target, body, 'utf-8');
  }
  return makeLocalGitRepo(sourceDir, originDir);
}
// Fetch tests: every case builds a local git repo under a fresh temp root, fetches it into
// `<tmpRoot>/stage` using `<tmpRoot>/cache` as the cache directory, and inspects what was staged.
describe('fetchMetricflowRepo', () => {
  let tmpRoot: string;

  // Runs the fetch with the branch/auth/target-table settings shared by every case;
  // only the repo URL and the optional sub-directory vary.
  const fetchRepo = (repoUrl: string, path: string | null) =>
    fetchMetricflowRepo({
      config: {
        repoUrl,
        branch: 'main',
        path,
        authToken: null,
        parsedTargetTables: {},
      },
      cacheDir: join(tmpRoot, 'cache'),
      stagedDir: join(tmpRoot, 'stage'),
    });

  beforeEach(async () => {
    const prefix = join(tmpdir(), 'metricflow-fetch-');
    tmpRoot = await mkdtemp(prefix);
  });

  afterEach(async () => {
    await rm(tmpRoot, { recursive: true, force: true });
  });

  it('clones a dbt repo and stages only YAML files', async () => {
    const repo = await makeRepo(tmpRoot, {
      'dbt_project.yml': 'name: analytics\n',
      'models/orders.yml': 'semantic_models:\n - name: orders\n model: ref("orders")\n',
      'models/readme.md': '# not staged\n',
      'macros/util.sql': 'select 1\n',
    });

    const result = await fetchRepo(repo.repoUrl, null);

    expect(result.filesCopied).toBe(2);
    expect(result.commitHash).toMatch(/^[0-9a-f]{40}$/);
    await expect(readFile(join(tmpRoot, 'stage/dbt_project.yml'), 'utf-8')).resolves.toContain('analytics');
    await expect(readFile(join(tmpRoot, 'stage/models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models');
    const readmeStaged = await exists(join(tmpRoot, 'stage/models/readme.md'));
    expect(readmeStaged).toBe(false);
    const macroStaged = await exists(join(tmpRoot, 'stage/macros/util.sql'));
    expect(macroStaged).toBe(false);
  });

  it('honors a configured repo subdirectory', async () => {
    const repo = await makeRepo(tmpRoot, {
      'warehouse/dbt_project.yml': 'name: warehouse\n',
      'warehouse/models/orders.yaml': 'semantic_models:\n - name: orders\n model: ref("orders")\n',
      'outside/ignored.yml': 'semantic_models:\n - name: ignored\n model: ref("ignored")\n',
    });

    const result = await fetchRepo(repo.repoUrl, 'warehouse');

    expect(result.filesCopied).toBe(2);
    await expect(readFile(join(tmpRoot, 'stage/models/orders.yaml'), 'utf-8')).resolves.toContain('orders');
    const outsideStaged = await exists(join(tmpRoot, 'stage/outside/ignored.yml'));
    expect(outsideStaged).toBe(false);
  });

  it('returns zero files when the configured subdirectory is absent', async () => {
    const repo = await makeRepo(tmpRoot, {
      'dbt_project.yml': 'name: analytics\n',
    });
    // The stage directory already exists before the fetch runs in this case.
    await mkdir(join(tmpRoot, 'stage'), { recursive: true });

    const result = await fetchRepo(repo.repoUrl, 'missing');

    expect(result.filesCopied).toBe(0);
  });
});

View file

@ -1,268 +0,0 @@
import { describe, expect, it } from 'vitest';
import { buildMetricFlowGraph } from './graph.js';
import type { ParsedMetricFlowProject } from './parse.js';
/**
 * Builds a complete `ParsedMetricFlowProject` from a partial one: any of the four
 * collections that is missing (nullish) becomes an empty array.
 */
function project(parts: Partial<ParsedMetricFlowProject>): ParsedMetricFlowProject {
  const { semanticModels, metrics, allPaths, files } = parts;
  return {
    semanticModels: semanticModels ?? [],
    metrics: metrics ?? [],
    allPaths: allPaths ?? [],
    files: files ?? [],
  };
}
// Component-graph tests for `buildMetricFlowGraph`. Each case hands the builder a hand-written
// parsed project and checks how file paths are grouped into `graph.components`.
//
// Order-insensitive assertions sort a *copy* of `paths` (`[...paths].sort()`): sorting the
// array in place would mutate the graph under test.
describe('buildMetricFlowGraph', () => {
  type SemanticModelFixture = ParsedMetricFlowProject['semanticModels'][number];

  /**
   * Builds a semantic-model fixture for `path`/`name`.
   * `modelRef` defaults to `name`, every name list defaults to empty and
   * `defaultTimeDimension` to `null`; `overrides` replaces any of those fields.
   */
  function semanticModel(
    path: string,
    name: string,
    overrides: Partial<SemanticModelFixture> = {},
  ): SemanticModelFixture {
    return {
      path,
      name,
      modelRef: name,
      extendsFrom: [],
      measureNames: [],
      dimensionNames: [],
      entityNames: [],
      primaryEntities: [],
      foreignEntities: [],
      defaultTimeDimension: null,
      ...overrides,
    };
  }

  it('puts each standalone semantic_model in its own component', () => {
    const graph = buildMetricFlowGraph(
      project({
        semanticModels: [
          semanticModel('models/a.yml', 'a', { measureNames: ['m1'] }),
          semanticModel('models/b.yml', 'b', { measureNames: ['m2'] }),
        ],
        allPaths: ['models/a.yml', 'models/b.yml'],
      }),
    );
    expect(graph.components).toHaveLength(2);
    // `as const` keeps each entry a [path, id] tuple, which is what the Map constructor expects.
    const byPath = new Map(graph.components.flatMap((c) => c.paths.map((p) => [p, c.id] as const)));
    expect(byPath.get('models/a.yml')).not.toBe(byPath.get('models/b.yml'));
  });

  it('unions two files when one semantic_model extends another', () => {
    const graph = buildMetricFlowGraph(
      project({
        semanticModels: [
          semanticModel('models/orders.yml', 'orders', { measureNames: ['gross_amount'] }),
          semanticModel('models/orders_ext.yml', 'orders_ext', {
            extendsFrom: ['orders'],
            measureNames: ['refund_amount'],
          }),
        ],
        allPaths: ['models/orders.yml', 'models/orders_ext.yml'],
      }),
    );
    expect(graph.components).toHaveLength(1);
    expect([...graph.components[0].paths].sort()).toEqual(['models/orders.yml', 'models/orders_ext.yml']);
  });

  it('unions a metric-only file with the semantic_model files whose measures it references', () => {
    const graph = buildMetricFlowGraph(
      project({
        semanticModels: [
          semanticModel('models/orders.yml', 'orders', { measureNames: ['gross_amount'] }),
          semanticModel('models/orders_ext.yml', 'orders_ext', {
            extendsFrom: ['orders'],
            measureNames: ['refund_amount'],
          }),
        ],
        metrics: [
          {
            path: 'metrics/orders_final.yml',
            name: 'revenue',
            type: 'derived',
            measureRef: null,
            dependsOn: ['gross_amount', 'refund_amount'],
          },
        ],
        allPaths: ['metrics/orders_final.yml', 'models/orders.yml', 'models/orders_ext.yml'],
      }),
    );
    expect(graph.components).toHaveLength(1);
    expect([...graph.components[0].paths].sort()).toEqual([
      'metrics/orders_final.yml',
      'models/orders.yml',
      'models/orders_ext.yml',
    ]);
  });

  it('leaves unrelated semantic_models in separate components (two disjoint groups)', () => {
    const graph = buildMetricFlowGraph(
      project({
        semanticModels: [
          semanticModel('models/sales/orders.yml', 'orders', { measureNames: ['order_count'] }),
          semanticModel('models/marketing/campaigns.yml', 'campaigns', { measureNames: ['spend'] }),
        ],
        allPaths: ['models/marketing/campaigns.yml', 'models/sales/orders.yml'],
      }),
    );
    expect(graph.components).toHaveLength(2);
  });

  it('returns components ordered lexicographically by their first-name-member', () => {
    const graph = buildMetricFlowGraph(
      project({
        // Deliberately listed z-before-a so the expected order differs from the input order.
        semanticModels: [
          semanticModel('models/z.yml', 'z_model', { modelRef: 'z', measureNames: ['m'] }),
          semanticModel('models/a.yml', 'a_model', { modelRef: 'a', measureNames: ['m'] }),
        ],
        allPaths: ['models/a.yml', 'models/z.yml'],
      }),
    );
    expect(graph.components.map((c) => c.leadName)).toEqual(['a_model', 'z_model']);
  });

  it('metric that references an unknown measure still anchors its own file as a singleton', () => {
    const graph = buildMetricFlowGraph(
      project({
        metrics: [
          { path: 'metrics/dangling.yml', name: 'dangling', type: 'simple', measureRef: 'nowhere', dependsOn: [] },
        ],
        allPaths: ['metrics/dangling.yml'],
      }),
    );
    expect(graph.components).toHaveLength(1);
    expect(graph.components[0].paths).toEqual(['metrics/dangling.yml']);
    expect(graph.components[0].leadName).toBe('dangling');
  });

  it('transitive extends forms one component across 3 files', () => {
    const graph = buildMetricFlowGraph(
      project({
        semanticModels: [
          semanticModel('a.yml', 'a'),
          semanticModel('b.yml', 'b', { extendsFrom: ['a'] }),
          semanticModel('c.yml', 'c', { extendsFrom: ['b'] }),
        ],
        allPaths: ['a.yml', 'b.yml', 'c.yml'],
      }),
    );
    expect(graph.components).toHaveLength(1);
    expect([...graph.components[0].paths].sort()).toEqual(['a.yml', 'b.yml', 'c.yml']);
  });
});

View file

@ -1,382 +0,0 @@
import { describe, expect, it, vi } from 'vitest';
import type { MetricFlowParseResult } from './deep-parse.js';
import { importMetricflowSemanticModels } from './import-semantic-models.js';
// Author email expected in the `writeSource` calls asserted below.
// NOTE(review): assembled from two fragments, presumably so the full address never appears
// as a single literal in this file — confirm the reason before inlining it.
const DBT_SYSTEM_EMAIL = ['system@kae', 'lio.dev'].join('');
/**
 * Returns a fresh parse-result fixture: one `orders` semantic model, one derived
 * cross-model metric (`global_revenue`), one orders -> customers relationship and a
 * single parser warning.
 */
function parseResult(): MetricFlowParseResult {
  const ordersModel: MetricFlowParseResult['semanticModels'][number] = {
    name: 'orders',
    description: 'Orders',
    modelRef: 'orders',
    dimensions: [{ name: 'status', column: 'status', type: 'string', label: 'Status' }],
    measures: [{ type: 'simple', name: 'order_count', column: 'id', aggregation: 'count' }],
    entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }],
    defaultTimeDimension: null,
  };
  const globalRevenue: MetricFlowParseResult['crossModelMetrics'][number] = {
    name: 'global_revenue',
    label: null,
    description: 'Revenue everywhere',
    type: 'derived',
    expr: 'sum(revenue)',
    dependsOn: [{ metricName: 'orders' }],
    filter: null,
  };
  const ordersToCustomers: MetricFlowParseResult['relationships'][number] = {
    fromTable: 'orders',
    fromColumn: 'customer_id',
    toTable: 'customers',
    toColumn: 'id',
  };
  return {
    semanticModels: [ordersModel],
    crossModelMetrics: [globalRevenue],
    relationships: [ordersToCustomers],
    warnings: ['parser warning'],
  };
}
// Import tests for `importMetricflowSemanticModels`. Every case injects a mocked semantic-layer
// service whose `forWorktree()` returns a second, worktree-scoped mock, then asserts on the
// calls made to the scoped mock and on the returned summary counters.
//
// The mock scaffolding is shared through the factories below so each test only spells out the
// behaviour it overrides.
describe('importMetricflowSemanticModels', () => {
  /**
   * Baseline worktree-scoped service double: nothing is manifest-backed, no source exists
   * yet, and every write succeeds without warnings.
   */
  function defaultScopedService() {
    return {
      getManifestEntry: vi.fn().mockResolvedValue(null),
      isManifestBacked: vi.fn().mockResolvedValue(false),
      loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
      loadSource: vi.fn().mockResolvedValue(null),
      writeSource: vi.fn().mockResolvedValue({ warnings: [] }),
    };
  }
  type ScopedServiceMock = ReturnType<typeof defaultScopedService>;

  /** Baseline scoped double with individual methods replaced by `overrides`. */
  function createScopedService(overrides: Partial<ScopedServiceMock> = {}): ScopedServiceMock {
    return { ...defaultScopedService(), ...overrides };
  }

  /** Top-level service double whose `forWorktree()` hands back `scoped`; its own methods are bare mocks. */
  function createSemanticLayerService(scoped: ScopedServiceMock) {
    return {
      forWorktree: vi.fn().mockReturnValue(scoped),
      getManifestEntry: vi.fn(),
      isManifestBacked: vi.fn(),
      loadAllSources: vi.fn(),
      loadSource: vi.fn(),
      writeSource: vi.fn(),
    };
  }

  /** Mock implementation that reports only the `orders` source as manifest-backed. */
  const onlyOrdersIsManifestBacked = async (_connectionId: string, sourceName: string) => sourceName === 'orders';

  /** Second semantic model used by the cases that need an orders -> customers join target. */
  function customersModel(): MetricFlowParseResult['semanticModels'][number] {
    return {
      name: 'customers',
      description: null,
      modelRef: 'customers',
      dimensions: [{ name: 'id', column: 'id', type: 'string' }],
      measures: [],
      entities: [],
      defaultTimeDimension: null,
    };
  }

  /** Host tables where `orders` exposes both `customer_id` and `id`, and `customers` exposes `id`. */
  function relatedHostTables() {
    return [
      {
        id: 'orders-table',
        name: 'orders',
        catalog: null,
        db: null,
        columns: [
          { id: 'c1', name: 'customer_id' },
          { id: 'c2', name: 'id' },
        ],
      },
      { id: 'customers-table', name: 'customers', catalog: null, db: null, columns: [{ id: 'c3', name: 'id' }] },
    ];
  }

  it('writes semantic models through a worktree-scoped semantic layer service and returns touched sources', async () => {
    const scoped = createScopedService();
    const semanticLayerService = createSemanticLayerService(scoped);
    const result = await importMetricflowSemanticModels(
      { semanticLayerService },
      {
        connectionId: 'warehouse-1',
        parseResult: parseResult(),
        targetSchema: null,
        hostTables: [
          { id: 'orders-table', name: 'orders', catalog: null, db: null, columns: [{ id: 'c1', name: 'customer_id' }] },
          { id: 'customers-table', name: 'customers', catalog: null, db: null, columns: [{ id: 'c2', name: 'id' }] },
        ],
        workdir: '/tmp/session-worktree',
      },
    );
    expect(semanticLayerService.forWorktree).toHaveBeenCalledWith('/tmp/session-worktree');
    expect(scoped.writeSource).toHaveBeenCalledTimes(2);
    expect(scoped.writeSource).toHaveBeenNthCalledWith(
      1,
      'warehouse-1',
      expect.objectContaining({ name: 'orders' }),
      'dbt MetricFlow',
      DBT_SYSTEM_EMAIL,
      'dbt MetricFlow sync: create source orders',
      { skipValidation: true },
    );
    expect(scoped.writeSource).toHaveBeenNthCalledWith(
      2,
      'warehouse-1',
      expect.objectContaining({ name: 'global-revenue' }),
      'dbt MetricFlow',
      DBT_SYSTEM_EMAIL,
      'dbt MetricFlow sync: create cross-model source global-revenue',
      { skipValidation: true },
    );
    expect(result).toEqual({
      sourcesCreated: 1,
      sourcesUpdated: 0,
      sourcesSkipped: 0,
      crossModelSourcesCreated: 1,
      relationshipsImported: 0,
      warnings: ['parser warning'],
      errors: [],
      touchedSources: [
        { connectionId: 'warehouse-1', sourceName: 'global-revenue' },
        { connectionId: 'warehouse-1', sourceName: 'orders' },
      ],
    });
  });

  it('updates count when an existing semantic model source exists', async () => {
    const scoped = createScopedService({
      // Only `orders` already exists; every other lookup misses.
      loadSource: vi
        .fn()
        .mockImplementation(async (_connectionId: string, sourceName: string) =>
          sourceName === 'orders' ? { name: 'orders' } : null,
        ),
    });
    const semanticLayerService = createSemanticLayerService(scoped);
    const result = await importMetricflowSemanticModels(
      { semanticLayerService },
      {
        connectionId: 'warehouse-1',
        parseResult: { ...parseResult(), crossModelMetrics: [], relationships: [] },
        targetSchema: null,
        hostTables: [],
        workdir: '/tmp/session-worktree',
      },
    );
    expect(result.sourcesCreated).toBe(0);
    expect(result.sourcesUpdated).toBe(1);
    expect(result.crossModelSourcesCreated).toBe(0);
  });

  it('keeps domain write failures structured and continues processing', async () => {
    const scoped = createScopedService({
      // First write (the `orders` model) fails; the following cross-model write succeeds.
      writeSource: vi.fn().mockRejectedValueOnce(new Error('cannot write orders')).mockResolvedValue({ warnings: [] }),
    });
    const semanticLayerService = createSemanticLayerService(scoped);
    const result = await importMetricflowSemanticModels(
      { semanticLayerService },
      {
        connectionId: 'warehouse-1',
        parseResult: parseResult(),
        targetSchema: null,
        hostTables: [],
        workdir: '/tmp/session-worktree',
      },
    );
    expect(result.sourcesSkipped).toBe(1);
    expect(result.crossModelSourcesCreated).toBe(1);
    expect(result.errors).toEqual(["Failed to import semantic model 'orders': cannot write orders"]);
    expect(result.touchedSources).toEqual([{ connectionId: 'warehouse-1', sourceName: 'global-revenue' }]);
  });

  it('writes manifest-backed semantic models as overlays', async () => {
    const manifestOrders = {
      name: 'orders',
      table: 'analytics.orders',
      grain: ['id'],
      columns: [
        { name: 'id', type: 'string' },
        { name: 'customer_id', type: 'string' },
      ],
      joins: [],
      measures: [],
      descriptions: { db: 'Orders table from scan' },
    };
    const written: Array<{ name: string; table?: string; columns?: unknown[]; joins?: unknown[] }> = [];
    const scoped = createScopedService({
      getManifestEntry: vi
        .fn()
        .mockImplementation(async (_connectionId: string, sourceName: string) =>
          sourceName === 'orders' ? manifestOrders : null,
        ),
      isManifestBacked: vi.fn().mockImplementation(onlyOrdersIsManifestBacked),
      // Record every written source so the overlay shape can be inspected afterwards.
      writeSource: vi.fn().mockImplementation(async (_connectionId: string, source: (typeof written)[number]) => {
        written.push(source);
        return { warnings: [] };
      }),
    });
    const semanticLayerService = createSemanticLayerService(scoped);
    const result = await importMetricflowSemanticModels(
      { semanticLayerService },
      {
        connectionId: 'warehouse-1',
        parseResult: {
          ...parseResult(),
          semanticModels: [parseResult().semanticModels[0], customersModel()],
          crossModelMetrics: [],
        },
        targetSchema: null,
        hostTables: relatedHostTables(),
        workdir: '/tmp/session-worktree',
      },
    );
    expect(written[0]).toMatchObject({
      name: 'orders',
      joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
      descriptions: { dbt: 'Orders' },
    });
    // The overlay must not restate the manifest's physical definition.
    expect(written[0]).not.toHaveProperty('table');
    expect(written[0]).not.toHaveProperty('columns');
    expect(result.sourcesUpdated).toBe(1);
    expect(result.relationshipsImported).toBe(1);
  });

  it('drops joins whose keys are absent from manifest-backed source columns', async () => {
    const scoped = createScopedService({
      // The manifest entry only has an `id` column, so the `customer_id` join key is missing.
      getManifestEntry: vi.fn().mockResolvedValue({
        name: 'orders',
        table: 'analytics.orders',
        grain: ['id'],
        columns: [{ name: 'id', type: 'string' }],
        joins: [],
        measures: [],
      }),
      isManifestBacked: vi.fn().mockImplementation(onlyOrdersIsManifestBacked),
    });
    const semanticLayerService = createSemanticLayerService(scoped);
    const result = await importMetricflowSemanticModels(
      { semanticLayerService },
      {
        connectionId: 'warehouse-1',
        parseResult: { ...parseResult(), crossModelMetrics: [] },
        targetSchema: null,
        hostTables: [
          { id: 'orders-table', name: 'orders', catalog: null, db: null, columns: [{ id: 'c1', name: 'id' }] },
          { id: 'customers-table', name: 'customers', catalog: null, db: null, columns: [{ id: 'c2', name: 'id' }] },
        ],
        workdir: '/tmp/session-worktree',
      },
    );
    expect(scoped.writeSource).toHaveBeenCalledWith(
      'warehouse-1',
      expect.not.objectContaining({ joins: expect.anything() }),
      expect.any(String),
      expect.any(String),
      expect.any(String),
      { skipValidation: true },
    );
    expect(result.relationshipsImported).toBe(0);
  });

  it('repairs earlier sources when a later related model fails to write', async () => {
    const written: Array<{ name: string; joins?: unknown[] }> = [];
    const recordWrite = async (_connectionId: string, source: (typeof written)[number]) => {
      written.push(source);
      return { warnings: [] };
    };
    const scoped = createScopedService({
      // Write #1 (orders) is recorded, write #2 (customers) fails, later writes are recorded again.
      writeSource: vi
        .fn()
        .mockImplementationOnce(recordWrite)
        .mockRejectedValueOnce(new Error('disk full'))
        .mockImplementation(recordWrite),
    });
    const semanticLayerService = createSemanticLayerService(scoped);
    const result = await importMetricflowSemanticModels(
      { semanticLayerService },
      {
        connectionId: 'warehouse-1',
        parseResult: {
          ...parseResult(),
          semanticModels: [parseResult().semanticModels[0], customersModel()],
          crossModelMetrics: [],
        },
        targetSchema: null,
        hostTables: relatedHostTables(),
        workdir: '/tmp/session-worktree',
      },
    );
    expect(result.sourcesCreated).toBe(1);
    expect(result.sourcesSkipped).toBe(1);
    expect(result.relationshipsImported).toBe(0);
    expect(result.errors).toContain("Failed to import semantic model 'customers': disk full");
    // `orders` is written twice: the initial write plus the repair that clears its joins.
    expect(written.filter((source) => source.name === 'orders')).toHaveLength(2);
    expect(written[written.length - 1]).toMatchObject({ name: 'orders', joins: [] });
  });
});

View file

@ -1,266 +0,0 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
import type { SourceAdapter } from '../../types.js';
import type { MetricFlowParseResult } from './deep-parse.js';
import { MetricflowSourceAdapter } from './metricflow.adapter.js';
import { readMetricflowProjectionConfig, writeMetricflowProjectionConfig } from './projection-config.js';
/**
 * Compile-time-only guard: constructing `MetricflowSourceAdapter` without arguments must be
 * a type error. The `@ts-expect-error` directive below turns that expectation into a check —
 * if the constructor ever accepts zero arguments, the directive itself fails to compile.
 * The function is never invoked at runtime.
 */
function compileOnlyRequiredDepsCheck(): void {
// @ts-expect-error MetricflowSourceAdapter requires an explicit cache home.
new MetricflowSourceAdapter();
}
// Reference the function without calling it so it does not count as unused.
void compileOnlyRequiredDepsCheck;
/**
 * Materialises `files` (relative path -> content) under `<tmpRoot>/fixture-src`, writing them
 * sequentially, then returns whatever `makeLocalGitRepo` produces for that directory with
 * `<tmpRoot>/origin` as its second argument.
 */
async function makeRepo(tmpRoot: string, files: Record<string, string>) {
  const sourceDir = join(tmpRoot, 'fixture-src');
  const originDir = join(tmpRoot, 'origin');
  for (const [relPath, body] of Object.entries(files)) {
    const target = join(sourceDir, relPath);
    const parentDir = join(target, '..');
    await mkdir(parentDir, { recursive: true });
    await writeFile(target, body, 'utf-8');
  }
  return makeLocalGitRepo(sourceDir, originDir);
}
/**
 * Returns a fresh parse-result fixture containing a single `orders` semantic model, no
 * cross-model metrics or relationships, and one parser warning.
 */
function metricflowParseResult(): MetricFlowParseResult {
  const ordersModel: MetricFlowParseResult['semanticModels'][number] = {
    name: 'orders',
    description: 'Orders',
    modelRef: 'orders',
    dimensions: [{ name: 'status', column: 'status', type: 'string', label: 'Status' }],
    measures: [{ type: 'simple', name: 'order_count', column: 'id', aggregation: 'count' }],
    entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }],
    defaultTimeDimension: null,
  };
  return {
    semanticModels: [ordersModel],
    crossModelMetrics: [],
    relationships: [],
    warnings: ['parser warning'],
  };
}
describe('MetricflowSourceAdapter', () => {
let tmpRoot: string;
let stagedDir: string;
let adapter: SourceAdapter;
beforeEach(async () => {
tmpRoot = await mkdtemp(join(tmpdir(), 'mf-adapter-'));
stagedDir = join(tmpRoot, 'stage');
adapter = new MetricflowSourceAdapter({ homeDir: join(tmpRoot, 'cache-home') });
});
afterEach(async () => {
await rm(tmpRoot, { recursive: true, force: true });
});
it('declares the expected source key and skill list', () => {
expect(adapter.source).toBe('metricflow');
expect(adapter.skillNames).toEqual(['metricflow_ingest']);
});
it('returns configured target warehouse connection ids', async () => {
const metricflow = new MetricflowSourceAdapter({
homeDir: join(tmpRoot, 'cache-home'),
targetConnectionIds: ['warehouse', 'analytics', 'warehouse'],
});
await expect(metricflow.listTargetConnectionIds?.(stagedDir)).resolves.toEqual(['analytics', 'warehouse']);
});
it('detects a staged dir with a semantic_models YAML', async () => {
await mkdir(join(stagedDir, 'models'), { recursive: true });
await writeFile(
join(stagedDir, 'models/orders.yml'),
'semantic_models:\n - {name: orders, model: x, measures: [{name: c, agg: count, expr: id}]}\n',
'utf-8',
);
expect(await adapter.detect(stagedDir)).toBe(true);
});
it('rejects a staged dir with no MetricFlow-shaped YAML', async () => {
await mkdir(stagedDir, { recursive: true });
await writeFile(join(stagedDir, 'dbt_project.yml'), 'name: proj\n', 'utf-8');
expect(await adapter.detect(stagedDir)).toBe(false);
});
it('chunk: first-run on a minimal single-model dir emits one WU', async () => {
await mkdir(join(stagedDir, 'models'), { recursive: true });
await writeFile(
join(stagedDir, 'models/orders.yml'),
'semantic_models:\n - {name: orders, model: x, measures: [{name: c, agg: count, expr: id}]}\n',
'utf-8',
);
const result = await adapter.chunk(stagedDir);
expect(result.workUnits).toHaveLength(1);
expect(result.workUnits[0].unitKey).toBe('metricflow-orders');
});
it('attaches deep parse artifacts to the chunk result', async () => {
await mkdir(stagedDir, { recursive: true });
await writeFile(
join(stagedDir, 'semantic_models.yml'),
[
'semantic_models:',
' - name: orders',
" model: ref('orders')",
' dimensions: []',
' measures:',
' - name: order_count',
' agg: count',
" expr: '1'",
].join('\n'),
);
const chunk = await adapter.chunk(stagedDir);
expect(chunk.parseArtifacts).toMatchObject({
semanticModels: [{ name: 'orders', modelRef: 'orders' }],
crossModelMetrics: [],
relationships: [],
warnings: [],
});
});
// Fetches a throwaway repo into `stagedDir`, then checks that the semantic-model YAML was
// staged and that `detect` accepts the resulting directory.
it('fetches repo YAML files into the staged directory using a per-connection cache', async () => {
  const repo = await makeRepo(tmpRoot, {
    'dbt_project.yml': 'name: analytics\n',
    // `model` is nested under the `- name:` list item so the fixture is valid block-style YAML.
    'models/orders.yml': 'semantic_models:\n  - name: orders\n    model: ref("orders")\n',
    'models/readme.md': '# ignored\n',
  });
  await adapter.fetch?.(
    {
      repoUrl: repo.repoUrl,
      branch: 'main',
      path: null,
      authToken: null,
      parsedTargetTables: {},
    },
    stagedDir,
    { connectionId: 'warehouse-1', sourceKey: 'metricflow' },
  );
  await expect(readFile(join(stagedDir, 'models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models');
  expect(await adapter.detect(stagedDir)).toBe(true);
});
// Passes a resolved target table for `orders` in the pull config and expects `fetch` to
// persist it so that `readMetricflowProjectionConfig(stagedDir)` can read it back.
it('persists parsed target tables for deterministic projection during fetch', async () => {
  const repo = await makeRepo(tmpRoot, {
    'dbt_project.yml': 'name: analytics\n',
    // `model` is nested under the `- name:` list item so the fixture is valid block-style YAML.
    'models/orders.yml': 'semantic_models:\n  - name: orders\n    model: ref("orders")\n',
  });
  await adapter.fetch?.(
    {
      repoUrl: repo.repoUrl,
      branch: 'main',
      path: null,
      authToken: null,
      parsedTargetTables: {
        orders: {
          ok: true,
          catalog: null,
          schema: 'analytics',
          name: 'orders',
          canonicalTable: 'analytics.orders',
        },
      },
    },
    stagedDir,
    { connectionId: 'warehouse-1', sourceKey: 'metricflow' },
  );
  // Only a subset of the entry is matched; `catalog` and `canonicalTable` are not asserted here.
  await expect(readMetricflowProjectionConfig(stagedDir)).resolves.toMatchObject({
    parsedTargetTables: {
      orders: {
        ok: true,
        schema: 'analytics',
        name: 'orders',
      },
    },
  });
});
// Writes a projection config for `orders`, runs `adapter.project` against mocked semantic-layer
// services, and checks that the source is written through the worktree-scoped service.
it('projects parsed MetricFlow semantic models in the integration worktree', async () => {
await writeMetricflowProjectionConfig(stagedDir, {
parsedTargetTables: {
orders: {
ok: true,
catalog: null,
schema: 'analytics',
name: 'orders',
canonicalTable: 'analytics.orders',
},
},
});
// Worktree-scoped service double: reports no existing manifest entry or source, and accepts writes.
const scoped = {
getManifestEntry: vi.fn().mockResolvedValue(null),
isManifestBacked: vi.fn().mockResolvedValue(false),
loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
loadSource: vi.fn().mockResolvedValue(null),
writeSource: vi.fn().mockResolvedValue({ warnings: [] }),
};
// Top-level service double: only `forWorktree` has a return value (the scoped double above).
const semanticLayerService = {
forWorktree: vi.fn().mockReturnValue(scoped),
getManifestEntry: vi.fn(),
isManifestBacked: vi.fn(),
loadAllSources: vi.fn(),
loadSource: vi.fn(),
writeSource: vi.fn(),
};
const result = await adapter.project?.({
connectionId: 'warehouse-1',
sourceKey: 'metricflow',
syncId: 'sync-1',
jobId: 'job-1',
runId: 'run-1',
stagedDir,
workdir: '/tmp/metricflow-integration',
// Helper defined outside this block; presumably supplies the 'orders' model and the
// 'parser warning' asserted below — confirm against its definition.
parseArtifacts: metricflowParseResult(),
semanticLayerService: semanticLayerService as never,
});
// The service must be scoped to the `workdir` passed to `project`.
expect(semanticLayerService.forWorktree).toHaveBeenCalledWith('/tmp/metricflow-integration');
expect(scoped.writeSource).toHaveBeenCalledWith(
'warehouse-1',
expect.objectContaining({ name: 'orders' }),
'dbt MetricFlow',
expect.any(String),
'dbt MetricFlow sync: create source orders',
{ skipValidation: true },
);
expect(result).toMatchObject({
warnings: ['parser warning'],
errors: [],
touchedSources: [{ connectionId: 'warehouse-1', sourceName: 'orders' }],
changedWikiPageKeys: [],
});
});
// With `parseArtifacts` undefined, `project` is expected to return a result carrying a single
// explanatory error and no warnings, touched sources or changed wiki pages.
it('returns a projection error when parse artifacts are missing', async () => {
const result = await adapter.project?.({
connectionId: 'warehouse-1',
sourceKey: 'metricflow',
syncId: 'sync-1',
jobId: 'job-1',
runId: 'run-1',
stagedDir,
workdir: '/tmp/metricflow-integration',
parseArtifacts: undefined,
// An empty object stands in for the service in this case.
semanticLayerService: {} as never,
});
expect(result).toMatchObject({
warnings: [],
errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
touchedSources: [],
changedWikiPageKeys: [],
});
});
});

View file

@ -1,206 +0,0 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { parseMetricFlowStagedDir } from './parse.js';
/**
 * Writes `body` as UTF-8 to `relPath` underneath `stagedDir`.
 *
 * Missing parent directories of the target file are created first, so nested
 * relative paths such as `models/orders.yml` can be written in one call.
 */
async function writeFixture(stagedDir: string, relPath: string, body: string): Promise<void> {
  const target = join(stagedDir, relPath);
  // Joining with '..' yields the directory that contains the target file.
  const parentDir = join(target, '..');
  await mkdir(parentDir, { recursive: true });
  await writeFile(target, body, 'utf-8');
}
/**
 * Tests for `parseMetricFlowStagedDir`.
 *
 * Every case writes YAML fixtures into a fresh temporary staged directory (created in
 * `beforeEach`, removed in `afterEach`) and inspects the parsed project. Block-style YAML
 * fixtures are written with explicit nesting so each list item's keys sit under its `-`.
 */
describe('parseMetricFlowStagedDir', () => {
  let stagedDir: string;

  beforeEach(async () => {
    stagedDir = await mkdtemp(join(tmpdir(), 'mf-parse-'));
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  // One model with two entities, one time dimension and two measures; name lists are
  // expected in sorted order regardless of declaration order.
  it('extracts one semantic_model with its measures + dimensions + entities', async () => {
    await writeFixture(
      stagedDir,
      'models/orders.yml',
      [
        'semantic_models:',
        '  - name: orders',
        '    description: Order fact table.',
        "    model: ref('orders')",
        '    entities:',
        '      - name: order_id',
        '        type: primary',
        '      - name: customer_id',
        '        type: foreign',
        '    dimensions:',
        '      - name: ordered_at',
        '        type: time',
        '        type_params:',
        '          time_granularity: day',
        '    measures:',
        '      - name: order_count',
        '        agg: count',
        '        expr: order_id',
        '      - name: gross_amount',
        '        agg: sum',
        '        expr: amount',
        '',
      ].join('\n'),
    );
    const project = await parseMetricFlowStagedDir(stagedDir);
    expect(project.semanticModels).toHaveLength(1);
    const sm = project.semanticModels[0];
    expect(sm.path).toBe('models/orders.yml');
    expect(sm.name).toBe('orders');
    expect(sm.modelRef).toBe('orders');
    expect(sm.measureNames).toEqual(['gross_amount', 'order_count']);
    expect(sm.dimensionNames).toEqual(['ordered_at']);
    expect(sm.entityNames).toEqual(['customer_id', 'order_id']);
    expect(sm.primaryEntities).toEqual(['order_id']);
    expect(sm.foreignEntities).toEqual(['customer_id']);
    expect(sm.extendsFrom).toEqual([]);
    expect(project.files).toEqual([
      {
        path: 'models/orders.yml',
        content: expect.stringContaining('semantic_models:'),
      },
    ]);
  });

  // `extends` given as a one-element list and as a bare string must both normalise to ['orders'].
  it('captures `extends:` as a string OR a list', async () => {
    await writeFixture(
      stagedDir,
      'models/orders.yml',
      [
        'semantic_models:',
        '  - name: orders',
        "    model: ref('orders')",
        '    measures:',
        '      - {name: order_count, agg: count, expr: order_id}',
        '',
      ].join('\n'),
    );
    await writeFixture(
      stagedDir,
      'models/orders_ext_list.yml',
      [
        'semantic_models:',
        '  - name: orders_ext_list',
        "    model: ref('orders_ext')",
        '    extends: [orders]',
        '    measures:',
        '      - {name: refund_amount, agg: sum, expr: refund_amt}',
        '',
      ].join('\n'),
    );
    await writeFixture(
      stagedDir,
      'models/orders_ext_str.yml',
      [
        'semantic_models:',
        '  - name: orders_ext_str',
        "    model: ref('orders_ext')",
        '    extends: orders',
        '    measures:',
        '      - {name: refund_amount2, agg: sum, expr: refund_amt2}',
        '',
      ].join('\n'),
    );
    const project = await parseMetricFlowStagedDir(stagedDir);
    const list = project.semanticModels.find((sm) => sm.name === 'orders_ext_list');
    const str = project.semanticModels.find((sm) => sm.name === 'orders_ext_str');
    expect(list?.extendsFrom).toEqual(['orders']);
    expect(str?.extendsFrom).toEqual(['orders']);
  });

  // Covers the four metric types: simple/cumulative expose `measureRef`, derived/ratio
  // expose the metrics they depend on.
  it('extracts metrics with referenced measures for simple + derived + ratio + cumulative', async () => {
    await writeFixture(
      stagedDir,
      'metrics/core.yml',
      [
        'metrics:',
        '  - name: total_orders',
        '    type: simple',
        '    type_params:',
        '      measure: order_count',
        '  - name: revenue',
        '    type: derived',
        '    type_params:',
        '      expr: gross_amount - refund_amount',
        '      metrics:',
        '        - name: gross_amount',
        '        - name: refund_amount',
        '  - name: refund_rate',
        '    type: ratio',
        '    type_params:',
        '      numerator: refund_amount',
        '      denominator: gross_amount',
        '  - name: cum_revenue',
        '    type: cumulative',
        '    type_params:',
        '      measure: gross_amount',
        '      window: 7 days',
        '',
      ].join('\n'),
    );
    const project = await parseMetricFlowStagedDir(stagedDir);
    expect(project.metrics).toHaveLength(4);
    const byName = new Map(project.metrics.map((m) => [m.name, m]));
    // Order of `dependsOn` is not part of the contract here, so compare a sorted COPY;
    // sorting the parser's own array in place would mutate the result under test.
    const sortedDependsOn = (metricName: string): string[] =>
      [...(byName.get(metricName)?.dependsOn ?? [])].sort();
    expect(byName.get('total_orders')?.type).toBe('simple');
    expect(byName.get('total_orders')?.measureRef).toBe('order_count');
    expect(byName.get('revenue')?.type).toBe('derived');
    expect(sortedDependsOn('revenue')).toEqual(['gross_amount', 'refund_amount']);
    expect(byName.get('refund_rate')?.type).toBe('ratio');
    expect(sortedDependsOn('refund_rate')).toEqual(['gross_amount', 'refund_amount']);
    expect(byName.get('cum_revenue')?.type).toBe('cumulative');
    expect(byName.get('cum_revenue')?.measureRef).toBe('gross_amount');
  });

  // A YAML file without `semantic_models` / `metrics` contributes nothing but is still listed in `allPaths`.
  it('returns empty arrays for a non-MetricFlow YAML (e.g. dbt_project.yml)', async () => {
    await writeFixture(stagedDir, 'dbt_project.yml', 'name: my_proj\nversion: "1.0.0"\n');
    const project = await parseMetricFlowStagedDir(stagedDir);
    expect(project.semanticModels).toEqual([]);
    expect(project.metrics).toEqual([]);
    expect(project.allPaths).toEqual(['dbt_project.yml']);
  });

  it('skips files that are not YAML (or fail to parse) without throwing', async () => {
    await writeFixture(stagedDir, 'broken.yml', '{ this is: not valid YAML :::');
    await writeFixture(stagedDir, 'other.txt', 'ignore me');
    const project = await parseMetricFlowStagedDir(stagedDir);
    expect(project.semanticModels).toEqual([]);
    expect(project.metrics).toEqual([]);
    // allPaths includes `.yml` / `.yaml` only, even when unparseable:
    expect(project.allPaths).toEqual(['broken.yml']);
  });

  // Files are written out of order on purpose; `allPaths` must come back sorted.
  it('allPaths is sorted deterministically', async () => {
    await writeFixture(stagedDir, 'models/z.yml', 'semantic_models: []\n');
    await writeFixture(stagedDir, 'models/a.yml', 'semantic_models: []\n');
    await writeFixture(stagedDir, 'metrics/b.yaml', 'metrics: []\n');
    const project = await parseMetricFlowStagedDir(stagedDir);
    expect(project.allPaths).toEqual(['metrics/b.yaml', 'models/a.yml', 'models/z.yml']);
  });

  // `ref('x')` resolves to x, `source('s','t')` resolves to t, and a bare string is kept as-is.
  it("extracts modelRef from ref('name') and source('src','table') and literal strings", async () => {
    await writeFixture(
      stagedDir,
      'models/a.yml',
      [
        'semantic_models:',
        ' - {name: a, model: "ref(\'orders\')", measures: [{name: c, agg: count, expr: id}]}',
        " - {name: b, model: \"source('raw','orders_raw')\", measures: [{name: c, agg: count, expr: id}]}",
        ' - {name: c, model: plain_table, measures: [{name: c, agg: count, expr: id}]}',
        '',
      ].join('\n'),
    );
    const project = await parseMetricFlowStagedDir(stagedDir);
    const byName = new Map(project.semanticModels.map((s) => [s.name, s]));
    expect(byName.get('a')?.modelRef).toBe('orders');
    expect(byName.get('b')?.modelRef).toBe('orders_raw');
    expect(byName.get('c')?.modelRef).toBe('plain_table');
  });
});

View file

@ -1,68 +0,0 @@
import { describe, expect, it } from 'vitest';
import { parseMetricflowPullConfig, pullConfigFromMetricflowIntegration } from './pull-config.js';
// Tests for the two pull-config entry points: `parseMetricflowPullConfig` (raw input -> config with
// defaults) and `pullConfigFromMetricflowIntegration` (integration block -> config).
describe('metricflow pull config', () => {
// Only `repoUrl` is supplied; every other field is expected to take its default.
it('applies defaults for optional git fields', () => {
const parsed = parseMetricflowPullConfig({
repoUrl: 'https://github.com/acme/analytics.git',
});
expect(parsed).toEqual({
repoUrl: 'https://github.com/acme/analytics.git',
branch: 'main',
path: null,
authToken: null,
parsedTargetTables: {},
});
});
it('preserves provided branch, path, token, and parsed target tables', () => {
const parsed = parseMetricflowPullConfig({
repoUrl: 'https://github.com/acme/analytics.git',
branch: 'release',
path: 'dbt',
authToken: 'secret-token',
parsedTargetTables: {
orders: {
// NOTE(review): `catalog` is 'warehouse' while `canonicalTable` begins with 'analytics.';
// the assertion below only checks `ok` and `name`, so the mismatch is not exercised — confirm intent.
catalog: 'warehouse',
schema: 'marts',
name: 'orders',
ok: true,
canonicalTable: 'analytics.marts.orders',
},
},
});
expect(parsed.branch).toBe('release');
expect(parsed.path).toBe('dbt');
expect(parsed.authToken).toBe('secret-token');
expect(parsed.parsedTargetTables.orders).toMatchObject({ ok: true, name: 'orders' });
});
// Any thrown error satisfies this case; no message is asserted.
it('rejects missing repoUrl', () => {
expect(() => parseMetricflowPullConfig({})).toThrow();
});
// A null `branch` in the integration block is expected to come out as 'main'.
it('builds pull config from a local metricflow integration block', () => {
expect(
pullConfigFromMetricflowIntegration({
repoUrl: 'https://github.com/acme/analytics.git',
branch: null,
path: null,
authToken: null,
}),
).toEqual({
repoUrl: 'https://github.com/acme/analytics.git',
branch: 'main',
path: null,
authToken: null,
parsedTargetTables: {},
});
});
it('throws a clear error when the integration block has no repo URL', () => {
expect(() => pullConfigFromMetricflowIntegration({ repoUrl: null })).toThrow(
'metricflow integration config missing repoUrl',
);
});
});

View file

@ -1,259 +0,0 @@
import { describe, expect, it } from 'vitest';
import { composeOverlay } from '../../../../context/sl/semantic-layer.service.js';
import type { SemanticLayerSource } from '../../../../context/sl/types.js';
import type { ParsedCrossModelMetric, ParsedMetricflowRelationship, ParsedSemanticModel } from './deep-parse.js';
import {
buildMetricflowColumns,
buildMetricflowJoinsForModel,
buildMetricflowSemanticModelSource,
countImportableMetricflowRelationships,
findMatchingMetricflowTable,
mapCrossModelMetricToSource,
mapSemanticModelToSource,
resolveMetricflowSemanticModelSourceName,
rewriteMetricflowManifestJoins,
toKebabCaseMetricflowName,
} from './semantic-models.js';
// Shared parsed-semantic-model fixture used by the mapping tests below: two dimensions,
// one simple (filtered sum) measure, one derived measure, and no entities.
const ordersModel: ParsedSemanticModel = {
name: 'orders',
description: 'Order facts',
modelRef: 'fct_orders',
dimensions: [
{ name: 'status', column: 'status', type: 'string', label: 'Status', description: 'Order status' },
// No description on this dimension.
{ name: 'ordered_at', column: 'ordered_at', type: 'time', label: 'Ordered At' },
],
measures: [
{
type: 'simple',
name: 'total_revenue',
column: 'amount',
aggregation: 'sum',
label: 'Total Revenue',
description: 'Revenue',
filter: "status = 'completed'",
},
{
type: 'derived',
name: 'average_revenue',
expr: 'total_revenue / NULLIF(order_count, 0)',
// `order_count` is referenced here but is not declared among this fixture's measures.
dependsOn: ['total_revenue', 'order_count'],
},
],
entities: [],
defaultTimeDimension: 'ordered_at',
};
// Tests for the helpers imported from './semantic-models.js' that turn parsed MetricFlow
// models, metrics and relationships into semantic-layer sources, columns and joins.
describe('metricflow semantic model mapping', () => {
// Spaces become '-', casing is lowered and the trailing '!' is dropped.
it('normalizes source names the same way the server importer did', () => {
expect(toKebabCaseMetricflowName('Fct Orders!')).toBe('fct-orders');
});
// The source name comes from `modelRef` ('fct_orders' -> 'fct-orders'), dimension names form the
// grain, and the simple measure is rendered as `sum(amount)`; labels do not appear in the output.
it('maps a parsed semantic model to a SemanticLayerSource', () => {
expect(mapSemanticModelToSource(ordersModel, 'analytics.orders')).toEqual({
name: 'fct-orders',
table: 'analytics.orders',
grain: ['status', 'ordered_at'],
columns: [
{ name: 'status', type: 'string', description: 'Order status' },
{ name: 'ordered_at', type: 'time' },
],
measures: [
{
name: 'total_revenue',
expr: 'sum(amount)',
description: 'Revenue',
filter: "status = 'completed'",
},
{
name: 'average_revenue',
expr: 'total_revenue / NULLIF(order_count, 0)',
},
],
joins: [],
descriptions: { dbt: 'Order facts' },
});
});
// The metric expression is used both as the source `sql` and as the single measure's `expr`.
it('maps a cross-model metric to a SQL standalone source', () => {
const metric: ParsedCrossModelMetric = {
name: 'roas',
label: 'ROAS',
description: 'Return on ad spend',
type: 'derived',
expr: 'revenue / spend',
dependsOn: [
{ metricName: 'orders', alias: 'revenue' },
{ metricName: 'campaigns', alias: 'spend' },
],
filter: "channel = 'paid'",
};
expect(mapCrossModelMetricToSource(metric)).toEqual({
name: 'roas',
sql: 'revenue / spend',
descriptions: { dbt: 'Return on ad spend' },
grain: [],
columns: [],
measures: [
{
name: 'roas',
expr: 'revenue / spend',
description: 'Return on ad spend',
filter: "channel = 'paid'",
},
],
joins: [],
});
});
// One assertion per matching rule named in the title, plus a miss that must yield undefined.
it('finds matching tables using target schema, exact name, dotted suffix, and underscore suffix', () => {
const tables = [
{ id: '1', name: 'fct_orders', catalog: null, db: 'analytics', columns: [] },
{ id: '2', name: 'warehouse.marts.fct_orders', catalog: null, db: 'marts', columns: [] },
{ id: '3', name: 'warehouse_fct_customers', catalog: null, db: null, columns: [] },
];
expect(findMatchingMetricflowTable('fct_orders', tables, 'analytics')?.id).toBe('1');
expect(findMatchingMetricflowTable('fct_orders', [tables[1]], null)?.id).toBe('2');
expect(findMatchingMetricflowTable('fct_customers', [tables[2]], null)?.id).toBe('3');
expect(findMatchingMetricflowTable('missing', tables, null)).toBeUndefined();
});
// Of three relationships, one has a missing column and one a missing table, leaving one importable.
it('counts only relationships whose tables and columns exist', () => {
const relationships: ParsedMetricflowRelationship[] = [
{ fromTable: 'orders', fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' },
{ fromTable: 'orders', fromColumn: 'missing', toTable: 'customers', toColumn: 'id' },
{ fromTable: 'orders', fromColumn: 'customer_id', toTable: 'missing_table', toColumn: 'id' },
];
const tables = [
{ id: '1', name: 'orders', catalog: null, db: null, columns: [{ id: 'c1', name: 'customer_id' }] },
{ id: '2', name: 'customers', catalog: null, db: null, columns: [{ id: 'c2', name: 'id' }] },
];
expect(countImportableMetricflowRelationships(relationships, tables)).toBe(1);
});
// With a matched table the name derives from the table ('ANALYTICS.Fct Orders' -> 'fct_orders');
// without one it falls back to the model's `modelRef`.
it('resolves semantic-model source names to lowercase snake_case identifiers', () => {
expect(
resolveMetricflowSemanticModelSourceName(ordersModel, {
id: '1',
name: 'ANALYTICS.Fct Orders',
catalog: null,
db: 'analytics',
columns: [],
}),
).toBe('fct_orders');
expect(resolveMetricflowSemanticModelSourceName({ ...ordersModel, modelRef: 'fallback_model' }, undefined)).toBe(
'fallback_model',
);
});
// The entity's `expr` (not its name) becomes the column name, typed 'string' and marked hidden.
it('materializes entity join keys as hidden standalone columns', () => {
expect(
buildMetricflowColumns({
...ordersModel,
entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id', description: 'FK to customers' }],
}),
).toContainEqual({ name: 'customer_id', type: 'string', visibility: 'hidden', description: 'FK to customers' });
});
// No matched table and no manifest source: the result is a standalone source that carries
// its own `table` and the join built from the orders -> customers relationship.
it('builds standalone sources with semantic-model joins', () => {
const orders: ParsedSemanticModel = {
...ordersModel,
modelRef: 'orders',
entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }],
};
const customers: ParsedSemanticModel = {
...ordersModel,
name: 'customers',
modelRef: 'customers',
dimensions: [{ name: 'id', column: 'id', type: 'string' }],
measures: [],
entities: [],
};
const sourceNameByModelRef = new Map([
[orders.modelRef, 'orders'],
[customers.modelRef, 'customers'],
]);
const joins = buildMetricflowJoinsForModel(
orders,
[{ fromTable: 'orders', fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }],
sourceNameByModelRef,
);
expect(
buildMetricflowSemanticModelSource(
{
model: orders,
matchedTable: undefined,
sourceName: 'orders',
manifestSource: null,
},
joins,
new Map(),
),
).toMatchObject({
name: 'orders',
table: 'orders',
joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
});
});
// When a manifest source exists, the built source is an overlay: it omits table/grain/columns
// and, once composed, leaves the manifest's columns intact while adding joins and the dbt description.
it('builds overlays for exact manifest matches so scanned columns remain manifest-owned', () => {
const manifestSource: SemanticLayerSource = {
name: 'orders',
table: 'analytics.orders',
grain: ['id'],
columns: [
{ name: 'id', type: 'string' },
{ name: 'customer_id', type: 'string' },
],
joins: [],
measures: [],
descriptions: { db: 'Orders table from scan' },
};
const overlay = buildMetricflowSemanticModelSource(
{
model: { ...ordersModel, modelRef: 'orders', description: 'dbt-described orders' },
matchedTable: undefined,
sourceName: 'orders',
manifestSource,
},
[{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
new Map(),
);
expect(overlay).not.toHaveProperty('table');
expect(overlay).not.toHaveProperty('grain');
expect(overlay).not.toHaveProperty('columns');
expect(overlay).toMatchObject({
name: 'orders',
joins: [{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }],
descriptions: { dbt: 'dbt-described orders' },
});
const composed = composeOverlay(manifestSource, overlay);
expect(composed.columns.map((column) => column.name)).toEqual(['id', 'customer_id']);
expect(composed.joins).toHaveLength(1);
// Both description keys survive composition: `db` from the manifest, `dbt` from the overlay.
expect(composed.descriptions).toEqual({ db: 'Orders table from scan', dbt: 'dbt-described orders' });
});
// Qualified names in both `to` and the `on` expression are replaced using the supplied map.
it('rewrites preserved manifest joins to synced bare source names', () => {
expect(
rewriteMetricflowManifestJoins(
[
{
to: 'analytics.customers',
on: 'analytics.orders.customer_id = analytics.customers.id',
relationship: 'many_to_one',
},
],
new Map([
['analytics.orders', 'orders'],
['analytics.customers', 'customers'],
]),
),
).toEqual([{ to: 'customers', on: 'orders.customer_id = customers.id', relationship: 'many_to_one' }]);
});
});

View file

@ -1,137 +0,0 @@
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, test } from 'vitest';
import type { KtxEmbeddingPort } from '../../../core/embedding.js';
import type { WorkUnit } from '../../types.js';
import { clusterNotionWorkUnits, MIN_PAGES_TO_CLUSTER } from './cluster.js';
/**
 * Deterministic stand-in for a real embedding: a 4-element histogram of `text`.
 *
 * The string is walked by code point; each one increments the bucket selected by
 * the UTF-16 code unit at its index 0, modulo 4. An empty string yields all zeros.
 */
function fakeEmbedding(text: string): number[] {
  const buckets = [0, 0, 0, 0];
  Array.from(text, (symbol) => symbol.charCodeAt(0) % 4).forEach((slot) => {
    buckets[slot] += 1;
  });
  return buckets;
}
// Embedding port double backed by `fakeEmbedding`: results are deterministic,
// and the bulk call returns one vector per input text, in input order.
const mockEmbed: KtxEmbeddingPort = {
  maxBatchSize: 100,
  async computeEmbedding(t: string) {
    return fakeEmbedding(t);
  },
  async computeEmbeddingsBulk(texts: string[]) {
    return texts.map((text) => fakeEmbedding(text));
  },
};
/**
 * Creates a fresh temporary staged directory containing one folder per page.
 *
 * Each page gets `pages/<id>/page.md` (the body) and `pages/<id>/metadata.json`
 * (id, title, path set to the title, objectType 'page', empty properties).
 * Nothing here removes the directory again; that is left to the caller.
 *
 * @returns the absolute path of the created directory
 */
async function makeStaged(pages: Array<{ id: string; title: string; body: string }>): Promise<string> {
  const root = await mkdtemp(join(tmpdir(), 'notion-cluster-'));
  for (const { id, title, body } of pages) {
    const pageDir = join(root, 'pages', id);
    const metadata = {
      id,
      title,
      path: title,
      objectType: 'page',
      properties: {},
    };
    await mkdir(pageDir, { recursive: true });
    await writeFile(join(pageDir, 'page.md'), body);
    await writeFile(join(pageDir, 'metadata.json'), JSON.stringify(metadata));
  }
  return root;
}
/**
 * Builds one work unit per page, in input order.
 *
 * Each unit is keyed `notion-<id>`, lists that page's `page.md` and `metadata.json`
 * as raw files, has no peer files, and depends on `manifest.json`.
 */
function makeWorkUnits(pages: Array<{ id: string }>): WorkUnit[] {
  const units: WorkUnit[] = [];
  for (const page of pages) {
    units.push({
      unitKey: `notion-${page.id}`,
      rawFiles: [`pages/${page.id}/page.md`, `pages/${page.id}/metadata.json`],
      peerFileIndex: [],
      dependencyPaths: ['manifest.json'],
    });
  }
  return units;
}
// Tests for `clusterNotionWorkUnits`, driven by pages staged via `makeStaged` and the
// deterministic `mockEmbed` port.
// NOTE(review): this describe registers no cleanup hook, so the temporary directories
// created by `makeStaged` are not removed by these tests — confirm whether that is intended.
describe('clusterNotionWorkUnits', () => {
// Three pages: the work units are expected back untouched (same count, same first key).
test('returns input unchanged when below threshold', async () => {
const pages = Array.from({ length: 3 }, (_, i) => ({
id: `p${i}`,
title: `Page ${i}`,
body: 'short body',
}));
const stagedDir = await makeStaged(pages);
const wus = makeWorkUnits(pages);
const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: mockEmbed });
expect(out).toHaveLength(3);
expect(out[0].unitKey).toBe('notion-p0');
});
// Four pages over the threshold: expects ceil(N / 8) cluster units, each with a
// `notion-cluster-<n>` key, at least one raw file, and the synthesis guidance in `notes`.
test('groups pages into k=ceil(N/8) clusters when above threshold', async () => {
const n = MIN_PAGES_TO_CLUSTER + 4;
const pages = Array.from({ length: n }, (_, i) => ({
id: `p${i}`,
title: `Topic ${i % 2 === 0 ? 'alpha' : 'beta'} ${i}`,
body: `Body for page ${i}`.repeat(20),
}));
const stagedDir = await makeStaged(pages);
const wus = makeWorkUnits(pages);
const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: mockEmbed });
expect(out.length).toBeLessThanOrEqual(wus.length);
expect(out.length).toBe(Math.ceil(wus.length / 8));
for (const wu of out) {
expect(wu.unitKey).toMatch(/^notion-cluster-\d+$/);
expect(wu.rawFiles.length).toBeGreaterThan(0);
expect(wu.notes).toMatch(/Synthesize/);
expect(wu.notes).toContain('emit_unmapped_fallback');
expect(wu.notes).toContain('Do not create SL sources under the Notion connection');
}
});
// Exactly MIN_PAGES_TO_CLUSTER pages: expects a single unit 'notion-cluster-1' holding every
// input raw file (compared as sets, so order is not asserted).
test('merges pages into one synthesis unit at the clustering threshold', async () => {
const pages = Array.from({ length: MIN_PAGES_TO_CLUSTER }, (_, i) => ({
id: `p${i}`,
title: `Customer source reference ${i}`,
body: `Customer source reference maps to orbit_analytics.customer ${i}`.repeat(10),
}));
const stagedDir = await makeStaged(pages);
const wus = makeWorkUnits(pages);
const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: mockEmbed });
expect(out).toHaveLength(1);
expect(out[0].unitKey).toBe('notion-cluster-1');
expect(new Set(out[0].rawFiles)).toEqual(new Set(wus.flatMap((wu) => wu.rawFiles)));
expect(out[0].notes).toContain('emit_unmapped_fallback');
expect(out[0].notes).toContain('Do not create SL sources under the Notion connection');
});
// The set of raw files across all output units must equal the set across all input units.
test('preserves coverage: every input rawFile appears in some cluster', async () => {
const pages = Array.from({ length: 12 }, (_, i) => ({
id: `p${i}`,
title: `Page ${i}`,
body: 'body content',
}));
const stagedDir = await makeStaged(pages);
const wus = makeWorkUnits(pages);
const inputFiles = new Set(wus.flatMap((wu) => wu.rawFiles));
const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: mockEmbed });
const outFiles = new Set(out.flatMap((wu) => wu.rawFiles));
expect(outFiles).toEqual(inputFiles);
});
// Both embedding methods throw; the function is expected to resolve with the original
// work units rather than reject.
test('falls back to input when embedding fails', async () => {
const pages = Array.from({ length: 10 }, (_, i) => ({
id: `p${i}`,
title: `Page ${i}`,
body: 'b',
}));
const stagedDir = await makeStaged(pages);
const wus = makeWorkUnits(pages);
const failingEmbed: KtxEmbeddingPort = {
maxBatchSize: 100,
computeEmbedding: async () => {
throw new Error('embedding down');
},
computeEmbeddingsBulk: async () => {
throw new Error('embedding down');
},
};
const out = await clusterNotionWorkUnits({ workUnits: wus, stagedDir, embedding: failingEmbed });
expect(out).toEqual(wus);
});
});

Some files were not shown because too many files have changed in this diff Show more