mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-22 08:38:08 +02:00
refactor(workspace): fold internal packages into cli
This commit is contained in:
parent
8c2333cc15
commit
ac3885b652
945 changed files with 517 additions and 2686 deletions
|
|
@ -1,6 +1,6 @@
|
|||
import { createRequire } from 'node:module';
|
||||
|
||||
import type { ReindexSummary } from '@ktx/context/index-sync';
|
||||
import type { ReindexSummary } from './context/index-sync/index.js';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { renderReindexJson, renderReindexPlain, reindexHasErrors } from './admin-reindex.js';
|
||||
import { runKtxCli } from './index.js';
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { KtxIngestEmbeddingPortAdapter, type KtxEmbeddingPort } from '@ktx/context';
|
||||
import { reindexLocalIndexes, type ReindexScopeResult, type ReindexSummary } from '@ktx/context/index-sync';
|
||||
import { loadKtxProject } from '@ktx/context/project';
|
||||
import { KtxIngestEmbeddingPortAdapter, type KtxEmbeddingPort } from './context/index.js';
|
||||
import { reindexLocalIndexes, type ReindexScopeResult, type ReindexSummary } from './context/index-sync/index.js';
|
||||
import { loadKtxProject } from './context/project/index.js';
|
||||
import { Option, type Command } from '@commander-js/extra-typings';
|
||||
import { cancel, intro, log, note, outro } from '@clack/prompts';
|
||||
import type { KtxCliCommandContext } from './cli-program.js';
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ export function registerAdminCommands(program: Command, context: KtxCliCommandCo
|
|||
.description('Print a JSON Schema describing ktx.yaml (for editors and LLM agents)')
|
||||
.option('--output <file>', 'Write the schema to a file instead of stdout')
|
||||
.action(async (options: { output?: string }) => {
|
||||
const { generateKtxProjectConfigJsonSchema } = await import('@ktx/context/project');
|
||||
const { generateKtxProjectConfigJsonSchema } = await import('./context/project/index.js');
|
||||
const json = `${JSON.stringify(generateKtxProjectConfigJsonSchema(), null, 2)}\n`;
|
||||
if (options.output) {
|
||||
const { writeFile } = await import('node:fs/promises');
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import type { KtxProjectLlmConfig } from '@ktx/context/project';
|
||||
import type { KtxProjectLlmConfig } from './context/project/index.js';
|
||||
|
||||
const CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS = [
|
||||
'systemTtl',
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ function stubPackageInfo(): KtxCliPackageInfo {
|
|||
return {
|
||||
name: '@ktx/cli',
|
||||
version: '0.0.0-test',
|
||||
contextPackageName: '@ktx/context',
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { buildDefaultKtxProjectConfig, type KtxLocalProject, type KtxProjectConfig } from '@ktx/context/project';
|
||||
import { buildDefaultKtxProjectConfig, type KtxLocalProject, type KtxProjectConfig } from './context/project/index.js';
|
||||
import { loadKtxCliProject } from './cli-project.js';
|
||||
|
||||
function projectWithConfig(config: KtxProjectConfig): KtxLocalProject {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
|
||||
import { loadKtxProject, type KtxLocalProject } from './context/project/index.js';
|
||||
|
||||
export interface LoadKtxCliProjectOptions {
|
||||
projectDir: string;
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ const requirePackageJson = createRequire(import.meta.url);
|
|||
export interface KtxCliPackageInfo {
|
||||
name: string;
|
||||
version: string;
|
||||
contextPackageName: '@ktx/context';
|
||||
}
|
||||
|
||||
export interface KtxCliIo {
|
||||
|
|
@ -67,12 +66,11 @@ export function packageInfoFromJson(packageJson: unknown): KtxCliPackageInfo {
|
|||
return {
|
||||
name: packageJson.name,
|
||||
version: assertCliVersion(packageJson.version, `${packageJson.name}/package.json`),
|
||||
contextPackageName: '@ktx/context',
|
||||
};
|
||||
}
|
||||
|
||||
async function runInit(args: { projectDir: string; force: boolean }, io: KtxCliIo): Promise<number> {
|
||||
const { initKtxProject } = await import('@ktx/context/project');
|
||||
const { initKtxProject } = await import('./context/project/index.js');
|
||||
const result = await initKtxProject({
|
||||
projectDir: args.projectDir,
|
||||
force: args.force,
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliComma
|
|||
stderr: { write: vi.fn() },
|
||||
},
|
||||
deps: {},
|
||||
packageInfo: { name: '@ktx/cli', version: '0.0.0-test', contextPackageName: '@ktx/context' },
|
||||
packageInfo: { name: '@ktx/cli', version: '0.0.0-test' },
|
||||
setExitCode: (code) => {
|
||||
exitCode = code;
|
||||
},
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliComma
|
|||
stderr: { write: vi.fn() },
|
||||
},
|
||||
deps: {},
|
||||
packageInfo: { name: '@ktx/cli', version: '0.0.0-test', contextPackageName: '@ktx/context' },
|
||||
packageInfo: { name: '@ktx/cli', version: '0.0.0-test' },
|
||||
setExitCode: (code) => {
|
||||
exitCode = code;
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { LookerClient, MetabaseRuntimeClient, NotionClient } from '@ktx/context/ingest';
|
||||
import { initKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from '@ktx/context/project';
|
||||
import type { KtxConnectionDriver, KtxScanConnector } from '@ktx/context/scan';
|
||||
import type { LookerClient, MetabaseRuntimeClient, NotionClient } from './context/ingest/index.js';
|
||||
import { initKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from './context/project/index.js';
|
||||
import type { KtxConnectionDriver, KtxScanConnector } from './context/scan/index.js';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { runKtxConnection } from './connection.js';
|
||||
|
||||
|
|
|
|||
|
|
@ -9,11 +9,11 @@ import {
|
|||
createLocalLookerCredentialResolver,
|
||||
metabaseRuntimeConfigFromLocalConnection,
|
||||
testRepoConnection,
|
||||
} from '@ktx/context/ingest';
|
||||
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from '@ktx/context/connections';
|
||||
import { resolveKtxConfigReference } from '@ktx/context/core';
|
||||
import { type KtxLocalProject, loadKtxProject } from '@ktx/context/project';
|
||||
import type { KtxScanConnector } from '@ktx/context/scan';
|
||||
} from './context/ingest/index.js';
|
||||
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from './context/connections/index.js';
|
||||
import { resolveKtxConfigReference } from './context/core/index.js';
|
||||
import { type KtxLocalProject, loadKtxProject } from './context/project/index.js';
|
||||
import type { KtxScanConnector } from './context/scan/index.js';
|
||||
import type { KtxCliIo } from './index.js';
|
||||
import { bold, dim, green, red, SYMBOLS } from './io/symbols.js';
|
||||
import { createKtxCliScanConnector } from './local-scan-connectors.js';
|
||||
|
|
|
|||
324
packages/cli/src/connectors/bigquery/connector.test.ts
Normal file
324
packages/cli/src/connectors/bigquery/connector.test.ts
Normal file
|
|
@ -0,0 +1,324 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
bigQueryConnectionConfigFromConfig,
|
||||
createBigQueryLiveDatabaseIntrospection,
|
||||
isKtxBigQueryConnectionConfig,
|
||||
type KtxBigQueryClient,
|
||||
KtxBigQueryScanConnector,
|
||||
type KtxBigQueryClientFactory,
|
||||
type KtxBigQueryDataset,
|
||||
type KtxBigQueryQueryJob,
|
||||
type KtxBigQueryTableRef,
|
||||
} from './index.js';
|
||||
|
||||
/**
 * Builds an in-memory BigQuery client factory for the connector tests.
 *
 * The fake answers `createQueryJob` by inspecting the SQL text: primary-key,
 * cardinality, distinct-value and single-column sample queries each get their
 * own canned result; every other query resolves through the shared
 * `queryResults` mock (one paid order row with `id` and `status`).
 * Dataset and table lookups always describe a single `orders` table.
 */
function fakeClientFactory(): KtxBigQueryClientFactory {
  // Wraps fixed rows and column descriptors in a job object.
  const cannedJob = (
    rows: Array<Record<string, unknown>>,
    fields: Array<{ name: string; type: string }>,
  ): KtxBigQueryQueryJob => ({
    getQueryResults: async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
      rows,
      undefined,
      { schema: { fields } },
    ],
  });

  // Default result used when no SQL marker matches.
  const queryResults = vi.fn(async (): ReturnType<KtxBigQueryQueryJob['getQueryResults']> => [
    [{ id: 1, status: 'paid' }],
    undefined,
    { schema: { fields: [{ name: 'id', type: 'INT64' }, { name: 'status', type: 'STRING' }] } },
  ]);

  const createQueryJob = vi.fn(
    async ({ query }: { query: string }): ReturnType<KtxBigQueryClient['createQueryJob']> => {
      if (query.includes('INFORMATION_SCHEMA.TABLE_CONSTRAINTS')) {
        return [
          cannedJob(
            [{ table_name: 'orders', column_name: 'id' }],
            [{ name: 'table_name', type: 'STRING' }, { name: 'column_name', type: 'STRING' }],
          ),
        ];
      }
      if (query.includes('APPROX_COUNT_DISTINCT')) {
        return [cannedJob([{ cardinality: 2 }], [{ name: 'cardinality', type: 'INT64' }])];
      }
      if (query.includes('SELECT DISTINCT CAST')) {
        return [cannedJob([{ val: 'open' }, { val: 'paid' }], [{ name: 'val', type: 'STRING' }])];
      }
      if (query.includes('SELECT `status`')) {
        return [cannedJob([{ status: 'paid' }], [{ name: 'status', type: 'STRING' }])];
      }
      return [{ getQueryResults: queryResults }];
    },
  );

  // Metadata returned for the only table the fake knows about.
  const getTable = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => {
    const ordersMetadata = {
      type: 'TABLE',
      numRows: '12',
      description: 'Orders table',
      schema: {
        fields: [
          { name: 'id', type: 'INT64', mode: 'REQUIRED', description: 'Order id' },
          { name: 'status', type: 'STRING', mode: 'NULLABLE' },
          { name: 'payload', type: 'RECORD', mode: 'NULLABLE' },
        ],
      },
    };
    return [{ metadata: ordersMetadata }];
  });
  const tableRef: KtxBigQueryTableRef = { id: 'orders', get: getTable };

  const createClient = vi.fn(() => ({
    getDatasets: vi.fn(
      async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }, { id: 'staging' }]],
    ),
    dataset: vi.fn(
      (datasetId: string): KtxBigQueryDataset => ({
        get: vi.fn(async () => [{ id: datasetId }]),
        getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [[tableRef]]),
      }),
    ),
    createQueryJob,
  }));

  return { createClient };
}
|
||||
|
||||
const connection = {
|
||||
driver: 'bigquery',
|
||||
dataset_id: 'analytics',
|
||||
credentials_json: JSON.stringify({ project_id: 'project-1', client_email: 'reader@example.test' }),
|
||||
location: 'US',
|
||||
} as const;
|
||||
|
||||
describe('KtxBigQueryScanConnector', () => {
|
||||
it('resolves configuration safely', () => {
|
||||
expect(isKtxBigQueryConnectionConfig(connection)).toBe(true);
|
||||
expect(isKtxBigQueryConnectionConfig({ driver: 'mysql' })).toBe(false);
|
||||
expect(bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection })).toMatchObject({
|
||||
projectId: 'project-1',
|
||||
datasetIds: ['analytics'],
|
||||
location: 'US',
|
||||
});
|
||||
});
|
||||
|
||||
it('introspects datasets, table metadata, primary keys, and normalized types', async () => {
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection,
|
||||
clientFactory: fakeClientFactory(),
|
||||
now: () => new Date('2026-04-29T17:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await connector.introspect(
|
||||
{ connectionId: 'warehouse', driver: 'bigquery' },
|
||||
{ runId: 'scan-run-1' },
|
||||
);
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
driver: 'bigquery',
|
||||
extractedAt: '2026-04-29T17:00:00.000Z',
|
||||
scope: { catalogs: ['project-1'], datasets: ['analytics'] },
|
||||
metadata: {
|
||||
project_id: 'project-1',
|
||||
datasets: ['analytics'],
|
||||
table_count: 1,
|
||||
total_columns: 3,
|
||||
},
|
||||
});
|
||||
expect(snapshot.tables[0]).toMatchObject({
|
||||
catalog: 'project-1',
|
||||
db: 'analytics',
|
||||
name: 'orders',
|
||||
kind: 'table',
|
||||
comment: 'Orders table',
|
||||
estimatedRows: 12,
|
||||
foreignKeys: [],
|
||||
});
|
||||
expect(snapshot.tables[0]?.columns).toEqual([
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'INT64',
|
||||
normalizedType: 'BIGINT',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: 'Order id',
|
||||
},
|
||||
{
|
||||
name: 'status',
|
||||
nativeType: 'STRING',
|
||||
normalizedType: 'VARCHAR',
|
||||
dimensionType: 'string',
|
||||
nullable: true,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
{
|
||||
name: 'payload',
|
||||
nativeType: 'RECORD',
|
||||
normalizedType: 'JSON',
|
||||
dimensionType: 'string',
|
||||
nullable: true,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => {
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection,
|
||||
clientFactory: fakeClientFactory(),
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.sampleTable(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
|
||||
columns: ['id', 'status'],
|
||||
limit: 1,
|
||||
},
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toEqual({
|
||||
headers: ['id', 'status'],
|
||||
headerTypes: ['INT64', 'STRING'],
|
||||
rows: [[1, 'paid']],
|
||||
totalRows: 1,
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.sampleColumn(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
|
||||
column: 'status',
|
||||
limit: 5,
|
||||
},
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ values: ['paid'], nullCount: null, distinctCount: null });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly(
|
||||
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
|
||||
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
|
||||
|
||||
await expect(
|
||||
connector.getColumnDistinctValues(
|
||||
{ catalog: 'project-1', db: 'analytics', name: 'orders' },
|
||||
'status',
|
||||
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
|
||||
),
|
||||
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
|
||||
await expect(connector.getTableRowCount('orders')).resolves.toBe(12);
|
||||
await expect(connector.listDatasets()).resolves.toEqual(['analytics', 'staging']);
|
||||
await expect(
|
||||
connector.columnStats(
|
||||
{ connectionId: 'warehouse', table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, column: 'status' },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toBeNull();
|
||||
await connector.cleanup();
|
||||
});
|
||||
|
||||
it('applies maximumBytesBilled to read-only queries when configured', async () => {
|
||||
const clientFactory = fakeClientFactory();
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection,
|
||||
clientFactory,
|
||||
maxBytesBilled: 123456789,
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly(
|
||||
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
|
||||
|
||||
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
|
||||
expect(client.createQueryJob).toHaveBeenLastCalledWith(
|
||||
expect.objectContaining({
|
||||
maximumBytesBilled: '123456789',
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('applies canonical BigQuery YAML scan limits to query jobs', async () => {
|
||||
const clientFactory = fakeClientFactory();
|
||||
const connector = new KtxBigQueryScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: { ...connection, max_bytes_billed: '987654321', job_timeout_ms: 30_000 },
|
||||
clientFactory,
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly(
|
||||
{ connectionId: 'warehouse', sql: 'select id, status from `project-1`.`analytics`.`orders`', maxRows: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });
|
||||
|
||||
const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
|
||||
expect(client.createQueryJob).toHaveBeenLastCalledWith(
|
||||
expect.objectContaining({
|
||||
maximumBytesBilled: '987654321',
|
||||
jobTimeoutMs: 30_000,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('adapts native snapshots to live-database introspection snapshots', async () => {
|
||||
const introspection = createBigQueryLiveDatabaseIntrospection({
|
||||
connections: { warehouse: connection },
|
||||
clientFactory: fakeClientFactory(),
|
||||
now: () => new Date('2026-04-29T17:00:00.000Z'),
|
||||
});
|
||||
|
||||
await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
metadata: { project_id: 'project-1' },
|
||||
tables: expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
catalog: 'project-1',
|
||||
db: 'analytics',
|
||||
name: 'orders',
|
||||
columns: expect.arrayContaining([
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'INT64',
|
||||
normalizedType: 'BIGINT',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: 'Order id',
|
||||
},
|
||||
]),
|
||||
}),
|
||||
]),
|
||||
});
|
||||
});
|
||||
});
|
||||
536
packages/cli/src/connectors/bigquery/connector.ts
Normal file
536
packages/cli/src/connectors/bigquery/connector.ts
Normal file
|
|
@ -0,0 +1,536 @@
|
|||
import { BigQuery, type TableField } from '@google-cloud/bigquery';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/index.js';
|
||||
import {
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
type KtxColumnStatsResult,
|
||||
type KtxQueryResult,
|
||||
type KtxReadOnlyQueryInput,
|
||||
type KtxScanConnector,
|
||||
type KtxScanContext,
|
||||
type KtxScanInput,
|
||||
type KtxSchemaColumn,
|
||||
type KtxSchemaSnapshot,
|
||||
type KtxSchemaTable,
|
||||
type KtxTableListEntry,
|
||||
type KtxTableRef,
|
||||
type KtxTableSampleInput,
|
||||
type KtxTableSampleResult,
|
||||
} from '../../context/scan/index.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { KtxBigQueryDialect } from './dialect.js';
|
||||
|
||||
/**
 * Raw, unresolved BigQuery connection settings as handed to the connector.
 *
 * - `driver` is compared case-insensitively against `bigquery`.
 * - `dataset_ids`, when a non-empty array, is used instead of `dataset_id`.
 * - `credentials_json`, `dataset_id` and `location` may be literal values or
 *   `env:NAME` / `file:PATH` references.
 * - `credentials_json` must be JSON text carrying a string `project_id`.
 * - `max_bytes_billed` is honoured only as a positive finite number or a
 *   non-blank string; `job_timeout_ms` only as a positive integer.
 * - Any other key is accepted and ignored by the code in this file's visible part.
 */
export interface KtxBigQueryConnectionConfig {
|
||||
driver?: string;
|
||||
dataset_id?: string;
|
||||
dataset_ids?: string[];
|
||||
credentials_json?: string;
|
||||
location?: string;
|
||||
max_bytes_billed?: number | string;
|
||||
job_timeout_ms?: number;
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
/**
 * Connection settings after reference resolution and validation, as returned
 * by `bigQueryConnectionConfigFromConfig`.
 *
 * `projectId` is taken from `credentials.project_id`, `datasetIds` is never
 * empty, and `location` is present only when one was configured.
 */
export interface KtxBigQueryResolvedConnectionConfig {
|
||||
projectId: string;
|
||||
credentials: Record<string, unknown>;
|
||||
datasetIds: string[];
|
||||
location?: string;
|
||||
}
|
||||
|
||||
/**
 * Read-only query input extended with optional query parameters; the
 * connector forwards `params` to the dialect's `prepareQuery`.
 */
export interface KtxBigQueryReadOnlyQueryInput extends KtxReadOnlyQueryInput {
|
||||
params?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
/**
 * Tuning knobs for `getColumnDistinctValues`.
 *
 * - `maxCardinality`: when the measured cardinality exceeds this, no values are fetched.
 * - `limit`: passed to the distinct-values query.
 * - `sampleSize`: passed to the cardinality query; the connector uses 10000 when omitted.
 */
export interface KtxBigQueryColumnDistinctValuesOptions {
|
||||
maxCardinality: number;
|
||||
limit: number;
|
||||
sampleSize?: number;
|
||||
}
|
||||
|
||||
/**
 * Outcome of a distinct-values lookup. `values` is `null` when the column's
 * cardinality was above the caller's `maxCardinality`, and an empty array
 * when the cardinality is zero.
 */
export interface KtxBigQueryColumnDistinctValuesResult {
|
||||
values: string[] | null;
|
||||
cardinality: number;
|
||||
}
|
||||
|
||||
/**
 * Minimal query-job surface the connector relies on.
 *
 * `getQueryResults` resolves to a tuple: result rows first, an unused second
 * slot, then an optional object that may carry the result schema's fields.
 */
export interface KtxBigQueryQueryJob {
|
||||
getQueryResults(): Promise<
|
||||
[Array<Record<string, unknown>>, unknown, { schema?: { fields?: TableField[] } }?, ...unknown[]]
|
||||
>;
|
||||
}
|
||||
|
||||
/**
 * Minimal table handle as listed by a dataset.
 *
 * `get` resolves to a tuple whose first element exposes the table metadata:
 * type, row count (string or number), description and schema fields.
 */
export interface KtxBigQueryTableRef {
|
||||
id?: string;
|
||||
metadata?: { type?: string };
|
||||
get(): Promise<
|
||||
[
|
||||
{
|
||||
metadata: {
|
||||
type?: string;
|
||||
numRows?: string | number;
|
||||
description?: string;
|
||||
schema?: { fields?: TableField[] };
|
||||
};
|
||||
},
|
||||
...unknown[],
|
||||
]
|
||||
>;
|
||||
}
|
||||
|
||||
/**
 * Minimal dataset handle: `get` is awaited by `testConnection` for each
 * configured dataset, and `getTables` resolves to a tuple whose first
 * element is the list of table handles.
 */
export interface KtxBigQueryDataset {
|
||||
get(): Promise<unknown>;
|
||||
getTables(): Promise<[KtxBigQueryTableRef[], ...unknown[]]>;
|
||||
}
|
||||
|
||||
/**
 * The subset of BigQuery client operations the connector uses: listing
 * datasets, obtaining a dataset handle, and creating query jobs.
 * `DefaultBigQueryClientFactory` adapts the real client to this shape;
 * tests provide fakes.
 */
export interface KtxBigQueryClient {
|
||||
getDatasets(input?: { maxResults?: number }): Promise<[Array<{ id?: string }>, ...unknown[]]>;
|
||||
dataset(datasetId: string): KtxBigQueryDataset;
|
||||
createQueryJob(input: {
|
||||
query: string;
|
||||
location?: string;
|
||||
params?: Record<string, unknown>;
|
||||
maximumBytesBilled?: string;
|
||||
jobTimeoutMs?: number;
|
||||
}): Promise<[KtxBigQueryQueryJob, ...unknown[]]>;
|
||||
}
|
||||
|
||||
/**
 * Creates a `KtxBigQueryClient` from a project id and parsed credentials.
 * Injectable through the connector options so the client can be replaced.
 */
export interface KtxBigQueryClientFactory {
|
||||
createClient(input: { projectId: string; credentials: Record<string, unknown> }): KtxBigQueryClient;
|
||||
}
|
||||
|
||||
/**
 * Constructor options for `KtxBigQueryScanConnector`.
 *
 * - `connection`: raw settings; validated and resolved at construction time.
 * - `clientFactory`: defaults to `DefaultBigQueryClientFactory`.
 * - `env`: used to resolve `env:` references; `process.env` when omitted.
 * - `now`: clock for the snapshot timestamp; defaults to `() => new Date()`.
 * - `maxBytesBilled` / `queryTimeoutMs`: take precedence over the
 *   connection's `max_bytes_billed` / `job_timeout_ms` when provided.
 */
export interface KtxBigQueryScanConnectorOptions {
|
||||
connectionId: string;
|
||||
connection: KtxBigQueryConnectionConfig | undefined;
|
||||
clientFactory?: KtxBigQueryClientFactory;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
now?: () => Date;
|
||||
maxBytesBilled?: number | string;
|
||||
queryTimeoutMs?: number;
|
||||
}
|
||||
|
||||
/**
 * Production client factory: wraps a real `BigQuery` instance behind the
 * narrow `KtxBigQueryClient` surface the connector depends on.
 */
class DefaultBigQueryClientFactory implements KtxBigQueryClientFactory {
  /**
   * @param input Project id and credentials, passed straight to the `BigQuery` constructor.
   * @returns An adapter that forwards each call to the underlying client.
   */
  createClient(input: { projectId: string; credentials: Record<string, unknown> }): KtxBigQueryClient {
    const bigQuery = new BigQuery(input);

    // Exposes only the two dataset operations the connector uses.
    const adaptDataset = (datasetId: string): KtxBigQueryDataset => {
      const handle = bigQuery.dataset(datasetId);
      return {
        get: () => handle.get() as Promise<unknown>,
        getTables: () => handle.getTables() as Promise<[KtxBigQueryTableRef[], ...unknown[]]>,
      };
    };

    const adapter: KtxBigQueryClient = {
      getDatasets: (options) => bigQuery.getDatasets(options) as Promise<[Array<{ id?: string }>, ...unknown[]]>,
      dataset: adaptDataset,
      createQueryJob: (options) => bigQuery.createQueryJob(options) as Promise<[KtxBigQueryQueryJob, ...unknown[]]>,
    };
    return adapter;
  }
}
|
||||
|
||||
/**
 * Resolves a configuration string that may be an indirect reference.
 *
 * - `env:NAME` yields `env[NAME]`, or `''` when the variable is not set.
 * - `file:PATH` yields the trimmed UTF-8 contents of the file; a leading `~`
 *   in PATH is expanded to the current user's home directory.
 * - Any other value is returned unchanged.
 *
 * @param value Literal value or `env:` / `file:` reference.
 * @param env Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  const envPrefix = 'env:';
  const filePrefix = 'file:';

  if (value.startsWith(envPrefix)) {
    return env[value.slice(envPrefix.length)] ?? '';
  }
  if (value.startsWith(filePrefix)) {
    const rawPath = value.slice(filePrefix.length);
    // Drop the separator(s) after `~` before resolving: resolve() treats an
    // absolute segment such as "/creds.json" as a new root, which would
    // discard the home directory and read from the filesystem root instead.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Reads one connection setting as a resolved string.
 *
 * @param connection Raw connection settings, possibly undefined.
 * @param key Setting to read.
 * @param env Environment used when the value is an `env:` reference.
 * @returns The trimmed value after reference resolution, or `undefined` when
 *   the setting is missing, not a string, or blank.
 */
function stringConfigValue(
  connection: KtxBigQueryConnectionConfig | undefined,
  key: keyof KtxBigQueryConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Determines which datasets a connection targets.
 *
 * A non-empty `dataset_ids` array wins: blank entries are dropped and the
 * rest are reference-resolved. Otherwise the single `dataset_id` setting is
 * used, giving a one-element list or an empty list when it is absent.
 *
 * NOTE(review): list entries are tested for blankness after trimming but are
 * resolved untrimmed, unlike `dataset_id` — confirm whether that is intended.
 *
 * @param connection Raw connection settings.
 * @param env Environment used for `env:` references.
 */
function datasetIds(connection: KtxBigQueryConnectionConfig, env: NodeJS.ProcessEnv): string[] {
  const listed = connection.dataset_ids;
  if (!Array.isArray(listed) || listed.length === 0) {
    const single = stringConfigValue(connection, 'dataset_id', env);
    return single ? [single] : [];
  }
  const nonBlank = listed.filter((entry) => entry.trim().length > 0);
  return nonBlank.map((entry) => resolveStringReference(entry, env));
}
|
||||
|
||||
/**
 * Extracts the billing cap from a connection's `max_bytes_billed` setting.
 *
 * @param connection Raw connection settings, possibly undefined.
 * @returns The number when it is finite and positive, the trimmed text when
 *   it is a non-blank string, otherwise `undefined`.
 */
function bigQueryMaxBytesBilledFromConnection(
  connection: KtxBigQueryConnectionConfig | undefined,
): number | string | undefined {
  const configured = connection?.max_bytes_billed;
  switch (typeof configured) {
    case 'number':
      return Number.isFinite(configured) && configured > 0 ? configured : undefined;
    case 'string': {
      const text = configured.trim();
      return text.length > 0 ? text : undefined;
    }
    default:
      return undefined;
  }
}
|
||||
|
||||
/**
 * Extracts the job timeout from a connection's `job_timeout_ms` setting.
 *
 * @param connection Raw connection settings, possibly undefined.
 * @returns The value when it is a positive integer, otherwise `undefined`.
 */
function bigQueryJobTimeoutMsFromConnection(connection: KtxBigQueryConnectionConfig | undefined): number | undefined {
  const timeout = connection?.job_timeout_ms;
  if (typeof timeout === 'number' && Number.isInteger(timeout) && timeout > 0) {
    return timeout;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Maps a BigQuery table metadata type onto the schema snapshot's table kind.
 *
 * Matching is case-insensitive. `VIEW` and `MATERIALIZED_VIEW` map to
 * `'view'`, `EXTERNAL` and `EXTERNAL_TABLE` to `'external'`, and everything
 * else (including a missing type) to `'table'`.
 */
function tableKind(metadataType: string | undefined): KtxSchemaTable['kind'] {
  switch (String(metadataType ?? '').toUpperCase()) {
    case 'VIEW':
    case 'MATERIALIZED_VIEW':
      return 'view';
    case 'EXTERNAL':
    case 'EXTERNAL_TABLE':
      return 'external';
    default:
      return 'table';
  }
}
|
||||
|
||||
/**
 * Converts a loosely typed value (for example a row count reported as a
 * string) to a finite number.
 *
 * @param value Value to convert.
 * @returns The finite number, or `null` when the value is absent, blank, or
 *   not numeric.
 */
function firstNumber(value: unknown): number | null {
  // Number(null) and Number('') are both 0; without these guards an absent
  // value would be reported as a real zero instead of "no number".
  if (value === null || value === undefined) {
    return null;
  }
  if (typeof value === 'string' && value.trim().length === 0) {
    return null;
  }
  const numberValue = Number(value);
  return Number.isFinite(numberValue) ? numberValue : null;
}
|
||||
|
||||
/**
 * Flattens a query result cell into a plain, displayable value.
 *
 * - `null` / `undefined` become `null`.
 * - Arrays become a `', '`-joined string; object elements are normalized
 *   first so they do not render as `[object Object]`.
 * - Objects exposing both `toNumber` and `toFixed` are converted with `toNumber()`.
 * - Objects whose only key is `value` holding a non-object are unwrapped to that value.
 * - Any other object is JSON-stringified.
 * - Primitives are returned unchanged.
 */
function normalizeValue(value: unknown): unknown {
  if (value === null || value === undefined) {
    return null;
  }
  if (Array.isArray(value)) {
    return value
      .map((item) =>
        // Only object elements need normalizing; primitives (and null) keep
        // their plain String() rendering.
        typeof item === 'object' && item !== null ? String(normalizeValue(item)) : String(item),
      )
      .join(', ');
  }
  if (typeof value === 'object') {
    if ('toNumber' in value && typeof value.toNumber === 'function' && 'toFixed' in value && typeof value.toFixed === 'function') {
      return value.toNumber();
    }
    if ('value' in value && Object.keys(value).length === 1 && typeof value.value !== 'object') {
      return value.value;
    }
    return JSON.stringify(value);
  }
  return value;
}
|
||||
|
||||
/**
 * Type guard: true when the connection's `driver` is `bigquery`, compared
 * case-insensitively. A missing connection or driver yields false.
 */
export function isKtxBigQueryConnectionConfig(
  connection: KtxBigQueryConnectionConfig | undefined,
): connection is KtxBigQueryConnectionConfig {
  const driver = connection?.driver ?? '';
  return String(driver).toLowerCase() === 'bigquery';
}
|
||||
|
||||
/**
 * Validates raw BigQuery connection settings and resolves them into the form
 * the connector works with.
 *
 * @param input.connectionId Connection name, used in error messages.
 * @param input.connection Raw connection settings.
 * @param input.env Environment for `env:` references; `process.env` when omitted.
 * @returns Project id, parsed credentials, target datasets and, when
 *   configured, the location.
 * @throws Error when the driver is not BigQuery, when `credentials_json` is
 *   missing, is not valid JSON, is not a JSON object, or lacks a string
 *   `project_id`, or when no dataset is configured.
 */
export function bigQueryConnectionConfigFromConfig(input: {
  connectionId: string;
  connection: KtxBigQueryConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxBigQueryResolvedConnectionConfig {
  // Captured before the guard so the message can still name the rejected driver.
  const inputDriver = input.connection?.driver ?? 'unknown';
  if (!isKtxBigQueryConnectionConfig(input.connection)) {
    throw new Error(`Native BigQuery connector cannot run driver "${inputDriver}"`);
  }

  const env = input.env ?? process.env;
  const credentialsJson = stringConfigValue(input.connection, 'credentials_json', env);
  if (!credentialsJson) {
    throw new Error(`Native BigQuery connector requires connections.${input.connectionId}.credentials_json`);
  }

  let parsedCredentials: unknown;
  try {
    parsedCredentials = JSON.parse(credentialsJson);
  } catch {
    // The parser's own message is deliberately dropped: it can quote a
    // fragment of the input, and this input is a secret.
    throw new Error(
      `Native BigQuery connector requires connections.${input.connectionId}.credentials_json to be valid JSON`,
    );
  }
  // JSON.parse accepts any JSON value (null, numbers, arrays, ...); only an
  // object can carry service-account fields.
  if (typeof parsedCredentials !== 'object' || parsedCredentials === null || Array.isArray(parsedCredentials)) {
    throw new Error(
      `Native BigQuery connector requires connections.${input.connectionId}.credentials_json to be a JSON object`,
    );
  }
  const credentials = parsedCredentials as Record<string, unknown>;

  const projectId = typeof credentials.project_id === 'string' ? credentials.project_id : undefined;
  if (!projectId) {
    throw new Error(`Native BigQuery connector requires credentials_json.project_id for connections.${input.connectionId}`);
  }
  const resolvedDatasetIds = datasetIds(input.connection, env);
  if (resolvedDatasetIds.length === 0) {
    throw new Error(`Native BigQuery connector requires connections.${input.connectionId}.dataset_id or dataset_ids`);
  }
  const location = stringConfigValue(input.connection, 'location', env);
  return { projectId, credentials, datasetIds: resolvedDatasetIds, ...(location ? { location } : {}) };
}
|
||||
|
||||
export class KtxBigQueryScanConnector implements KtxScanConnector {
|
||||
readonly id: string;
|
||||
readonly driver = 'bigquery' as const;
|
||||
readonly capabilities = createKtxConnectorCapabilities({
|
||||
tableSampling: true,
|
||||
columnSampling: true,
|
||||
columnStats: false,
|
||||
readOnlySql: true,
|
||||
nestedAnalysis: true,
|
||||
formalForeignKeys: false,
|
||||
estimatedRowCounts: true,
|
||||
});
|
||||
|
||||
private readonly connectionId: string;
|
||||
private readonly resolved: KtxBigQueryResolvedConnectionConfig;
|
||||
private readonly clientFactory: KtxBigQueryClientFactory;
|
||||
private readonly now: () => Date;
|
||||
private readonly maxBytesBilled?: number | string;
|
||||
private readonly queryTimeoutMs?: number;
|
||||
private readonly dialect = new KtxBigQueryDialect();
|
||||
private client: KtxBigQueryClient | null = null;
|
||||
|
||||
constructor(options: KtxBigQueryScanConnectorOptions) {
|
||||
this.connectionId = options.connectionId;
|
||||
this.resolved = bigQueryConnectionConfigFromConfig({
|
||||
connectionId: options.connectionId,
|
||||
connection: options.connection,
|
||||
env: options.env,
|
||||
});
|
||||
this.clientFactory = options.clientFactory ?? new DefaultBigQueryClientFactory();
|
||||
this.now = options.now ?? (() => new Date());
|
||||
this.maxBytesBilled = options.maxBytesBilled ?? bigQueryMaxBytesBilledFromConnection(options.connection);
|
||||
this.queryTimeoutMs = options.queryTimeoutMs ?? bigQueryJobTimeoutMsFromConnection(options.connection);
|
||||
this.id = `bigquery:${options.connectionId}`;
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
try {
|
||||
const client = this.getClient();
|
||||
await client.getDatasets({ maxResults: 1 });
|
||||
for (const datasetId of this.resolved.datasetIds) {
|
||||
await client.dataset(datasetId).get();
|
||||
}
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
}
|
||||
}
|
||||
|
||||
async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const tables: KtxSchemaTable[] = [];
|
||||
for (const datasetId of this.resolved.datasetIds) {
|
||||
tables.push(...(await this.introspectDataset(datasetId)));
|
||||
}
|
||||
return {
|
||||
connectionId: this.connectionId,
|
||||
driver: 'bigquery',
|
||||
extractedAt: this.now().toISOString(),
|
||||
scope: { catalogs: [this.resolved.projectId], datasets: this.resolved.datasetIds },
|
||||
metadata: {
|
||||
project_id: this.resolved.projectId,
|
||||
datasets: this.resolved.datasetIds,
|
||||
table_count: tables.length,
|
||||
total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
|
||||
},
|
||||
tables,
|
||||
};
|
||||
}
|
||||
|
||||
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult & { headerTypes?: string[] }> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
|
||||
return { headers: result.headers, headerTypes: result.headerTypes, rows: result.rows, totalRows: result.totalRows };
|
||||
}
|
||||
|
||||
/**
 * Samples values of a single column.
 *
 * Rows that are empty or whose first cell is `null` are dropped. Null and
 * distinct counts are not computed and are reported as `null`.
 *
 * @param input Table reference, column name and row limit.
 * @param _ctx Scan context (unused).
 * @throws Error when `input.connectionId` belongs to another connection.
 */
async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
  this.assertConnection(input.connectionId);
  const sampleSql = this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit);
  const { rows } = await this.query(sampleSql);
  const values = rows.flatMap((row) => (row.length > 0 && row[0] !== null ? [row[0]] : []));
  return { values, nullCount: null, distinctCount: null };
}
|
||||
|
||||
/**
 * Column statistics are not produced by this connector.
 *
 * @param _input Ignored.
 * @param _ctx Ignored.
 * @returns Always `null`.
 */
async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
  return null;
}
|
||||
|
||||
/**
 * Runs a caller-supplied query after read-only validation and row limiting.
 *
 * The SQL is passed through `assertReadOnlySql`, wrapped by
 * `limitSqlForExecution`, and then prepared by the dialect before execution.
 *
 * @param input SQL text, optional parameters and optional row cap.
 * @param _ctx Scan context (unused).
 * @returns The query result with `rowCount` set to the number of returned rows.
 * @throws Error when `input.connectionId` belongs to another connection.
 */
async executeReadOnly(input: KtxBigQueryReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
  this.assertConnection(input.connectionId);
  const readOnlySql = assertReadOnlySql(input.sql);
  const cappedSql = limitSqlForExecution(readOnlySql, input.maxRows);
  const { sql, params } = this.dialect.prepareQuery(cappedSql, input.params);
  const outcome = await this.query(sql, params);
  return { ...outcome, rowCount: outcome.rows.length };
}
|
||||
|
||||
/**
 * Returns the distinct values of a column when its sampled cardinality is small.
 *
 * Cardinality is estimated first over a sample (`options.sampleSize`,
 * default 10000). The distinct values are fetched only when that estimate
 * does not exceed `options.maxCardinality`.
 *
 * @param table Table to inspect.
 * @param columnName Unquoted column name.
 * @param options Cardinality ceiling, value limit and optional sample size.
 * @returns `null` when no cardinality could be read; `{ values: [], cardinality: 0 }`
 *   for an empty sample; `{ values: null, cardinality }` when the ceiling is
 *   exceeded; otherwise the stringified non-null values with the cardinality.
 */
async getColumnDistinctValues(
  table: KtxTableRef,
  columnName: string,
  options: KtxBigQueryColumnDistinctValuesOptions,
): Promise<KtxBigQueryColumnDistinctValuesResult | null> {
  const qualifiedTable = this.qTableName(table);
  const column = this.dialect.quoteIdentifier(columnName);
  const sampleSize = options.sampleSize ?? 10000;

  const cardinalitySql = this.dialect.generateCardinalitySampleQuery(qualifiedTable, column, sampleSize);
  const cardinality = await this.singleNumber(cardinalitySql, 'cardinality');
  if (cardinality === null) return null;
  if (cardinality === 0) return { values: [], cardinality: 0 };
  if (cardinality > options.maxCardinality) return { values: null, cardinality };

  const distinctSql = this.dialect.generateDistinctValuesQuery(qualifiedTable, column, options.limit);
  const distinctRows = await this.queryRaw<{ val: unknown }>(distinctSql);
  const values: string[] = [];
  for (const { val } of distinctRows) {
    if (val !== null) values.push(String(val));
  }
  return { values, cardinality };
}
|
||||
|
||||
/**
 * Looks up the estimated row count of a table by introspecting its dataset.
 *
 * @param tableName Table name to look for.
 * @param datasetId Dataset to search; defaults to the first configured dataset.
 * @returns The table's `estimatedRows`, or 0 when no dataset is available,
 *   the table is not found, or no estimate is recorded.
 */
async getTableRowCount(tableName: string, datasetId = this.resolved.datasetIds[0]): Promise<number> {
  if (!datasetId) return 0;
  const datasetTables = await this.introspectDataset(datasetId);
  const match = datasetTables.find((candidate) => candidate.name === tableName);
  return match?.estimatedRows ?? 0;
}
|
||||
|
||||
/**
 * Formats a table reference as a quoted name by delegating to the dialect.
 *
 * @param table Table reference; `catalog` and `db` are optional.
 * @returns The dialect-formatted table name.
 */
qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
  const qualifiedName = this.dialect.formatTableName(table);
  return qualifiedName;
}
|
||||
|
||||
/**
 * Quotes a single identifier by delegating to the dialect.
 *
 * @param identifier Unquoted identifier.
 * @returns The dialect-quoted identifier.
 */
quoteIdentifier(identifier: string): string {
  const quoted = this.dialect.quoteIdentifier(identifier);
  return quoted;
}
|
||||
|
||||
/**
 * Lists the ids of the datasets returned by the client.
 *
 * @returns Dataset ids, skipping entries whose id is missing or empty.
 */
async listDatasets(): Promise<string[]> {
  const [datasets] = await this.getClient().getDatasets();
  const ids: string[] = [];
  for (const dataset of datasets) {
    const id = dataset.id;
    if (id) ids.push(id);
  }
  return ids;
}
|
||||
|
||||
/**
 * Lists tables and views across datasets.
 *
 * @param datasetIds Datasets to scan; when omitted, every dataset from
 *   `listDatasets()` is scanned.
 * @returns Entries sorted by dataset and then by table name. Tables without
 *   an id are skipped; metadata type `VIEW` is reported as `'view'`, anything
 *   else as `'table'`.
 */
async listTables(datasetIds?: string[]): Promise<KtxTableListEntry[]> {
  const targets = datasetIds ?? (await this.listDatasets());
  const entries: KtxTableListEntry[] = [];

  for (const datasetId of targets) {
    const [tables] = await this.getClient().dataset(datasetId).getTables();
    for (const table of tables) {
      if (!table.id) continue;
      const isView = table.metadata?.type === 'VIEW';
      entries.push({ schema: datasetId, name: table.id, kind: isView ? 'view' : 'table' });
    }
  }

  return entries.sort((left, right) => {
    const bySchema = left.schema.localeCompare(right.schema);
    return bySchema || left.name.localeCompare(right.name);
  });
}
|
||||
|
||||
/**
 * Drops the cached client reference; the next `getClient()` call creates a new one.
 */
async cleanup(): Promise<void> {
  this.client = null;
}
|
||||
|
||||
/**
 * Returns the cached client, creating it through the client factory on first use.
 */
private getClient(): KtxBigQueryClient {
  if (this.client) {
    return this.client;
  }
  const created = this.clientFactory.createClient({
    projectId: this.resolved.projectId,
    credentials: this.resolved.credentials,
  });
  this.client = created;
  return created;
}
|
||||
|
||||
/**
 * Runs a query job and converts the result into row arrays.
 *
 * Location, parameters, billed-bytes cap and job timeout are attached to the
 * job only when they are set. Headers come from the response schema; when the
 * schema has no fields, the keys of the first row are used instead.
 *
 * @param sql Query text.
 * @param params Optional named parameters; omitted from the job when empty.
 * @returns Headers, optional header types, normalized rows and row counts.
 */
private async query(sql: string, params?: Record<string, unknown>): Promise<KtxQueryResult> {
  const { location } = this.resolved;
  const [job] = await this.getClient().createQueryJob({
    query: sql,
    ...(location ? { location } : {}),
    ...(params && Object.keys(params).length > 0 ? { params } : {}),
    ...(this.maxBytesBilled ? { maximumBytesBilled: String(this.maxBytesBilled) } : {}),
    ...(this.queryTimeoutMs ? { jobTimeoutMs: this.queryTimeoutMs } : {}),
  });
  const [rows, , response] = await job.getQueryResults();

  const schemaFields = response?.schema?.fields;
  const schemaHeaders = schemaFields?.map((field) => field.name || '') ?? [];
  const headerTypes = schemaFields?.map((field) => String(field.type || 'STRING')) ?? [];
  // Fall back to the first row's keys when the response carries no schema fields.
  const headers = schemaHeaders.length === 0 && rows.length > 0 ? Object.keys(rows[0]!) : schemaHeaders;

  const rowCount = rows.length;
  return {
    headers,
    headerTypes: headerTypes.length > 0 ? headerTypes : undefined,
    rows: rows.map((row) => headers.map((header) => normalizeValue(row[header]))),
    totalRows: rowCount,
    rowCount,
  };
}
|
||||
|
||||
/**
 * Runs a query and returns each row as an object keyed by header name.
 *
 * @param sql Query text.
 * @param params Optional named parameters.
 * @returns One object per row; the cast to `T` is not validated at runtime.
 */
private async queryRaw<T extends Record<string, unknown>>(sql: string, params?: Record<string, unknown>): Promise<T[]> {
  const { headers, rows } = await this.query(sql, params);
  return rows.map((cells) => Object.fromEntries(headers.map((header, position) => [header, cells[position]])) as T);
}
|
||||
|
||||
/**
 * Runs a query and reads one numeric cell from its first row.
 *
 * @param sql Query text.
 * @param header Column name to read from the first row.
 * @returns Whatever `firstNumber` yields for that cell (or for `undefined`
 *   when the query returned no rows).
 */
private async singleNumber(sql: string, header: string): Promise<number | null> {
  const [firstRow] = await this.queryRaw<Record<string, unknown>>(sql);
  return firstNumber(firstRow?.[header]);
}
|
||||
|
||||
/**
 * Collects table metadata for one dataset.
 *
 * Tables are listed first, then primary keys are queried, then each table's
 * metadata is fetched in turn. Foreign keys are always reported as empty.
 *
 * @param datasetId Dataset to introspect.
 * @returns One schema table per listed table, in listing order.
 */
private async introspectDataset(datasetId: string): Promise<KtxSchemaTable[]> {
  const [tableRefs] = await this.getClient().dataset(datasetId).getTables();
  const primaryKeys = await this.primaryKeys(datasetId);

  const collected: KtxSchemaTable[] = [];
  for (const ref of tableRefs) {
    const name = ref.id || '';
    const [table] = await ref.get();
    const { metadata } = table;
    const fields = metadata.schema?.fields ?? [];
    collected.push({
      catalog: this.resolved.projectId,
      db: datasetId,
      name,
      kind: tableKind(metadata.type),
      comment: metadata.description || null,
      estimatedRows: firstNumber(metadata.numRows) ?? 0,
      columns: fields.map((field) => this.toSchemaColumn(name, field, primaryKeys)),
      foreignKeys: [],
    });
  }
  return collected;
}
|
||||
|
||||
/**
 * Reads primary-key columns for every table in a dataset from
 * `INFORMATION_SCHEMA.TABLE_CONSTRAINTS` joined with `KEY_COLUMN_USAGE`.
 *
 * Columns whose name starts with `stacksync_record_id_` or
 * `sync_primary_key_` are excluded by the query.
 *
 * @param datasetId Dataset to inspect. It is interpolated into the SQL text,
 *   so it must come from trusted configuration.
 * @returns Map from table name to the set of its primary-key column names.
 */
private async primaryKeys(datasetId: string): Promise<Map<string, Set<string>>> {
  const informationSchema = `${this.resolved.projectId}.${datasetId}.INFORMATION_SCHEMA`;
  // Clauses are joined with single spaces; the resulting text must stay stable.
  const sql = [
    'SELECT tc.table_name, kcu.column_name',
    'FROM `' + informationSchema + '.TABLE_CONSTRAINTS` tc',
    'JOIN `' + informationSchema + '.KEY_COLUMN_USAGE` kcu',
    'ON tc.constraint_name = kcu.constraint_name',
    'AND tc.table_schema = kcu.table_schema',
    'AND tc.table_name = kcu.table_name',
    "WHERE tc.constraint_type = 'PRIMARY KEY'",
    "AND tc.table_schema = '" + datasetId + "'",
    "AND NOT REGEXP_CONTAINS(kcu.column_name, r'^(stacksync_record_id|sync_primary_key)_')",
    'ORDER BY tc.table_name, kcu.ordinal_position',
  ].join(' ');

  const rows = await this.queryRaw<{ table_name: string; column_name: string }>(sql);

  const byTable = new Map<string, Set<string>>();
  for (const { table_name: tableName, column_name: columnName } of rows) {
    let columns = byTable.get(tableName);
    if (!columns) {
      columns = new Set<string>();
      byTable.set(tableName, columns);
    }
    columns.add(columnName);
  }
  return byTable;
}
|
||||
|
||||
/**
 * Converts a BigQuery table field into a schema column.
 *
 * @param tableName Owning table, used to look up primary-key membership.
 * @param field Field metadata; a missing type is treated as `STRING` and a
 *   missing name as the empty string.
 * @param primaryKeys Primary-key column names grouped by table name.
 * @returns The column; it is nullable unless the field mode is `REQUIRED`.
 */
private toSchemaColumn(tableName: string, field: TableField, primaryKeys: Map<string, Set<string>>): KtxSchemaColumn {
  const columnName = field.name || '';
  const nativeType = String(field.type || 'STRING').toUpperCase();
  const normalizedType = this.dialect.mapDataType(nativeType);
  const dimensionType = this.dialect.mapToDimensionType(nativeType);
  const isPrimaryKey = primaryKeys.get(tableName)?.has(columnName) ?? false;

  return {
    name: columnName,
    nativeType,
    normalizedType,
    dimensionType,
    nullable: field.mode !== 'REQUIRED',
    primaryKey: isPrimaryKey,
    comment: field.description || null,
  };
}
|
||||
|
||||
/**
 * Guards against using this connector for a different connection.
 *
 * @param connectionId Connection id from the incoming request.
 * @throws Error when it differs from the connector's own connection id.
 */
private assertConnection(connectionId: string): void {
  if (connectionId === this.connectionId) {
    return;
  }
  throw new Error(`BigQuery connector ${this.connectionId} cannot scan connection ${connectionId}`);
}
|
||||
}
|
||||
52
packages/cli/src/connectors/bigquery/dialect.test.ts
Normal file
52
packages/cli/src/connectors/bigquery/dialect.test.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxBigQueryDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the BigQuery dialect: quoting, type mapping, SQL generation,
// parameter rewriting, and the explicit "no statistics" contract.
describe('KtxBigQueryDialect', () => {
  const dialect = new KtxBigQueryDialect();
  const ordersTable = '`p`.`d`.`orders`';
  const statusColumn = '`status`';

  it('quotes identifiers and formats project.dataset.table names', () => {
    expect(dialect.quoteIdentifier('order`items')).toBe('`order\\`items`');

    const fullyQualified = dialect.formatTableName({ catalog: 'project-1', db: 'analytics', name: 'orders' });
    expect(fullyQualified).toBe('`project-1`.`analytics`.`orders`');

    const datasetQualified = dialect.formatTableName({ db: 'analytics', name: 'orders' });
    expect(datasetQualified).toBe('`analytics`.`orders`');

    const bare = dialect.formatTableName({ name: 'orders' });
    expect(bare).toBe('`orders`');
  });

  it('maps native BigQuery types to normalized types and scan dimensions', () => {
    const normalizedCases = [
      ['INT64', 'BIGINT'],
      ['STRUCT', 'JSON'],
      ['GEOGRAPHY', 'GEOGRAPHY'],
    ] as const;
    for (const [nativeType, normalized] of normalizedCases) {
      expect(dialect.mapDataType(nativeType)).toBe(normalized);
    }

    const dimensionCases = [
      ['TIMESTAMP', 'time'],
      ['NUMERIC', 'number'],
      ['BOOL', 'boolean'],
      ['JSON', 'string'],
    ] as const;
    for (const [nativeType, dimension] of dimensionCases) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimension);
    }
  });

  it('generates sampling, cardinality, and distinct-value SQL', () => {
    const tableSample = dialect.generateSampleQuery(ordersTable, 5, ['id', 'status']);
    expect(tableSample).toBe('SELECT `id`, `status` FROM `p`.`d`.`orders` ORDER BY RAND() LIMIT 5');

    const columnSample = dialect.generateColumnSampleQuery(ordersTable, 'status', 10);
    expect(columnSample).toBe(
      "SELECT `status` FROM `p`.`d`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS STRING)) != '' ORDER BY RAND() LIMIT 10",
    );

    const cardinalitySql = dialect.generateCardinalitySampleQuery(ordersTable, statusColumn, 100);
    expect(cardinalitySql).toContain('SELECT APPROX_COUNT_DISTINCT(val) AS cardinality');

    const distinctSql = dialect.generateDistinctValuesQuery(ordersTable, statusColumn, 20);
    expect(distinctSql).toContain('SELECT DISTINCT CAST(`status` AS STRING) AS val');
  });

  it('rewrites colon parameters to BigQuery named parameters', () => {
    const withParams = dialect.prepareQuery('SELECT * FROM orders WHERE id = :id AND id_2 = :id_2', { id: 1, id_2: 2 });
    expect(withParams).toEqual({
      sql: 'SELECT * FROM orders WHERE id = @id AND id_2 = @id_2',
      params: { id: 1, id_2: 2 },
    });

    const withoutParams = dialect.prepareQuery('SELECT * FROM orders');
    expect(withoutParams).toEqual({ sql: 'SELECT * FROM orders', params: undefined });
  });

  it('keeps unsupported statistics explicit', () => {
    expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull();
  });
});
|
||||
207
packages/cli/src/connectors/bigquery/dialect.ts
Normal file
207
packages/cli/src/connectors/bigquery/dialect.ts
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/index.js';
|
||||
|
||||
/** Table reference accepted by `formatTableName`: `name` is required, `catalog` and `db` are optional. */
type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL text helpers for BigQuery: identifier quoting, type mapping, and
 * generation of sampling, cardinality and time-bucketing expressions.
 *
 * All methods are pure string builders; nothing here talks to BigQuery.
 * Arguments named `tableName`, `columnName` (in the cardinality and
 * distinct-value generators), `column`, `origin` and `timezone` are
 * interpolated verbatim, so callers must pass already-quoted or trusted text.
 */
export class KtxBigQueryDialect {
  readonly type = 'bigquery';

  /** Exact-match lookup from upper-cased native type to scan dimension type. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    TIMESTAMP: 'time',
    DATETIME: 'time',
    DATE: 'time',
    TIME: 'time',
    INT64: 'number',
    INTEGER: 'number',
    FLOAT64: 'number',
    FLOAT: 'number',
    NUMERIC: 'number',
    BIGNUMERIC: 'number',
    STRING: 'string',
    BYTES: 'string',
    BOOL: 'boolean',
    BOOLEAN: 'boolean',
  };

  /** Exact-match lookup from upper-cased native type to normalized type name. */
  private static readonly normalizedTypeNames: Readonly<Record<string, string>> = {
    STRING: 'VARCHAR',
    BYTES: 'VARBINARY',
    INTEGER: 'BIGINT',
    INT64: 'BIGINT',
    FLOAT: 'DOUBLE',
    FLOAT64: 'DOUBLE',
    NUMERIC: 'DECIMAL',
    BIGNUMERIC: 'DECIMAL',
    BOOLEAN: 'BOOLEAN',
    BOOL: 'BOOLEAN',
    TIMESTAMP: 'TIMESTAMP',
    DATE: 'DATE',
    TIME: 'TIME',
    DATETIME: 'DATETIME',
    GEOGRAPHY: 'GEOGRAPHY',
    JSON: 'JSON',
  };

  /** Escapes regular-expression metacharacters so `text` matches literally. */
  private static escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * Splits an `"<amount> <unit>"` interval into its two parts.
   * Surrounding and repeated whitespace is tolerated; tokens after the unit are ignored.
   *
   * @throws Error when the amount or the unit is missing.
   */
  private static splitInterval(interval: string): { amount: string; unit: string } {
    const [amount, unit] = interval.trim().split(/\s+/);
    if (!amount || !unit) {
      throw new Error(`Invalid interval "${interval}": expected "<amount> <unit>"`);
    }
    return { amount, unit: unit.toUpperCase() };
  }

  /**
   * Wraps an identifier in backticks.
   *
   * Backslashes and backticks inside the identifier are backslash-escaped, so
   * an identifier ending in `\` cannot swallow the closing backtick.
   */
  quoteIdentifier(identifier: string): string {
    return '`' + identifier.replace(/[\\`]/g, '\\$&') + '`';
  }

  /**
   * Formats a table reference as `catalog.db.name`, `db.name` or `name`,
   * quoting each part. The catalog is only used when `db` is also present.
   */
  formatTableName(table: BigQueryTableNameRef): string {
    if (table.catalog && table.db) {
      return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
    }
    if (table.db) {
      return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
    }
    return this.quoteIdentifier(table.name);
  }

  /**
   * Maps a native BigQuery type to a normalized type name.
   * `RECORD`/`STRUCT` become `JSON`; unknown types are returned upper-cased and trimmed.
   */
  mapDataType(nativeType: string): string {
    const fieldType = nativeType.toUpperCase().trim();
    if (fieldType === 'RECORD' || fieldType === 'STRUCT') {
      return 'JSON';
    }
    return KtxBigQueryDialect.normalizedTypeNames[fieldType] ?? fieldType;
  }

  /**
   * Maps a native BigQuery type to a scan dimension type.
   *
   * Exact matches win; otherwise substring heuristics are applied in the
   * order time, number, boolean; anything else (including an empty type) is `'string'`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const normalizedType = nativeType.toUpperCase().trim();
    const exact = this.typeMappings[normalizedType];
    if (exact) {
      return exact;
    }
    if (normalizedType.includes('TIME') || normalizedType.includes('DATE')) {
      return 'time';
    }
    if (normalizedType.includes('INT') || normalizedType.includes('NUM') || normalizedType.includes('FLOAT')) {
      return 'number';
    }
    if (normalizedType.includes('BOOL')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds a randomly ordered sample query.
   *
   * @param tableName Already-quoted table name.
   * @param limit Maximum number of rows.
   * @param columns Unquoted column names; `*` is selected when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} ORDER BY RAND() LIMIT ${limit}`;
  }

  /**
   * Builds a randomly ordered sample of one column, skipping NULL and blank values.
   *
   * @param tableName Already-quoted table name.
   * @param columnName Unquoted column name.
   * @param limit Maximum number of rows.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to BigQuery `@name` parameters.
   *
   * Only names present in `params` are rewritten, and only where the name is
   * followed by a word boundary (so `:id` does not touch `:id_2`). Parameter
   * names are matched literally, never as regular-expression syntax.
   *
   * @returns The rewritten SQL and a copy of `params`, or `params: undefined`
   *   when none were supplied or the object is empty.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }
    let processedSql = sql;
    for (const key of Object.keys(params)) {
      const placeholder = new RegExp(`:${KtxBigQueryDialect.escapeRegExp(key)}\\b`, 'g');
      // A replacer function keeps `$` sequences in the key from being read as replacement patterns.
      processedSql = processedSql.replace(placeholder, () => `@${key}`);
    }
    const processedParams: Record<string, unknown> = { ...params };
    return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
  }

  /** Returns a `RAND() < pct` predicate, or `''` when `samplePct` is outside the open interval (0, 1). */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `RAND() < ${samplePct}`;
  }

  /** Returns a `TABLESAMPLE SYSTEM` clause, or `''` when `samplePct` is outside the open interval (0, 1). */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `TABLESAMPLE SYSTEM (${samplePct * 100} PERCENT)`;
  }

  /** Returns `LIMIT n`, adding `OFFSET m` only for a positive offset. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Expression counting NULLs of `column` (interpolated verbatim). */
  getNullCountExpression(column: string): string {
    return `COUNTIF(${column} IS NULL)`;
  }

  /** Expression approximating the distinct count of `column` (interpolated verbatim). */
  getDistinctCountExpression(column: string): string {
    return `APPROX_COUNT_DISTINCT(${column})`;
  }

  /**
   * Builds a query estimating distinct values over the first `sampleSize` non-NULL rows.
   *
   * @param tableName Already-quoted table name.
   * @param columnName Already-quoted column expression.
   * @param sampleSize Maximum number of rows in the sample.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return [
      '',
      'WITH sampled AS (',
      `SELECT ${columnName} AS val`,
      `FROM ${tableName}`,
      `WHERE ${columnName} IS NOT NULL`,
      `LIMIT ${sampleSize}`,
      ')',
      'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
      'FROM sampled',
      '',
    ].join('\n');
  }

  /**
   * Builds a query returning up to `limit` distinct non-NULL values, cast to STRING and sorted.
   *
   * @param tableName Already-quoted table name.
   * @param columnName Already-quoted column expression.
   * @param limit Maximum number of values.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return [
      '',
      `SELECT DISTINCT CAST(${columnName} AS STRING) AS val`,
      `FROM ${tableName}`,
      `WHERE ${columnName} IS NOT NULL`,
      'ORDER BY val',
      `LIMIT ${limit}`,
      '',
    ].join('\n');
  }

  /** Column statistics queries are not supported; always returns `null`. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /**
   * Same as `generateCardinalitySampleQuery`, but the sample is taken in random order.
   *
   * @param tableName Already-quoted table name.
   * @param columnName Already-quoted column expression.
   * @param sampleSize Maximum number of rows in the sample.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return [
      '',
      'WITH sampled AS (',
      `SELECT ${columnName} AS val`,
      `FROM ${tableName}`,
      `WHERE ${columnName} IS NOT NULL`,
      'ORDER BY RAND()',
      `LIMIT ${sampleSize}`,
      ')',
      'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
      'FROM sampled',
      '',
    ].join('\n');
  }

  /**
   * Builds a `DATE_TRUNC` expression for a calendar granularity.
   * When a timezone is given, the column is first wrapped in `DATETIME(column, 'tz')`.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const bigQueryGranularity = granularity.toUpperCase();
    if (timezone) {
      return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`;
    }
    return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
  }

  /**
   * Builds an expression bucketing `column` into fixed-size intervals counted from `origin`.
   *
   * @param column Column expression (interpolated verbatim).
   * @param interval `"<amount> <unit>"`, e.g. `"15 minute"`; weeks are expressed as 7-day multiples.
   * @param origin Bucket origin timestamp literal; defaults to `1970-01-01`.
   * @param timezone Optional timezone applied via `DATETIME(column, 'tz')`.
   * @throws Error when the interval is malformed or its amount is not a positive finite number.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const parsed = KtxBigQueryDialect.splitInterval(interval);
    let amount = Number(parsed.amount);
    if (!Number.isFinite(amount) || amount <= 0) {
      throw new Error(`Invalid interval "${interval}": amount must be a positive number`);
    }
    let unit = parsed.unit;
    if (unit === 'WEEK') {
      // Weeks are rewritten as whole days so both the diff and the add use DAY.
      unit = 'DAY';
      amount = amount * 7;
    }
    const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;
    const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
    return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${unit}) / ${amount}) * ${amount} AS INT64) ${unit})`;
  }

  /**
   * Converts `"<amount> <unit>"` into `INTERVAL <amount> <UNIT>`.
   *
   * @throws Error when the amount or the unit is missing.
   */
  parseIntervalToSql(interval: string): string {
    const { amount, unit } = KtxBigQueryDialect.splitInterval(interval);
    return `INTERVAL ${amount} ${unit}`;
  }
}
|
||||
18
packages/cli/src/connectors/bigquery/index.ts
Normal file
18
packages/cli/src/connectors/bigquery/index.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
export { KtxBigQueryDialect } from './dialect.js';
|
||||
export {
|
||||
bigQueryConnectionConfigFromConfig,
|
||||
isKtxBigQueryConnectionConfig,
|
||||
KtxBigQueryScanConnector,
|
||||
type KtxBigQueryClient,
|
||||
type KtxBigQueryClientFactory,
|
||||
type KtxBigQueryColumnDistinctValuesOptions,
|
||||
type KtxBigQueryColumnDistinctValuesResult,
|
||||
type KtxBigQueryConnectionConfig,
|
||||
type KtxBigQueryDataset,
|
||||
type KtxBigQueryQueryJob,
|
||||
type KtxBigQueryReadOnlyQueryInput,
|
||||
type KtxBigQueryResolvedConnectionConfig,
|
||||
type KtxBigQueryScanConnectorOptions,
|
||||
type KtxBigQueryTableRef,
|
||||
} from './connector.js';
|
||||
export { createBigQueryLiveDatabaseIntrospection } from './live-database-introspection.js';
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/index.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/index.js';
|
||||
import {
|
||||
KtxBigQueryScanConnector,
|
||||
type KtxBigQueryClientFactory,
|
||||
type KtxBigQueryConnectionConfig,
|
||||
} from './connector.js';
|
||||
|
||||
/** Options for `createBigQueryLiveDatabaseIntrospection`. */
interface CreateBigQueryLiveDatabaseIntrospectionOptions {
  /** Project connection configs keyed by connection id. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Optional client factory, passed through to the connector. */
  clientFactory?: KtxBigQueryClientFactory;
  /** Optional clock, passed through to the connector. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Adapts the BigQuery scan connector to the live-database introspection port.
 *
 * Each `extractSchema` call builds a fresh connector for the requested
 * connection id, introspects it, and always cleans the connector up
 * afterwards, whether or not introspection succeeded.
 *
 * @param options Connection configs plus optional client factory and clock.
 * @returns A port whose `extractSchema` yields the connector's schema snapshot.
 */
export function createBigQueryLiveDatabaseIntrospection(
  options: CreateBigQueryLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    // The config is looked up per call; an unknown id is passed on as `undefined`.
    const { connections, clientFactory, now } = options;
    const connector = new KtxBigQueryScanConnector({
      connectionId,
      connection: connections[connectionId] as KtxBigQueryConnectionConfig | undefined,
      clientFactory,
      now,
    });
    try {
      return await connector.introspect({ connectionId, driver: 'bigquery' }, { runId: `bigquery-${connectionId}` });
    } finally {
      await connector.cleanup();
    }
  };
  return { extractSchema };
}
|
||||
286
packages/cli/src/connectors/clickhouse/connector.test.ts
Normal file
286
packages/cli/src/connectors/clickhouse/connector.test.ts
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
clickHouseClientConfigFromConfig,
|
||||
createClickHouseLiveDatabaseIntrospection,
|
||||
isKtxClickHouseConnectionConfig,
|
||||
KtxClickHouseScanConnector,
|
||||
type KtxClickHouseClientFactory,
|
||||
} from './index.js';
|
||||
|
||||
/** Wraps a payload in a minimal result-set stub whose `json()` resolves to that payload. */
function result<T>(payload: T) {
  const json = async (): Promise<T> => payload;
  return { json };
}
|
||||
|
||||
/**
 * Builds a client factory whose client answers a fixed set of SQL statements
 * with canned payloads.
 *
 * Routes are tried in order and the first match wins; SQL that matches no
 * route makes the fake `query` throw.
 */
function fakeClientFactory(): KtxClickHouseClientFactory {
  // Shared by the generated table-sample query and the wrapped read-only query.
  const idAndEventNameRow = () =>
    result({
      meta: [
        { name: 'id', type: 'UInt64' },
        { name: 'event_name', type: 'String' },
      ],
      data: [[10, 'signup']],
      rows: 1,
    });

  const routes: Array<{ matches: (sql: string) => boolean; respond: () => { json(): Promise<unknown> } }> = [
    {
      matches: (sql) => sql.includes('FROM system.tables'),
      respond: () =>
        result([
          { name: 'events', engine: 'MergeTree', comment: 'Event stream' },
          { name: 'event_summary', engine: 'View', comment: '' },
        ]),
    },
    {
      matches: (sql) => sql.includes('FROM system.columns'),
      respond: () =>
        result([
          { table: 'events', name: 'id', type: 'UInt64', comment: 'PK', is_in_primary_key: 1 },
          { table: 'events', name: 'event_name', type: 'LowCardinality(String)', comment: '', is_in_primary_key: 0 },
          { table: 'event_summary', name: 'event_name', type: 'String', comment: '', is_in_primary_key: 0 },
        ]),
    },
    {
      matches: (sql) => sql.includes('FROM system.parts') && sql.includes('GROUP BY table'),
      respond: () => result([{ table: 'events', row_count: '2' }]),
    },
    {
      matches: (sql) => sql.includes('SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 1'),
      respond: idAndEventNameRow,
    },
    {
      matches: (sql) => sql.includes('SELECT `event_name` FROM `analytics`.`events`'),
      respond: () =>
        result({
          meta: [{ name: 'event_name', type: 'String' }],
          data: [['signup'], ['purchase']],
          rows: 2,
        }),
    },
    {
      matches: (sql) => sql.includes('COUNT(DISTINCT val)'),
      respond: () =>
        result({
          meta: [{ name: 'cardinality', type: 'UInt64' }],
          data: [[2]],
          rows: 1,
        }),
    },
    {
      matches: (sql) => sql.includes('SELECT DISTINCT toString(`event_name`) AS val'),
      respond: () =>
        result({
          meta: [{ name: 'val', type: 'String' }],
          data: [['purchase'], ['signup']],
          rows: 2,
        }),
    },
    {
      matches: (sql) => sql.includes('sum(rows) AS count'),
      respond: () =>
        result({
          meta: [{ name: 'count', type: 'UInt64' }],
          data: [[2]],
          rows: 1,
        }),
    },
    {
      matches: (sql) => sql.includes('FROM system.databases'),
      respond: () => result([{ name: 'analytics' }, { name: 'warehouse' }]),
    },
    {
      matches: (sql) => sql.trim() === 'SELECT 1',
      respond: () => result({ meta: [{ name: '1', type: 'UInt8' }], data: [[1]], rows: 1 }),
    },
    {
      matches: (sql) =>
        sql.includes('select * from (select id, event_name from analytics.events) as ktx_query_result limit 1'),
      respond: idAndEventNameRow,
    },
  ];

  const query = vi.fn(async (input: { query: string; format: string; query_params?: Record<string, unknown> }) => {
    const route = routes.find((candidate) => candidate.matches(input.query));
    if (!route) {
      throw new Error(`Unexpected SQL: ${input.query}`);
    }
    return route.respond();
  });
  const close = vi.fn(async () => undefined);

  return {
    createClient: vi.fn(() => ({ query, close })),
  };
}
|
||||
|
||||
// Connector tests run against the canned responses of `fakeClientFactory`.
describe('KtxClickHouseScanConnector', () => {
  // Fresh fixtures per call so tests never share mutable objects.
  const scanContext = () => ({ runId: 'scan-run-1' });
  const eventsTable = () => ({ catalog: null, db: 'analytics', name: 'events' });
  const warehouseConnection = () => ({
    driver: 'clickhouse',
    host: 'ch.example.test',
    database: 'analytics',
    username: 'reader',
    password: 'test-pass', // pragma: allowlist secret
  });
  const fixedNow = () => new Date('2026-04-29T14:00:00.000Z');

  it('resolves ClickHouse connection configuration safely', () => {
    const clickHouseConfig = { driver: 'clickhouse', host: 'localhost', database: 'analytics' };
    const mysqlConfig = { driver: 'mysql', host: 'localhost', database: 'analytics' };
    expect(isKtxClickHouseConnectionConfig(clickHouseConfig)).toBe(true);
    expect(isKtxClickHouseConnectionConfig(mysqlConfig)).toBe(false);

    const resolved = clickHouseClientConfigFromConfig({
      connectionId: 'warehouse',
      connection: {
        driver: 'clickhouse',
        host: 'ch.example.test',
        port: 9440,
        database: 'analytics',
        username: 'reader',
        password: 'test-pass', // pragma: allowlist secret
        ssl: true,
      },
    });
    expect(resolved).toMatchObject({
      host: 'ch.example.test',
      port: 9440,
      database: 'analytics',
      username: 'reader',
      password: 'test-pass', // pragma: allowlist secret
      ssl: true,
    });
  });

  it('introspects schema, primary keys, comments, row counts, and views', async () => {
    const connector = new KtxClickHouseScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      clientFactory: fakeClientFactory(),
      now: fixedNow,
    });

    const snapshot = await connector.introspect({ connectionId: 'warehouse', driver: 'clickhouse' }, scanContext());

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'clickhouse',
      extractedAt: '2026-04-29T14:00:00.000Z',
      scope: { schemas: ['analytics'] },
      metadata: {
        database: 'analytics',
        host: 'ch.example.test',
        table_count: 2,
        total_columns: 3,
      },
    });

    const summary = snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment]);
    expect(summary).toEqual([
      ['events', 'table', 2, 'Event stream'],
      ['event_summary', 'view', null, null],
    ]);

    const events = snapshot.tables.find((table) => table.name === 'events');
    expect(events?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'UInt64',
      normalizedType: 'UInt64',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
      comment: 'PK',
    });
    expect(snapshot.tables.find((table) => table.name === 'events')?.foreignKeys).toEqual([]);
  });

  it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
    const clientFactory = fakeClientFactory();
    const connector = new KtxClickHouseScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      clientFactory,
    });

    const tableSample = connector.sampleTable(
      {
        connectionId: 'warehouse',
        table: eventsTable(),
        columns: ['id', 'event_name'],
        limit: 1,
      },
      scanContext(),
    );
    await expect(tableSample).resolves.toEqual({ headers: ['id', 'event_name'], rows: [[10, 'signup']], totalRows: 1 });

    const columnSample = connector.sampleColumn(
      { connectionId: 'warehouse', table: eventsTable(), column: 'event_name', limit: 5 },
      scanContext(),
    );
    await expect(columnSample).resolves.toMatchObject({
      values: ['signup', 'purchase'],
      nullCount: null,
      distinctCount: null,
    });

    const distinctValues = connector.getColumnDistinctValues(eventsTable(), 'event_name', {
      maxCardinality: 5,
      limit: 10,
      sampleSize: 100,
    });
    await expect(distinctValues).resolves.toEqual({ values: ['purchase', 'signup'], cardinality: 2 });

    const readOnly = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'select id, event_name from analytics.events', maxRows: 1 },
      scanContext(),
    );
    await expect(readOnly).resolves.toMatchObject({
      headers: ['id', 'event_name'],
      rows: [[10, 'signup']],
      totalRows: 1,
      rowCount: 1,
    });

    const rejectedWrite = connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from events' }, scanContext());
    await expect(rejectedWrite).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

    await expect(connector.getTableRowCount('events')).resolves.toBe(2);
    await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']);

    const stats = connector.columnStats(
      { connectionId: 'warehouse', table: eventsTable(), column: 'event_name' },
      scanContext(),
    );
    await expect(stats).resolves.toBeNull();

    await connector.cleanup();
  });

  it('adapts native ClickHouse snapshots to live-database introspection for local ingest', async () => {
    const introspection = createClickHouseLiveDatabaseIntrospection({
      connections: { warehouse: warehouseConnection() },
      clientFactory: fakeClientFactory(),
      now: fixedNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T14:00:00.000Z',
    });

    const events = snapshot.tables.find((table) => table.name === 'events');
    expect(events).toMatchObject({
      name: 'events',
      catalog: null,
      db: 'analytics',
      columns: [
        {
          name: 'id',
          nativeType: 'UInt64',
          normalizedType: 'UInt64',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: 'PK',
        },
        {
          name: 'event_name',
          nativeType: 'LowCardinality(String)',
          normalizedType: 'LowCardinality(String)',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: null,
        },
      ],
      foreignKeys: [],
    });
  });
});
|
||||
550
packages/cli/src/connectors/clickhouse/connector.ts
Normal file
550
packages/cli/src/connectors/clickhouse/connector.ts
Normal file
|
|
@ -0,0 +1,550 @@
|
|||
import { createClient } from '@clickhouse/client';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/index.js';
|
||||
import {
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
type KtxColumnStatsResult,
|
||||
type KtxQueryResult,
|
||||
type KtxReadOnlyQueryInput,
|
||||
type KtxScanConnector,
|
||||
type KtxScanContext,
|
||||
type KtxScanInput,
|
||||
type KtxSchemaColumn,
|
||||
type KtxSchemaSnapshot,
|
||||
type KtxSchemaTable,
|
||||
type KtxTableRef,
|
||||
type KtxTableSampleInput,
|
||||
type KtxTableListEntry,
|
||||
type KtxTableSampleResult,
|
||||
} from '../../context/scan/index.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { Agent as HttpsAgent } from 'node:https';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { KtxClickHouseDialect } from './dialect.js';
|
||||
|
||||
/**
 * Raw ClickHouse connection settings as they appear in project configuration.
 *
 * String fields are read through `stringConfigValue`, so each may hold either a
 * literal value or an `env:NAME` / `file:PATH` reference.
 */
export interface KtxClickHouseConnectionConfig {
  /** Driver name; matched case-insensitively against `clickhouse`. */
  driver?: string;
  host?: string;
  port?: number;
  database?: string;
  username?: string;
  /** Alternative spelling of `username`; only consulted when `username` is absent. */
  user?: string;
  password?: string;
  /** Connection URL; fields set explicitly on this object override its parts. */
  url?: string;
  ssl?: boolean;
  /** Additional, connector-agnostic settings are tolerated. */
  [key: string]: unknown;
}

/**
 * Fully resolved client settings produced by `clickHouseClientConfigFromConfig`:
 * references are expanded and defaults are applied.
 */
export interface KtxClickHouseResolvedClientConfig {
  host: string;
  port: number;
  database: string;
  username: string;
  password?: string;
  ssl: boolean;
}
|
||||
|
||||
/** Arguments passed to {@link KtxClickHouseClient.query}. */
interface ClickHouseQueryInput {
  query: string;
  format: 'JSONCompact' | 'JSONEachRow';
  /** Values for `{name:Type}` placeholders in `query`. */
  query_params?: Record<string, unknown>;
}

/** Minimal view of a query result: only JSON decoding is needed here. */
interface ClickHouseResultSet {
  json(): Promise<unknown>;
}

/** The subset of the ClickHouse client used by the connector (query + close). */
export interface KtxClickHouseClient {
  query(input: ClickHouseQueryInput): Promise<ClickHouseResultSet>;
  close(): Promise<void>;
}

/** Creates clients; injectable so tests can supply a fake. */
export interface KtxClickHouseClientFactory {
  createClient(config: Parameters<typeof createClient>[0]): KtxClickHouseClient;
}

/** Host/port the client should actually dial, plus an optional teardown hook. */
interface KtxClickHouseResolvedEndpoint {
  host: string;
  port: number;
  /** Invoked from the connector's `cleanup()` when present. */
  close?: () => Promise<void>;
}

/** Maps the configured host/port to the endpoint that should be dialled. */
export interface KtxClickHouseEndpointResolver {
  resolve(input: {
    host: string;
    port: number;
    connection: KtxClickHouseConnectionConfig;
  }): Promise<KtxClickHouseResolvedEndpoint>;
}
|
||||
|
||||
/** Constructor options for `KtxClickHouseScanConnector`. */
export interface KtxClickHouseScanConnectorOptions {
  connectionId: string;
  connection: KtxClickHouseConnectionConfig | undefined;
  /** Defaults to a factory backed by `@clickhouse/client`. */
  clientFactory?: KtxClickHouseClientFactory;
  endpointResolver?: KtxClickHouseEndpointResolver;
  /** Environment used to expand `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Clock used for snapshot timestamps; defaults to `new Date()`. */
  now?: () => Date;
}

/** Read-only query input extended with named parameters (`:name` in the SQL). */
export interface KtxClickHouseReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  params?: Record<string, unknown>;
}

/** Tuning knobs for `getColumnDistinctValues`. */
export interface KtxClickHouseColumnDistinctValuesOptions {
  /** Above this sampled cardinality no values are fetched. */
  maxCardinality: number;
  /** Maximum number of distinct values to return. */
  limit: number;
  /** Rows sampled for the cardinality estimate; the connector defaults this to 10000. */
  sampleSize?: number;
}

/** Outcome of `getColumnDistinctValues`. */
export interface KtxClickHouseColumnDistinctValuesResult {
  /** `null` when the cardinality exceeded `maxCardinality`. */
  values: string[] | null;
  cardinality: number;
}
|
||||
|
||||
/** Row selected from `system.tables` during introspection. */
interface ClickHouseTableRow {
  name: string;
  engine: string;
  comment: string;
}

/** Row selected from `system.columns` during introspection. */
interface ClickHouseColumnRow {
  table: string;
  name: string;
  type: string;
  comment: string;
  /** Compared against `1` to decide primary-key membership. */
  is_in_primary_key: number;
}

/** Aggregated row count selected from `system.parts`. */
interface ClickHouseRowCountRow {
  table?: string;
  row_count?: string | number;
  count?: string | number;
}

/** Row selected from `system.databases`. */
interface ClickHouseDatabaseRow {
  name: string;
}

/** Row selected from `system.tables` when listing tables across databases. */
interface ClickHouseTableListRow {
  database: string;
  name: string;
  engine: string;
}

/** Shape of a `JSONCompact` response as consumed by the connector. */
interface ClickHouseCompactResponse {
  meta?: Array<{ name: string; type: string }>;
  data?: unknown[][];
  rows?: number;
}
|
||||
|
||||
/** Production factory: hands the configuration straight to `@clickhouse/client`. */
class DefaultClickHouseClientFactory implements KtxClickHouseClientFactory {
  createClient(config: Parameters<typeof createClient>[0]): KtxClickHouseClient {
    const client = createClient(config);
    return client;
  }
}
|
||||
|
||||
/**
 * Reads a string setting from the connection config and expands any
 * `env:` / `file:` reference it contains.
 *
 * @returns the resolved string, or `undefined` when the setting is missing,
 *   not a string, or blank after trimming.
 */
function stringConfigValue(
  connection: KtxClickHouseConnectionConfig | undefined,
  key: keyof KtxClickHouseConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Expands a configuration value that may point at an environment variable or a file.
 *
 * - `env:NAME` yields `env[NAME]`, or an empty string when the variable is unset.
 * - `file:PATH` yields the trimmed UTF-8 contents of `PATH`; a leading `~` is
 *   expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @throws whatever `readFileSync` throws when a `file:` target cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    const envName = value.slice('env:'.length);
    return env[envName] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Strip the separator that follows `~` before resolving: path.resolve() treats
    // an absolute segment such as "/secret" as a new root and would silently
    // discard the home directory, turning "~/secret" into "/secret".
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/** Returns `value` when it is a finite number; otherwise `undefined`. */
function maybeNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  if (!Number.isFinite(value)) {
    return undefined;
  }
  return value;
}
|
||||
|
||||
/**
 * Splits a ClickHouse connection URL into individual connection settings.
 *
 * The database is taken from the URL path; user name, password and database are
 * percent-decoded. `ssl` is true for `https:` URLs or when `?ssl=true` is present.
 *
 * NOTE(review): the WHATWG URL parser reports an empty port when the URL spells
 * out the scheme's default port (80 / 443), so `port` is `undefined` for such
 * URLs and the caller's default applies — confirm this is intended.
 *
 * @throws TypeError when `url` is not a valid absolute URL.
 * @throws URIError when a credential or the database contains a malformed escape.
 */
function parseClickHouseUrl(url: string): Partial<KtxClickHouseConnectionConfig> {
  const parsed = new URL(url);
  // Drop the leading slash and any trailing slash so "/analytics/" yields "analytics".
  const databasePath = parsed.pathname.replace(/^\/+|\/+$/g, '');
  return {
    host: parsed.hostname,
    port: parsed.port ? Number(parsed.port) : undefined,
    // URL.pathname stays percent-encoded; decode it like the credentials below.
    database: databasePath ? decodeURIComponent(databasePath) : undefined,
    username: parsed.username ? decodeURIComponent(parsed.username) : undefined,
    password: parsed.password ? decodeURIComponent(parsed.password) : undefined,
    ssl: parsed.protocol === 'https:' || parsed.searchParams.get('ssl') === 'true',
  };
}
|
||||
|
||||
/** Classifies a ClickHouse table engine: view engines map to `view`, all others to `table`. */
function tableKind(engine: string): KtxSchemaTable['kind'] {
  switch (engine) {
    case 'View':
    case 'MaterializedView':
      return 'view';
    default:
      return 'table';
  }
}
|
||||
|
||||
/**
 * Reports whether a ClickHouse type string is nullable at the top level,
 * i.e. `Nullable(...)` or `LowCardinality(Nullable(...))`.
 */
function isNullableClickHouseType(type: string): boolean {
  const nullablePrefixes = ['Nullable(', 'LowCardinality(Nullable('];
  return nullablePrefixes.some((prefix) => type.startsWith(prefix));
}
|
||||
|
||||
/**
 * Type guard: true when the connection's `driver` equals `clickhouse`,
 * ignoring case. A missing connection or driver yields false.
 */
export function isKtxClickHouseConnectionConfig(
  connection: KtxClickHouseConnectionConfig | undefined,
): connection is KtxClickHouseConnectionConfig {
  const driver = String(connection?.driver ?? '');
  return driver.toLowerCase() === 'clickhouse';
}
|
||||
|
||||
/**
 * Builds the resolved client configuration for a ClickHouse connection.
 *
 * Settings parsed from `url` act as a base layer and are overridden by fields set
 * explicitly on the connection. Defaults: database `default`, user `default`
 * (`username` is preferred over `user`), port 8123, `ssl` false.
 *
 * @throws Error when the connection's driver is not `clickhouse`.
 * @throws Error when neither `host` nor `url` provides a host.
 */
export function clickHouseClientConfigFromConfig(input: {
  connectionId: string;
  connection: KtxClickHouseConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxClickHouseResolvedClientConfig {
  // Captured before the guard below, which narrows the connection type.
  const driverLabel = input.connection?.driver ?? 'unknown';
  if (!isKtxClickHouseConnectionConfig(input.connection)) {
    throw new Error(`Native ClickHouse connector cannot run driver "${driverLabel}"`);
  }

  const env = input.env ?? process.env;
  const urlText = stringConfigValue(input.connection, 'url', env);
  const fromUrl = urlText ? parseClickHouseUrl(urlText) : {};
  const merged: KtxClickHouseConnectionConfig = { ...fromUrl, ...input.connection };

  const host = stringConfigValue(merged, 'host', env);
  const database = stringConfigValue(merged, 'database', env) ?? 'default';
  const username =
    stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env) ?? 'default';

  if (!host) {
    throw new Error(`Native ClickHouse connector requires connections.${input.connectionId}.host or url`);
  }

  const port = maybeNumber(merged.port) ?? 8123;
  const password = stringConfigValue(merged, 'password', env);
  const ssl = merged.ssl === true;
  return { host, port, database, username, password, ssl };
}
|
||||
|
||||
/**
 * Scan connector for a single ClickHouse connection.
 *
 * Introspects the configured database through the `system.*` tables, samples
 * tables and columns, and runs caller-supplied read-only SQL. The underlying
 * client is created lazily on the first query and released by {@link cleanup}.
 * Every statement is passed through `assertReadOnlySql` before it is sent.
 */
export class KtxClickHouseScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'clickhouse' as const;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: false,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly connection: KtxClickHouseConnectionConfig;
  private readonly clientConfig: KtxClickHouseResolvedClientConfig;
  private readonly clientFactory: KtxClickHouseClientFactory;
  private readonly endpointResolver?: KtxClickHouseEndpointResolver;
  private readonly now: () => Date;
  private readonly dialect = new KtxClickHouseDialect();
  private client: KtxClickHouseClient | null = null;
  private resolvedEndpoint: KtxClickHouseResolvedEndpoint | null = null;

  /**
   * @throws Error (from `clickHouseClientConfigFromConfig`) when the connection
   *   is not a ClickHouse connection or has no host.
   */
  constructor(options: KtxClickHouseScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.connection = options.connection ?? {};
    this.clientConfig = clickHouseClientConfigFromConfig({
      connectionId: options.connectionId,
      connection: options.connection,
      env: options.env,
    });
    this.clientFactory = options.clientFactory ?? new DefaultClickHouseClientFactory();
    this.endpointResolver = options.endpointResolver;
    this.now = options.now ?? (() => new Date());
    this.id = `clickhouse:${options.connectionId}`;
  }

  /** Runs `SELECT 1`; failures are reported in the result instead of thrown. */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      await this.query('SELECT 1');
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Builds a schema snapshot of the configured database from `system.tables`,
   * `system.columns` and `system.parts` (active parts only, for row estimates).
   * Tables with the `Dictionary` engine are excluded.
   *
   * @throws Error when `input.connectionId` is not this connector's connection.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const database = this.clientConfig.database;
    const tables = await this.queryEachRow<ClickHouseTableRow>(
      `
        SELECT name, engine, comment
        FROM system.tables
        WHERE database = {database:String}
          AND engine NOT IN ('Dictionary')
        ORDER BY name
      `,
      { database },
    );
    const columns = await this.queryEachRow<ClickHouseColumnRow>(
      `
        SELECT table, name, type, comment, is_in_primary_key
        FROM system.columns
        WHERE database = {database:String}
        ORDER BY table, position
      `,
      { database },
    );
    const rowCounts = await this.queryEachRow<ClickHouseRowCountRow>(
      `
        SELECT table, sum(rows) AS row_count
        FROM system.parts
        WHERE database = {database:String}
          AND active = 1
        GROUP BY table
      `,
      { database },
    );

    // Group columns per table, appending in place so the work stays linear in the
    // number of columns (query order, i.e. column position, is preserved).
    const columnsByTable = new Map<string, ClickHouseColumnRow[]>();
    for (const column of columns) {
      const bucket = columnsByTable.get(column.table);
      if (bucket) {
        bucket.push(column);
      } else {
        columnsByTable.set(column.table, [column]);
      }
    }
    const rowCountByTable = new Map(rowCounts.map((row) => [String(row.table), Number(row.row_count ?? 0)]));
    const schemaTables = tables.map((table) =>
      this.toSchemaTable(table, columnsByTable.get(table.name) ?? [], rowCountByTable.get(table.name) ?? 0),
    );

    return {
      connectionId: this.connectionId,
      driver: 'clickhouse',
      extractedAt: this.now().toISOString(),
      scope: { schemas: [database] },
      metadata: {
        database,
        host: this.clientConfig.host,
        table_count: schemaTables.length,
        total_columns: schemaTables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables: schemaTables,
    };
  }

  /** Returns up to `input.limit` rows of the table, optionally restricted to `input.columns`. */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns),
    );
    return { headers: result.headers, rows: result.rows, totalRows: result.totalRows };
  }

  /**
   * Returns sample values of one column; null values are dropped from the result.
   * `nullCount` and `distinctCount` are not computed and are reported as `null`.
   */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /** Column statistics are not supported (see `capabilities.columnStats`); always resolves to `null`. */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /**
   * Executes caller-supplied SQL after it has passed `assertReadOnlySql` and been
   * capped with `limitSqlForExecution(…, input.maxRows)`. `:name` placeholders are
   * rewritten to ClickHouse typed placeholders using `input.params`.
   */
  async executeReadOnly(input: KtxClickHouseReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
    const prepared = this.dialect.prepareQuery(limitedSql, input.params);
    const result = await this.query(prepared.sql, prepared.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Estimates a column's cardinality on a sample (default 10000 rows) and, when it
   * does not exceed `options.maxCardinality`, fetches up to `options.limit`
   * distinct values as strings.
   *
   * @returns `null` when the cardinality query yields no usable number;
   *   `{ values: null, cardinality }` when the cardinality is above the maximum.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxClickHouseColumnDistinctValuesOptions,
  ): Promise<KtxClickHouseColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityResult = await this.query(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    const cardinality = Number(cardinalityResult.rows[0]?.[0]);
    if (Number.isNaN(cardinality)) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesResult = await this.query(this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit));
    return {
      values: valuesResult.rows.filter((row) => row[0] !== null).map((row) => String(row[0])),
      cardinality,
    };
  }

  /** Sums `rows` over the table's active parts in `system.parts` for the configured database. */
  async getTableRowCount(tableName: string): Promise<number> {
    const result = await this.query(
      `
        SELECT sum(rows) AS count
        FROM system.parts
        WHERE database = {database:String}
          AND table = {table:String}
          AND active = 1
      `,
      { database: this.clientConfig.database, table: tableName },
    );
    return Number(result.rows[0]?.[0] ?? 0);
  }

  /** Returns the quoted, optionally database-qualified table name. */
  qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes an identifier using the ClickHouse dialect. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Lists database names, excluding `system` and the information-schema databases. */
  async listSchemas(): Promise<string[]> {
    const rows = await this.queryEachRow<ClickHouseDatabaseRow>(
      `
        SELECT name
        FROM system.databases
        WHERE name NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')
        ORDER BY name
      `,
    );
    return rows.map((row) => row.name);
  }

  /**
   * Lists tables and views in the given databases, or in every database returned
   * by {@link listSchemas} when `schemas` is omitted.
   */
  async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
    const filterSchemas = schemas ?? (await this.listSchemas());
    if (filterSchemas.length === 0) return [];
    const rows = await this.queryEachRow<ClickHouseTableListRow>(
      `
        SELECT database, name, engine
        FROM system.tables
        WHERE database IN ({schemas:Array(String)})
        ORDER BY database, name
      `,
      { schemas: filterSchemas },
    );
    return rows.map((row) => ({
      schema: row.database,
      name: row.name,
      kind: row.engine === 'View' || row.engine === 'MaterializedView' ? ('view' as const) : ('table' as const),
    }));
  }

  /**
   * Closes the client and the resolved endpoint, if any. Safe to call repeatedly.
   *
   * The endpoint's `close` hook runs even when closing the client rejects, so a
   * failing client shutdown cannot leak the endpoint.
   */
  async cleanup(): Promise<void> {
    const client = this.client;
    const endpoint = this.resolvedEndpoint;
    // Clear state first so a later query or cleanup never reuses closed resources.
    this.client = null;
    this.resolvedEndpoint = null;
    try {
      await client?.close();
    } finally {
      await endpoint?.close?.();
    }
  }

  /** Converts a `system.tables` row plus its columns; views report `estimatedRows: null`. */
  private toSchemaTable(table: ClickHouseTableRow, columns: ClickHouseColumnRow[], estimatedRows: number): KtxSchemaTable {
    const kind = tableKind(table.engine);
    return {
      catalog: null,
      db: this.clientConfig.database,
      name: table.name,
      kind,
      comment: table.comment || null,
      estimatedRows: kind === 'view' ? null : estimatedRows,
      columns: columns.map((column) => this.toSchemaColumn(column)),
      foreignKeys: [],
    };
  }

  /** Converts a `system.columns` row; an empty comment becomes `null`. */
  private toSchemaColumn(column: ClickHouseColumnRow): KtxSchemaColumn {
    return {
      name: column.name,
      nativeType: column.type,
      normalizedType: this.dialect.mapDataType(column.type),
      dimensionType: this.dialect.mapToDimensionType(column.type),
      nullable: isNullableClickHouseType(column.type),
      primaryKey: column.is_in_primary_key === 1,
      comment: column.comment || null,
    };
  }

  /**
   * Returns the cached client, creating it on first use. When an endpoint
   * resolver is configured, the client dials the resolved host/port instead of
   * the configured one.
   */
  private async clientForQuery(): Promise<KtxClickHouseClient> {
    if (!this.client) {
      const config = { ...this.clientConfig };
      if (this.endpointResolver) {
        this.resolvedEndpoint = await this.endpointResolver.resolve({
          host: config.host,
          port: config.port,
          connection: this.connection,
        });
        config.host = this.resolvedEndpoint.host;
        config.port = this.resolvedEndpoint.port;
      }
      const protocol = config.ssl ? 'https' : 'http';
      const isProxied = config.host !== this.clientConfig.host;
      this.client = this.clientFactory.createClient({
        url: `${protocol}://${config.host}:${config.port}`,
        username: config.username,
        password: config.password ?? '',
        database: config.database,
        request_timeout: 30_000,
        clickhouse_settings: {
          output_format_json_quote_64bit_integers: 1,
        },
        // When dialling a different host over TLS, keep presenting the configured
        // host name as the TLS server name.
        ...(isProxied && config.ssl
          ? {
              http_agent: new HttpsAgent({
                servername: this.clientConfig.host,
                keepAlive: true,
              }),
            }
          : {}),
      });
    }
    return this.client;
  }

  /** Runs a read-only statement in `JSONEachRow` format and returns the decoded rows. */
  private async queryEachRow<T>(sql: string, params?: Record<string, unknown>): Promise<T[]> {
    const client = await this.clientForQuery();
    const resultSet = await client.query({
      query: assertReadOnlySql(sql),
      format: 'JSONEachRow',
      ...(params ? { query_params: params } : {}),
    });
    return (await resultSet.json()) as T[];
  }

  /**
   * Runs a read-only statement in `JSONCompact` format and returns headers,
   * header types and positional rows. `totalRows` falls back to the number of
   * returned rows when the response carries no `rows` field.
   */
  private async query(sql: string, params?: Record<string, unknown>): Promise<Omit<KtxQueryResult, 'rowCount'>> {
    const client = await this.clientForQuery();
    const resultSet = await client.query({
      query: assertReadOnlySql(sql),
      format: 'JSONCompact',
      ...(params ? { query_params: params } : {}),
    });
    const response = (await resultSet.json()) as ClickHouseCompactResponse;
    const meta = response.meta ?? [];
    return {
      headers: meta.map((field) => field.name),
      headerTypes: meta.map((field) => field.type),
      rows: response.data ?? [],
      totalRows: response.rows ?? response.data?.length ?? 0,
    };
  }

  /** @throws Error when asked to serve a connection other than the one it was built for. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`KTX ClickHouse connector ${this.id} cannot serve connection ${connectionId}`);
    }
  }
}
|
||||
49
packages/cli/src/connectors/clickhouse/dialect.test.ts
Normal file
49
packages/cli/src/connectors/clickhouse/dialect.test.ts
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxClickHouseDialect } from './dialect.js';
|
||||
|
||||
describe('KtxClickHouseDialect', () => {
|
||||
const dialect = new KtxClickHouseDialect();
|
||||
|
||||
it('quotes identifiers and formats database-qualified table names', () => {
|
||||
expect(dialect.quoteIdentifier('events')).toBe('`events`');
|
||||
expect(dialect.quoteIdentifier('odd`name')).toBe('`odd``name`');
|
||||
expect(dialect.formatTableName({ catalog: null, db: 'analytics', name: 'events' })).toBe(
|
||||
'`analytics`.`events`',
|
||||
);
|
||||
expect(dialect.formatTableName({ catalog: null, db: null, name: 'events' })).toBe('`events`');
|
||||
});
|
||||
|
||||
it('maps nullable and low-cardinality ClickHouse types to KTX dimension types', () => {
|
||||
expect(dialect.mapToDimensionType('Nullable(DateTime64(3))')).toBe('time');
|
||||
expect(dialect.mapToDimensionType('LowCardinality(Nullable(String))')).toBe('string');
|
||||
expect(dialect.mapToDimensionType('UInt64')).toBe('number');
|
||||
expect(dialect.mapToDimensionType('Decimal(18, 4)')).toBe('number');
|
||||
expect(dialect.mapToDimensionType('Bool')).toBe('boolean');
|
||||
expect(dialect.mapToDimensionType('IPv4')).toBe('string');
|
||||
expect(dialect.mapToDimensionType('')).toBe('string');
|
||||
});
|
||||
|
||||
it('builds sampling, distinct-value, pagination, and time SQL', () => {
|
||||
expect(dialect.generateSampleQuery('`analytics`.`events`', 25, ['id', 'event_name'])).toBe(
|
||||
'SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 25',
|
||||
);
|
||||
expect(dialect.generateColumnSampleQuery('`analytics`.`events`', 'event_name', 10)).toBe(
|
||||
"SELECT `event_name` FROM `analytics`.`events` WHERE `event_name` IS NOT NULL AND trim(toString(`event_name`)) != '' LIMIT 10",
|
||||
);
|
||||
expect(dialect.generateDistinctValuesQuery('`analytics`.`events`', '`event_name`', 5)).toContain(
|
||||
'SELECT DISTINCT toString(`event_name`) AS val',
|
||||
);
|
||||
expect(dialect.getLimitOffsetClause(10, 20)).toBe('LIMIT 10 OFFSET 20');
|
||||
expect(dialect.getTimeTruncExpression('created_at', 'week')).toBe('toStartOfWeek(created_at, 1)');
|
||||
});
|
||||
|
||||
it('prepares named parameters using ClickHouse typed placeholders', () => {
|
||||
expect(dialect.prepareQuery('select * from events where id = :id and event_name = :name', {
|
||||
id: 10,
|
||||
name: 'signup',
|
||||
})).toEqual({
|
||||
sql: 'select * from events where id = {id:Int64} and event_name = {name:String}',
|
||||
params: { id: 10, name: 'signup' },
|
||||
});
|
||||
});
|
||||
});
|
||||
279
packages/cli/src/connectors/clickhouse/dialect.ts
Normal file
279
packages/cli/src/connectors/clickhouse/dialect.ts
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/index.js';
|
||||
|
||||
type ClickHouseTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
export class KtxClickHouseDialect {
|
||||
readonly type = 'clickhouse';
|
||||
|
||||
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
|
||||
date: 'time',
|
||||
date32: 'time',
|
||||
datetime: 'time',
|
||||
datetime64: 'time',
|
||||
uint8: 'number',
|
||||
uint16: 'number',
|
||||
uint32: 'number',
|
||||
uint64: 'number',
|
||||
uint128: 'number',
|
||||
uint256: 'number',
|
||||
int8: 'number',
|
||||
int16: 'number',
|
||||
int32: 'number',
|
||||
int64: 'number',
|
||||
int128: 'number',
|
||||
int256: 'number',
|
||||
float32: 'number',
|
||||
float64: 'number',
|
||||
decimal: 'number',
|
||||
decimal32: 'number',
|
||||
decimal64: 'number',
|
||||
decimal128: 'number',
|
||||
decimal256: 'number',
|
||||
string: 'string',
|
||||
fixedstring: 'string',
|
||||
uuid: 'string',
|
||||
ipv4: 'string',
|
||||
ipv6: 'string',
|
||||
enum8: 'string',
|
||||
enum16: 'string',
|
||||
bool: 'boolean',
|
||||
boolean: 'boolean',
|
||||
};
|
||||
|
||||
quoteIdentifier(identifier: string): string {
|
||||
return `\`${identifier.replace(/`/g, '``')}\``;
|
||||
}
|
||||
|
||||
formatTableName(table: ClickHouseTableNameRef): string {
|
||||
return table.db
|
||||
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
|
||||
: this.quoteIdentifier(table.name);
|
||||
}
|
||||
|
||||
mapDataType(nativeType: string): string {
|
||||
return nativeType;
|
||||
}
|
||||
|
||||
mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
|
||||
if (!nativeType) {
|
||||
return 'string';
|
||||
}
|
||||
|
||||
let normalizedType = nativeType.toLowerCase().trim();
|
||||
normalizedType = this.unwrapClickHouseType(normalizedType, 'nullable');
|
||||
normalizedType = this.unwrapClickHouseType(normalizedType, 'lowcardinality');
|
||||
normalizedType = this.unwrapClickHouseType(normalizedType, 'nullable');
|
||||
if (normalizedType.includes('(')) {
|
||||
normalizedType = normalizedType.split('(')[0] ?? normalizedType;
|
||||
}
|
||||
|
||||
if (this.typeMappings[normalizedType]) {
|
||||
return this.typeMappings[normalizedType];
|
||||
}
|
||||
if (normalizedType.includes('date') || normalizedType.includes('time')) {
|
||||
return 'time';
|
||||
}
|
||||
if (
|
||||
normalizedType.includes('int') ||
|
||||
normalizedType.includes('float') ||
|
||||
normalizedType.includes('decimal')
|
||||
) {
|
||||
return 'number';
|
||||
}
|
||||
if (normalizedType === 'bool' || normalizedType === 'boolean') {
|
||||
return 'boolean';
|
||||
}
|
||||
return 'string';
|
||||
}
|
||||
|
||||
generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
|
||||
const columnList =
|
||||
columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
|
||||
return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
|
||||
}
|
||||
|
||||
generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
|
||||
const quotedColumn = this.quoteIdentifier(columnName);
|
||||
return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND trim(toString(${quotedColumn})) != '' LIMIT ${limit}`;
|
||||
}
|
||||
|
||||
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
|
||||
if (!params) {
|
||||
return { sql, params: undefined };
|
||||
}
|
||||
|
||||
let parameterizedQuery = sql;
|
||||
const queryParams: Record<string, unknown> = {};
|
||||
const sortedKeys = Object.keys(params).sort((a, b) => b.length - a.length);
|
||||
|
||||
for (const key of sortedKeys) {
|
||||
const placeholder = `:${key}`;
|
||||
if (parameterizedQuery.includes(placeholder)) {
|
||||
parameterizedQuery = parameterizedQuery.replace(
|
||||
new RegExp(`:${key}\\b`, 'g'),
|
||||
`{${key}:${this.inferClickHouseType(params[key])}}`,
|
||||
);
|
||||
queryParams[key] = params[key];
|
||||
}
|
||||
}
|
||||
|
||||
return { sql: parameterizedQuery, params: queryParams };
|
||||
}
|
||||
|
||||
getRandomSampleFilter(samplePct: number): string {
|
||||
if (samplePct <= 0 || samplePct >= 1) {
|
||||
return '';
|
||||
}
|
||||
return `rand() / 4294967295.0 < ${samplePct}`;
|
||||
}
|
||||
|
||||
getTableSampleClause(_samplePct: number): string {
|
||||
return '';
|
||||
}
|
||||
|
||||
getLimitOffsetClause(limit: number, offset?: number): string {
|
||||
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
|
||||
}
|
||||
|
||||
getNullCountExpression(column: string): string {
|
||||
return `countIf(${column} IS NULL)`;
|
||||
}
|
||||
|
||||
getDistinctCountExpression(column: string): string {
|
||||
return `COUNT(DISTINCT ${column})`;
|
||||
}
|
||||
|
||||
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
return `
|
||||
SELECT COUNT(DISTINCT val) AS cardinality
|
||||
FROM (
|
||||
SELECT ${columnName} AS val
|
||||
FROM ${tableName}
|
||||
WHERE ${columnName} IS NOT NULL
|
||||
LIMIT ${sampleSize}
|
||||
)
|
||||
`;
|
||||
}
|
||||
|
||||
generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
|
||||
return `
|
||||
SELECT DISTINCT toString(${columnName}) AS val
|
||||
FROM ${tableName}
|
||||
WHERE ${columnName} IS NOT NULL
|
||||
ORDER BY val
|
||||
LIMIT ${limit}
|
||||
`;
|
||||
}
|
||||
|
||||
generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
return `
|
||||
SELECT COUNT(DISTINCT val) AS cardinality
|
||||
FROM (
|
||||
SELECT ${columnName} AS val
|
||||
FROM ${tableName}
|
||||
WHERE ${columnName} IS NOT NULL
|
||||
ORDER BY rand()
|
||||
LIMIT ${sampleSize}
|
||||
)
|
||||
`;
|
||||
}
|
||||
|
||||
getTimeTruncExpression(
|
||||
column: string,
|
||||
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
|
||||
timezone?: string,
|
||||
): string {
|
||||
const tz = timezone ? `, '${timezone}'` : '';
|
||||
switch (granularity) {
|
||||
case 'day':
|
||||
return `toStartOfDay(${column}${tz})`;
|
||||
case 'week':
|
||||
return `toStartOfWeek(${column}, 1${tz})`;
|
||||
case 'month':
|
||||
return `toStartOfMonth(${column}${tz})`;
|
||||
case 'quarter':
|
||||
return `toStartOfQuarter(${column}${tz})`;
|
||||
case 'year':
|
||||
return `toStartOfYear(${column}${tz})`;
|
||||
}
|
||||
}
|
||||
|
||||
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
|
||||
const col = timezone ? `toTimezone(${column}, '${timezone}')` : column;
|
||||
const [rawAmount, rawUnit] = interval.split(' ');
|
||||
const amount = Number(rawAmount);
|
||||
const unit = rawUnit!.toLowerCase();
|
||||
const originExpr = origin ? `toDateTime('${origin}')` : "toDateTime('1970-01-01')";
|
||||
const calendarUnit = this.toClickHouseDateDiffUnit(unit);
|
||||
if (calendarUnit) {
|
||||
return `dateAdd(${calendarUnit}, intDiv(dateDiff(${calendarUnit}, ${originExpr}, ${col}), ${amount}) * ${amount}, ${originExpr})`;
|
||||
}
|
||||
const seconds = this.intervalToSeconds(amount, unit);
|
||||
return `addSeconds(${originExpr}, intDiv(toUInt64(dateDiff('second', ${originExpr}, ${col})), ${seconds}) * ${seconds})`;
|
||||
}
|
||||
|
||||
parseIntervalToSql(interval: string): string {
|
||||
const [amount, unit] = interval.split(' ');
|
||||
return `INTERVAL ${amount} ${unit!.toUpperCase()}`;
|
||||
}
|
||||
|
||||
private unwrapClickHouseType(value: string, wrapper: string): string {
|
||||
const prefix = `${wrapper}(`;
|
||||
return value.startsWith(prefix) && value.endsWith(')') ? value.slice(prefix.length, -1) : value;
|
||||
}
|
||||
|
||||
private inferClickHouseType(value: unknown): string {
|
||||
if (value === null || value === undefined) {
|
||||
return 'String';
|
||||
}
|
||||
if (typeof value === 'boolean') {
|
||||
return 'Bool';
|
||||
}
|
||||
if (typeof value === 'number') {
|
||||
return Number.isInteger(value) ? 'Int64' : 'Float64';
|
||||
}
|
||||
if (value instanceof Date) {
|
||||
return 'DateTime';
|
||||
}
|
||||
return 'String';
|
||||
}
|
||||
|
||||
private toClickHouseDateDiffUnit(unit: string): string | null {
|
||||
if (unit === 'month' || unit === 'months') {
|
||||
return "'month'";
|
||||
}
|
||||
if (unit === 'quarter' || unit === 'quarters') {
|
||||
return "'quarter'";
|
||||
}
|
||||
if (unit === 'year' || unit === 'years') {
|
||||
return "'year'";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Converts an amount of a fixed-length unit into seconds.
 *
 * @param amount - Number of units.
 * @param unit - Lower-case unit name (singular or plural) from seconds up to weeks.
 * @returns `amount` multiplied by the unit's length in seconds; unrecognised units are treated as days.
 */
private intervalToSeconds(amount: number, unit: string): number {
  const secondsPerUnit = new Map<string, number>([
    ['second', 1],
    ['seconds', 1],
    ['minute', 60],
    ['minutes', 60],
    ['hour', 3600],
    ['hours', 3600],
    ['day', 86400],
    ['days', 86400],
    ['week', 604800],
    ['weeks', 604800],
  ]);
  // Unknown units use the day factor, matching the switch's default branch.
  return amount * (secondsPerUnit.get(unit) ?? 86400);
}
|
||||
}
|
||||
16
packages/cli/src/connectors/clickhouse/index.ts
Normal file
16
packages/cli/src/connectors/clickhouse/index.ts
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
export { KtxClickHouseDialect } from './dialect.js';
|
||||
export {
|
||||
clickHouseClientConfigFromConfig,
|
||||
isKtxClickHouseConnectionConfig,
|
||||
KtxClickHouseScanConnector,
|
||||
type KtxClickHouseClient,
|
||||
type KtxClickHouseClientFactory,
|
||||
type KtxClickHouseColumnDistinctValuesOptions,
|
||||
type KtxClickHouseColumnDistinctValuesResult,
|
||||
type KtxClickHouseConnectionConfig,
|
||||
type KtxClickHouseEndpointResolver,
|
||||
type KtxClickHouseReadOnlyQueryInput,
|
||||
type KtxClickHouseResolvedClientConfig,
|
||||
type KtxClickHouseScanConnectorOptions,
|
||||
} from './connector.js';
|
||||
export { createClickHouseLiveDatabaseIntrospection } from './live-database-introspection.js';
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/index.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/index.js';
|
||||
import {
|
||||
KtxClickHouseScanConnector,
|
||||
type KtxClickHouseClientFactory,
|
||||
type KtxClickHouseConnectionConfig,
|
||||
type KtxClickHouseEndpointResolver,
|
||||
} from './connector.js';
|
||||
|
||||
/** Options accepted by `createClickHouseLiveDatabaseIntrospection`. */
interface CreateClickHouseLiveDatabaseIntrospectionOptions {
  /** Project connection configs, looked up by connection id at extraction time. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Forwarded unchanged to each `KtxClickHouseScanConnector` (presumably a test seam; confirm). */
  clientFactory?: KtxClickHouseClientFactory;
  /** Forwarded unchanged to each `KtxClickHouseScanConnector`. */
  endpointResolver?: KtxClickHouseEndpointResolver;
  /** Clock forwarded unchanged to each `KtxClickHouseScanConnector`. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Adapts the ClickHouse scan connector to the live-database introspection port.
 *
 * Each `extractSchema` call builds a fresh connector for the requested connection,
 * introspects it, and always calls `cleanup()` on the connector afterwards.
 *
 * @param options - Connection configs plus optional collaborators forwarded to the connector.
 * @returns A port whose `extractSchema(connectionId)` resolves to the connector's snapshot.
 */
export function createClickHouseLiveDatabaseIntrospection(
  options: CreateClickHouseLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    // Options are read per call so later changes to `options` are still observed.
    const connector = new KtxClickHouseScanConnector({
      connectionId,
      connection: options.connections[connectionId] as KtxClickHouseConnectionConfig | undefined,
      clientFactory: options.clientFactory,
      endpointResolver: options.endpointResolver,
      now: options.now,
    });
    try {
      const snapshot = await connector.introspect(
        { connectionId, driver: 'clickhouse' },
        { runId: `clickhouse-${connectionId}` },
      );
      return snapshot;
    } finally {
      await connector.cleanup();
    }
  };

  return { extractSchema };
}
|
||||
282
packages/cli/src/connectors/mysql/connector.test.ts
Normal file
282
packages/cli/src/connectors/mysql/connector.test.ts
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { FieldPacket, RowDataPacket } from 'mysql2/promise';
|
||||
import {
|
||||
createMysqlLiveDatabaseIntrospection,
|
||||
isKtxMysqlConnectionConfig,
|
||||
KtxMysqlScanConnector,
|
||||
mysqlConnectionPoolConfigFromConfig,
|
||||
type KtxMysqlPoolFactory,
|
||||
} from './index.js';
|
||||
|
||||
/**
 * Packs plain rows and field descriptors into the `[rows, fields]` tuple shape
 * that the fake `query` function returns.
 */
function mysqlResult(rows: Record<string, unknown>[], fields: Array<{ name: string; type?: number }>): [RowDataPacket[], FieldPacket[]] {
  const rowPackets = rows as RowDataPacket[];
  const fieldPackets = fields as FieldPacket[];
  return [rowPackets, fieldPackets];
}
|
||||
|
||||
/**
 * Builds a pool factory whose connections answer a fixed set of SQL statements
 * with canned results. Routes are tried in order and the first match wins;
 * any other SQL makes the fake `query` throw.
 */
function fakePoolFactory(): KtxMysqlPoolFactory {
  type Reply = [RowDataPacket[], FieldPacket[]];
  interface Route {
    matches: (sql: string) => boolean;
    reply: () => Reply;
  }

  // True when the SQL text contains every given fragment.
  const containsAll =
    (...fragments: string[]) =>
    (sql: string): boolean =>
      fragments.every((fragment) => sql.includes(fragment));

  // A fresh result per call, shared by the table-sample and read-only query routes.
  const paidOrder = (): Reply =>
    mysqlResult([{ id: 10, status: 'paid' }], [{ name: 'id', type: 3 }, { name: 'status', type: 253 }]);

  const routes: Route[] = [
    {
      matches: containsAll('INFORMATION_SCHEMA.TABLES'),
      reply: () =>
        mysqlResult(
          [
            { TABLE_NAME: 'customers', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'Customer table', TABLE_ROWS: 2 },
            { TABLE_NAME: 'orders', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'InnoDB free: 1 kB; Order table', TABLE_ROWS: 2 },
            { TABLE_NAME: 'order_summary', TABLE_TYPE: 'VIEW', TABLE_COMMENT: '', TABLE_ROWS: null },
          ],
          [{ name: 'TABLE_NAME' }, { name: 'TABLE_TYPE' }, { name: 'TABLE_COMMENT' }, { name: 'TABLE_ROWS' }],
        ),
    },
    {
      matches: containsAll('INFORMATION_SCHEMA.COLUMNS'),
      reply: () =>
        mysqlResult(
          [
            { TABLE_NAME: 'customers', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: 'PK' },
            { TABLE_NAME: 'customers', COLUMN_NAME: 'name', DATA_TYPE: 'varchar', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
            { TABLE_NAME: 'orders', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
            { TABLE_NAME: 'orders', COLUMN_NAME: 'customer_id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
            { TABLE_NAME: 'orders', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' },
            { TABLE_NAME: 'order_summary', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' },
          ],
          [{ name: 'TABLE_NAME' }, { name: 'COLUMN_NAME' }, { name: 'DATA_TYPE' }, { name: 'IS_NULLABLE' }],
        ),
    },
    {
      matches: containsAll('INFORMATION_SCHEMA.KEY_COLUMN_USAGE', "CONSTRAINT_NAME = 'PRIMARY'"),
      reply: () => mysqlResult([{ TABLE_NAME: 'customers', COLUMN_NAME: 'id' }, { TABLE_NAME: 'orders', COLUMN_NAME: 'id' }], []),
    },
    {
      matches: containsAll('INFORMATION_SCHEMA.KEY_COLUMN_USAGE', 'REFERENCED_TABLE_NAME IS NOT NULL'),
      reply: () =>
        mysqlResult(
          [
            {
              TABLE_NAME: 'orders',
              COLUMN_NAME: 'customer_id',
              REFERENCED_TABLE_NAME: 'customers',
              REFERENCED_COLUMN_NAME: 'id',
              CONSTRAINT_NAME: 'orders_customer_id_fk',
            },
          ],
          [],
        ),
    },
    {
      matches: containsAll('SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 1'),
      reply: paidOrder,
    },
    {
      matches: containsAll('select * from (select id, status from analytics.orders) as ktx_query_result limit 1'),
      reply: paidOrder,
    },
    {
      matches: containsAll('SELECT `status` FROM `analytics`.`orders`'),
      reply: () => mysqlResult([{ status: 'paid' }, { status: 'open' }], [{ name: 'status', type: 253 }]),
    },
    {
      matches: containsAll('COUNT(DISTINCT val)'),
      reply: () => mysqlResult([{ cardinality: 2 }], [{ name: 'cardinality', type: 8 }]),
    },
    {
      matches: containsAll('SELECT DISTINCT CAST(`status` AS CHAR) AS val'),
      reply: () => mysqlResult([{ val: 'open' }, { val: 'paid' }], [{ name: 'val', type: 253 }]),
    },
    {
      matches: containsAll('COUNT(*) AS count'),
      reply: () => mysqlResult([{ count: 2 }], [{ name: 'count', type: 8 }]),
    },
    {
      matches: containsAll('INFORMATION_SCHEMA.SCHEMATA'),
      reply: () => mysqlResult([{ SCHEMA_NAME: 'analytics' }, { SCHEMA_NAME: 'warehouse' }], [{ name: 'SCHEMA_NAME' }]),
    },
    {
      matches: (sql) => sql.trim() === 'SELECT 1',
      reply: () => mysqlResult([{ '1': 1 }], [{ name: '1', type: 8 }]),
    },
  ];

  const query = vi.fn(async (sql: string, params?: unknown): Promise<Reply> => {
    const route = routes.find((candidate) => candidate.matches(sql));
    if (!route) {
      throw new Error(`Unexpected SQL: ${sql} params=${JSON.stringify(params)}`);
    }
    return route.reply();
  });
  const release = vi.fn();
  const end = vi.fn(async () => undefined);
  const getConnection = () => vi.fn(async () => ({ query, release }));

  return {
    // Every pool gets its own `getConnection` mock but shares `query`, `release` and `end`.
    createPool: vi.fn(() => ({ getConnection: getConnection(), end })),
  };
}
|
||||
|
||||
describe('KtxMysqlScanConnector', () => {
  // Fixtures shared by the tests below; factories return fresh objects per call.
  const warehouseConnection = () => ({
    driver: 'mysql',
    host: 'db.example.test',
    database: 'analytics',
    username: 'reader',
    password: 'secret', // pragma: allowlist secret
  });
  const ordersTable = () => ({ catalog: null, db: 'analytics', name: 'orders' });
  const scanContext = () => ({ runId: 'scan-run-1' });
  const fixedNow = () => new Date('2026-04-29T12:00:00.000Z');

  it('resolves MySQL connection configuration safely', () => {
    const localMysql = { driver: 'mysql', host: 'localhost', database: 'analytics' };
    expect(isKtxMysqlConnectionConfig(localMysql)).toBe(true);
    expect(isKtxMysqlConnectionConfig({ ...localMysql, driver: 'postgres' })).toBe(false);

    const poolConfig = mysqlConnectionPoolConfigFromConfig({
      connectionId: 'warehouse',
      connection: { ...warehouseConnection(), port: 3307, ssl: true },
    });
    expect(poolConfig).toMatchObject({
      host: 'db.example.test',
      port: 3307,
      database: 'analytics',
      user: 'reader',
      password: 'secret', // pragma: allowlist secret
      ssl: { rejectUnauthorized: false },
    });
  });

  it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
    const connector = new KtxMysqlScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      poolFactory: fakePoolFactory(),
      now: fixedNow,
    });

    const snapshot = await connector.introspect({ connectionId: 'warehouse', driver: 'mysql' }, scanContext());

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'mysql',
      extractedAt: '2026-04-29T12:00:00.000Z',
      scope: { schemas: ['analytics'] },
      metadata: {
        database: 'analytics',
        host: 'db.example.test',
        table_count: 3,
        total_columns: 6,
      },
    });

    const tableSummaries = snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment]);
    expect(tableSummaries).toEqual([
      ['customers', 'table', 2, 'Customer table'],
      ['orders', 'table', 2, 'Order table'],
      ['order_summary', 'view', null, null],
    ]);

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'int',
      normalizedType: 'int',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
      comment: 'PK',
    });

    const orders = snapshot.tables.find((table) => table.name === 'orders');
    expect(orders?.foreignKeys).toEqual([
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: 'analytics',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: 'orders_customer_id_fk',
      },
    ]);
  });

  it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
    const poolFactory = fakePoolFactory();
    const connector = new KtxMysqlScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      poolFactory,
    });

    const tableSample = connector.sampleTable(
      { connectionId: 'warehouse', table: ordersTable(), columns: ['id', 'status'], limit: 1 },
      scanContext(),
    );
    await expect(tableSample).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1 });

    const columnSample = connector.sampleColumn(
      { connectionId: 'warehouse', table: ordersTable(), column: 'status', limit: 5 },
      scanContext(),
    );
    await expect(columnSample).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

    const distinctValues = connector.getColumnDistinctValues(ordersTable(), 'status', {
      maxCardinality: 5,
      limit: 10,
      sampleSize: 100,
    });
    await expect(distinctValues).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    const readOnlyResult = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'select id, status from analytics.orders', maxRows: 1 },
      scanContext(),
    );
    await expect(readOnlyResult).resolves.toMatchObject({
      headers: ['id', 'status'],
      rows: [[10, 'paid']],
      totalRows: 1,
      rowCount: 1,
    });

    const rejectedWrite = connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, scanContext());
    await expect(rejectedWrite).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

    await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
    await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']);

    const stats = connector.columnStats(
      { connectionId: 'warehouse', table: ordersTable(), column: 'status' },
      scanContext(),
    );
    await expect(stats).resolves.toBeNull();

    await connector.cleanup();
  });

  it('adapts native MySQL snapshots to live-database introspection for local ingest', async () => {
    const introspection = createMysqlLiveDatabaseIntrospection({
      connections: { warehouse: warehouseConnection() },
      poolFactory: fakePoolFactory(),
      now: fixedNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T12:00:00.000Z',
    });

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers).toMatchObject({
      name: 'customers',
      catalog: null,
      db: 'analytics',
      columns: [
        {
          name: 'id',
          nativeType: 'int',
          normalizedType: 'int',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: 'PK',
        },
        {
          name: 'name',
          nativeType: 'varchar',
          normalizedType: 'varchar',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: null,
        },
      ],
      foreignKeys: [],
    });
  });
});
|
||||
605
packages/cli/src/connectors/mysql/connector.ts
Normal file
605
packages/cli/src/connectors/mysql/connector.ts
Normal file
|
|
@ -0,0 +1,605 @@
|
|||
import mysql, { type FieldPacket, type Pool, type RowDataPacket } from 'mysql2/promise';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/index.js';
|
||||
import {
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
type KtxColumnStatsResult,
|
||||
type KtxQueryResult,
|
||||
type KtxReadOnlyQueryInput,
|
||||
type KtxScanConnector,
|
||||
type KtxScanContext,
|
||||
type KtxScanInput,
|
||||
type KtxSchemaColumn,
|
||||
type KtxTableListEntry,
|
||||
type KtxSchemaForeignKey,
|
||||
type KtxSchemaSnapshot,
|
||||
type KtxSchemaTable,
|
||||
type KtxTableRef,
|
||||
type KtxTableSampleInput,
|
||||
type KtxTableSampleResult,
|
||||
} from '../../context/scan/index.js';
|
||||
import { KtxMysqlDialect } from './dialect.js';
|
||||
|
||||
/**
 * Raw MySQL connection settings as supplied to the connector.
 *
 * `mysqlConnectionPoolConfigFromConfig` resolves the string fields it reads
 * (`url`, `host`, `database`, `username`, `user`, `password`) through
 * `env:NAME` / `file:path` references, and lets explicit fields override
 * values parsed from `url`.
 */
export interface KtxMysqlConnectionConfig {
  /** Must equal `mysql` (case-insensitive) for this connector to accept the config. */
  driver?: string;
  /** Connection URL; host, port, database, credentials and ssl flags are parsed from it. */
  url?: string;
  host?: string;
  /** Defaults to 3306 when absent or not a finite number. */
  port?: number;
  database?: string;
  /** Preferred over `user` when both are set. */
  username?: string;
  user?: string;
  password?: string;
  /** `true` enables TLS with `rejectUnauthorized: false`; an object form is passed through. */
  ssl?: boolean | { rejectUnauthorized?: boolean };
  [key: string]: unknown;
}
|
||||
|
||||
/** Fully resolved pool settings handed to a `KtxMysqlPoolFactory`. */
export interface KtxMysqlPoolConfig {
  host: string;
  port: number;
  database: string;
  user: string;
  password?: string;
  /** Present only when TLS was requested in the connection config. */
  ssl?: { rejectUnauthorized: boolean };
  connectionLimit: number;
  waitForConnections: true;
}
|
||||
|
||||
/** Minimal pooled-connection surface the connector depends on. */
interface KtxMysqlConnection {
  /** Runs `sql` with optional parameters and resolves to a `[rows, fields]` tuple. */
  query(sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]>;
  /** Hands the connection back to its pool. */
  release(): void;
}
|
||||
|
||||
/** Minimal pool surface the connector depends on. */
interface KtxMysqlPool {
  /** Resolves to a connection that must later be released. */
  getConnection(): Promise<KtxMysqlConnection>;
  /** Shuts the pool down; awaited by the connector's `cleanup()`. */
  end(): Promise<void>;
}
|
||||
|
||||
/** Creates pools from resolved settings; injectable so tests can supply a fake. */
export interface KtxMysqlPoolFactory {
  createPool(config: KtxMysqlPoolConfig): KtxMysqlPool;
}
|
||||
|
||||
/** Host/port pair produced by a `KtxMysqlEndpointResolver`. */
interface KtxMysqlResolvedEndpoint {
  host: string;
  port: number;
  /** Optional teardown hook; the connector's `cleanup()` awaits it when present. */
  close?: () => Promise<void>;
}
|
||||
|
||||
/**
 * Maps a configured host/port to the endpoint that should actually be used.
 * NOTE(review): presumably intended for tunnels or proxies; the call site is not visible here.
 */
export interface KtxMysqlEndpointResolver {
  resolve(input: { host: string; port: number; connection: KtxMysqlConnectionConfig }): Promise<KtxMysqlResolvedEndpoint>;
}
|
||||
|
||||
/** Constructor options for `KtxMysqlScanConnector`. */
export interface KtxMysqlScanConnectorOptions {
  /** Identifier of the connection; also used to build the connector id `mysql:<connectionId>`. */
  connectionId: string;
  /** Raw connection settings; the constructor throws when this is missing or not a MySQL config. */
  connection: KtxMysqlConnectionConfig | undefined;
  /** Environment used to resolve `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Pool factory override; defaults to the factory that wraps `mysql.createPool`. */
  poolFactory?: KtxMysqlPoolFactory;
  endpointResolver?: KtxMysqlEndpointResolver;
  /** Clock used for `extractedAt`; defaults to `() => new Date()`. */
  now?: () => Date;
}
|
||||
|
||||
/** Read-only query input extended with optional bind parameters. */
export interface KtxMysqlReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  /** An array is passed to the driver as-is; a record goes through the dialect's `prepareQuery`. */
  params?: Record<string, unknown> | unknown[];
}
|
||||
|
||||
/** Tuning knobs for `getColumnDistinctValues`. */
export interface KtxMysqlColumnDistinctValuesOptions {
  /** Above this cardinality no values are fetched and `values` is `null`. */
  maxCardinality: number;
  /** Passed to the distinct-values query as its limit. */
  limit: number;
  /** Passed to the cardinality sample query; defaults to 10000. */
  sampleSize?: number;
}
|
||||
|
||||
/** Outcome of `getColumnDistinctValues`. */
export interface KtxMysqlColumnDistinctValuesResult {
  /** Cardinality reported by the sample query. */
  cardinality: number;
  /** Stringified non-null values, or `null` when the cardinality exceeded the allowed maximum. */
  values: string[] | null;
}
|
||||
|
||||
/** Row selected from `INFORMATION_SCHEMA.TABLES` during introspection. */
interface MysqlTableRow extends RowDataPacket {
  TABLE_NAME: string;
  /** Introspection selects only `BASE TABLE` and `VIEW` rows. */
  TABLE_TYPE: string;
  TABLE_COMMENT: string | null;
  TABLE_ROWS: number | null;
}
|
||||
|
||||
/** Row selected from `INFORMATION_SCHEMA.COLUMNS` during introspection. */
interface MysqlColumnRow extends RowDataPacket {
  TABLE_NAME: string;
  COLUMN_NAME: string;
  DATA_TYPE: string;
  /** `'YES'` or `'NO'` in the test fixtures. */
  IS_NULLABLE: string;
  COLUMN_COMMENT: string | null;
}
|
||||
|
||||
/** Row from `INFORMATION_SCHEMA.KEY_COLUMN_USAGE` where `CONSTRAINT_NAME = 'PRIMARY'`. */
interface MysqlPrimaryKeyRow extends RowDataPacket {
  TABLE_NAME: string;
  COLUMN_NAME: string;
}
|
||||
|
||||
/** Row from `INFORMATION_SCHEMA.KEY_COLUMN_USAGE` where `REFERENCED_TABLE_NAME IS NOT NULL`. */
interface MysqlForeignKeyRow extends RowDataPacket {
  CONSTRAINT_NAME: string;
  TABLE_NAME: string;
  COLUMN_NAME: string;
  REFERENCED_TABLE_NAME: string;
  REFERENCED_COLUMN_NAME: string;
}
|
||||
|
||||
/** Row selected from `INFORMATION_SCHEMA.SCHEMATA` by `listSchemas`. */
interface MysqlSchemaRow extends RowDataPacket {
  SCHEMA_NAME: string;
}
|
||||
|
||||
/** Row selected from `INFORMATION_SCHEMA.TABLES` by `listTables`. */
interface MysqlTableListRow extends RowDataPacket {
  TABLE_SCHEMA: string;
  TABLE_NAME: string;
  /** `'VIEW'` maps to kind `view`; anything else maps to `table`. */
  TABLE_TYPE: string;
}
|
||||
|
||||
/** Single-row aggregate result; which field is set depends on the query that produced it. */
interface MysqlCountRow extends RowDataPacket {
  /** Read by `getColumnDistinctValues` from the cardinality sample query. */
  cardinality?: unknown;
  /** Read by `getTableRowCount` from `COUNT(*) AS count`. */
  count?: unknown;
}
|
||||
|
||||
/** Row of the distinct-values query; `val` is stringified by the caller, with `null` skipped. */
interface MysqlDistinctValueRow extends RowDataPacket {
  val: unknown;
}
|
||||
|
||||
/** Production pool factory that delegates to `mysql.createPool`. */
class DefaultMysqlPoolFactory implements KtxMysqlPoolFactory {
  /** Creates a driver pool for `config` and exposes it through the narrow `KtxMysqlPool` surface. */
  createPool(config: KtxMysqlPoolConfig): KtxMysqlPool {
    const pool: Pool = mysql.createPool(config);
    return pool;
  }
}
|
||||
|
||||
/**
 * Reads one config field as a resolved, non-empty string.
 *
 * @param connection - Connection config, possibly undefined.
 * @param key - Field to read.
 * @param env - Environment used for `env:` references.
 * @returns The trimmed value after reference resolution, or `undefined` when the
 *   field is not a string or is blank.
 */
function stringConfigValue(
  connection: KtxMysqlConnectionConfig | undefined,
  key: keyof KtxMysqlConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  return trimmed.length > 0 ? resolveStringReference(trimmed, env) : undefined;
}
|
||||
|
||||
/**
 * Resolves `env:NAME` and `file:PATH` indirections in a config string.
 *
 * - `env:NAME` returns `env[NAME]`, or an empty string when the variable is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file; a leading `~`
 *   is expanded to the current user's home directory.
 * - Any other value is returned unchanged.
 *
 * @param value - Raw config value.
 * @param env - Environment to read `env:` references from.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    const envName = value.slice('env:'.length);
    return env[envName] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator(s) after "~" before resolving: `resolve(home, '/x')` would
    // treat '/x' as absolute and discard the home directory, so "~/x" must become "x".
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Narrows an unknown value to a finite number.
 *
 * @returns `value` when it is a finite `number`; otherwise `undefined` (strings are not coerced).
 */
function maybeNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Extracts connection settings from a MySQL connection URL.
 *
 * @param url - URL string accepted by the WHATWG `URL` constructor.
 * @returns Host, optional port, database (path without leading slashes), URI-decoded
 *   credentials, and `ssl`, which is `true` only when the `ssl` query parameter
 *   (or `sslmode` when `ssl` is absent) equals `true` or `required`.
 * @throws TypeError when `url` cannot be parsed.
 */
function parseMysqlUrl(url: string): Partial<KtxMysqlConnectionConfig> {
  const { hostname, port, pathname, username, password, searchParams } = new URL(url);
  const sslFlag = searchParams.get('ssl') ?? searchParams.get('sslmode');
  const databaseName = pathname.replace(/^\/+/, '');

  return {
    host: hostname,
    port: port ? Number(port) : undefined,
    database: databaseName || undefined,
    username: username ? decodeURIComponent(username) : undefined,
    password: password ? decodeURIComponent(password) : undefined,
    ssl: sslFlag === 'true' || sslFlag === 'required',
  };
}
|
||||
|
||||
/**
 * Normalises a `TABLE_COMMENT` value.
 *
 * Comments starting with `InnoDB free:` are reduced to the text after the first
 * `;`, trimmed; if nothing remains the result is `null`. Other non-empty
 * comments are returned unchanged, and empty or `null` comments become `null`.
 */
function cleanMySqlTableComment(comment: string | null): string | null {
  if (!comment) {
    return null;
  }
  if (!comment.startsWith('InnoDB free:')) {
    return comment;
  }
  const separator = comment.indexOf(';');
  if (separator < 0) {
    return null;
  }
  const remainder = comment.slice(separator + 1).trim();
  return remainder.length > 0 ? remainder : null;
}
|
||||
|
||||
/**
 * Buckets rows by their `TABLE_NAME`.
 *
 * @returns A map from table name to its rows; both keys and rows keep input order.
 */
function groupByTable<T extends { TABLE_NAME: string }>(rows: T[]): Map<string, T[]> {
  const byTable = new Map<string, T[]>();
  for (const row of rows) {
    const bucket = byTable.get(row.TABLE_NAME);
    if (bucket) {
      bucket.push(row);
    } else {
      byTable.set(row.TABLE_NAME, [row]);
    }
  }
  return byTable;
}
|
||||
|
||||
/**
 * Collects primary-key column names per table.
 *
 * @returns A map from table name to the set of its primary-key column names.
 */
function primaryKeyMap(rows: MysqlPrimaryKeyRow[]): Map<string, Set<string>> {
  const keysByTable = new Map<string, Set<string>>();
  for (const { TABLE_NAME: tableName, COLUMN_NAME: columnName } of rows) {
    const known = keysByTable.get(tableName);
    if (known) {
      known.add(columnName);
    } else {
      keysByTable.set(tableName, new Set<string>([columnName]));
    }
  }
  return keysByTable;
}
|
||||
|
||||
/**
 * Normalises bind parameters to a positional array.
 *
 * @returns `undefined` when no parameters were given, the same array when one was
 *   passed, or the record's values in `Object.values` order.
 */
function queryParams(params: Record<string, unknown> | unknown[] | undefined): unknown[] | undefined {
  if (!params) {
    return undefined;
  }
  if (Array.isArray(params)) {
    return params;
  }
  return Object.values(params);
}
|
||||
|
||||
/**
 * Type guard: true when the config's `driver` equals `mysql`, ignoring case.
 * A missing config or missing driver yields `false`.
 */
export function isKtxMysqlConnectionConfig(
  connection: KtxMysqlConnectionConfig | undefined,
): connection is KtxMysqlConnectionConfig {
  const driver = connection?.driver ?? '';
  return String(driver).toLowerCase() === 'mysql';
}
|
||||
|
||||
/**
 * Turns a raw connection config into resolved pool settings.
 *
 * Values parsed from `url` act as defaults; explicit fields on the connection
 * override them. `username` is preferred over `user`. The port defaults to 3306,
 * and `ssl: true` becomes `{ rejectUnauthorized: false }`.
 *
 * @param input - Connection id (used in error messages), the raw config, and an
 *   optional environment (defaults to `process.env`).
 * @returns Pool settings with `connectionLimit: 10` and `waitForConnections: true`.
 * @throws Error when the driver is not MySQL, or host, database or user cannot be resolved.
 */
export function mysqlConnectionPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KtxMysqlConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxMysqlPoolConfig {
  // Read before the type guard narrows `input.connection`, so the error can name the driver.
  const rejectedDriver = input.connection?.driver ?? 'unknown';
  if (!isKtxMysqlConnectionConfig(input.connection)) {
    throw new Error(`Native MySQL connector cannot run driver "${rejectedDriver}"`);
  }

  const env = input.env ?? process.env;
  const url = stringConfigValue(input.connection, 'url', env);
  const merged: KtxMysqlConnectionConfig = { ...(url ? parseMysqlUrl(url) : {}), ...input.connection };

  // Resolve all three before validating so reference-resolution errors surface as before.
  const host = stringConfigValue(merged, 'host', env);
  const database = stringConfigValue(merged, 'database', env);
  const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);

  const missing = (what: string): Error =>
    new Error(`Native MySQL connector requires connections.${input.connectionId}.${what}`);
  if (!host) {
    throw missing('host or url');
  }
  if (!database) {
    throw missing('database or url');
  }
  if (!user) {
    throw missing('username, user, or url');
  }

  let sslSource: { rejectUnauthorized?: boolean } | undefined;
  if (merged.ssl === true) {
    sslSource = { rejectUnauthorized: false };
  } else if (typeof merged.ssl === 'object') {
    sslSource = merged.ssl;
  }

  const poolConfig: KtxMysqlPoolConfig = {
    host,
    port: maybeNumber(merged.port) ?? 3306,
    database,
    user,
    password: stringConfigValue(merged, 'password', env),
    connectionLimit: 10,
    waitForConnections: true,
  };
  if (sslSource) {
    poolConfig.ssl = { rejectUnauthorized: sslSource.rejectUnauthorized ?? false };
  }
  return poolConfig;
}
|
||||
|
||||
export class KtxMysqlScanConnector implements KtxScanConnector {
|
||||
readonly id: string;
|
||||
readonly driver = 'mysql' as const;
|
||||
readonly capabilities = createKtxConnectorCapabilities({
|
||||
tableSampling: true,
|
||||
columnSampling: true,
|
||||
columnStats: false,
|
||||
readOnlySql: true,
|
||||
nestedAnalysis: true,
|
||||
formalForeignKeys: true,
|
||||
estimatedRowCounts: true,
|
||||
});
|
||||
|
||||
private readonly connectionId: string;
|
||||
private readonly connection: KtxMysqlConnectionConfig;
|
||||
private readonly poolConfig: KtxMysqlPoolConfig;
|
||||
private readonly poolFactory: KtxMysqlPoolFactory;
|
||||
private readonly endpointResolver?: KtxMysqlEndpointResolver;
|
||||
private readonly now: () => Date;
|
||||
private readonly dialect = new KtxMysqlDialect();
|
||||
private pool: KtxMysqlPool | null = null;
|
||||
private resolvedEndpoint: KtxMysqlResolvedEndpoint | null = null;
|
||||
|
||||
/**
 * Resolves the pool settings eagerly and stores the collaborators.
 *
 * @throws Error when the connection config is not a usable MySQL config
 *   (see `mysqlConnectionPoolConfigFromConfig`).
 */
constructor(options: KtxMysqlScanConnectorOptions) {
  const { connectionId, connection, env, poolFactory, endpointResolver, now } = options;
  this.id = `mysql:${connectionId}`;
  this.connectionId = connectionId;
  this.connection = connection ?? {};
  this.poolConfig = mysqlConnectionPoolConfigFromConfig({ connectionId, connection, env });
  this.poolFactory = poolFactory ?? new DefaultMysqlPoolFactory();
  this.endpointResolver = endpointResolver;
  this.now = now ?? (() => new Date());
}
|
||||
|
||||
/**
 * Probes the database with `SELECT 1`.
 *
 * @returns `{ success: true }`, or `{ success: false, error }` carrying the failure message; never rejects.
 */
async testConnection(): Promise<{ success: boolean; error?: string }> {
  try {
    await this.query('SELECT 1');
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { success: false, error: message };
  }
  return { success: true };
}
|
||||
|
||||
/**
 * Reads tables, views, columns, primary keys and foreign keys of the configured
 * database from `INFORMATION_SCHEMA` and assembles a schema snapshot.
 *
 * The four metadata queries run one after another, each parameterised with the database name.
 *
 * @param input - Scan input; its `connectionId` is checked via `assertConnection`.
 * @param _ctx - Unused.
 * @returns Snapshot scoped to the configured database, with table and column counts in `metadata`.
 */
async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
  this.assertConnection(input.connectionId);
  const { database, host } = this.poolConfig;
  // Wraps the statement in a leading and trailing newline, one line per element.
  const statement = (...lines: string[]): string => ['', ...lines, ''].join('\n');

  const tableRows = await this.queryRaw<MysqlTableRow>(
    statement(
      'SELECT TABLE_NAME, TABLE_TYPE, TABLE_COMMENT, TABLE_ROWS',
      'FROM INFORMATION_SCHEMA.TABLES',
      "WHERE TABLE_SCHEMA = ? AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')",
      'ORDER BY TABLE_NAME',
    ),
    [database],
  );
  const columnRows = await this.queryRaw<MysqlColumnRow>(
    statement(
      'SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COLUMN_COMMENT',
      'FROM INFORMATION_SCHEMA.COLUMNS',
      'WHERE TABLE_SCHEMA = ?',
      'ORDER BY TABLE_NAME, ORDINAL_POSITION',
    ),
    [database],
  );
  const primaryKeyRows = await this.queryRaw<MysqlPrimaryKeyRow>(
    statement(
      'SELECT TABLE_NAME, COLUMN_NAME',
      'FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE',
      'WHERE TABLE_SCHEMA = ?',
      "AND CONSTRAINT_NAME = 'PRIMARY'",
      'ORDER BY TABLE_NAME, ORDINAL_POSITION',
    ),
    [database],
  );
  const foreignKeyRows = await this.queryRaw<MysqlForeignKeyRow>(
    statement(
      'SELECT TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME, CONSTRAINT_NAME',
      'FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE',
      'WHERE TABLE_SCHEMA = ?',
      'AND REFERENCED_TABLE_NAME IS NOT NULL',
      'ORDER BY TABLE_NAME, COLUMN_NAME',
    ),
    [database],
  );

  const columnsByTable = groupByTable(columnRows);
  const primaryKeysByTable = primaryKeyMap(primaryKeyRows);
  const foreignKeysByTable = groupByTable(foreignKeyRows);

  const schemaTables: KtxSchemaTable[] = [];
  let totalColumns = 0;
  for (const tableRow of tableRows) {
    const schemaTable = this.toSchemaTable(
      tableRow,
      columnsByTable.get(tableRow.TABLE_NAME) ?? [],
      primaryKeysByTable,
      foreignKeysByTable,
    );
    totalColumns += schemaTable.columns.length;
    schemaTables.push(schemaTable);
  }

  return {
    connectionId: this.connectionId,
    driver: 'mysql',
    extractedAt: this.now().toISOString(),
    scope: { schemas: [database] },
    metadata: {
      database,
      host,
      table_count: schemaTables.length,
      total_columns: totalColumns,
    },
    tables: schemaTables,
  };
}
|
||||
|
||||
/**
 * Fetches sample rows from a table using the dialect's sample query.
 *
 * @param input - Connection id (checked via `assertConnection`), table reference, row limit and optional columns.
 * @param _ctx - Unused.
 * @returns Headers, rows and `totalRows` of the sample query result.
 */
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
  this.assertConnection(input.connectionId);
  const sampleSql = this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns);
  const { headers, rows, totalRows } = await this.query(sampleSql);
  return { headers, rows, totalRows };
}
|
||||
|
||||
/**
 * Fetches sample values of one column using the dialect's column sample query.
 *
 * @param input - Connection id (checked via `assertConnection`), table reference, column name and limit.
 * @param _ctx - Unused.
 * @returns The first cell of every non-empty row whose first cell is not `null`;
 *   `nullCount` and `distinctCount` are always `null`.
 */
async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
  this.assertConnection(input.connectionId);
  const sampleSql = this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit);
  const { rows } = await this.query(sampleSql);
  // Keep the first cell of each row, skipping empty rows and SQL NULLs.
  const values = rows.flatMap((row) => (row.length > 0 && row[0] !== null ? [row[0]] : []));
  return { values, nullCount: null, distinctCount: null };
}
|
||||
|
||||
/**
 * Column statistics are not provided by this connector (`capabilities.columnStats` is `false`).
 *
 * @returns Always `null`.
 */
async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
  const noStats = null;
  return noStats;
}
|
||||
|
||||
/**
 * Runs a caller-supplied query after passing it through `assertReadOnlySql`
 * and `limitSqlForExecution`.
 *
 * Array parameters are sent positionally as given; any other parameter shape is
 * handed to the dialect's `prepareQuery` together with the limited SQL.
 *
 * @param input - Connection id (checked via `assertConnection`), SQL text, optional `maxRows` and parameters.
 * @param _ctx - Unused.
 * @returns The query result plus `rowCount`, the number of returned rows.
 * @throws Whatever `assertReadOnlySql` throws for rejected statements.
 */
async executeReadOnly(input: KtxMysqlReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
  this.assertConnection(input.connectionId);
  const readOnlySql = assertReadOnlySql(input.sql);
  const limitedSql = limitSqlForExecution(readOnlySql, input.maxRows);
  const { sql, params } = Array.isArray(input.params)
    ? { sql: limitedSql, params: input.params }
    : this.dialect.prepareQuery(limitedSql, input.params);
  const result = await this.query(sql, params);
  return { ...result, rowCount: result.rows.length };
}
|
||||
|
||||
/**
 * Estimates a column's cardinality and, when it is small enough, lists its distinct values.
 *
 * @param table - Table reference.
 * @param columnName - Unquoted column name.
 * @param options - Cardinality ceiling, value limit and optional sample size (default 10000).
 * @returns `null` when the cardinality query yields no numeric value;
 *   `{ values: [], cardinality: 0 }` for zero cardinality;
 *   `{ values: null, cardinality }` when cardinality exceeds `maxCardinality`;
 *   otherwise the stringified non-null distinct values with the cardinality.
 */
async getColumnDistinctValues(
  table: KtxTableRef,
  columnName: string,
  options: KtxMysqlColumnDistinctValuesOptions,
): Promise<KtxMysqlColumnDistinctValuesResult | null> {
  const sampleSize = options.sampleSize ?? 10000;
  const qualifiedTable = this.qTableName(table);
  const quotedColumn = this.dialect.quoteIdentifier(columnName);

  const cardinalitySql = this.dialect.generateCardinalitySampleQuery(qualifiedTable, quotedColumn, sampleSize);
  const [cardinalityRow] = await this.queryRaw<MysqlCountRow>(cardinalitySql);
  const cardinality = Number(cardinalityRow?.cardinality);

  if (Number.isNaN(cardinality)) {
    return null;
  }
  if (cardinality === 0) {
    return { values: [], cardinality: 0 };
  }
  if (cardinality > options.maxCardinality) {
    return { values: null, cardinality };
  }

  const distinctSql = this.dialect.generateDistinctValuesQuery(qualifiedTable, quotedColumn, options.limit);
  const distinctRows = await this.queryRaw<MysqlDistinctValueRow>(distinctSql);
  const values: string[] = [];
  for (const { val } of distinctRows) {
    if (val !== null) {
      values.push(String(val));
    }
  }
  return { values, cardinality };
}
|
||||
|
||||
/**
 * Counts the rows of a table with `COUNT(*)`.
 *
 * @param tableName - Unqualified table name; it is quoted with the dialect's `quoteIdentifier`.
 * @returns The count as a number, or 0 when the result has no `count` value.
 */
async getTableRowCount(tableName: string): Promise<number> {
  const quotedTable = this.dialect.quoteIdentifier(tableName);
  const [countRow] = await this.queryRaw<MysqlCountRow>(`SELECT COUNT(*) AS count FROM ${quotedTable}`);
  return Number(countRow?.count ?? 0);
}
|
||||
|
||||
/** Formats a table reference as a SQL table name by delegating to the dialect's `formatTableName`. */
qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
  const formatted = this.dialect.formatTableName(table);
  return formatted;
}
|
||||
|
||||
/** Quotes a single identifier by delegating to the dialect's `quoteIdentifier`. */
quoteIdentifier(identifier: string): string {
  const quoted = this.dialect.quoteIdentifier(identifier);
  return quoted;
}
|
||||
|
||||
/**
 * Lists schema names from `INFORMATION_SCHEMA.SCHEMATA`, excluding
 * `information_schema`, `mysql`, `performance_schema` and `sys`.
 *
 * @returns Schema names ordered by name.
 */
async listSchemas(): Promise<string[]> {
  const schemaSql = [
    '',
    'SELECT SCHEMA_NAME',
    'FROM INFORMATION_SCHEMA.SCHEMATA',
    "WHERE SCHEMA_NAME NOT IN ('information_schema', 'mysql', 'performance_schema', 'sys')",
    'ORDER BY SCHEMA_NAME',
    '',
  ].join('\n');
  const schemaRows = await this.queryRaw<MysqlSchemaRow>(schemaSql);
  return schemaRows.map(({ SCHEMA_NAME: schemaName }) => schemaName);
}
|
||||
|
||||
/**
 * Lists base tables and views in the given schemas.
 *
 * @param schemas - Schemas to inspect; when omitted, every schema returned by
 *   `listSchemas()` is used.
 * @returns One entry per table/view ordered by schema then name; an empty
 *   array when there are no schemas to inspect.
 */
async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
  const targetSchemas = schemas ?? (await this.listSchemas());
  if (targetSchemas.length === 0) {
    return [];
  }

  // One "?" per schema so the names are bound as parameters, not interpolated.
  const placeholders = targetSchemas.map(() => '?').join(', ');
  const tablesSql = `
    SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE
    FROM INFORMATION_SCHEMA.TABLES
    WHERE TABLE_SCHEMA IN (${placeholders})
    AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
    ORDER BY TABLE_SCHEMA, TABLE_NAME
  `;
  const tableRows = await this.queryRaw<MysqlTableListRow>(tablesSql, targetSchemas);

  return tableRows.map(({ TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE }) => {
    const kind: 'view' | 'table' = TABLE_TYPE === 'VIEW' ? 'view' : 'table';
    return { schema: TABLE_SCHEMA, name: TABLE_NAME, kind };
  });
}
|
||||
|
||||
/**
 * Releases the connection pool and the resolved endpoint, if any.
 *
 * The endpoint is closed in a `finally` block so that a failure while ending
 * the pool cannot leak it. The endpoint reference is cleared before `close()`
 * is awaited, and also when the endpoint exposes no `close`, so a stale
 * endpoint is never retained after cleanup.
 *
 * @throws Whatever `pool.end()` or `endpoint.close()` rejects with.
 */
async cleanup(): Promise<void> {
  try {
    if (this.pool) {
      await this.pool.end();
      this.pool = null;
    }
  } finally {
    const endpoint = this.resolvedEndpoint;
    if (endpoint) {
      this.resolvedEndpoint = null;
      if (endpoint.close) {
        await endpoint.close();
      }
    }
  }
}
|
||||
|
||||
/**
 * Converts one INFORMATION_SCHEMA table row plus its column rows into a
 * `KtxSchemaTable`.
 *
 * Views get `estimatedRows: null`; for tables the TABLE_ROWS value is used
 * (0 when absent) and replaced by `null` if it is not a finite number.
 * `db` is taken from the pool configuration and `catalog` is always `null`.
 */
private toSchemaTable(
  table: MysqlTableRow,
  columns: MysqlColumnRow[],
  primaryKeysByTable: Map<string, Set<string>>,
  foreignKeysByTable: Map<string, MysqlForeignKeyRow[]>,
): KtxSchemaTable {
  const { TABLE_NAME: name } = table;
  const isView = table.TABLE_TYPE === 'VIEW';
  const rowEstimate = isView ? null : Number(table.TABLE_ROWS ?? 0);
  // Looked up once and shared by every column of this table.
  const primaryKeys = primaryKeysByTable.get(name) ?? new Set<string>();
  const foreignKeyRows = foreignKeysByTable.get(name) ?? [];

  return {
    catalog: null,
    db: this.poolConfig.database,
    name,
    kind: isView ? 'view' : 'table',
    comment: cleanMySqlTableComment(table.TABLE_COMMENT),
    estimatedRows: Number.isFinite(rowEstimate) ? rowEstimate : null,
    columns: columns.map((column) => this.toSchemaColumn(column, primaryKeys)),
    foreignKeys: foreignKeyRows.map((row) => this.toSchemaForeignKey(row)),
  };
}
|
||||
|
||||
/**
 * Converts an INFORMATION_SCHEMA column row into a `KtxSchemaColumn`.
 *
 * @param column - Raw column metadata row.
 * @param primaryKeys - Names of the primary-key columns of the owning table.
 */
private toSchemaColumn(column: MysqlColumnRow, primaryKeys: Set<string>): KtxSchemaColumn {
  const { COLUMN_NAME: name, DATA_TYPE: nativeType } = column;
  return {
    name,
    nativeType,
    normalizedType: this.dialect.mapDataType(nativeType),
    dimensionType: this.dialect.mapToDimensionType(nativeType),
    nullable: column.IS_NULLABLE === 'YES',
    primaryKey: primaryKeys.has(name),
    // An empty comment is reported as null.
    comment: column.COLUMN_COMMENT || null,
  };
}
|
||||
|
||||
/**
 * Converts a foreign-key metadata row into a `KtxSchemaForeignKey`.
 * The referenced database is always the one from the pool configuration and
 * the catalog is always `null`; an empty constraint name becomes `null`.
 */
private toSchemaForeignKey(row: MysqlForeignKeyRow): KtxSchemaForeignKey {
  const target = {
    toCatalog: null,
    toDb: this.poolConfig.database,
    toTable: row.REFERENCED_TABLE_NAME,
    toColumn: row.REFERENCED_COLUMN_NAME,
  };
  return {
    fromColumn: row.COLUMN_NAME,
    ...target,
    constraintName: row.CONSTRAINT_NAME || null,
  };
}
|
||||
|
||||
/**
 * Returns the connection pool, creating it on first use.
 *
 * When an endpoint resolver is configured, the host/port from the pool
 * configuration are passed through it and the resolved values are used for
 * the pool; the resolved endpoint is remembered on the instance.
 *
 * NOTE(review): two callers that arrive before the first `resolve()` settles
 * will each see `this.pool` unset and each create a pool/endpoint — confirm
 * whether concurrent first use can happen.
 */
private async poolForQuery(): Promise<KtxMysqlPool> {
  if (this.pool) {
    return this.pool;
  }

  // Work on a copy so the stored configuration keeps the original host/port.
  const config = { ...this.poolConfig };
  const resolver = this.endpointResolver;
  if (resolver) {
    const endpoint = await resolver.resolve({
      host: config.host,
      port: config.port,
      connection: this.connection,
    });
    this.resolvedEndpoint = endpoint;
    config.host = endpoint.host;
    config.port = endpoint.port;
  }

  const pool = this.poolFactory.createPool(config);
  this.pool = pool;
  return pool;
}
|
||||
|
||||
/**
 * Runs a statement on a pooled connection and returns the raw rows.
 * Unlike `query`, the SQL is not passed through `assertReadOnlySql`.
 * The connection is always released, whether the query succeeds or throws.
 */
private async queryRaw<T extends RowDataPacket>(sql: string, params?: unknown): Promise<T[]> {
  const connectionPool = await this.poolForQuery();
  const conn = await connectionPool.getConnection();
  try {
    const [resultRows] = await conn.query(sql, params);
    return resultRows as T[];
  } finally {
    conn.release();
  }
}
|
||||
|
||||
/**
 * Runs a read-only statement and returns it in tabular form.
 *
 * The SQL is passed through `assertReadOnlySql` and the parameters through
 * `queryParams` before execution. Each result row is projected into an array
 * by looking up every header name on the row object, so columns sharing a
 * name resolve to the same value.
 *
 * @returns Headers, stringified field types ('unknown' when absent), the row
 *   matrix, and `totalRows` equal to the number of rows returned.
 */
private async query(
  sql: string,
  params?: Record<string, unknown> | unknown[],
): Promise<Omit<KtxQueryResult, 'rowCount'>> {
  const connectionPool = await this.poolForQuery();
  const conn = await connectionPool.getConnection();
  try {
    const [resultRows, resultFields] = await conn.query(assertReadOnlySql(sql), queryParams(params));
    const headers = resultFields.map((field) => field.name);
    const headerTypes = resultFields.map((field) => String(field.type ?? 'unknown'));
    const matrix = resultRows.map((record) => headers.map((column) => record[column]));
    return {
      headers,
      headerTypes,
      rows: matrix,
      totalRows: resultRows.length,
    };
  } finally {
    conn.release();
  }
}
|
||||
|
||||
/**
 * Guards against using this connector for a different connection.
 *
 * @throws Error when `connectionId` differs from the connector's own id.
 */
private assertConnection(connectionId: string): void {
  if (connectionId === this.connectionId) {
    return;
  }
  throw new Error(`KTX MySQL connector ${this.id} cannot serve connection ${connectionId}`);
}
|
||||
}
|
||||
49
packages/cli/src/connectors/mysql/dialect.test.ts
Normal file
49
packages/cli/src/connectors/mysql/dialect.test.ts
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxMysqlDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the MySQL dialect's pure SQL-string helpers.
describe('KtxMysqlDialect', () => {
  const dialect = new KtxMysqlDialect();

  it('quotes identifiers and formats database-qualified table names', () => {
    expect(dialect.quoteIdentifier('orders')).toBe('`orders`');
    // Embedded backticks are doubled.
    expect(dialect.quoteIdentifier('odd`name')).toBe('`odd``name`');

    const qualified = dialect.formatTableName({ catalog: null, db: 'analytics', name: 'orders' });
    expect(qualified).toBe('`analytics`.`orders`');

    const unqualified = dialect.formatTableName({ catalog: null, db: null, name: 'orders' });
    expect(unqualified).toBe('`orders`');
  });

  it('maps native MySQL types to KTX dimension types', () => {
    const cases: Array<[nativeType: string, dimensionType: string]> = [
      ['tinyint(1)', 'boolean'],
      ['int', 'number'],
      ['decimal(10,2)', 'number'],
      ['timestamp', 'time'],
      ['varchar(255)', 'string'],
      ['json', 'string'],
      ['', 'string'],
    ];
    for (const [nativeType, dimensionType] of cases) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('builds sampling, distinct-value, pagination, and time SQL', () => {
    const ordersTable = '`analytics`.`orders`';

    const sampleSql = dialect.generateSampleQuery(ordersTable, 25, ['id', 'status']);
    expect(sampleSql).toBe('SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 25');

    const columnSampleSql = dialect.generateColumnSampleQuery(ordersTable, 'status', 10);
    expect(columnSampleSql).toBe(
      "SELECT `status` FROM `analytics`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS CHAR)) != '' LIMIT 10",
    );

    const distinctSql = dialect.generateDistinctValuesQuery(ordersTable, '`status`', 5);
    expect(distinctSql).toContain('SELECT DISTINCT CAST(`status` AS CHAR) AS val');

    expect(dialect.getLimitOffsetClause(10, 20)).toBe('LIMIT 10 OFFSET 20');
    expect(dialect.getTimeTruncExpression('created_at', 'month')).toBe("DATE_FORMAT(created_at, '%Y-%m-01')");
  });

  it('prepares named parameters in deterministic SQL placeholder order', () => {
    // Params are supplied in a different key order than they appear in the SQL.
    const prepared = dialect.prepareQuery('select * from orders where id = :id and status = :status', {
      status: 'paid',
      id: 10,
    });
    expect(prepared).toEqual({
      sql: 'select * from orders where id = ? and status = ?',
      params: [10, 'paid'],
    });
  });
});
|
||||
202
packages/cli/src/connectors/mysql/dialect.ts
Normal file
202
packages/cli/src/connectors/mysql/dialect.ts
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/index.js';
|
||||
|
||||
/** Table reference accepted by `formatTableName`: `name` is required, `catalog` and `db` are optional. */
type MysqlTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL-string helpers for MySQL: identifier quoting, native-type
 * classification, and generators for sampling, cardinality, pagination, and
 * time-bucketing expressions. All methods are pure string builders; nothing
 * here executes SQL.
 */
export class KtxMysqlDialect {
  readonly type = 'mysql';

  /**
   * Exact-match lookup from a lower-cased native type (length/precision
   * stripped) to its dimension type. A Map is used so that names such as
   * "constructor" cannot resolve to inherited Object.prototype members.
   */
  private readonly typeMappings: ReadonlyMap<string, KtxSchemaDimensionType> = new Map<
    string,
    KtxSchemaDimensionType
  >([
    ['datetime', 'time'],
    ['timestamp', 'time'],
    ['date', 'time'],
    ['time', 'time'],
    ['year', 'time'],
    ['tinyint', 'number'],
    ['smallint', 'number'],
    ['mediumint', 'number'],
    ['int', 'number'],
    ['integer', 'number'],
    ['bigint', 'number'],
    ['decimal', 'number'],
    ['numeric', 'number'],
    ['float', 'number'],
    ['double', 'number'],
    ['real', 'number'],
    ['varchar', 'string'],
    ['char', 'string'],
    ['text', 'string'],
    ['tinytext', 'string'],
    ['mediumtext', 'string'],
    ['longtext', 'string'],
    ['enum', 'string'],
    ['set', 'string'],
    ['json', 'string'],
    // Spatial types are listed explicitly: "point"/"multipoint" contain the
    // substring "int" and would otherwise be caught by the numeric heuristic.
    ['geometry', 'string'],
    ['point', 'string'],
    ['linestring', 'string'],
    ['polygon', 'string'],
    ['multipoint', 'string'],
    ['multilinestring', 'string'],
    ['multipolygon', 'string'],
    ['geometrycollection', 'string'],
    ['geomcollection', 'string'],
    ['bit', 'boolean'],
    ['bool', 'boolean'],
    ['boolean', 'boolean'],
  ]);

  /** Wraps an identifier in backticks, doubling any embedded backtick. */
  quoteIdentifier(identifier: string): string {
    return `\`${identifier.replace(/`/g, '``')}\``;
  }

  /** Formats `db`.`name` when `db` is set, otherwise just the quoted name. `catalog` is ignored. */
  formatTableName(table: MysqlTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native MySQL type as 'time' | 'number' | 'boolean' | 'string'.
   *
   * Order of checks: empty input -> 'string'; anything containing
   * `tinyint(1)` -> 'boolean'; exact lookup of the base type name; substring
   * heuristics for unlisted names; otherwise 'string'.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    if (lower.includes('tinyint(1)')) {
      return 'boolean';
    }

    // Strip "(length/precision...)" and everything after it.
    const parenIndex = lower.indexOf('(');
    const normalized = (parenIndex === -1 ? lower : lower.slice(0, parenIndex)).trim();

    const mapped = this.typeMappings.get(normalized);
    if (mapped !== undefined) {
      return mapped;
    }
    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    if (
      normalized.includes('int') ||
      normalized.includes('num') ||
      normalized.includes('dec') ||
      normalized.includes('float') ||
      normalized.includes('double')
    ) {
      return 'number';
    }
    if (normalized.includes('bit') || normalized === 'bool' || normalized === 'boolean') {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT <columns> FROM <table> LIMIT <limit>`.
   *
   * @param tableName - Already-quoted table name, inserted verbatim.
   * @param columns - Unquoted column names; `*` is used when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /**
   * Builds a single-column sample that skips NULLs and blank strings.
   *
   * @param tableName - Already-quoted table name, inserted verbatim.
   * @param columnName - Unquoted column name; quoted here.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS CHAR)) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to positional `?` and collects the values in
   * the order the placeholders appear in the SQL.
   *
   * Only own properties of `params` are substituted; a placeholder without a
   * matching key is left untouched. The scan is purely textual, so a `:name`
   * inside a string literal or comment is rewritten too.
   *
   * @returns The SQL unchanged with `params: undefined` when no params object
   *   is given, otherwise the rewritten SQL and the ordered values.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
    if (!params) {
      return { sql, params: undefined };
    }
    const values: unknown[] = [];
    const parameterizedQuery = sql.replace(/:([A-Za-z_][A-Za-z0-9_]*)\b/g, (placeholder, key: string) => {
      // hasOwnProperty (not `in`) so ":constructor" etc. never match inherited members.
      if (!Object.prototype.hasOwnProperty.call(params, key)) {
        return placeholder;
      }
      values.push(params[key]);
      return '?';
    });
    return { sql: parameterizedQuery, params: values };
  }

  /**
   * Returns a `RAND() < p` predicate for a fraction strictly between 0 and 1,
   * or an empty string for any other value (including NaN).
   */
  getRandomSampleFilter(samplePct: number): string {
    if (!(samplePct > 0 && samplePct < 1)) {
      return '';
    }
    return `RAND() < ${samplePct}`;
  }

  /** Always returns an empty string: this dialect emits no table-sample clause. */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** Builds `LIMIT n` or, for a positive offset, `LIMIT n OFFSET m`. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting NULLs of `column` (inserted verbatim). */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** Aggregate expression counting distinct values of `column` (inserted verbatim). */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Counts distinct non-null values among the first `sampleSize` rows.
   * `tableName` and `columnName` are inserted verbatim and must already be quoted.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      ) AS sampled
    `;
  }

  /**
   * Selects up to `limit` distinct non-null values cast to CHAR, ordered by value.
   * `tableName` and `columnName` are inserted verbatim and must already be quoted.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT CAST(${columnName} AS CHAR) AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /** Always returns `null`: this dialect provides no column-statistics query. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /**
   * Same as `generateCardinalitySampleQuery`, but the sampled rows are chosen
   * with `ORDER BY RAND()` instead of taking the first rows.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RAND()
        LIMIT ${sampleSize}
      ) AS sampled
    `;
  }

  /**
   * Builds an expression truncating `column` to the start of the given period.
   *
   * @param column - Column expression, inserted verbatim.
   * @param timezone - When set, the column is first converted from '+00:00'
   *   to this zone with CONVERT_TZ; the value is emitted as an escaped string literal.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = this.zonedColumn(column, timezone);
    switch (granularity) {
      case 'day':
        return `DATE(${col})`;
      case 'week':
        return `DATE(${col} - INTERVAL WEEKDAY(${col}) DAY)`;
      case 'month':
        return `DATE_FORMAT(${col}, '%Y-%m-01')`;
      case 'quarter':
        return `MAKEDATE(YEAR(${col}), 1) + INTERVAL (QUARTER(${col}) - 1) QUARTER`;
      case 'year':
        return `DATE_FORMAT(${col}, '%Y-01-01')`;
    }
  }

  /**
   * Builds an expression bucketing `column` into fixed-size intervals counted
   * from `origin` (default '1970-01-01').
   *
   * @param interval - "<amount> <unit>", e.g. "15 minute".
   * @throws TypeError when `interval` lacks an amount or a unit.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = this.zonedColumn(column, timezone);
    const { amount, unit } = this.splitInterval(interval);
    const originExpr = origin ? this.quoteStringLiteral(origin) : `'1970-01-01'`;
    return `DATE_ADD(${originExpr}, INTERVAL FLOOR(TIMESTAMPDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount} ${unit})`;
  }

  /**
   * Converts "<amount> <unit>" into `INTERVAL <amount> <UNIT>`.
   *
   * @throws TypeError when `interval` lacks an amount or a unit.
   */
  parseIntervalToSql(interval: string): string {
    const { amount, unit } = this.splitInterval(interval);
    return `INTERVAL ${amount} ${unit}`;
  }

  /** Wraps `column` in CONVERT_TZ from '+00:00' when a timezone is given. */
  private zonedColumn(column: string, timezone?: string): string {
    return timezone ? `CONVERT_TZ(${column}, '+00:00', ${this.quoteStringLiteral(timezone)})` : column;
  }

  /**
   * Emits a single-quoted SQL string literal. Backslashes and single quotes
   * are doubled so the value cannot terminate the literal early.
   */
  private quoteStringLiteral(value: string): string {
    return `'${value.replace(/\\/g, '\\\\').replace(/'/g, "''")}'`;
  }

  /**
   * Splits "<amount> <unit>" on any run of whitespace and upper-cases the
   * unit. Tokens after the unit are ignored.
   */
  private splitInterval(interval: string): { amount: string; unit: string } {
    const [amount, unit] = interval.trim().split(/\s+/);
    if (!amount || !unit) {
      throw new TypeError(`Invalid interval "${interval}": expected "<amount> <unit>"`);
    }
    return { amount, unit: unit.toUpperCase() };
  }
}
|
||||
15
packages/cli/src/connectors/mysql/index.ts
Normal file
15
packages/cli/src/connectors/mysql/index.ts
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
export { KtxMysqlDialect } from './dialect.js';
|
||||
export {
|
||||
isKtxMysqlConnectionConfig,
|
||||
KtxMysqlScanConnector,
|
||||
mysqlConnectionPoolConfigFromConfig,
|
||||
type KtxMysqlColumnDistinctValuesOptions,
|
||||
type KtxMysqlColumnDistinctValuesResult,
|
||||
type KtxMysqlConnectionConfig,
|
||||
type KtxMysqlEndpointResolver,
|
||||
type KtxMysqlPoolConfig,
|
||||
type KtxMysqlPoolFactory,
|
||||
type KtxMysqlReadOnlyQueryInput,
|
||||
type KtxMysqlScanConnectorOptions,
|
||||
} from './connector.js';
|
||||
export { createMysqlLiveDatabaseIntrospection } from './live-database-introspection.js';
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/index.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/index.js';
|
||||
import {
|
||||
KtxMysqlScanConnector,
|
||||
type KtxMysqlConnectionConfig,
|
||||
type KtxMysqlEndpointResolver,
|
||||
type KtxMysqlPoolFactory,
|
||||
} from './connector.js';
|
||||
|
||||
/** Options for `createMysqlLiveDatabaseIntrospection`. */
interface CreateMysqlLiveDatabaseIntrospectionOptions {
  /** Project connection configs keyed by connection id. */
  connections: Record<string, KtxProjectConnectionConfig>;

  /** Forwarded to each `KtxMysqlScanConnector` that is created. */
  poolFactory?: KtxMysqlPoolFactory;

  /** Forwarded to each `KtxMysqlScanConnector` that is created. */
  endpointResolver?: KtxMysqlEndpointResolver;

  /** Clock override, forwarded to each `KtxMysqlScanConnector` that is created. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Adapts the MySQL scan connector to the `LiveDatabaseIntrospectionPort`
 * interface.
 *
 * Each `extractSchema` call builds a fresh connector for the requested
 * connection id, runs `introspect`, and always calls `cleanup()` on the
 * connector afterwards, whether introspection succeeded or threw.
 */
export function createMysqlLiveDatabaseIntrospection(
  options: CreateMysqlLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    // The lookup may be undefined for an unknown id; the connector receives it as-is.
    const connection = options.connections[connectionId] as KtxMysqlConnectionConfig | undefined;
    const scanner = new KtxMysqlScanConnector({
      connectionId,
      connection,
      poolFactory: options.poolFactory,
      endpointResolver: options.endpointResolver,
      now: options.now,
    });
    try {
      return await scanner.introspect({ connectionId, driver: 'mysql' }, { runId: `mysql-${connectionId}` });
    } finally {
      await scanner.cleanup();
    }
  };

  return { extractSchema };
}
|
||||
392
packages/cli/src/connectors/postgres/connector.test.ts
Normal file
392
packages/cli/src/connectors/postgres/connector.test.ts
Normal file
|
|
@ -0,0 +1,392 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
createPostgresLiveDatabaseIntrospection,
|
||||
isKtxPostgresConnectionConfig,
|
||||
KtxPostgresScanConnector,
|
||||
postgresPoolConfigFromConfig,
|
||||
type KtxPostgresPoolFactory,
|
||||
} from './index.js';
|
||||
|
||||
/** Canned query result returned by the fake pool in these tests. */
interface FakeQueryResult {
  /** Result rows keyed by column name. */
  rows: Record<string, unknown>[];

  /** Optional field descriptors: column name plus PostgreSQL type OID. */
  fields?: Array<{ name: string; dataTypeID: number }>;
}
|
||||
|
||||
/**
 * Builds a pool factory whose clients answer queries from a canned table.
 *
 * Incoming SQL is whitespace-normalized; the first entry of `results` whose
 * key is a substring of the normalized SQL supplies the response, so entry
 * order matters. SQL that matches no key throws, which surfaces unexpected
 * queries as test failures. All clients share one `query` mock.
 */
function fakePoolFactory(results: Map<string, FakeQueryResult>): KtxPostgresPoolFactory {
  const query = vi.fn(async (sql: string, params?: unknown[]) => {
    const normalized = sql.replace(/\s+/g, ' ').trim();
    const match = [...results.entries()].find(([fragment]) => normalized.includes(fragment));
    if (match) {
      return match[1];
    }
    throw new Error(`Unexpected SQL: ${normalized} params=${JSON.stringify(params ?? [])}`);
  });

  return {
    // A new pool object (with its own `end` mock) is created per call.
    createPool: () => ({
      connect: async () => ({ query, release: vi.fn() }),
      end: vi.fn(async () => undefined),
    }),
  };
}
|
||||
|
||||
/**
 * Canned responses for every query the connector tests issue, keyed by a
 * distinctive SQL fragment (matched by substring in `fakePoolFactory`).
 * Entry order is significant because the first matching fragment wins.
 */
function metadataResults(): Map<string, FakeQueryResult> {
  const tableRows = [
    { table_name: 'customers', table_kind: 'r', row_count: '2', table_comment: 'Customers' },
    { table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null },
    { table_name: 'recent_orders', table_kind: 'v', row_count: '0', table_comment: 'Recent orders' },
  ];

  const columnRows = [
    { table_name: 'customers', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
    { table_name: 'customers', column_name: 'name', data_type: 'text', is_nullable: false, column_comment: 'Name' },
    { table_name: 'orders', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
    { table_name: 'orders', column_name: 'customer_id', data_type: 'integer', is_nullable: false, column_comment: null },
    { table_name: 'orders', column_name: 'status', data_type: 'text', is_nullable: true, column_comment: null },
    { table_name: 'recent_orders', column_name: 'id', data_type: 'integer', is_nullable: true, column_comment: null },
  ];

  const foreignKeyRows = [
    {
      table_name: 'orders',
      column_name: 'customer_id',
      foreign_table_schema: 'public',
      foreign_table_name: 'customers',
      foreign_column_name: 'id',
      constraint_name: 'orders_customer_id_fkey',
    },
  ];

  const primaryKeyRows = [
    { table_name: 'customers', column_name: 'id' },
    { table_name: 'orders', column_name: 'id' },
  ];

  return new Map<string, FakeQueryResult>([
    // Catalog metadata queries.
    ['FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n', { rows: tableRows }],
    ['FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_class c', { rows: columnRows }],
    ["tc.constraint_type = 'FOREIGN KEY'", { rows: foreignKeyRows }],
    ["tc.constraint_type = 'PRIMARY KEY'", { rows: primaryKeyRows }],
    // Sampling, distinct values, counts, and statistics.
    ['SELECT "id" FROM "public"."orders" LIMIT 1', { rows: [{ id: 10 }], fields: [{ name: 'id', dataTypeID: 23 }] }],
    [
      'SELECT "status" FROM "public"."orders" WHERE "status" IS NOT NULL',
      { rows: [{ status: 'paid' }, { status: 'open' }], fields: [{ name: 'status', dataTypeID: 25 }] },
    ],
    ['COUNT(DISTINCT val) AS cardinality', { rows: [{ cardinality: '2' }] }],
    ['SELECT DISTINCT "status"::text AS val', { rows: [{ val: 'open' }, { val: 'paid' }] }],
    ['SELECT COUNT(*) AS count FROM "public"."orders"', { rows: [{ count: '3' }] }],
    ['FROM pg_stats s', { rows: [{ column_name: 'status', estimated_cardinality: '2' }] }],
    // Connectivity probe and schema listing.
    ['SELECT 1', { rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] }],
    ['SELECT schema_name FROM information_schema.schemata', { rows: [{ schema_name: 'public' }] }],
  ]);
}
|
||||
|
||||
// Tests for the PostgreSQL scan connector, driven entirely by fake pools.
describe('KtxPostgresScanConnector', () => {
  /** Connection fields shared by the tests; a fresh object is built per call. */
  const warehouseConnection = () => ({
    driver: 'postgres' as const,
    host: 'db.example.test',
    database: 'analytics',
    username: 'reader',
    password: 'test-password', // pragma: allowlist secret
  });

  /** The shared connection restricted to the `public` schema. */
  const publicWarehouseConnection = () => ({ ...warehouseConnection(), schema: 'public' });

  /** Fixed clock so `extractedAt` is deterministic. */
  const fixedNow = () => new Date('2026-04-29T10:00:00.000Z');

  const ordersTable = { catalog: null, db: 'public', name: 'orders' };

  it('resolves configuration safely', () => {
    expect(isKtxPostgresConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL' })).toBe(true);
    expect(isKtxPostgresConnectionConfig({ driver: 'postgresql', host: 'db', database: 'analytics' })).toBe(true);
    expect(isKtxPostgresConnectionConfig({ driver: 'mysql', host: 'db' })).toBe(false);

    const explicitConfig = postgresPoolConfigFromConfig({
      connectionId: 'warehouse',
      connection: {
        ...warehouseConnection(),
        schemas: ['analytics', 'public'],
        ssl: true,
        rejectUnauthorized: false,
      },
    });
    expect(explicitConfig).toMatchObject({
      host: 'db.example.test',
      port: 5432,
      database: 'analytics',
      user: 'reader',
      password: 'test-password', // pragma: allowlist secret
      options: '-c search_path=analytics,public',
      ssl: { rejectUnauthorized: false },
    });

    const libpqPreferConfig = postgresPoolConfigFromConfig({
      connectionId: 'warehouse',
      connection: {
        driver: 'postgres',
        url: 'env:DEMO_DATABASE_URL',
      },
      env: {
        DEMO_DATABASE_URL: 'postgresql://reader@demo.example.test:5432/demo?sslmode=prefer',
      },
    });
    expect(libpqPreferConfig).toMatchObject({
      host: 'demo.example.test',
      port: 5432,
      database: 'demo',
      user: 'reader',
    });
    expect(libpqPreferConfig).not.toHaveProperty('connectionString');
    expect(libpqPreferConfig).not.toHaveProperty('ssl');

    const passwordlessConfig = postgresPoolConfigFromConfig({
      connectionId: 'warehouse',
      connection: { driver: 'postgres', host: 'db.example.test', database: 'analytics', username: 'reader' },
    });
    expect(passwordlessConfig).toMatchObject({
      host: 'db.example.test',
      database: 'analytics',
      user: 'reader',
    });
  });

  it('introspects schemas, tables, views, primary keys, comments, row counts, and foreign keys', async () => {
    const connector = new KtxPostgresScanConnector({
      connectionId: 'warehouse',
      connection: publicWarehouseConnection(),
      poolFactory: fakePoolFactory(metadataResults()),
      now: fixedNow,
    });

    const snapshot = await connector.introspect(
      { connectionId: 'warehouse', driver: 'postgres' },
      { runId: 'scan-run-1' },
    );

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'postgres',
      extractedAt: '2026-04-29T10:00:00.000Z',
      scope: { schemas: ['public'] },
      metadata: {
        database: 'analytics',
        schemas: ['public'],
        host: 'db.example.test',
        table_count: 3,
        total_columns: 6,
      },
    });

    const tableSummary = snapshot.tables.map((table) => [table.db, table.name, table.kind, table.estimatedRows]);
    expect(tableSummary).toEqual([
      ['public', 'customers', 'table', 2],
      ['public', 'orders', 'table', 3],
      ['public', 'recent_orders', 'view', null],
    ]);

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'integer',
      normalizedType: 'integer',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
    });

    const orders = snapshot.tables.find((table) => table.name === 'orders');
    expect(orders?.foreignKeys).toEqual([
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: 'public',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: 'orders_customer_id_fkey',
      },
    ]);
  });

  it('runs samples, distinct values, statistics, read-only SQL, and schema listing', async () => {
    const connector = new KtxPostgresScanConnector({
      connectionId: 'warehouse',
      connection: publicWarehouseConnection(),
      poolFactory: fakePoolFactory(metadataResults()),
    });
    const scanContext = { runId: 'scan-run-1' };

    await expect(
      connector.sampleTable(
        { connectionId: 'warehouse', table: { ...ordersTable }, columns: ['id'], limit: 1 },
        scanContext,
      ),
    ).resolves.toEqual({ headers: ['id'], headerTypes: ['integer'], rows: [[10]], totalRows: 1 });

    await expect(
      connector.sampleColumn(
        { connectionId: 'warehouse', table: { ...ordersTable }, column: 'status', limit: 5 },
        scanContext,
      ),
    ).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

    await expect(
      connector.getColumnDistinctValues(
        { ...ordersTable },
        'status',
        { maxCardinality: 5, limit: 10, sampleSize: 100 },
      ),
    ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    await expect(connector.getColumnStatistics({ ...ordersTable })).resolves.toEqual({
      cardinalityByColumn: new Map([['status', 2]]),
    });
    await expect(connector.getTableRowCount({ db: 'public', name: 'orders' })).resolves.toBe(3);
    await expect(connector.listSchemas()).resolves.toEqual(['public']);
    await expect(connector.testConnection()).resolves.toEqual({ success: true });

    // Non-SELECT statements must be rejected before reaching the database.
    await expect(
      connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, scanContext),
    ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
  });

  it('adapts native PostgreSQL snapshots to live-database introspection for local ingest', async () => {
    const introspection = createPostgresLiveDatabaseIntrospection({
      connections: { warehouse: publicWarehouseConnection() },
      poolFactory: fakePoolFactory(metadataResults()),
      now: fixedNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T10:00:00.000Z',
    });

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers).toMatchObject({
      name: 'customers',
      catalog: null,
      db: 'public',
      columns: [
        {
          name: 'id',
          nativeType: 'integer',
          normalizedType: 'integer',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: null,
        },
        {
          name: 'name',
          nativeType: 'text',
          normalizedType: 'text',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: 'Name',
        },
      ],
      foreignKeys: [],
    });
  });

  it('does not end the pool before introspection completes', async () => {
    let endCalled = false;

    // Wraps the fake pool so that any connect() after end() fails loudly.
    const endAwarePoolFactory: KtxPostgresPoolFactory = {
      createPool() {
        const inner = fakePoolFactory(metadataResults()).createPool({
          max: 1,
          idleTimeoutMillis: 1,
          connectionTimeoutMillis: 1,
        });
        return {
          async connect() {
            if (endCalled) {
              throw new Error('Cannot use a pool after calling end on the pool');
            }
            return inner.connect();
          },
          async end() {
            endCalled = true;
            return inner.end();
          },
        };
      },
    };

    const introspection = createPostgresLiveDatabaseIntrospection({
      connections: { warehouse: publicWarehouseConnection() },
      poolFactory: endAwarePoolFactory,
      now: fixedNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');
    expect(snapshot.tables.length).toBeGreaterThan(0);
    expect(endCalled).toBe(true);
  });

  it('attaches an error listener to the pg pool', async () => {
    const on = vi.fn();
    const probeResult = { rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] };
    const poolFactory: KtxPostgresPoolFactory = {
      createPool() {
        return {
          on,
          async connect() {
            return {
              query: vi.fn(async () => probeResult),
              release: vi.fn(),
            };
          },
          end: vi.fn(async () => undefined),
        };
      },
    };

    const connector = new KtxPostgresScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      poolFactory,
    });

    await expect(connector.testConnection()).resolves.toEqual({ success: true });

    expect(on).toHaveBeenCalledWith('error', expect.any(Function));
  });
});
|
||||
760
packages/cli/src/connectors/postgres/connector.ts
Normal file
760
packages/cli/src/connectors/postgres/connector.ts
Normal file
|
|
@ -0,0 +1,760 @@
|
|||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/index.js';
|
||||
import {
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
type KtxColumnStatsResult,
|
||||
type KtxQueryResult,
|
||||
type KtxReadOnlyQueryInput,
|
||||
type KtxScanConnector,
|
||||
type KtxScanContext,
|
||||
type KtxScanInput,
|
||||
type KtxSchemaColumn,
|
||||
type KtxSchemaForeignKey,
|
||||
type KtxSchemaSnapshot,
|
||||
type KtxSchemaTable,
|
||||
type KtxTableListEntry,
|
||||
type KtxTableRef,
|
||||
type KtxTableSampleInput,
|
||||
type KtxTableSampleResult,
|
||||
} from '../../context/scan/index.js';
|
||||
import { Pool } from 'pg';
|
||||
import { KtxPostgresDialect } from './dialect.js';
|
||||
|
||||
/**
 * Display names for the PostgreSQL type OIDs reported in query result fields.
 * OIDs missing from this table are rendered by the caller as `oid:<n>`.
 */
const PG_OID_TYPE_MAP: Record<number, string> = {
  // Scalar types, in ascending OID order.
  16: 'boolean',
  20: 'bigint',
  21: 'smallint',
  23: 'integer',
  25: 'text',
  114: 'json',
  700: 'real',
  701: 'double precision',
  1043: 'varchar',
  1082: 'date',
  1114: 'timestamp',
  1184: 'timestamptz',
  1700: 'numeric',
  2950: 'uuid',
  3802: 'jsonb',
  // Array types.
  1007: 'integer[]',
  1009: 'text[]',
  1016: 'bigint[]',
};
|
||||
|
||||
/**
 * Connection settings for the native PostgreSQL connector.
 *
 * The string settings `host`, `database`, `username`, `user`, `password` and
 * `url` may be written as `env:NAME` or `file:PATH` references; they are
 * resolved when the pool configuration is built.
 */
export interface KtxPostgresConnectionConfig {
  /** Must be `postgres` or `postgresql` (case-insensitive) for this connector to accept the connection. */
  driver?: string;

  /** Connection URL; values parsed from it serve as defaults for the discrete fields below. */
  url?: string;
  host?: string;
  /** Falls back to 5432 when absent or not a finite number. */
  port?: number;
  database?: string;
  /** Preferred over `user` when both are present. */
  username?: string;
  user?: string;
  password?: string;

  /** Single schema to scan; consulted only when `schemas` is absent or empty. */
  schema?: string;
  /** Schemas to scan; when neither this nor `schema` is set, `public` is used. */
  schemas?: string[];

  /** When truthy (and the SSL mode is neither `prefer` nor `disable`) a TLS configuration is attached to the pool. */
  ssl?: boolean;
  /** SSL mode; takes precedence over the `sslMode` spelling. */
  sslmode?: string;
  sslMode?: string;
  /** Certificate verification flag for the TLS configuration; defaults to `true`. */
  rejectUnauthorized?: boolean;

  /** Additional settings are tolerated and carried along untouched. */
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Configuration object passed to {@link KtxPostgresPoolFactory.createPool}.
 * Either `connectionString` or the discrete host/port/database/user/password
 * fields are populated, never both.
 */
export interface KtxPostgresPoolConfig {
  // Pool sizing and timeouts (always present).
  max: number;
  idleTimeoutMillis: number;
  connectionTimeoutMillis: number;

  // Target, expressed as a URL ...
  connectionString?: string;

  // ... or as discrete fields.
  host?: string;
  port?: number;
  database?: string;
  user?: string;
  password?: string;

  /** Startup options string; used to send `-c search_path=...`. */
  options?: string;
  /** TLS settings; present only when TLS was requested in the connection config. */
  ssl?: { rejectUnauthorized: boolean };
}
|
||||
|
||||
/** Subset of a driver query result that the connector reads. */
interface KtxPostgresQueryResult {
  /** Column descriptors in result order; may be missing on fakes. */
  fields?: { name: string; dataTypeID: number }[];
  /** Result rows keyed by column name. */
  rows: Array<Record<string, unknown>>;
}

/** A checked-out client; it must be released after use. */
interface KtxPostgresClient {
  query(sql: string, params?: unknown[]): Promise<KtxPostgresQueryResult>;
  release(): void;
}

/** Minimal pool surface the connector depends on. */
interface KtxPostgresPool {
  connect(): Promise<KtxPostgresClient>;
  end(): Promise<void>;
  /** Optional so that lightweight test doubles need not implement it. */
  on?(event: 'error', listener: (error: Error) => void): void;
}

/** Seam for injecting a pool implementation (for example a fake in tests). */
export interface KtxPostgresPoolFactory {
  createPool(config: KtxPostgresPoolConfig): KtxPostgresPool;
}
|
||||
|
||||
/** Host and port the pool should actually connect to, plus an optional teardown hook. */
interface KtxPostgresResolvedEndpoint {
  host: string;
  port: number;
  /** Invoked by the connector's `cleanup()` when provided. */
  close?: () => Promise<void>;
}

/**
 * Optional hook that rewrites the configured host and port before the pool is
 * created. The connector calls it once, when the pool is first needed.
 */
export interface KtxPostgresEndpointResolver {
  resolve(input: {
    host: string;
    port: number;
    connection: KtxPostgresConnectionConfig;
  }): Promise<KtxPostgresResolvedEndpoint>;
}
|
||||
|
||||
/** Constructor options for {@link KtxPostgresScanConnector}. */
export interface KtxPostgresScanConnectorOptions {
  connectionId: string;
  connection: KtxPostgresConnectionConfig | undefined;
  /** Defaults to a factory that creates a `pg` `Pool`. */
  poolFactory?: KtxPostgresPoolFactory;
  /** When set, consulted for the host and port before the pool is created. */
  endpointResolver?: KtxPostgresEndpointResolver;
  /** Environment used to resolve `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Clock used for snapshot timestamps; defaults to `() => new Date()`. */
  now?: () => Date;
}

/** Read-only query input that additionally accepts named or positional parameters. */
export interface KtxPostgresReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  params?: Record<string, unknown> | unknown[];
}

/** Tuning knobs for `getColumnDistinctValues`. */
export interface KtxPostgresColumnDistinctValuesOptions {
  /** Above this sampled cardinality no values are fetched. */
  maxCardinality: number;
  /** Limit passed to the distinct-values query. */
  limit: number;
  /** Sample size passed to the cardinality query; defaults to 10000. */
  sampleSize?: number;
}

/** Outcome of `getColumnDistinctValues`. */
export interface KtxPostgresColumnDistinctValuesResult {
  /** `null` when the cardinality exceeded `maxCardinality`. */
  values: string[] | null;
  cardinality: number;
}

/** Estimated distinct-value counts keyed by column name. */
export interface KtxPostgresColumnStatisticsResult {
  cardinalityByColumn: Map<string, number>;
}

/** Table sample that also carries the type name of each header. */
export interface KtxPostgresTableSampleResult extends KtxTableSampleResult {
  headerTypes?: string[];
}

/** Table reference in which only `name` is mandatory. */
type PostgresTableRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/** One relation row from the table-listing introspection query. */
interface PostgresTableRow {
  table_name: string;
  /** `pg_class.relkind`; `'v'` is treated as a view, anything else as a table. */
  table_kind: string;
  /** `reltuples::bigint`; typed `unknown` because it is parsed defensively. */
  row_count: unknown;
  table_comment: string | null;
}

/** One column row from the column introspection query. */
interface PostgresColumnRow {
  table_name: string;
  column_name: string;
  /** Output of `format_type(atttypid, atttypmod)`. */
  data_type: string;
  is_nullable: boolean;
  column_comment: string | null;
}

/** One primary-key column. */
interface PostgresPrimaryKeyRow {
  table_name: string;
  column_name: string;
}

/** One foreign-key column together with the column it references. */
interface PostgresForeignKeyRow {
  table_name: string;
  column_name: string;
  foreign_table_schema: string | null;
  foreign_table_name: string;
  foreign_column_name: string;
  constraint_name: string | null;
}

/** Row shape of the schema listing. */
interface PostgresSchemaRow {
  schema_name: string;
}

/** Row shape of the lightweight table listing. */
interface PostgresTableListRow {
  schema_name: string;
  table_name: string;
  table_kind: string;
}

/** Row shape shared by the `COUNT(*) AS count` and cardinality-sample queries. */
interface PostgresCountRow {
  count?: unknown;
  cardinality?: unknown;
}

/** Row shape of the distinct-values query. */
interface PostgresDistinctValueRow {
  val: unknown;
}

/** Row shape of the column-statistics query. */
interface PostgresStatsRow {
  column_name: string;
  estimated_cardinality: unknown;
}
|
||||
|
||||
/** Production pool factory: builds a real `pg` connection pool from the given configuration. */
class DefaultPostgresPoolFactory implements KtxPostgresPoolFactory {
  createPool(config: KtxPostgresPoolConfig): KtxPostgresPool {
    const pool: KtxPostgresPool = new Pool(config);
    return pool;
  }
}
|
||||
|
||||
/**
 * Buckets rows by their `table_name`.
 *
 * @param rows Rows carrying a `table_name` property.
 * @returns Map from table name to its rows; input order is preserved both
 *   across keys (first appearance) and within each bucket.
 */
function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string, T[]> {
  const byTable = new Map<string, T[]>();
  rows.forEach((row) => {
    const bucket = byTable.get(row.table_name);
    if (bucket === undefined) {
      byTable.set(row.table_name, [row]);
    } else {
      bucket.push(row);
    }
  });
  return byTable;
}
|
||||
|
||||
/**
 * Collects primary-key column names per table.
 *
 * @param rows One row per (table, primary-key column) pair.
 * @returns Map from table name to the set of its primary-key column names.
 */
function primaryKeyMap(rows: PostgresPrimaryKeyRow[]): Map<string, Set<string>> {
  const keysByTable = new Map<string, Set<string>>();
  rows.forEach(({ table_name: tableName, column_name: columnName }) => {
    const existing = keysByTable.get(tableName);
    if (existing === undefined) {
      keysByTable.set(tableName, new Set<string>([columnName]));
    } else {
      existing.add(columnName);
    }
  });
  return keysByTable;
}
|
||||
|
||||
/**
 * Converts keyed result rows into positional arrays ordered like `result.fields`.
 * When `fields` is missing every row becomes an empty array.
 */
function queryRows(result: KtxPostgresQueryResult): unknown[][] {
  const columnNames = result.fields?.map(({ name }) => name) ?? [];
  return result.rows.map((record) => columnNames.map((column) => record[column]));
}
|
||||
|
||||
/**
 * Parses a numeric value coming back from the database.
 *
 * Only numbers, bigints and non-blank strings are considered. Everything else
 * (SQL NULL, `undefined`, booleans, objects, blank strings) yields `null`
 * rather than being coerced by `Number()`, which would turn `null`, `''`,
 * `false` and `[]` into a misleading `0`.
 *
 * @param value Raw column value (the driver may deliver counts as strings).
 * @returns The finite number, or `null` when the value is absent or not numeric.
 */
function finiteNumber(value: unknown): number | null {
  const isNumericCarrier =
    typeof value === 'number' || typeof value === 'bigint' || (typeof value === 'string' && value.trim().length > 0);
  if (!isNumericCarrier) {
    return null;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : null;
}
|
||||
|
||||
/**
 * Reads a string setting from the connection config and resolves any
 * `env:` / `file:` reference it contains.
 *
 * @param connection Connection settings (may be undefined).
 * @param key Setting to read.
 * @param env Environment used for `env:` references.
 * @returns The resolved string, or `undefined` when the setting is missing,
 *   not a string, or blank after trimming.
 */
function stringConfigValue(
  connection: KtxPostgresConnectionConfig | undefined,
  key: keyof KtxPostgresConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Resolves an indirect configuration value.
 *
 * - `env:NAME` returns `env[NAME]`, or an empty string when the variable is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file at `PATH`. A
 *   leading `~` is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param value Configuration string, already trimmed by the caller.
 * @param env Environment used for `env:` lookups.
 * @returns The resolved value.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Separators right after `~` must be dropped before resolving: with
    // `~/x`, `resolve(home, '/x')` treats '/x' as absolute and discards the
    // home directory, so the wrong file would be read.
    const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, '')) : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/** Returns `value` when it is a finite number, otherwise `undefined` (strings are never coerced). */
function numberValue(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Breaks a PostgreSQL connection URL into discrete connection settings.
 *
 * Username, password and database name are percent-decoded so that values
 * such as `my%20db` or `p%23ss` arrive in their literal form, and the square
 * brackets the URL syntax puts around IPv6 literals are removed from the host.
 *
 * @param url Connection URL, e.g. `postgresql://user:pw@host:5432/db?sslmode=require`.
 * @returns The settings found in the URL; `sslmode` is present only when the
 *   URL carries a non-empty `sslmode` query parameter.
 * @throws TypeError when `url` is not a valid URL; URIError when a component
 *   contains a malformed percent-escape.
 */
function parsePostgresUrl(url: string): Partial<KtxPostgresConnectionConfig> {
  const parsed = new URL(url);
  const sslmode = parsed.searchParams.get('sslmode') ?? undefined;
  // `URL#hostname` keeps the brackets of IPv6 literals ("[::1]"), which is not a usable host name.
  const host = parsed.hostname.replace(/^\[(.*)\]$/, '$1');
  const rawDatabase = parsed.pathname.replace(/^\/+/, '');
  return {
    host,
    port: parsed.port ? Number(parsed.port) : undefined,
    database: rawDatabase ? decodeURIComponent(rawDatabase) : undefined,
    username: parsed.username ? decodeURIComponent(parsed.username) : undefined,
    password: parsed.password ? decodeURIComponent(parsed.password) : undefined,
    ...(sslmode ? { sslmode } : {}),
  };
}
|
||||
|
||||
/**
 * Returns the configured SSL mode trimmed and lower-cased, reading `sslmode`
 * first and `sslMode` only when `sslmode` is null or undefined. Non-string
 * and blank values yield `undefined`.
 */
function normalizedSslMode(connection: KtxPostgresConnectionConfig): string | undefined {
  const raw = connection.sslmode ?? connection.sslMode;
  if (typeof raw !== 'string') {
    return undefined;
  }
  const mode = raw.trim();
  return mode.length === 0 ? undefined : mode.toLowerCase();
}
|
||||
|
||||
/**
 * Determines which schemas the connector should work with.
 *
 * Precedence: the usable entries of `schemas`, then `schema`, then `public`.
 * Entries of `schemas` that are not non-empty strings are dropped; when that
 * leaves nothing, the fallback chain continues instead of returning an empty
 * list, which would make introspection silently scan no schema at all.
 *
 * @param connection Connection settings.
 * @returns A non-empty list of schema names.
 */
function schemasFromConnection(connection: KtxPostgresConnectionConfig): string[] {
  if (Array.isArray(connection.schemas)) {
    const listed = connection.schemas.filter(
      (schema): schema is string => typeof schema === 'string' && schema.length > 0,
    );
    if (listed.length > 0) {
      return listed;
    }
  }
  return typeof connection.schema === 'string' && connection.schema.length > 0 ? [connection.schema] : ['public'];
}
|
||||
|
||||
/** Schemas for the session `search_path`: the configured schemas, with `public` appended when not already listed. */
function searchPathSchemasFromConnection(connection: KtxPostgresConnectionConfig): string[] {
  const configured = schemasFromConnection(connection);
  if (configured.includes('public')) {
    return configured;
  }
  return configured.concat('public');
}
|
||||
|
||||
/**
 * Type guard: true when the connection's `driver` is `postgres` or
 * `postgresql`, compared case-insensitively. A missing connection or driver
 * yields false.
 */
export function isKtxPostgresConnectionConfig(
  connection: KtxPostgresConnectionConfig | undefined,
): connection is KtxPostgresConnectionConfig {
  const normalizedDriver = String(connection?.driver ?? '').toLowerCase();
  return ['postgres', 'postgresql'].includes(normalizedDriver);
}
|
||||
|
||||
/**
 * Builds the pool configuration for a PostgreSQL connection.
 *
 * Settings parsed from `url` act as defaults that the discrete connection
 * fields override. The URL itself is passed through as `connectionString`
 * unless the SSL mode is `prefer` or `disable`, in which case the discrete
 * host/port/database/user/password fields are used instead.
 *
 * @param input.connectionId Connection name, used in error messages.
 * @param input.connection Connection settings; the driver must be postgres/postgresql.
 * @param input.env Environment for `env:` references (defaults to `process.env`).
 * @returns The pool configuration.
 * @throws Error when the driver is not PostgreSQL, or when host, database or
 *   user is missing and no URL is configured.
 */
export function postgresPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KtxPostgresConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxPostgresPoolConfig {
  // Read before the type guard runs so the message can still name a rejected driver.
  const declaredDriver = input.connection?.driver ?? 'unknown';
  if (!isKtxPostgresConnectionConfig(input.connection)) {
    throw new Error(`Native PostgreSQL connector cannot run driver "${declaredDriver}"`);
  }
  const connection = input.connection;
  const env = input.env ?? process.env;

  const referencedUrl = stringConfigValue(connection, 'url', env);
  const fromUrl = referencedUrl ? parsePostgresUrl(referencedUrl) : {};
  // Explicit settings win over whatever the URL provided.
  const merged: KtxPostgresConnectionConfig = { ...fromUrl, ...connection };

  const host = stringConfigValue(merged, 'host', env);
  const database = stringConfigValue(merged, 'database', env);
  const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);
  const password = stringConfigValue(merged, 'password', env);
  const sslmode = normalizedSslMode(merged);

  if (!(referencedUrl || host)) {
    throw new Error(`Native PostgreSQL connector requires connections.${input.connectionId}.host or url`);
  }
  if (!(database || referencedUrl)) {
    throw new Error(`Native PostgreSQL connector requires connections.${input.connectionId}.database or url`);
  }
  if (!(user || referencedUrl)) {
    throw new Error(`Native PostgreSQL connector requires connections.${input.connectionId}.username, user, or url`);
  }

  // `prefer` and `disable` switch off both the URL pass-through and the explicit TLS block.
  const sslModeOptsOut = sslmode === 'prefer' || sslmode === 'disable';

  const config: KtxPostgresPoolConfig = {
    max: 10,
    idleTimeoutMillis: 30_000,
    connectionTimeoutMillis: 10_000,
  };
  if (referencedUrl && !sslModeOptsOut) {
    config.connectionString = referencedUrl;
  } else {
    config.host = host;
    config.port = numberValue(merged.port) ?? 5432;
    config.database = database;
    config.user = user;
    config.password = password;
  }

  const searchPathSchemas = searchPathSchemasFromConnection(merged);
  if (searchPathSchemas.length > 0) {
    config.options = `-c search_path=${searchPathSchemas.join(',')}`;
  }

  if (merged.ssl && !sslModeOptsOut) {
    config.ssl = { rejectUnauthorized: merged.rejectUnauthorized ?? true };
  }
  return config;
}
|
||||
|
||||
/**
 * Scan connector for PostgreSQL.
 *
 * Introspects the configured schemas, samples tables and columns, reads
 * planner statistics and runs read-only SQL. The connection pool is created
 * lazily on first use and released by {@link KtxPostgresScanConnector.cleanup}.
 */
export class KtxPostgresScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'postgres' as const;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: true,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly connection: KtxPostgresConnectionConfig;
  private readonly poolConfig: KtxPostgresPoolConfig;
  private readonly poolFactory: KtxPostgresPoolFactory;
  private readonly endpointResolver?: KtxPostgresEndpointResolver;
  private readonly now: () => Date;
  private readonly dialect = new KtxPostgresDialect();
  private pool: KtxPostgresPool | null = null;
  /** Most recent error emitted by the pool; rethrown by the next query. */
  private lastIdlePoolError: Error | null = null;
  private resolvedEndpoint: KtxPostgresResolvedEndpoint | null = null;

  /**
   * @param options Connector options.
   * @throws Error when the connection configuration is invalid (raised by `postgresPoolConfigFromConfig`).
   */
  constructor(options: KtxPostgresScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.connection = options.connection ?? {};
    this.poolConfig = postgresPoolConfigFromConfig({
      connectionId: options.connectionId,
      connection: options.connection,
      env: options.env,
    });
    this.poolFactory = options.poolFactory ?? new DefaultPostgresPoolFactory();
    this.endpointResolver = options.endpointResolver;
    this.now = options.now ?? (() => new Date());
    this.id = `postgres:${options.connectionId}`;
  }

  /** Runs `SELECT 1`; failures are reported in the result instead of being thrown. */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      await this.query('SELECT 1');
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Extracts tables, columns, primary keys and foreign keys for every
   * configured schema, one schema after another.
   *
   * @throws Error when `input.connectionId` does not match this connector.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const schemas = schemasFromConnection(this.connection);
    const allTables: KtxSchemaTable[] = [];
    for (const schema of schemas) {
      const tables = await this.loadSchemaTables(schema);
      allTables.push(...tables);
    }
    return {
      connectionId: this.connectionId,
      driver: 'postgres',
      extractedAt: this.now().toISOString(),
      scope: { schemas },
      metadata: {
        database: this.poolConfig.database ?? this.connection.database ?? null,
        schemas,
        host: this.poolConfig.host ?? this.connection.host ?? null,
        table_count: allTables.length,
        total_columns: allTables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables: allTables,
    };
  }

  /** Returns up to `input.limit` rows of a table, optionally restricted to `input.columns`. */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxPostgresTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns),
    );
    return {
      headers: result.headers,
      headerTypes: result.headerTypes,
      rows: result.rows,
      totalRows: result.totalRows,
    };
  }

  /** Samples values of one column; null and distinct counts are not computed and are returned as `null`. */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /**
   * Returns the estimated distinct count for a column, or `null` when no
   * statistics are available for it. Only `distinctCount` is populated.
   */
  async columnStats(input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    // NOTE(review): unlike the sibling scan methods this does not call assertConnection — confirm whether that is intended.
    const stats = await this.getColumnStatistics(input.table);
    const value = stats?.cardinalityByColumn.get(input.column);
    return value === undefined
      ? null
      : { min: null, max: null, average: null, nullCount: null, distinctCount: value };
  }

  /**
   * Executes caller-supplied SQL after the read-only check and row limiting.
   * Positional parameters are passed through as-is; named parameters are
   * translated by the dialect.
   */
  async executeReadOnly(input: KtxPostgresReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
    const prepared = Array.isArray(input.params)
      ? { sql: limitedSql, params: input.params }
      : this.dialect.prepareQuery(limitedSql, input.params);
    const result = await this.query(prepared.sql, prepared.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Estimates a column's cardinality and, when it is small enough, fetches its
   * distinct values.
   *
   * @returns `null` when the cardinality cannot be determined; `values: null`
   *   when the cardinality exceeds `options.maxCardinality`.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxPostgresColumnDistinctValuesOptions,
  ): Promise<KtxPostgresColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityRows = await this.queryRaw<PostgresCountRow>(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    const cardinality = finiteNumber(cardinalityRows[0]?.cardinality);
    if (cardinality === null) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesRows = await this.queryRaw<PostgresDistinctValueRow>(
      this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit),
    );
    return {
      values: valuesRows.filter((row) => row.val !== null).map((row) => String(row.val)),
      cardinality,
    };
  }

  /**
   * Reads estimated per-column cardinalities for a table. The schema is
   * `table.db`, falling back to the first configured schema.
   *
   * @returns `null` when the dialect provides no statistics query or no usable rows come back.
   */
  async getColumnStatistics(table: KtxTableRef): Promise<KtxPostgresColumnStatisticsResult | null> {
    const schema = table.db ?? schemasFromConnection(this.connection)[0] ?? 'public';
    const sql = this.dialect.generateColumnStatisticsQuery(schema, table.name);
    if (!sql) {
      return null;
    }
    const rows = await this.queryRaw<PostgresStatsRow>(sql);
    const cardinalityByColumn = new Map<string, number>();
    for (const row of rows) {
      const cardinality = finiteNumber(row.estimated_cardinality);
      if (cardinality !== null) {
        cardinalityByColumn.set(row.column_name, cardinality);
      }
    }
    return cardinalityByColumn.size > 0 ? { cardinalityByColumn } : null;
  }

  /** Exact `COUNT(*)` of a table; a bare table name is looked up in the first configured schema. */
  async getTableRowCount(table: string | PostgresTableRef): Promise<number> {
    const tableRef =
      typeof table === 'string'
        ? { catalog: null, db: schemasFromConnection(this.connection)[0] ?? 'public', name: table }
        : table;
    const rows = await this.queryRaw<PostgresCountRow>(`SELECT COUNT(*) AS count FROM ${this.qTableName(tableRef)}`);
    return finiteNumber(rows[0]?.count) ?? 0;
  }

  /** Quoted, schema-qualified table name as produced by the dialect. */
  qTableName(table: PostgresTableRef): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes a single identifier using the dialect's rules. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Lists schemas other than `information_schema` and those whose name matches `pg_%`. */
  async listSchemas(): Promise<string[]> {
    const rows = await this.queryRaw<PostgresSchemaRow>(`
      SELECT schema_name
      FROM information_schema.schemata
      WHERE schema_name <> 'information_schema'
        AND schema_name NOT LIKE 'pg_%'
      ORDER BY schema_name
    `);
    return rows.map((row) => row.schema_name);
  }

  /**
   * Lists ordinary tables and views in the given schemas, or in every schema
   * returned by `listSchemas()` when `schemas` is omitted.
   */
  async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
    const filterSchemas = schemas ?? (await this.listSchemas());
    if (filterSchemas.length === 0) return [];
    const rows = await this.queryRaw<PostgresTableListRow>(
      `
      SELECT n.nspname AS schema_name, c.relname AS table_name, c.relkind AS table_kind
      FROM pg_catalog.pg_class c
      JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
      WHERE n.nspname = ANY($1)
        AND c.relkind IN ('r', 'v')
      ORDER BY n.nspname, c.relname
      `,
      [filterSchemas],
    );
    return rows.map((row) => ({
      schema: row.schema_name,
      name: row.table_name,
      kind: row.table_kind === 'v' ? ('view' as const) : ('table' as const),
    }));
  }

  /**
   * Ends the pool and closes the resolved endpoint.
   *
   * State is detached first so the connector can always be reused or cleaned
   * up again, and the endpoint is closed even when ending the pool fails. A
   * recorded pool error is discarded as well, because it belongs to the pool
   * being torn down and must not fail the first query on a fresh pool.
   */
  async cleanup(): Promise<void> {
    const pool = this.pool;
    const endpoint = this.resolvedEndpoint;
    this.pool = null;
    this.resolvedEndpoint = null;
    this.lastIdlePoolError = null;
    try {
      if (pool) {
        await pool.end();
      }
    } finally {
      if (endpoint?.close) {
        await endpoint.close();
      }
    }
  }

  /** Loads tables, columns, primary keys and foreign keys of one schema and assembles them per table. */
  private async loadSchemaTables(schema: string): Promise<KtxSchemaTable[]> {
    const tables = await this.queryRaw<PostgresTableRow>(
      `
      SELECT
        c.relname AS table_name,
        c.relkind AS table_kind,
        c.reltuples::bigint AS row_count,
        d.description AS table_comment
      FROM pg_catalog.pg_class c
      JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
      LEFT JOIN pg_catalog.pg_description d
        ON d.objoid = c.oid AND d.objsubid = 0
      WHERE n.nspname = $1
        AND c.relkind IN ('r', 'v')
      ORDER BY c.relname
      `,
      [schema],
    );
    const columns = await this.queryRaw<PostgresColumnRow>(
      `
      SELECT
        c.relname AS table_name,
        a.attname AS column_name,
        format_type(a.atttypid, a.atttypmod) AS data_type,
        NOT a.attnotnull AS is_nullable,
        d.description AS column_comment
      FROM pg_catalog.pg_attribute a
      JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
      JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
      LEFT JOIN pg_catalog.pg_description d
        ON d.objoid = c.oid AND d.objsubid = a.attnum
      WHERE n.nspname = $1
        AND c.relkind IN ('r', 'v')
        AND a.attnum > 0
        AND NOT a.attisdropped
      ORDER BY c.relname, a.attnum
      `,
      [schema],
    );
    // Constraint names are not unique within a schema in PostgreSQL, so the
    // key_column_usage joins below also match on table_name; without it two
    // tables with identically named constraints would share key columns.
    const primaryKeys = await this.queryRaw<PostgresPrimaryKeyRow>(
      `
      SELECT tc.table_name, kcu.column_name
      FROM information_schema.table_constraints tc
      JOIN information_schema.key_column_usage kcu
        ON tc.constraint_name = kcu.constraint_name
        AND tc.table_schema = kcu.table_schema
        AND tc.table_name = kcu.table_name
      WHERE tc.constraint_type = 'PRIMARY KEY'
        AND tc.table_schema = $1
      ORDER BY tc.table_name, kcu.ordinal_position
      `,
      [schema],
    );
    const foreignKeys = await this.queryRaw<PostgresForeignKeyRow>(
      `
      SELECT
        tc.table_name,
        kcu.column_name,
        ccu.table_schema AS foreign_table_schema,
        ccu.table_name AS foreign_table_name,
        ccu.column_name AS foreign_column_name,
        tc.constraint_name
      FROM information_schema.table_constraints AS tc
      JOIN information_schema.key_column_usage AS kcu
        ON tc.constraint_name = kcu.constraint_name
        AND tc.table_schema = kcu.table_schema
        AND tc.table_name = kcu.table_name
      JOIN information_schema.constraint_column_usage AS ccu
        ON ccu.constraint_name = tc.constraint_name
        AND ccu.table_schema = tc.table_schema
      WHERE tc.constraint_type = 'FOREIGN KEY'
        AND tc.table_schema = $1
      ORDER BY tc.table_name, kcu.column_name
      `,
      [schema],
    );

    const columnsByTable = groupByTable(columns);
    const primaryKeysByTable = primaryKeyMap(primaryKeys);
    const foreignKeysByTable = groupByTable(foreignKeys);
    return tables.map((table) =>
      this.toSchemaTable(
        schema,
        table,
        columnsByTable.get(table.table_name) ?? [],
        primaryKeysByTable.get(table.table_name) ?? new Set<string>(),
        foreignKeysByTable.get(table.table_name) ?? [],
      ),
    );
  }

  /** Converts the introspection rows for one relation into a schema table entry. */
  private toSchemaTable(
    schema: string,
    table: PostgresTableRow,
    columns: PostgresColumnRow[],
    primaryKeys: Set<string>,
    foreignKeys: PostgresForeignKeyRow[],
  ): KtxSchemaTable {
    const kind = table.table_kind === 'v' ? 'view' : 'table';
    const rowEstimate = kind === 'view' ? null : finiteNumber(table.row_count);
    return {
      catalog: null,
      db: schema,
      name: table.table_name,
      kind,
      comment: table.table_comment || null,
      // pg_class.reltuples is -1 for relations that were never vacuumed or
      // analyzed; report that as "unknown" rather than as a negative count.
      estimatedRows: rowEstimate !== null && rowEstimate < 0 ? null : rowEstimate,
      columns: columns.map((column) => this.toSchemaColumn(column, primaryKeys)),
      foreignKeys: foreignKeys.map((foreignKey) => this.toSchemaForeignKey(foreignKey)),
    };
  }

  /** Converts a column row, flagging it as primary key when its name is in `primaryKeys`. */
  private toSchemaColumn(column: PostgresColumnRow, primaryKeys: Set<string>): KtxSchemaColumn {
    return {
      name: column.column_name,
      nativeType: column.data_type,
      normalizedType: this.dialect.mapDataType(column.data_type),
      dimensionType: this.dialect.mapToDimensionType(column.data_type),
      nullable: column.is_nullable,
      primaryKey: primaryKeys.has(column.column_name),
      comment: column.column_comment || null,
    };
  }

  /** Converts a foreign-key row into the snapshot representation. */
  private toSchemaForeignKey(row: PostgresForeignKeyRow): KtxSchemaForeignKey {
    return {
      fromColumn: row.column_name,
      toCatalog: null,
      toDb: row.foreign_table_schema,
      toTable: row.foreign_table_name,
      toColumn: row.foreign_column_name,
      constraintName: row.constraint_name || null,
    };
  }

  /**
   * Returns the pool, creating it on first use. When an endpoint resolver is
   * configured, its host and port replace the configured ones. An `error`
   * listener records pool errors so the next query can surface them.
   */
  private async getPool(): Promise<KtxPostgresPool> {
    if (!this.pool) {
      let config = { ...this.poolConfig };
      if (this.endpointResolver) {
        const endpoint = await this.endpointResolver.resolve({
          host: config.host ?? this.connection.host ?? 'localhost',
          port: config.port ?? numberValue(this.connection.port) ?? 5432,
          connection: this.connection,
        });
        this.resolvedEndpoint = endpoint;
        config = { ...config, host: endpoint.host, port: endpoint.port };
      }
      this.pool = this.poolFactory.createPool(config);
      this.pool.on?.('error', (error) => {
        this.lastIdlePoolError = error;
      });
    }
    return this.pool;
  }

  /** Runs connector-authored SQL and returns the rows as keyed objects; the client is always released. */
  private async queryRaw<T>(sql: string, params?: unknown[]): Promise<T[]> {
    this.throwIdlePoolErrorIfPresent();
    const pool = await this.getPool();
    const client = await pool.connect();
    try {
      const result = await client.query(sql, params);
      return result.rows as T[];
    } finally {
      client.release();
    }
  }

  /**
   * Runs SQL after the read-only check and returns a tabular result with
   * headers and header type names. Only array parameters are forwarded to the
   * driver; a named-parameter record is ignored at this level.
   */
  private async query(sql: string, params?: Record<string, unknown> | unknown[]): Promise<KtxQueryResult> {
    this.throwIdlePoolErrorIfPresent();
    const pool = await this.getPool();
    const client = await pool.connect();
    try {
      const result = await client.query(assertReadOnlySql(sql), Array.isArray(params) ? params : undefined);
      return {
        headers: (result.fields ?? []).map((field) => field.name),
        headerTypes: (result.fields ?? []).map((field) => PG_OID_TYPE_MAP[field.dataTypeID] ?? `oid:${field.dataTypeID}`),
        rows: queryRows(result),
        totalRows: result.rows.length,
        rowCount: result.rows.length,
      };
    } finally {
      client.release();
    }
  }

  /** Guards against a scan request that was addressed to a different connection. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`PostgreSQL connector ${this.connectionId} cannot run scan for ${connectionId}`);
    }
  }

  /** Rethrows, exactly once, an error the pool reported since the previous query. */
  private throwIdlePoolErrorIfPresent(): void {
    if (!this.lastIdlePoolError) {
      return;
    }
    const error = this.lastIdlePoolError;
    this.lastIdlePoolError = null;
    throw error;
  }
}
|
||||
52
packages/cli/src/connectors/postgres/dialect.test.ts
Normal file
52
packages/cli/src/connectors/postgres/dialect.test.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxPostgresDialect } from './dialect.js';
|
||||
|
||||
describe('KtxPostgresDialect', () => {
  const dialect = new KtxPostgresDialect();
  // Pre-quoted table name shared by the SQL-generation assertions.
  const ordersTable = '"public"."orders"';

  it('quotes identifiers and formats schema-qualified tables', () => {
    const quoted = dialect.quoteIdentifier('order"items');
    expect(quoted).toBe('"order""items"');

    const qualified = dialect.formatTableName({ catalog: null, db: 'public', name: 'orders' });
    expect(qualified).toBe('"public"."orders"');

    const unqualified = dialect.formatTableName({ catalog: null, db: null, name: 'orders' });
    expect(unqualified).toBe('"orders"');
  });

  it('maps native PostgreSQL types to KTX dimension types', () => {
    // [native type, expected dimension type]
    const cases: Array<[string, string]> = [
      ['timestamp with time zone', 'time'],
      ['numeric(12,2)', 'number'],
      ['uuid', 'string'],
      ['boolean', 'boolean'],
      ['jsonb', 'string'],
    ];
    for (const [nativeType, dimensionType] of cases) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('generates sample, distinct-value, statistics, and time SQL', () => {
    const sampleSql = dialect.generateSampleQuery(ordersTable, 5, ['id', 'status']);
    expect(sampleSql).toBe('SELECT "id", "status" FROM "public"."orders" LIMIT 5');

    const columnSampleSql = dialect.generateColumnSampleQuery(ordersTable, 'status', 10);
    expect(columnSampleSql).toContain('TRIM(CAST("status" AS TEXT)) != \'\'');

    const distinctSql = dialect.generateDistinctValuesQuery(ordersTable, '"status"', 20);
    expect(distinctSql).toContain('SELECT DISTINCT "status"::text AS val');

    const statisticsSql = dialect.generateColumnStatisticsQuery('public', 'orders');
    expect(statisticsSql).toContain('FROM pg_stats s');

    const truncSql = dialect.getTimeTruncExpression('"created_at"', 'month');
    expect(truncSql).toBe('DATE_TRUNC(\'month\', "created_at")');
  });

  it('prepares named parameters with PostgreSQL positional parameters', () => {
    const simple = dialect.prepareQuery('select * from orders where id = :id and status = :status', {
      id: 1,
      status: 'paid',
    });
    expect(simple).toEqual({
      sql: 'select * from orders where id = $1 and status = $2',
      params: [1, 'paid'],
    });

    // One parameter name is a prefix of the other; each must map to its own placeholder.
    const prefixed = dialect.prepareQuery('select :Client_Name_10, :Client_Name_1', {
      Client_Name_1: 'short',
      Client_Name_10: 'long',
    });
    expect(prefixed).toEqual({
      sql: 'select $2, $1',
      params: ['short', 'long'],
    });
  });
});
|
||||
213
packages/cli/src/connectors/postgres/dialect.ts
Normal file
213
packages/cli/src/connectors/postgres/dialect.ts
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/index.js';
|
||||
|
||||
/** Table reference accepted by the dialect: `name` is required, `catalog` and `db` are optional. */
type PostgresTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * PostgreSQL SQL dialect helper: identifier quoting, native-type to
 * dimension-type mapping, named-parameter rewriting, and generation of the
 * sampling / statistics queries used when scanning a database.
 *
 * Unless a method says otherwise, `tableName` and `column`/`columnName`
 * arguments are inserted into the generated SQL verbatim (they are NOT quoted
 * here), so callers must pass already-quoted identifiers or expressions.
 */
export class KtxPostgresDialect {
  readonly type = 'postgresql';

  /** Exact matches on the lower-cased native type name with any `(…)` suffix removed. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    timestamp: 'time',
    'timestamp without time zone': 'time',
    'timestamp with time zone': 'time',
    timestamptz: 'time',
    datetime: 'time',
    date: 'time',
    time: 'time',
    integer: 'number',
    int: 'number',
    int2: 'number',
    int4: 'number',
    int8: 'number',
    bigint: 'number',
    smallint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    float4: 'number',
    float8: 'number',
    'double precision': 'number',
    real: 'number',
    money: 'number',
    text: 'string',
    varchar: 'string',
    'character varying': 'string',
    char: 'string',
    character: 'string',
    uuid: 'string',
    json: 'string',
    jsonb: 'string',
    boolean: 'boolean',
    bool: 'boolean',
  };

  /** Wraps an identifier in double quotes, doubling any embedded double quote. */
  quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /**
   * Returns the quoted `"db"."name"` form when `table.db` is set, otherwise
   * just the quoted table name. `table.catalog` is not used.
   */
  formatTableName(table: PostgresTableNameRef): string {
    const quotedName = this.quoteIdentifier(table.name);
    return table.db ? `${this.quoteIdentifier(table.db)}.${quotedName}` : quotedName;
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native column type as `time`, `number`, `boolean` or `string`.
   *
   * The type is lower-cased, trimmed and stripped of any `(…)` suffix, then
   * looked up in the exact mapping table; unknown names fall back to substring
   * heuristics and finally to `'string'`. An empty type yields `'string'`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    const normalized = (lower.split('(')[0] ?? lower).trim();

    // Own-property check: a plain index lookup would also hit inherited
    // members such as `constructor` or `tostring`-like names on Object.prototype
    // and return a function instead of a dimension type.
    if (Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)) {
      const exact = this.typeMappings[normalized];
      if (exact) {
        return exact;
      }
    }

    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    const numericHints = ['int', 'num', 'dec', 'float', 'double'];
    if (numericHints.some((hint) => normalized.includes(hint))) {
      return 'number';
    }
    if (normalized.includes('bool')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT … FROM tableName LIMIT limit`. Column names, when given,
   * are quoted; with no columns `*` is selected.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /** Samples non-null, non-blank values of one column; `columnName` is quoted here. */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS TEXT)) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to positional `$n` placeholders.
   *
   * Positions follow the key order of `params`, and the returned `params`
   * array holds the values in that same order. A `:name` that is immediately
   * preceded by another colon is left alone so that a `::type` cast is not
   * mistaken for a placeholder when a parameter shares the type's name.
   *
   * @returns the input SQL untouched (and `params: undefined`) when no params are given.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
    if (!params) {
      return { sql, params: undefined };
    }
    const paramNames = Object.keys(params);
    const values: unknown[] = paramNames.map((name) => params[name]);
    const positionByName = new Map<string, number>(paramNames.map((name, index) => [name, index + 1]));

    // Longest names first so a shorter name can never pre-empt a longer one.
    const sortedKeys = [...paramNames].sort((a, b) => b.length - a.length);
    let parameterizedQuery = sql;
    for (const name of sortedKeys) {
      // Escape regex metacharacters so the name is matched literally.
      const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const placeholder = `$${positionByName.get(name)}`;
      // Function replacement: `$n` must not be interpreted as a capture-group reference.
      parameterizedQuery = parameterizedQuery.replace(
        new RegExp(`(?<!:):${escapedName}\\b`, 'g'),
        () => placeholder,
      );
    }
    return { sql: parameterizedQuery, params: values };
  }

  /** Row-level random filter for a fraction in (0, 1); empty string outside that range. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `RANDOM() < ${samplePct}`;
  }

  /** `TABLESAMPLE SYSTEM` clause for a fraction in (0, 1); empty string outside that range. */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `TABLESAMPLE SYSTEM (${samplePct * 100})`;
  }

  /** `LIMIT n`, with `OFFSET m` appended only when `offset` is greater than zero. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting rows where `column` is NULL. */
  getNullCountExpression(column: string): string {
    return `COUNT(*) FILTER (WHERE ${column} IS NULL)`;
  }

  /** Aggregate expression counting distinct values of `column`. */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /** Counts distinct non-null values among the first `sampleSize` rows returned. */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /** Lists up to `limit` distinct non-null values of a column, cast to text and sorted. */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT ${columnName}::text AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /**
   * Reads per-column cardinality estimates from `pg_stats`.
   *
   * `schemaName` and `tableName` are embedded as string literals (single
   * quotes doubled), not as identifiers. A negative `n_distinct` is scaled by
   * `pg_class.reltuples` to obtain an absolute estimate.
   */
  generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null {
    const schemaLiteral = schemaName.replace(/'/g, "''");
    const tableLiteral = tableName.replace(/'/g, "''");
    return `
      SELECT
        s.attname AS column_name,
        CASE
          WHEN s.n_distinct > 0 THEN s.n_distinct::bigint
          WHEN s.n_distinct < 0 THEN (-s.n_distinct * c.reltuples)::bigint
          ELSE NULL
        END AS estimated_cardinality
      FROM pg_stats s
      JOIN pg_class c ON c.relname = s.tablename
      JOIN pg_namespace n ON c.relnamespace = n.oid AND n.nspname = s.schemaname
      WHERE s.schemaname = '${schemaLiteral}'
        AND s.tablename = '${tableLiteral}'
        AND s.n_distinct IS NOT NULL
    `;
  }

  /** Same as {@link generateCardinalitySampleQuery} but the sample is drawn in random order. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RANDOM()
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /** `DATE_TRUNC` expression, optionally converting the column to `timezone` first. */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
    return `DATE_TRUNC('${granularity}', ${col})`;
  }

  /**
   * Buckets a timestamp into fixed `interval`-sized bins counted from
   * `origin` (defaults to `1970-01-01`), optionally in `timezone`.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
    const originExpr = origin ? `TIMESTAMP '${origin.replace(/'/g, "''")}'` : "TIMESTAMP '1970-01-01'";
    const intervalLiteral = interval.replace(/'/g, "''");
    return `${originExpr} + FLOOR(EXTRACT(EPOCH FROM (${col} - ${originExpr})) / EXTRACT(EPOCH FROM INTERVAL '${intervalLiteral}')) * INTERVAL '${intervalLiteral}'`;
  }

  /** Renders an `INTERVAL '…'` literal with single quotes doubled. */
  parseIntervalToSql(interval: string): string {
    return `INTERVAL '${interval.replace(/'/g, "''")}'`;
  }
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { KtxPostgresHistoricSqlQueryClient } from './historic-sql-query-client.js';
|
||||
import type { KtxPostgresPoolConfig, KtxPostgresPoolFactory } from './connector.js';
|
||||
|
||||
describe('KtxPostgresHistoricSqlQueryClient', () => {
  it('executes parameterized read-only SQL through the native Postgres connector pool', async () => {
    // Every statement the fake pool client receives is recorded here.
    const queryCalls: Array<{ sql: string; params?: unknown[] }> = [];
    const release = vi.fn();
    const end = vi.fn(async () => {});

    // Fake pg client: records the call and answers with a single-row result.
    const makeClient = () => ({
      async query(sql: string, params?: unknown[]) {
        queryCalls.push({ sql, params });
        return {
          fields: [{ name: 'answer', dataTypeID: 23 }],
          rows: [{ answer: 42 }],
        };
      },
      release,
    });

    const poolFactory: KtxPostgresPoolFactory = {
      createPool(_config: KtxPostgresPoolConfig) {
        return {
          async connect() {
            return makeClient();
          },
          end,
        };
      },
    };

    const client = new KtxPostgresHistoricSqlQueryClient({
      connectionId: 'warehouse',
      connection: {
        driver: 'postgres',
        url: 'postgresql://readonly:secret@pg.example.test/warehouse', // pragma: allowlist secret
      },
      poolFactory,
    });

    const outcome = client.executeQuery('SELECT $1::int AS answer', [42]);
    await expect(outcome).resolves.toEqual({
      headers: ['answer'],
      rows: [[42]],
      totalRows: 1,
    });
    expect(queryCalls).toEqual([{ sql: 'SELECT $1::int AS answer', params: [42] }]);

    // Cleanup must hand the client back and close the pool exactly once.
    await client.cleanup();
    expect(release).toHaveBeenCalledTimes(1);
    expect(end).toHaveBeenCalledTimes(1);
  });
});
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import type { KtxPostgresQueryClient } from '../../context/ingest/index.js';
|
||||
import { KtxPostgresScanConnector, type KtxPostgresScanConnectorOptions } from './connector.js';
|
||||
|
||||
/** Construction options for the historic-SQL query client; identical to the scan connector's options. */
export type KtxPostgresHistoricSqlQueryClientOptions = KtxPostgresScanConnectorOptions;
|
||||
|
||||
/**
 * Query client that runs SQL through a {@link KtxPostgresScanConnector}
 * built from the same options, exposing only headers, rows and the row total.
 */
export class KtxPostgresHistoricSqlQueryClient implements KtxPostgresQueryClient {
  private readonly connectionId: string;
  private readonly connector: KtxPostgresScanConnector;

  constructor(options: KtxPostgresHistoricSqlQueryClientOptions) {
    this.connectionId = options.connectionId;
    this.connector = new KtxPostgresScanConnector(options);
  }

  /**
   * Executes `sql` with optional positional `params` via the connector's
   * read-only path.
   *
   * @returns the result's `headers`, `rows` and `totalRows`.
   */
  async executeQuery(
    sql: string,
    params?: unknown[],
  ): Promise<{ headers: string[]; rows: unknown[][]; totalRows: number }> {
    const request = { connectionId: this.connectionId, sql, params };
    // An empty placeholder is passed as the scan context.
    const { headers, rows, totalRows } = await this.connector.executeReadOnly(request, {} as never);
    return { headers, rows, totalRows };
  }

  /** Delegates to the underlying connector's cleanup. */
  async cleanup(): Promise<void> {
    await this.connector.cleanup();
  }
}
|
||||
21
packages/cli/src/connectors/postgres/index.ts
Normal file
21
packages/cli/src/connectors/postgres/index.ts
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
export { KtxPostgresDialect } from './dialect.js';
|
||||
export {
|
||||
isKtxPostgresConnectionConfig,
|
||||
KtxPostgresScanConnector,
|
||||
postgresPoolConfigFromConfig,
|
||||
type KtxPostgresColumnDistinctValuesOptions,
|
||||
type KtxPostgresColumnDistinctValuesResult,
|
||||
type KtxPostgresColumnStatisticsResult,
|
||||
type KtxPostgresConnectionConfig,
|
||||
type KtxPostgresEndpointResolver,
|
||||
type KtxPostgresPoolConfig,
|
||||
type KtxPostgresPoolFactory,
|
||||
type KtxPostgresReadOnlyQueryInput,
|
||||
type KtxPostgresScanConnectorOptions,
|
||||
type KtxPostgresTableSampleResult,
|
||||
} from './connector.js';
|
||||
export {
|
||||
KtxPostgresHistoricSqlQueryClient,
|
||||
type KtxPostgresHistoricSqlQueryClientOptions,
|
||||
} from './historic-sql-query-client.js';
|
||||
export { createPostgresLiveDatabaseIntrospection } from './live-database-introspection.js';
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/index.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/index.js';
|
||||
import {
|
||||
KtxPostgresScanConnector,
|
||||
type KtxPostgresConnectionConfig,
|
||||
type KtxPostgresEndpointResolver,
|
||||
type KtxPostgresPoolFactory,
|
||||
} from './connector.js';
|
||||
|
||||
/** Inputs for {@link createPostgresLiveDatabaseIntrospection}. */
interface CreatePostgresLiveDatabaseIntrospectionOptions {
  /** Project connection configs keyed by connection id. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Optional pool factory forwarded to each connector. */
  poolFactory?: KtxPostgresPoolFactory;
  /** Optional endpoint resolver forwarded to each connector. */
  endpointResolver?: KtxPostgresEndpointResolver;
  /** Optional clock forwarded to each connector. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Builds a live-database introspection port backed by the native Postgres
 * scan connector.
 *
 * Each `extractSchema` call looks up the connection by id, creates a fresh
 * connector, introspects it, and always cleans the connector up afterwards,
 * whether introspection succeeded or threw.
 */
export function createPostgresLiveDatabaseIntrospection(
  options: CreatePostgresLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    const connector = new KtxPostgresScanConnector({
      connectionId,
      // May be undefined when the id is unknown; the connector receives it as-is.
      connection: options.connections[connectionId] as KtxPostgresConnectionConfig | undefined,
      poolFactory: options.poolFactory,
      endpointResolver: options.endpointResolver,
      now: options.now,
    });
    try {
      const scanInput = { connectionId, driver: 'postgres' };
      const scanContext = { runId: `postgres-${connectionId}` };
      return await connector.introspect(scanInput, scanContext);
    } finally {
      await connector.cleanup();
    }
  };

  return { extractSchema };
}
|
||||
250
packages/cli/src/connectors/snowflake/connector.test.ts
Normal file
250
packages/cli/src/connectors/snowflake/connector.test.ts
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
createSnowflakeLiveDatabaseIntrospection,
|
||||
isKtxSnowflakeConnectionConfig,
|
||||
KtxSnowflakeScanConnector,
|
||||
snowflakeConnectionConfigFromConfig,
|
||||
type KtxSnowflakeDriver,
|
||||
type KtxSnowflakeDriverFactory,
|
||||
} from './index.js';
|
||||
|
||||
/**
 * Builds a driver factory whose single fake driver answers a fixed set of SQL
 * fragments and metadata calls. Any SQL that matches none of the fragments
 * makes `query` throw, so unexpected statements fail the test.
 */
function fakeDriverFactory(): KtxSnowflakeDriverFactory {
  // Checked top to bottom; the first fragment contained in the SQL wins.
  const cannedQueries = [
    {
      fragment: 'TABLE_CONSTRAINTS',
      respond: () => ({ headers: ['TABLE_NAME', 'COLUMN_NAME'], rows: [['ORDERS', 'ID']], totalRows: 1, rowCount: 1 }),
    },
    {
      fragment: 'SELECT "ID", "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"',
      respond: () => ({
        headers: ['ID', 'STATUS'],
        headerTypes: ['NUMBER', 'VARCHAR'],
        rows: [[1, 'paid']],
        totalRows: 1,
        rowCount: 1,
      }),
    },
    {
      fragment: 'select * from (select ID, STATUS from ORDERS) as ktx_query_result limit 1',
      respond: () => ({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 }),
    },
    {
      fragment: 'SELECT "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"',
      respond: () => ({ headers: ['STATUS'], rows: [['paid'], ['open']], totalRows: 2, rowCount: 2 }),
    },
    {
      fragment: 'COUNT(DISTINCT val)',
      respond: () => ({ headers: ['CARDINALITY'], rows: [[2]], totalRows: 1, rowCount: 1 }),
    },
    {
      fragment: 'SELECT DISTINCT "STATUS"::VARCHAR AS val',
      respond: () => ({ headers: ['VAL'], rows: [['open'], ['paid']], totalRows: 2, rowCount: 2 }),
    },
  ];

  const query = vi.fn(async (sql: string) => {
    const canned = cannedQueries.find(({ fragment }) => sql.includes(fragment));
    if (!canned) {
      throw new Error(`Unexpected SQL: ${sql}`);
    }
    return canned.respond();
  });

  const getSchemaMetadata = vi.fn(async () => [
    {
      name: 'ORDERS',
      catalog: 'ANALYTICS',
      db: 'PUBLIC',
      rowCount: 12,
      comment: 'Orders',
      columns: [
        { name: 'ID', type: 'NUMBER(38,0)', nullable: false, comment: 'Primary key' },
        { name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null },
      ],
    },
    {
      name: 'ORDER_SUMMARY',
      catalog: 'ANALYTICS',
      db: 'PUBLIC',
      rowCount: 3,
      comment: null,
      columns: [{ name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null }],
    },
  ]);

  const listTables = vi.fn(async () => [
    { schema: 'PUBLIC', name: 'ORDERS', kind: 'table' as const },
    { schema: 'PUBLIC', name: 'ORDER_SUMMARY', kind: 'view' as const },
  ]);

  const driver: KtxSnowflakeDriver = {
    test: vi.fn(async () => ({ success: true })),
    query,
    getSchemaMetadata,
    listSchemas: vi.fn(async () => ['PUBLIC', 'MART']),
    listTables,
    cleanup: vi.fn(async () => undefined),
  };

  return { createDriver: vi.fn(() => driver) };
}
|
||||
|
||||
describe('KtxSnowflakeScanConnector', () => {
  /** Fresh password-auth connection fixture; a new object per call so cases stay isolated. */
  const passwordConnection = () => ({
    driver: 'snowflake',
    authMethod: 'password' as const,
    account: 'acct',
    warehouse: 'WH',
    database: 'ANALYTICS',
    schema_name: 'PUBLIC',
    username: 'reader',
    password: 'fixture-pass', // pragma: allowlist secret
  });
  const scanContext = () => ({ runId: 'scan-run-1' });
  const ordersTable = () => ({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' });
  const fixedNow = () => new Date('2026-04-29T18:00:00.000Z');

  it('resolves Snowflake connection configuration safely', () => {
    const looksLikeSnowflake = isKtxSnowflakeConnectionConfig({
      driver: 'snowflake',
      account: 'acct',
      warehouse: 'WH',
      database: 'ANALYTICS',
      username: 'reader',
    });
    expect(looksLikeSnowflake).toBe(true);
    expect(isKtxSnowflakeConnectionConfig({ driver: 'bigquery' })).toBe(false);

    const resolved = snowflakeConnectionConfigFromConfig({
      connectionId: 'warehouse',
      connection: passwordConnection(),
    });
    expect(resolved).toMatchObject({
      account: 'acct',
      warehouse: 'WH',
      database: 'ANALYTICS',
      schemas: ['PUBLIC'],
      username: 'reader',
      authMethod: 'password',
    });
  });

  it('introspects schema, primary keys, comments, row counts, and dimensions', async () => {
    const connector = new KtxSnowflakeScanConnector({
      connectionId: 'warehouse',
      connection: passwordConnection(),
      driverFactory: fakeDriverFactory(),
      now: fixedNow,
    });

    const snapshot = await connector.introspect({ connectionId: 'warehouse', driver: 'snowflake' }, scanContext());

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'snowflake',
      extractedAt: '2026-04-29T18:00:00.000Z',
      scope: { catalogs: ['ANALYTICS'], schemas: ['PUBLIC'] },
      metadata: {
        account: 'acct',
        warehouse: 'WH',
        database: 'ANALYTICS',
        schemas: ['PUBLIC'],
        table_count: 2,
        total_columns: 3,
      },
    });

    const ordersColumns = snapshot.tables.find((table) => table.name === 'ORDERS')?.columns;
    expect(ordersColumns).toEqual([
      {
        name: 'ID',
        nativeType: 'NUMBER(38,0)',
        normalizedType: 'NUMBER(38,0)',
        dimensionType: 'number',
        nullable: false,
        primaryKey: true,
        comment: 'Primary key',
      },
      {
        name: 'STATUS',
        nativeType: 'VARCHAR',
        normalizedType: 'VARCHAR',
        dimensionType: 'string',
        nullable: true,
        primaryKey: false,
        comment: null,
      },
    ]);
  });

  it('supports read-only query, sampling, distinct values, row counts, schema listing, and cleanup', async () => {
    const driverFactory = fakeDriverFactory();
    const connector = new KtxSnowflakeScanConnector({
      connectionId: 'warehouse',
      connection: passwordConnection(),
      driverFactory,
    });

    const tableSample = connector.sampleTable(
      { connectionId: 'warehouse', table: ordersTable(), limit: 1, columns: ['ID', 'STATUS'] },
      scanContext(),
    );
    await expect(tableSample).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1 });

    const readOnly = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'select ID, STATUS from ORDERS', maxRows: 1 },
      scanContext(),
    );
    await expect(readOnly).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], rowCount: 1 });

    const columnSample = connector.sampleColumn(
      { connectionId: 'warehouse', table: ordersTable(), column: 'STATUS', limit: 2 },
      scanContext(),
    );
    await expect(columnSample).resolves.toEqual({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

    const distinctValues = connector.getColumnDistinctValues(ordersTable(), 'STATUS', {
      maxCardinality: 10,
      limit: 5,
    });
    await expect(distinctValues).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    await expect(connector.getTableRowCount('ORDERS')).resolves.toBe(12);
    await expect(connector.listSchemas()).resolves.toEqual(['PUBLIC', 'MART']);

    // The connector must release the driver it obtained from the factory exactly once.
    await connector.cleanup();
    const driver = (driverFactory.createDriver as ReturnType<typeof vi.fn>).mock.results[0]?.value as KtxSnowflakeDriver;
    expect(driver.cleanup).toHaveBeenCalledTimes(1);
  });

  it('converts a native snapshot into a live-database introspection snapshot', async () => {
    const introspection = createSnowflakeLiveDatabaseIntrospection({
      connections: { warehouse: passwordConnection() },
      driverFactory: fakeDriverFactory(),
      now: fixedNow,
    });

    await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
      connectionId: 'warehouse',
      metadata: { database: 'ANALYTICS', schemas: ['PUBLIC'] },
      tables: expect.arrayContaining([expect.objectContaining(ordersTable())]),
    });
  });
});
|
||||
717
packages/cli/src/connectors/snowflake/connector.ts
Normal file
717
packages/cli/src/connectors/snowflake/connector.ts
Normal file
|
|
@ -0,0 +1,717 @@
|
|||
import { createPrivateKey } from 'node:crypto';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/index.js';
|
||||
import {
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
type KtxColumnStatsResult,
|
||||
type KtxQueryResult,
|
||||
type KtxReadOnlyQueryInput,
|
||||
type KtxScanConnector,
|
||||
type KtxScanContext,
|
||||
type KtxScanInput,
|
||||
type KtxSchemaColumn,
|
||||
type KtxSchemaSnapshot,
|
||||
type KtxSchemaTable,
|
||||
type KtxTableRef,
|
||||
type KtxTableSampleInput,
|
||||
type KtxTableListEntry,
|
||||
type KtxTableSampleResult,
|
||||
} from '../../context/scan/index.js';
|
||||
import * as snowflake from 'snowflake-sdk';
|
||||
import { KtxSnowflakeDialect } from './dialect.js';
|
||||
|
||||
/**
 * Raw Snowflake connection settings as they appear in configuration.
 *
 * String values read through this file's config helpers may be literal values
 * or `env:NAME` / `file:PATH` references that are resolved when the connection
 * is resolved.
 */
export interface KtxSnowflakeConnectionConfig {
  /** Connector driver name; compared case-insensitively against `snowflake`. */
  driver?: string;
  /** Authentication mode; treated as `password` when omitted. */
  authMethod?: 'password' | 'rsa';
  account?: string;
  warehouse?: string;
  database?: string;
  /** Single schema to scan; used only when `schema_names` is absent or empty. */
  schema_name?: string;
  /** Schemas to scan; takes precedence over `schema_name` when non-empty. */
  schema_names?: string[];
  username?: string;
  /** Required for password auth. */
  password?: string;
  /** Required for RSA auth. */
  privateKey?: string;
  /** Optional passphrase read for RSA auth. */
  passphrase?: string;
  role?: string;
  /** Any additional, connector-agnostic keys are tolerated. */
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Validated connection settings produced by
 * `snowflakeConnectionConfigFromConfig`, with references already resolved.
 */
export interface KtxSnowflakeResolvedConnectionConfig {
  authMethod: 'password' | 'rsa';
  account: string;
  warehouse: string;
  database: string;
  /** Schemas selected for scanning. */
  schemas: string[];
  username: string;
  /** Populated for password auth. */
  password?: string;
  /** Populated for RSA auth. */
  privateKey?: string;
  /** Populated for RSA auth when a passphrase was configured. */
  passphrase?: string;
  /** Populated only when a role was configured. */
  role?: string;
}
|
||||
|
||||
/** Column description as reported by a Snowflake driver's schema metadata call. */
export interface KtxSnowflakeRawColumnMetadata {
  name: string;
  /** Native Snowflake data type text. */
  type: string;
  nullable: boolean;
  /** Column comment, or `null` when there is none. */
  comment: string | null;
}
|
||||
|
||||
/** Table description as reported by a Snowflake driver's schema metadata call. */
export interface KtxSnowflakeRawTableMetadata {
  name: string;
  /** The SDK-backed driver fills this with the database name. */
  catalog: string;
  /** The SDK-backed driver fills this with the schema name. */
  db: string;
  rowCount: number | null;
  /** Table comment, or `null` when there is none. */
  comment: string | null;
  columns: KtxSnowflakeRawColumnMetadata[];
}
|
||||
|
||||
/** Low-level Snowflake access used by the scan connector; replaceable with a fake in tests. */
export interface KtxSnowflakeDriver {
  /** Checks connectivity, reporting failure through `success`/`error`. */
  test(): Promise<{ success: boolean; error?: string }>;
  /** Runs `sql`; the SDK-backed driver binds `params` only when it is an array. */
  query(sql: string, params?: unknown): Promise<KtxQueryResult>;
  /** Returns table and column metadata for one schema. */
  getSchemaMetadata(schemaName?: string): Promise<KtxSnowflakeRawTableMetadata[]>;
  listSchemas(): Promise<string[]>;
  /** Lists tables, optionally restricted to the given schemas. */
  listTables(schemas?: string[]): Promise<KtxTableListEntry[]>;
  cleanup(): Promise<void>;
}
|
||||
|
||||
/** Creates the driver a connector will use, given the resolved connection settings. */
export interface KtxSnowflakeDriverFactory {
  createDriver(input: {
    /** Validated connection settings. */
    resolved: KtxSnowflakeResolvedConnectionConfig;
    /** Optional provider of extra SDK options. */
    sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
  }): KtxSnowflakeDriver;
}
|
||||
|
||||
/** Supplies additional Snowflake SDK options for a connection. */
export interface KtxSnowflakeSdkOptionsProvider {
  /**
   * Resolves extra SDK options for `account`, or `undefined` when there are
   * none. The optional `close` callback accompanies the options.
   * NOTE(review): presumably `close` releases whatever backs the options; confirm against the driver.
   */
  resolve(input: {
    account: string;
    connection: KtxSnowflakeConnectionConfig;
  }): Promise<{ sdkOptions: Record<string, unknown>; close?: () => Promise<void> } | undefined>;
}
|
||||
|
||||
/** Construction options for the Snowflake scan connector. */
export interface KtxSnowflakeScanConnectorOptions {
  connectionId: string;
  /** Raw connection config; may be `undefined` when the id is not configured. */
  connection: KtxSnowflakeConnectionConfig | undefined;
  /** Optional driver factory override (tests inject a fake here). */
  driverFactory?: KtxSnowflakeDriverFactory;
  sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
  /** Environment used to resolve `env:` references. */
  env?: NodeJS.ProcessEnv;
  /** Optional clock override. */
  now?: () => Date;
}
|
||||
|
||||
/** Read-only query input extended with optional named parameters. */
export interface KtxSnowflakeReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  /** Parameter values keyed by name. */
  params?: Record<string, unknown>;
}
|
||||
|
||||
/** Options for a column distinct-values lookup. */
export interface KtxSnowflakeColumnDistinctValuesOptions {
  maxCardinality: number;
  limit: number;
  sampleSize?: number;
}
|
||||
|
||||
/** Result of a column distinct-values lookup. */
export interface KtxSnowflakeColumnDistinctValuesResult {
  /** The distinct values, or `null` when none are reported. */
  values: string[] | null;
  cardinality: number;
}
|
||||
|
||||
// Type-name fragments that mark a column as temporal; matched by substring against the upper-cased column type.
const DATE_TYPES = ['DATE', 'TIMESTAMP', 'TIMESTAMP_LTZ', 'TIMESTAMP_NTZ', 'TIMESTAMP_TZ', 'TIME'];
|
||||
|
||||
/**
 * Resolves a configuration string that may be an indirect reference.
 *
 * - `env:NAME` returns `env[NAME]`, or an empty string when it is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file; a leading `~`
 *   in PATH is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @throws whatever `readFileSync` throws when a `file:` target cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  const envPrefix = 'env:';
  const filePrefix = 'file:';

  if (value.startsWith(envPrefix)) {
    return env[value.slice(envPrefix.length)] ?? '';
  }
  if (value.startsWith(filePrefix)) {
    const rawPath = value.slice(filePrefix.length);
    // Drop the separators that follow `~` before resolving: `resolve()` discards
    // every earlier segment once it meets an absolute one, so passing "/keys/k.pem"
    // would silently ignore the home directory.
    const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, '')) : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Reads one connection setting as a resolved string.
 *
 * @returns `undefined` when the connection is missing, the value is not a
 * string, or it is blank after trimming; otherwise the trimmed value passed
 * through `resolveStringReference`.
 */
function stringConfigValue(
  connection: KtxSnowflakeConnectionConfig | undefined,
  key: keyof KtxSnowflakeConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Determines which schemas a connection targets.
 *
 * A non-empty `schema_names` array wins: blank entries are dropped and the
 * rest are resolved as references. Otherwise the single `schema_name` is
 * used, defaulting to `PUBLIC`.
 */
function schemaNames(connection: KtxSnowflakeConnectionConfig, env: NodeJS.ProcessEnv): string[] {
  const configured = connection.schema_names;
  if (!Array.isArray(configured) || configured.length === 0) {
    const single = stringConfigValue(connection, 'schema_name', env);
    return [single ?? 'PUBLIC'];
  }

  const resolvedSchemas: string[] = [];
  for (const schema of configured) {
    if (schema.trim().length > 0) {
      resolvedSchemas.push(resolveStringReference(schema, env));
    }
  }
  return resolvedSchemas;
}
|
||||
|
||||
/**
 * Coerces a value with `Number()` and returns it when finite, else `null`.
 * Note that `Number()` turns `null` and `''` into `0`, so those yield `0`.
 */
function firstNumber(value: unknown): number | null {
  const coerced = Number(value);
  if (!Number.isFinite(coerced)) {
    return null;
  }
  return coerced;
}
|
||||
|
||||
/**
 * Normalizes a single cell value returned by Snowflake.
 *
 * - For a column whose type contains one of `DATE_TYPES`, numbers and `Date`
 *   instances become ISO-8601 strings.
 * - Strings that look like a JSON object or array are parsed; if parsing
 *   fails the original string is returned.
 * - Everything else is returned unchanged.
 */
function normalizeSnowflakeValue(value: unknown, columnType?: string): unknown {
  const upperType = columnType ? columnType.toUpperCase() : '';
  const isTemporal = upperType !== '' && DATE_TYPES.some((type) => upperType.includes(type));

  if (isTemporal) {
    if (typeof value === 'number') {
      return new Date(value).toISOString();
    }
    if (value instanceof Date) {
      return value.toISOString();
    }
  }

  if (typeof value !== 'string') {
    return value;
  }

  const text = value.trim();
  const looksLikeObject = text.startsWith('{') && text.endsWith('}');
  const looksLikeArray = text.startsWith('[') && text.endsWith(']');
  if (!looksLikeObject && !looksLikeArray) {
    return value;
  }

  try {
    return JSON.parse(text) as unknown;
  } catch {
    // Not valid JSON after all: hand back the untouched input.
    return value;
  }
}
|
||||
|
||||
/**
 * Converts an arbitrary value into a Snowflake bind value: `null`, strings,
 * numbers and booleans pass through, `Date` becomes an ISO-8601 string, and
 * anything else is converted with `String()`.
 */
function toSnowflakeBind(value: unknown): snowflake.Bind {
  switch (typeof value) {
    case 'string':
    case 'number':
    case 'boolean':
      return value;
    default:
      break;
  }
  if (value === null) {
    return value;
  }
  return value instanceof Date ? value.toISOString() : String(value);
}
|
||||
|
||||
/** Converts each parameter with `toSnowflakeBind`; `undefined` input stays `undefined`. */
function toSnowflakeBinds(params: unknown[] | undefined): snowflake.Binds | undefined {
  if (params === undefined) {
    return undefined;
  }
  return params.map((param) => toSnowflakeBind(param));
}
|
||||
|
||||
/**
 * Type guard: true when the connection's `driver` is `snowflake`, compared
 * case-insensitively. A missing connection or driver yields false.
 */
export function isKtxSnowflakeConnectionConfig(
  connection: KtxSnowflakeConnectionConfig | undefined,
): connection is KtxSnowflakeConnectionConfig {
  const driver = connection?.driver ?? '';
  return String(driver).toLowerCase() === 'snowflake';
}
|
||||
|
||||
/**
 * Validates a raw connection config and resolves it into the settings the
 * Snowflake driver needs.
 *
 * `account`, `warehouse`, `database` and `username` are mandatory. Password
 * auth (the default) additionally requires `password`; RSA auth requires
 * `privateKey` and optionally reads `passphrase`. `role` is copied only when set.
 *
 * @param input.env environment used for `env:` references; defaults to `process.env`.
 * @throws Error when the driver is not Snowflake or a required setting is missing.
 */
export function snowflakeConnectionConfigFromConfig(input: {
  connectionId: string;
  connection: KtxSnowflakeConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxSnowflakeResolvedConnectionConfig {
  const { connection, connectionId } = input;

  const inputDriver = connection?.driver ?? 'unknown';
  if (!isKtxSnowflakeConnectionConfig(connection)) {
    throw new Error(`Native Snowflake connector cannot run driver "${inputDriver}"`);
  }

  const env = input.env ?? process.env;
  const read = (key: keyof KtxSnowflakeConnectionConfig): string | undefined =>
    stringConfigValue(connection, key, env);
  const missing = (setting: string): Error =>
    new Error(`Native Snowflake connector requires connections.${connectionId}.${setting}`);

  const authMethod = connection.authMethod ?? 'password';

  // All four values are read before any of them is validated.
  const account = read('account');
  const warehouse = read('warehouse');
  const database = read('database');
  const username = read('username');
  if (!account) {
    throw missing('account');
  }
  if (!warehouse) {
    throw missing('warehouse');
  }
  if (!database) {
    throw missing('database');
  }
  if (!username) {
    throw missing('username');
  }

  const resolved: KtxSnowflakeResolvedConnectionConfig = {
    authMethod,
    account,
    warehouse,
    database,
    schemas: schemaNames(connection, env),
    username,
  };

  const role = read('role');
  if (role) {
    resolved.role = role;
  }

  if (authMethod !== 'rsa') {
    resolved.password = read('password');
    if (!resolved.password) {
      throw missing('password');
    }
    return resolved;
  }

  resolved.privateKey = read('privateKey');
  const passphrase = read('passphrase');
  if (passphrase) {
    resolved.passphrase = passphrase;
  }
  if (!resolved.privateKey) {
    throw missing('privateKey for RSA auth');
  }
  return resolved;
}
|
||||
|
||||
/** Default factory: hands the resolved settings to the SDK-backed driver. */
class DefaultSnowflakeDriverFactory implements KtxSnowflakeDriverFactory {
  createDriver(input: {
    resolved: KtxSnowflakeResolvedConnectionConfig;
    sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
  }): KtxSnowflakeDriver {
    const { resolved, sdkOptionsProvider } = input;
    return new SnowflakeSdkDriver(resolved, sdkOptionsProvider);
  }
}
|
||||
|
||||
/**
 * Snowflake driver backed by the `snowflake` SDK module imported by this file.
 *
 * Every public operation opens its own connection, runs its statements, and destroys the
 * connection again in a `finally` block. Close callbacks returned by the optional SDK options
 * provider are collected in `closeSdkOptions` and invoked by `cleanup()`.
 */
class SnowflakeSdkDriver implements KtxSnowflakeDriver {
  /** Close callbacks handed back by `sdkOptionsProvider.resolve()`, drained by `cleanup()`. */
  private closeSdkOptions: Array<() => Promise<void>> = [];

  constructor(
    private readonly resolved: KtxSnowflakeResolvedConnectionConfig,
    private readonly sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider,
  ) {}

  /**
   * Opens a connection and runs `SELECT 1`, giving up after 60 seconds.
   *
   * @returns `{ success: true }` on success, otherwise `{ success: false, error }` with the
   *   failure message or a timeout message.
   */
  async test(): Promise<{ success: boolean; error?: string }> {
    const timeoutMs = 60_000;
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timedOut = new Promise<{ success: boolean; error: string }>((resolveTest) => {
      timer = setTimeout(
        () => resolveTest({ success: false, error: `Connection test timed out after ${timeoutMs / 1000}s` }),
        timeoutMs,
      );
    });
    try {
      return await Promise.race([this.runTest(), timedOut]);
    } finally {
      // Without this the pending timer keeps the Node event loop alive for the full timeout
      // even though the test has already finished.
      clearTimeout(timer);
    }
  }

  /**
   * Runs one statement on a fresh connection.
   *
   * @param sql Statement text.
   * @param params Bind values; only forwarded (through `toSnowflakeBinds`) when it is an array.
   * @returns The headers, header types, and rows of the statement, with `totalRows` and
   *   `rowCount` both set to the number of returned rows.
   */
  async query(sql: string, params?: unknown): Promise<KtxQueryResult> {
    let connection: snowflake.Connection | null = null;
    try {
      connection = await this.createConnection();
      const binds = Array.isArray(params) ? toSnowflakeBinds(params) : undefined;
      const result = await this.executeSnowflakeQuery(connection, sql, binds);
      return { ...result, totalRows: result.rows.length, rowCount: result.rows.length };
    } finally {
      if (connection) {
        await this.destroyConnection(connection);
      }
    }
  }

  /**
   * Reads table and column metadata for one schema of the configured database from
   * `INFORMATION_SCHEMA`.
   *
   * @param schemaName Schema to read; defaults to the first configured schema or `PUBLIC`.
   * @returns One entry per table with its columns grouped by table name.
   */
  async getSchemaMetadata(schemaName = this.resolved.schemas[0] ?? 'PUBLIC'): Promise<KtxSnowflakeRawTableMetadata[]> {
    // TABLE_TYPE is selected but not read below.
    const tablesResult = await this.query(
      `
        SELECT TABLE_NAME, TABLE_TYPE, COMMENT, ROW_COUNT
        FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
        ORDER BY TABLE_NAME
      `,
      [schemaName, this.resolved.database],
    );
    const columnsResult = await this.query(
      `
        SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COMMENT, ORDINAL_POSITION
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
        ORDER BY TABLE_NAME, ORDINAL_POSITION
      `,
      [schemaName, this.resolved.database],
    );
    const columnsByTable = new Map<string, KtxSnowflakeRawColumnMetadata[]>();
    for (const row of columnsResult.rows) {
      const tableName = String(row[0]);
      const columns = columnsByTable.get(tableName) ?? [];
      columns.push({
        name: String(row[1]),
        type: String(row[2]),
        nullable: row[3] === 'YES',
        comment: row[4] ? String(row[4]) : null,
      });
      columnsByTable.set(tableName, columns);
    }
    return tablesResult.rows.map((row) => ({
      name: String(row[0]),
      catalog: this.resolved.database,
      db: schemaName,
      rowCount: firstNumber(row[3]) ?? 0,
      comment: row[2] ? String(row[2]) : null,
      columns: columnsByTable.get(String(row[0])) ?? [],
    }));
  }

  /**
   * Lists the schemas of the configured database, excluding `INFORMATION_SCHEMA`.
   * The schema name is read from the second column of the `SHOW SCHEMAS` result.
   */
  async listSchemas(): Promise<string[]> {
    const result = await this.query(`SHOW SCHEMAS IN DATABASE ${this.quoteIdentifier(this.resolved.database)}`);
    return result.rows.map((row) => String(row[1])).filter((name) => name !== 'INFORMATION_SCHEMA');
  }

  /**
   * Lists tables and views, one `INFORMATION_SCHEMA.TABLES` query per schema.
   *
   * @param schemas Schemas to inspect; when omitted, the result of `listSchemas()` is used.
   * @returns Entries whose `kind` is `'view'` when TABLE_TYPE is `VIEW` and `'table'` otherwise.
   */
  async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
    const filterSchemas = schemas ?? (await this.listSchemas());
    if (filterSchemas.length === 0) return [];
    const entries: KtxTableListEntry[] = [];
    for (const schemaName of filterSchemas) {
      const result = await this.query(
        `
          SELECT TABLE_NAME, TABLE_TYPE
          FROM INFORMATION_SCHEMA.TABLES
          WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
          ORDER BY TABLE_NAME
        `,
        [schemaName, this.resolved.database],
      );
      for (const row of result.rows) {
        entries.push({
          schema: schemaName,
          name: String(row[0]),
          kind: String(row[1]) === 'VIEW' ? 'view' : 'table',
        });
      }
    }
    return entries;
  }

  /** Invokes and forgets every close callback collected from the SDK options provider. */
  async cleanup(): Promise<void> {
    const closers = this.closeSdkOptions;
    // Reset first so a second cleanup() does not invoke the same callbacks again.
    this.closeSdkOptions = [];
    await Promise.all(closers.map((close) => close()));
  }

  /** Connection probe used by `test()`; connection and query errors become a result object. */
  private async runTest(): Promise<{ success: boolean; error?: string }> {
    let connection: snowflake.Connection | null = null;
    try {
      connection = await this.createConnection();
      await this.executeSnowflakeQuery(connection, 'SELECT 1');
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    } finally {
      if (connection) {
        await this.destroyConnection(connection);
      }
    }
  }

  /**
   * Builds the SDK connection options, connects, and applies the session context.
   *
   * Options returned by the SDK options provider are spread over the base options; RSA
   * authentication adds `authenticator`/`privateKey`, any other method adds `password`.
   * If applying the session context fails, the connection is destroyed before rejecting.
   */
  private async createConnection(): Promise<snowflake.Connection> {
    const patch = await this.sdkOptionsProvider?.resolve({
      account: this.resolved.account,
      connection: { ...this.resolved, driver: 'snowflake' },
    });
    if (patch?.close) {
      this.closeSdkOptions.push(patch.close);
    }
    const baseConfig: snowflake.ConnectionOptions = {
      account: this.resolved.account,
      username: this.resolved.username,
      warehouse: this.resolved.warehouse,
      database: this.resolved.database,
      schema: this.resolved.schemas[0] ?? 'PUBLIC',
      role: this.resolved.role,
      ...patch?.sdkOptions,
    };
    const connectionConfig: snowflake.ConnectionOptions =
      this.resolved.authMethod === 'rsa'
        ? { ...baseConfig, authenticator: 'SNOWFLAKE_JWT', privateKey: this.decryptPrivateKey() }
        : { ...baseConfig, password: this.resolved.password };
    const connection = snowflake.createConnection(connectionConfig);
    return new Promise((resolveConnection, rejectConnection) => {
      connection.connect((error, connected) => {
        if (error) {
          rejectConnection(error);
          return;
        }
        const resolvedConnection = connected ?? connection;
        this.setConnectionContext(resolvedConnection).then(
          () => resolveConnection(resolvedConnection),
          (contextError) => {
            resolvedConnection.destroy(() => undefined);
            rejectConnection(contextError);
          },
        );
      });
    });
  }

  /** Issues `USE ROLE` (when a role is configured), `USE WAREHOUSE`, `USE DATABASE`, `USE SCHEMA`. */
  private async setConnectionContext(connection: snowflake.Connection): Promise<void> {
    if (this.resolved.role) {
      await this.executeSnowflakeQuery(connection, `USE ROLE ${this.quoteIdentifier(this.resolved.role)}`);
    }
    await this.executeSnowflakeQuery(connection, `USE WAREHOUSE ${this.quoteIdentifier(this.resolved.warehouse)}`);
    await this.executeSnowflakeQuery(connection, `USE DATABASE ${this.quoteIdentifier(this.resolved.database)}`);
    await this.executeSnowflakeQuery(
      connection,
      `USE SCHEMA ${this.quoteIdentifier(this.resolved.schemas[0] ?? 'PUBLIC')}`,
    );
  }

  /**
   * Promise wrapper around the SDK's callback-style `connection.execute`.
   *
   * @returns Column names, column types, and rows converted to arrays in column order with
   *   each value passed through `normalizeSnowflakeValue`.
   */
  private async executeSnowflakeQuery(
    connection: snowflake.Connection,
    sqlText: string,
    binds?: snowflake.Binds,
  ): Promise<{ headers: string[]; headerTypes?: string[]; rows: unknown[][] }> {
    return new Promise((resolveQuery, rejectQuery) => {
      connection.execute({
        sqlText,
        binds,
        complete: (error, statement, rows) => {
          if (error) {
            rejectQuery(error);
            return;
          }
          const columns = statement.getColumns();
          const headers = columns ? columns.map((column) => column.getName()) : [];
          const headerTypes = columns ? columns.map((column) => column.getType()) : [];
          const normalizedRows = rows
            ? rows.map((row) => headers.map((header, index) => normalizeSnowflakeValue(row[header], headerTypes[index])))
            : [];
          resolveQuery({ headers, headerTypes, rows: normalizedRows });
        },
      });
    });
  }

  /** Promise wrapper around the SDK's callback-style `connection.destroy`. */
  private destroyConnection(connection: snowflake.Connection): Promise<void> {
    return new Promise((resolveDestroy, rejectDestroy) => {
      connection.destroy((error) => {
        if (error) {
          rejectDestroy(error);
          return;
        }
        resolveDestroy();
      });
    });
  }

  /**
   * Loads the configured PEM private key (using the passphrase when one is configured) and
   * re-exports it as PKCS#8 PEM.
   *
   * @throws Error when no private key is configured.
   */
  private decryptPrivateKey(): string {
    if (!this.resolved.privateKey) {
      throw new Error('Private key is required for RSA authentication');
    }
    const privateKeyObject = createPrivateKey({
      key: this.resolved.privateKey,
      format: 'pem',
      ...(this.resolved.passphrase ? { passphrase: this.resolved.passphrase } : {}),
    });
    return privateKeyObject.export({ format: 'pem', type: 'pkcs8' }) as string;
  }

  /** Wraps a name in double quotes, doubling embedded double quotes so it stays one identifier. */
  private quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }
}
|
||||
|
||||
/**
 * Scan connector for a single Snowflake connection.
 *
 * The connector resolves its connection config once in the constructor, creates its driver
 * lazily on first use, and releases it in `cleanup()`. Methods that receive a `connectionId`
 * reject ids other than the one the connector was created for.
 */
export class KtxSnowflakeScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'snowflake' as const;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: false,
    estimatedRowCounts: true,
  });

  private readonly resolved: KtxSnowflakeResolvedConnectionConfig;
  private readonly driverFactory: KtxSnowflakeDriverFactory;
  private readonly dialect = new KtxSnowflakeDialect();
  private readonly now: () => Date;
  /** Lazily created driver; `null` until first use and again after `cleanup()`. */
  private driverInstance: KtxSnowflakeDriver | null = null;

  constructor(private readonly options: KtxSnowflakeScanConnectorOptions) {
    this.resolved = snowflakeConnectionConfigFromConfig(options);
    this.driverFactory = options.driverFactory ?? new DefaultSnowflakeDriverFactory();
    this.now = options.now ?? (() => new Date());
    this.id = `snowflake:${options.connectionId}`;
  }

  /** Delegates to the driver's connection test. */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    const driver = this.getDriver();
    return driver.test();
  }

  /**
   * Builds a schema snapshot covering every configured schema.
   *
   * @throws Error when `input.connectionId` is not this connector's connection.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);

    const collected: KtxSchemaTable[] = [];
    for (const schemaName of this.resolved.schemas) {
      const rawTables = await this.getDriver().getSchemaMetadata(schemaName);
      const tableNames = rawTables.map((raw) => raw.name);
      const keysByTable = await this.primaryKeys(tableNames, schemaName);
      for (const raw of rawTables) {
        collected.push(this.toSchemaTable(raw, keysByTable));
      }
    }

    let columnTotal = 0;
    for (const table of collected) {
      columnTotal += table.columns.length;
    }

    return {
      connectionId: this.options.connectionId,
      driver: 'snowflake',
      extractedAt: this.now().toISOString(),
      scope: { catalogs: [this.resolved.database], schemas: this.resolved.schemas },
      metadata: {
        account: this.resolved.account,
        warehouse: this.resolved.warehouse,
        database: this.resolved.database,
        schemas: this.resolved.schemas,
        table_count: collected.length,
        total_columns: columnTotal,
      },
      tables: collected,
    };
  }

  /** Runs the dialect's table sample query and returns its headers, rows, and row count. */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
    this.assertConnection(input.connectionId);
    const qualifiedTable = this.qTableName(input.table);
    const sampleSql = this.dialect.generateSampleQuery(qualifiedTable, input.limit, input.columns);
    const { headers, rows, totalRows } = await this.getDriver().query(sampleSql);
    return { headers, rows, totalRows };
  }

  /**
   * Runs the dialect's column sample query and returns the first cell of every non-empty row
   * whose first cell is not `null`. Null and distinct counts are not computed.
   */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const qualifiedTable = this.qTableName(input.table);
    const sampleSql = this.dialect.generateColumnSampleQuery(qualifiedTable, input.column, input.limit);
    const result = await this.getDriver().query(sampleSql);

    const values: unknown[] = [];
    for (const row of result.rows) {
      if (row.length > 0 && row[0] !== null) {
        values.push(row[0]);
      }
    }
    return { values, nullCount: null, distinctCount: null };
  }

  /** Always resolves to `null`; the capabilities above declare `columnStats: false`. */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /**
   * Executes caller-supplied SQL after passing it through `assertReadOnlySql` and
   * `limitSqlForExecution`, with parameters prepared by the dialect.
   */
  async executeReadOnly(input: KtxSnowflakeReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const checkedSql = assertReadOnlySql(input.sql);
    const boundedSql = limitSqlForExecution(checkedSql, input.maxRows);
    const { sql, params } = this.dialect.prepareQuery(boundedSql, input.params);
    return this.getDriver().query(sql, params);
  }

  /**
   * Estimates a column's cardinality from a sample and, when it is small enough, lists its
   * distinct values.
   *
   * @returns `null` when no cardinality could be read; `{ values: [], cardinality: 0 }` for an
   *   empty sample; `{ values: null, cardinality }` when the cardinality exceeds
   *   `options.maxCardinality`; otherwise the stringified distinct values, with entries equal
   *   to the string `'null'` removed.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxSnowflakeColumnDistinctValuesOptions,
  ): Promise<KtxSnowflakeColumnDistinctValuesResult | null> {
    const qualifiedTable = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const sampleSize = options.sampleSize ?? 10000;

    const cardinalitySql = this.dialect.generateCardinalitySampleQuery(qualifiedTable, quotedColumn, sampleSize);
    const cardinality = await this.singleNumber(cardinalitySql, 'CARDINALITY');
    if (cardinality === null) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }

    const distinctSql = this.dialect.generateDistinctValuesQuery(qualifiedTable, quotedColumn, options.limit);
    const distinctRows = await this.queryRaw<Record<string, unknown>>(distinctSql);
    const values: string[] = [];
    for (const row of distinctRows) {
      const text = String(row.VAL ?? row.val);
      if (text !== 'null') {
        values.push(text);
      }
    }
    return { values, cardinality };
  }

  /**
   * Looks up a table's row count in the driver's schema metadata.
   *
   * @returns The reported row count, or `0` when the table is not found.
   */
  async getTableRowCount(tableName: string, schemaName = this.resolved.schemas[0] ?? 'PUBLIC'): Promise<number> {
    const schemaTables = await this.getDriver().getSchemaMetadata(schemaName);
    const match = schemaTables.find((candidate) => candidate.name === tableName);
    return match?.rowCount ?? 0;
  }

  /** Formats a table reference through the dialect's `formatTableName`. */
  qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes an identifier through the dialect's `quoteIdentifier`. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Delegates to the driver's `listSchemas`. */
  listSchemas(): Promise<string[]> {
    return this.getDriver().listSchemas();
  }

  /** Delegates to the driver's `listTables`. */
  listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
    return this.getDriver().listTables(schemas);
  }

  /** Cleans up the driver, if one was created, and forgets it. */
  async cleanup(): Promise<void> {
    const active = this.driverInstance;
    if (!active) {
      return;
    }
    await active.cleanup();
    this.driverInstance = null;
  }

  /** Returns the cached driver, creating it through the factory on first use. */
  private getDriver(): KtxSnowflakeDriver {
    if (this.driverInstance) {
      return this.driverInstance;
    }
    const created = this.driverFactory.createDriver({
      resolved: this.resolved,
      sdkOptionsProvider: this.options.sdkOptionsProvider,
    });
    this.driverInstance = created;
    return created;
  }

  /**
   * Reads primary-key columns for one schema.
   *
   * @returns A map with an entry for every name in `tableNames`; key columns reported for
   *   tables outside that list are ignored. No query is issued when `tableNames` is empty.
   */
  private async primaryKeys(tableNames: string[], schemaName: string): Promise<Map<string, Set<string>>> {
    const keysByTable = new Map<string, Set<string>>();
    if (tableNames.length === 0) {
      return keysByTable;
    }

    const result = await this.getDriver().query(
      `
        SELECT tc.TABLE_NAME, kcu.COLUMN_NAME
        FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
        JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
          ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
         AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
         AND tc.TABLE_CATALOG = kcu.TABLE_CATALOG
        WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
          AND tc.TABLE_SCHEMA = ?
          AND tc.TABLE_CATALOG = ?
        ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION
      `,
      [schemaName, this.resolved.database],
    );

    tableNames.forEach((name) => keysByTable.set(name, new Set()));
    for (const [rawTable, rawColumn] of result.rows) {
      keysByTable.get(String(rawTable))?.add(String(rawColumn));
    }
    return keysByTable;
  }

  /** Converts raw table metadata into a schema table; `kind` is always `'table'` here. */
  private toSchemaTable(table: KtxSnowflakeRawTableMetadata, primaryKeys: Map<string, Set<string>>): KtxSchemaTable {
    const columns = table.columns.map((column) => this.toSchemaColumn(table.name, column, primaryKeys));
    return {
      catalog: table.catalog,
      db: table.db,
      name: table.name,
      kind: 'table',
      comment: table.comment,
      estimatedRows: table.rowCount,
      columns,
      foreignKeys: [],
    };
  }

  /** Converts raw column metadata, deriving type fields from the dialect and the key flag from `primaryKeys`. */
  private toSchemaColumn(
    tableName: string,
    column: KtxSnowflakeRawColumnMetadata,
    primaryKeys: Map<string, Set<string>>,
  ): KtxSchemaColumn {
    const isPrimaryKey = primaryKeys.get(tableName)?.has(column.name) ?? false;
    return {
      name: column.name,
      nativeType: column.type,
      normalizedType: this.dialect.mapDataType(column.type),
      dimensionType: this.dialect.mapToDimensionType(column.type),
      nullable: column.nullable,
      primaryKey: isPrimaryKey,
      comment: column.comment,
    };
  }

  /** Runs a query and returns each row as an object keyed by the result headers. */
  private async queryRaw<T extends Record<string, unknown>>(sql: string, params?: unknown): Promise<T[]> {
    const { headers, rows } = await this.getDriver().query(sql, params);
    return rows.map((cells) => {
      const pairs = headers.map((header, position) => [header, cells[position]]);
      return Object.fromEntries(pairs) as T;
    });
  }

  /** Reads one numeric cell from the first row, trying `header` and then its lowercase form. */
  private async singleNumber(sql: string, header: string): Promise<number | null> {
    const rows = await this.queryRaw<Record<string, unknown>>(sql);
    const first = rows[0];
    return firstNumber(first?.[header] ?? first?.[header.toLowerCase()]);
  }

  /** Throws when asked to operate on a connection other than the one this connector owns. */
  private assertConnection(connectionId: string): void {
    if (connectionId === this.options.connectionId) {
      return;
    }
    throw new Error(`Snowflake connector ${this.options.connectionId} cannot scan connection ${connectionId}`);
  }
}
|
||||
50
packages/cli/src/connectors/snowflake/dialect.test.ts
Normal file
50
packages/cli/src/connectors/snowflake/dialect.test.ts
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxSnowflakeDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the SQL text and type mappings produced by KtxSnowflakeDialect.
describe('KtxSnowflakeDialect', () => {
  const dialect = new KtxSnowflakeDialect();
  // Pre-quoted table name shared by the SQL-generation assertions.
  const ordersTable = '"PUBLIC"."ORDERS"';

  it('quotes identifiers and formats database.schema.table names', () => {
    const quoted = dialect.quoteIdentifier('order"items');
    const threePart = dialect.formatTableName({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' });
    const twoPart = dialect.formatTableName({ db: 'PUBLIC', name: 'ORDERS' });
    const onePart = dialect.formatTableName({ name: 'ORDERS' });

    expect(quoted).toBe('"order""items"');
    expect(threePart).toBe('"ANALYTICS"."PUBLIC"."ORDERS"');
    expect(twoPart).toBe('"PUBLIC"."ORDERS"');
    expect(onePart).toBe('"ORDERS"');
  });

  it('maps native Snowflake types to scan dimensions', () => {
    // The native type is passed through unchanged by mapDataType.
    expect(dialect.mapDataType('NUMBER(38,0)')).toBe('NUMBER(38,0)');

    expect(dialect.mapToDimensionType('TIMESTAMP_NTZ')).toBe('time');
    expect(dialect.mapToDimensionType('NUMBER(38,0)')).toBe('number');
    expect(dialect.mapToDimensionType('BOOLEAN')).toBe('boolean');
    expect(dialect.mapToDimensionType('VARIANT')).toBe('string');
  });

  it('generates sampling and dictionary SQL', () => {
    const tableSample = dialect.generateSampleQuery(ordersTable, 5, ['ID', 'STATUS']);
    expect(tableSample).toBe('SELECT "ID", "STATUS" FROM "PUBLIC"."ORDERS" SAMPLE ROW (5 ROWS)');

    const columnSample = dialect.generateColumnSampleQuery(ordersTable, 'STATUS', 10);
    expect(columnSample).toBe(
      'SELECT "STATUS" FROM "PUBLIC"."ORDERS" WHERE "STATUS" IS NOT NULL AND TRIM(CAST("STATUS" AS STRING)) != \'\' LIMIT 10',
    );

    const cardinalitySql = dialect.generateCardinalitySampleQuery(ordersTable, '"STATUS"', 100);
    expect(cardinalitySql).toContain('SELECT COUNT(DISTINCT val) AS cardinality');

    const distinctSql = dialect.generateDistinctValuesQuery(ordersTable, '"STATUS"', 20);
    expect(distinctSql).toContain('SELECT DISTINCT "STATUS"::VARCHAR AS val');
  });

  it('passes Snowflake positional parameters as bind arrays', () => {
    const parameterized = 'SELECT * FROM ORDERS WHERE ID = ? AND STATUS = ?';
    expect(dialect.prepareQuery(parameterized, { id: 1, status: 'paid' })).toEqual({
      sql: parameterized,
      params: [1, 'paid'],
    });

    const plain = 'SELECT * FROM ORDERS';
    expect(dialect.prepareQuery(plain)).toEqual({ sql: plain, params: undefined });
  });

  it('keeps unsupported statistics explicit', () => {
    const statisticsSql = dialect.generateColumnStatisticsQuery('PUBLIC', 'ORDERS');
    expect(statisticsSql).toBeNull();
  });
});
|
||||
187
packages/cli/src/connectors/snowflake/dialect.ts
Normal file
187
packages/cli/src/connectors/snowflake/dialect.ts
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/index.js';
|
||||
|
||||
type SnowflakeTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL text generation and type mapping for Snowflake.
 *
 * Methods that take `tableName` or `columnName`/`column` arguments splice them into the SQL
 * verbatim, so callers pass already-quoted identifiers unless a method says it quotes them.
 */
export class KtxSnowflakeDialect {
  readonly type = 'snowflake';

  /** Exact-match lookup from an upper-cased base type name to its dimension type. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    TIMESTAMP_NTZ: 'time',
    TIMESTAMP_LTZ: 'time',
    TIMESTAMP_TZ: 'time',
    TIMESTAMP: 'time',
    DATE: 'time',
    TIME: 'time',
    NUMBER: 'number',
    DECIMAL: 'number',
    NUMERIC: 'number',
    INT: 'number',
    INTEGER: 'number',
    BIGINT: 'number',
    SMALLINT: 'number',
    TINYINT: 'number',
    BYTEINT: 'number',
    FLOAT: 'number',
    FLOAT4: 'number',
    FLOAT8: 'number',
    DOUBLE: 'number',
    'DOUBLE PRECISION': 'number',
    REAL: 'number',
    VARCHAR: 'string',
    CHAR: 'string',
    CHARACTER: 'string',
    STRING: 'string',
    TEXT: 'string',
    BINARY: 'string',
    VARBINARY: 'string',
    BOOLEAN: 'boolean',
    VARIANT: 'string',
    OBJECT: 'string',
    ARRAY: 'string',
  };

  /** Wraps a name in double quotes, doubling any embedded double quotes. */
  quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /**
   * Formats a table reference as `"catalog"."db"."name"`, `"db"."name"`, or `"name"`,
   * depending on which parts are present. A catalog without a db is ignored.
   */
  formatTableName(table: SnowflakeTableNameRef): string {
    if (table.catalog && table.db) {
      return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
    }
    if (table.db) {
      return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
    }
    return this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Maps a native type to a dimension type.
   *
   * The type is upper-cased and stripped of any `(...)` suffix, then looked up in
   * `typeMappings`; unknown names fall back to substring heuristics and finally to `'string'`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const upper = nativeType.toUpperCase().trim();
    const parenIndex = upper.indexOf('(');
    // Trim again so "REAL (…)" resolves to the same base name as "REAL(…)".
    const normalized = (parenIndex === -1 ? upper : upper.slice(0, parenIndex)).trim();
    const mapped = this.typeMappings[normalized];
    if (mapped) {
      return mapped;
    }
    if (normalized.includes('TIME') || normalized.includes('DATE')) {
      return 'time';
    }
    if (
      normalized.includes('INT') ||
      normalized.includes('NUM') ||
      normalized.includes('DEC') ||
      normalized.includes('FLOAT') ||
      normalized.includes('DOUBLE')
    ) {
      return 'number';
    }
    if (normalized.includes('BOOL')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds a `SAMPLE ROW (<limit> ROWS)` query.
   *
   * @param columns Column names to select (quoted here); `*` when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} SAMPLE ROW (${limit} ROWS)`;
  }

  /** Builds a query returning up to `limit` non-null, non-blank values of a column (quoted here). */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' LIMIT ${limit}`;
  }

  /**
   * Passes the SQL through and turns a parameter record into a positional bind array,
   * in the record's own value order. `params` is `undefined` when no record is given.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
    return { sql, params: params ? Object.values(params) : undefined };
  }

  /** Returns a random row filter for a fraction strictly between 0 and 1, else an empty string. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `UNIFORM(0::FLOAT, 1::FLOAT, RANDOM()) < ${samplePct}`;
  }

  /** Returns a `SAMPLE (<percent>)` clause for a fraction strictly between 0 and 1, else an empty string. */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `SAMPLE (${samplePct * 100})`;
  }

  /** Returns `LIMIT n`, with `OFFSET m` appended only when the offset is positive. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Returns a `COUNT_IF(... IS NULL)` expression for the given column expression. */
  getNullCountExpression(column: string): string {
    return `COUNT_IF(${column} IS NULL)`;
  }

  /** Returns an `APPROX_COUNT_DISTINCT(...)` expression for the given column expression. */
  getDistinctCountExpression(column: string): string {
    return `APPROX_COUNT_DISTINCT(${column})`;
  }

  /** Builds a query counting distinct non-null values among the first `sampleSize` rows (via `LIMIT`). */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /** Builds a query listing up to `limit` distinct non-null values, cast to VARCHAR and sorted. */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT ${columnName}::VARCHAR AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /** Always returns `null`: no column-statistics query is generated for Snowflake. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Like `generateCardinalitySampleQuery`, but samples with `SAMPLE ROW (<n> ROWS)` instead of `LIMIT`. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName} SAMPLE ROW (${sampleSize} ROWS)
        WHERE ${columnName} IS NOT NULL
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Builds a `DATE_TRUNC` expression, converting the column from UTC to `timezone` first
   * when a timezone is given. The timezone is emitted as an escaped string literal.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const target = timezone ? `CONVERT_TIMEZONE('UTC', ${this.quoteStringLiteral(timezone)}, ${column})` : column;
    return `DATE_TRUNC('${granularity}', ${target})`;
  }

  /**
   * Builds an expression that buckets a timestamp into fixed-size intervals counted from an origin.
   *
   * @param interval `"<amount> <unit>"`, e.g. `"15 minute"`; the amount must be a positive number
   *   and the unit a bare word.
   * @param origin Bucket origin timestamp text; defaults to `1970-01-01`.
   * @param timezone When given, the column is converted from UTC to this timezone first.
   * @throws Error when `interval` does not have the `"<amount> <unit>"` shape.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const target = timezone ? `CONVERT_TIMEZONE('UTC', ${this.quoteStringLiteral(timezone)}, ${column})` : column;
    // Amount and unit are spliced into the SQL as bare tokens, so validate them rather than
    // emitting "undefined" or arbitrary text into the statement.
    const [amount, unit] = interval.trim().split(/\s+/);
    if (
      amount === undefined ||
      unit === undefined ||
      !/^\d+(?:\.\d+)?$/.test(amount) ||
      !(Number(amount) > 0) ||
      !/^[A-Za-z_]+$/.test(unit)
    ) {
      throw new Error(`Invalid Snowflake time interval: ${JSON.stringify(interval)}`);
    }
    const originExpr = origin ? `${this.quoteStringLiteral(origin)}::TIMESTAMP` : `'1970-01-01'::TIMESTAMP`;
    return `DATEADD(${unit}, FLOOR(DATEDIFF(${unit}, ${originExpr}, ${target}) / ${amount}) * ${amount}, ${originExpr})`;
  }

  /** Wraps the interval text in an `INTERVAL '...'` literal, escaping embedded single quotes. */
  parseIntervalToSql(interval: string): string {
    return `INTERVAL ${this.quoteStringLiteral(interval)}`;
  }

  /** Renders text as a single-quoted SQL string literal, doubling embedded single quotes. */
  private quoteStringLiteral(value: string): string {
    return `'${value.replace(/'/g, "''")}'`;
  }
}
|
||||
18
packages/cli/src/connectors/snowflake/index.ts
Normal file
18
packages/cli/src/connectors/snowflake/index.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
export { KtxSnowflakeDialect } from './dialect.js';
|
||||
export {
|
||||
isKtxSnowflakeConnectionConfig,
|
||||
KtxSnowflakeScanConnector,
|
||||
snowflakeConnectionConfigFromConfig,
|
||||
type KtxSnowflakeColumnDistinctValuesOptions,
|
||||
type KtxSnowflakeColumnDistinctValuesResult,
|
||||
type KtxSnowflakeConnectionConfig,
|
||||
type KtxSnowflakeDriver,
|
||||
type KtxSnowflakeDriverFactory,
|
||||
type KtxSnowflakeRawColumnMetadata,
|
||||
type KtxSnowflakeRawTableMetadata,
|
||||
type KtxSnowflakeReadOnlyQueryInput,
|
||||
type KtxSnowflakeResolvedConnectionConfig,
|
||||
type KtxSnowflakeScanConnectorOptions,
|
||||
type KtxSnowflakeSdkOptionsProvider,
|
||||
} from './connector.js';
|
||||
export { createSnowflakeLiveDatabaseIntrospection } from './live-database-introspection.js';
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/index.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/index.js';
|
||||
import {
|
||||
KtxSnowflakeScanConnector,
|
||||
type KtxSnowflakeConnectionConfig,
|
||||
type KtxSnowflakeDriverFactory,
|
||||
type KtxSnowflakeSdkOptionsProvider,
|
||||
} from './connector.js';
|
||||
|
||||
/** Options for `createSnowflakeLiveDatabaseIntrospection`. */
interface CreateSnowflakeLiveDatabaseIntrospectionOptions {
  /** Project connection configs keyed by connection id. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Passed through to every connector that is created. */
  driverFactory?: KtxSnowflakeDriverFactory;
  /** Passed through to every connector that is created. */
  sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
  /** Passed through to every connector that is created. */
  now?: () => Date;
}

/**
 * Creates a live-introspection port backed by `KtxSnowflakeScanConnector`.
 *
 * Each `extractSchema` call builds a connector for the requested connection id, runs
 * `introspect`, and always calls the connector's `cleanup()` afterwards, whether or not
 * introspection succeeded.
 */
export function createSnowflakeLiveDatabaseIntrospection(
  options: CreateSnowflakeLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  return {
    async extractSchema(connectionId: string) {
      // The lookup result is passed on as-is, so it may be undefined for an unknown id.
      const scanConnector = new KtxSnowflakeScanConnector({
        connectionId,
        connection: options.connections[connectionId] as KtxSnowflakeConnectionConfig | undefined,
        driverFactory: options.driverFactory,
        sdkOptionsProvider: options.sdkOptionsProvider,
        now: options.now,
      });
      try {
        const scanInput = { connectionId, driver: 'snowflake' } as const;
        const scanContext = { runId: `snowflake-${connectionId}` };
        return await scanConnector.introspect(scanInput, scanContext);
      } finally {
        await scanConnector.cleanup();
      }
    },
  };
}
|
||||
260
packages/cli/src/connectors/sqlite/connector.test.ts
Normal file
260
packages/cli/src/connectors/sqlite/connector.test.ts
Normal file
|
|
@ -0,0 +1,260 @@
|
|||
import Database from 'better-sqlite3';
|
||||
import { writeFileSync } from 'node:fs';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import {
|
||||
createSqliteLiveDatabaseIntrospection,
|
||||
isKtxSqliteConnectionConfig,
|
||||
KtxSqliteScanConnector,
|
||||
sqliteDatabasePathFromConfig,
|
||||
} from './index.js';
|
||||
|
||||
describe('KtxSqliteScanConnector', () => {
|
||||
let tempDir: string;
|
||||
let dbPath: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-connector-sqlite-'));
|
||||
dbPath = join(tempDir, 'warehouse.db');
|
||||
const db = new Database(dbPath);
|
||||
db.exec(`
|
||||
PRAGMA foreign_keys = ON;
|
||||
CREATE TABLE customers (
|
||||
id INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
tier TEXT
|
||||
);
|
||||
CREATE TABLE orders (
|
||||
id INTEGER PRIMARY KEY,
|
||||
customer_id INTEGER NOT NULL,
|
||||
status TEXT,
|
||||
total NUMERIC,
|
||||
created_at TEXT,
|
||||
FOREIGN KEY(customer_id) REFERENCES customers(id)
|
||||
);
|
||||
CREATE VIEW recent_orders AS SELECT id, customer_id, status FROM orders;
|
||||
INSERT INTO customers (id, name, tier) VALUES (1, 'Ada', 'enterprise'), (2, 'Grace', 'growth');
|
||||
INSERT INTO orders (id, customer_id, status, total, created_at)
|
||||
VALUES (10, 1, 'paid', 42.5, '2026-04-28'), (11, 2, 'open', 9.5, '2026-04-29');
|
||||
`);
|
||||
db.close();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('resolves SQLite path configuration safely', () => {
|
||||
const originalDatabaseUrl = process.env.KTX_SQLITE_TEST_URL;
|
||||
const pointerPath = join(tempDir, 'sqlite-path.txt');
|
||||
process.env.KTX_SQLITE_TEST_URL = `sqlite:${dbPath}`;
|
||||
writeFileSync(pointerPath, dbPath, 'utf-8');
|
||||
|
||||
try {
|
||||
expect(isKtxSqliteConnectionConfig({ driver: 'sqlite', path: 'warehouse.db' })).toBe(true);
|
||||
expect(isKtxSqliteConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL' })).toBe(false);
|
||||
expect(
|
||||
sqliteDatabasePathFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
projectDir: tempDir,
|
||||
connection: { driver: 'sqlite', path: 'warehouse.db' },
|
||||
}),
|
||||
).toBe(dbPath);
|
||||
expect(
|
||||
sqliteDatabasePathFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
projectDir: tempDir,
|
||||
connection: { driver: 'sqlite', url: 'env:KTX_SQLITE_TEST_URL' },
|
||||
}),
|
||||
).toBe(dbPath);
|
||||
expect(
|
||||
sqliteDatabasePathFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
projectDir: tempDir,
|
||||
connection: { driver: 'sqlite', url: `file://${dbPath}` },
|
||||
}),
|
||||
).toBe(dbPath);
|
||||
expect(
|
||||
sqliteDatabasePathFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
projectDir: tempDir,
|
||||
connection: { driver: 'sqlite', path: `file:${pointerPath}` },
|
||||
}),
|
||||
).toBe(dbPath);
|
||||
expect(
|
||||
sqliteDatabasePathFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
projectDir: tempDir,
|
||||
connection: { driver: 'sqlite', path: 'warehouse.db' },
|
||||
}),
|
||||
).toBe(dbPath);
|
||||
expect(() =>
|
||||
sqliteDatabasePathFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
projectDir: tempDir,
|
||||
connection: { driver: 'sqlite', file_path: 'warehouse.db' },
|
||||
}),
|
||||
).toThrow('Native SQLite connector requires connections.warehouse.path or url');
|
||||
} finally {
|
||||
if (originalDatabaseUrl === undefined) {
|
||||
delete process.env.KTX_SQLITE_TEST_URL;
|
||||
} else {
|
||||
process.env.KTX_SQLITE_TEST_URL = originalDatabaseUrl;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it('introspects schema, primary keys, row counts, views, and foreign keys', async () => {
|
||||
const connector = new KtxSqliteScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: { driver: 'sqlite', path: dbPath },
|
||||
now: () => new Date('2026-04-29T10:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await connector.introspect(
|
||||
{ connectionId: 'warehouse', driver: 'sqlite' },
|
||||
{ runId: 'scan-run-1' },
|
||||
);
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
driver: 'sqlite',
|
||||
extractedAt: '2026-04-29T10:00:00.000Z',
|
||||
metadata: {
|
||||
file_path: dbPath,
|
||||
table_count: 3,
|
||||
total_columns: 11,
|
||||
},
|
||||
});
|
||||
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows])).toEqual([
|
||||
['customers', 'table', 2],
|
||||
['orders', 'table', 2],
|
||||
['recent_orders', 'view', null],
|
||||
]);
|
||||
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
|
||||
name: 'id',
|
||||
nativeType: 'INTEGER',
|
||||
normalizedType: 'INTEGER',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
|
||||
{
|
||||
fromColumn: 'customer_id',
|
||||
toCatalog: null,
|
||||
toDb: null,
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
constraintName: null,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('runs samples, distinct values, statistics, and read-only SQL', async () => {
|
||||
const connector = new KtxSqliteScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: { driver: 'sqlite', path: dbPath },
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.sampleTable(
|
||||
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, columns: ['id'], limit: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toEqual({ headers: ['id'], rows: [[10]], totalRows: 1 });
|
||||
|
||||
await expect(
|
||||
connector.sampleColumn(
|
||||
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status', limit: 5 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
|
||||
|
||||
await expect(
|
||||
connector.getColumnDistinctValues(
|
||||
{ catalog: null, db: null, name: 'orders' },
|
||||
'status',
|
||||
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
|
||||
),
|
||||
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly(
|
||||
{ connectionId: 'warehouse', sql: 'select id, status from orders order by id', maxRows: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
|
||||
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
|
||||
|
||||
await expect(
|
||||
connector.columnStats(
|
||||
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status' },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it('adapts native SQLite snapshots to live-database introspection for local ingest', async () => {
|
||||
const introspection = createSqliteLiveDatabaseIntrospection({
|
||||
projectDir: tempDir,
|
||||
connections: {
|
||||
warehouse: { driver: 'sqlite', path: 'warehouse.db' },
|
||||
},
|
||||
now: () => new Date('2026-04-29T10:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await introspection.extractSchema('warehouse');
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
extractedAt: '2026-04-29T10:00:00.000Z',
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
|
||||
name: 'customers',
|
||||
catalog: null,
|
||||
db: null,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'INTEGER',
|
||||
normalizedType: 'INTEGER',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: null,
|
||||
},
|
||||
{
|
||||
name: 'name',
|
||||
nativeType: 'TEXT',
|
||||
normalizedType: 'TEXT',
|
||||
dimensionType: 'string',
|
||||
nullable: false,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
{
|
||||
name: 'tier',
|
||||
nativeType: 'TEXT',
|
||||
normalizedType: 'TEXT',
|
||||
dimensionType: 'string',
|
||||
nullable: true,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'orders')).toMatchObject({
|
||||
name: 'orders',
|
||||
catalog: null,
|
||||
db: null,
|
||||
foreignKeys: [{ fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }],
|
||||
});
|
||||
});
|
||||
});
|
||||
368
packages/cli/src/connectors/sqlite/connector.ts
Normal file
368
packages/cli/src/connectors/sqlite/connector.ts
Normal file
|
|
@ -0,0 +1,368 @@
|
|||
import Database from 'better-sqlite3';
|
||||
import { existsSync, readFileSync, statSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { isAbsolute, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { assertReadOnlySql, limitSqlForExecution, normalizeQueryRows } from '../../context/connections/index.js';
|
||||
import {
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
type KtxColumnStatsResult,
|
||||
type KtxQueryResult,
|
||||
type KtxReadOnlyQueryInput,
|
||||
type KtxScanConnector,
|
||||
type KtxScanContext,
|
||||
type KtxScanInput,
|
||||
type KtxSchemaForeignKey,
|
||||
type KtxSchemaSnapshot,
|
||||
type KtxSchemaTable,
|
||||
type KtxTableRef,
|
||||
type KtxTableSampleInput,
|
||||
type KtxTableSampleResult,
|
||||
} from '../../context/scan/index.js';
|
||||
import { KtxSqliteDialect } from './dialect.js';
|
||||
|
||||
/**
 * Connection settings accepted by the native SQLite connector.
 *
 * - `driver`: compared case-insensitively against `sqlite` / `sqlite3` by
 *   `isKtxSqliteConnectionConfig`.
 * - `path`: database file location; takes precedence over `url` in
 *   `sqliteDatabasePathFromConfig`.
 * - `url`: fallback location, read only when `path` yields no value.
 * - Any other key is allowed and carried as `unknown`.
 */
export interface KtxSqliteConnectionConfig {
|
||||
driver?: string;
|
||||
path?: string;
|
||||
url?: string;
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
/**
 * Input for `sqliteDatabasePathFromConfig`.
 *
 * - `connectionId`: used in the error message when neither `path` nor `url` is configured.
 * - `projectDir`: base directory for relative database paths; `process.cwd()` is used when omitted.
 * - `connection`: the raw connection config, or `undefined` when the connection is not defined.
 */
export interface SqliteDatabasePathInput {
|
||||
connectionId: string;
|
||||
projectDir?: string;
|
||||
connection: KtxSqliteConnectionConfig | undefined;
|
||||
}
|
||||
|
||||
/**
 * Constructor options for `KtxSqliteScanConnector`.
 *
 * `now` supplies the clock used for the snapshot's `extractedAt` timestamp;
 * the connector falls back to `() => new Date()` when it is omitted.
 */
export interface KtxSqliteScanConnectorOptions extends SqliteDatabasePathInput {
|
||||
now?: () => Date;
|
||||
}
|
||||
|
||||
/**
 * Read-only query input extended with optional bind parameters.
 *
 * `params` is handed to the prepared statement's `all()` call as-is, either as a
 * record or as an array.
 */
export interface KtxSqliteReadOnlyQueryInput extends KtxReadOnlyQueryInput {
|
||||
params?: Record<string, unknown> | unknown[];
|
||||
}
|
||||
|
||||
/**
 * Options for `KtxSqliteScanConnector.getColumnDistinctValues`.
 *
 * - `maxCardinality`: when the sampled distinct count exceeds this, no values are fetched.
 * - `limit`: maximum number of distinct values to return.
 * - `sampleSize`: number of non-null rows sampled for the cardinality estimate (defaults to 10000).
 */
export interface KtxSqliteColumnDistinctValuesOptions {
|
||||
maxCardinality: number;
|
||||
limit: number;
|
||||
sampleSize?: number;
|
||||
}
|
||||
|
||||
/**
 * Result of `KtxSqliteScanConnector.getColumnDistinctValues`.
 *
 * - `values`: the distinct values rendered as strings, or `null` when the cardinality
 *   exceeded the caller's `maxCardinality`.
 * - `cardinality`: the distinct count measured on the sample.
 */
export interface KtxSqliteColumnDistinctValuesResult {
|
||||
values: string[] | null;
|
||||
cardinality: number;
|
||||
}
|
||||
|
||||
/** Row shape of the `SELECT name, type FROM sqlite_master` listing used during introspection. */
interface SqliteMasterRow {
|
||||
name: string;
|
||||
type: 'table' | 'view';
|
||||
}
|
||||
|
||||
/**
 * Row shape read from `PRAGMA table_info(...)`.
 *
 * The connector treats `notnull === 0 && pk === 0` as nullable and `pk > 0` as
 * part of the primary key; `cid` and `dflt_value` are not consumed by it.
 */
interface SqliteTableInfoRow {
|
||||
cid: number;
|
||||
name: string;
|
||||
type: string;
|
||||
notnull: number;
|
||||
dflt_value: unknown;
|
||||
pk: number;
|
||||
}
|
||||
|
||||
/**
 * Row shape read from `PRAGMA foreign_key_list(...)`.
 *
 * Rows are ordered by `id` then `seq` before being mapped; `from` is the local
 * column, `table` / `to` are the referenced table and column.
 */
interface SqliteForeignKeyRow {
|
||||
id: number;
|
||||
|
||||
seq: number;
|
||||
table: string;
|
||||
from: string;
|
||||
to: string;
|
||||
}
|
||||
|
||||
/**
 * Reads one connection setting as a non-empty string.
 *
 * Non-string and whitespace-only values yield `undefined`; anything else is trimmed
 * and passed through `resolveStringReference` so `env:` / `file:` indirections are
 * expanded.
 */
function stringConfigValue(
  connection: KtxSqliteConnectionConfig | undefined,
  key: keyof KtxSqliteConnectionConfig,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(key, trimmed);
}
|
||||
|
||||
/**
 * Expands the indirection prefixes supported in connection settings.
 *
 * - `env:NAME` returns `process.env.NAME`, or `''` when the variable is unset.
 * - `file:PATH` (for every key except `url`) returns the trimmed UTF-8 contents of the
 *   file at PATH. A leading `~` is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param key   The setting being resolved; `url` values keep a `file:` prefix verbatim.
 * @param value The already-trimmed configured string.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a `file:` reference cannot be read.
 */
function resolveStringReference(key: keyof KtxSqliteConnectionConfig, value: string): string {
  if (value.startsWith('env:')) {
    return process.env[value.slice('env:'.length)] ?? '';
  }
  // `file:` on the `url` key is SQLite's native URI form (e.g. `file:///db.sqlite`), not a
  // file-contents reference — skip the read so the URI passes through verbatim.
  if (key !== 'url' && value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Strip the separator(s) that follow `~` before joining: `resolve(home, '/x')` discards
    // `home` because the second segment is absolute, which made `~/x` resolve to `/x`.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Converts a configured SQLite URL into a filesystem path.
 *
 * `file:` URLs go through `fileURLToPath`; `sqlite:` URLs yield their percent-decoded
 * pathname; any other string is assumed to already be a path and is returned as-is.
 */
function sqlitePathFromUrl(url: string): string {
  const scheme = ['file:', 'sqlite:'].find((prefix) => url.startsWith(prefix));
  switch (scheme) {
    case 'file:':
      return fileURLToPath(url);
    case 'sqlite:':
      return decodeURIComponent(new URL(url).pathname);
    default:
      return url;
  }
}
|
||||
|
||||
/**
 * Removes leading whitespace, `-- line` comments and block comments from a SQL string.
 *
 * Scanning stops at the first character that is neither whitespace nor the start of a
 * comment. An unterminated block comment is returned untouched (from its opening
 * delimiter); an unterminated line comment consumes the rest of the input.
 */
function stripLeadingSqlComments(sql: string): string {
  const whitespace = /\s/;
  let cursor = 0;
  for (;;) {
    while (cursor < sql.length && whitespace.test(sql.charAt(cursor))) {
      cursor += 1;
    }
    const isLineComment = sql.startsWith('--', cursor);
    const isBlockComment = !isLineComment && sql.startsWith('/*', cursor);
    if (!isLineComment && !isBlockComment) {
      break;
    }
    const terminator = isLineComment ? '\n' : '*/';
    const end = sql.indexOf(terminator, cursor + 2);
    if (end === -1) {
      // A line comment without a newline swallows everything; an unclosed block
      // comment is left in place for the caller to reject.
      return isLineComment ? '' : sql.slice(cursor);
    }
    cursor = end + terminator.length;
  }
  return sql.slice(cursor);
}
|
||||
|
||||
/**
 * Type guard: true when the connection's `driver` is `sqlite` or `sqlite3`
 * (case-insensitive). A missing connection or driver is not a SQLite config.
 */
export function isKtxSqliteConnectionConfig(
  connection: KtxSqliteConnectionConfig | undefined,
): connection is KtxSqliteConnectionConfig {
  const normalizedDriver = String(connection?.driver ?? '').toLowerCase();
  return ['sqlite', 'sqlite3'].includes(normalizedDriver);
}
|
||||
|
||||
/**
 * Works out the absolute database file path for a SQLite connection.
 *
 * `path` wins over `url`; a relative result is resolved against `projectDir`
 * (or the current working directory when no project directory is given).
 *
 * @throws Error when the connection's driver is not SQLite.
 * @throws Error when neither `path` nor `url` produces a location.
 */
export function sqliteDatabasePathFromConfig(input: SqliteDatabasePathInput): string {
  // Captured before the guard: after a failed guard the connection is narrowed away.
  const inputDriver = input.connection?.driver ?? 'unknown';
  const { connection, connectionId, projectDir } = input;
  if (!isKtxSqliteConnectionConfig(connection)) {
    throw new Error(`Native SQLite connector cannot run driver "${inputDriver}"`);
  }

  let location = stringConfigValue(connection, 'path');
  if (location === undefined) {
    location = sqlitePathFromUrl(stringConfigValue(connection, 'url') ?? '');
  }
  if (!location) {
    throw new Error(`Native SQLite connector requires connections.${connectionId}.path or url`);
  }

  if (isAbsolute(location)) {
    return location;
  }
  return resolve(projectDir ?? process.cwd(), location);
}
|
||||
|
||||
/**
 * Scan connector for a single SQLite database file.
 *
 * The file is opened lazily, read-only, through better-sqlite3 and the handle is kept
 * until `cleanup()` is called. Every method that receives a `connectionId` rejects ids
 * other than the one the connector was constructed for.
 */
export class KtxSqliteScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'sqlite' as const;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: false,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly dbPath: string;
  private readonly now: () => Date;
  private readonly dialect = new KtxSqliteDialect();
  private db: Database.Database | null = null;

  /**
   * @param options Connection id/config, optional project directory and optional clock.
   * @throws Error when the config is not a SQLite connection or has no `path`/`url`.
   */
  constructor(options: KtxSqliteScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.dbPath = sqliteDatabasePathFromConfig(options);
    this.now = options.now ?? (() => new Date());
    this.id = `sqlite:${options.connectionId}`;
  }

  /**
   * Checks that the database file exists and answers `SELECT 1`.
   * Never throws: failures are reported through `{ success: false, error }`.
   */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      if (!existsSync(this.dbPath) || !statSync(this.dbPath).isFile()) {
        return { success: false, error: `File not found: ${this.dbPath}` };
      }
      this.database().prepare('SELECT 1').get();
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Reads every user table and view (name-ordered) with columns, foreign keys and row
   * counts, plus file-level metadata.
   *
   * @throws Error when `input.connectionId` is not this connector's connection.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const database = this.database();
    // `_` is a single-character wildcard in LIKE, so the underscore is escaped; without
    // ESCAPE the filter would also hide user tables such as `sqlitedata`.
    const rawTables = database
      .prepare(
        `SELECT name, type FROM sqlite_master WHERE type IN ('table', 'view') AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\' ORDER BY name`,
      )
      .all() as SqliteMasterRow[];
    const tables = rawTables.map((table) => this.readTable(database, table));
    const fileStats = existsSync(this.dbPath) ? statSync(this.dbPath) : null;
    return {
      connectionId: this.connectionId,
      driver: 'sqlite',
      extractedAt: this.now().toISOString(),
      scope: {},
      metadata: {
        file_path: this.dbPath,
        file_size: fileStats ? fileStats.size : 0,
        table_count: tables.length,
        total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables,
    };
  }

  /**
   * Returns up to `input.limit` rows of a table, optionally restricted to `input.columns`.
   *
   * @throws Error when `input.connectionId` is not this connector's connection.
   */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
    return { headers: result.headers, rows: result.rows, totalRows: result.totalRows };
  }

  /**
   * Returns up to `input.limit` non-null values of one column.
   * `nullCount` and `distinctCount` are not computed and are always `null`.
   *
   * @throws Error when `input.connectionId` is not this connector's connection.
   */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /** Column statistics are not supported (`capabilities.columnStats` is false); always `null`. */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /**
   * Runs caller-supplied SQL after stripping leading comments, applying the row limit
   * and passing the read-only check.
   *
   * @throws Error when the connection id does not match or the SQL is rejected as not read-only.
   */
  async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Estimates a column's cardinality on a sample and, when it is small enough, lists the
   * distinct values.
   *
   * @returns `null` when the cardinality query yields nothing usable; `values: null`
   *   when the cardinality exceeds `options.maxCardinality`; otherwise the non-null
   *   distinct values as strings.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxSqliteColumnDistinctValuesOptions,
  ): Promise<KtxSqliteColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityResult = this.query(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    if (cardinalityResult.rows.length === 0) {
      return null;
    }
    const cardinality = Number(cardinalityResult.rows[0][0]);
    if (Number.isNaN(cardinality)) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesResult = this.query(this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit));
    return {
      values: valuesResult.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => String(row[0])),
      cardinality,
    };
  }

  /** Exact `COUNT(*)` of a table; 0 when the query returns no row. */
  async getTableRowCount(tableName: string): Promise<number> {
    const result = this.query(`SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(tableName)}`);
    return Number(result.rows[0]?.[0] ?? 0);
  }

  /** Quoted table name for use in SQL; only the table's `name` is used. */
  qTableName(table: Pick<KtxTableRef, 'name'>): string {
    return this.dialect.formatTableName(table);
  }

  /** Double-quotes an identifier using the SQLite dialect's escaping rules. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Closes the database handle if one is open; safe to call repeatedly. */
  async cleanup(): Promise<void> {
    if (this.db) {
      this.db.close();
      this.db = null;
    }
  }

  /** Lazily opens the database read-only; the file must already exist. */
  private database(): Database.Database {
    if (!this.db) {
      this.db = new Database(this.dbPath, { readonly: true, fileMustExist: true });
    }
    return this.db;
  }

  /** Runs one statement that passes `assertReadOnlySql` and returns headers plus normalized rows. */
  private query(sql: string, params?: Record<string, unknown> | unknown[]): Omit<KtxQueryResult, 'rowCount'> {
    const statement = this.database().prepare(assertReadOnlySql(sql));
    const rows = (params ? statement.all(params) : statement.all()) as unknown[];
    return {
      headers: statement.columns().map((column) => column.name),
      rows: normalizeQueryRows(rows),
      totalRows: rows.length,
    };
  }

  /** Builds the schema entry for one table or view from PRAGMA data; views get no row count. */
  private readTable(database: Database.Database, table: SqliteMasterRow): KtxSchemaTable {
    const columns = database
      .prepare(`PRAGMA table_info(${this.dialect.quoteIdentifier(table.name)})`)
      .all() as SqliteTableInfoRow[];
    const foreignKeys = database
      .prepare(`PRAGMA foreign_key_list(${this.dialect.quoteIdentifier(table.name)})`)
      .all() as SqliteForeignKeyRow[];
    const estimatedRows =
      table.type === 'table'
        ? Number(
            (
              database
                .prepare(`SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(table.name)}`)
                .get() as { count: unknown }
            ).count,
          )
        : null;
    return {
      catalog: null,
      db: null,
      name: table.name,
      kind: table.type,
      comment: null,
      estimatedRows,
      columns: columns.map((column) => ({
        name: column.name,
        nativeType: column.type,
        normalizedType: this.dialect.mapDataType(column.type),
        dimensionType: this.dialect.mapToDimensionType(column.type),
        // Primary-key columns are reported as non-nullable even without NOT NULL.
        nullable: column.notnull === 0 && column.pk === 0,
        primaryKey: column.pk > 0,
        comment: null,
      })),
      foreignKeys: this.mapForeignKeys(foreignKeys),
    };
  }

  /** Maps PRAGMA foreign-key rows, ordered by constraint id then column sequence. */
  private mapForeignKeys(rows: SqliteForeignKeyRow[]): KtxSchemaForeignKey[] {
    // Sort a copy so the caller's array is left untouched.
    return [...rows]
      .sort((a, b) => a.id - b.id || a.seq - b.seq)
      .map((row) => ({
        fromColumn: row.from,
        toCatalog: null,
        toDb: null,
        toTable: row.table,
        toColumn: row.to,
        constraintName: null,
      }));
  }

  /** @throws Error when asked to serve a connection other than the one configured. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`KTX SQLite connector ${this.id} cannot serve connection ${connectionId}`);
    }
  }
}
|
||||
33
packages/cli/src/connectors/sqlite/dialect.test.ts
Normal file
33
packages/cli/src/connectors/sqlite/dialect.test.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxSqliteDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the SQLite dialect: identifier quoting, type mapping and SQL builders.
describe('KtxSqliteDialect', () => {
  const dialect = new KtxSqliteDialect();

  it('quotes identifiers and formats single-file SQLite table names', () => {
    const quotingCases: Array<[string, string]> = [
      ['orders', '"orders"'],
      ['weird"name', '"weird""name"'],
    ];
    for (const [identifier, expected] of quotingCases) {
      expect(dialect.quoteIdentifier(identifier)).toBe(expected);
    }
    // Catalog and db are ignored: a SQLite file has a single namespace.
    const tableRef = { catalog: 'ignored', db: 'ignored', name: 'orders' };
    expect(dialect.formatTableName(tableRef)).toBe('"orders"');
  });

  it('maps native SQLite types to KTX dimension types', () => {
    const typeCases: Array<[string, string]> = [
      ['INTEGER', 'number'],
      ['numeric(10,2)', 'number'],
      ['timestamp', 'time'],
      ['VARCHAR(255)', 'string'],
      ['bool', 'boolean'],
      ['', 'string'],
    ];
    for (const [nativeType, expected] of typeCases) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(expected);
    }
  });

  it('builds sampling and distinct-value SQL without host-specific state', () => {
    const tableSample = dialect.generateSampleQuery('"orders"', 25, ['id', 'status']);
    expect(tableSample).toBe('SELECT "id", "status" FROM "orders" LIMIT 25');

    const columnSample = dialect.generateColumnSampleQuery('"orders"', 'status', 10);
    expect(columnSample).toBe(
      'SELECT "status" FROM "orders" WHERE "status" IS NOT NULL AND TRIM(CAST("status" AS TEXT)) != \'\' LIMIT 10',
    );

    const distinctValues = dialect.generateDistinctValuesQuery('"orders"', '"status"', 5);
    expect(distinctValues).toContain('SELECT DISTINCT CAST("status" AS TEXT) AS val');
  });
});
|
||||
177
packages/cli/src/connectors/sqlite/dialect.ts
Normal file
177
packages/cli/src/connectors/sqlite/dialect.ts
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/index.js';
|
||||
|
||||
type SqliteTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL text helpers for SQLite: identifier quoting, native-type classification and
 * builders for sampling, cardinality and time-bucketing expressions.
 *
 * The class holds no connection state; every method only returns strings.
 */
export class KtxSqliteDialect {
  readonly type = 'sqlite';

  /** Exact (upper-cased, length-stripped) native type names and their dimension type. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    DATETIME: 'time',
    DATE: 'time',
    TIMESTAMP: 'time',
    TIME: 'time',
    INTEGER: 'number',
    INT: 'number',
    REAL: 'number',
    NUMERIC: 'number',
    FLOAT: 'number',
    DOUBLE: 'number',
    TEXT: 'string',
    VARCHAR: 'string',
    CHAR: 'string',
    BLOB: 'string',
    BOOLEAN: 'boolean',
    BOOL: 'boolean',
  };

  /** Wraps an identifier in double quotes, doubling any embedded double quote. */
  quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /** Quoted table name; `catalog` and `db` are ignored. */
  formatTableName(table: SqliteTableNameRef): string {
    return this.quoteIdentifier(table.name);
  }

  /** SQLite native types are used as the normalized type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a declared column type as `time`, `number`, `boolean` or `string`.
   *
   * The type is upper-cased and any `(...)` suffix is dropped; an exact table lookup is
   * tried first, then substring heuristics. Empty or unrecognised types map to `string`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    let normalized = nativeType.toUpperCase().trim();
    if (normalized.includes('(')) {
      normalized = normalized.split('(')[0];
    }
    const exact = this.typeMappings[normalized];
    if (exact) {
      return exact;
    }
    if (normalized.includes('TIME') || normalized.includes('DATE')) {
      return 'time';
    }
    if (
      normalized.includes('INT') ||
      normalized.includes('NUM') ||
      normalized.includes('REAL') ||
      normalized.includes('FLOAT') ||
      normalized.includes('DOUBLE')
    ) {
      return 'number';
    }
    if (normalized.includes('BOOL')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * `SELECT ... LIMIT n` over an already-quoted table name.
   * Column names are quoted here; `*` is used when no columns are given.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /** Samples one column (quoted here), skipping NULLs and values that are blank as text. */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quoted = this.quoteIdentifier(columnName);
    return `SELECT ${quoted} FROM ${tableName} WHERE ${quoted} IS NOT NULL AND TRIM(CAST(${quoted} AS TEXT)) != '' LIMIT ${limit}`;
  }

  /** Pairs SQL with its parameters; the SQL text is passed through unchanged. */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown } {
    return params ? { sql, params } : { sql };
  }

  /**
   * Row-level random sampling predicate keeping roughly `samplePct` (0–1 exclusive) of rows.
   * Returns `''` (no filter) when the fraction is outside the open interval (0, 1).
   */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    // RANDOM() is a signed 64-bit integer and `%` keeps the dividend's sign, so the
    // remainder spans -99..99; ABS folds it to 0..99 so negatives don't always pass.
    return `ABS(RANDOM() % 100) < ${Math.round(samplePct * 100)}`;
  }

  /** No table-sample clause is emitted for SQLite; always `''`. */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** `LIMIT n` with an `OFFSET m` suffix only when the offset is positive. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting NULLs of an already-quoted column expression. */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** Aggregate expression counting distinct values of an already-quoted column expression. */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Distinct count over the first `sampleSize` non-null values of a column.
   * `tableName` and `columnName` must already be quoted.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Up to `limit` distinct non-null values of a column, cast to text and sorted.
   * `tableName` and `columnName` must already be quoted.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT CAST(${columnName} AS TEXT) AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /** No column-statistics query is provided for SQLite; always `null`. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Like `generateCardinalitySampleQuery`, but samples rows in random order. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RANDOM()
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Expression truncating a date/time column to the start of the given period.
   * Weeks start on Monday; the timezone argument is ignored.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    _timezone?: string,
  ): string {
    switch (granularity) {
      case 'day':
        return `DATE(${column})`;
      case 'week':
        // Advance to Sunday (or stay on it), then step back six days to Monday.
        return `DATE(${column}, 'weekday 0', '-6 days')`;
      case 'month':
        return `DATE(${column}, 'start of month')`;
      case 'quarter':
        return `DATE(${column}, 'start of month', '-' || ((CAST(STRFTIME('%m', ${column}) AS INTEGER) - 1) % 3) || ' months')`;
      case 'year':
        return `DATE(${column}, 'start of year')`;
    }
  }

  /**
   * Expression bucketing a date/time column into fixed-width intervals.
   *
   * @param column   Column expression (already quoted).
   * @param interval `"<amount> <unit>"`, e.g. `"2 weeks"`. `day(s)` = 1 day, `week(s)` = 7 days;
   *                 any other unit is approximated as 30 days.
   * @param origin   Date the buckets are aligned to; defaults to `1970-01-01`.
   * @param _timezone Ignored.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, _timezone?: string): string {
    const [amount, rawUnit] = interval.trim().split(/\s+/);
    // Accept plural and mixed-case units; previously "weeks"/"days" fell through to 30 days.
    const unit = (rawUnit ?? '').toLowerCase().replace(/s$/, '');
    const originExpr = origin ? `julianday('${origin}')` : `julianday('1970-01-01')`;
    const unitDays = unit === 'day' ? 1 : unit === 'week' ? 7 : 30;
    const intervalDays = Number(amount) * unitDays;
    // Bucket offsets are measured from the origin, so they must be added back to the
    // origin (not to the epoch) to land on a real bucket boundary.
    return `DATE(${originExpr} + (CAST((julianday(${column}) - ${originExpr}) / ${intervalDays} AS INTEGER) * ${intervalDays}))`;
  }

  /** Renders an interval as a single-quoted SQL string literal. */
  parseIntervalToSql(interval: string): string {
    return `'${interval}'`;
  }
}
|
||||
16
packages/cli/src/connectors/sqlite/index.ts
Normal file
16
packages/cli/src/connectors/sqlite/index.ts
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
export { KtxSqliteDialect } from './dialect.js';
|
||||
export {
|
||||
isKtxSqliteConnectionConfig,
|
||||
KtxSqliteScanConnector,
|
||||
sqliteDatabasePathFromConfig,
|
||||
type KtxSqliteColumnDistinctValuesOptions,
|
||||
type KtxSqliteColumnDistinctValuesResult,
|
||||
type KtxSqliteConnectionConfig,
|
||||
type KtxSqliteReadOnlyQueryInput,
|
||||
type KtxSqliteScanConnectorOptions,
|
||||
type SqliteDatabasePathInput,
|
||||
} from './connector.js';
|
||||
export {
|
||||
createSqliteLiveDatabaseIntrospection,
|
||||
type CreateSqliteLiveDatabaseIntrospectionOptions,
|
||||
} from './live-database-introspection.js';
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/index.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/index.js';
|
||||
import { KtxSqliteScanConnector, type KtxSqliteConnectionConfig } from './connector.js';
|
||||
|
||||
/**
 * Options for `createSqliteLiveDatabaseIntrospection`.
 *
 * - `projectDir`: forwarded to each connector as the base for relative database paths.
 * - `connections`: project connection configs keyed by connection id.
 * - `now`: optional clock forwarded to each connector.
 */
export interface CreateSqliteLiveDatabaseIntrospectionOptions {
|
||||
projectDir?: string;
|
||||
connections: Record<string, KtxProjectConnectionConfig>;
|
||||
now?: () => Date;
|
||||
}
|
||||
|
||||
/**
 * Builds a live-database introspection port backed by the native SQLite connector.
 *
 * Each `extractSchema` call creates a fresh connector for the requested connection,
 * introspects it, and always releases the database handle afterwards — also when
 * introspection throws.
 */
export function createSqliteLiveDatabaseIntrospection(
  options: CreateSqliteLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    const connector = new KtxSqliteScanConnector({
      connectionId,
      // An unknown id yields `undefined`, which the connector constructor rejects.
      connection: options.connections[connectionId] as KtxSqliteConnectionConfig | undefined,
      projectDir: options.projectDir,
      now: options.now,
    });
    try {
      return await connector.introspect({ connectionId, driver: 'sqlite' }, { runId: `sqlite-${connectionId}` });
    } finally {
      await connector.cleanup();
    }
  };

  return { extractSchema };
}
|
||||
347
packages/cli/src/connectors/sqlserver/connector.test.ts
Normal file
347
packages/cli/src/connectors/sqlserver/connector.test.ts
Normal file
|
|
@ -0,0 +1,347 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
createSqlServerLiveDatabaseIntrospection,
|
||||
isKtxSqlServerConnectionConfig,
|
||||
KtxSqlServerScanConnector,
|
||||
sqlServerConnectionPoolConfigFromConfig,
|
||||
type KtxSqlServerPoolFactory,
|
||||
type KtxSqlServerQueryResult,
|
||||
} from './index.js';
|
||||
|
||||
/**
 * Test helper: attaches a `columns` map (every column declared as `nvarchar`) to the
 * given row array, mimicking the shape of a driver recordset. The same array instance
 * is returned.
 */
function recordset<T extends Record<string, unknown>>(
  rows: T[],
  columnNames: string[],
): T[] & { columns: Record<string, { type: { declaration: string } }> } {
  const columnEntries = columnNames.map((name): [string, { type: { declaration: string } }] => [
    name,
    { type: { declaration: 'nvarchar' } },
  ]);
  return Object.assign(rows, { columns: Object.fromEntries(columnEntries) });
}
|
||||
|
||||
/** Test helper: wraps rows and column names into a query result with a `recordset` field. */
function result<T extends Record<string, unknown>>(rows: T[], columnNames: string[]): KtxSqlServerQueryResult {
  const rowsWithColumns = recordset(rows, columnNames);
  return { recordset: rowsWithColumns };
}
|
||||
|
||||
/**
 * Builds an in-memory pool factory for the connector tests.
 *
 * The fake `query` answers by looking for known SQL fragments in the incoming
 * statement and returning a canned result; any statement that matches no
 * route rejects with `Unexpected SQL: …`. All requests share one request
 * object whose `input` is a chainable no-op spy.
 */
function fakePoolFactory(): KtxSqlServerPoolFactory {
  interface Route {
    matches: (sql: string) => boolean;
    respond: () => KtxSqlServerQueryResult;
  }

  const containsAll =
    (...fragments: string[]) =>
    (sql: string): boolean =>
      fragments.every((fragment) => sql.includes(fragment));

  // Order matters: the first matching route wins.
  const routes: Route[] = [
    {
      matches: containsAll('INFORMATION_SCHEMA.TABLES'),
      respond: () =>
        result(
          [
            { table_name: 'customers', table_type: 'BASE TABLE' },
            { table_name: 'orders', table_type: 'BASE TABLE' },
            { table_name: 'order_summary', table_type: 'VIEW' },
          ],
          ['table_name', 'table_type'],
        ),
    },
    {
      matches: containsAll("ep.name = 'MS_Description'", 'ep.minor_id = 0'),
      respond: () =>
        result([{ table_name: 'customers', table_comment: 'Customer table' }], ['table_name', 'table_comment']),
    },
    {
      matches: containsAll("ep.name = 'MS_Description'", 'ep.minor_id = c.column_id'),
      respond: () =>
        result(
          [{ table_name: 'customers', column_name: 'id', column_comment: 'PK' }],
          ['table_name', 'column_name', 'column_comment'],
        ),
    },
    {
      matches: containsAll('INFORMATION_SCHEMA.COLUMNS'),
      respond: () =>
        result(
          [
            { table_name: 'customers', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
            { table_name: 'customers', column_name: 'name', data_type: 'nvarchar', is_nullable: 'NO' },
            { table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
            { table_name: 'orders', column_name: 'customer_id', data_type: 'int', is_nullable: 'NO' },
            { table_name: 'orders', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
            { table_name: 'order_summary', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
          ],
          ['table_name', 'column_name', 'data_type', 'is_nullable'],
        ),
    },
    {
      matches: containsAll("CONSTRAINT_TYPE = 'PRIMARY KEY'"),
      respond: () =>
        result(
          [
            { table_name: 'customers', column_name: 'id' },
            { table_name: 'orders', column_name: 'id' },
          ],
          ['table_name', 'column_name'],
        ),
    },
    {
      matches: containsAll('REFERENTIAL_CONSTRAINTS'),
      respond: () =>
        result(
          [
            {
              table_name: 'orders',
              column_name: 'customer_id',
              referenced_table_schema: 'dbo',
              referenced_table_name: 'customers',
              referenced_column_name: 'id',
              constraint_name: 'orders_customer_id_fk',
            },
          ],
          [
            'table_name',
            'column_name',
            'referenced_table_schema',
            'referenced_table_name',
            'referenced_column_name',
            'constraint_name',
          ],
        ),
    },
    {
      matches: containsAll('sys.partitions', 'GROUP BY t.name'),
      respond: () =>
        result(
          [
            { table_name: 'customers', row_count: 2 },
            { table_name: 'orders', row_count: 2 },
          ],
          ['table_name', 'row_count'],
        ),
    },
    {
      matches: containsAll('SELECT TOP 1 [id], [status] FROM [dbo].[orders]'),
      respond: () => result([{ id: 10, status: 'paid' }], ['id', 'status']),
    },
    {
      matches: containsAll('SELECT TOP 1 * FROM (select id, status from dbo.orders) AS ktx_query_result'),
      respond: () => result([{ id: 10, status: 'paid' }], ['id', 'status']),
    },
    {
      matches: containsAll('SELECT TOP 5 [status] FROM [dbo].[orders]'),
      respond: () => result([{ status: 'paid' }, { status: 'open' }], ['status']),
    },
    {
      matches: containsAll('COUNT(DISTINCT val)'),
      respond: () => result([{ cardinality: 2 }], ['cardinality']),
    },
    {
      matches: containsAll('SELECT TOP 10 val'),
      respond: () => result([{ val: 'open' }, { val: 'paid' }], ['val']),
    },
    {
      matches: containsAll('SUM(p.rows) AS row_count', 't.name = @tableName'),
      respond: () => result([{ row_count: 2 }], ['row_count']),
    },
    {
      matches: containsAll('SELECT s.name AS schema_name'),
      respond: () => result([{ schema_name: 'dbo' }, { schema_name: 'sales' }], ['schema_name']),
    },
    {
      matches: (sql) => sql.trim() === 'SELECT 1',
      respond: () => result([{ ok: 1 }], ['ok']),
    },
  ];

  const query = vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
    const route = routes.find((candidate) => candidate.matches(sql));
    if (route === undefined) {
      throw new Error(`Unexpected SQL: ${sql}`);
    }
    return route.respond();
  });

  // `input` ignores its arguments and returns the same request so calls can be chained.
  const request: { input(name: string, value: unknown): typeof request; query: typeof query } = {
    input: vi.fn((_key: string, _value: unknown) => request),
    query,
  };
  const close = vi.fn(async () => undefined);
  const createPool = vi.fn(async () => ({
    request: () => request,
    close,
  }));

  return { createPool };
}
|
||||
|
||||
// Connector behaviour is exercised against the canned responses of fakePoolFactory().
describe('KtxSqlServerScanConnector', () => {
  it('resolves SQL Server connection configuration safely', () => {
    const acceptsSqlServer = isKtxSqlServerConnectionConfig({
      driver: 'sqlserver',
      host: 'localhost',
      database: 'analytics',
    });
    expect(acceptsSqlServer).toBe(true);

    const acceptsMysql = isKtxSqlServerConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' });
    expect(acceptsMysql).toBe(false);

    const poolConfig = sqlServerConnectionPoolConfigFromConfig({
      connectionId: 'warehouse',
      connection: {
        driver: 'sqlserver',
        host: 'db.example.test',
        port: 14330,
        database: 'analytics',
        username: 'reader',
        trustServerCertificate: false,
      },
    });
    expect(poolConfig).toMatchObject({
      server: 'db.example.test',
      port: 14330,
      database: 'analytics',
      user: 'reader',
      options: { encrypt: true, trustServerCertificate: false },
    });
  });

  it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
    const scanConnector = new KtxSqlServerScanConnector({
      connectionId: 'warehouse',
      connection: {
        driver: 'sqlserver',
        host: 'db.example.test',
        database: 'analytics',
        username: 'reader',
        schema: 'dbo',
      },
      poolFactory: fakePoolFactory(),
      now: () => new Date('2026-04-29T16:00:00.000Z'),
    });

    const snapshot = await scanConnector.introspect(
      { connectionId: 'warehouse', driver: 'sqlserver' },
      { runId: 'scan-run-1' },
    );

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'sqlserver',
      extractedAt: '2026-04-29T16:00:00.000Z',
      scope: { catalogs: ['analytics'], schemas: ['dbo'] },
      metadata: {
        database: 'analytics',
        host: 'db.example.test',
        schemas: ['dbo'],
        table_count: 3,
        total_columns: 6,
      },
    });

    const tableSummaries = snapshot.tables.map((table) => [
      table.name,
      table.kind,
      table.estimatedRows,
      table.comment,
    ]);
    expect(tableSummaries).toEqual([
      ['customers', 'table', 2, 'Customer table'],
      ['orders', 'table', 2, null],
      ['order_summary', 'view', null, null],
    ]);

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'int',
      normalizedType: 'int',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
      comment: 'PK',
    });

    const orders = snapshot.tables.find((table) => table.name === 'orders');
    expect(orders?.foreignKeys).toEqual([
      {
        fromColumn: 'customer_id',
        toCatalog: 'analytics',
        toDb: 'dbo',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: 'orders_customer_id_fk',
      },
    ]);
  });

  it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
    const poolFactory = fakePoolFactory();
    const scanConnector = new KtxSqlServerScanConnector({
      connectionId: 'warehouse',
      connection: {
        driver: 'sqlserver',
        host: 'db.example.test',
        database: 'analytics',
        username: 'reader',
        schema: 'dbo',
      },
      poolFactory,
    });

    const tableSample = await scanConnector.sampleTable(
      {
        connectionId: 'warehouse',
        table: { catalog: 'analytics', db: 'dbo', name: 'orders' },
        columns: ['id', 'status'],
        limit: 1,
      },
      { runId: 'scan-run-1' },
    );
    expect(tableSample).toEqual({
      headers: ['id', 'status'],
      headerTypes: ['nvarchar', 'nvarchar'],
      rows: [[10, 'paid']],
      totalRows: 1,
    });

    const columnSample = await scanConnector.sampleColumn(
      { connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status', limit: 5 },
      { runId: 'scan-run-1' },
    );
    expect(columnSample).toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

    const distinctValues = await scanConnector.getColumnDistinctValues(
      { catalog: 'analytics', db: 'dbo', name: 'orders' },
      'status',
      { maxCardinality: 5, limit: 10, sampleSize: 100 },
    );
    expect(distinctValues).toEqual({ values: ['open', 'paid'], cardinality: 2 });

    const readOnlyResult = await scanConnector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'select id, status from dbo.orders', maxRows: 1 },
      { runId: 'scan-run-1' },
    );
    expect(readOnlyResult).toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });

    // Write statements must be rejected before any SQL reaches the pool.
    await expect(
      scanConnector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
    ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

    expect(await scanConnector.getTableRowCount('orders')).toBe(2);
    expect(await scanConnector.listSchemas()).toEqual(['dbo', 'sales']);

    const stats = await scanConnector.columnStats(
      { connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status' },
      { runId: 'scan-run-1' },
    );
    expect(stats).toBeNull();

    await scanConnector.cleanup();
  });

  it('adapts native SQL Server snapshots to live-database introspection for local ingest', async () => {
    const introspection = createSqlServerLiveDatabaseIntrospection({
      connections: {
        warehouse: {
          driver: 'sqlserver',
          host: 'db.example.test',
          database: 'analytics',
          username: 'reader',
          schema: 'dbo',
        },
      },
      poolFactory: fakePoolFactory(),
      now: () => new Date('2026-04-29T16:00:00.000Z'),
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T16:00:00.000Z',
    });

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers).toMatchObject({
      name: 'customers',
      catalog: 'analytics',
      db: 'dbo',
      columns: [
        {
          name: 'id',
          nativeType: 'int',
          normalizedType: 'int',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: 'PK',
        },
        {
          name: 'name',
          nativeType: 'nvarchar',
          normalizedType: 'nvarchar',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: null,
        },
      ],
      foreignKeys: [],
    });
  });
});
|
||||
727
packages/cli/src/connectors/sqlserver/connector.ts
Normal file
727
packages/cli/src/connectors/sqlserver/connector.ts
Normal file
|
|
@ -0,0 +1,727 @@
|
|||
import { assertReadOnlySql } from '../../context/connections/index.js';
|
||||
import {
|
||||
createKtxConnectorCapabilities,
|
||||
type KtxColumnSampleInput,
|
||||
type KtxColumnSampleResult,
|
||||
type KtxColumnStatsInput,
|
||||
type KtxColumnStatsResult,
|
||||
type KtxQueryResult,
|
||||
type KtxReadOnlyQueryInput,
|
||||
type KtxScanConnector,
|
||||
type KtxScanContext,
|
||||
type KtxScanInput,
|
||||
type KtxSchemaColumn,
|
||||
type KtxSchemaForeignKey,
|
||||
type KtxSchemaSnapshot,
|
||||
type KtxSchemaTable,
|
||||
type KtxTableListEntry,
|
||||
type KtxTableRef,
|
||||
type KtxTableSampleInput,
|
||||
type KtxTableSampleResult,
|
||||
} from '../../context/scan/index.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import sql from 'mssql';
|
||||
import { KtxSqlServerDialect } from './dialect.js';
|
||||
|
||||
/**
 * Connection settings accepted by the native SQL Server connector.
 *
 * String settings read through `stringConfigValue` may be given literally or
 * as `env:NAME` / `file:PATH` references. Settings derived from `url` are
 * overridden by fields set explicitly on the connection.
 */
export interface KtxSqlServerConnectionConfig {
  /** Must equal `sqlserver` (case-insensitive) for this connector to accept the connection. */
  driver?: string;
  /** Server host name; becomes `server` in the pool configuration. */
  host?: string;
  /** TCP port; the pool configuration falls back to 1433 when this is not a finite number. */
  port?: number;
  /** Database name; required unless supplied by `url`. */
  database?: string;
  /** Login name; consulted before `user`. */
  username?: string;
  /** Alternative spelling of `username`, used when `username` is absent. */
  user?: string;
  /** Login password. */
  password?: string;
  /** Connection URL from which host, port, database, credentials, and certificate trust can be parsed. */
  url?: string;
  /** Single schema to scan; used when `schemas` is empty. Defaults to `dbo`. */
  schema?: string;
  /** Schemas to scan; takes precedence over `schema` when non-empty. */
  schemas?: Array<string>;
  /** Whether to trust the server certificate without validation. */
  trustServerCertificate?: boolean;
  /** Additional settings are tolerated but not interpreted here. */
  [key: string]: unknown;
}
|
||||
|
||||
/** Fully resolved settings handed to a {@link KtxSqlServerPoolFactory} to open a pool. */
export interface KtxSqlServerPoolConfig {
  /** Host name of the SQL Server instance. */
  server: string;
  /** TCP port of the instance. */
  port: number;
  /** Database to connect to. */
  database: string;
  /** Login name. */
  user: string;
  /** Login password, when one is configured. */
  password?: string;
  /** Transport options; encryption is always requested. */
  options: { encrypt: true; trustServerCertificate: boolean };
  /** Pool sizing and idle timeout (milliseconds). */
  pool: { max: number; min: number; idleTimeoutMillis: number };
}
|
||||
|
||||
/**
 * Driver-neutral view of a query result: the row array, optionally carrying a
 * `columns` map whose entries expose the column's SQL type declaration.
 */
export interface KtxSqlServerQueryResult {
  recordset?: Record<string, unknown>[] & {
    columns?: Record<string, { type?: { declaration?: string } }>;
  };
}
|
||||
|
||||
/** Minimal request surface the connector needs from a pool. */
interface KtxSqlServerRequest {
  /** Binds a named parameter and returns a request so calls can be chained. */
  input(name: string, value: unknown): KtxSqlServerRequest;
  /** Executes the SQL text and resolves with its result. */
  query(query: string): Promise<KtxSqlServerQueryResult>;
}
|
||||
|
||||
/** Connection pool abstraction used by the connector (real driver or test double). */
export interface KtxSqlServerPool {
  /** Creates a request on which parameters can be bound and a query executed. */
  request(): KtxSqlServerRequest;
  /** Closes the pool. */
  close(): Promise<void>;
}
|
||||
|
||||
/** Opens pools from a resolved configuration; injectable so tests can avoid a real server. */
export interface KtxSqlServerPoolFactory {
  createPool(config: KtxSqlServerPoolConfig): Promise<KtxSqlServerPool>;
}
|
||||
|
||||
/** Host and port produced by an endpoint resolver, with an optional teardown hook. */
interface KtxSqlServerResolvedEndpoint {
  host: string;
  port: number;
  /** When present, invoked by the connector's `cleanup()`. */
  close?: () => Promise<void>;
}
|
||||
|
||||
/**
 * Hook that maps the configured host and port to the endpoint actually used.
 * NOTE(review): presumably intended for tunnels or proxies; the call site is
 * outside this view, so confirm against the connector's query path.
 */
export interface KtxSqlServerEndpointResolver {
  resolve(input: {
    host: string;
    port: number;
    connection: KtxSqlServerConnectionConfig;
  }): Promise<KtxSqlServerResolvedEndpoint>;
}
|
||||
|
||||
/** Constructor options for {@link KtxSqlServerScanConnector}. */
export interface KtxSqlServerScanConnectorOptions {
  /** Identifier of the connection; also embedded in the connector id. */
  connectionId: string;
  /** Connection settings; construction throws unless the driver is SQL Server. */
  connection: KtxSqlServerConnectionConfig | undefined;
  /** Pool factory override; defaults to the driver-backed factory. */
  poolFactory?: KtxSqlServerPoolFactory;
  /** Optional endpoint resolver. */
  endpointResolver?: KtxSqlServerEndpointResolver;
  /** Environment used for `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Clock used for snapshot timestamps; defaults to `new Date()`. */
  now?: () => Date;
}
|
||||
|
||||
/** Read-only query input extended with optional named parameters passed to the dialect's `prepareQuery`. */
export interface KtxSqlServerReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  params?: Record<string, unknown>;
}
|
||||
|
||||
/** Tuning knobs for `getColumnDistinctValues`. */
export interface KtxSqlServerColumnDistinctValuesOptions {
  /** Above this sampled cardinality, values are not fetched. */
  maxCardinality: number;
  /** Maximum number of distinct values to return. */
  limit: number;
  /** Sample size passed to the cardinality query; defaults to 10000. */
  sampleSize?: number;
}
|
||||
|
||||
/** Outcome of `getColumnDistinctValues`. */
export interface KtxSqlServerColumnDistinctValuesResult {
  /** Distinct values as strings, or `null` when the cardinality exceeded the allowed maximum. */
  values: Array<string> | null;
  /** Distinct count reported by the cardinality query. */
  cardinality: number;
}
|
||||
|
||||
/** Table sample result that additionally reports the SQL type declaration of each header. */
interface KtxSqlServerTableSampleResult extends KtxTableSampleResult {
  headerTypes?: Array<string>;
}
|
||||
|
||||
/**
 * Extracts a SQL type declaration string from driver column metadata.
 *
 * Accepts an object or function carrying a string `declaration` property, or
 * a function that, when called, yields such a value. A function's own
 * `declaration` is read before the function is invoked.
 * NOTE(review): mssql type descriptors appear to be functions that carry
 * `declaration` themselves — confirm against the driver.
 *
 * @param type Column type metadata of unknown shape.
 * @returns The declaration, or `'unknown'` when none can be determined.
 */
function sqlTypeDeclaration(type: unknown): string {
  // Bounded unwrapping: a factory may return another factory (or itself),
  // so never follow the chain indefinitely.
  const maxUnwrapDepth = 16;
  let current: unknown = type;

  for (let depth = 0; depth < maxUnwrapDepth; depth += 1) {
    if (current === null || (typeof current !== 'object' && typeof current !== 'function')) {
      return 'unknown';
    }

    const declaration = (current as { declaration?: unknown }).declaration;
    if (typeof declaration === 'string') {
      return declaration;
    }
    if (typeof current !== 'function') {
      return 'unknown';
    }

    const factory = current;
    try {
      current = factory();
    } catch {
      return 'unknown';
    }
    if (current === factory) {
      // Self-returning factory without a declaration: nothing more to learn.
      return 'unknown';
    }
  }

  return 'unknown';
}
|
||||
|
||||
/**
 * Copies driver rows into a fresh array and attaches a normalized `columns`
 * map in which every column exposes only its SQL type declaration string.
 *
 * @param rows Driver rows; `undefined` is treated as no rows.
 * @param columns Driver column metadata; `undefined` is treated as no columns.
 */
function sqlRecordset(
  rows: Array<Record<string, unknown>> | undefined,
  columns: Record<string, { type?: unknown }> | undefined,
): NonNullable<KtxSqlServerQueryResult['recordset']> {
  const normalizedColumns = Object.fromEntries(
    Object.entries(columns ?? {}).map(([columnName, metadata]) => {
      const declaration = sqlTypeDeclaration(metadata.type);
      return [columnName, { type: { declaration } }];
    }),
  );

  const copiedRows = Array.from(rows ?? []) as NonNullable<KtxSqlServerQueryResult['recordset']>;
  copiedRows.columns = normalizedColumns;
  return copiedRows;
}
|
||||
|
||||
/**
 * Pool factory backed by the `mssql` driver. It connects a real
 * `ConnectionPool` and exposes it through the narrow {@link KtxSqlServerPool}
 * surface, normalizing each result's recordset via `sqlRecordset`.
 */
class DefaultSqlServerPoolFactory implements KtxSqlServerPoolFactory {
  async createPool(config: KtxSqlServerPoolConfig): Promise<KtxSqlServerPool> {
    const connectedPool = await new sql.ConnectionPool(config as sql.config).connect();

    const createRequest = (): KtxSqlServerRequest => {
      const driverRequest = connectedPool.request();
      const wrapper: KtxSqlServerRequest = {
        input(name: string, value: unknown) {
          driverRequest.input(name, value);
          return wrapper;
        },
        async query(query: string) {
          const driverResult = await driverRequest.query(query);
          const rows = driverResult.recordset as Array<Record<string, unknown>> | undefined;
          return { recordset: sqlRecordset(rows, driverResult.recordset?.columns) };
        },
      };
      return wrapper;
    };

    return {
      request: createRequest,
      close: () => connectedPool.close(),
    };
  }
}
|
||||
|
||||
/**
 * Reads a string setting from the connection and resolves `env:` / `file:`
 * references in it.
 *
 * @returns The resolved value, or `undefined` when the setting is missing,
 *   not a string, or blank after trimming.
 */
function stringConfigValue(
  connection: KtxSqlServerConnectionConfig | undefined,
  key: keyof KtxSqlServerConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Resolves an indirect configuration value.
 *
 * - `env:NAME` yields `env[NAME]`, or an empty string when the variable is unset.
 * - `file:PATH` yields the trimmed UTF-8 contents of the file. A leading `~`
 *   is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @throws Whatever `readFileSync` throws when a `file:` path cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Strip the separators after `~` before resolving: `resolve(home, '/x')`
    // treats `/x` as absolute and would discard the home directory.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Extracts connection settings from a SQL Server connection URL of the form
 * `scheme://user:password@host:port/database?trustServerCertificate=true`.
 *
 * Username, password, and database name are percent-decoded. Absent parts map
 * to `undefined`, except `trustServerCertificate`, which is `true` only when
 * the query parameter is literally `true` and `false` otherwise.
 *
 * @throws TypeError when `url` is not a valid URL; URIError on malformed percent-encoding.
 */
function parseSqlServerUrl(url: string): Partial<KtxSqlServerConnectionConfig> {
  const parsed = new URL(url);
  const databasePath = parsed.pathname.replace(/^\/+/, '');
  return {
    host: parsed.hostname,
    port: parsed.port ? Number(parsed.port) : undefined,
    // The URL parser leaves the path percent-encoded, so decode it like the credentials.
    database: databasePath ? decodeURIComponent(databasePath) : undefined,
    username: parsed.username ? decodeURIComponent(parsed.username) : undefined,
    password: parsed.password ? decodeURIComponent(parsed.password) : undefined,
    trustServerCertificate: parsed.searchParams.get('trustServerCertificate') === 'true',
  };
}
|
||||
|
||||
/** Returns `value` when it is a finite number; otherwise `undefined`. No coercion is performed. */
function maybeNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Determines which schemas the connector scans.
 *
 * Usable entries of `connection.schemas` win: each string entry is trimmed,
 * resolved through `resolveStringReference`, and dropped when it ends up
 * empty. Non-string entries are ignored. When no usable entry remains, the
 * single `schema` setting is used, and finally `dbo`.
 *
 * @returns A non-empty list of schema names.
 */
function schemaNames(connection: KtxSqlServerConnectionConfig, env: NodeJS.ProcessEnv): string[] {
  if (Array.isArray(connection.schemas)) {
    const configured = connection.schemas
      .filter((schema): schema is string => typeof schema === 'string')
      // Trim before resolving so ` env:NAME ` is handled like other string settings.
      .map((schema) => schema.trim())
      .filter((schema) => schema.length > 0)
      .map((schema) => resolveStringReference(schema, env))
      .filter((schema) => schema.length > 0);
    // An empty list would make the scan silently cover nothing, so fall through instead.
    if (configured.length > 0) {
      return configured;
    }
  }
  return [stringConfigValue(connection, 'schema', env) ?? 'dbo'];
}
|
||||
|
||||
/**
 * Buckets rows by their `table_name`, preserving both first-seen table order
 * and row order within each table.
 */
function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string, T[]> {
  const buckets = new Map<string, T[]>();
  rows.forEach((row) => {
    const bucket = buckets.get(row.table_name);
    if (bucket === undefined) {
      buckets.set(row.table_name, [row]);
    } else {
      bucket.push(row);
    }
  });
  return buckets;
}
|
||||
|
||||
/**
 * Coerces `value` with `Number()` and returns the result when it is finite,
 * otherwise `null`. Note that `null` and `''` coerce to `0`.
 */
function firstNumber(value: unknown): number | null {
  const coerced = Number(value);
  if (!Number.isFinite(coerced)) {
    return null;
  }
  return coerced;
}
|
||||
|
||||
/**
 * Validates that `sqlText` is read-only, strips trailing semicolons, and
 * optionally caps the number of rows by wrapping the statement in
 * `SELECT TOP n * FROM (…) AS ktx_query_result`.
 *
 * NOTE(review): T-SQL does not allow a `WITH` CTE or a bare `ORDER BY` inside
 * a derived table, so such statements will likely fail when `maxRows` is set —
 * confirm and handle at the call site if needed.
 *
 * @param sqlText Statement to execute; validated by `assertReadOnlySql`.
 * @param maxRows Row cap; omit for no cap.
 * @throws Error when `maxRows` is provided but is not a positive integer
 *   (including `0` and `NaN`).
 */
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
  const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, '');
  // Only an absent limit means "unlimited"; a truthiness test would also let 0 and NaN through.
  if (maxRows == null) {
    return trimmed;
  }
  if (!Number.isInteger(maxRows) || maxRows <= 0) {
    throw new Error('maxRows must be a positive integer.');
  }
  return `SELECT TOP ${maxRows} * FROM (${trimmed}) AS ktx_query_result`;
}
|
||||
|
||||
/**
 * Type guard: true when the connection's `driver`, compared
 * case-insensitively, is `sqlserver`. A missing connection or driver is rejected.
 */
export function isKtxSqlServerConnectionConfig(
  connection: KtxSqlServerConnectionConfig | undefined,
): connection is KtxSqlServerConnectionConfig {
  const driver = connection?.driver ?? '';
  const normalizedDriver = String(driver).toLowerCase();
  return normalizedDriver === 'sqlserver';
}
|
||||
|
||||
/**
 * Builds the pool configuration for a SQL Server connection.
 *
 * Settings parsed from `url` are merged beneath the explicitly configured
 * fields. Host, database, and a login name (`username`, then `user`) are
 * required. Port defaults to 1433, encryption is always enabled, and
 * `trustServerCertificate` defaults to `true` when not set.
 *
 * @param input Connection id (used in error messages), the connection
 *   settings, and an optional environment for `env:` references.
 * @throws Error when the driver is not SQL Server or a required setting is missing.
 */
export function sqlServerConnectionPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KtxSqlServerConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxSqlServerPoolConfig {
  // Captured before the guard narrows the connection type.
  const declaredDriver = input.connection?.driver ?? 'unknown';
  if (!isKtxSqlServerConnectionConfig(input.connection)) {
    throw new Error(`Native SQL Server connector cannot run driver "${declaredDriver}"`);
  }

  const env = input.env ?? process.env;
  const urlText = stringConfigValue(input.connection, 'url', env);
  const fromUrl = urlText ? parseSqlServerUrl(urlText) : {};
  const merged: KtxSqlServerConnectionConfig = { ...fromUrl, ...input.connection };

  // Resolve all three settings first, then validate them in order.
  const hostValue = stringConfigValue(merged, 'host', env);
  const databaseValue = stringConfigValue(merged, 'database', env);
  const userValue = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);

  const required = (value: string | undefined, setting: string): string => {
    if (!value) {
      throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.${setting}`);
    }
    return value;
  };
  const server = required(hostValue, 'host or url');
  const database = required(databaseValue, 'database or url');
  const user = required(userValue, 'username, user, or url');

  return {
    server,
    port: maybeNumber(merged.port) ?? 1433,
    database,
    user,
    password: stringConfigValue(merged, 'password', env),
    options: { encrypt: true, trustServerCertificate: merged.trustServerCertificate ?? true },
    pool: { max: 10, min: 0, idleTimeoutMillis: 30000 },
  };
}
|
||||
|
||||
export class KtxSqlServerScanConnector implements KtxScanConnector {
|
||||
readonly id: string;
|
||||
readonly driver = 'sqlserver' as const;
|
||||
readonly capabilities = createKtxConnectorCapabilities({
|
||||
tableSampling: true,
|
||||
columnSampling: true,
|
||||
columnStats: false,
|
||||
readOnlySql: true,
|
||||
nestedAnalysis: false,
|
||||
formalForeignKeys: true,
|
||||
estimatedRowCounts: true,
|
||||
});
|
||||
|
||||
private readonly connectionId: string;
|
||||
private readonly connection: KtxSqlServerConnectionConfig;
|
||||
private readonly poolConfig: KtxSqlServerPoolConfig;
|
||||
private readonly schemas: string[];
|
||||
private readonly poolFactory: KtxSqlServerPoolFactory;
|
||||
private readonly endpointResolver?: KtxSqlServerEndpointResolver;
|
||||
private readonly now: () => Date;
|
||||
private readonly dialect = new KtxSqlServerDialect();
|
||||
private pool: KtxSqlServerPool | null = null;
|
||||
private resolvedEndpoint: KtxSqlServerResolvedEndpoint | null = null;
|
||||
|
||||
/**
 * Validates the connection and prepares the pool configuration. No network
 * connection is opened here.
 *
 * @throws Error when the connection is not a usable SQL Server connection
 *   (see `sqlServerConnectionPoolConfigFromConfig`).
 */
constructor(options: KtxSqlServerScanConnectorOptions) {
  const { connectionId, connection, poolFactory, endpointResolver, now } = options;
  const env = options.env ?? process.env;

  this.connectionId = connectionId;
  this.connection = connection ?? {};
  this.poolConfig = sqlServerConnectionPoolConfigFromConfig({ connectionId, connection, env });
  this.schemas = schemaNames(this.connection, env);
  this.poolFactory = poolFactory ?? new DefaultSqlServerPoolFactory();
  this.endpointResolver = endpointResolver;
  this.now = now ?? (() => new Date());
  this.id = `sqlserver:${connectionId}`;
}
|
||||
|
||||
/**
 * Probes the connection with `SELECT 1`.
 *
 * @returns `{ success: true }`, or `{ success: false, error }` carrying the failure message; never rejects.
 */
async testConnection(): Promise<{ success: boolean; error?: string }> {
  try {
    await this.query('SELECT 1');
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { success: false, error: message };
  }
  return { success: true };
}
|
||||
|
||||
/**
 * Produces a schema snapshot covering every configured schema, introspected
 * one after another in configuration order.
 *
 * @param input Scan input; its `connectionId` is checked by `assertConnection`.
 * @returns Snapshot with tables, scan scope, and summary metadata, timestamped with the injected clock.
 */
async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
  this.assertConnection(input.connectionId);

  const tables: KtxSchemaTable[] = [];
  for (const schemaName of this.schemas) {
    const schemaTables = await this.introspectSchema(schemaName);
    tables.push(...schemaTables);
  }

  const { database, server } = this.poolConfig;
  const extractedAt = this.now().toISOString();
  let totalColumns = 0;
  for (const table of tables) {
    totalColumns += table.columns.length;
  }

  return {
    connectionId: this.connectionId,
    driver: 'sqlserver',
    extractedAt,
    scope: { catalogs: [database], schemas: this.schemas },
    metadata: {
      database,
      schemas: this.schemas,
      host: server,
      table_count: tables.length,
      total_columns: totalColumns,
    },
    tables,
  };
}
|
||||
|
||||
/**
 * Fetches up to `input.limit` sample rows from a table, optionally restricted
 * to `input.columns`, using the dialect's sample query.
 *
 * @returns Headers, their SQL type declarations, the rows, and the row count reported by the query.
 */
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxSqlServerTableSampleResult> {
  this.assertConnection(input.connectionId);
  const qualifiedTable = this.qTableName(input.table);
  const sampleSql = this.dialect.generateSampleQuery(qualifiedTable, input.limit, input.columns);
  const { headers, headerTypes, rows, totalRows } = await this.query(sampleSql);
  return { headers, headerTypes, rows, totalRows };
}
|
||||
|
||||
/**
 * Samples values of a single column. Empty rows and `null` values are
 * dropped; null and distinct counts are not computed and are reported as `null`.
 */
async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
  this.assertConnection(input.connectionId);
  const qualifiedTable = this.qTableName(input.table);
  const sampleSql = this.dialect.generateColumnSampleQuery(qualifiedTable, input.column, input.limit);
  const sample = await this.query(sampleSql);
  const values = sample.rows.flatMap((row) => (row.length > 0 && row[0] !== null ? [row[0]] : []));
  return { values, nullCount: null, distinctCount: null };
}
|
||||
|
||||
/** Column statistics are not supported by this connector; always resolves to `null`. */
async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
  const unsupported = null;
  return unsupported;
}
|
||||
|
||||
/**
 * Executes a read-only statement, optionally capped at `input.maxRows` rows
 * and with named parameters prepared by the dialect.
 *
 * @returns The query result plus `rowCount`, the number of rows returned.
 * @throws When the SQL is not read-only or `maxRows` is invalid (see `limitSqlForSqlServerExecution`).
 */
async executeReadOnly(input: KtxSqlServerReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
  this.assertConnection(input.connectionId);
  const cappedSql = limitSqlForSqlServerExecution(input.sql, input.maxRows);
  const { sql: preparedSql, params: preparedParams } = this.dialect.prepareQuery(cappedSql, input.params);
  const queryResult = await this.query(preparedSql, preparedParams);
  return { ...queryResult, rowCount: queryResult.rows.length };
}
|
||||
|
||||
/**
 * Estimates a column's cardinality and, when it is small enough, lists its
 * distinct non-null values as strings.
 *
 * @returns `null` when the cardinality is not a number; `{ values: [], cardinality: 0 }`
 *   for an empty result; `{ values: null, cardinality }` when the cardinality exceeds
 *   `options.maxCardinality`; otherwise the values and cardinality.
 */
async getColumnDistinctValues(
  table: KtxTableRef,
  columnName: string,
  options: KtxSqlServerColumnDistinctValuesOptions,
): Promise<KtxSqlServerColumnDistinctValuesResult | null> {
  const qualifiedTable = this.qTableName(table);
  const quotedColumn = this.dialect.quoteIdentifier(columnName);

  const sampleSize = options.sampleSize ?? 10000;
  const cardinalitySql = this.dialect.generateCardinalitySampleQuery(qualifiedTable, quotedColumn, sampleSize);
  const cardinalityRows = await this.queryRaw<{ cardinality: unknown }>(cardinalitySql);
  const cardinality = Number(cardinalityRows[0]?.cardinality);

  if (Number.isNaN(cardinality)) {
    return null;
  }
  if (cardinality === 0) {
    return { values: [], cardinality: 0 };
  }
  if (cardinality > options.maxCardinality) {
    // Too many distinct values to enumerate; report only the count.
    return { values: null, cardinality };
  }

  const valuesSql = this.dialect.generateDistinctValuesQuery(qualifiedTable, quotedColumn, options.limit);
  const valueRows = await this.queryRaw<{ val: unknown }>(valuesSql);
  const values: string[] = [];
  for (const row of valueRows) {
    if (row.val !== null) {
      values.push(String(row.val));
    }
  }
  return { values, cardinality };
}
|
||||
|
||||
/**
 * Returns the row count recorded in `sys.partitions` for a table, summing
 * partitions with `index_id` 0 or 1.
 *
 * @param tableName Unqualified table name.
 * @param schemaName Schema of the table; defaults to the first configured schema, then `dbo`.
 * @returns The row count, or `0` when no finite count is available.
 */
async getTableRowCount(tableName: string, schemaName = this.schemas[0] ?? 'dbo'): Promise<number> {
  const rowCountSql = `
    SELECT SUM(p.rows) AS row_count
    FROM sys.tables t
    INNER JOIN sys.partitions p ON t.object_id = p.object_id
    INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
    WHERE s.name = @schemaName
      AND t.name = @tableName
      AND p.index_id IN (0, 1)
  `;
  const countRows = await this.queryRaw<{ row_count: unknown }>(rowCountSql, { schemaName, tableName });
  const rowCount = firstNumber(countRows[0]?.row_count);
  return rowCount ?? 0;
}
|
||||
|
||||
/** Formats a table reference as a table name using the SQL Server dialect's `formatTableName`. */
qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
  const qualifiedName = this.dialect.formatTableName(table);
  return qualifiedName;
}
|
||||
|
||||
/** Quotes an identifier using the SQL Server dialect's `quoteIdentifier`. */
quoteIdentifier(identifier: string): string {
  const quoted = this.dialect.quoteIdentifier(identifier);
  return quoted;
}
|
||||
|
||||
/**
 * Lists schema names from `sys.schemas` in name order, excluding the system
 * schemas and the fixed database-role schemas named in the query.
 */
async listSchemas(): Promise<string[]> {
  const listSql = `
    SELECT s.name AS schema_name
    FROM sys.schemas s
    WHERE s.name NOT IN (
      'INFORMATION_SCHEMA', 'sys', 'guest',
      'db_owner', 'db_accessadmin', 'db_securityadmin', 'db_ddladmin',
      'db_backupoperator', 'db_datareader', 'db_datawriter',
      'db_denydatareader', 'db_denydatawriter'
    )
    ORDER BY s.name
  `;
  const schemaRows = await this.queryRaw<{ schema_name: string }>(listSql);
  const names: string[] = [];
  for (const row of schemaRows) {
    names.push(row.schema_name);
  }
  return names;
}
|
||||
|
||||
/**
 * Lists user tables and views in the given schemas, ordered by schema and name.
 *
 * @param schemas Schemas to include; when omitted, every schema returned by `listSchemas()` is used.
 * @returns One entry per object; an empty array when there are no schemas to search.
 */
async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
  const targetSchemas = schemas ?? (await this.listSchemas());
  if (targetSchemas.length === 0) {
    return [];
  }

  // Bind each schema name as its own named parameter (@schema0, @schema1, …).
  const params: Record<string, unknown> = {};
  const placeholders: string[] = [];
  targetSchemas.forEach((schema, index) => {
    params[`schema${index}`] = schema;
    placeholders.push(`@schema${index}`);
  });

  const objectRows = await this.queryRaw<{ schema_name: string; table_name: string; table_type: string }>(
    `
      SELECT s.name AS schema_name, o.name AS table_name, o.type_desc AS table_type
      FROM sys.objects o
      JOIN sys.schemas s ON o.schema_id = s.schema_id
      WHERE o.type IN ('U', 'V')
        AND s.name IN (${placeholders.join(', ')})
      ORDER BY s.name, o.name
    `,
    params,
  );

  return objectRows.map((row) => {
    const kind = row.table_type === 'VIEW' ? ('view' as const) : ('table' as const);
    return { schema: row.schema_name, name: row.table_name, kind };
  });
}
|
||||
|
||||
/**
 * Releases the connector's resources: closes the pool if one is open, then
 * invokes the resolved endpoint's `close` hook if it has one.
 *
 * The endpoint hook runs even when closing the pool fails, so a pool error
 * cannot leak the endpoint. A pool-close error still propagates to the caller
 * unless the endpoint hook itself throws, in which case that error wins.
 */
async cleanup(): Promise<void> {
  try {
    if (this.pool) {
      await this.pool.close();
      this.pool = null;
    }
  } finally {
    if (this.resolvedEndpoint?.close) {
      await this.resolvedEndpoint.close();
      this.resolvedEndpoint = null;
    }
  }
}
|
||||
|
||||
/**
 * Introspects one schema: base tables and views, their columns in ordinal
 * order, comments, primary keys, foreign keys, and row counts.
 *
 * Views report `estimatedRows: null`; base tables without a recorded count report `0`.
 */
private async introspectSchema(schemaName: string): Promise<KtxSchemaTable[]> {
  const tableRows = await this.queryRaw<{ table_name: string; table_type: string }>(
    `
      SELECT TABLE_NAME AS table_name, TABLE_TYPE AS table_type
      FROM INFORMATION_SCHEMA.TABLES
      WHERE TABLE_SCHEMA = @schemaName
        AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
      ORDER BY TABLE_NAME
    `,
    { schemaName },
  );
  const columnRows = await this.queryRaw<{
    table_name: string;
    column_name: string;
    data_type: string;
    is_nullable: string;
  }>(
    `
      SELECT TABLE_NAME AS table_name, COLUMN_NAME AS column_name, DATA_TYPE AS data_type, IS_NULLABLE AS is_nullable
      FROM INFORMATION_SCHEMA.COLUMNS
      WHERE TABLE_SCHEMA = @schemaName
      ORDER BY TABLE_NAME, ORDINAL_POSITION
    `,
    { schemaName },
  );

  // Remaining metadata lookups, issued one at a time in a fixed order.
  const tableComments = await this.tableComments(schemaName);
  const columnComments = await this.columnComments(schemaName);
  const primaryKeys = await this.primaryKeys(schemaName);
  const foreignKeyRows = await this.foreignKeys(schemaName);
  const rowCounts = await this.rowCounts(schemaName);

  const columnsByTable = groupByTable(columnRows);
  const foreignKeysByTable = groupByTable(foreignKeyRows);

  return tableRows.map((tableRow) => {
    const tableName = tableRow.table_name;
    const isView = tableRow.table_type === 'VIEW';
    const tableColumns = columnsByTable.get(tableName) ?? [];
    const tableForeignKeys = foreignKeysByTable.get(tableName) ?? [];
    return {
      catalog: this.poolConfig.database,
      db: schemaName,
      name: tableName,
      kind: isView ? 'view' : 'table',
      comment: tableComments.get(tableName) ?? null,
      estimatedRows: isView ? null : rowCounts.get(tableName) ?? 0,
      columns: tableColumns.map((column) =>
        this.toSchemaColumn(column, primaryKeys.get(tableName) ?? new Set(), columnComments),
      ),
      foreignKeys: tableForeignKeys.map((foreignKeyRow) => this.toSchemaForeignKey(foreignKeyRow)),
    };
  });
}
|
||||
|
||||
/**
 * Loads the `MS_Description` extended property of each user table and view
 * in a schema.
 *
 * The join is restricted to `ep.class = 1` (object or column) with
 * `ep.minor_id = 0` (the object itself), so that extended properties of other
 * classes whose `major_id` happens to equal an object id are not mistaken for
 * table comments.
 *
 * @param schemaName Schema to inspect; bound as `@schemaName`.
 * @returns Map from table/view name to its description text.
 */
private async tableComments(schemaName: string): Promise<Map<string, string>> {
  const rows = await this.queryRaw<{ table_name: string; table_comment: string }>(
    `
      SELECT o.name AS table_name, CAST(ep.value AS NVARCHAR(MAX)) AS table_comment
      FROM sys.objects o
      INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
      INNER JOIN sys.extended_properties ep ON ep.major_id = o.object_id
        AND ep.class = 1
        AND ep.minor_id = 0
        AND ep.name = 'MS_Description'
      WHERE s.name = @schemaName
        AND o.type IN ('U', 'V')
    `,
    { schemaName },
  );
  return new Map(rows.map((row) => [row.table_name, row.table_comment]));
}
|
||||
|
||||
/**
 * Loads the `MS_Description` extended property of each column of the user
 * tables and views in a schema.
 *
 * The join is restricted to `ep.class = 1` (object or column). Without it,
 * properties of other classes that reuse the `(major_id, minor_id)` pair,
 * such as index-level properties where `minor_id` is the index id, could be
 * reported as the comment of the column with the same numeric id.
 *
 * @param schemaName Schema to inspect; bound as `@schemaName`.
 * @returns Map keyed by `"<table>.<column>"` to the description text.
 */
private async columnComments(schemaName: string): Promise<Map<string, string>> {
  const rows = await this.queryRaw<{ table_name: string; column_name: string; column_comment: string }>(
    `
      SELECT o.name AS table_name, c.name AS column_name, CAST(ep.value AS NVARCHAR(MAX)) AS column_comment
      FROM sys.columns c
      INNER JOIN sys.objects o ON c.object_id = o.object_id
      INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
      INNER JOIN sys.extended_properties ep ON ep.major_id = c.object_id
        AND ep.class = 1
        AND ep.minor_id = c.column_id
        AND ep.name = 'MS_Description'
      WHERE s.name = @schemaName
        AND o.type IN ('U', 'V')
    `,
    { schemaName },
  );
  return new Map(rows.map((row) => [`${row.table_name}.${row.column_name}`, row.column_comment]));
}
|
||||
|
||||
/**
 * Collects the primary-key column names of every table in a schema.
 *
 * @param schemaName Schema to inspect; bound as `@schemaName`.
 * @returns Map from table name to the set of its primary-key columns. Tables
 *   without a primary key have no entry.
 */
private async primaryKeys(schemaName: string): Promise<Map<string, Set<string>>> {
  const keyRows = await this.queryRaw<{ table_name: string; column_name: string }>(
    `
      SELECT tc.TABLE_NAME AS table_name, kcu.COLUMN_NAME AS column_name
      FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
      JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
        ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
        AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
      WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
        AND tc.TABLE_SCHEMA = @schemaName
      ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION
    `,
    { schemaName },
  );

  return keyRows.reduce((byTable, { table_name: tableName, column_name: columnName }) => {
    const known = byTable.get(tableName);
    if (known) {
      known.add(columnName);
    } else {
      byTable.set(tableName, new Set<string>([columnName]));
    }
    return byTable;
  }, new Map<string, Set<string>>());
}
|
||||
|
||||
/**
 * Lists the foreign-key column pairs declared by the tables of a schema.
 *
 * Each row pairs one referencing column with the referenced column at the
 * same ordinal position of the target key constraint.
 *
 * @param schemaName Schema of the referencing tables; bound as `@schemaName`.
 * @returns Raw rows ordered by referencing table and column name.
 */
private async foreignKeys(schemaName: string): Promise<
  Array<{
    table_name: string;
    column_name: string;
    referenced_table_schema: string;
    referenced_table_name: string;
    referenced_column_name: string;
    constraint_name: string;
  }>
> {
  const foreignKeySql = `
      SELECT
        fk.TABLE_NAME AS table_name,
        fk.COLUMN_NAME AS column_name,
        pk.TABLE_SCHEMA AS referenced_table_schema,
        pk.TABLE_NAME AS referenced_table_name,
        pk.COLUMN_NAME AS referenced_column_name,
        fk.CONSTRAINT_NAME AS constraint_name
      FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc
      JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE fk
        ON fk.CONSTRAINT_CATALOG = rc.CONSTRAINT_CATALOG
        AND fk.CONSTRAINT_SCHEMA = rc.CONSTRAINT_SCHEMA
        AND fk.CONSTRAINT_NAME = rc.CONSTRAINT_NAME
      JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE pk
        ON pk.CONSTRAINT_CATALOG = rc.UNIQUE_CONSTRAINT_CATALOG
        AND pk.CONSTRAINT_SCHEMA = rc.UNIQUE_CONSTRAINT_SCHEMA
        AND pk.CONSTRAINT_NAME = rc.UNIQUE_CONSTRAINT_NAME
        AND pk.ORDINAL_POSITION = fk.ORDINAL_POSITION
      WHERE fk.TABLE_SCHEMA = @schemaName
      ORDER BY fk.TABLE_NAME, fk.COLUMN_NAME
    `;
  return this.queryRaw(foreignKeySql, { schemaName });
}
|
||||
|
||||
/**
 * Reads per-table row counts from partition metadata instead of scanning
 * the tables.
 *
 * Only partitions with `index_id` 0 or 1 are summed.
 *
 * @param schemaName Schema to inspect; bound as `@schemaName`.
 * @returns Map from table name to row count; a value `firstNumber` cannot
 *   convert is stored as `0`.
 */
private async rowCounts(schemaName: string): Promise<Map<string, number>> {
  const countRows = await this.queryRaw<{ table_name: string; row_count: unknown }>(
    `
      SELECT t.name AS table_name, SUM(p.rows) AS row_count
      FROM sys.tables t
      INNER JOIN sys.partitions p ON t.object_id = p.object_id
      INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
      WHERE s.name = @schemaName
        AND p.index_id IN (0, 1)
      GROUP BY t.name
    `,
    { schemaName },
  );

  const countsByTable = new Map<string, number>();
  for (const { table_name: tableName, row_count: rawCount } of countRows) {
    countsByTable.set(tableName, firstNumber(rawCount) ?? 0);
  }
  return countsByTable;
}
|
||||
|
||||
/**
 * Converts one `INFORMATION_SCHEMA.COLUMNS` row into a schema column.
 *
 * @param column Raw column row.
 * @param primaryKeys Primary-key column names of the column's table.
 * @param comments Column comments keyed by `"<table>.<column>"`.
 * @returns Column descriptor; `comment` is `null` when no comment is stored.
 */
private toSchemaColumn(
  column: { table_name: string; column_name: string; data_type: string; is_nullable: string },
  primaryKeys: Set<string>,
  comments: Map<string, string>,
): KtxSchemaColumn {
  const { table_name: tableName, column_name: columnName, data_type: nativeType } = column;
  const commentKey = `${tableName}.${columnName}`;

  return {
    name: columnName,
    nativeType,
    normalizedType: this.dialect.mapDataType(nativeType),
    dimensionType: this.dialect.mapToDimensionType(nativeType),
    // INFORMATION_SCHEMA reports nullability as the text 'YES' / 'NO'.
    nullable: column.is_nullable === 'YES',
    primaryKey: primaryKeys.has(columnName),
    comment: comments.get(commentKey) ?? null,
  };
}
|
||||
|
||||
/**
 * Converts one raw foreign-key row into a schema foreign key.
 *
 * The target catalog is always this connector's configured database.
 *
 * @param row Raw row as produced by the foreign-key query.
 * @returns Foreign-key descriptor; an empty constraint name becomes `null`.
 */
private toSchemaForeignKey(row: {
  column_name: string;
  referenced_table_schema: string;
  referenced_table_name: string;
  referenced_column_name: string;
  constraint_name: string;
}): KtxSchemaForeignKey {
  const {
    column_name: fromColumn,
    referenced_table_schema: toDb,
    referenced_table_name: toTable,
    referenced_column_name: toColumn,
    constraint_name: constraintName,
  } = row;

  return {
    fromColumn,
    toCatalog: this.poolConfig.database,
    toDb,
    toTable,
    toColumn,
    // `||` on purpose: an empty name is treated as "no name".
    constraintName: constraintName || null,
  };
}
|
||||
|
||||
/**
 * Returns the connection pool, creating it on first use.
 *
 * When an endpoint resolver is configured, the configured server/port are
 * resolved first; the resolved endpoint is remembered on the instance and
 * used for the pool instead of the configured one.
 *
 * NOTE(review): `this.pool` is assigned only after the awaits complete, so
 * two overlapping first calls would each create a pool. Confirm that callers
 * never issue concurrent queries before the first one has resolved.
 */
private async poolForQuery(): Promise<KtxSqlServerPool> {
  if (this.pool) {
    return this.pool;
  }

  // Copy so the resolved endpoint never overwrites the stored configuration.
  const effectiveConfig = { ...this.poolConfig };
  if (this.endpointResolver) {
    this.resolvedEndpoint = await this.endpointResolver.resolve({
      host: effectiveConfig.server,
      port: effectiveConfig.port,
      connection: this.connection,
    });
    effectiveConfig.server = this.resolvedEndpoint.host;
    effectiveConfig.port = this.resolvedEndpoint.port;
  }

  this.pool = await this.poolFactory.createPool(effectiveConfig);
  return this.pool;
}
|
||||
|
||||
/**
 * Runs a statement with named parameters and returns its rows as objects.
 *
 * Unlike `query`, the SQL is not passed through `assertReadOnlySql`; the
 * callers visible here use it for the connector's own metadata queries.
 *
 * @param query SQL text using `@name` placeholders.
 * @param params Values bound to the request, one input per key.
 * @returns The record set, or an empty array when the driver returns none.
 */
private async queryRaw<T extends Record<string, unknown>>(query: string, params?: Record<string, unknown>): Promise<T[]> {
  const pool = await this.poolForQuery();
  const request = pool.request();
  for (const [name, value] of Object.entries(params ?? {})) {
    request.input(name, value);
  }
  const outcome = await request.query(query);
  return (outcome.recordset ?? []) as T[];
}
|
||||
|
||||
/**
 * Runs a read-only statement and returns it in tabular form.
 *
 * The SQL goes through `assertReadOnlySql` before execution. Headers come
 * from the driver's column metadata when present, otherwise from the keys of
 * the first row. A column without a declared type is reported as `'unknown'`.
 *
 * @param query SQL text using `@name` placeholders.
 * @param params Values bound to the request, one input per key.
 * @returns Headers, header types, row arrays aligned with the headers, and
 *   the number of rows returned.
 */
private async query(query: string, params?: Record<string, unknown>): Promise<Omit<KtxQueryResult, 'rowCount'>> {
  const pool = await this.poolForQuery();
  const request = pool.request();
  for (const [name, value] of Object.entries(params ?? {})) {
    request.input(name, value);
  }

  const result = await request.query(assertReadOnlySql(query));
  const recordset = result.recordset ?? [];
  const columnMetadata = recordset.columns ?? {};

  const declaredHeaders = Object.keys(columnMetadata);
  // Fall back to the first row's keys when the driver gave no metadata.
  const headers = declaredHeaders.length > 0 ? declaredHeaders : Object.keys(recordset[0] ?? {});
  const headerTypes = headers.map((header) => columnMetadata[header]?.type?.declaration ?? 'unknown');
  const rows = recordset.map((row) => headers.map((header) => row[header]));

  return { headers, headerTypes, rows, totalRows: recordset.length };
}
|
||||
|
||||
/**
 * Guards against using this connector for a connection it was not built for.
 *
 * @param connectionId Connection id supplied by the caller.
 * @throws Error when the id differs from the connector's own connection id.
 */
private assertConnection(connectionId: string): void {
  if (connectionId === this.connectionId) {
    return;
  }
  throw new Error(`KTX SQL Server connector ${this.id} cannot serve connection ${connectionId}`);
}
|
||||
}
|
||||
49
packages/cli/src/connectors/sqlserver/dialect.test.ts
Normal file
49
packages/cli/src/connectors/sqlserver/dialect.test.ts
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxSqlServerDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the SQL Server dialect's pure SQL-text helpers.
describe('KtxSqlServerDialect', () => {
  const dialect = new KtxSqlServerDialect();

  it('quotes identifiers and formats schema-qualified table names', () => {
    const plainIdentifier = dialect.quoteIdentifier('events');
    const escapedIdentifier = dialect.quoteIdentifier('odd]name');
    const qualifiedTable = dialect.formatTableName({ catalog: 'warehouse', db: 'dbo', name: 'events' });
    const bareTable = dialect.formatTableName({ catalog: null, db: null, name: 'events' });

    expect(plainIdentifier).toBe('[events]');
    expect(escapedIdentifier).toBe('[odd]]name]');
    // The catalog is intentionally not part of the formatted name.
    expect(qualifiedTable).toBe('[dbo].[events]');
    expect(bareTable).toBe('[events]');
  });

  it('maps SQL Server types to KTX dimension types', () => {
    const expectations: Array<[nativeType: string, dimensionType: string]> = [
      ['datetime2', 'time'],
      ['decimal(18, 2)', 'number'],
      ['bigint', 'number'],
      ['bit', 'boolean'],
      ['uniqueidentifier', 'string'],
      ['', 'string'],
    ];
    for (const [nativeType, dimensionType] of expectations) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('builds sampling, distinct-value, pagination, and time SQL', () => {
    const tableName = '[dbo].[events]';

    const sampleSql = dialect.generateSampleQuery(tableName, 25, ['id', 'event_name']);
    expect(sampleSql).toBe('SELECT TOP 25 [id], [event_name] FROM [dbo].[events]');

    const columnSampleSql = dialect.generateColumnSampleQuery(tableName, 'event_name', 10);
    expect(columnSampleSql).toBe(
      "SELECT TOP 10 [event_name] FROM [dbo].[events] WHERE [event_name] IS NOT NULL AND LTRIM(RTRIM(CAST([event_name] AS NVARCHAR(MAX)))) != ''",
    );

    const distinctSql = dialect.generateDistinctValuesQuery(tableName, '[event_name]', 5);
    expect(distinctSql).toContain('SELECT TOP 5 val');

    expect(dialect.getTopClause(10)).toBe('TOP 10');
    expect(dialect.getLimitOffsetClause(10, 20)).toBe('OFFSET 20 ROWS FETCH NEXT 10 ROWS ONLY');

    const monthBucket = dialect.getTimeTruncExpression('created_at', 'month');
    expect(monthBucket).toBe('DATEFROMPARTS(YEAR(created_at), MONTH(created_at), 1)');
  });

  it('prepares named parameters using SQL Server @ parameters', () => {
    const prepared = dialect.prepareQuery('select * from events where id = :id and name = :name', {
      id: 10,
      name: 'signup',
    });

    expect(prepared).toEqual({
      sql: 'select * from events where id = @id and name = @name',
      params: { id: 10, name: 'signup' },
    });
  });
});
|
||||
201
packages/cli/src/connectors/sqlserver/dialect.ts
Normal file
201
packages/cli/src/connectors/sqlserver/dialect.ts
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/index.js';
|
||||
|
||||
/** Table reference in which only `name` is required; `catalog` and `db` may be omitted. */
type SqlServerTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;

/**
 * Builds SQL Server (T-SQL) text fragments: identifier quoting, type
 * classification, sampling/cardinality queries, and time bucketing.
 *
 * The methods only produce strings; nothing here talks to a database.
 * Unless stated otherwise, `tableName` / `column` arguments are spliced into
 * the SQL verbatim, so callers must pass already-quoted or trusted text.
 */
export class KtxSqlServerDialect {
  readonly type = 'sqlserver';

  /** Exact-match classification of base type names (lowercase, no length/precision suffix). */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    datetime: 'time',
    datetime2: 'time',
    date: 'time',
    time: 'time',
    datetimeoffset: 'time',
    smalldatetime: 'time',
    timestamp: 'time',
    int: 'number',
    bigint: 'number',
    smallint: 'number',
    tinyint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    real: 'number',
    money: 'number',
    smallmoney: 'number',
    varchar: 'string',
    nvarchar: 'string',
    char: 'string',
    nchar: 'string',
    text: 'string',
    ntext: 'string',
    uniqueidentifier: 'string',
    xml: 'string',
    bit: 'boolean',
  };

  /** Wraps an identifier in square brackets, doubling any `]` inside it. */
  quoteIdentifier(identifier: string): string {
    return `[${identifier.replace(/\]/g, ']]')}]`;
  }

  /**
   * Formats a table as `[schema].[table]`, or `[table]` when `db` is empty.
   * The catalog is never included in the result.
   */
  formatTableName(table: SqlServerTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native type as `time`, `number`, `boolean`, or `string`.
   *
   * The base type name (text before any `(`) is looked up in the exact-match
   * table first; unknown names fall back to substring heuristics and finally
   * to `'string'`. An empty type is `'string'`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    const parenIndex = lower.indexOf('(');
    // Trim again so "decimal (18, 2)" resolves to the exact "decimal" entry.
    const normalized = (parenIndex === -1 ? lower : lower.slice(0, parenIndex)).trim();

    // Own-property check: a plain index lookup would also find inherited
    // members such as "constructor" and return a non-string value.
    const exact = Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)
      ? this.typeMappings[normalized]
      : undefined;
    if (exact) {
      return exact;
    }

    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    if (
      normalized.includes('int') ||
      normalized.includes('num') ||
      normalized.includes('dec') ||
      normalized.includes('float') ||
      normalized.includes('money')
    ) {
      return 'number';
    }
    if (normalized.includes('bit')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT TOP <limit> ... FROM <tableName>`.
   *
   * @param tableName Already-formatted table name, used verbatim.
   * @param limit Maximum number of rows.
   * @param columns Column names to quote and select; all columns (`*`) when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT TOP ${limit} ${columnList} FROM ${tableName}`;
  }

  /**
   * Builds a query sampling one column, skipping NULLs and values that are
   * empty after trimming.
   *
   * @param tableName Already-formatted table name, used verbatim.
   * @param columnName Unquoted column name; quoted here.
   * @param limit Maximum number of rows.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT TOP ${limit} ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND LTRIM(RTRIM(CAST(${quotedColumn} AS NVARCHAR(MAX)))) != ''`;
  }

  /**
   * Rewrites `:name` placeholders to SQL Server `@name` parameters.
   *
   * Only names that are keys of `params` are rewritten, and only where the
   * name is followed by a word boundary. The rewrite is textual: placeholders
   * inside string literals or comments are rewritten too.
   *
   * @returns The rewritten SQL with the same `params` object, or the input
   *   SQL with `params: undefined` when no parameters were given.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }
    let parameterizedQuery = sql;
    for (const key of Object.keys(params)) {
      // Escape the name so characters such as "$" or "." match literally.
      const placeholder = new RegExp(`:${this.escapeRegExp(key)}\\b`, 'g');
      // Function replacer: "$" in the name must not act as a replacement pattern.
      parameterizedQuery = parameterizedQuery.replace(placeholder, () => `@${key}`);
    }
    return { sql: parameterizedQuery, params };
  }

  /**
   * Row-level random filter keeping roughly `samplePct` of rows.
   *
   * @param samplePct Fraction strictly between 0 and 1; anything else yields `''`.
   * @returns Predicate text. The percentage is rounded to a whole number, so
   *   fractions below 0.005 round to `< 0`, which no row can satisfy.
   */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `ABS(CHECKSUM(NEWID())) % 100 < ${Math.round(samplePct * 100)}`;
  }

  /**
   * `TABLESAMPLE (<n> PERCENT)` clause.
   *
   * @param samplePct Fraction strictly between 0 and 1; anything else yields `''`.
   */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `TABLESAMPLE (${samplePct * 100} PERCENT)`;
  }

  /**
   * `OFFSET ... FETCH NEXT ...` clause.
   *
   * @returns The clause when `offset` is greater than 0. Otherwise `''`, in
   *   which case `limit` is not applied here at all.
   */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `OFFSET ${offset} ROWS FETCH NEXT ${limit} ROWS ONLY` : '';
  }

  /** `TOP <limit>` clause. */
  getTopClause(limit: number): string {
    return `TOP ${limit}`;
  }

  /** Aggregate expression counting NULLs of `column` (used verbatim). */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** Aggregate expression counting distinct values of `column` (used verbatim). */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Query counting distinct non-NULL values among the first `sampleSize`
   * rows the engine returns (no ordering is requested).
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT TOP ${sampleSize} ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Query returning up to `limit` distinct non-NULL values of a column, cast
   * to `NVARCHAR(MAX)` and ordered by that text.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT TOP ${limit} val
      FROM (
        SELECT DISTINCT CAST(${columnName} AS NVARCHAR(MAX)) AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
      ) AS distinct_vals
      ORDER BY val
    `;
  }

  /** This dialect offers no column-statistics query; always `null`. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /**
   * Like `generateCardinalitySampleQuery`, but the sampled rows are picked
   * in `NEWID()` order.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT TOP ${sampleSize} ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY NEWID()
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Expression truncating a date/time column to the start of a period.
   *
   * @param column Column expression, used verbatim.
   * @param granularity Period to truncate to.
   * @param timezone Optional time-zone name. When given, the column is
   *   treated as UTC and converted with `AT TIME ZONE` first; the name is
   *   embedded as an escaped string literal.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = this.inTimezone(column, timezone);
    switch (granularity) {
      case 'day':
        return `CAST(${col} AS DATE)`;
      case 'week':
        return `DATEADD(WEEK, DATEDIFF(WEEK, 0, ${col}), 0)`;
      case 'month':
        return `DATEFROMPARTS(YEAR(${col}), MONTH(${col}), 1)`;
      case 'quarter':
        return `DATEFROMPARTS(YEAR(${col}), (DATEPART(QUARTER, ${col}) - 1) * 3 + 1, 1)`;
      case 'year':
        return `DATEFROMPARTS(YEAR(${col}), 1, 1)`;
    }
  }

  /**
   * Expression bucketing a date/time column into fixed intervals counted
   * from an origin.
   *
   * @param column Column expression, used verbatim.
   * @param interval `"<amount> <unit>"`, split on a single space.
   *   NOTE(review): both parts are interpolated unvalidated; `unit` must be a
   *   T-SQL datepart and `amount` a non-zero integer — confirm callers guarantee this.
   * @param origin Bucket origin, embedded as an escaped string literal;
   *   defaults to `'1970-01-01'`.
   * @param timezone Optional time-zone name, handled as in `getTimeTruncExpression`.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = this.inTimezone(column, timezone);
    const [amount, unit] = interval.split(' ');
    const originExpr = origin ? this.quoteStringLiteral(origin) : `'1970-01-01'`;
    return `DATEADD(${unit}, (DATEDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount}, ${originExpr})`;
  }

  /** Returns the interval text as a single-quoted SQL string literal. */
  parseIntervalToSql(interval: string): string {
    return this.quoteStringLiteral(interval);
  }

  /** Applies the UTC -> `timezone` conversion when a time zone is given. */
  private inTimezone(column: string, timezone?: string): string {
    return timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE ${this.quoteStringLiteral(timezone)}` : column;
  }

  /** Single-quotes a value as a T-SQL string literal, doubling embedded quotes. */
  private quoteStringLiteral(value: string): string {
    return `'${value.replace(/'/g, "''")}'`;
  }

  /** Escapes regular-expression metacharacters so `text` matches literally. */
  private escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}
|
||||
17
packages/cli/src/connectors/sqlserver/index.ts
Normal file
17
packages/cli/src/connectors/sqlserver/index.ts
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
export { KtxSqlServerDialect } from './dialect.js';
|
||||
export {
|
||||
isKtxSqlServerConnectionConfig,
|
||||
KtxSqlServerScanConnector,
|
||||
sqlServerConnectionPoolConfigFromConfig,
|
||||
type KtxSqlServerColumnDistinctValuesOptions,
|
||||
type KtxSqlServerColumnDistinctValuesResult,
|
||||
type KtxSqlServerConnectionConfig,
|
||||
type KtxSqlServerEndpointResolver,
|
||||
type KtxSqlServerPool,
|
||||
type KtxSqlServerPoolConfig,
|
||||
type KtxSqlServerPoolFactory,
|
||||
type KtxSqlServerQueryResult,
|
||||
type KtxSqlServerReadOnlyQueryInput,
|
||||
type KtxSqlServerScanConnectorOptions,
|
||||
} from './connector.js';
|
||||
export { createSqlServerLiveDatabaseIntrospection } from './live-database-introspection.js';
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/index.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/index.js';
|
||||
import {
|
||||
KtxSqlServerScanConnector,
|
||||
type KtxSqlServerConnectionConfig,
|
||||
type KtxSqlServerEndpointResolver,
|
||||
type KtxSqlServerPoolFactory,
|
||||
} from './connector.js';
|
||||
|
||||
/** Dependencies for the SQL Server live-introspection port. */
interface CreateSqlServerLiveDatabaseIntrospectionOptions {
  /** Project connections keyed by connection id. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Optional pool factory handed to each connector. */
  poolFactory?: KtxSqlServerPoolFactory;
  /** Optional endpoint resolver handed to each connector. */
  endpointResolver?: KtxSqlServerEndpointResolver;
  /** Optional clock handed to each connector. */
  now?: () => Date;
}

/**
 * Creates a live-introspection port backed by the SQL Server scan connector.
 *
 * Every `extractSchema` call builds a fresh connector for the requested
 * connection id, runs its introspection, and always cleans the connector up
 * afterwards, whether introspection succeeded or threw.
 *
 * @param options Connections and optional connector dependencies; read at
 *   call time, not captured when the port is created.
 */
export function createSqlServerLiveDatabaseIntrospection(
  options: CreateSqlServerLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  return {
    async extractSchema(connectionId: string) {
      // The entry may be missing; it is handed to the connector as-is.
      const connection = options.connections[connectionId] as KtxSqlServerConnectionConfig | undefined;
      const { poolFactory, endpointResolver, now } = options;
      const connector = new KtxSqlServerScanConnector({
        connectionId,
        connection,
        poolFactory,
        endpointResolver,
        now,
      });

      const target = { connectionId, driver: 'sqlserver' };
      const run = { runId: `sqlserver-${connectionId}` };
      try {
        return await connector.introspect(target, run);
      } finally {
        await connector.cleanup();
      }
    },
  };
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '@ktx/context/project';
|
||||
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from './context/project/index.js';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { KtxPublicIngestProject, KtxPublicIngestTargetResult } from './public-ingest.js';
|
||||
import {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import type { KtxProgressPort, KtxProgressUpdateOptions } from '@ktx/context/scan';
|
||||
import type { KtxProgressPort, KtxProgressUpdateOptions } from './context/scan/index.js';
|
||||
import type { KtxCliIo } from './index.js';
|
||||
import type { KtxIngestProgressUpdate } from './ingest.js';
|
||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||
|
|
|
|||
9
packages/cli/src/context/agent/index.ts
Normal file
9
packages/cli/src/context/agent/index.ts
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
export type {
|
||||
AgentRunnerPort,
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
RunLoopStepInfo,
|
||||
RunLoopStopReason,
|
||||
} from '../llm/runtime-port.js';
|
||||
export { RuntimeAgentRunner } from '../llm/runtime-port.js';
|
||||
export type { AgentTelemetryPort } from '../llm/ai-sdk-runtime.js';
|
||||
27
packages/cli/src/context/connections/connection-type.ts
Normal file
27
packages/cli/src/context/connections/connection-type.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
import { z } from 'zod';
|
||||
|
||||
/** Connection type identifiers accepted by `connectionTypeSchema`, in declaration order. */
const connectionTypes = [
  'POSTGRESQL',
  'SQLITE',
  'SQLSERVER',
  'BIGQUERY',
  'SNOWFLAKE',
  'CENTRALREACH',
  'EPIC',
  'CERNER',
  'ATHENA',
  'QUICKBOOKS',
  'WORKDAY',
  'REST',
  'S3',
  'SLACK',
  'METABASE',
  'LOOKER',
  'NOTION',
  'MYSQL',
  'CLICKHOUSE',
  'PLAIN',
  'BETTERSTACK',
] as const;

/** Zod schema validating a connection type identifier. */
export const connectionTypeSchema = z.enum(connectionTypes);

/** Union of the connection type identifiers. */
export type ConnectionType = z.infer<typeof connectionTypeSchema>;
|
||||
30
packages/cli/src/context/connections/dialects.test.ts
Normal file
30
packages/cli/src/context/connections/dialects.test.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { getDialectForDriver } from './dialects.js';
|
||||
|
||||
// Unit tests for driver -> dialect lookup and table-name formatting.
describe('getDialectForDriver', () => {
  // Drivers whose test table carries a catalog.
  const driversWithCatalog = new Set<string>(['snowflake', 'bigquery', 'sqlserver']);

  it.each([
    ['postgres', '"public"."orders"'],
    ['postgresql', '"public"."orders"'],
    ['mysql', '`public`.`orders`'],
    ['clickhouse', '`public`.`orders`'],
    ['sqlite', '"orders"'],
    ['snowflake', '"analytics"."public"."orders"'],
    ['bigquery', '`analytics`.`public`.`orders`'],
    ['sqlserver', '[analytics].[public].[orders]'],
  ] as const)('formats table names for %s', (driver, expected) => {
    const table = {
      catalog: driversWithCatalog.has(driver) ? 'analytics' : null,
      db: driver === 'sqlite' ? null : 'public',
      name: 'orders',
    };

    const formatted = getDialectForDriver(driver).formatTableName(table);

    expect(formatted).toBe(expected);
  });

  it('throws with a supported-driver list for unknown drivers', () => {
    const lookupUnknown = () => getDialectForDriver('oracle');

    expect(lookupUnknown).toThrow(
      'Unsupported warehouse driver "oracle". Supported drivers: bigquery, clickhouse, mysql, postgres, postgresql, sqlite, sqlite3, snowflake, sqlserver',
    );
  });
});
|
||||
102
packages/cli/src/context/connections/dialects.ts
Normal file
102
packages/cli/src/context/connections/dialects.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
|
||||
|
||||
/** Driver names with a registered dialect, in the order shown in error messages. */
const supportedDrivers = [
  'bigquery',
  'clickhouse',
  'mysql',
  'postgres',
  'postgresql',
  'sqlite',
  'sqlite3',
  'snowflake',
  'sqlserver',
] as const;

/** Union of the driver names listed in `supportedDrivers`. */
export type SupportedDriver = (typeof supportedDrivers)[number];

/** Minimal SQL dialect surface: identifier quoting, table naming, type classification. */
export interface KtxDialect {
  /** Driver this dialect was created for. */
  readonly type: SupportedDriver;
  /** Quotes a single identifier. */
  quoteIdentifier(identifier: string): string;
  /** Formats a table reference as a quoted, dot-separated name. */
  formatTableName(table: KtxTableRef): string;
  /** Classifies a native column type into a dimension type. */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType;
}
|
||||
|
||||
/** Wraps an identifier in double quotes, doubling any embedded `"`. */
function doubleQuoted(identifier: string): string {
  const escaped = identifier.split('"').join('""');
  return '"' + escaped + '"';
}
|
||||
|
||||
/** Wraps an identifier in backticks, doubling any embedded backtick. */
function backtickQuoted(identifier: string): string {
  const escaped = identifier.split('`').join('``');
  return '`' + escaped + '`';
}
|
||||
|
||||
/** Wraps an identifier in backticks, backslash-escaping any embedded backtick. */
function bigQueryQuoted(identifier: string): string {
  const escaped = identifier.split('`').join('\\`');
  return '`' + escaped + '`';
}
|
||||
|
||||
/** Wraps an identifier in square brackets, doubling any embedded `]`. */
function bracketQuoted(identifier: string): string {
  const escaped = identifier.split(']').join(']]');
  return '[' + escaped + ']';
}
|
||||
|
||||
/**
 * Classifies a native column type by substring heuristics.
 *
 * Checks run in order: date/time first, then numeric fragments, then boolean
 * (`bool` anywhere, or exactly `bit`); everything else is `'string'`.
 */
function inferDimensionType(nativeType: string): KtxSchemaDimensionType {
  const typeName = nativeType.toLowerCase().trim();
  const mentions = (...fragments: string[]): boolean => fragments.some((fragment) => typeName.includes(fragment));

  if (mentions('date', 'time')) {
    return 'time';
  }
  if (mentions('int', 'num', 'dec', 'float', 'double', 'real')) {
    return 'number';
  }
  if (mentions('bool') || typeName === 'bit') {
    return 'boolean';
  }
  return 'string';
}
|
||||
|
||||
/**
 * Joins the quoted parts of a table reference with dots.
 *
 * @param table Table reference.
 * @param quote Identifier quoting function.
 * @param sqlite When true only the table name is used; otherwise catalog,
 *   db, and name are used, skipping empty parts.
 */
function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string {
  if (sqlite) {
    return [table.name].map(quote).join('.');
  }
  const segments = [table.catalog, table.db, table.name].filter((segment): segment is string => !!segment);
  return segments.map(quote).join('.');
}
|
||||
|
||||
/**
 * Builds a dialect from a driver name and an identifier quoting function.
 *
 * @param type Driver the dialect is registered for.
 * @param quote Identifier quoting function.
 * @param sqlite When true, table names are formatted from the name only.
 */
function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect {
  const formatTableName = (table: KtxTableRef): string => formatWithParts(table, quote, sqlite);
  return {
    type,
    quoteIdentifier: quote,
    formatTableName,
    mapToDimensionType: inferDimensionType,
  };
}
|
||||
|
||||
/** Dialect registry keyed by driver name. */
const dialects: Record<SupportedDriver, KtxDialect> = {
  postgres: createDialect('postgres', doubleQuoted),
  postgresql: createDialect('postgresql', doubleQuoted),
  mysql: createDialect('mysql', backtickQuoted),
  clickhouse: createDialect('clickhouse', backtickQuoted),
  sqlite: createDialect('sqlite', doubleQuoted, true),
  sqlite3: createDialect('sqlite3', doubleQuoted, true),
  snowflake: createDialect('snowflake', doubleQuoted),
  bigquery: createDialect('bigquery', bigQueryQuoted),
  sqlserver: createDialect('sqlserver', bracketQuoted),
};

/**
 * Looks up the dialect registered for a driver name.
 *
 * The name is matched case-insensitively after trimming whitespace.
 *
 * @param driver Driver name from a connection configuration.
 * @returns The registered dialect.
 * @throws Error listing the supported drivers when the name is not registered.
 */
export function getDialectForDriver(driver: string): KtxDialect {
  const normalized = driver.toLowerCase().trim();
  // Own-property check: the `in` operator also matches inherited members
  // such as "constructor", which would return a non-dialect value.
  if (Object.prototype.hasOwnProperty.call(dialects, normalized)) {
    return dialects[normalized as SupportedDriver];
  }
  throw new Error(`Unsupported warehouse driver "${driver}". Supported drivers: ${supportedDrivers.join(', ')}`);
}
|
||||
30
packages/cli/src/context/connections/index.ts
Normal file
30
packages/cli/src/context/connections/index.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
export type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
export type { KtxDialect, SupportedDriver } from './dialects.js';
|
||||
export { createDefaultLocalQueryExecutor, type DefaultLocalQueryExecutorOptions } from './local-query-executor.js';
|
||||
export { getDialectForDriver } from './dialects.js';
|
||||
export { normalizeQueryRows } from './query-executor.js';
|
||||
export { createPostgresQueryExecutor } from './postgres-query-executor.js';
|
||||
export { assertReadOnlySql, limitSqlForExecution } from './read-only-sql.js';
|
||||
export { createSqliteQueryExecutor, sqliteDatabasePathFromConnection } from './sqlite-query-executor.js';
|
||||
export { connectionTypeSchema, type ConnectionType } from './connection-type.js';
|
||||
export {
|
||||
localConnectionInfoFromConfig,
|
||||
localConnectionToWarehouseDescriptor,
|
||||
localConnectionTypeForConfig,
|
||||
type LocalConnectionInfo,
|
||||
type LocalWarehouseDescriptor,
|
||||
} from './local-warehouse-descriptor.js';
|
||||
export {
|
||||
KTX_NOTION_ORG_KNOWLEDGE_WARNING,
|
||||
notionConnectionToPullConfig,
|
||||
parseNotionConnectionConfig,
|
||||
redactNotionConnectionConfig,
|
||||
resolveNotionConnectionAuthToken,
|
||||
resolveNotionAuthToken,
|
||||
type KtxNotionConnectionConfig,
|
||||
type RedactedKtxNotionConnectionConfig,
|
||||
} from './notion-config.js';
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createDefaultLocalQueryExecutor } from './local-query-executor.js';
|
||||
|
||||
// Unit tests for driver-based dispatch of the default local query executor.
describe('createDefaultLocalQueryExecutor', () => {
  // Stub executor whose single-row result is tagged by its header name.
  const stubExecutor = (header: string, value: number) => ({
    execute: vi.fn(async () => ({
      headers: [header],
      rows: [[value]],
      totalRows: 1,
      command: 'SELECT',
      rowCount: 1,
    })),
  });

  it('dispatches postgres and sqlite drivers to their executors', async () => {
    const postgres = stubExecutor('pg', 1);
    const sqlite = stubExecutor('sqlite', 2);
    const executor = createDefaultLocalQueryExecutor({ postgres, sqlite });

    const postgresRun = executor.execute({
      connectionId: 'pg',
      connection: { driver: 'postgres' },
      sql: 'select 1',
    });
    await expect(postgresRun).resolves.toMatchObject({ headers: ['pg'] });

    const sqliteRun = executor.execute({
      connectionId: 'local',
      connection: { driver: 'sqlite' },
      sql: 'select 1',
    });
    await expect(sqliteRun).resolves.toMatchObject({ headers: ['sqlite'] });

    expect(postgres.execute).toHaveBeenCalledTimes(1);
    expect(sqlite.execute).toHaveBeenCalledTimes(1);
  });

  it('rejects unsupported local execution drivers', async () => {
    const executor = createDefaultLocalQueryExecutor({
      postgres: { execute: vi.fn() },
      sqlite: { execute: vi.fn() },
    });

    const unsupportedRun = executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'snowflake' },
      sql: 'select 1',
    });

    await expect(unsupportedRun).rejects.toThrow('No local query executor is configured for driver "snowflake".');
  });
});
|
||||
34
packages/cli/src/context/connections/local-query-executor.ts
Normal file
34
packages/cli/src/context/connections/local-query-executor.ts
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import { createPostgresQueryExecutor } from './postgres-query-executor.js';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
import { createSqliteQueryExecutor } from './sqlite-query-executor.js';
|
||||
|
||||
/** Optional executor overrides; any omitted executor is created with its default factory. */
export interface DefaultLocalQueryExecutorOptions {
  /** Executor for the `postgres` / `postgresql` drivers. */
  postgres?: KtxSqlQueryExecutorPort;
  /** Executor for the `sqlite` / `sqlite3` drivers. */
  sqlite?: KtxSqlQueryExecutorPort;
}

/** Lowercased driver name of the input's connection, or `''` when absent. */
function driverFor(input: KtxSqlQueryExecutionInput): string {
  const declaredDriver = input.connection?.driver ?? '';
  return String(declaredDriver).toLowerCase();
}

/**
 * Creates an executor that routes each query to the Postgres or SQLite
 * executor according to the connection's driver.
 *
 * Both executors are resolved when this factory runs, not on first use.
 *
 * @param options Executor overrides.
 * @returns Executor whose `execute` rejects for any other driver.
 */
export function createDefaultLocalQueryExecutor(options: DefaultLocalQueryExecutorOptions = {}): KtxSqlQueryExecutorPort {
  const postgres = options.postgres ?? createPostgresQueryExecutor();
  const sqlite = options.sqlite ?? createSqliteQueryExecutor();

  return {
    async execute(input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> {
      switch (driverFor(input)) {
        case 'postgres':
        case 'postgresql':
          return postgres.execute(input);
        case 'sqlite':
        case 'sqlite3':
          return sqlite.execute(input);
        default:
          throw new Error(`No local query executor is configured for driver "${input.connection?.driver ?? 'unknown'}".`);
      }
    },
  };
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
localConnectionInfoFromConfig,
|
||||
localConnectionToWarehouseDescriptor,
|
||||
localConnectionTypeForConfig,
|
||||
} from './local-warehouse-descriptor.js';
|
||||
|
||||
// Descriptor mapping: URL-derived fields, explicit BigQuery fields, and the
// null result for drivers that are not warehouses.
describe('localConnectionToWarehouseDescriptor', () => {
  it('maps local Postgres URLs to canonical warehouse descriptors', () => {
    const descriptor = localConnectionToWarehouseDescriptor('warehouse', {
      driver: 'postgres',
      url: 'postgresql://readonly@db.example.test/analytics',
    });

    expect(descriptor).toMatchObject({
      id: 'warehouse',
      connection_type: 'POSTGRESQL',
      host: 'db.example.test',
      database: 'analytics',
    });
  });

  it('maps BigQuery project and dataset from explicit fields', () => {
    const descriptor = localConnectionToWarehouseDescriptor('bq', {
      driver: 'bigquery',
      project_id: 'acme',
      dataset_id: 'warehouse',
    });

    expect(descriptor).toMatchObject({
      id: 'bq',
      connection_type: 'BIGQUERY',
      project_id: 'acme',
      dataset_id: 'warehouse',
    });
  });

  it('returns null for non-warehouse adapters', () => {
    const descriptor = localConnectionToWarehouseDescriptor('looker', {
      driver: 'looker',
      base_url: 'https://looker.example.com',
      client_id: 'client',
    });

    expect(descriptor).toBeNull();
  });
});
|
||||
|
||||
// Connection-type labels and the nullable info record built from them.
describe('local connection info helpers', () => {
  it('returns canonical warehouse connection types for local catalogs', () => {
    const postgresType = localConnectionTypeForConfig('warehouse', { driver: 'postgres' });
    const bigQueryType = localConnectionTypeForConfig('bq', { driver: 'bigquery', project_id: 'acme' });
    const snowflakeType = localConnectionTypeForConfig('snowflake', { driver: 'snowflake' });

    expect(postgresType).toBe('POSTGRESQL');
    expect(bigQueryType).toBe('BIGQUERY');
    expect(snowflakeType).toBe('SNOWFLAKE');
  });

  it('keeps non-warehouse adapter labels for display-only local connection surfaces', () => {
    const metabaseType = localConnectionTypeForConfig('prod-metabase', {
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
    });

    expect(metabaseType).toBe('metabase');
    // A config without any driver falls back to the 'unknown' label.
    expect(localConnectionTypeForConfig('missing-driver', {} as never)).toBe('unknown');
  });

  it('builds nullable local connection info records', () => {
    const info = localConnectionInfoFromConfig('warehouse', { driver: 'postgres' });

    expect(info).toEqual({
      id: 'warehouse',
      name: 'warehouse',
      connectionType: 'POSTGRESQL',
    });
    expect(localConnectionInfoFromConfig('missing', undefined)).toBeNull();
  });
});
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
import type { KtxProjectConnectionConfig } from '../project/config.js';
|
||||
import type { ConnectionType } from './connection-type.js';
|
||||
|
||||
/**
 * Warehouse-shaped view of a local project connection, as produced by
 * `localConnectionToWarehouseDescriptor`.
 */
export interface LocalWarehouseDescriptor {
  /** Connection id as passed in by the caller. */
  id: string;
  /** Canonical connection type resolved from the configured driver. */
  connection_type: ConnectionType;
  /** Host name from the config or a parsed connection URL; null when unknown. */
  host?: string | null;
  /** Database name from the config or a parsed connection URL; null when unknown. */
  database?: string | null;
  /** Account taken from the config's `account` field; null when absent. */
  account?: string | null;
  /** Project id from the config or a parsed connection URL; null when unknown. */
  project_id?: string | null;
  /** Dataset id from the config or a parsed connection URL; null when unknown. */
  dataset_id?: string | null;
  /** Shallow copy of the raw connection config. */
  connection_params: Record<string, unknown>;
}
|
||||
|
||||
/** Minimal display record for a local connection. */
export interface LocalConnectionInfo {
  /** Connection id. */
  id: string;
  /** Display name; `localConnectionInfoFromConfig` sets it to the id. */
  name: string;
  /** Canonical warehouse type, the raw driver label, or 'unknown'. */
  connectionType: string;
}
|
||||
|
||||
/**
 * Lower-cased driver name -> canonical warehouse connection type.
 * Drivers missing from this table are not treated as warehouses.
 */
const DRIVER_TO_CONNECTION_TYPE: Record<string, ConnectionType> = {
  // Aliases that share one canonical type are grouped together.
  postgres: 'POSTGRESQL',
  postgresql: 'POSTGRESQL',

  sqlite: 'SQLITE',

  sqlserver: 'SQLSERVER',
  mssql: 'SQLSERVER',

  mysql: 'MYSQL',
  clickhouse: 'CLICKHOUSE',
  snowflake: 'SNOWFLAKE',
  bigquery: 'BIGQUERY',
};
|
||||
|
||||
/**
 * Converts a local project connection into a warehouse descriptor.
 *
 * @param id - Connection id, copied onto the descriptor.
 * @param connection - Raw connection config; may be undefined.
 * @returns The descriptor, or null when there is no connection or its driver
 *   is not a known warehouse driver.
 *
 * Explicit config fields (`host`, `database`, `project_id`, `dataset_id`)
 * always win over values parsed from `url`. URLs starting with `env:` or
 * `file:` are references rather than connection strings and are not parsed.
 */
export function localConnectionToWarehouseDescriptor(
  id: string,
  connection: KtxProjectConnectionConfig | undefined,
): LocalWarehouseDescriptor | null {
  if (!connection) {
    return null;
  }
  const connectionType = DRIVER_TO_CONNECTION_TYPE[String(connection.driver ?? '').toLowerCase()];
  if (!connectionType) {
    return null;
  }

  const info: LocalWarehouseDescriptor = {
    id,
    connection_type: connectionType,
    connection_params: { ...connection },
  };
  const url = typeof connection.url === 'string' ? connection.url : null;
  if (url && !url.startsWith('env:') && !url.startsWith('file:')) {
    try {
      const parsed = new URL(url);
      info.host = parsed.hostname || null;
      if (parsed.pathname.length > 1) {
        const [first, second] = parsed.pathname.slice(1).split('/');
        // `hostname` and the path segments are strings that may be empty but
        // are never nullish, so `||` (not `??`) is required for the fallbacks
        // to fire and for empty segments to become null.
        if (connectionType === 'BIGQUERY') {
          info.project_id = stringField(connection.project_id) ?? (parsed.hostname || first || null);
          info.dataset_id = stringField(connection.dataset_id) ?? (second || null);
        } else {
          info.database = first || null;
        }
      }
    } catch {
      // Unparseable URL: fall back to the explicit host field, if any.
      info.host = stringField(connection.host);
    }
  }

  // Explicit fields take precedence over anything derived from the URL.
  info.host = stringField(connection.host) ?? info.host ?? null;
  info.database = stringField(connection.database) ?? info.database ?? null;
  info.account = stringField(connection.account) ?? null;
  info.project_id = stringField(connection.project_id) ?? info.project_id ?? null;
  info.dataset_id = stringField(connection.dataset_id) ?? info.dataset_id ?? null;
  return info;
}
|
||||
|
||||
/**
 * Resolves the connection-type label for a local connection.
 *
 * @returns The canonical warehouse type when the driver is a warehouse,
 *   otherwise the trimmed driver string, or 'unknown' when no usable driver
 *   string is configured.
 */
export function localConnectionTypeForConfig(id: string, connection: KtxProjectConnectionConfig | undefined): string {
  const descriptor = localConnectionToWarehouseDescriptor(id, connection);
  if (descriptor !== null) {
    return descriptor.connection_type;
  }

  const rawDriver = connection?.driver;
  if (typeof rawDriver !== 'string') {
    return 'unknown';
  }
  const label = rawDriver.trim();
  return label.length > 0 ? label : 'unknown';
}
|
||||
|
||||
/**
 * Builds the display record for a local connection.
 *
 * @returns null when `connection` is missing; otherwise a record whose `name`
 *   equals `id` and whose type comes from `localConnectionTypeForConfig`.
 */
export function localConnectionInfoFromConfig(
  id: string,
  connection: KtxProjectConnectionConfig | undefined,
): LocalConnectionInfo | null {
  return connection
    ? { id, name: id, connectionType: localConnectionTypeForConfig(id, connection) }
    : null;
}
|
||||
|
||||
/** Returns the trimmed string, or null for non-strings and blank strings. */
function stringField(value: unknown): string | null {
  if (typeof value !== 'string') {
    return null;
  }
  const text = value.trim();
  return text.length > 0 ? text : null;
}
|
||||
157
packages/cli/src/context/connections/notion-config.test.ts
Normal file
157
packages/cli/src/context/connections/notion-config.test.ts
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import {
|
||||
notionConnectionToPullConfig,
|
||||
parseNotionConnectionConfig,
|
||||
redactNotionConnectionConfig,
|
||||
resolveNotionAuthToken,
|
||||
} from './notion-config.js';
|
||||
|
||||
// Parsing, redaction, token resolution, and pull-config conversion for
// standalone Notion connections. A fresh temp dir backs the file-token case.
describe('standalone Notion connection config', () => {
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-notion-config-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('parses selected-root Notion config with safe defaults', () => {
    const raw = {
      driver: 'notion',
      auth_token_ref: 'env:NOTION_TOKEN',
      crawl_mode: 'selected_roots',
      root_page_ids: ['page-1'],
    };

    const parsed = parseNotionConnectionConfig(raw);

    expect(parsed).toEqual({
      driver: 'notion',
      auth_token: null,
      auth_token_ref: 'env:NOTION_TOKEN',
      crawl_mode: 'selected_roots',
      root_page_ids: ['page-1'],
      root_database_ids: [],
      root_data_source_ids: [],
      max_pages_per_run: 1000,
      max_knowledge_creates_per_run: 25,
      max_knowledge_updates_per_run: 20,
    });
    expect(parsed).not.toHaveProperty('last_successful_cursor');
  });

  it('parses inline Notion auth tokens without requiring auth_token_ref', () => {
    const raw = {
      driver: 'notion',
      auth_token: ' ntn_inline_token ',
      crawl_mode: 'selected_roots',
      root_page_ids: ['page-1'],
    };

    const parsed = parseNotionConnectionConfig(raw);

    expect(parsed).toMatchObject({
      driver: 'notion',
      auth_token: 'ntn_inline_token',
      auth_token_ref: null,
      crawl_mode: 'selected_roots',
      root_page_ids: ['page-1'],
    });
  });

  it('redacts token references from display output', () => {
    const parsed = parseNotionConnectionConfig({
      driver: 'notion',
      auth_token_ref: 'file:/Users/example/.config/notion-token',
      crawl_mode: 'all_accessible',
      max_pages_per_run: 80,
    });

    const redacted = redactNotionConnectionConfig(parsed);

    expect(redacted).toEqual({
      driver: 'notion',
      hasAuthToken: true,
      crawlMode: 'all_accessible',
      rootPageIds: [],
      rootDatabaseIds: [],
      rootDataSourceIds: [],
      maxPagesPerRun: 80,
      maxKnowledgeCreatesPerRun: 25,
      maxKnowledgeUpdatesPerRun: 20,
      warning: 'Anything accessible to this Notion integration can become organization knowledge.',
    });
  });

  it('requires at least one selected root in selected_roots mode', () => {
    const parseWithoutRoots = () =>
      parseNotionConnectionConfig({
        driver: 'notion',
        auth_token_ref: 'env:NOTION_TOKEN',
        crawl_mode: 'selected_roots',
      });

    expect(parseWithoutRoots).toThrow('selected_roots requires at least one root page, database, or data source id');
  });

  it('resolves env and file token references without exposing the reference in errors', async () => {
    const tokenPath = join(tempDir, 'notion-token.txt');
    await writeFile(tokenPath, 'ntn_file_token\n', 'utf-8');

    const fromEnv = resolveNotionAuthToken('env:NOTION_TOKEN', {
      env: { NOTION_TOKEN: 'ntn_env_token' },
    });
    await expect(fromEnv).resolves.toBe('ntn_env_token');

    await expect(resolveNotionAuthToken(`file:${tokenPath}`)).resolves.toBe('ntn_file_token');

    await expect(resolveNotionAuthToken('env:MISSING_NOTION_TOKEN', { env: {} })).rejects.toThrow(
      'Notion token environment variable MISSING_NOTION_TOKEN is not set',
    );
  });

  it('converts standalone config into adapter pull config', async () => {
    const parsed = parseNotionConnectionConfig({
      driver: 'notion',
      auth_token_ref: 'env:NOTION_TOKEN',
      crawl_mode: 'all_accessible',
      max_pages_per_run: 12,
      max_knowledge_creates_per_run: 2,
      max_knowledge_updates_per_run: 7,
      last_successful_cursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}',
    });

    const pullConfig = await notionConnectionToPullConfig(parsed, { env: { NOTION_TOKEN: 'ntn_env_token' } });

    // The cursor supplied in the raw config must not be carried over.
    expect(pullConfig).toEqual({
      authToken: 'ntn_env_token',
      crawlMode: 'all_accessible',
      rootPageIds: [],
      rootDatabaseIds: [],
      rootDataSourceIds: [],
      maxPagesPerRun: 12,
      maxKnowledgeCreatesPerRun: 2,
      maxKnowledgeUpdatesPerRun: 7,
      lastSuccessfulCursor: null,
    });
  });

  it('uses inline Notion auth_token when building adapter pull config', async () => {
    const parsed = parseNotionConnectionConfig({
      driver: 'notion',
      auth_token: 'ntn_inline_token',
      auth_token_ref: 'env:STALE_NOTION_TOKEN',
      crawl_mode: 'all_accessible',
    });
    // Neither the env nor the file reader may be consulted for an inline token.
    const resolverOptions = {
      env: {},
      readTextFile: async () => {
        throw new Error('readTextFile should not be called for inline auth_token');
      },
    };

    const pullConfig = await notionConnectionToPullConfig(parsed, resolverOptions);

    expect(pullConfig.authToken).toBe('ntn_inline_token');
  });
});
|
||||
221
packages/cli/src/context/connections/notion-config.ts
Normal file
221
packages/cli/src/context/connections/notion-config.ts
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import {
|
||||
NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN,
|
||||
type NotionPullConfig,
|
||||
notionPullConfigSchema,
|
||||
} from '../ingest/adapters/notion/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../project/config.js';
|
||||
|
||||
/** Warning text attached to every redacted Notion connection config. */
export const KTX_NOTION_ORG_KNOWLEDGE_WARNING = 'Anything accessible to this Notion integration can become organization knowledge.';
|
||||
|
||||
/** Crawl strategies accepted by `parseNotionConnectionConfig`. */
type KtxNotionCrawlMode = 'selected_roots' | 'all_accessible';
|
||||
|
||||
/** The `driver: 'notion'` member of the project connection config union. */
type RawKtxNotionConnectionConfig = Extract<KtxProjectConnectionConfig, { driver: 'notion' }>;
|
||||
|
||||
/**
 * Notion connection config after parsing: the raw config with its auth,
 * crawl, and limit fields replaced by required, validated values.
 */
export type KtxNotionConnectionConfig = Omit<
  RawKtxNotionConnectionConfig,
  | 'auth_token'
  | 'auth_token_ref'
  | 'crawl_mode'
  | 'root_page_ids'
  | 'root_database_ids'
  | 'root_data_source_ids'
  | 'max_pages_per_run'
  | 'max_knowledge_creates_per_run'
  | 'max_knowledge_updates_per_run'
> & {
  driver: 'notion';
  /** Inline token, or null when none was configured. */
  auth_token: string | null;
  /** `env:NAME` or `file:/path` token reference, or null when none was configured. */
  auth_token_ref: string | null;
  crawl_mode: KtxNotionCrawlMode;
  root_page_ids: string[];
  root_database_ids: string[];
  root_data_source_ids: string[];
  /** Page budget per run; `parseNotionConnectionConfig` accepts 1 to 10,000. */
  max_pages_per_run: number;
  /** Create budget per run; `parseNotionConnectionConfig` accepts 0 to 25. */
  max_knowledge_creates_per_run: number;
  /** Update budget per run; `parseNotionConnectionConfig` accepts 0 to 100. */
  max_knowledge_updates_per_run: number;
};
|
||||
|
||||
/**
 * Display-safe projection of a Notion connection config. It carries no token
 * or token reference, only a flag saying whether one is configured.
 */
export interface RedactedKtxNotionConnectionConfig {
  driver: 'notion';
  /** True when an inline token or a token reference is configured. */
  hasAuthToken: boolean;
  crawlMode: KtxNotionCrawlMode;
  rootPageIds: string[];
  rootDatabaseIds: string[];
  rootDataSourceIds: string[];
  maxPagesPerRun: number;
  maxKnowledgeCreatesPerRun: number;
  maxKnowledgeUpdatesPerRun: number;
  /** Always the shared organization-knowledge warning text. */
  warning: typeof KTX_NOTION_ORG_KNOWLEDGE_WARNING;
}
|
||||
|
||||
/** Injection points for Notion token resolution. */
interface ResolveNotionTokenOptions {
  /** Environment used for `env:` references; `process.env` when omitted. */
  env?: Record<string, string | undefined>;
  /** File reader used for `file:` references; a UTF-8 `readFile` when omitted. */
  readTextFile?: (path: string) => Promise<string>;
}
|
||||
|
||||
/** Type guard: true for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
||||
|
||||
/**
 * Returns `value` unchanged when it is a plain (non-array) object.
 *
 * @throws Error when `value` is anything else.
 */
function record(value: unknown): Record<string, unknown> {
  if (isRecord(value)) {
    return value;
  }
  throw new Error('Notion connection config must be an object');
}
|
||||
|
||||
/** Returns the trimmed string, or `fallback` for non-strings and blank strings. */
function stringValue(value: unknown, fallback: string): string {
  if (typeof value !== 'string') {
    return fallback;
  }
  const text = value.trim();
  return text.length > 0 ? text : fallback;
}
|
||||
|
||||
/** Returns the trimmed string, or null for non-strings and blank strings. */
function optionalString(value: unknown): string | null {
  if (typeof value !== 'string') {
    return null;
  }
  const text = value.trim();
  return text.length > 0 ? text : null;
}
|
||||
|
||||
/**
 * Extracts the usable string entries from an unknown value.
 *
 * @param value - Expected to be an array; anything else yields `[]`.
 * @returns The string entries, trimmed, with non-strings and blank strings
 *   dropped. Entries are trimmed so that padded ids are normalized the same
 *   way `stringValue` and `optionalString` normalize scalar fields.
 */
function stringArray(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  return value
    .filter((item): item is string => typeof item === 'string')
    .map((item) => item.trim())
    .filter((item) => item.length > 0);
}
|
||||
|
||||
/**
 * Reads an optional integer setting.
 *
 * @param value - Raw setting value.
 * @param fallback - Returned when `value` is null or undefined.
 * @param name - Setting name used in the error message.
 * @throws Error when `value` is present but is not an integer number.
 */
function integerWithFallback(value: unknown, fallback: number, name: string): number {
  if (value === null || value === undefined) {
    return fallback;
  }
  if (typeof value === 'number' && Number.isInteger(value)) {
    return value;
  }
  throw new Error(`${name} must be an integer`);
}
|
||||
|
||||
/**
 * Reads an optional integer setting and enforces an inclusive range.
 *
 * @throws Error when the value is not an integer, or when the resulting value
 *   (including the fallback) lies outside `[min, max]`.
 */
function boundedInteger(value: unknown, fallback: number, name: string, min: number, max: number): number {
  const candidate = integerWithFallback(value, fallback, name);
  const outOfRange = candidate < min || candidate > max;
  if (outOfRange) {
    throw new Error(`${name} must be between ${min} and ${max}`);
  }
  return candidate;
}
|
||||
|
||||
/**
 * Validates a raw Notion connection config and fills in defaults.
 *
 * Checks run in this order, each throwing an Error on failure: the input is
 * an object, `driver` is 'notion', a token or token reference is present, the
 * reference uses `env:` or `file:`, the crawl mode is supported, selected-root
 * mode has at least one root, and the per-run limits are in range.
 *
 * @param raw - Untrusted config value.
 * @returns The normalized config. Only the fields listed below are kept.
 */
export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionConfig {
  const input = record(raw);
  if (input.driver !== 'notion') {
    throw new Error('Notion connection config requires driver: notion');
  }

  const authToken = optionalString(input.auth_token);
  const authTokenRef = optionalString(input.auth_token_ref);
  if (authToken === null && authTokenRef === null) {
    throw new Error('Notion connection config requires auth_token or auth_token_ref');
  }
  if (authTokenRef !== null && !['env:', 'file:'].some((scheme) => authTokenRef.startsWith(scheme))) {
    throw new Error('Notion auth_token_ref must use env:NAME or file:/path');
  }

  // A missing or blank crawl_mode defaults to the narrower selected-roots mode.
  const crawlMode = stringValue(input.crawl_mode, 'selected_roots');
  if (crawlMode !== 'selected_roots' && crawlMode !== 'all_accessible') {
    throw new Error(`Unsupported Notion crawl_mode: ${crawlMode}`);
  }

  const rootPageIds = stringArray(input.root_page_ids);
  const rootDatabaseIds = stringArray(input.root_database_ids);
  const rootDataSourceIds = stringArray(input.root_data_source_ids);
  const rootCount = rootPageIds.length + rootDatabaseIds.length + rootDataSourceIds.length;
  if (crawlMode === 'selected_roots' && rootCount === 0) {
    throw new Error('selected_roots requires at least one root page, database, or data source id');
  }

  const maxPagesPerRun = boundedInteger(input.max_pages_per_run, 1000, 'max_pages_per_run', 1, 10_000);
  const maxCreatesPerRun = boundedInteger(
    input.max_knowledge_creates_per_run,
    NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN,
    'max_knowledge_creates_per_run',
    0,
    25,
  );
  const maxUpdatesPerRun = boundedInteger(
    input.max_knowledge_updates_per_run,
    20,
    'max_knowledge_updates_per_run',
    0,
    100,
  );

  return {
    driver: 'notion',
    auth_token: authToken,
    auth_token_ref: authTokenRef,
    crawl_mode: crawlMode,
    root_page_ids: rootPageIds,
    root_database_ids: rootDatabaseIds,
    root_data_source_ids: rootDataSourceIds,
    max_pages_per_run: maxPagesPerRun,
    max_knowledge_creates_per_run: maxCreatesPerRun,
    max_knowledge_updates_per_run: maxUpdatesPerRun,
  };
}
|
||||
|
||||
/**
 * Produces the display-safe form of a parsed Notion config.
 * The token and token reference are reduced to a single `hasAuthToken` flag,
 * and the organization-knowledge warning is always attached.
 */
export function redactNotionConnectionConfig(config: KtxNotionConnectionConfig): RedactedKtxNotionConnectionConfig {
  const hasAuthToken = Boolean(config.auth_token ?? config.auth_token_ref);
  const redacted: RedactedKtxNotionConnectionConfig = {
    driver: 'notion',
    hasAuthToken,
    crawlMode: config.crawl_mode,
    rootPageIds: config.root_page_ids,
    rootDatabaseIds: config.root_database_ids,
    rootDataSourceIds: config.root_data_source_ids,
    maxPagesPerRun: config.max_pages_per_run,
    maxKnowledgeCreatesPerRun: config.max_knowledge_creates_per_run,
    maxKnowledgeUpdatesPerRun: config.max_knowledge_updates_per_run,
    warning: KTX_NOTION_ORG_KNOWLEDGE_WARNING,
  };
  return redacted;
}
|
||||
|
||||
/**
 * Expands a leading `~` or `~/` to the current user's home directory.
 * Every other path, including `~user/...`, is returned unchanged.
 */
function expandHome(path: string): string {
  const refersToHome = path === '~' || path.startsWith('~/');
  if (!refersToHome) {
    return path;
  }
  // slice(2) drops the "~/" prefix; for a bare "~" it yields "".
  const remainder = path.slice(2);
  return resolve(homedir(), remainder);
}
|
||||
|
||||
/**
 * Resolves a Notion token reference to the token itself.
 *
 * @param authTokenRef - `env:NAME` to read an environment variable, or
 *   `file:/path` to read a file (a leading `~` is expanded).
 * @param options - Optional environment and file-reader overrides.
 * @returns The trimmed token.
 * @throws Error when the variable is unset or blank, when the file is empty
 *   after trimming, or when the reference uses neither scheme.
 */
export async function resolveNotionAuthToken(
  authTokenRef: string,
  options: ResolveNotionTokenOptions = {},
): Promise<string> {
  if (authTokenRef.startsWith('env:')) {
    const envName = authTokenRef.slice('env:'.length);
    // Trim before the emptiness check, as the file branch does, so that a
    // whitespace-only variable is rejected rather than returned as an empty token.
    const value = (options.env ?? process.env)[envName]?.trim();
    if (!value) {
      throw new Error(`Notion token environment variable ${envName} is not set`);
    }
    return value;
  }
  if (authTokenRef.startsWith('file:')) {
    const path = expandHome(authTokenRef.slice('file:'.length));
    const readTextFile = options.readTextFile ?? ((filePath: string) => readFile(filePath, 'utf-8'));
    const value = (await readTextFile(path)).trim();
    if (!value) {
      throw new Error(`Notion token file is empty: ${path}`);
    }
    return value;
  }
  throw new Error('Notion auth_token_ref must use env:NAME or file:/path');
}
|
||||
|
||||
/**
 * Returns the token for a parsed Notion config.
 * An inline `auth_token` wins; otherwise `auth_token_ref` is resolved (an
 * absent reference is passed on as an empty string, which the resolver rejects).
 */
export async function resolveNotionConnectionAuthToken(
  config: Pick<KtxNotionConnectionConfig, 'auth_token' | 'auth_token_ref'>,
  options: ResolveNotionTokenOptions = {},
): Promise<string> {
  const inlineToken = config.auth_token;
  if (inlineToken !== null && inlineToken !== undefined) {
    return inlineToken;
  }
  const tokenRef = config.auth_token_ref ?? '';
  return resolveNotionAuthToken(tokenRef, options);
}
|
||||
|
||||
/**
 * Converts a parsed Notion connection config into the adapter's pull config.
 * The token is resolved first, then the camel-cased candidate is validated
 * with `notionPullConfigSchema`. `lastSuccessfulCursor` is always null.
 */
export async function notionConnectionToPullConfig(
  config: KtxNotionConnectionConfig,
  options: ResolveNotionTokenOptions = {},
): Promise<NotionPullConfig> {
  const authToken = await resolveNotionConnectionAuthToken(config, options);
  const candidate = {
    authToken,
    crawlMode: config.crawl_mode,
    rootPageIds: config.root_page_ids,
    rootDatabaseIds: config.root_database_ids,
    rootDataSourceIds: config.root_data_source_ids,
    maxPagesPerRun: config.max_pages_per_run,
    maxKnowledgeCreatesPerRun: config.max_knowledge_creates_per_run,
    maxKnowledgeUpdatesPerRun: config.max_knowledge_updates_per_run,
    lastSuccessfulCursor: null,
  };
  return notionPullConfigSchema.parse(candidate);
}
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createPostgresQueryExecutor } from './postgres-query-executor.js';
|
||||
|
||||
/**
 * Builds a mock pg-style client plus a log of everything passed to `query`.
 * BEGIN/COMMIT return empty results; any other query returns two status rows.
 */
function makeClient() {
  const calls: unknown[] = [];

  const query = vi.fn(async (input: unknown) => {
    calls.push(input);
    switch (input) {
      case 'BEGIN READ ONLY':
        return { rows: [], fields: [], rowCount: null, command: 'BEGIN' };
      case 'COMMIT':
        return { rows: [], fields: [], rowCount: null, command: 'COMMIT' };
      default:
        return {
          rows: [
            ['paid', 2],
            ['open', 1],
          ],
          fields: [{ name: 'status' }, { name: 'order_count' }],
          rowCount: 2,
          command: 'SELECT',
        };
    }
  });

  const client = {
    connect: vi.fn(async () => undefined),
    query,
    end: vi.fn(async () => undefined),
  };
  return { client, calls };
}
|
||||
|
||||
// Transaction wrapping, failure cleanup, and URL validation of the executor.
describe('createPostgresQueryExecutor', () => {
  it('runs a read-only transaction in array row mode and closes the client', async () => {
    const { client, calls } = makeClient();
    const clientFactory = vi.fn(() => client);
    const executor = createPostgresQueryExecutor({ clientFactory });

    const result = await executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres', url: 'postgres://example/db' },
      sql: 'select status, count(*) as order_count from public.orders group by status',
      maxRows: 50,
    });

    expect(client.connect).toHaveBeenCalledTimes(1);
    // Statement order: BEGIN, the row-limited query, then COMMIT.
    const [beginCall, queryCall, commitCall] = calls;
    expect(beginCall).toBe('BEGIN READ ONLY');
    expect(queryCall).toEqual({
      text: 'select * from (select status, count(*) as order_count from public.orders group by status) as ktx_query_result limit 50',
      rowMode: 'array',
    });
    expect(commitCall).toBe('COMMIT');
    expect(client.end).toHaveBeenCalledTimes(1);
    expect(result).toEqual({
      headers: ['status', 'order_count'],
      rows: [
        ['paid', 2],
        ['open', 1],
      ],
      totalRows: 2,
      command: 'SELECT',
      rowCount: 2,
    });
  });

  it('rolls back and closes the client when query execution fails', async () => {
    const controlStatements = ['BEGIN READ ONLY', 'ROLLBACK'];
    const client = {
      connect: vi.fn(async () => undefined),
      query: vi.fn(async (input: unknown) => {
        if (input === 'BEGIN READ ONLY' || input === 'ROLLBACK') {
          return { rows: [], fields: [], rowCount: null, command: String(input) };
        }
        throw new Error('syntax error');
      }),
      end: vi.fn(async () => undefined),
    };
    const executor = createPostgresQueryExecutor({
      clientFactory: vi.fn(() => client),
    });

    const execution = executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres', url: 'postgres://example/db' },
      sql: 'select * from broken',
      maxRows: 10,
    });

    await expect(execution).rejects.toThrow('syntax error');
    expect(client.query).toHaveBeenCalledWith(controlStatements[1]);
    expect(client.end).toHaveBeenCalledTimes(1);
  });

  it('requires a Postgres url', async () => {
    const executor = createPostgresQueryExecutor({ clientFactory: vi.fn() });

    const execution = executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres' },
      sql: 'select 1',
    });

    await expect(execution).rejects.toThrow('Local Postgres execution requires connections.warehouse.url');
  });
});
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
import { Client, type ClientConfig } from 'pg';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
import { limitSqlForExecution } from './read-only-sql.js';
|
||||
|
||||
/** The subset of a pg-style client that the Postgres executor relies on. */
interface PgClientLike {
  /** Opens the connection. */
  connect(): Promise<unknown>;
  /** Runs a plain statement, or a query whose rows come back as arrays. */
  query(input: string | { text: string; rowMode: 'array' }): Promise<{
    fields: Array<{ name: string }>;
    rows: unknown[][];
    command: string;
    rowCount: number | null;
  }>;
  /** Closes the connection. */
  end(): Promise<void>;
}
|
||||
|
||||
/** Tuning and injection options for `createPostgresQueryExecutor`. */
interface PostgresQueryExecutorOptions {
  /** Passed to the client as `statement_timeout`; 30,000 when omitted. */
  statementTimeoutMs?: number;
  /** Passed to the client as `query_timeout`; 35,000 when omitted. */
  queryTimeoutMs?: number;
  /** Passed to the client as `connectionTimeoutMillis`; 5,000 when omitted. */
  connectionTimeoutMs?: number;
  /** Client constructor override, e.g. for tests; a real pg `Client` when omitted. */
  clientFactory?: (config: ClientConfig) => PgClientLike;
}
|
||||
|
||||
/**
 * Returns the lower-cased driver name of the input's connection.
 * Yields an empty string when the connection or its driver is missing.
 */
function connectionDriver(input: KtxSqlQueryExecutionInput): string {
  const rawDriver = input.connection?.driver ?? '';
  const driverName = String(rawDriver);
  return driverName.toLowerCase();
}
|
||||
|
||||
/** Default client factory: constructs a pg `Client` from the given config. */
function createDefaultClient(config: ClientConfig): PgClientLike {
  const client = new Client(config);
  return client;
}
|
||||
|
||||
/**
 * Creates an executor that runs one query per call against Postgres.
 *
 * Each `execute` call validates the driver and URL, opens a new client, runs
 * the row-limited SQL inside `BEGIN READ ONLY` / `COMMIT`, and always ends
 * the client once it has connected. On failure it attempts a `ROLLBACK`
 * (ignoring rollback errors) and rethrows the original error.
 *
 * @param options - Timeouts and an optional client factory.
 * @throws Error from `execute` when the driver is not `postgres`/`postgresql`
 *   or when the connection has no non-blank `url`.
 */
export function createPostgresQueryExecutor(options: PostgresQueryExecutorOptions = {}): KtxSqlQueryExecutorPort {
  const makeClient = options.clientFactory ?? createDefaultClient;

  const execute = async (input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> => {
    const driver = connectionDriver(input);
    const connection = input.connection;
    const isPostgres = driver === 'postgres' || driver === 'postgresql';
    if (!isPostgres) {
      throw new Error(`Local Postgres execution cannot run driver "${connection?.driver ?? 'unknown'}".`);
    }
    if (typeof connection?.url !== 'string' || connection.url.trim().length === 0) {
      throw new Error(`Local Postgres execution requires connections.${input.connectionId}.url.`);
    }

    const clientConfig: ClientConfig = {
      connectionString: connection.url,
      statement_timeout: options.statementTimeoutMs ?? 30_000,
      query_timeout: options.queryTimeoutMs ?? 35_000,
      connectionTimeoutMillis: options.connectionTimeoutMs ?? 5_000,
      application_name: 'ktx-local-query',
    };
    const client = makeClient(clientConfig);
    await client.connect();

    try {
      await client.query('BEGIN READ ONLY');
      const { fields, rows, command, rowCount } = await client.query({
        text: limitSqlForExecution(input.sql, input.maxRows),
        rowMode: 'array',
      });
      await client.query('COMMIT');
      return {
        headers: fields.map(({ name }) => name),
        rows,
        totalRows: rows.length,
        command,
        rowCount,
      };
    } catch (error) {
      // Best-effort rollback; the original failure is what the caller sees.
      await client.query('ROLLBACK').catch(() => undefined);
      throw error;
    } finally {
      await client.end();
    }
  };

  return { execute };
}
|
||||
25
packages/cli/src/context/connections/query-executor.ts
Normal file
25
packages/cli/src/context/connections/query-executor.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import type { KtxProjectConnectionConfig } from '../project/index.js';
|
||||
|
||||
/** Everything a query executor needs to run one SQL statement. */
export interface KtxSqlQueryExecutionInput {
  /** Id of the connection being queried. */
  connectionId: string;
  /** Project directory, when the caller has one. */
  projectDir?: string;
  /** Config of the connection; undefined when the id did not resolve. */
  connection: KtxProjectConnectionConfig | undefined;
  /** SQL text to execute. */
  sql: string;
  /** Optional cap on the number of returned rows. */
  maxRows?: number;
}
|
||||
|
||||
/** Tabular result of one executed query. */
export interface KtxSqlQueryExecutionResult {
  /** Column names. */
  headers: string[];
  /** Result rows, each an array of cell values. */
  rows: unknown[][];
  /** Number of rows reported for this result. */
  totalRows: number;
  /** Command tag reported by the executor, e.g. 'SELECT'. */
  command: string;
  /** Row count reported by the executor; null when it reports none. */
  rowCount: number | null;
}
|
||||
|
||||
/** Port implemented by every SQL query executor. */
export interface KtxSqlQueryExecutorPort {
  /** Runs the query described by `input` and resolves with its result. */
  execute(input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult>;
}
|
||||
|
||||
/**
 * Converts rows to array form: array rows pass through unchanged, every other
 * row is replaced by its own enumerable property values.
 */
export function normalizeQueryRows(rows: unknown[]): unknown[][] {
  const normalized: unknown[][] = [];
  for (const row of rows) {
    if (Array.isArray(row)) {
      normalized.push(row);
    } else {
      normalized.push(Object.values(row as Record<string, unknown>));
    }
  }
  return normalized;
}
|
||||
30
packages/cli/src/context/connections/read-only-sql.test.ts
Normal file
30
packages/cli/src/context/connections/read-only-sql.test.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from './read-only-sql.js';
|
||||
|
||||
// Accepts SELECT/WITH statements and rejects statements that start with a
// mutating keyword.
describe('assertReadOnlySql', () => {
  const rejectionMessage = 'Only read-only SELECT/WITH queries can be executed locally';

  it('allows select and with queries', () => {
    const selectSql = 'select * from orders';
    const withSql = 'with paid as (select * from orders) select * from paid';

    expect(assertReadOnlySql(selectSql)).toBe('select * from orders');
    expect(assertReadOnlySql(withSql)).toContain('with paid');
  });

  it('rejects mutating statements before opening a database connection', () => {
    expect(() => assertReadOnlySql('delete from orders')).toThrow(rejectionMessage);
    expect(() => assertReadOnlySql('create table x(id int)')).toThrow(rejectionMessage);
  });
});
|
||||
|
||||
// Row-limit wrapping and trailing-semicolon handling.
describe('limitSqlForExecution', () => {
  it('wraps compiled SQL and strips trailing semicolons', () => {
    const limited = limitSqlForExecution('select * from public.orders; ', 25);

    expect(limited).toBe('select * from (select * from public.orders) as ktx_query_result limit 25');
  });

  it('returns the trimmed SQL when no maxRows value is provided', () => {
    const unlimited = limitSqlForExecution('select * from orders; ', undefined);

    expect(unlimited).toBe('select * from orders');
  });
});
|
||||
22
packages/cli/src/context/connections/read-only-sql.ts
Normal file
22
packages/cli/src/context/connections/read-only-sql.ts
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
/** Matches statements whose first keyword modifies data, schema, or privileges. */
const MUTATING_SQL =
  /^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i;
/** Matches statements whose first keyword is SELECT or WITH. */
const READ_SQL = /^\s*(select|with)\b/i;

/**
 * Checks that a statement starts with SELECT or WITH.
 *
 * NOTE(review): only the leading keyword is inspected. Text after it, such as
 * additional `;`-separated statements or a data-modifying CTE, is not
 * analyzed here, so callers should not treat this as the sole write barrier.
 *
 * @param sql - Statement to check.
 * @returns The statement with surrounding whitespace removed.
 * @throws Error when the statement does not start with SELECT or WITH.
 */
export function assertReadOnlySql(sql: string): string {
  const trimmed = sql.trim();
  if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) {
    throw new Error('Only read-only SELECT/WITH queries can be executed locally.');
  }
  return trimmed;
}

/**
 * Prepares a read-only statement for execution, optionally capping its rows.
 *
 * @param sql - Statement to run; validated with `assertReadOnlySql`.
 * @param maxRows - Row cap. When undefined, the statement is returned
 *   unwrapped (trailing semicolons removed).
 * @returns The statement itself, or the statement wrapped in
 *   `select * from (...) as ktx_query_result limit N`.
 * @throws Error when the statement is not read-only, or when `maxRows` is
 *   given but is not a positive integer (this includes `0` and `NaN`).
 */
export function limitSqlForExecution(sql: string, maxRows: number | undefined): string {
  const trimmed = assertReadOnlySql(sql).replace(/;+\s*$/, '');
  // Only a missing value means "no limit". A falsy-but-present value such as
  // 0 or NaN must reach the validation below instead of silently removing the cap.
  if (maxRows === undefined || maxRows === null) {
    return trimmed;
  }
  if (!Number.isInteger(maxRows) || maxRows <= 0) {
    throw new Error('maxRows must be a positive integer.');
  }
  return `select * from (${trimmed}) as ktx_query_result limit ${maxRows}`;
}
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { writeFileSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import Database from 'better-sqlite3';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { createSqliteQueryExecutor, sqliteDatabasePathFromConnection } from './sqlite-query-executor.js';
|
||||
|
||||
// Runs the SQLite executor against a real on-disk database that is created for every test and
// removed afterwards.
describe('createSqliteQueryExecutor', () => {
  let tempDir: string;
  let dbPath: string;

  // Fields shared by every execution input in this suite.
  const warehouse = () => ({ connectionId: 'warehouse', projectDir: tempDir });

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-sqlite-query-'));
    dbPath = join(tempDir, 'warehouse.db');

    const seedDb = new Database(dbPath);
    seedDb.exec(`
      CREATE TABLE orders (
        id INTEGER PRIMARY KEY,
        status TEXT NOT NULL,
        amount INTEGER NOT NULL
      );
      INSERT INTO orders (status, amount) VALUES
        ('paid', 20),
        ('paid', 30),
        ('open', 10);
    `);
    seedDb.close();
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('executes read-only SELECT SQL against a relative SQLite path', async () => {
    const executor = createSqliteQueryExecutor();

    const result = await executor.execute({
      ...warehouse(),
      connection: { driver: 'sqlite', path: 'warehouse.db' },
      sql: 'select status, count(*) as order_count from orders group by status order by status',
      maxRows: 10,
    });

    expect(result).toEqual({
      headers: ['status', 'order_count'],
      rows: [
        ['open', 1],
        ['paid', 2],
      ],
      totalRows: 2,
      command: 'SELECT',
      rowCount: 2,
    });
  });

  it('supports file urls for SQLite database paths', async () => {
    const resolved = sqliteDatabasePathFromConnection({
      ...warehouse(),
      connection: { driver: 'sqlite', url: `file://${dbPath}` },
      sql: 'select 1',
    });

    expect(resolved).toBe(dbPath);
  });

  it('resolves file references for SQLite path fields', async () => {
    // The config points at a text file whose content is the actual database path.
    const pointerPath = join(tempDir, 'sqlite-path.txt');
    writeFileSync(pointerPath, dbPath, 'utf-8');

    const resolved = sqliteDatabasePathFromConnection({
      ...warehouse(),
      connection: { driver: 'sqlite', path: `file:${pointerPath}` },
      sql: 'select 1',
    });

    expect(resolved).toBe(dbPath);
  });

  it('resolves env references for SQLite database urls', async () => {
    const originalDatabaseUrl = process.env.KTX_SQLITE_TEST_URL;
    process.env.KTX_SQLITE_TEST_URL = `sqlite:${dbPath}`;

    try {
      const resolved = sqliteDatabasePathFromConnection({
        ...warehouse(),
        connection: { driver: 'sqlite', url: 'env:KTX_SQLITE_TEST_URL' },
        sql: 'select 1',
      });

      expect(resolved).toBe(dbPath);
    } finally {
      // Put the variable back exactly as it was, including "not set".
      if (originalDatabaseUrl === undefined) {
        delete process.env.KTX_SQLITE_TEST_URL;
      } else {
        process.env.KTX_SQLITE_TEST_URL = originalDatabaseUrl;
      }
    }
  });

  it('rejects mutating SQL before opening the database', async () => {
    const executor = createSqliteQueryExecutor();

    const attempt = executor.execute({
      ...warehouse(),
      connection: { driver: 'sqlite', path: 'warehouse.db' },
      sql: 'delete from orders',
    });

    await expect(attempt).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
  });

  it('requires a SQLite driver and a database path', async () => {
    const executor = createSqliteQueryExecutor();

    const wrongDriver = executor.execute({
      ...warehouse(),
      connection: { driver: 'postgres', path: 'warehouse.db' },
      sql: 'select 1',
    });
    await expect(wrongDriver).rejects.toThrow('Local SQLite execution cannot run driver "postgres"');

    const missingLocation = executor.execute({
      ...warehouse(),
      connection: { driver: 'sqlite' },
      sql: 'select 1',
    });
    await expect(missingLocation).rejects.toThrow(
      'Local SQLite execution requires connections.warehouse.path or connections.warehouse.url',
    );
  });
});
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
import { isAbsolute, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import Database from 'better-sqlite3';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
import { normalizeQueryRows } from './query-executor.js';
|
||||
import { limitSqlForExecution } from './read-only-sql.js';
|
||||
|
||||
/** Loosely typed connection settings as they appear in project configuration; may be absent. */
type SqliteConnectionConfig = Record<string, unknown> | undefined;

/**
 * Reads the connection's `driver` setting as a lower-case string.
 *
 * @returns The lower-cased driver, or `''` when no connection or driver is present.
 */
function connectionDriver(input: KtxSqlQueryExecutionInput): string {
  const driver = input.connection?.driver ?? '';
  return String(driver).toLowerCase();
}
|
||||
|
||||
/**
 * Looks up `key` on the connection and, when it holds a non-blank string, resolves it through
 * `resolveStringReference`.
 *
 * @returns The resolved value, or `undefined` when the key is missing, not a string, or blank.
 */
function stringConfigValue(connection: SqliteConnectionConfig, key: string): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }

  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }

  return resolveStringReference(key, trimmed);
}
|
||||
|
||||
/**
 * Resolves indirection prefixes in a connection setting.
 *
 * - `env:NAME` returns the value of environment variable `NAME`, or `''` when it is unset.
 * - `file:PATH` returns the trimmed content of the file at `PATH`. A leading `~` in `PATH`
 *   refers to the current user's home directory. This form is not applied to the `url` key,
 *   where `file:` is a URL scheme and is passed through.
 * - Anything else is returned unchanged.
 *
 * @param key - The configuration key the value came from.
 * @param value - The (already trimmed) configured value.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(key: string, value: string): string {
  if (value.startsWith('env:')) {
    return process.env[value.slice('env:'.length)] ?? '';
  }

  if (key !== 'url' && value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator after "~" before resolving: path.resolve() discards the home
    // directory when the following segment is absolute, so "~/x" would otherwise become "/x".
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }

  return value;
}
|
||||
|
||||
/**
 * Extracts a filesystem path from a SQLite connection URL.
 *
 * - `file:` URLs are converted with `fileURLToPath`.
 * - `sqlite:` URLs yield their percent-decoded pathname when it is non-empty.
 * - Any other input (or a `sqlite:` URL with an empty pathname) is returned as given.
 */
function sqlitePathFromUrl(url: string): string {
  if (url.startsWith('file:')) {
    return fileURLToPath(url);
  }

  if (!url.startsWith('sqlite:')) {
    return url;
  }

  const { pathname } = new URL(url);
  return pathname.length > 0 ? decodeURIComponent(pathname) : url;
}
|
||||
|
||||
/**
 * Determines the absolute path of the SQLite database file for a connection.
 *
 * The `path` setting is used when it resolves to a non-empty value; otherwise the path is
 * derived from the `url` setting. Relative results are resolved against `input.projectDir`
 * (or the current working directory when it is not set).
 *
 * @param input - Execution input carrying the connection settings and project directory.
 * @returns An absolute filesystem path.
 * @throws Error when the driver is not `sqlite`/`sqlite3`, or when neither `path` nor `url`
 *   yields a value.
 */
export function sqliteDatabasePathFromConnection(input: KtxSqlQueryExecutionInput): string {
  const driver = connectionDriver(input);
  if (driver !== 'sqlite' && driver !== 'sqlite3') {
    throw new Error(`Local SQLite execution cannot run driver "${input.connection?.driver ?? 'unknown'}".`);
  }

  const pathValue = stringConfigValue(input.connection, 'path');
  const urlValue = stringConfigValue(input.connection, 'url');

  // Use `||`, not `??`: an `env:` reference to an unset variable resolves to '' rather than
  // undefined. With `??` that empty string would win over a configured url and resolve to the
  // project directory itself.
  const candidate = pathValue || (urlValue ? sqlitePathFromUrl(urlValue) : undefined);
  if (!candidate) {
    throw new Error(
      `Local SQLite execution requires connections.${input.connectionId}.path or connections.${input.connectionId}.url.`,
    );
  }

  return isAbsolute(candidate) ? candidate : resolve(input.projectDir ?? process.cwd(), candidate);
}
|
||||
|
||||
/**
 * Builds a query executor that runs read-only SQL against a local SQLite file.
 *
 * Each call validates and limits the SQL, resolves the database path, opens the file
 * read-only (it must already exist), runs the statement, and closes the handle again.
 */
export function createSqliteQueryExecutor(): KtxSqlQueryExecutorPort {
  const execute = async (input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> => {
    // SQL validation comes first so rejected statements never open the database.
    const limitedSql = limitSqlForExecution(input.sql, input.maxRows);
    const databasePath = sqliteDatabasePathFromConnection(input);

    const db = new Database(databasePath, { readonly: true, fileMustExist: true });
    try {
      const statement = db.prepare(limitedSql);
      const rows = statement.all() as unknown[];
      const headers = statement.columns().map((column) => column.name);
      const rowTotal = rows.length;

      return {
        headers,
        rows: normalizeQueryRows(rows),
        totalRows: rowTotal,
        command: 'SELECT',
        rowCount: rowTotal,
      };
    } finally {
      // Always release the file handle, including when prepare/all throws.
      db.close();
    }
  };

  return { execute };
}
|
||||
34
packages/cli/src/context/core/config-reference.test.ts
Normal file
34
packages/cli/src/context/core/config-reference.test.ts
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { resolveKtxConfigReference, resolveKtxHomePath } from './config-reference.js';
|
||||
|
||||
// Covers the three reference forms (env:, file:, literal) plus home-directory expansion.
describe('KTX config references', () => {
  it('resolves env references without returning empty values', () => {
    const reference = 'env:AI_GATEWAY_API_KEY';

    expect(resolveKtxConfigReference(reference, { AI_GATEWAY_API_KEY: ' gateway-key ' })).toBe('gateway-key');
    expect(resolveKtxConfigReference(reference, { AI_GATEWAY_API_KEY: ' ' })).toBeUndefined();
    expect(resolveKtxConfigReference(reference, {})).toBeUndefined();
  });

  it('resolves file references and trims file content', async () => {
    const dir = join(tmpdir(), `ktx-config-reference-${process.pid}`);
    const keyPath = join(dir, 'gateway-key.txt');
    await mkdir(dir, { recursive: true });
    await writeFile(keyPath, 'file-gateway-key\n', 'utf8');

    const resolved = resolveKtxConfigReference(`file:${keyPath}`, {});

    expect(resolved).toBe('file-gateway-key');
  });

  it('returns literal values unchanged after trimming blank-only values', () => {
    const env = {};

    expect(resolveKtxConfigReference('provider/model', env)).toBe('provider/model');
    expect(resolveKtxConfigReference(' ', env)).toBeUndefined();
    expect(resolveKtxConfigReference(undefined, env)).toBeUndefined();
  });

  it('resolves home-prefixed paths', () => {
    const resolved = resolveKtxHomePath('~/ktx/key.txt');

    expect(resolved).toContain('/ktx/key.txt');
  });
});
|
||||
36
packages/cli/src/context/core/config-reference.ts
Normal file
36
packages/cli/src/context/core/config-reference.ts
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
|
||||
/**
 * Resolves a path to an absolute one, expanding a leading `~` to the current user's home
 * directory.
 *
 * @param path - `~`, a `~/`-prefixed path, or any other path (resolved against the current
 *   working directory when relative).
 * @returns The absolute path.
 */
export function resolveKtxHomePath(path: string): string {
  if (path === '~') {
    return homedir();
  }

  if (path.startsWith('~/')) {
    return resolve(homedir(), path.slice(2));
  }

  return resolve(path);
}

/**
 * Resolves a configuration value that may be a literal or a reference.
 *
 * - `env:NAME` returns the trimmed value of `env[NAME]`.
 * - `file:PATH` returns the trimmed content of the file at `PATH` (`~` expands to the home
 *   directory).
 * - Anything else is returned trimmed.
 *
 * Surrounding whitespace on `value` is ignored before the prefix is examined, so a padded
 * reference such as `"  env:NAME"` is resolved rather than returned as literal text.
 *
 * @param value - The configured value, possibly undefined.
 * @param env - Environment to read `env:` references from.
 * @returns The resolved non-empty string, or `undefined` when the value (or what it refers to)
 *   is missing or blank.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
export function resolveKtxConfigReference(value: string | undefined, env: NodeJS.ProcessEnv): string | undefined {
  const trimmed = value?.trim();
  if (!trimmed) {
    return undefined;
  }

  if (trimmed.startsWith('env:')) {
    const envName = trimmed.slice('env:'.length).trim();
    const envValue = env[envName]?.trim();
    return envValue ? envValue : undefined;
  }

  if (trimmed.startsWith('file:')) {
    const filePath = resolveKtxHomePath(trimmed.slice('file:'.length).trim());
    const fileValue = readFileSync(filePath, 'utf8').trim();
    return fileValue.length > 0 ? fileValue : undefined;
  }

  return trimmed;
}
|
||||
42
packages/cli/src/context/core/config.ts
Normal file
42
packages/cli/src/context/core/config.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
/** Optional filesystem locations; unset entries fall back to defaults derived from `homeDir`. */
export interface KtxStorageConfig {
  /** Explicit config directory; overrides the `homeDir`-derived default. */
  configDir?: string;
  /** Base directory the default config and worktree locations are derived from. */
  homeDir?: string;
  /** Explicit worktrees directory; overrides the `homeDir`-derived default. */
  worktreesDir?: string;
}

/** Git identity settings, plus optional values for the bootstrap commit. */
export interface KtxGitConfig {
  userName: string;
  userEmail: string;
  bootstrapMessage?: string;
  bootstrapAuthor?: string;
  bootstrapAuthorEmail?: string;
}

/** Top-level core configuration: storage locations and git settings. */
export interface KtxCoreConfig {
  storage: KtxStorageConfig;
  git: KtxGitConfig;
}

/** Minimal logging contract with four severity levels. */
export interface KtxLogger {
  debug(message: string): void;
  log(message: string): void;
  warn(message: string): void;
  /** @param error - Optional underlying error or other value associated with the message. */
  error(message: string, error?: unknown): void;
}

/** Shared do-nothing implementation behind every `noopLogger` method. */
const discard = (): undefined => undefined;

/** Logger that ignores every message. */
export const noopLogger: KtxLogger = {
  debug: discard,
  log: discard,
  warn: discard,
  error: discard,
};

/**
 * Returns the config directory: `storage.configDir` when set, otherwise
 * `<homeDir>/ktx/config`, with `homeDir` defaulting to `/tmp`.
 */
export function resolveConfigDir(config: KtxCoreConfig): string {
  const { configDir, homeDir } = config.storage;
  if (configDir != null) {
    return configDir;
  }
  return `${homeDir ?? '/tmp'}/ktx/config`;
}

/**
 * Returns the worktrees directory: `storage.worktreesDir` when set, otherwise
 * `<homeDir>/.worktrees`, with `homeDir` defaulting to `/tmp`.
 */
export function resolveWorktreesDir(config: KtxCoreConfig): string {
  const { worktreesDir, homeDir } = config.storage;
  if (worktreesDir != null) {
    return worktreesDir;
  }
  return `${homeDir ?? '/tmp'}/.worktrees`;
}
|
||||
5
packages/cli/src/context/core/embedding.ts
Normal file
5
packages/cli/src/context/core/embedding.ts
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/** Port for computing embedding vectors from text. */
export interface KtxEmbeddingPort {
  // NOTE(review): presumably the largest number of texts accepted per bulk call — confirm
  // against implementations.
  maxBatchSize: number;

  /** Computes the embedding vector for one text. */
  computeEmbedding(text: string): Promise<number[]>;

  /** Computes embedding vectors for several texts, returned as an array of vectors. */
  computeEmbeddingsBulk(texts: string[]): Promise<number[][]>;
}
|
||||
43
packages/cli/src/context/core/file-store.ts
Normal file
43
packages/cli/src/context/core/file-store.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
/** Outcome of a write or delete; implementations may attach further fields. */
export interface KtxFileWriteResult {
  /** Hash of the resulting commit; may be absent or null. */
  commitHash?: string | null;
  [key: string]: unknown;
}

/** Outcome of a read; implementations may attach further fields. */
export interface KtxFileReadResult {
  /** The file's text content. */
  content: string;
  [key: string]: unknown;
}

/** Outcome of a directory listing. */
export interface KtxFileListResult {
  files: string[];
}

/** One entry of a file's history; every field is optional and extra fields are allowed. */
export interface KtxFileHistoryEntry {
  sha?: string;
  message?: string;
  author?: string;
  date?: string | Date;
  [key: string]: unknown;
}

/**
 * Port for a file store whose writes and deletes carry author and commit-message metadata.
 *
 * @typeParam TSelf - Type returned by `forWorktree`.
 */
export interface KtxFileStorePort<TSelf = unknown> {
  /**
   * Writes `content` to `path`, attributed to `author`/`authorEmail` with `commitMessage`.
   * NOTE(review): `skipLock` presumably bypasses the store's write lock — confirm with
   * implementations.
   */
  writeFile(
    path: string,
    content: string,
    author: string,
    authorEmail: string,
    commitMessage: string,
    options?: { skipLock?: boolean },
  ): Promise<KtxFileWriteResult>;

  /** Reads the file at `path`. */
  readFile(path: string): Promise<KtxFileReadResult>;

  /** Deletes the file at `path`; the result may be null. */
  deleteFile(
    path: string,
    author: string,
    authorEmail: string,
    commitMessage: string,
    options?: { skipLock?: boolean },
  ): Promise<KtxFileWriteResult | null>;

  /** Lists files under `path`, optionally recursing. */
  listFiles(path: string, recursive?: boolean): Promise<KtxFileListResult>;

  /**
   * Returns the history of `path`. The declared union includes `unknown`, so callers must
   * narrow the result before treating it as `KtxFileHistoryEntry[]`.
   */
  getFileHistory(path: string): Promise<KtxFileHistoryEntry[] | unknown>;

  /** Returns a `TSelf` for the given working directory. */
  forWorktree(workdir: string): TSelf;
}
|
||||
29
packages/cli/src/context/core/git-env.ts
Normal file
29
packages/cli/src/context/core/git-env.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import { simpleGit, type SimpleGit } from 'simple-git';
|
||||
|
||||
/** Environment variables that are removed before git commands are run. */
const GIT_HOOK_ENV_KEYS = [
  'GIT_ALTERNATE_OBJECT_DIRECTORIES',
  'GIT_DIR',
  'GIT_INDEX_FILE',
  'GIT_OBJECT_DIRECTORY',
  'GIT_PREFIX',
  'GIT_QUARANTINE_PATH',
  'GIT_WORK_TREE',
  'GIT_EDITOR',
  'GIT_EXEC_PATH',
  'GIT_PAGER',
  'PAGER',
  'VISUAL',
  'EDITOR',
] as const;

/**
 * Returns a copy of `env` without the variables listed in `GIT_HOOK_ENV_KEYS`.
 * The input object is not modified.
 *
 * @param env - Environment to copy; defaults to `process.env`.
 */
function sanitizedGitEnv(env: NodeJS.ProcessEnv = process.env): NodeJS.ProcessEnv {
  const blocked = new Set<string>(GIT_HOOK_ENV_KEYS);
  const kept: NodeJS.ProcessEnv = {};

  for (const [name, value] of Object.entries(env)) {
    if (!blocked.has(name)) {
      kept[name] = value;
    }
  }

  return kept;
}
|
||||
|
||||
/**
 * Creates a simple-git client rooted at `baseDir` whose commands run with the sanitized copy
 * of the process environment. The client is constructed with `allowUnsafeAskPass` enabled.
 */
export function createSimpleGit(baseDir: string): SimpleGit {
  const client = simpleGit({ baseDir, unsafe: { allowUnsafeAskPass: true } });
  return client.env(sanitizedGitEnv());
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { SimpleGit } from 'simple-git';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { createSimpleGit } from './git-env.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
// Drives GitService.assertWorktreeClean against a real repository in a temp directory.
describe('GitService.assertWorktreeClean', () => {
  let workdir: string;
  let git: SimpleGit;
  let gitService: GitService;

  // Writes one file into the worktree, stages everything, and commits.
  const commitFile = async (name: string, content: string, message: string): Promise<void> => {
    await writeFile(join(workdir, name), content);
    await git.add('.');
    await git.commit(message);
  };

  beforeEach(async () => {
    workdir = await mkdtemp(join(tmpdir(), 'gitsvc-clean-'));
    git = createSimpleGit(workdir);
    await git.init();
    await git.addConfig('user.email', 't@test');
    await git.addConfig('user.name', 'Test');
    await commitFile('init', 'init', 'init');

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: workdir, homeDir: workdir },
      git: { userName: 'Test', userEmail: 't@test' },
    };
    gitService = new GitService(coreConfig);
    // Point the service at the repository prepared above.
    Object.assign(gitService, { git, configDir: workdir });
  });

  afterEach(async () => rm(workdir, { recursive: true, force: true }));

  it('does not throw on a clean worktree', async () => {
    await expect(gitService.assertWorktreeClean()).resolves.toBeUndefined();
  });

  it.each(['MERGE_HEAD', 'CHERRY_PICK_HEAD', 'REVERT_HEAD'])('throws when %s exists', async (marker) => {
    await writeFile(join(workdir, '.git', marker), 'deadbeef\n');

    await expect(gitService.assertWorktreeClean()).rejects.toThrow(new RegExp(marker));
  });

  it('throws when sequencer/todo exists (interrupted multi-commit revert/cherry-pick)', async () => {
    const sequencerDir = join(workdir, '.git', 'sequencer');
    await mkdir(sequencerDir, { recursive: true });
    await writeFile(join(sequencerDir, 'todo'), 'pick deadbeef foo\n');

    await expect(gitService.assertWorktreeClean()).rejects.toThrow(/sequencer/);
  });

  it('throws when the index has unmerged paths', async () => {
    // Two branches edit the same file differently, so merging one into the other conflicts.
    await git.checkoutLocalBranch('a');
    await commitFile('shared', 'A version', 'a');

    // The default branch name depends on the local git configuration.
    await git.checkout('master').catch(() => git.checkout('main'));
    await git.checkoutLocalBranch('b');
    await commitFile('shared', 'B version', 'b');

    // The merge is expected to fail; only the resulting index state matters here.
    await git.raw(['merge', 'a']).catch(() => undefined);

    await expect(gitService.assertWorktreeClean()).rejects.toThrow();
  });
});
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { mkdir, mkdtemp, readdir, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { SimpleGit } from 'simple-git';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { createSimpleGit } from './git-env.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
// Drives GitService.deleteDirectories against a real repository in a temp directory.
describe('GitService.deleteDirectories', () => {
  let workdir: string;
  let git: SimpleGit;
  let gitService: GitService;

  beforeEach(async () => {
    workdir = await mkdtemp(join(tmpdir(), 'gitsvc-dd-'));
    git = createSimpleGit(workdir);
    await git.init();
    await git.addConfig('user.email', 't@test');
    await git.addConfig('user.name', 'Test');
    await writeFile(join(workdir, 'keep'), 'k');
    await git.add('.');
    await git.commit('init');

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: workdir, homeDir: workdir },
      git: { userName: 'Test', userEmail: 't@test' },
    };
    gitService = new GitService(coreConfig);
    // Point the service at the repository prepared above.
    (gitService as any).git = git;
    (gitService as any).configDir = workdir;
  });

  afterEach(async () => rm(workdir, { recursive: true, force: true }));

  it('removes multiple directories in a single commit', async () => {
    for (const name of ['a', 'b', 'c']) {
      await mkdir(join(workdir, name), { recursive: true });
      await writeFile(join(workdir, name, 'f.txt'), name);
    }
    await git.add('.');
    await git.commit('seed 3 dirs');
    const beforeCommits = (await git.log()).total;

    const result = await gitService.deleteDirectories(['a', 'b'], 'gc: drop a+b', 'System User', 'system@example.com');
    expect(result.commitHash).toBeTruthy();

    const entries = await readdir(workdir);
    expect(entries).not.toContain('a');
    expect(entries).not.toContain('b');
    expect(entries).toContain('c');

    // Both removals must land in exactly one new commit.
    const afterCommits = (await git.log()).total;
    expect(afterCommits).toBe(beforeCommits + 1);
  });

  // The title previously said "null hash", but the asserted value is an empty string.
  it('no-ops and returns an empty hash when the input list is empty', async () => {
    const result = await gitService.deleteDirectories([], 'empty', 'X', 'x@example.com');
    expect(result.commitHash).toBe('');
    expect(result.created).toBe(false);
  });

  it('ignores paths that have already been deleted — commits only the remaining ones', async () => {
    await mkdir(join(workdir, 'stale'), { recursive: true });
    await writeFile(join(workdir, 'stale', 'x'), 'x');
    await git.add('.');
    await git.commit('seed stale');

    // 'missing' was never created, so only 'stale' can actually be removed.
    const result = await gitService.deleteDirectories(
      ['stale', 'missing'],
      'gc: drop stale + missing',
      'System User',
      'system@example.com',
    );

    expect(result.commitHash).toBeTruthy();
    const entries = await readdir(workdir);
    expect(entries).not.toContain('stale');
  });
});
|
||||
45
packages/cli/src/context/core/git.service.patch.test.ts
Normal file
45
packages/cli/src/context/core/git.service.patch.test.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
/**
 * Creates a GitService whose home is a fresh temp directory and whose config repository lives
 * at `<home>/config`, then runs its `onModuleInit()`.
 *
 * @returns The home directory, the config directory, and the initialized service.
 */
async function makeGit() {
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-'));
  const configDir = join(homeDir, 'config');
  const systemEmail = 'system@example.com';

  const git = new GitService({
    storage: { configDir, homeDir },
    git: {
      userName: 'System User',
      userEmail: systemEmail,
      bootstrapMessage: 'init',
      bootstrapAuthor: 'system',
      bootstrapAuthorEmail: systemEmail,
    },
  });
  await git.onModuleInit();

  return { homeDir, configDir, git };
}
|
||||
|
||||
// Round-trips a change through a patch file: record it in the main repo, write the patch,
// then apply and commit it in a worktree created at the earlier commit.
describe('GitService patch helpers', () => {
  it('collects binary-safe no-rename patches and applies them with --3way --index', async () => {
    const { homeDir, configDir, git } = await makeGit();
    const pagePath = 'wiki/global/page.md';
    const author = 'System User';
    const authorEmail = 'system@example.com';

    // Base state: the page exists with its old content.
    await mkdir(join(configDir, 'wiki/global'), { recursive: true });
    await writeFile(join(configDir, pagePath), 'old\n');
    await git.commitFiles([pagePath], 'add page', author, authorEmail);
    const base = await git.revParseHead();

    // The edit that the patch should capture.
    await writeFile(join(configDir, pagePath), 'new\n');
    await git.commitFiles([pagePath], 'edit page', author, authorEmail);
    const patchPath = join(homeDir, 'proposal.patch');
    await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath);

    // Apply the patch in a separate worktree that starts from the base commit.
    const targetDir = join(homeDir, 'target');
    await git.addWorktree(targetDir, 'target', base);
    const targetGit = git.forWorktree(targetDir);
    await targetGit.applyPatchFile3WayIndex(patchPath);
    await targetGit.commitStaged('apply proposal', author, authorEmail);

    await expect(readFile(join(targetDir, pagePath), 'utf-8')).resolves.toBe('new\n');
  });
});
|
||||
56
packages/cli/src/context/core/git.service.reset-hard.test.ts
Normal file
56
packages/cli/src/context/core/git.service.reset-hard.test.ts
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { SimpleGit } from 'simple-git';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { createSimpleGit } from './git-env.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
// Drives GitService.resetHardTo against a real repository in a temp directory.
describe('GitService.resetHardTo', () => {
  let workdir: string;
  let git: SimpleGit;
  let gitService: GitService;

  // Writes one file into the worktree, stages everything, and commits.
  const commitFile = async (name: string, content: string, message: string): Promise<void> => {
    await writeFile(join(workdir, name), content);
    await git.add('.');
    await git.commit(message);
  };

  // Current HEAD commit hash without surrounding whitespace.
  const headSha = async (): Promise<string> => (await git.revparse(['HEAD'])).trim();

  // Resolves to the file's content, or null when it cannot be read.
  const readOrNull = (name: string): Promise<string | null> =>
    readFile(join(workdir, name), 'utf-8').catch(() => null);

  beforeEach(async () => {
    workdir = await mkdtemp(join(tmpdir(), 'gitsvc-reset-'));
    git = createSimpleGit(workdir);
    await git.init();
    await git.addConfig('user.email', 't@test');
    await git.addConfig('user.name', 'Test');
    await commitFile('init', 'init', 'init');

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: workdir, homeDir: workdir },
      git: { userName: 'Test', userEmail: 't@test' },
    };
    gitService = new GitService(coreConfig);
    // Point the service at the repository prepared above.
    Object.assign(gitService, { git, configDir: workdir });
  });

  afterEach(async () => rm(workdir, { recursive: true, force: true }));

  it('rewinds HEAD to the target SHA, removing later commits and their files', async () => {
    const baseSha = await headSha();
    await commitFile('a', 'a1', 'a');
    await commitFile('b', 'b1', 'b');

    await gitService.resetHardTo(baseSha);

    expect(await headSha()).toBe(baseSha);
    expect(await readOrNull('a')).toBeNull();
    expect(await readOrNull('b')).toBeNull();
  });

  it('is a no-op when target SHA equals current HEAD', async () => {
    const sha = await headSha();

    await gitService.resetHardTo(sha);

    expect(await headSha()).toBe(sha);
  });
});
|
||||
450
packages/cli/src/context/core/git.service.test.ts
Normal file
450
packages/cli/src/context/core/git.service.test.ts
Normal file
|
|
@ -0,0 +1,450 @@
|
|||
import { mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
// These tests drive a real git repo inside a temp directory — simple-git shells out to the
|
||||
// system `git` binary. They are fast enough to run as unit tests and catch real issues that
|
||||
// would be invisible with mocked git.
|
||||
describe('GitService', () => {
|
||||
let service: GitService;
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'git-service-spec-'));
|
||||
|
||||
const coreConfig: KtxCoreConfig = {
|
||||
storage: { configDir: tempDir, homeDir: tempDir },
|
||||
git: {
|
||||
userName: 'Test User',
|
||||
userEmail: 'test@example.com',
|
||||
bootstrapMessage: 'Initialize test config repo',
|
||||
bootstrapAuthor: 'test-system',
|
||||
bootstrapAuthorEmail: 'system@example.com',
|
||||
},
|
||||
};
|
||||
|
||||
service = new GitService(coreConfig);
|
||||
await service.onModuleInit();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
const writeAndCommit = async (filePath: string, content: string, message = 'msg') => {
|
||||
await writeFile(join(tempDir, filePath), content, 'utf-8');
|
||||
return service.commitFile(filePath, message, 'Test', 'test@example.com');
|
||||
};
|
||||
|
||||
describe('cold-start bootstrap commit', () => {
|
||||
it('writes an empty commit on init so HEAD always resolves', async () => {
|
||||
// beforeEach already ran onModuleInit() against an empty temp dir.
|
||||
const head = await service.revParseHead();
|
||||
expect(head).toMatch(/^[0-9a-f]{40}$/);
|
||||
});
|
||||
|
||||
it('does not double-commit when re-initialized', async () => {
|
||||
const before = await service.revParseHead();
|
||||
await service.onModuleInit();
|
||||
const after = await service.revParseHead();
|
||||
expect(after).toBe(before);
|
||||
});
|
||||
|
||||
it('keeps git auto-maintenance attached for deterministic cleanup', async () => {
|
||||
const config = await readFile(join(tempDir, '.git', 'config'), 'utf-8');
|
||||
|
||||
expect(config).toMatch(/\[gc]\n\s+autoDetach = false/);
|
||||
expect(config).toMatch(/\[maintenance]\n\s+autoDetach = false/);
|
||||
});
|
||||
|
||||
it('initializes when release automation sets GIT_ASKPASS', async () => {
|
||||
const releaseEnvDir = await mkdtemp(join(tmpdir(), 'git-service-release-env-'));
|
||||
const previousAskPass = process.env.GIT_ASKPASS;
|
||||
process.env.GIT_ASKPASS = 'echo';
|
||||
|
||||
try {
|
||||
const releaseEnvService = new GitService({
|
||||
storage: { configDir: releaseEnvDir, homeDir: releaseEnvDir },
|
||||
git: {
|
||||
userName: 'Test User',
|
||||
userEmail: 'test@example.com',
|
||||
bootstrapMessage: 'Initialize test config repo',
|
||||
bootstrapAuthor: 'test-system',
|
||||
bootstrapAuthorEmail: 'system@example.com',
|
||||
},
|
||||
});
|
||||
|
||||
await expect(releaseEnvService.onModuleInit()).resolves.toBeUndefined();
|
||||
} finally {
|
||||
if (previousAskPass === undefined) {
|
||||
delete process.env.GIT_ASKPASS;
|
||||
} else {
|
||||
process.env.GIT_ASKPASS = previousAskPass;
|
||||
}
|
||||
await rm(releaseEnvDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('commitFile `created` flag', () => {
|
||||
it('is true for a real commit', async () => {
|
||||
const info = await writeAndCommit('a.md', '# Hello');
|
||||
expect(info.created).toBe(true);
|
||||
});
|
||||
|
||||
it('is false on a no-op write (content unchanged)', async () => {
|
||||
await writeAndCommit('a.md', '# Hello');
|
||||
const second = await writeAndCommit('a.md', '# Hello', 'unused');
|
||||
expect(second.created).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// Round-trip behaviour of notes attached to a commit hash.
describe('addNote / getNote', () => {
  // Commits the shared fixture file and hands back the hash to annotate.
  const commitFixture = async (): Promise<string> => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    return commitHash;
  };

  it('attaches a note and reads it back', async () => {
    const hash = await commitFixture();
    await service.addNote(hash, 'Rich message from LLM');
    await expect(service.getNote(hash)).resolves.toBe('Rich message from LLM');
  });

  it('returns undefined when no note exists', async () => {
    const hash = await commitFixture();
    await expect(service.getNote(hash)).resolves.toBeUndefined();
  });

  it('overwrites an existing note (idempotent retries)', async () => {
    const hash = await commitFixture();
    for (const text of ['First', 'Second']) {
      await service.addNote(hash, text);
    }
    await expect(service.getNote(hash)).resolves.toBe('Second');
  });

  it('skips empty/whitespace messages silently', async () => {
    const hash = await commitFixture();
    await service.addNote(hash, ' ');
    await expect(service.getNote(hash)).resolves.toBeUndefined();
  });
});
|
||||
|
||||
// The newest history entry exposes the attached note as `enhancedMessage`.
describe('getFileHistory', () => {
  const TRACKED = 'a.md';

  // Reads `enhancedMessage` off the first history entry for the tracked file.
  const latestEnhancedMessage = async () => {
    const [latest] = await service.getFileHistory(TRACKED);
    return latest?.enhancedMessage;
  };

  it('surfaces enhancedMessage when a note is present', async () => {
    const { commitHash } = await writeAndCommit(TRACKED, '# Hello');
    await service.addNote(commitHash, 'Note body');

    expect(await latestEnhancedMessage()).toBe('Note body');
  });

  it('leaves enhancedMessage undefined when no note is attached', async () => {
    await writeAndCommit(TRACKED, '# Hello');
    expect(await latestEnhancedMessage()).toBeUndefined();
  });
});
|
||||
|
||||
// Diff output for a single commit, limited to one path.
describe('getCommitDiff', () => {
  it('returns the patch scoped to the requested path', async () => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    const patch = await service.getCommitDiff(commitHash, 'a.md');

    for (const fragment of ['diff --git', 'Hello']) {
      expect(patch).toContain(fragment);
    }
  });

  it('handles the repository initial commit without throwing', async () => {
    const { commitHash } = await writeAndCommit('first.md', 'first');
    const pending = service.getCommitDiff(commitHash, 'first.md');
    await expect(pending).resolves.toBeDefined();
  });
});
|
||||
|
||||
// squashTo(preHead, opts): exercised for the happy path, the no-op case, a
// foreign author in the range, and an empty preHead.
describe('squashTo', () => {
  // Identity used for "our" commits and for the squash request itself.
  const systemIdentity = { author: 'System User', authorEmail: 'system@example.com' };

  // Writes a file into the repo directory and commits it as the system identity.
  async function writeAsSystem(filePath: string, content: string, message = 'msg') {
    await writeFile(join(tempDir, filePath), content, 'utf-8');
    return service.commitFile(filePath, message, 'System User', 'system@example.com');
  }

  it('collapses 3 commits after preHead into a single commit', async () => {
    const { commitHash: preHead } = await writeAsSystem('a.md', 'v1');

    await writeAsSystem('b.md', 'b', 'add b');
    await writeAsSystem('c.md', 'c', 'add c');
    await writeAsSystem('a.md', 'v2', 'update a');

    const outcome = await service.squashTo(preHead, {
      message: 'Ingest: bundle 3 writes',
      ...systemIdentity,
    });

    expect(outcome.squashed).toBe(true);
    expect(outcome.squashedCount).toBe(3);
    expect(outcome.commitHash).toBeTruthy();
    expect(outcome.commitHash).not.toBe(preHead);

    const squashedHash = outcome.commitHash;
    if (!squashedHash) {
      throw new Error('Expected squash commit hash');
    }

    // The squashed commit should preserve the final tree state.
    await expect(service.getFileAtCommit('a.md', squashedHash)).resolves.toBe('v2');
    await expect(service.getFileAtCommit('b.md', squashedHash)).resolves.toBe('b');
  });

  it('is a no-op when preHead equals HEAD', async () => {
    const { commitHash: head } = await writeAsSystem('a.md', 'v1');

    const outcome = await service.squashTo(head, {
      message: 'nothing to squash',
      ...systemIdentity,
    });

    expect(outcome.squashed).toBe(false);
    expect(outcome.commitHash).toBe(head);
  });

  it('skips squash when a foreign-author commit sits between preHead and HEAD', async () => {
    const { commitHash: preHead } = await writeAsSystem('a.md', 'v1');

    await writeAsSystem('b.md', 'from us', 'ours');
    // Foreign commit: goes through the outer helper rather than writeAsSystem.
    await writeAndCommit('c.md', 'from someone else', 'foreign');
    await writeAsSystem('d.md', 'ours again', 'ours 2');

    const outcome = await service.squashTo(preHead, {
      message: 'should be skipped',
      ...systemIdentity,
    });

    expect(outcome.squashed).toBe(false);
    expect(outcome.reason).toContain('foreign');
    expect(outcome.squashedCount).toBe(3);
  });

  it('returns cleanly when preHead is empty (no starting commit)', async () => {
    const outcome = await service.squashTo('', {
      message: 'would have squashed',
      ...systemIdentity,
    });

    expect(outcome.squashed).toBe(false);
    expect(outcome.commitHash).toBeNull();
  });
});
|
||||
|
||||
// addWorktree / removeWorktree / deleteBranch against sibling directories of
// the test repository.
describe('worktree lifecycle', () => {
  // macOS canonicalizes tmp paths (/var/folders → /private/var/folders) when git
  // returns them from `worktree list`, so the parent directory goes through
  // realpath() before the path is built and later compared.
  async function canonicalSiblingPath(suffix: string): Promise<string> {
    const canonicalParent = await realpath(join(tempDir, '..'));
    return join(canonicalParent, `wt-${Date.now()}-${suffix}`);
  }

  // Commits a seed file and returns the hash new worktrees start from.
  async function seedCommit(): Promise<string> {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    return commitHash;
  }

  it('addWorktree creates a branch + directory at the given startSha', async () => {
    const startSha = await seedCommit();
    const wtDir = await canonicalSiblingPath('add');
    await service.addWorktree(wtDir, 'session/alpha', startSha);

    const entries = await service.listWorktrees();
    const match = entries.find((entry) => entry.path === wtDir && entry.branch === 'refs/heads/session/alpha');
    expect(match).toBeTruthy();

    // Best-effort teardown: failures here are ignored.
    await service.removeWorktree(wtDir).catch(() => undefined);
    await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
  });

  it('removeWorktree detaches the worktree entry', async () => {
    const startSha = await seedCommit();
    const wtDir = await canonicalSiblingPath('rm');
    await service.addWorktree(wtDir, 'session/beta', startSha);
    await service.removeWorktree(wtDir);

    const entries = await service.listWorktrees();
    expect(entries.find((entry) => entry.path === wtDir)).toBeFalsy();
  });

  it('deleteBranch removes a branch ref', async () => {
    const startSha = await seedCommit();
    const wtDir = await canonicalSiblingPath('br');
    await service.addWorktree(wtDir, 'session/gamma', startSha);
    await service.removeWorktree(wtDir);
    await service.deleteBranch('session/gamma', true);

    // Reach the service's underlying simple-git handle to list local branches.
    const rawGit = (service as unknown as { git: import('simple-git').SimpleGit }).git;
    const branches = await rawGit.branchLocal();
    expect(branches.all).not.toContain('session/gamma');

    await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
  });
});
|
||||
|
||||
// forWorktree(dir) hands back a GitService bound to an existing worktree.
describe('forWorktree', () => {
  // Seeds the repo, then adds a worktree on `branch` in a sibling directory
  // whose name ends with `tag`.
  async function openWorktree(tag: string, branch: string): Promise<{ wtDir: string; commitHash: string }> {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    const canonicalParent = await realpath(join(tempDir, '..'));
    const wtDir = join(canonicalParent, `wt-${Date.now()}-${tag}`);
    await service.addWorktree(wtDir, branch, commitHash);
    return { wtDir, commitHash };
  }

  // Best-effort teardown: both steps ignore failures.
  async function closeWorktree(wtDir: string): Promise<void> {
    await service.removeWorktree(wtDir).catch(() => undefined);
    await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
  }

  it('returns a GitService whose operations run inside the given worktree', async () => {
    const { wtDir, commitHash } = await openWorktree('fw', 'session/delta');

    const scoped = service.forWorktree(wtDir);
    await expect(scoped.revParseHead()).resolves.toBe(commitHash);

    await closeWorktree(wtDir);
  });

  it('serializes concurrent commits from scoped services targeting the same worktree', async () => {
    const { wtDir } = await openWorktree('fw-concurrent', 'session/concurrent');

    // Two independent scoped services pointing at the same directory.
    const first = service.forWorktree(wtDir);
    const second = service.forWorktree(wtDir);
    await writeFile(join(wtDir, 'a.md'), 'a\n', 'utf-8');
    await writeFile(join(wtDir, 'b.md'), 'b\n', 'utf-8');

    const [commitA, commitB] = await Promise.all([
      first.commitFile('a.md', 'add a', 'System User', 'system@example.com'),
      second.commitFile('b.md', 'add b', 'System User', 'system@example.com'),
    ]);

    const fullSha = /^[0-9a-f]{40}$/;
    expect(commitA.commitHash).toMatch(fullSha);
    expect(commitB.commitHash).toMatch(fullSha);
    await expect(first.getFileAtCommit('a.md', commitA.commitHash)).resolves.toBe('a\n');
    await expect(second.getFileAtCommit('b.md', commitB.commitHash)).resolves.toBe('b\n');

    await closeWorktree(wtDir);
  });
});
|
||||
|
||||
// squashMergeIntoMain(branch, author, email, message): success, empty diff,
// content conflict, and untracked-file collision.
describe('squashMergeIntoMain', () => {
  // Sibling directory for a worktree; the parent is resolved through realpath().
  const worktreeDirFor = async (tag: string): Promise<string> => {
    const canonicalParent = await realpath(join(tempDir, '..'));
    return join(canonicalParent, `wt-${Date.now()}-${tag}`);
  };

  // Runs the squash merge with the system identity used throughout these tests.
  const squashAsSystem = (branch: string, message: string) =>
    service.squashMergeIntoMain(branch, 'System User', 'system@example.com', message);

  // Status of the main repository via the service's underlying simple-git handle.
  const mainStatus = () => (service as unknown as { git: import('simple-git').SimpleGit }).git.status();

  // Best-effort teardown: both steps ignore failures.
  const discardWorktree = async (wtDir: string): Promise<void> => {
    await service.removeWorktree(wtDir).catch(() => undefined);
    await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
  };

  it('merges a session branch as one commit on main, returning the new SHA + touched paths', async () => {
    const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
    const wtDir = await worktreeDirFor('sm');
    await service.addWorktree(wtDir, 'session/happy', baseSha);

    const scoped = service.forWorktree(wtDir);
    const sessionWrites = [
      ['a.yaml', 'one: 1\n', 'wip a'],
      ['b.yaml', 'two: 2\n', 'wip b'],
    ] as const;
    for (const [file, content, message] of sessionWrites) {
      await writeFile(join(wtDir, file), content, 'utf-8');
      await scoped.commitFile(file, message, 'System User', 'system@example.com');
    }

    const merged = await squashAsSystem('session/happy', 'Memory capture: 2 files [chat=abcd1234]');

    expect(merged.ok).toBe(true);
    if (!merged.ok) {
      throw new Error('unreachable');
    }
    expect(merged.squashSha).toMatch(/^[0-9a-f]{40}$/);
    expect([...merged.touchedPaths].sort()).toEqual(['a.yaml', 'b.yaml']);

    const mainHead = await service.revParseHead();
    expect(mainHead).toBe(merged.squashSha);
    expect(mainHead).not.toBe(baseSha);

    await discardWorktree(wtDir);
  });

  it('returns ok with empty touchedPaths when the session branch has no diff vs main', async () => {
    const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
    const wtDir = await worktreeDirFor('sm-empty');
    await service.addWorktree(wtDir, 'session/empty', baseSha);

    const merged = await squashAsSystem('session/empty', 'should be a no-op');

    expect(merged.ok).toBe(true);
    if (!merged.ok) {
      throw new Error('unreachable');
    }
    expect(merged.touchedPaths).toEqual([]);
    expect(merged.squashSha).toBe(baseSha);

    await discardWorktree(wtDir);
  });

  it('returns conflict=true and leaves main clean when session+main touched same file differently', async () => {
    await writeAndCommit('shared.yaml', 'base\n');
    const base = await service.revParseHead();
    if (!base) {
      throw new Error('no base head');
    }

    const wtDir = await worktreeDirFor('conf');
    await service.addWorktree(wtDir, 'session/conf', base);
    const scoped = service.forWorktree(wtDir);
    await writeFile(join(wtDir, 'shared.yaml'), 'session-edit\n', 'utf-8');
    await scoped.commitFile('shared.yaml', 'session edit', 'System User', 'system@example.com');

    // Main edits the same file a different way, after the session branched.
    await writeAndCommit('shared.yaml', 'main-edit\n');

    const merged = await squashAsSystem('session/conf', 'Memory capture: 1 file [chat=dead1234]');

    expect(merged.ok).toBe(false);
    if (merged.ok) {
      throw new Error('unreachable');
    }
    expect(merged.conflict).toBe(true);
    expect(merged.conflictPaths).toContain('shared.yaml');

    const status = await mainStatus();
    expect(status.isClean()).toBe(true);

    await discardWorktree(wtDir);
  });

  it('reports untracked files that would be overwritten by the squash merge', async () => {
    const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');
    const wtDir = await worktreeDirFor('untracked');
    await service.addWorktree(wtDir, 'session/untracked', baseSha);

    const scoped = service.forWorktree(wtDir);
    await writeFile(join(wtDir, 'knowledge.md'), 'session version\n', 'utf-8');
    await scoped.commitFile('knowledge.md', 'session write', 'System User', 'system@example.com');
    // Same relative path written into the main checkout without committing it.
    await writeFile(join(tempDir, 'knowledge.md'), 'untracked local version\n', 'utf-8');

    const merged = await squashAsSystem('session/untracked', 'Memory capture: 1 file [chat=untracked]');

    expect(merged.ok).toBe(false);
    if (merged.ok) {
      throw new Error('unreachable');
    }
    expect(merged.conflict).toBe(true);
    expect(merged.conflictPaths).toEqual(['knowledge.md']);

    const status = await mainStatus();
    expect(status.not_added).toContain('knowledge.md');

    await discardWorktree(wtDir);
  });
});
|
||||
});
|
||||
1062
packages/cli/src/context/core/git.service.ts
Normal file
1062
packages/cli/src/context/core/git.service.ts
Normal file
File diff suppressed because it is too large
Load diff
27
packages/cli/src/context/core/index.ts
Normal file
27
packages/cli/src/context/core/index.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
export type { KtxCoreConfig, KtxGitConfig, KtxLogger, KtxStorageConfig } from './config.js';
|
||||
export { noopLogger, resolveConfigDir, resolveWorktreesDir } from './config.js';
|
||||
export { resolveKtxConfigReference, resolveKtxHomePath } from './config-reference.js';
|
||||
export type { KtxEmbeddingPort } from './embedding.js';
|
||||
export {
|
||||
REDACTED_KTX_CREDENTIAL_VALUE,
|
||||
redactKtxSensitiveMetadata,
|
||||
redactKtxSensitiveText,
|
||||
redactKtxSensitiveValue,
|
||||
} from './redaction.js';
|
||||
export type {
|
||||
KtxFileHistoryEntry,
|
||||
KtxFileListResult,
|
||||
KtxFileReadResult,
|
||||
KtxFileStorePort,
|
||||
KtxFileWriteResult,
|
||||
} from './file-store.js';
|
||||
export type { GitCommitInfo, SquashMergeResult, WorktreeEntry } from './git.service.js';
|
||||
export { GitService } from './git.service.js';
|
||||
export type {
|
||||
SentinelPayload,
|
||||
SessionOutcome,
|
||||
SessionWorktree,
|
||||
SessionWorktreeServiceDeps,
|
||||
WorktreeConfigPort,
|
||||
} from './session-worktree.service.js';
|
||||
export { SessionWorktreeService } from './session-worktree.service.js';
|
||||
47
packages/cli/src/context/core/redaction.ts
Normal file
47
packages/cli/src/context/core/redaction.ts
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
/** Placeholder written in place of any value judged to be sensitive. */
export const REDACTED_KTX_CREDENTIAL_VALUE = '<redacted>';

// Case-insensitive substring match on a field name. Any key that merely
// contains one of these fragments (e.g. `baseUrl`, `apiKey`) counts as sensitive.
const SENSITIVE_FIELD_NAME = /(password|secret|token|api[_-]?key|private[_-]?key|passphrase|credential|authorization|url)/i;
// Captures `scheme://user:` / password / `@` so only the password part is replaced.
const URL_CREDENTIAL_PATTERN = /([a-z][a-z0-9+.-]*:\/\/[^:\s/@]+:)([^@\s/]+)(@)/gi;

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** True when the field name matches the sensitive-name pattern. */
function isSensitiveField(key: string): boolean {
  return SENSITIVE_FIELD_NAME.test(key);
}

/**
 * Redacts a single value according to the field name it is stored under.
 *
 * @param key - Field name the value belongs to.
 * @param value - Value to inspect.
 * @returns The placeholder when `key` is sensitive (whatever the value's shape);
 *   otherwise arrays are mapped element-wise under the same key, plain objects
 *   are redacted field by field, and every other value is returned unchanged.
 */
export function redactKtxSensitiveValue(key: string, value: unknown): unknown {
  if (isSensitiveField(key)) {
    return REDACTED_KTX_CREDENTIAL_VALUE;
  }
  if (Array.isArray(value)) {
    return value.map((item) => redactKtxSensitiveValue(key, item));
  }
  if (isRecord(value)) {
    return redactKtxSensitiveMetadata(value);
  }
  return value;
}

/**
 * Returns a redacted copy of a metadata object; the input is not modified.
 *
 * Array values keep their array shape and each element is redacted under the
 * parent key. For a sensitive key every element becomes the placeholder,
 * including object elements. Previously object elements were recursed into
 * with the parent key forgotten, so `{ credentials: [{ user, pass }] }` leaked
 * `pass`, even though `redactKtxSensitiveValue('credentials', [...])` redacted it.
 *
 * @param metadata - Object whose fields should be inspected.
 * @returns A new object with the same keys and redacted values.
 */
export function redactKtxSensitiveMetadata(metadata: Record<string, unknown>): Record<string, unknown> {
  const redacted: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(metadata)) {
    redacted[key] = Array.isArray(value)
      ? value.map((item) => redactKtxSensitiveValue(key, item))
      : redactKtxSensitiveValue(key, value);
  }
  return redacted;
}

/**
 * Replaces the password part of `scheme://user:password@` occurrences in free
 * text with the placeholder.
 *
 * @param value - Text that may contain URLs with embedded credentials.
 * @returns The text with every matched password replaced.
 */
export function redactKtxSensitiveText(value: string): string {
  return value.replace(URL_CREDENTIAL_PATTERN, `$1${REDACTED_KTX_CREDENTIAL_VALUE}$3`);
}
|
||||
124
packages/cli/src/context/core/session-worktree.service.test.ts
Normal file
124
packages/cli/src/context/core/session-worktree.service.test.ts
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
import { mkdtemp, realpath, rm, stat } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { GitService } from './git.service.js';
|
||||
import { SessionWorktreeService, type WorktreeConfigPort } from './session-worktree.service.js';
|
||||
|
||||
/** Minimal config double: remembers which workdir it was scoped to. */
interface TestWorktreeConfig extends WorktreeConfigPort<TestWorktreeConfig> {
  workdir?: string;
}

// SessionWorktreeService glues a real GitService to a scoped config adapter.
describe('SessionWorktreeService', () => {
  let sessionService: SessionWorktreeService<TestWorktreeConfig>;
  let gitService: GitService;
  let homeDir: string;

  // HEAD of the freshly initialised repo; every test branches from it.
  const requireBaseSha = async (): Promise<string> => {
    const head = await gitService.revParseHead();
    if (!head) {
      throw new Error('no base sha');
    }
    return head;
  };

  beforeEach(async () => {
    // Resolve the temp dir through realpath() before it is used as a base path.
    homeDir = await realpath(await mkdtemp(join(tmpdir(), 'sws-spec-')));

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: homeDir, homeDir },
      git: {
        userName: 'System User',
        userEmail: 'system@example.com',
        bootstrapMessage: 'Initialize test config repo',
        bootstrapAuthor: 'test-system',
        bootstrapAuthorEmail: 'system@example.com',
      },
    };

    gitService = new GitService(coreConfig);
    await gitService.onModuleInit();

    // Each scoped config shares the root's forWorktree mock.
    const configService: TestWorktreeConfig = {
      forWorktree: vi.fn(
        (workdir: string): TestWorktreeConfig => ({ workdir, forWorktree: configService.forWorktree }),
      ),
    };

    sessionService = new SessionWorktreeService({ coreConfig, gitService, configService });
  });

  afterEach(async () => {
    await rm(homeDir, { recursive: true, force: true });
  });

  describe('create', () => {
    it('creates a worktree + branch and returns scoped services', async () => {
      const baseSha = await requireBaseSha();

      const session = await sessionService.create('chat-abc', baseSha);

      expect(session.workdir).toBe(join(homeDir, '.worktrees', 'session-chat-abc'));
      expect(session.branch).toBe('session/chat-abc');
      expect(session.baseSha).toBe(baseSha);
      const dirStats = await stat(session.workdir);
      expect(dirStats.isDirectory()).toBe(true);

      // Scoped git instance reports the worktree's HEAD (= baseSha at creation time).
      await expect(session.git.revParseHead()).resolves.toBe(baseSha);

      const worktrees = await gitService.listWorktrees();
      expect(worktrees.find((entry) => entry.path === session.workdir)).toBeTruthy();
    });

    it('appends a timestamp suffix when the primary dir already exists', async () => {
      const baseSha = await requireBaseSha();

      const first = await sessionService.create('chat-dup', baseSha);
      const second = await sessionService.create('chat-dup', baseSha);

      expect(first.workdir).not.toBe(second.workdir);
      expect(second.branch).toMatch(/^session\/chat-dup-\d+$/);
    });
  });

  describe('cleanup', () => {
    it('success removes the worktree dir and deletes the branch', async () => {
      const baseSha = await requireBaseSha();

      const session = await sessionService.create('chat-cleanup-ok', baseSha);
      await sessionService.cleanup(session, 'success');

      const worktrees = await gitService.listWorktrees();
      expect(worktrees.find((entry) => entry.path === session.workdir)).toBeFalsy();
      await expect(stat(session.workdir)).rejects.toThrow();
    });

    it('conflict keeps the worktree and writes a sentinel file', async () => {
      const baseSha = await requireBaseSha();

      const session = await sessionService.create('chat-cleanup-conflict', baseSha);
      await sessionService.cleanup(session, 'conflict', { conflictPaths: ['shared.yaml'] });

      // Dir still exists.
      await expect(stat(session.workdir)).resolves.toBeTruthy();

      const { readFile } = await import('node:fs/promises');
      const sentinelPath = join(session.workdir, '.ktx-outcome');
      const parsed = JSON.parse(await readFile(sentinelPath, 'utf-8'));
      expect(parsed.outcome).toBe('conflict');
      expect(parsed.chatId).toBe('chat-cleanup-conflict');
      expect(parsed.conflictPaths).toEqual(['shared.yaml']);
      expect(typeof parsed.at).toBe('string');
    });
  });
});
|
||||
113
packages/cli/src/context/core/session-worktree.service.ts
Normal file
113
packages/cli/src/context/core/session-worktree.service.ts
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import { mkdir, stat, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { noopLogger, resolveWorktreesDir, type KtxCoreConfig, type KtxLogger } from './config.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
/** How a session ended; `cleanup` branches on this value. */
export type SessionOutcome = 'success' | 'empty' | 'conflict' | 'crash';

/** JSON document written to `.ktx-outcome` inside a worktree that is kept. */
export interface SentinelPayload {
  outcome: SessionOutcome;
  /** ISO-8601 timestamp taken when the sentinel is written. */
  at: string;
  chatId: string;
  baseSha: string;
  conflictPaths?: string[];
}

/** Anything that can produce a copy of itself scoped to a worktree directory. */
export interface WorktreeConfigPort<TConfig> {
  forWorktree(workdir: string): TConfig;
}

/** Handle returned by `create`: where the session lives plus its scoped services. */
export interface SessionWorktree<TConfig> {
  chatId: string;
  workdir: string;
  branch: string;
  baseSha: string;
  createdAt: Date;
  git: GitService;
  config: TConfig;
}

/** Constructor dependencies; `logger` falls back to `noopLogger` when omitted. */
export interface SessionWorktreeServiceDeps<TConfig extends WorktreeConfigPort<TConfig>> {
  coreConfig: KtxCoreConfig;
  gitService: GitService;
  configService: TConfig;
  logger?: KtxLogger;
}

/**
 * Creates one git worktree + branch per session and disposes of it according
 * to how the session ended.
 */
export class SessionWorktreeService<TConfig extends WorktreeConfigPort<TConfig> = WorktreeConfigPort<never>> {
  private readonly logger: KtxLogger;
  private readonly worktreesRoot: string;

  constructor(private readonly deps: SessionWorktreeServiceDeps<TConfig>) {
    this.logger = deps.logger ?? noopLogger;
    this.worktreesRoot = resolveWorktreesDir(deps.coreConfig);
  }

  /**
   * Adds a worktree for `sessionKey` starting at `baseSha`.
   *
   * The directory is `session-<key>` under the worktrees root and the branch is
   * `session/<key>`. If something already exists at that directory, a
   * `Date.now()` suffix is appended to both names and a warning is logged.
   *
   * @returns The session handle with git and config services scoped to the new workdir.
   */
  async create(sessionKey: string, baseSha: string): Promise<SessionWorktree<TConfig>> {
    await mkdir(this.worktreesRoot, { recursive: true });

    let name = sessionKey;
    try {
      // stat() resolving means the primary directory is taken.
      await stat(join(this.worktreesRoot, `session-${name}`));
      const suffix = Date.now().toString();
      name = `${sessionKey}-${suffix}`;
      this.logger.warn(`session worktree collision for key=${sessionKey}; using suffix ${suffix}`);
    } catch {
      // no collision: primary name is free
    }

    const branch = `session/${name}`;
    const workdir = join(this.worktreesRoot, `session-${name}`);
    await this.deps.gitService.addWorktree(workdir, branch, baseSha);

    return {
      chatId: sessionKey,
      workdir,
      branch,
      baseSha,
      createdAt: new Date(),
      git: this.deps.gitService.forWorktree(workdir),
      config: this.deps.configService.forWorktree(workdir),
    };
  }

  /**
   * Disposes of a session worktree.
   *
   * `success` and `empty` remove the worktree and force-delete its branch.
   * Any other outcome leaves the worktree in place and writes a `.ktx-outcome`
   * sentinel into it. Failures in either path are logged as warnings and never
   * thrown.
   *
   * @param extra - Optional conflict paths recorded in the sentinel.
   */
  async cleanup(
    session: SessionWorktree<TConfig>,
    outcome: SessionOutcome,
    extra?: { conflictPaths?: string[] },
  ): Promise<void> {
    const disposable = outcome === 'success' || outcome === 'empty';
    if (!disposable) {
      await this.writeSentinel(session, outcome, extra?.conflictPaths);
      return;
    }

    try {
      await this.deps.gitService.removeWorktree(session.workdir);
      await this.deps.gitService.deleteBranch(session.branch, true);
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      this.logger.warn(`cleanup(${outcome}) failed for ${session.chatId}: ${detail}`);
    }
  }

  /** Writes the `.ktx-outcome` sentinel into the session workdir; logs on failure. */
  private async writeSentinel(
    session: SessionWorktree<TConfig>,
    outcome: SessionOutcome,
    conflictPaths: string[] | undefined,
  ): Promise<void> {
    const payload: SentinelPayload = {
      outcome,
      at: new Date().toISOString(),
      chatId: session.chatId,
      baseSha: session.baseSha,
    };
    if (conflictPaths) {
      // Assigned last so the key stays at the end of the serialised JSON.
      payload.conflictPaths = conflictPaths;
    }

    try {
      await writeFile(join(session.workdir, '.ktx-outcome'), JSON.stringify(payload, null, 2), 'utf-8');
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      this.logger.warn(`cleanup(${outcome}) failed to write sentinel for ${session.chatId}: ${detail}`);
    }
  }
}
|
||||
1
packages/cli/src/context/daemon/index.ts
Normal file
1
packages/cli/src/context/daemon/index.ts
Normal file
|
|
@ -0,0 +1 @@
|
|||
export * from './semantic-layer-compute.js';
|
||||
339
packages/cli/src/context/daemon/semantic-layer-compute.test.ts
Normal file
339
packages/cli/src/context/daemon/semantic-layer-compute.test.ts
Normal file
|
|
@ -0,0 +1,339 @@
|
|||
import { once } from 'node:events';
|
||||
import { createServer } from 'node:http';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createHttpSemanticLayerComputePort, createPythonSemanticLayerComputePort } from './semantic-layer-compute.js';
|
||||
|
||||
const source = {
|
||||
name: 'orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [{ name: 'order_count', expr: 'count(*)' }],
|
||||
};
|
||||
|
||||
const sourceGenerationInput = {
|
||||
tables: [
|
||||
{
|
||||
name: 'orders',
|
||||
db: 'public',
|
||||
comment: 'Orders table',
|
||||
columns: [
|
||||
{ name: 'id', type: 'integer', primaryKey: true, nullable: false, comment: 'Order ID' },
|
||||
{ name: 'customer_id', type: 'integer' },
|
||||
{ name: 'amount', type: 'decimal', comment: 'Order amount' },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'customers',
|
||||
db: 'public',
|
||||
columns: [
|
||||
{ name: 'id', type: 'integer', primaryKey: true },
|
||||
{ name: 'email', type: 'varchar' },
|
||||
],
|
||||
},
|
||||
],
|
||||
links: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'customer_id',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
relationshipType: 'MANY_TO_ONE',
|
||||
},
|
||||
],
|
||||
dialect: 'postgres',
|
||||
};
|
||||
|
||||
const sourceGenerationDaemonPayload = {
|
||||
tables: [
|
||||
{
|
||||
name: 'orders',
|
||||
db: 'public',
|
||||
comment: 'Orders table',
|
||||
columns: [
|
||||
{ name: 'id', type: 'integer', primary_key: true, nullable: false, comment: 'Order ID' },
|
||||
{ name: 'customer_id', type: 'integer' },
|
||||
{ name: 'amount', type: 'decimal', comment: 'Order amount' },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'customers',
|
||||
db: 'public',
|
||||
columns: [
|
||||
{ name: 'id', type: 'integer', primary_key: true },
|
||||
{ name: 'email', type: 'varchar' },
|
||||
],
|
||||
},
|
||||
],
|
||||
links: [
|
||||
{
|
||||
from_table: 'orders',
|
||||
from_column: 'customer_id',
|
||||
to_table: 'customers',
|
||||
to_column: 'id',
|
||||
relationship_type: 'MANY_TO_ONE',
|
||||
},
|
||||
],
|
||||
dialect: 'postgres',
|
||||
};
|
||||
|
||||
const sourceGenerationDaemonResponse = {
|
||||
source_count: 2,
|
||||
sources: [
|
||||
{
|
||||
name: 'orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'number' }],
|
||||
joins: [
|
||||
{
|
||||
to: 'customers',
|
||||
on: 'customer_id = customers.id',
|
||||
relationship: 'many_to_one',
|
||||
},
|
||||
],
|
||||
measures: [{ name: 'record_count', expr: 'count(id)' }],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
// The stdio-backed port forwards each operation to `runJson(command, payload)`.
// Payload and response keys are snake_case on the daemon side and camelCase on
// the port's API.
describe('createPythonSemanticLayerComputePort', () => {
  it('calls the semantic-query stdio command', async () => {
    const runJson = vi.fn(async () => ({
      sql: 'select count(*) from public.orders',
      dialect: 'postgres',
      columns: [{ name: 'orders.order_count' }],
      plan: { sources_used: ['orders'] },
    }));
    const port = createPythonSemanticLayerComputePort({ runJson });

    const answer = await port.query({
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });

    expect(answer).toEqual({
      sql: 'select count(*) from public.orders',
      dialect: 'postgres',
      columns: [{ name: 'orders.order_count' }],
      plan: { sources_used: ['orders'] },
    });
    expect(runJson).toHaveBeenCalledWith('semantic-query', {
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });
  });

  it('calls the semantic-validate stdio command', async () => {
    const runJson = vi.fn(async () => ({
      valid: true,
      errors: [],
      warnings: [],
      per_source_warnings: {},
    }));
    const port = createPythonSemanticLayerComputePort({ runJson });

    const verdict = await port.validateSources({
      sources: [source],
      dialect: 'postgres',
      recentlyTouched: ['orders'],
    });

    // Response: per_source_warnings comes back as perSourceWarnings.
    expect(verdict).toEqual({
      valid: true,
      errors: [],
      warnings: [],
      perSourceWarnings: {},
    });
    // Request: recentlyTouched goes out as recently_touched.
    expect(runJson).toHaveBeenCalledWith('semantic-validate', {
      sources: [source],
      dialect: 'postgres',
      recently_touched: ['orders'],
    });
  });

  it('calls the semantic-generate-sources stdio command', async () => {
    const runJson = vi.fn(async () => sourceGenerationDaemonResponse);
    const port = createPythonSemanticLayerComputePort({ runJson });

    const generated = await port.generateSources(sourceGenerationInput);

    expect(generated).toEqual({
      sourceCount: 2,
      sources: sourceGenerationDaemonResponse.sources,
    });
    expect(runJson).toHaveBeenCalledWith('semantic-generate-sources', sourceGenerationDaemonPayload);
  });
});
|
||||
|
||||
// Covers the HTTP transport twice: first through an injected `requestJson`
// runner (no network), then against a throwaway local HTTP server so the
// default POST implementation is exercised end to end.
describe('createHttpSemanticLayerComputePort', () => {
  const injectedBaseUrl = 'http://127.0.0.1:8765/';

  /** Fresh copy of the query reply the fake daemon returns. */
  const queryReply = () => ({
    sql: 'select count(*) from public.orders',
    dialect: 'postgres',
    columns: [{ name: 'orders.order_count' }],
    plan: { sources_used: ['orders'] },
  });

  /**
   * Starts a local server on an ephemeral port that records every request
   * (URL plus parsed JSON body) and answers each one with `reply` as JSON.
   */
  async function startRecordingDaemon(reply: unknown) {
    const requests: Array<{ url: string | undefined; body: unknown }> = [];
    const server = createServer((request, response) => {
      const chunks: Buffer[] = [];
      request.on('data', (chunk: Buffer) => chunks.push(chunk));
      request.on('end', () => {
        requests.push({
          url: request.url,
          body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
        });
        response.writeHead(200, { 'content-type': 'application/json' });
        response.end(JSON.stringify(reply));
      });
    });

    server.listen(0, '127.0.0.1');
    await once(server, 'listening');
    return { server, requests };
  }

  it('calls semantic query and validate HTTP endpoints through an injected runner', async () => {
    const requestJson = vi.fn(async (path: string) =>
      path === '/semantic-layer/query'
        ? queryReply()
        : {
            valid: true,
            errors: [],
            warnings: [],
            per_source_warnings: {},
          },
    );
    const port = createHttpSemanticLayerComputePort({ baseUrl: injectedBaseUrl, requestJson });

    const queried = port.query({
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });
    await expect(queried).resolves.toEqual({
      sql: 'select count(*) from public.orders',
      dialect: 'postgres',
      columns: [{ name: 'orders.order_count' }],
      plan: { sources_used: ['orders'] },
    });

    const validated = port.validateSources({
      sources: [source],
      dialect: 'postgres',
      recentlyTouched: ['orders'],
    });
    await expect(validated).resolves.toEqual({
      valid: true,
      errors: [],
      warnings: [],
      perSourceWarnings: {},
    });

    expect(requestJson).toHaveBeenNthCalledWith(1, '/semantic-layer/query', {
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });
    expect(requestJson).toHaveBeenNthCalledWith(2, '/semantic-layer/validate', {
      sources: [source],
      dialect: 'postgres',
      recently_touched: ['orders'],
    });
  });

  it('calls the semantic source-generation HTTP endpoint through an injected runner', async () => {
    const requestJson = vi.fn(async () => sourceGenerationDaemonResponse);
    const port = createHttpSemanticLayerComputePort({ baseUrl: injectedBaseUrl, requestJson });

    const pending = port.generateSources(sourceGenerationInput);

    await expect(pending).resolves.toEqual({
      sourceCount: 2,
      sources: sourceGenerationDaemonResponse.sources,
    });
    expect(requestJson).toHaveBeenCalledWith('/semantic-layer/generate-sources', sourceGenerationDaemonPayload);
  });

  it('posts JSON to a running HTTP daemon endpoint', async () => {
    const { server, requests } = await startRecordingDaemon(queryReply());
    try {
      const address = server.address();
      if (!address || typeof address === 'string') {
        throw new Error('expected TCP server address');
      }
      const port = createHttpSemanticLayerComputePort({ baseUrl: `http://127.0.0.1:${address.port}` });

      const pending = port.query({
        sources: [source],
        dialect: 'postgres',
        query: { measures: ['orders.order_count'], dimensions: [] },
      });
      await expect(pending).resolves.toMatchObject({
        sql: 'select count(*) from public.orders',
        dialect: 'postgres',
      });

      expect(requests).toEqual([
        {
          url: '/semantic-layer/query',
          body: {
            sources: [source],
            dialect: 'postgres',
            query: { measures: ['orders.order_count'], dimensions: [] },
          },
        },
      ]);
    } finally {
      server.close();
    }
  });

  it('posts source-generation JSON to a running HTTP daemon endpoint', async () => {
    const { server, requests } = await startRecordingDaemon(sourceGenerationDaemonResponse);
    try {
      const address = server.address();
      if (!address || typeof address === 'string') {
        throw new Error('expected TCP server address');
      }
      const port = createHttpSemanticLayerComputePort({ baseUrl: `http://127.0.0.1:${address.port}` });

      await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
        sourceCount: 2,
        sources: sourceGenerationDaemonResponse.sources,
      });

      expect(requests).toEqual([
        {
          url: '/semantic-layer/generate-sources',
          body: sourceGenerationDaemonPayload,
        },
      ]);
    } finally {
      server.close();
    }
  });
});
|
||||
312
packages/cli/src/context/daemon/semantic-layer-compute.ts
Normal file
312
packages/cli/src/context/daemon/semantic-layer-compute.ts
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
import { request as httpRequest } from 'node:http';
|
||||
import { request as httpsRequest } from 'node:https';
|
||||
import { URL } from 'node:url';
|
||||
import { spawn } from 'node:child_process';
|
||||
import type { ResolvedSemanticLayerSource, SemanticLayerQueryInput } from '../sl/types.js';
|
||||
|
||||
/**
 * Result of a semantic-layer query as exposed by the compute ports in this
 * module. The port factories build it from the daemon's JSON reply.
 */
export interface KtxSemanticLayerComputeQueryResult {
  /** SQL text reported by the daemon; the ports fall back to `''` when the reply has no string `sql`. */
  sql: string;
  /** Dialect reported by the daemon; the ports fall back to the requested dialect. */
  dialect: string;
  /** Column descriptors; non-object entries in the daemon reply are dropped by the ports. */
  columns: Array<Record<string, unknown>>;
  /** Plan object from the daemon reply; `{}` when the reply carries no object there. */
  plan: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Outcome of validating semantic-layer sources through a compute port.
 */
export interface KtxSemanticLayerComputeValidationResult {
  /** `true` only when the daemon reply has `valid` set to exactly `true`. */
  valid: boolean;
  /** String entries of the reply's `errors` array. */
  errors: string[];
  /** String entries of the reply's `warnings` array. */
  warnings: string[];
  /** Taken from the reply's snake_case `per_source_warnings` object. */
  perSourceWarnings: Record<string, string[]>;
}
|
||||
|
||||
/**
 * One column of a table handed to source generation. Optional fields left
 * `undefined` are omitted from the payload sent to the daemon.
 */
export interface KtxSemanticLayerSourceGenerationColumnInput {
  name: string;
  type: string;
  /** Sent to the daemon as `primary_key`. */
  primaryKey?: boolean;
  nullable?: boolean;
  /** `null` is forwarded as-is; only `undefined` is omitted. */
  comment?: string | null;
}
|
||||
|
||||
/**
 * One table handed to source generation. `catalog`, `db` and `comment` are
 * only included in the daemon payload when they are not `undefined`
 * (an explicit `null` is forwarded).
 */
export interface KtxSemanticLayerSourceGenerationTableInput {
  name: string;
  catalog?: string | null;
  db?: string | null;
  comment?: string | null;
  /** Columns of the table, serialized in the given order. */
  columns: KtxSemanticLayerSourceGenerationColumnInput[];
}
|
||||
|
||||
/**
 * A relationship between two table columns. Each field is sent to the daemon
 * under its snake_case name (`from_table`, `from_column`, `to_table`,
 * `to_column`, `relationship_type`).
 */
export interface KtxSemanticLayerSourceGenerationLinkInput {
  fromTable: string;
  fromColumn: string;
  toTable: string;
  toColumn: string;
  relationshipType: string;
}
|
||||
|
||||
/**
 * Input of `generateSources`: the tables and links to derive sources from.
 */
export interface KtxSemanticLayerSourceGenerationInput {
  tables: KtxSemanticLayerSourceGenerationTableInput[];
  links: KtxSemanticLayerSourceGenerationLinkInput[];
  /** The daemon payload uses `'postgres'` when this is omitted. */
  dialect?: string;
}
|
||||
|
||||
/**
 * Result of `generateSources`.
 */
export interface KtxSemanticLayerSourceGenerationResult {
  /** Object entries of the daemon reply's `sources` array. */
  sources: Array<Record<string, unknown>>;
  /** The reply's numeric `source_count`, or the number of returned sources when it is absent. */
  sourceCount: number;
}
|
||||
|
||||
/**
 * Transport-independent access to the semantic-layer daemon. This module
 * provides a stdio (Python process) and an HTTP implementation.
 */
export interface KtxSemanticLayerComputePort {
  /**
   * Sends a semantic-layer query to the daemon.
   *
   * Callers must pass sources sanitized through toResolvedWire. The Python
   * daemon rejects authoring-only fields such as usage and inherits_columns_from.
   */
  query(input: {
    sources: ResolvedSemanticLayerSource[];
    query: SemanticLayerQueryInput;
    dialect: string;
  }): Promise<KtxSemanticLayerComputeQueryResult>;

  /**
   * Asks the daemon to validate the given sources.
   *
   * Callers must pass sources sanitized through toResolvedWire. The Python
   * daemon rejects authoring-only fields such as usage and inherits_columns_from.
   */
  validateSources(input: {
    sources: ResolvedSemanticLayerSource[];
    dialect: string;
    recentlyTouched?: string[];
  }): Promise<KtxSemanticLayerComputeValidationResult>;

  /** Asks the daemon to generate sources from table and link descriptions. */
  generateSources(input: KtxSemanticLayerSourceGenerationInput): Promise<KtxSemanticLayerSourceGenerationResult>;
}
|
||||
|
||||
/** Subcommands the stdio daemon runner can be invoked with. */
export type KtxDaemonCommand =
  | 'semantic-query'
  | 'semantic-validate'
  | 'semantic-generate-sources';
|
||||
|
||||
/**
 * Runs one daemon subcommand with a JSON payload and resolves with the JSON
 * object it answered. Injectable so tests can replace the process spawn.
 */
export type KtxDaemonJsonRunner = (
  subcommand: KtxDaemonCommand,
  payload: Record<string, unknown>,
) => Promise<Record<string, unknown>>;
|
||||
|
||||
/**
 * Sends a JSON payload to a daemon HTTP path (e.g. `/semantic-layer/query`)
 * and resolves with the JSON object it answered.
 */
export type KtxDaemonHttpJsonRunner = (
  path: string,
  payload: Record<string, unknown>,
) => Promise<Record<string, unknown>>;
|
||||
|
||||
/**
 * Options of `createPythonSemanticLayerComputePort`.
 */
export interface PythonSemanticLayerComputeOptions {
  /** Executable to spawn; the factory defaults to `'python'`. */
  command?: string;
  /** Arguments placed before the subcommand; the factory defaults to `['-m', 'ktx_daemon']`. */
  args?: string[];
  /** Working directory of the spawned process. */
  cwd?: string;
  /** Extra environment variables, layered over `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Replaces process spawning entirely when provided. */
  runJson?: KtxDaemonJsonRunner;
}
|
||||
|
||||
/**
 * Options of `createHttpSemanticLayerComputePort`.
 */
export interface HttpSemanticLayerComputeOptions {
  /** Base URL of the daemon; used by the built-in POST runner. */
  baseUrl: string;
  /** Replaces the built-in POST runner when provided. */
  requestJson?: KtxDaemonHttpJsonRunner;
}
|
||||
|
||||
/**
 * Parses daemon output and guarantees it is a plain JSON object.
 *
 * @param raw - Text produced by the daemon (stdout or HTTP body).
 * @param subcommand - Command or path name, used only to label error messages.
 * @returns The parsed object.
 * @throws Error when `raw` is not valid JSON, or is valid JSON that is not an
 *   object (null, array or primitive).
 */
function parseJsonObject(raw: string, subcommand: string): Record<string, unknown> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    // A bare SyntaxError ("Unexpected end of JSON input") gives no hint which
    // daemon call misbehaved, so re-label it like the non-object case below.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`ktx-daemon ${subcommand} returned invalid JSON: ${reason}`);
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`ktx-daemon ${subcommand} returned non-object JSON`);
  }
  return parsed as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Builds a runner that spawns `command ...args <subcommand>` for every call,
 * writes the payload as one JSON line to the child's stdin and resolves with
 * the JSON object the child printed on stdout.
 *
 * The returned promise rejects when the process cannot be spawned, exits
 * unsuccessfully (stderr text is used as the reason when there is any), or
 * prints something that is not a JSON object.
 */
function runProcessJson(
  options: Required<Pick<PythonSemanticLayerComputeOptions, 'command' | 'args'>> &
    Pick<PythonSemanticLayerComputeOptions, 'cwd' | 'env'>,
): KtxDaemonJsonRunner {
  return async (subcommand: KtxDaemonCommand, payload: Record<string, unknown>): Promise<Record<string, unknown>> =>
    new Promise((resolve, reject) => {
      const child = spawn(options.command, [...options.args, subcommand], {
        cwd: options.cwd,
        env: { ...process.env, ...options.env },
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      const stdout: Buffer[] = [];
      const stderr: Buffer[] = [];
      let stdinError: Error | undefined;

      child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk));
      child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk));
      child.on('error', reject);
      // A child that exits before reading its input makes the stdin write fail
      // (EPIPE). Without a listener that stream error is thrown as an uncaught
      // exception; remember it and let the 'close' handler report the outcome.
      child.stdin.on('error', (error: Error) => {
        stdinError = error;
      });
      child.on('close', (code, signal) => {
        const stdoutText = Buffer.concat(stdout).toString('utf8').trim();
        const stderrText = Buffer.concat(stderr).toString('utf8').trim();
        if (code !== 0) {
          // `code` is null when the child was terminated by a signal.
          const exitReason = code === null ? `signal ${signal}` : `exit code ${code}`;
          reject(new Error(`ktx-daemon ${subcommand} failed: ${stderrText || stdinError?.message || exitReason}`));
          return;
        }
        try {
          resolve(parseJsonObject(stdoutText, subcommand));
        } catch (error) {
          reject(error);
        }
      });
      child.stdin.end(`${JSON.stringify(payload)}\n`);
    });
}
|
||||
|
||||
/**
 * Returns `baseUrl` with exactly one guaranteed trailing slash appended when
 * it does not already end with one, so relative paths resolve beneath it.
 */
function normalizedBaseUrl(baseUrl: string): string {
  if (baseUrl.endsWith('/')) {
    return baseUrl;
  }
  return `${baseUrl}/`;
}
|
||||
|
||||
/**
 * Builds a runner that POSTs a JSON payload to `path` beneath `baseUrl` and
 * resolves with the JSON object in the response body.
 *
 * The leading slash of `path` is stripped and the base URL is given a trailing
 * slash, so the path is always resolved relative to the base. `https:` URLs
 * use the HTTPS client, everything else the HTTP client.
 *
 * The returned promise rejects on request errors, on response stream errors,
 * on any status outside 2xx (the body text is included in the message) and
 * when the body is not a JSON object.
 */
function postJson(baseUrl: string): KtxDaemonHttpJsonRunner {
  return async (path, payload) =>
    new Promise((resolve, reject) => {
      const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl));
      const body = JSON.stringify(payload);
      const client = target.protocol === 'https:' ? httpsRequest : httpRequest;
      const request = client(
        target,
        {
          method: 'POST',
          headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            'content-length': Buffer.byteLength(body),
          },
        },
        (response) => {
          const chunks: Buffer[] = [];
          response.on('data', (chunk: Buffer) => chunks.push(chunk));
          // If the connection drops mid-body, 'end' never fires; surface the
          // stream error instead of leaving the promise pending forever.
          response.on('error', reject);
          response.on('end', () => {
            const text = Buffer.concat(chunks).toString('utf8');
            const statusCode = response.statusCode ?? 0;
            if (statusCode < 200 || statusCode >= 300) {
              reject(new Error(`ktx-daemon HTTP ${path} failed with ${statusCode}: ${text}`));
              return;
            }
            try {
              resolve(parseJsonObject(text, path));
            } catch (error) {
              reject(error);
            }
          });
        },
      );
      request.on('error', reject);
      request.end(body);
    });
}
|
||||
|
||||
/**
 * Keeps only the string entries of `value`; anything that is not an array
 * yields an empty array.
 */
function stringArray(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const isString = (item: unknown): item is string => typeof item === 'string';
  return value.filter(isString);
}
|
||||
|
||||
/**
 * Returns `value` itself when it is a non-null, non-array object and an empty
 * object otherwise.
 */
function recordValue(value: unknown): Record<string, unknown> {
  const isPlainRecord = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (!isPlainRecord) {
    return {};
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Keeps only the entries of `value` that are non-null, non-array objects;
 * anything that is not an array yields an empty array.
 */
function recordArray(value: unknown): Array<Record<string, unknown>> {
  if (!Array.isArray(value)) {
    return [];
  }
  const isRecord = (item: unknown): item is Record<string, unknown> =>
    item !== null && typeof item === 'object' && !Array.isArray(item);
  return value.filter(isRecord);
}
|
||||
|
||||
/**
 * Converts the camelCase source-generation input into the snake_case payload
 * sent to the daemon.
 *
 * Optional table and column fields are only emitted when they are not
 * `undefined` (an explicit `null` is kept), and `dialect` falls back to
 * `'postgres'`.
 */
function sourceGenerationPayload(input: KtxSemanticLayerSourceGenerationInput): Record<string, unknown> {
  const tables = input.tables.map((table) => {
    const wireTable: Record<string, unknown> = { name: table.name };
    if (table.catalog !== undefined) {
      wireTable.catalog = table.catalog;
    }
    if (table.db !== undefined) {
      wireTable.db = table.db;
    }
    if (table.comment !== undefined) {
      wireTable.comment = table.comment;
    }
    wireTable.columns = table.columns.map((column) => {
      const wireColumn: Record<string, unknown> = { name: column.name, type: column.type };
      if (column.primaryKey !== undefined) {
        wireColumn.primary_key = column.primaryKey;
      }
      if (column.nullable !== undefined) {
        wireColumn.nullable = column.nullable;
      }
      if (column.comment !== undefined) {
        wireColumn.comment = column.comment;
      }
      return wireColumn;
    });
    return wireTable;
  });

  const links = input.links.map(({ fromTable, fromColumn, toTable, toColumn, relationshipType }) => ({
    from_table: fromTable,
    from_column: fromColumn,
    to_table: toTable,
    to_column: toColumn,
    relationship_type: relationshipType,
  }));

  return { tables, links, dialect: input.dialect ?? 'postgres' };
}
|
||||
|
||||
/**
 * Maps a daemon source-generation reply onto the port's result shape:
 * object entries of `sources`, plus `source_count` when it is a number or the
 * number of kept sources otherwise.
 */
function sourceGenerationResult(raw: Record<string, unknown>): KtxSemanticLayerSourceGenerationResult {
  const sources = recordArray(raw.sources);
  const reportedCount = raw.source_count;
  const sourceCount = typeof reportedCount === 'number' ? reportedCount : sources.length;
  return { sources, sourceCount };
}
|
||||
|
||||
/**
 * Creates a compute port that talks to the daemon over stdio, spawning one
 * process per call.
 *
 * Defaults: `command` is `'python'` and `args` is `['-m', 'ktx_daemon']`.
 * When `options.runJson` is given it replaces process spawning entirely.
 *
 * Daemon replies are mapped defensively: missing or mistyped fields fall back
 * to empty values (or, for `dialect`, to the requested dialect) instead of
 * being passed through.
 */
export function createPythonSemanticLayerComputePort(
  options: PythonSemanticLayerComputeOptions = {},
): KtxSemanticLayerComputePort {
  const command = options.command ?? 'python';
  const args = options.args ?? ['-m', 'ktx_daemon'];
  const runJson = options.runJson ?? runProcessJson({ command, args, cwd: options.cwd, env: options.env });

  return {
    async query(input) {
      const raw = await runJson('semantic-query', {
        sources: input.sources,
        dialect: input.dialect,
        query: input.query,
      });
      return {
        sql: typeof raw.sql === 'string' ? raw.sql : '',
        dialect: typeof raw.dialect === 'string' ? raw.dialect : input.dialect,
        columns: recordArray(raw.columns),
        plan: recordValue(raw.plan),
      };
    },
    async validateSources(input) {
      const raw = await runJson('semantic-validate', {
        sources: input.sources,
        dialect: input.dialect,
        recently_touched: input.recentlyTouched,
      });
      // Validate each per-source entry instead of asserting the shape with a
      // cast, so a malformed reply cannot smuggle non-string values through.
      const perSourceWarnings = Object.fromEntries(
        Object.entries(recordValue(raw.per_source_warnings)).map(
          ([sourceName, warnings]): [string, string[]] => [sourceName, stringArray(warnings)],
        ),
      );
      return {
        valid: raw.valid === true,
        errors: stringArray(raw.errors),
        warnings: stringArray(raw.warnings),
        perSourceWarnings,
      };
    },
    async generateSources(input) {
      const raw = await runJson('semantic-generate-sources', sourceGenerationPayload(input));
      return sourceGenerationResult(raw);
    },
  };
}
|
||||
|
||||
/**
 * Creates a compute port that talks to a running daemon over HTTP.
 *
 * Requests are POSTed to `/semantic-layer/query`, `/semantic-layer/validate`
 * and `/semantic-layer/generate-sources`. When `options.requestJson` is given
 * it replaces the built-in POST runner (and `baseUrl` is not used here).
 *
 * Daemon replies are mapped defensively: missing or mistyped fields fall back
 * to empty values (or, for `dialect`, to the requested dialect) instead of
 * being passed through.
 */
export function createHttpSemanticLayerComputePort(
  options: HttpSemanticLayerComputeOptions,
): KtxSemanticLayerComputePort {
  const requestJson = options.requestJson ?? postJson(options.baseUrl);

  return {
    async query(input) {
      const raw = await requestJson('/semantic-layer/query', {
        sources: input.sources,
        dialect: input.dialect,
        query: input.query,
      });
      return {
        sql: typeof raw.sql === 'string' ? raw.sql : '',
        dialect: typeof raw.dialect === 'string' ? raw.dialect : input.dialect,
        columns: recordArray(raw.columns),
        plan: recordValue(raw.plan),
      };
    },
    async validateSources(input) {
      const raw = await requestJson('/semantic-layer/validate', {
        sources: input.sources,
        dialect: input.dialect,
        recently_touched: input.recentlyTouched,
      });
      // Validate each per-source entry instead of asserting the shape with a
      // cast, so a malformed reply cannot smuggle non-string values through.
      const perSourceWarnings = Object.fromEntries(
        Object.entries(recordValue(raw.per_source_warnings)).map(
          ([sourceName, warnings]): [string, string[]] => [sourceName, stringArray(warnings)],
        ),
      );
      return {
        valid: raw.valid === true,
        errors: stringArray(raw.errors),
        warnings: stringArray(raw.warnings),
        perSourceWarnings,
      };
    },
    async generateSources(input) {
      const raw = await requestJson('/semantic-layer/generate-sources', sourceGenerationPayload(input));
      return sourceGenerationResult(raw);
    },
  };
}
|
||||
2
packages/cli/src/context/index-sync/index.ts
Normal file
2
packages/cli/src/context/index-sync/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
|
||||
export { discoverReindexScopes, reindexLocalIndexes } from './reindex.js';
|
||||
196
packages/cli/src/context/index-sync/reindex.test.ts
Normal file
196
packages/cli/src/context/index-sync/reindex.test.ts
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { KtxEmbeddingPort } from '../core/index.js';
|
||||
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../project/index.js';
|
||||
import { SqliteKnowledgeIndex } from '../wiki/sqlite-knowledge-index.js';
|
||||
import { reindexLocalIndexes } from './reindex.js';
|
||||
|
||||
/**
 * Deterministic embedding stub for tests: every text maps to the
 * two-component vector `[text.length, 1]`.
 */
class FakeEmbeddingPort implements KtxEmbeddingPort {
  readonly maxBatchSize = 8;

  /** Single place that defines the fake vector for a text. */
  private static vectorFor(text: string): number[] {
    return [text.length, 1];
  }

  async computeEmbedding(text: string): Promise<number[]> {
    return FakeEmbeddingPort.vectorFor(text);
  }

  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const vectors: number[][] = [];
    for (const text of texts) {
      vectors.push(FakeEmbeddingPort.vectorFor(text));
    }
    return vectors;
  }
}
|
||||
|
||||
/**
 * Initializes a ktx project in `tempDir` (overwriting any existing one via
 * `force: true`) and returns it loaded from disk.
 */
async function createProject(tempDir: string): Promise<KtxLocalProject> {
  const projectDir = tempDir;
  await initKtxProject({ projectDir, force: true });
  const project = await loadKtxProject({ projectDir });
  return project;
}
|
||||
|
||||
// Each case runs against a fresh temporary project directory that is removed
// afterwards, so the SQLite state and the wiki/semantic-layer trees never leak
// between tests.
describe('reindexLocalIndexes', () => {
  const REVENUE_PAGE = '---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n';
  const ORDERS_SOURCE =
    'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n';

  let tempDir: string;

  /** Writes a UTF-8 file at a path relative to the project root. */
  async function writeProjectFile(project: KtxLocalProject, relativePath: string, contents: string): Promise<void> {
    await writeFile(join(project.projectDir, relativePath), contents, 'utf-8');
  }

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-reindex-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('returns an empty summary when no wiki or semantic-layer directories exist', async () => {
    const project = await createProject(tempDir);
    for (const directory of ['wiki', 'semantic-layer']) {
      await rm(join(project.projectDir, directory), { recursive: true, force: true });
    }

    const pending = reindexLocalIndexes(project, { force: false, embeddingService: null });

    await expect(pending).resolves.toMatchObject({
      scopes: [],
      totals: { scanned: 0, updated: 0, deleted: 0, embeddingsRecomputed: 0, embeddingsFailed: 0 },
      force: false,
      embeddingsAvailable: false,
    });
  });

  it('discovers empty directories as zero-row scopes', async () => {
    const project = await createProject(tempDir);
    for (const directory of ['wiki/user/local', 'semantic-layer/warehouse']) {
      await mkdir(join(project.projectDir, directory), { recursive: true });
    }

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });

    const labels = summary.scopes.map((scope) => scope.label);
    expect(labels).toEqual(['global', 'user/local', 'warehouse']);
    expect(summary.totals.scanned).toBe(0);
  });

  it('indexes mixed wiki and SL sources and reports totals', async () => {
    const project = await createProject(tempDir);
    await writeProjectFile(project, 'wiki/global/revenue.md', REVENUE_PAGE);
    await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
    await writeProjectFile(project, 'semantic-layer/warehouse/orders.yaml', ORDERS_SOURCE);

    const summary = await reindexLocalIndexes(project, {
      force: false,
      embeddingService: new FakeEmbeddingPort(),
    });

    expect(summary.scopes).toHaveLength(2);
    expect(summary.totals).toMatchObject({ scanned: 2, updated: 2, deleted: 0, embeddingsRecomputed: 2 });
    expect(summary.embeddingsAvailable).toBe(true);
  });

  it('does not report unchanged lexical-only rows as updated on repeated runs', async () => {
    const project = await createProject(tempDir);
    await writeProjectFile(project, 'wiki/global/revenue.md', REVENUE_PAGE);
    await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
    await writeProjectFile(project, 'semantic-layer/warehouse/orders.yaml', ORDERS_SOURCE);

    const first = await reindexLocalIndexes(project, { force: false, embeddingService: null });
    expect(first.totals).toMatchObject({
      scanned: 2,
      updated: 2,
      deleted: 0,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });

    const second = await reindexLocalIndexes(project, { force: false, embeddingService: null });

    expect(second.totals).toMatchObject({
      scanned: 2,
      updated: 0,
      deleted: 0,
      embeddingsRecomputed: 0,
      embeddingsFailed: 0,
    });
    const updatedByLabel = second.scopes.map((scope) => [scope.label, scope.updated]);
    expect(updatedByLabel).toEqual([
      ['global', 0],
      ['warehouse', 0],
    ]);
  });

  it('force clears stale rows before rebuilding each discovered scope', async () => {
    const project = await createProject(tempDir);
    // Seed the index with a row that has no backing file on disk.
    const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') });
    wikiIndex.sync([
      {
        path: 'wiki/global/stale.md',
        key: 'stale',
        scope: 'GLOBAL',
        scopeId: null,
        summary: 'Stale',
        content: 'Stale content',
        tags: [],
        embedding: [1, 0],
      },
    ]);
    await writeProjectFile(project, 'wiki/global/revenue.md', REVENUE_PAGE);

    const summary = await reindexLocalIndexes(project, {
      force: true,
      embeddingService: new FakeEmbeddingPort(),
    });

    expect(summary.force).toBe(true);
    expect(summary.totals).toMatchObject({ scanned: 1, updated: 1, deleted: 0 });
    expect(wikiIndex.search('Stale', 10)).toEqual([]);
  });

  it('captures a per-scope error and continues other scopes', async () => {
    const project = await createProject(tempDir);
    await writeProjectFile(project, 'wiki/global/revenue.md', REVENUE_PAGE);
    await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
    await writeProjectFile(project, 'semantic-layer/warehouse/broken.yaml', 'not: [valid');

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });

    const errorFor = (label: string) => summary.scopes.find((scope) => scope.label === label)?.error;
    expect(errorFor('global')).toBeUndefined();
    expect(errorFor('warehouse')).toContain('YAML');
  });

  it('marks a scope errored when configured embeddings fail', async () => {
    const project = await createProject(tempDir);
    await writeProjectFile(project, 'wiki/global/revenue.md', REVENUE_PAGE);
    const unavailable = async (): Promise<never> => {
      throw new Error('embedding provider unavailable');
    };
    const embeddingService: KtxEmbeddingPort = {
      maxBatchSize: 8,
      computeEmbedding: unavailable,
      computeEmbeddingsBulk: unavailable,
    };

    const summary = await reindexLocalIndexes(project, { force: false, embeddingService });

    expect(summary.scopes[0]).toMatchObject({
      label: 'global',
      embeddingsFailed: 1,
      error: '1 embedding recomputation failed',
    });
  });
});
|
||||
162
packages/cli/src/context/index-sync/reindex.ts
Normal file
162
packages/cli/src/context/index-sync/reindex.ts
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
import { readdir, stat } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import { ktxLocalStateDbPath, type KtxLocalProject } from '../project/index.js';
|
||||
import { loadLocalSlSourceRecords, SlSearchService, SqliteSlSourcesIndex } from '../sl/index.js';
|
||||
import { KnowledgeWikiService, SqliteKnowledgeIndex } from '../wiki/index.js';
|
||||
import type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
|
||||
|
||||
/**
 * A unit of reindex work found on disk: the global wiki, one user's wiki, or
 * the semantic-layer sources of one connection. `label` is the human-readable
 * name carried into the per-scope results.
 */
type DiscoveredScope =
  // wiki/global
  | { kind: 'wiki'; scope: 'GLOBAL'; scopeId: null; label: 'global' }
  // wiki/user/<scopeId>
  | { kind: 'wiki'; scope: 'USER'; scopeId: string; label: `user/${string}` }
  // semantic-layer/<connectionId>
  | { kind: 'sl'; connectionId: string; label: string };
|
||||
|
||||
/**
 * All-zero counters: the seed for summing totals and the counters reported
 * for a scope whose reindex threw.
 */
const ZERO: ReindexWorkResult = {
  scanned: 0,
  updated: 0,
  deleted: 0,
  embeddingsRecomputed: 0,
  embeddingsFailed: 0,
};
|
||||
|
||||
/**
 * Resolves to `true` only when `path` can be stat'ed and is a directory.
 * Any stat failure (missing path, permissions, ...) counts as "no".
 */
async function directoryExists(path: string): Promise<boolean> {
  const info = await stat(path).catch(() => null);
  if (info === null) {
    return false;
  }
  return info.isDirectory();
}
|
||||
|
||||
/**
 * Lists the names of the immediate sub-directories of `path`, sorted with
 * `localeCompare`.
 *
 * @returns An empty list when `path` does not exist or is not a directory.
 * @throws Any other filesystem error (for example a permission failure).
 */
async function childDirectories(path: string): Promise<string[]> {
  try {
    const entries = await readdir(path, { withFileTypes: true });
    return entries
      .filter((entry) => entry.isDirectory())
      .map((entry) => entry.name)
      .sort((left, right) => left.localeCompare(right));
  } catch (error) {
    const code = (error as NodeJS.ErrnoException).code;
    // ENOTDIR: something exists at `path` but it is a regular file. Like a
    // missing path, that simply means there are no child directories.
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      return [];
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Finds every scope that can be reindexed in a project, in this order:
 * the global wiki (when `wiki/global` is a directory), one scope per
 * directory under `wiki/user`, then one scope per directory under
 * `semantic-layer` except the reserved `_schema` directory.
 */
export async function discoverReindexScopes(project: KtxLocalProject): Promise<DiscoveredScope[]> {
  const root = project.projectDir;
  const hasGlobalWiki = await directoryExists(join(root, 'wiki/global'));
  const userIds = await childDirectories(join(root, 'wiki/user'));
  const connectionIds = await childDirectories(join(root, 'semantic-layer'));

  const globalScopes: DiscoveredScope[] = hasGlobalWiki
    ? [{ kind: 'wiki', scope: 'GLOBAL', scopeId: null, label: 'global' }]
    : [];
  const userScopes = userIds.map(
    (userId): DiscoveredScope => ({ kind: 'wiki', scope: 'USER', scopeId: userId, label: `user/${userId}` }),
  );
  const slScopes = connectionIds
    .filter((connectionId) => connectionId !== '_schema')
    .map((connectionId): DiscoveredScope => ({ kind: 'sl', connectionId, label: connectionId }));

  return [...globalScopes, ...userScopes, ...slScopes];
}
|
||||
|
||||
/**
 * Renders a thrown value as text. `Error` instances with a specific name
 * (anything other than empty or plain `'Error'`) are prefixed with it; other
 * values are converted with `String`.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    const hasSpecificName = Boolean(error.name) && error.name !== 'Error';
    if (hasSpecificName) {
      return `${error.name}: ${error.message}`;
    }
    return error.message;
  }
  return String(error);
}
|
||||
|
||||
/**
 * Adds two sets of reindex counters field by field and returns a new object;
 * neither argument is modified.
 */
function addTotals(left: ReindexWorkResult, right: ReindexWorkResult): ReindexWorkResult {
  const sum = (counter: keyof ReindexWorkResult): number => left[counter] + right[counter];
  return {
    scanned: sum('scanned'),
    updated: sum('updated'),
    deleted: sum('deleted'),
    embeddingsRecomputed: sum('embeddingsRecomputed'),
    embeddingsFailed: sum('embeddingsFailed'),
  };
}
|
||||
|
||||
/**
 * Whole milliseconds elapsed since `startedAt`, a `process.hrtime.bigint()`
 * reading in nanoseconds. The division is done on bigints, so the result is
 * truncated to an integer.
 */
function durationSince(startedAt: bigint): number {
  const NANOS_PER_MILLI = 1_000_000n;
  const elapsedNanos = process.hrtime.bigint() - startedAt;
  return Number(elapsedNanos / NANOS_PER_MILLI);
}
|
||||
|
||||
/**
 * Describes failed embedding recomputations for a scope, or returns
 * `undefined` when none failed. The noun is pluralized for any count but 1.
 */
function embeddingFailureError(work: ReindexWorkResult): string | undefined {
  const failed = work.embeddingsFailed;
  if (failed === 0) {
    return undefined;
  }
  const plural = failed === 1 ? '' : 's';
  return `${failed} embedding recomputation${plural} failed`;
}
|
||||
|
||||
/**
 * Rebuilds the local wiki and semantic-layer search indexes of a project.
 *
 * Scopes are discovered on disk and processed one after another. A failure in
 * one scope is recorded on that scope's result (with zeroed counters) and does
 * not stop the remaining scopes.
 *
 * With `options.force`, a scope's index rows are cleared before it is synced
 * and its reported `deleted` count is forced to 0. When an embedding service
 * is configured and a scope reports failed embeddings, that scope's result
 * carries an `error` describing the failure count.
 *
 * @returns Per-scope results, summed totals, the database path (relative to
 *   the project directory when possible) and overall timing.
 */
export async function reindexLocalIndexes(
  project: KtxLocalProject,
  options: ReindexOptions,
): Promise<ReindexSummary> {
  const runStartedAt = process.hrtime.bigint();
  const dbPath = ktxLocalStateDbPath(project);
  const discovered = await discoverReindexScopes(project);
  const { force, embeddingService } = options;

  const wikiIndex = new SqliteKnowledgeIndex({ dbPath });
  const slIndex = new SqliteSlSourcesIndex({ dbPath });
  const wikiService = new KnowledgeWikiService(project.fileStore, embeddingService, wikiIndex, project.git);
  const slService = new SlSearchService(embeddingService, slIndex);
  const scopeResults: ReindexScopeResult[] = [];

  for (const scope of discovered) {
    const scopeStartedAt = process.hrtime.bigint();
    // Identifying fields shared by the success and the failure result.
    const identity =
      scope.kind === 'wiki'
        ? {
            kind: scope.kind,
            label: scope.label,
            scope: scope.scope === 'GLOBAL' ? ('global' as const) : ('user' as const),
            scopeId: scope.scopeId,
          }
        : { kind: scope.kind, label: scope.label, connectionId: scope.connectionId };

    try {
      let work: ReindexWorkResult;
      if (scope.kind === 'wiki') {
        if (force) {
          wikiIndex.clear(scope.scope, scope.scopeId);
        }
        work = await wikiService.syncIndex(scope.scope, scope.scopeId);
      } else {
        if (force) {
          await slIndex.clear(scope.connectionId);
        }
        const records = await loadLocalSlSourceRecords(project, { connectionId: scope.connectionId });
        const sources = records.map((record) => record.source);
        work = await slService.indexSources(scope.connectionId, sources);
      }

      const embeddingsBroken = embeddingService && work.embeddingsFailed > 0;
      scopeResults.push({
        ...identity,
        ...work,
        // After a forced clear the rows were wiped up front, so nothing is
        // reported as deleted by the sync itself.
        ...(force ? { deleted: 0 } : {}),
        ...(embeddingsBroken ? { error: embeddingFailureError(work) } : {}),
        durationMs: durationSince(scopeStartedAt),
      });
    } catch (error) {
      scopeResults.push({
        ...identity,
        ...ZERO,
        durationMs: durationSince(scopeStartedAt),
        error: errorMessage(error),
      });
    }
  }

  let totals: ReindexWorkResult = ZERO;
  for (const result of scopeResults) {
    totals = addTotals(totals, result);
  }

  return {
    scopes: scopeResults,
    totals,
    dbPath: relative(project.projectDir, dbPath) || dbPath,
    force,
    embeddingsAvailable: embeddingService !== null,
    durationMs: durationSince(runStartedAt),
  };
}
|
||||
33
packages/cli/src/context/index-sync/types.ts
Normal file
33
packages/cli/src/context/index-sync/types.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import type { KtxEmbeddingPort } from '../core/index.js';
|
||||
|
||||
/** Options accepted by `reindexLocalIndexes`. */
export interface ReindexOptions {
  /** When true, each scope's index is cleared before it is rebuilt. */
  force: boolean;
  /**
   * Embedding port handed to the wiki and sl services, or `null` when none is
   * available (reported as `embeddingsAvailable: false` in the summary).
   */
  embeddingService: KtxEmbeddingPort | null;
}
|
||||
|
||||
/**
 * Counters produced by one indexing pass; also used for the summed totals.
 *
 * NOTE(review): the exact meaning of each counter is defined by the index
 * services that produce it; the member descriptions below are name-derived
 * where marked and should be confirmed against those services.
 */
export interface ReindexWorkResult {
  /** Entries examined — presumably; confirm against the producing service. */
  scanned: number;
  /** Entries written or refreshed — presumably; confirm against the producing service. */
  updated: number;
  /** Entries removed; `reindexLocalIndexes` reports 0 here for forced runs. */
  deleted: number;
  /** Embeddings recomputed — presumably successful recomputations; confirm. */
  embeddingsRecomputed: number;
  /** Embedding recomputations that failed; 0 means no failure is reported. */
  embeddingsFailed: number;
}
|
||||
|
||||
/**
 * Outcome of reindexing a single scope: the work counters plus the scope's
 * identity, timing and any error.
 */
export interface ReindexScopeResult extends ReindexWorkResult {
  /** Which index the result belongs to: the wiki or the semantic layer ("sl"). */
  kind: 'wiki' | 'sl';
  /** Label copied from the discovered scope. */
  label: string;
  /** Populated by `reindexLocalIndexes` for wiki results only. */
  scope?: 'global' | 'user';
  /** Populated by `reindexLocalIndexes` for wiki results only; may be `null`. */
  scopeId?: string | null;
  /** Populated by `reindexLocalIndexes` for sl results only. */
  connectionId?: string;
  /** Time spent on this scope, in whole milliseconds. */
  durationMs: number;
  /** Embedding-failure summary or the message of an error thrown while processing the scope. */
  error?: string;
}
|
||||
|
||||
/** Overall result returned by `reindexLocalIndexes`. */
export interface ReindexSummary {
  /** One entry per processed scope, including scopes that failed. */
  scopes: ReindexScopeResult[];
  /** Counters of all scope results folded together. */
  totals: ReindexWorkResult;
  /** State database path; relative to the project directory when that relative path is non-empty. */
  dbPath: string;
  /** Echo of the `force` option the run was started with. */
  force: boolean;
  /** True when a non-null embedding service was supplied for the run. */
  embeddingsAvailable: boolean;
  /** Duration of the whole run, in whole milliseconds. */
  durationMs: number;
}
|
||||
128
packages/cli/src/context/index.ts
Normal file
128
packages/cli/src/context/index.ts
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
export * from './agent/index.js';
|
||||
export * from './core/index.js';
|
||||
export * from './daemon/index.js';
|
||||
export * from './ingest/index.js';
|
||||
export * from './index-sync/index.js';
|
||||
export * from './llm/index.js';
|
||||
export type {
|
||||
CaptureSession,
|
||||
CaptureSignals,
|
||||
MemoryAgentInput,
|
||||
MemoryAgentResult,
|
||||
MemoryAgentServiceDeps,
|
||||
MemoryAgentSettings,
|
||||
MemoryAgentSourceType,
|
||||
MemoryCommitMessagePort,
|
||||
MemoryConnectionPort,
|
||||
MemoryFileStorePort,
|
||||
MemoryKnowledgeSlRefsPort,
|
||||
MemoryLockPort,
|
||||
MemorySlSourceReconcilerPort,
|
||||
MemoryTelemetryPort,
|
||||
MemoryToolSetLike,
|
||||
MemoryToolsetFactoryPort,
|
||||
} from './memory/index.js';
|
||||
export * from './project/index.js';
|
||||
export * from './prompts/index.js';
|
||||
export * from './search/index.js';
|
||||
export * from './sql-analysis/index.js';
|
||||
export type {
|
||||
KtxColumnAnalysisResult,
|
||||
KtxColumnDescriptionPromptInput,
|
||||
KtxColumnEmbeddingForeignKeys,
|
||||
KtxColumnEmbeddingTextInput,
|
||||
KtxColumnSampleInput,
|
||||
KtxColumnSampleResult,
|
||||
KtxColumnSampleUpdate,
|
||||
KtxColumnStatsInput,
|
||||
KtxColumnStatsResult,
|
||||
KtxConnectionDriver,
|
||||
KtxConnectorCapabilities,
|
||||
KtxCredentialEnvelope,
|
||||
KtxCredentialEnvReference,
|
||||
KtxCredentialFileReference,
|
||||
KtxDataDictionaryColumnState,
|
||||
KtxDataDictionarySampleDecision,
|
||||
KtxDataDictionarySettings,
|
||||
KtxDataDictionarySkipReason,
|
||||
KtxDataSourceDescriptionPromptInput,
|
||||
KtxDescriptionCachePort,
|
||||
KtxDescriptionColumn,
|
||||
KtxDescriptionColumnTable,
|
||||
KtxDescriptionGenerationSettings,
|
||||
KtxDescriptionGeneratorOptions,
|
||||
KtxDescriptionSource,
|
||||
KtxDescriptionTableInput,
|
||||
KtxDescriptionUpdate,
|
||||
KtxEmbeddingPort as KtxScanEmbeddingPort,
|
||||
KtxEmbeddingUpdate,
|
||||
KtxEnrichedColumn,
|
||||
KtxEnrichedRelationship,
|
||||
KtxEnrichedSchema,
|
||||
KtxEnrichedTable,
|
||||
KtxGenerateColumnDescriptionsInput,
|
||||
KtxGenerateDataSourceDescriptionInput,
|
||||
KtxGenerateTableDescriptionInput,
|
||||
KtxOptionalConnectorCapabilities,
|
||||
KtxProgressPort,
|
||||
KtxQueryResult as KtxScanQueryResult,
|
||||
KtxReadOnlyQueryInput,
|
||||
KtxRelationshipEndpoint,
|
||||
KtxRelationshipSource,
|
||||
KtxRelationshipType,
|
||||
KtxRelationshipUpdate,
|
||||
KtxResolvedCredentialEnvelope,
|
||||
KtxScanArtifactPaths,
|
||||
KtxScanConnector,
|
||||
KtxScanContext,
|
||||
KtxScanDiffSummary,
|
||||
KtxScanEnrichmentSummary,
|
||||
KtxScanInput,
|
||||
KtxScanLoggerPort,
|
||||
KtxScanMetadataStore,
|
||||
KtxScanMode,
|
||||
KtxScanRelationshipSummary,
|
||||
KtxScanReport,
|
||||
KtxScanTrigger,
|
||||
KtxScanWarning,
|
||||
KtxScanWarningCode,
|
||||
KtxSchemaColumn,
|
||||
KtxSchemaDimensionType,
|
||||
KtxSchemaForeignKey,
|
||||
KtxSchemaScope,
|
||||
KtxSchemaSnapshot,
|
||||
KtxSchemaTable,
|
||||
KtxSchemaTableKind,
|
||||
KtxSkippedRelationship,
|
||||
KtxStructuralSyncPlan,
|
||||
KtxStructuralSyncStats,
|
||||
KtxTableDescriptionPromptInput,
|
||||
KtxTableRef,
|
||||
KtxTableSampleInput,
|
||||
KtxTableSampleResult,
|
||||
KtxColumnTypeMapping,
|
||||
} from './scan/index.js';
|
||||
export {
|
||||
buildKtxColumnDescriptionPrompt,
|
||||
buildKtxColumnEmbeddingText,
|
||||
buildKtxDataSourceDescriptionPrompt,
|
||||
buildKtxTableDescriptionPrompt,
|
||||
createKtxConnectorCapabilities,
|
||||
defaultKtxDataDictionarySettings,
|
||||
inferKtxDimensionType,
|
||||
isKtxDataDictionaryCandidate,
|
||||
ktxColumnTypeMappingFromNative,
|
||||
KtxDescriptionGenerator,
|
||||
normalizeKtxNativeType,
|
||||
REDACTED_KTX_CREDENTIAL_VALUE,
|
||||
redactKtxCredentialEnvelope,
|
||||
redactKtxCredentialValue,
|
||||
redactKtxScanMetadata,
|
||||
redactKtxScanReport,
|
||||
redactKtxScanWarning,
|
||||
shouldKtxSampleColumnForDictionary,
|
||||
} from './scan/index.js';
|
||||
export * from './skills/index.js';
|
||||
export * from './sl/index.js';
|
||||
export * from './tools/index.js';
|
||||
export * from './wiki/index.js';
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue