mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-19 08:28:06 +02:00
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. 
- Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. 
* refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. 
Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
This commit is contained in:
parent
a1cfb03d73
commit
2366b00301
1002 changed files with 2286 additions and 12051 deletions
|
|
@ -1,11 +1,11 @@
|
|||
import { createRequire } from 'node:module';
|
||||
|
||||
import type { ReindexSummary } from '@ktx/context/index-sync';
|
||||
import type { ReindexSummary } from './context/index-sync/types.js';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { renderReindexJson, renderReindexPlain, reindexHasErrors } from './admin-reindex.js';
|
||||
import { runKtxCli } from './index.js';
|
||||
|
||||
const cliVersion = (createRequire(import.meta.url)('@ktx/cli/package.json') as { version: string })
|
||||
const cliVersion = (createRequire(import.meta.url)('@kaelio/ktx/package.json') as { version: string })
|
||||
.version;
|
||||
|
||||
function makeIo(options: { stdoutIsTTY?: boolean } = {}) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import { KtxIngestEmbeddingPortAdapter, type KtxEmbeddingPort } from '@ktx/context';
|
||||
import { reindexLocalIndexes, type ReindexScopeResult, type ReindexSummary } from '@ktx/context/index-sync';
|
||||
import { loadKtxProject } from '@ktx/context/project';
|
||||
import { KtxIngestEmbeddingPortAdapter } from './context/llm/embedding-port.js';
|
||||
import type { KtxEmbeddingPort } from './context/core/embedding.js';
|
||||
import { reindexLocalIndexes } from './context/index-sync/reindex.js';
|
||||
import type { ReindexScopeResult, ReindexSummary } from './context/index-sync/types.js';
|
||||
import { loadKtxProject } from './context/project/project.js';
|
||||
import { Option, type Command } from '@commander-js/extra-typings';
|
||||
import { cancel, intro, log, note, outro } from '@clack/prompts';
|
||||
import type { KtxCliCommandContext } from './cli-program.js';
|
||||
|
|
@ -55,10 +57,12 @@ function quotePlainValue(value: string): string {
|
|||
return `"${value.replaceAll('\\', '\\\\').replaceAll('"', '\\"')}"`;
|
||||
}
|
||||
|
||||
/**
 * Reports whether any scope in a reindex summary recorded an error.
 *
 * @param summary - Reindex summary whose `scopes` entries are inspected.
 * @returns `true` when at least one scope carries a truthy `error` value.
 * @internal
 */
export function reindexHasErrors(summary: ReindexSummary): boolean {
  // Coerce explicitly so the predicate returns a real boolean instead of leaning on truthiness.
  return summary.scopes.some((scope) => Boolean(scope.error));
}
|
||||
|
||||
/** @internal */
|
||||
export function renderReindexPlain(summary: ReindexSummary, io: KtxCliIo): void {
|
||||
const updateKey = summary.force ? 'rebuilt' : 'updated';
|
||||
for (const scope of summary.scopes) {
|
||||
|
|
@ -88,6 +92,7 @@ export function renderReindexPlain(summary: ReindexSummary, io: KtxCliIo): void
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Writes a reindex summary to stdout as a pretty-printed JSON envelope.
 *
 * The envelope is `{ kind: 'reindex', data: summary, meta: { command: 'admin reindex' } }`,
 * serialized with two-space indentation and terminated by a single newline.
 *
 * @param summary - Reindex summary embedded under `data`.
 * @param io - CLI I/O handle; only `io.stdout.write` is used.
 * @internal
 */
export function renderReindexJson(summary: ReindexSummary, io: KtxCliIo): void {
  const envelope = { kind: 'reindex', data: summary, meta: { command: 'admin reindex' } };
  io.stdout.write(`${JSON.stringify(envelope, null, 2)}\n`);
}
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ export function registerAdminCommands(program: Command, context: KtxCliCommandCo
|
|||
.description('Print a JSON Schema describing ktx.yaml (for editors and LLM agents)')
|
||||
.option('--output <file>', 'Write the schema to a file instead of stdout')
|
||||
.action(async (options: { output?: string }) => {
|
||||
const { generateKtxProjectConfigJsonSchema } = await import('@ktx/context/project');
|
||||
const { generateKtxProjectConfigJsonSchema } = await import('./context/project/config.js');;
|
||||
const json = `${JSON.stringify(generateKtxProjectConfigJsonSchema(), null, 2)}\n`;
|
||||
if (options.output) {
|
||||
const { writeFile } = await import('node:fs/promises');
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ export interface KtxCliPromptAdapter {
|
|||
spinner(): KtxCliSpinner;
|
||||
}
|
||||
|
||||
export class KtxCliPromptCancelledError extends Error {
|
||||
class KtxCliPromptCancelledError extends Error {
|
||||
constructor(message = 'Operation cancelled.') {
|
||||
super(message);
|
||||
this.name = 'KtxCliPromptCancelledError';
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import type { KtxProjectLlmConfig } from '@ktx/context/project';
|
||||
import type { KtxProjectLlmConfig } from './context/project/config.js';
|
||||
|
||||
const CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS = [
|
||||
'systemTtl',
|
||||
|
|
|
|||
|
|
@ -12,9 +12,8 @@ function stubIo(): KtxCliIo {
|
|||
|
||||
/**
 * Builds the package-info fixture used by these tests.
 *
 * Only `name` and `version` are supplied: the package is published as `@kaelio/ktx`, and the
 * former `contextPackageName` field is no longer set because the context package was folded
 * into the CLI.
 */
function stubPackageInfo(): KtxCliPackageInfo {
  return {
    name: '@kaelio/ktx',
    version: '0.0.0-test',
  };
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,26 +0,0 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { buildDefaultKtxProjectConfig, type KtxLocalProject, type KtxProjectConfig } from '@ktx/context/project';
|
||||
import { loadKtxCliProject } from './cli-project.js';
|
||||
|
||||
/**
 * Builds a minimal `KtxLocalProject` fixture around the given config.
 *
 * `coreConfig`, `git`, and `fileStore` are empty placeholder objects cast to their declared
 * types; the fixture is only suitable for tests that never touch those collaborators.
 */
function projectWithConfig(config: KtxProjectConfig): KtxLocalProject {
  return {
    projectDir: '/work/proj',
    configPath: '/work/proj/ktx.yaml',
    config,
    coreConfig: {} as KtxLocalProject['coreConfig'],
    git: {} as KtxLocalProject['git'],
    fileStore: {} as KtxLocalProject['fileStore'],
  };
}
|
||||
|
||||
describe('loadKtxCliProject', () => {
  it('delegates to loadKtxProject and returns the project unchanged', async () => {
    const project = projectWithConfig(buildDefaultKtxProjectConfig());
    const loadProject = vi.fn(async () => project);

    const result = await loadKtxCliProject({ projectDir: '/work/proj' }, { loadProject });

    // Same object identity: the wrapper must not clone or decorate the project.
    expect(result).toBe(project);
    // Exactly one delegation, carrying only the project directory.
    expect(loadProject).toHaveBeenCalledTimes(1);
    expect(loadProject).toHaveBeenCalledWith({ projectDir: '/work/proj' });
  });
});
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
|
||||
|
||||
/** Options accepted by `loadKtxCliProject`. */
export interface LoadKtxCliProjectOptions {
  /** Directory of the project to load; forwarded unchanged to the underlying loader. */
  projectDir: string;
}
|
||||
|
||||
/** Injectable collaborators for `loadKtxCliProject`, so tests can replace the real loader. */
export interface LoadKtxCliProjectDeps {
  /** Replacement for `loadKtxProject`; when omitted the real implementation is used. */
  loadProject?: typeof loadKtxProject;
}
|
||||
|
||||
/**
 * Thin wrapper around `loadKtxProject`. Kept as a single entrypoint so the CLI can grow shared
 * pre-load behavior later (telemetry, project lock, etc.). Today it does no extra work.
 *
 * @param options - Carries the `projectDir` to load.
 * @param deps - Optional overrides; `deps.loadProject` replaces `loadKtxProject` when provided.
 * @returns Whatever the selected loader resolves to, unmodified.
 */
export async function loadKtxCliProject(
  options: LoadKtxCliProjectOptions,
  deps: LoadKtxCliProjectDeps = {},
): Promise<KtxLocalProject> {
  const loadProject = deps.loadProject ?? loadKtxProject;
  // Only `projectDir` is forwarded, so extra option fields never leak into the loader.
  return loadProject({ projectDir: options.projectDir });
}
|
||||
|
|
@ -20,7 +20,6 @@ const requirePackageJson = createRequire(import.meta.url);
|
|||
/**
 * Identity of the CLI package as read from its `package.json`.
 *
 * The former `contextPackageName` field is gone: the context package now lives inside the CLI,
 * so there is no separate package name to report.
 */
export interface KtxCliPackageInfo {
  /** The `name` field of the package manifest. */
  name: string;
  /** The `version` field of the package manifest. */
  version: string;
}
|
||||
|
||||
export interface KtxCliIo {
|
||||
|
|
@ -67,12 +66,11 @@ export function packageInfoFromJson(packageJson: unknown): KtxCliPackageInfo {
|
|||
return {
|
||||
name: packageJson.name,
|
||||
version: assertCliVersion(packageJson.version, `${packageJson.name}/package.json`),
|
||||
contextPackageName: '@ktx/context',
|
||||
};
|
||||
}
|
||||
|
||||
async function runInit(args: { projectDir: string; force: boolean }, io: KtxCliIo): Promise<number> {
|
||||
const { initKtxProject } = await import('@ktx/context/project');
|
||||
const { initKtxProject } = await import('./context/project/project.js');;
|
||||
const result = await initKtxProject({
|
||||
projectDir: args.projectDir,
|
||||
force: args.force,
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliComma
|
|||
stderr: { write: vi.fn() },
|
||||
},
|
||||
deps: {},
|
||||
packageInfo: { name: '@ktx/cli', version: '0.0.0-test', contextPackageName: '@ktx/context' },
|
||||
packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' },
|
||||
setExitCode: (code) => {
|
||||
exitCode = code;
|
||||
},
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliComma
|
|||
stderr: { write: vi.fn() },
|
||||
},
|
||||
deps: {},
|
||||
packageInfo: { name: '@ktx/cli', version: '0.0.0-test', contextPackageName: '@ktx/context' },
|
||||
packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' },
|
||||
setExitCode: (code) => {
|
||||
exitCode = code;
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { LookerClient, MetabaseRuntimeClient, NotionClient } from '@ktx/context/ingest';
|
||||
import { initKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from '@ktx/context/project';
|
||||
import type { KtxConnectionDriver, KtxScanConnector } from '@ktx/context/scan';
|
||||
import type { LookerClient } from './context/ingest/adapters/looker/client.js';
|
||||
import type { MetabaseRuntimeClient } from './context/ingest/adapters/metabase/client-port.js';
|
||||
import type { NotionClient } from './context/ingest/adapters/notion/notion-client.js';
|
||||
import { initKtxProject } from './context/project/project.js';
|
||||
import { parseKtxProjectConfig, serializeKtxProjectConfig } from './context/project/config.js';
|
||||
import type { KtxConnectionDriver, KtxScanConnector } from './context/scan/types.js';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { runKtxConnection } from './connection.js';
|
||||
|
||||
|
|
|
|||
|
|
@ -1,19 +1,15 @@
|
|||
import {
|
||||
DEFAULT_METABASE_CLIENT_CONFIG,
|
||||
DefaultLookerConnectionClientFactory,
|
||||
DefaultMetabaseConnectionClientFactory,
|
||||
type LookerClient,
|
||||
type MetabaseRuntimeClient,
|
||||
type NotionBotInfo,
|
||||
NotionClient,
|
||||
createLocalLookerCredentialResolver,
|
||||
metabaseRuntimeConfigFromLocalConnection,
|
||||
testRepoConnection,
|
||||
} from '@ktx/context/ingest';
|
||||
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from '@ktx/context/connections';
|
||||
import { resolveKtxConfigReference } from '@ktx/context/core';
|
||||
import { type KtxLocalProject, loadKtxProject } from '@ktx/context/project';
|
||||
import type { KtxScanConnector } from '@ktx/context/scan';
|
||||
import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory } from './context/ingest/adapters/metabase/client.js';
|
||||
import { DefaultLookerConnectionClientFactory } from './context/ingest/adapters/looker/factory.js';
|
||||
import type { LookerClient } from './context/ingest/adapters/looker/client.js';
|
||||
import type { MetabaseRuntimeClient } from './context/ingest/adapters/metabase/client-port.js';
|
||||
import { type NotionBotInfo, NotionClient } from './context/ingest/adapters/notion/notion-client.js';
|
||||
import { createLocalLookerCredentialResolver } from './context/ingest/adapters/looker/local-looker.adapter.js';
|
||||
import { metabaseRuntimeConfigFromLocalConnection } from './context/ingest/adapters/metabase/local-metabase.adapter.js';
|
||||
import { testRepoConnection } from './context/ingest/repo-fetch.js';
|
||||
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from './context/connections/notion-config.js';
|
||||
import { resolveKtxConfigReference } from './context/core/config-reference.js';
|
||||
import { type KtxLocalProject, loadKtxProject } from './context/project/project.js';
|
||||
import type { KtxScanConnector } from './context/scan/types.js';
|
||||
import type { KtxCliIo } from './index.js';
|
||||
import { bold, dim, green, red, SYMBOLS } from './io/symbols.js';
|
||||
import { createKtxCliScanConnector } from './local-scan-connectors.js';
|
||||
|
|
|
|||
315
packages/cli/src/connectors/bigquery/connector.test.ts
Normal file
315
packages/cli/src/connectors/bigquery/connector.test.ts
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { bigQueryConnectionConfigFromConfig, isKtxBigQueryConnectionConfig, type KtxBigQueryClient, KtxBigQueryScanConnector, type KtxBigQueryClientFactory, type KtxBigQueryDataset, type KtxBigQueryQueryJob, type KtxBigQueryTableRef } from '../../connectors/bigquery/connector.js';
|
||||
import { createBigQueryLiveDatabaseIntrospection } from '../../connectors/bigquery/live-database-introspection.js';
|
||||
|
||||
/**
 * Builds an in-memory `KtxBigQueryClientFactory` for connector tests.
 *
 * The fake client exposes two datasets (`analytics`, `staging`), a single `orders` table with
 * three columns, and a `createQueryJob` mock that answers with canned rows chosen by a substring
 * of the submitted SQL. Queries matching no canned marker fall back to one `{ id, status }` row.
 */
function fakeClientFactory(): KtxBigQueryClientFactory {
  type QueryResults = ReturnType<KtxBigQueryQueryJob['getQueryResults']>;

  /** Shapes rows and column descriptors into the tuple resolved by `getQueryResults`. */
  const resultsOf = async (
    rows: Array<Record<string, unknown>>,
    fields: Array<{ name: string; type: string }>,
  ): QueryResults => [rows, undefined, { schema: { fields } }];

  // Fallback for any query without a canned marker; kept as a mock so calls stay observable.
  const queryResults = vi.fn(
    (): QueryResults =>
      resultsOf(
        [{ id: 1, status: 'paid' }],
        [
          { name: 'id', type: 'INT64' },
          { name: 'status', type: 'STRING' },
        ],
      ),
  );

  // Canned responses, checked in order: the first marker contained in the SQL wins.
  // Each entry builds fresh result objects per call so tests cannot share mutated state.
  const cannedResponses: ReadonlyArray<{ marker: string; getQueryResults: () => QueryResults }> = [
    {
      // Primary-key lookup.
      marker: 'INFORMATION_SCHEMA.TABLE_CONSTRAINTS',
      getQueryResults: () =>
        resultsOf(
          [{ table_name: 'orders', column_name: 'id' }],
          [
            { name: 'table_name', type: 'STRING' },
            { name: 'column_name', type: 'STRING' },
          ],
        ),
    },
    {
      // Cardinality probe.
      marker: 'APPROX_COUNT_DISTINCT',
      getQueryResults: () => resultsOf([{ cardinality: 2 }], [{ name: 'cardinality', type: 'INT64' }]),
    },
    {
      // Distinct-value listing.
      marker: 'SELECT DISTINCT CAST',
      getQueryResults: () => resultsOf([{ val: 'open' }, { val: 'paid' }], [{ name: 'val', type: 'STRING' }]),
    },
    {
      // Single-column sample of `status`.
      marker: 'SELECT `status`',
      getQueryResults: () => resultsOf([{ status: 'paid' }], [{ name: 'status', type: 'STRING' }]),
    },
  ];

  const createQueryJob = vi.fn(async (input: { query: string }): ReturnType<KtxBigQueryClient['createQueryJob']> => {
    const canned = cannedResponses.find(({ marker }) => input.query.includes(marker));
    return [{ getQueryResults: canned?.getQueryResults ?? queryResults }];
  });

  const getTable = vi.fn(async (): ReturnType<KtxBigQueryTableRef['get']> => [
    {
      metadata: {
        type: 'TABLE',
        numRows: '12',
        description: 'Orders table',
        schema: {
          fields: [
            { name: 'id', type: 'INT64', mode: 'REQUIRED', description: 'Order id' },
            { name: 'status', type: 'STRING', mode: 'NULLABLE' },
            { name: 'payload', type: 'RECORD', mode: 'NULLABLE' },
          ],
        },
      },
    },
  ]);
  const tableRef: KtxBigQueryTableRef = { id: 'orders', get: getTable };

  return {
    createClient: vi.fn(() => ({
      getDatasets: vi.fn(async (): ReturnType<KtxBigQueryClient['getDatasets']> => [[{ id: 'analytics' }, { id: 'staging' }]]),
      dataset: vi.fn(
        (datasetId: string): KtxBigQueryDataset => ({
          get: vi.fn(async () => [{ id: datasetId }]),
          getTables: vi.fn(async (): ReturnType<KtxBigQueryDataset['getTables']> => [[tableRef]]),
        }),
      ),
      createQueryJob,
    })),
  };
}
|
||||
|
||||
// Shared BigQuery connection fixture: a single dataset, with the project id carried inside the
// JSON-encoded credentials rather than as a separate field.
const connection = {
  driver: 'bigquery',
  dataset_id: 'analytics',
  credentials_json: JSON.stringify({ project_id: 'project-1', client_email: 'reader@example.test' }),
  location: 'US',
} as const;
|
||||
|
||||
describe('KtxBigQueryScanConnector', () => {
  // Read-only query reused by the execution and billing-limit tests.
  const selectOrdersSql = 'select id, status from `project-1`.`analytics`.`orders`';

  // Expected normalized form of the `id` column reported by the fake table metadata.
  const expectedIdColumn = {
    name: 'id',
    nativeType: 'INT64',
    normalizedType: 'BIGINT',
    dimensionType: 'number',
    nullable: false,
    primaryKey: true,
    comment: 'Order id',
  };

  it('resolves configuration safely', () => {
    expect(isKtxBigQueryConnectionConfig(connection)).toBe(true);
    expect(isKtxBigQueryConnectionConfig({ driver: 'mysql' })).toBe(false);
    // The project id comes out of `credentials_json`; `dataset_id` becomes a one-element list.
    expect(bigQueryConnectionConfigFromConfig({ connectionId: 'warehouse', connection })).toMatchObject({
      projectId: 'project-1',
      datasetIds: ['analytics'],
      location: 'US',
    });
  });

  it('introspects datasets, table metadata, primary keys, and normalized types', async () => {
    const connector = new KtxBigQueryScanConnector({
      connectionId: 'warehouse',
      connection,
      clientFactory: fakeClientFactory(),
      // Fixed clock so `extractedAt` is deterministic.
      now: () => new Date('2026-04-29T17:00:00.000Z'),
    });

    const snapshot = await connector.introspect(
      { connectionId: 'warehouse', driver: 'bigquery' },
      { runId: 'scan-run-1' },
    );

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'bigquery',
      extractedAt: '2026-04-29T17:00:00.000Z',
      scope: { catalogs: ['project-1'], datasets: ['analytics'] },
      metadata: {
        project_id: 'project-1',
        datasets: ['analytics'],
        table_count: 1,
        total_columns: 3,
      },
    });
    expect(snapshot.tables[0]).toMatchObject({
      catalog: 'project-1',
      db: 'analytics',
      name: 'orders',
      kind: 'table',
      comment: 'Orders table',
      // The fake reports `numRows` as the string '12'; the connector must surface a number.
      estimatedRows: 12,
      foreignKeys: [],
    });
    expect(snapshot.tables[0]?.columns).toEqual([
      expectedIdColumn,
      {
        name: 'status',
        nativeType: 'STRING',
        normalizedType: 'VARCHAR',
        dimensionType: 'string',
        nullable: true,
        primaryKey: false,
        comment: null,
      },
      {
        name: 'payload',
        nativeType: 'RECORD',
        normalizedType: 'JSON',
        dimensionType: 'string',
        nullable: true,
        primaryKey: false,
        comment: null,
      },
    ]);
  });

  it('runs samples, read-only SQL, distinct values, dataset listing, row counts, and cleanup', async () => {
    const connector = new KtxBigQueryScanConnector({
      connectionId: 'warehouse',
      connection,
      clientFactory: fakeClientFactory(),
    });

    await expect(
      connector.sampleTable(
        {
          connectionId: 'warehouse',
          table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
          columns: ['id', 'status'],
          limit: 1,
        },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toEqual({
      headers: ['id', 'status'],
      headerTypes: ['INT64', 'STRING'],
      rows: [[1, 'paid']],
      totalRows: 1,
    });

    await expect(
      connector.sampleColumn(
        {
          connectionId: 'warehouse',
          table: { catalog: 'project-1', db: 'analytics', name: 'orders' },
          column: 'status',
          limit: 5,
        },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toMatchObject({ values: ['paid'], nullCount: null, distinctCount: null });

    await expect(
      connector.executeReadOnly(
        { connectionId: 'warehouse', sql: selectOrdersSql, maxRows: 1 },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 });

    // Mutating statements must be rejected before reaching BigQuery.
    await expect(
      connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
    ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

    await expect(
      connector.getColumnDistinctValues(
        { catalog: 'project-1', db: 'analytics', name: 'orders' },
        'status',
        { maxCardinality: 5, limit: 10, sampleSize: 100 },
      ),
    ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
    await expect(connector.getTableRowCount('orders')).resolves.toBe(12);
    await expect(connector.listDatasets()).resolves.toEqual(['analytics', 'staging']);
    await expect(
      connector.columnStats(
        { connectionId: 'warehouse', table: { catalog: 'project-1', db: 'analytics', name: 'orders' }, column: 'status' },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toBeNull();
    await connector.cleanup();
  });

  it('applies maximumBytesBilled to read-only queries when configured', async () => {
    const clientFactory = fakeClientFactory();
    const connector = new KtxBigQueryScanConnector({
      connectionId: 'warehouse',
      connection,
      clientFactory,
      maxBytesBilled: 123456789,
    });

    await expect(
      connector.executeReadOnly(
        { connectionId: 'warehouse', sql: selectOrdersSql, maxRows: 1 },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });

    // The numeric option must reach BigQuery as a string.
    const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
    expect(client.createQueryJob).toHaveBeenLastCalledWith(
      expect.objectContaining({
        maximumBytesBilled: '123456789',
      }),
    );
  });

  it('applies canonical BigQuery YAML scan limits to query jobs', async () => {
    const clientFactory = fakeClientFactory();
    const connector = new KtxBigQueryScanConnector({
      connectionId: 'warehouse',
      // Limits supplied through the connection config rather than constructor options.
      connection: { ...connection, max_bytes_billed: '987654321', job_timeout_ms: 30_000 },
      clientFactory,
    });

    await expect(
      connector.executeReadOnly(
        { connectionId: 'warehouse', sql: selectOrdersSql, maxRows: 1 },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toMatchObject({ rows: [[1, 'paid']], rowCount: 1 });

    const client = vi.mocked(clientFactory.createClient).mock.results[0]?.value as KtxBigQueryClient;
    expect(client.createQueryJob).toHaveBeenLastCalledWith(
      expect.objectContaining({
        maximumBytesBilled: '987654321',
        jobTimeoutMs: 30_000,
      }),
    );
  });

  it('adapts native snapshots to live-database introspection snapshots', async () => {
    const introspection = createBigQueryLiveDatabaseIntrospection({
      connections: { warehouse: connection },
      clientFactory: fakeClientFactory(),
      now: () => new Date('2026-04-29T17:00:00.000Z'),
    });

    await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
      connectionId: 'warehouse',
      metadata: { project_id: 'project-1' },
      tables: expect.arrayContaining([
        expect.objectContaining({
          catalog: 'project-1',
          db: 'analytics',
          name: 'orders',
          columns: expect.arrayContaining([expectedIdColumn]),
        }),
      ]),
    });
  });
});
|
||||
522
packages/cli/src/connectors/bigquery/connector.ts
Normal file
522
packages/cli/src/connectors/bigquery/connector.ts
Normal file
|
|
@ -0,0 +1,522 @@
|
|||
import { BigQuery, type TableField } from '@google-cloud/bigquery';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { KtxBigQueryDialect } from './dialect.js';
|
||||
|
||||
/**
 * Raw BigQuery connection settings in their snake_case configuration form.
 *
 * Every known field is optional and unknown keys are tolerated through the index signature,
 * so a value of this type still has to be validated before use.
 */
export interface KtxBigQueryConnectionConfig {
  /** Connection driver identifier, e.g. `'bigquery'`. */
  driver?: string;
  /** Single dataset to scan. */
  dataset_id?: string;
  /** Several datasets to scan. NOTE(review): precedence relative to `dataset_id` is not visible here. */
  dataset_ids?: string[];
  /** JSON-encoded credentials; the connector tests read the project id from its `project_id` key. */
  credentials_json?: string;
  /** BigQuery location, e.g. `'US'`. */
  location?: string;
  /** Byte-billing cap for query jobs; accepted as a number or a numeric string. */
  max_bytes_billed?: number | string;
  /** Query job timeout in milliseconds. */
  job_timeout_ms?: number;
  /** Any additional, unvalidated configuration keys. */
  [key: string]: unknown;
}
|
||||
|
||||
/** Validated, camelCase connection settings derived from a `KtxBigQueryConnectionConfig`. */
export interface KtxBigQueryResolvedConnectionConfig {
  /** Google Cloud project id the client is created for. */
  projectId: string;
  /** Parsed credentials object handed to the BigQuery client. */
  credentials: Record<string, unknown>;
  /** Datasets in scope for the connection. */
  datasetIds: string[];
  /** BigQuery location, when one is configured. */
  location?: string;
}
|
||||
|
||||
/** Read-only query input extended with optional BigQuery query parameters. */
export interface KtxBigQueryReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  /** Named parameter values. NOTE(review): presumably forwarded as the query job's `params` — confirm. */
  params?: Record<string, unknown>;
}
|
||||
|
||||
/** Tuning knobs for a distinct-values lookup on a single column. */
export interface KtxBigQueryColumnDistinctValuesOptions {
  /** Cardinality threshold. NOTE(review): presumably values are omitted above it — confirm in the connector. */
  maxCardinality: number;
  /** Maximum number of distinct values to return. */
  limit: number;
  /** Optional sample size. NOTE(review): exact sampling semantics are not visible here. */
  sampleSize?: number;
}
|
||||
|
||||
/** Outcome of a distinct-values lookup on a single column. */
export interface KtxBigQueryColumnDistinctValuesResult {
  /** Distinct values as strings, or `null` when no value list is provided. */
  values: string[] | null;
  /** Number of distinct values reported for the column. */
  cardinality: number;
}
|
||||
|
||||
/**
 * Minimal view of a BigQuery query job: only result retrieval is modeled.
 *
 * @internal
 */
export interface KtxBigQueryQueryJob {
  /**
   * Resolves to a tuple whose first element is the result rows and whose optional third element
   * carries the result schema; the second element is untyped.
   */
  getQueryResults(): Promise<
    [Array<Record<string, unknown>>, unknown, { schema?: { fields?: TableField[] } }?, ...unknown[]]
  >;
}
|
||||
|
||||
/**
 * Minimal view of a BigQuery table handle as returned by a dataset's table listing.
 *
 * @internal
 */
export interface KtxBigQueryTableRef {
  /** Table id within its dataset. */
  id?: string;
  /** Listing-level metadata; only the table type is read from it. */
  metadata?: { type?: string };
  /**
   * Fetches the table; the first tuple element exposes the metadata the connector reads
   * (type, row count, description and column schema).
   */
  get(): Promise<
    [
      {
        metadata: {
          type?: string;
          numRows?: string | number;
          description?: string;
          schema?: { fields?: TableField[] };
        };
      },
      ...unknown[],
    ]
  >;
}
|
||||
|
||||
/**
 * Minimal view of a BigQuery dataset handle.
 *
 * @internal
 */
export interface KtxBigQueryDataset {
  /** Fetches the dataset; the connector only awaits it as a reachability check. */
  get(): Promise<unknown>;
  /** Lists the dataset's tables; the first tuple element holds the table handles. */
  getTables(): Promise<[KtxBigQueryTableRef[], ...unknown[]]>;
}
|
||||
|
||||
/** The narrow slice of a BigQuery client that the scan connector depends on. */
export interface KtxBigQueryClient {
  /** Lists datasets; the first tuple element holds the dataset descriptors. */
  getDatasets(input?: { maxResults?: number }): Promise<[{ id?: string }[], ...unknown[]]>;

  /** Returns a handle for one dataset. */
  dataset(datasetId: string): KtxBigQueryDataset;

  /** Starts a query job; the first tuple element is the job handle. */
  createQueryJob(input: {
    query: string;
    location?: string;
    params?: Record<string, unknown>;
    maximumBytesBilled?: string;
    jobTimeoutMs?: number;
  }): Promise<[KtxBigQueryQueryJob, ...unknown[]]>;
}
|
||||
|
||||
/** Creates BigQuery clients; injectable so callers can substitute a fake client. */
export interface KtxBigQueryClientFactory {
  /** Builds a client bound to the given project and credentials. */
  createClient(input: { projectId: string; credentials: Record<string, unknown> }): KtxBigQueryClient;
}
|
||||
|
||||
/** Construction options for `KtxBigQueryScanConnector`. */
export interface KtxBigQueryScanConnectorOptions {
  /** Id of the connection this connector serves. */
  connectionId: string;
  /** Raw connection settings; must describe a BigQuery connection. */
  connection: KtxBigQueryConnectionConfig | undefined;

  /** Client factory override; the connector falls back to its SDK-backed default. */
  clientFactory?: KtxBigQueryClientFactory;
  /** Environment used to resolve `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Clock used to stamp snapshots; defaults to the current time. */
  now?: () => Date;

  /** Overrides the connection's `max_bytes_billed`. */
  maxBytesBilled?: number | string;
  /** Overrides the connection's `job_timeout_ms`. */
  queryTimeoutMs?: number;
}
|
||||
|
||||
/** Default client factory: adapts the `@google-cloud/bigquery` SDK to the narrow `KtxBigQueryClient` surface. */
class DefaultBigQueryClientFactory implements KtxBigQueryClientFactory {
  createClient(input: { projectId: string; credentials: Record<string, unknown> }): KtxBigQueryClient {
    const sdk = new BigQuery(input);

    // Wraps one SDK dataset handle so only the two calls the connector needs are exposed.
    const wrapDataset = (datasetId: string): KtxBigQueryDataset => {
      const handle = sdk.dataset(datasetId);
      return {
        get: () => handle.get() as Promise<unknown>,
        getTables: () => handle.getTables() as Promise<[KtxBigQueryTableRef[], ...unknown[]]>,
      };
    };

    return {
      getDatasets: (options) => sdk.getDatasets(options) as Promise<[Array<{ id?: string }>, ...unknown[]]>,
      dataset: wrapDataset,
      createQueryJob: (options) => sdk.createQueryJob(options) as Promise<[KtxBigQueryQueryJob, ...unknown[]]>,
    };
  }
}
|
||||
|
||||
/**
 * Resolves a configuration string that may be a reference rather than a literal.
 *
 * - `env:NAME` yields the value of `NAME` in `env`, or `''` when it is unset.
 * - `file:PATH` yields the trimmed UTF-8 contents of `PATH`; a leading `~` is expanded to the
 *   current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param value - Literal value or `env:` / `file:` reference.
 * @param env - Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  const envPrefix = 'env:';
  const filePrefix = 'file:';

  if (value.startsWith(envPrefix)) {
    return env[value.slice(envPrefix.length)] ?? '';
  }

  if (value.startsWith(filePrefix)) {
    const rawPath = value.slice(filePrefix.length);
    // Drop the separator that follows `~` before resolving: `resolve(home, '/x')` would treat
    // `/x` as absolute and discard the home directory, sending `~/x` to the filesystem root.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }

  return value;
}
|
||||
|
||||
/**
 * Reads one connection setting as a resolved string.
 *
 * @param connection - Raw connection settings, possibly absent.
 * @param key - Setting to read.
 * @param env - Environment used when the value is an `env:` reference.
 * @returns The trimmed value after reference resolution, or `undefined` when the setting is
 *   missing, not a string, or blank.
 */
function stringConfigValue(
  connection: KtxBigQueryConnectionConfig | undefined,
  key: keyof KtxBigQueryConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }

  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }

  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Determines which datasets a connection targets.
 *
 * A non-empty `dataset_ids` list wins over `dataset_id`. Each entry is trimmed and resolved as a
 * string reference; entries that are not strings, are blank, or resolve to an empty string (for
 * example an unset `env:` reference) are dropped so they never reach BigQuery as an empty id.
 *
 * @param connection - Raw connection settings.
 * @param env - Environment used for `env:` references.
 * @returns The resolved dataset ids; empty when none are configured.
 */
function datasetIds(connection: KtxBigQueryConnectionConfig, env: NodeJS.ProcessEnv): string[] {
  // Configuration is parsed from user files, so do not trust the declared element type.
  const configured: unknown = connection.dataset_ids;

  if (Array.isArray(configured) && configured.length > 0) {
    return configured
      .filter((dataset): dataset is string => typeof dataset === 'string')
      .map((dataset) => dataset.trim())
      .filter((dataset) => dataset.length > 0)
      .map((dataset) => resolveStringReference(dataset, env))
      .filter((dataset) => dataset.length > 0);
  }

  const datasetId = stringConfigValue(connection, 'dataset_id', env);
  return datasetId ? [datasetId] : [];
}
|
||||
|
||||
/**
 * Extracts the `max_bytes_billed` setting in a usable form.
 *
 * @param connection - Raw connection settings, possibly absent.
 * @returns A finite positive number, a non-blank trimmed string, or `undefined` for anything else.
 */
function bigQueryMaxBytesBilledFromConnection(
  connection: KtxBigQueryConnectionConfig | undefined,
): number | string | undefined {
  const raw = connection?.max_bytes_billed;

  switch (typeof raw) {
    case 'number':
      return Number.isFinite(raw) && raw > 0 ? raw : undefined;
    case 'string': {
      const text = raw.trim();
      return text.length > 0 ? text : undefined;
    }
    default:
      return undefined;
  }
}
|
||||
|
||||
/**
 * Extracts the `job_timeout_ms` setting.
 *
 * @param connection - Raw connection settings, possibly absent.
 * @returns The timeout when it is a positive integer, otherwise `undefined`.
 */
function bigQueryJobTimeoutMsFromConnection(connection: KtxBigQueryConnectionConfig | undefined): number | undefined {
  const raw = connection?.job_timeout_ms;
  return typeof raw === 'number' && Number.isInteger(raw) && raw > 0 ? raw : undefined;
}
|
||||
|
||||
/**
 * Maps a BigQuery table metadata type to the schema table kind.
 *
 * Matching is case-insensitive; unknown or missing types are treated as plain tables.
 */
function tableKind(metadataType: string | undefined): KtxSchemaTable['kind'] {
  switch (String(metadataType ?? '').toUpperCase()) {
    case 'VIEW':
    case 'MATERIALIZED_VIEW':
      return 'view';
    case 'EXTERNAL':
    case 'EXTERNAL_TABLE':
      return 'external';
    default:
      return 'table';
  }
}
|
||||
|
||||
/**
 * Coerces a loosely typed value (number, numeric string, ...) to a finite number.
 *
 * `null`, `undefined` and blank strings yield `null`: `Number(null)` and `Number('')` are both
 * `0`, which would otherwise make a missing value indistinguishable from a real zero.
 *
 * @param value - Value to coerce.
 * @returns The finite number, or `null` when the value is missing or not numeric.
 */
function firstNumber(value: unknown): number | null {
  if (value === null || value === undefined) {
    return null;
  }
  if (typeof value === 'string' && value.trim().length === 0) {
    return null;
  }

  const numberValue = Number(value);
  return Number.isFinite(numberValue) ? numberValue : null;
}
|
||||
|
||||
/**
 * Flattens a raw result cell into a plain scalar.
 *
 * - `null` / `undefined` become `null`.
 * - Arrays become a `', '`-joined string. Object elements are normalized first so they do not
 *   degrade to `[object Object]`.
 * - Objects exposing both `toNumber()` and `toFixed()` are converted with `toNumber()`.
 * - Objects whose only key is a non-object `value` are unwrapped to that value.
 * - Any other object is serialized with `JSON.stringify`.
 * - Primitives pass through unchanged.
 */
function normalizeValue(value: unknown): unknown {
  if (value === null || value === undefined) {
    return null;
  }

  if (Array.isArray(value)) {
    return value
      .map((item) => (typeof item === 'object' && item !== null ? String(normalizeValue(item)) : String(item)))
      .join(', ');
  }

  if (typeof value === 'object') {
    const record = value as Record<string, unknown>;

    if (typeof record.toNumber === 'function' && typeof record.toFixed === 'function') {
      return record.toNumber();
    }

    if ('value' in record && Object.keys(record).length === 1 && typeof record.value !== 'object') {
      return record.value;
    }

    return JSON.stringify(value);
  }

  return value;
}
|
||||
|
||||
/**
 * Type guard: reports whether a connection declares the BigQuery driver.
 *
 * The driver name is compared case-insensitively; a missing connection or driver is rejected.
 */
export function isKtxBigQueryConnectionConfig(
  connection: KtxBigQueryConnectionConfig | undefined,
): connection is KtxBigQueryConnectionConfig {
  const declaredDriver = connection?.driver ?? '';
  return String(declaredDriver).toLowerCase() === 'bigquery';
}
|
||||
|
||||
/**
 * Validates raw connection settings and resolves them into the form the connector uses.
 *
 * @param input.connectionId - Connection id, used only in error messages.
 * @param input.connection - Raw connection settings.
 * @param input.env - Environment for `env:` references; defaults to `process.env`.
 * @returns The project id, parsed credentials, dataset ids and (when configured) location.
 * @throws Error when the driver is not BigQuery, when `credentials_json` is missing, is not valid
 *   JSON, is not a JSON object or lacks a string `project_id`, or when no dataset is configured.
 *
 * @internal
 */
export function bigQueryConnectionConfigFromConfig(input: {
  connectionId: string;
  connection: KtxBigQueryConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxBigQueryResolvedConnectionConfig {
  const inputDriver = input.connection?.driver ?? 'unknown';
  if (!isKtxBigQueryConnectionConfig(input.connection)) {
    throw new Error(`Native BigQuery connector cannot run driver "${inputDriver}"`);
  }

  const env = input.env ?? process.env;
  const credentialsJson = stringConfigValue(input.connection, 'credentials_json', env);
  if (!credentialsJson) {
    throw new Error(`Native BigQuery connector requires connections.${input.connectionId}.credentials_json`);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(credentialsJson);
  } catch {
    // The parser's own message can quote part of the input, so it is deliberately not echoed:
    // the input here is credential material.
    throw new Error(
      `Native BigQuery connector could not parse connections.${input.connectionId}.credentials_json as JSON`,
    );
  }
  // JSON such as `null`, `"text"` or `[]` parses fine but is not a credentials document.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(
      `Native BigQuery connector requires connections.${input.connectionId}.credentials_json to be a JSON object`,
    );
  }
  const credentials = parsed as Record<string, unknown>;

  const projectId = typeof credentials.project_id === 'string' ? credentials.project_id : undefined;
  if (!projectId) {
    throw new Error(`Native BigQuery connector requires credentials_json.project_id for connections.${input.connectionId}`);
  }

  const resolvedDatasetIds = datasetIds(input.connection, env);
  if (resolvedDatasetIds.length === 0) {
    throw new Error(`Native BigQuery connector requires connections.${input.connectionId}.dataset_id or dataset_ids`);
  }

  const location = stringConfigValue(input.connection, 'location', env);
  return { projectId, credentials, datasetIds: resolvedDatasetIds, ...(location ? { location } : {}) };
}
|
||||
|
||||
/**
 * Scan connector for BigQuery: schema introspection, table/column sampling, read-only SQL and
 * distinct-value discovery over the datasets configured for one connection.
 *
 * The underlying client is created lazily on first use and dropped by `cleanup()`.
 */
export class KtxBigQueryScanConnector implements KtxScanConnector {
  /** Connector id, `bigquery:<connectionId>`. */
  readonly id: string;
  readonly driver = 'bigquery' as const;
  /** Feature flags advertised to the scan pipeline. */
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: false,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly resolved: KtxBigQueryResolvedConnectionConfig;
  private readonly clientFactory: KtxBigQueryClientFactory;
  private readonly now: () => Date;
  private readonly maxBytesBilled?: number | string;
  private readonly queryTimeoutMs?: number;
  private readonly dialect = new KtxBigQueryDialect();
  private client: KtxBigQueryClient | null = null;

  /**
   * @param options - Connection settings plus optional overrides and test seams.
   * @throws Error when the connection settings are not a valid BigQuery configuration.
   */
  constructor(options: KtxBigQueryScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.resolved = bigQueryConnectionConfigFromConfig({
      connectionId: options.connectionId,
      connection: options.connection,
      env: options.env,
    });
    this.clientFactory = options.clientFactory ?? new DefaultBigQueryClientFactory();
    this.now = options.now ?? (() => new Date());
    // Explicit options win over values found in the connection settings.
    this.maxBytesBilled = options.maxBytesBilled ?? bigQueryMaxBytesBilledFromConnection(options.connection);
    this.queryTimeoutMs = options.queryTimeoutMs ?? bigQueryJobTimeoutMsFromConnection(options.connection);
    this.id = `bigquery:${options.connectionId}`;
  }

  /**
   * Checks that datasets can be listed and that every configured dataset can be fetched.
   *
   * @returns `{ success: true }`, or `{ success: false, error }` with the failure message; never throws.
   */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      const client = this.getClient();
      await client.getDatasets({ maxResults: 1 });
      for (const datasetId of this.resolved.datasetIds) {
        await client.dataset(datasetId).get();
      }
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Builds a schema snapshot covering every configured dataset.
   *
   * @throws Error when `input.connectionId` is not this connector's connection.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);

    const tables: KtxSchemaTable[] = [];
    for (const datasetId of this.resolved.datasetIds) {
      tables.push(...(await this.introspectDataset(datasetId)));
    }

    return {
      connectionId: this.connectionId,
      driver: 'bigquery',
      extractedAt: this.now().toISOString(),
      scope: { catalogs: [this.resolved.projectId], datasets: this.resolved.datasetIds },
      metadata: {
        project_id: this.resolved.projectId,
        datasets: this.resolved.datasetIds,
        table_count: tables.length,
        total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables,
    };
  }

  /** Samples rows from a table using the dialect's sample query. */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult & { headerTypes?: string[] }> {
    this.assertConnection(input.connectionId);
    const sql = this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns);
    const result = await this.query(sql);
    return { headers: result.headers, headerTypes: result.headerTypes, rows: result.rows, totalRows: result.totalRows };
  }

  /** Samples values of one column; null counts and distinct counts are not computed. */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const sql = this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit);
    const result = await this.query(sql);
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /** Column statistics are not supported by this connector (see `capabilities.columnStats`). */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /**
   * Runs caller-supplied SQL after asserting it is read-only and applying the row limit.
   * `:name` placeholders are rewritten to BigQuery named parameters.
   */
  async executeReadOnly(input: KtxBigQueryReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
    const prepared = this.dialect.prepareQuery(limitedSql, input.params);
    const result = await this.query(prepared.sql, prepared.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Discovers the distinct values of a column when its cardinality is small enough.
   *
   * @returns `null` when the cardinality could not be determined; `values: null` when the
   *   cardinality exceeds `options.maxCardinality`; otherwise the non-null values as strings.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxBigQueryColumnDistinctValuesOptions,
  ): Promise<KtxBigQueryColumnDistinctValuesResult | null> {
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);

    const cardinality = await this.singleNumber(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, options.sampleSize ?? 10000),
      'cardinality',
    );
    if (cardinality === null) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }

    const valueRows = await this.queryRaw<{ val: unknown }>(
      this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit),
    );
    return { values: valueRows.filter((row) => row.val !== null).map((row) => String(row.val)), cardinality };
  }

  /**
   * Returns the row count BigQuery reports in a table's metadata.
   *
   * Only the requested table is fetched; the rest of the dataset is not introspected.
   *
   * @param tableName - Table id within the dataset.
   * @param datasetId - Dataset to look in; defaults to the first configured dataset.
   * @returns The reported row count, or `0` when the dataset or table is unknown or reports none.
   */
  async getTableRowCount(tableName: string, datasetId = this.resolved.datasetIds[0]): Promise<number> {
    if (!datasetId) {
      return 0;
    }
    const [tableRefs] = await this.getClient().dataset(datasetId).getTables();
    const match = tableRefs.find((tableRef) => (tableRef.id || '') === tableName);
    if (!match) {
      return 0;
    }
    const [table] = await match.get();
    return firstNumber(table.metadata.numRows) ?? 0;
  }

  /** Formats a table reference as a quoted, qualified BigQuery table name. */
  qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes a single identifier using the BigQuery dialect. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Lists the ids of all datasets the client can see; datasets without an id are skipped. */
  async listDatasets(): Promise<string[]> {
    const [datasets] = await this.getClient().getDatasets();
    return datasets.map((dataset) => dataset.id).filter((id): id is string => Boolean(id));
  }

  /**
   * Lists tables and views, sorted by dataset then name.
   *
   * @param datasetIds - Datasets to list; when omitted, every dataset from `listDatasets()` is used.
   */
  async listTables(datasetIds?: string[]): Promise<KtxTableListEntry[]> {
    const filterDatasets = datasetIds ?? (await this.listDatasets());
    const entries: KtxTableListEntry[] = [];

    for (const datasetId of filterDatasets) {
      const [tables] = await this.getClient().dataset(datasetId).getTables();
      for (const table of tables) {
        if (!table.id) continue;
        entries.push({
          schema: datasetId,
          name: table.id,
          // Classify views with the same helper as introspection, so materialized views and
          // differently-cased type strings are reported as views here too.
          kind: tableKind(table.metadata?.type) === 'view' ? 'view' : 'table',
        });
      }
    }

    entries.sort((a, b) => a.schema.localeCompare(b.schema) || a.name.localeCompare(b.name));
    return entries;
  }

  /** Drops the cached client; a later call creates a fresh one. */
  async cleanup(): Promise<void> {
    this.client = null;
  }

  /** Returns the cached client, creating it on first use. */
  private getClient(): KtxBigQueryClient {
    if (!this.client) {
      this.client = this.clientFactory.createClient({
        projectId: this.resolved.projectId,
        credentials: this.resolved.credentials,
      });
    }
    return this.client;
  }

  /**
   * Runs one query job and converts its result to positional rows.
   *
   * Headers come from the response schema, falling back to the keys of the first row when the
   * schema is absent. Every cell is passed through `normalizeValue`.
   */
  private async query(sql: string, params?: Record<string, unknown>): Promise<KtxQueryResult> {
    const [job] = await this.getClient().createQueryJob({
      query: sql,
      ...(this.resolved.location ? { location: this.resolved.location } : {}),
      ...(params && Object.keys(params).length > 0 ? { params } : {}),
      ...(this.maxBytesBilled ? { maximumBytesBilled: String(this.maxBytesBilled) } : {}),
      ...(this.queryTimeoutMs ? { jobTimeoutMs: this.queryTimeoutMs } : {}),
    });
    const [rows, , response] = await job.getQueryResults();

    const fields = response?.schema?.fields;
    let headers = fields?.map((field) => field.name || '') ?? [];
    const headerTypes = fields?.map((field) => String(field.type || 'STRING')) ?? [];

    const firstRow = rows[0];
    if (headers.length === 0 && firstRow) {
      headers = Object.keys(firstRow);
    }

    return {
      headers,
      headerTypes: headerTypes.length > 0 ? headerTypes : undefined,
      rows: rows.map((row) => headers.map((header) => normalizeValue(row[header]))),
      totalRows: rows.length,
      rowCount: rows.length,
    };
  }

  /** Runs a query and returns its rows as objects keyed by header. */
  private async queryRaw<T extends Record<string, unknown>>(sql: string, params?: Record<string, unknown>): Promise<T[]> {
    const result = await this.query(sql, params);
    return result.rows.map((row) => Object.fromEntries(result.headers.map((header, index) => [header, row[index]])) as T);
  }

  /** Runs a query and reads `header` from its first row as a number, or `null` when unavailable. */
  private async singleNumber(sql: string, header: string): Promise<number | null> {
    const rows = await this.queryRaw<Record<string, unknown>>(sql);
    return firstNumber(rows[0]?.[header]);
  }

  /** Introspects every table of one dataset, including columns and primary-key flags. */
  private async introspectDataset(datasetId: string): Promise<KtxSchemaTable[]> {
    const [tableRefs] = await this.getClient().dataset(datasetId).getTables();
    const primaryKeys = await this.primaryKeys(datasetId);

    const tables: KtxSchemaTable[] = [];
    for (const tableRef of tableRefs) {
      const tableName = tableRef.id || '';
      const [table] = await tableRef.get();
      const fields = table.metadata.schema?.fields ?? [];
      tables.push({
        catalog: this.resolved.projectId,
        db: datasetId,
        name: tableName,
        kind: tableKind(table.metadata.type),
        comment: table.metadata.description || null,
        estimatedRows: firstNumber(table.metadata.numRows) ?? 0,
        columns: fields.map((field) => this.toSchemaColumn(tableName, field, primaryKeys)),
        foreignKeys: [],
      });
    }
    return tables;
  }

  /**
   * Reads primary-key columns for a dataset from `INFORMATION_SCHEMA`, grouped by table name.
   *
   * Columns whose names start with `stacksync_record_id_` or `sync_primary_key_` are excluded
   * by the query.
   */
  private async primaryKeys(datasetId: string): Promise<Map<string, Set<string>>> {
    // NOTE(review): the project and dataset ids are interpolated into the SQL text unescaped;
    // this assumes they come from trusted configuration — confirm.
    const scope = this.resolved.projectId + '.' + datasetId;
    const sql = [
      'SELECT tc.table_name, kcu.column_name',
      'FROM `' + scope + '.INFORMATION_SCHEMA.TABLE_CONSTRAINTS` tc',
      'JOIN `' + scope + '.INFORMATION_SCHEMA.KEY_COLUMN_USAGE` kcu',
      'ON tc.constraint_name = kcu.constraint_name',
      'AND tc.table_schema = kcu.table_schema',
      'AND tc.table_name = kcu.table_name',
      "WHERE tc.constraint_type = 'PRIMARY KEY'",
      "AND tc.table_schema = '" + datasetId + "'",
      "AND NOT REGEXP_CONTAINS(kcu.column_name, r'^(stacksync_record_id|sync_primary_key)_')",
      'ORDER BY tc.table_name, kcu.ordinal_position',
    ].join(' ');

    const rows = await this.queryRaw<{ table_name: string; column_name: string }>(sql);

    const grouped = new Map<string, Set<string>>();
    for (const row of rows) {
      const columns = grouped.get(row.table_name) ?? new Set<string>();
      columns.add(row.column_name);
      grouped.set(row.table_name, columns);
    }
    return grouped;
  }

  /** Converts one BigQuery field into a schema column; a missing type is treated as `STRING`. */
  private toSchemaColumn(tableName: string, field: TableField, primaryKeys: Map<string, Set<string>>): KtxSchemaColumn {
    const nativeType = String(field.type || 'STRING').toUpperCase();
    return {
      name: field.name || '',
      nativeType,
      normalizedType: this.dialect.mapDataType(nativeType),
      dimensionType: this.dialect.mapToDimensionType(nativeType),
      nullable: field.mode !== 'REQUIRED',
      primaryKey: primaryKeys.get(tableName)?.has(field.name || '') ?? false,
      comment: field.description || null,
    };
  }

  /** Guards against a connector instance being used for a different connection. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`BigQuery connector ${this.connectionId} cannot scan connection ${connectionId}`);
    }
  }
}
|
||||
52
packages/cli/src/connectors/bigquery/dialect.test.ts
Normal file
52
packages/cli/src/connectors/bigquery/dialect.test.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxBigQueryDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the BigQuery SQL dialect: identifier quoting, type mapping, query generation
// and parameter rewriting.
describe('KtxBigQueryDialect', () => {
  const dialect = new KtxBigQueryDialect();
  // Pre-quoted table name shared by the SQL generation assertions.
  const ordersTable = '`p`.`d`.`orders`';

  it('quotes identifiers and formats project.dataset.table names', () => {
    expect(dialect.quoteIdentifier('order`items')).toBe('`order\\`items`');

    const fullyQualified = dialect.formatTableName({ catalog: 'project-1', db: 'analytics', name: 'orders' });
    expect(fullyQualified).toBe('`project-1`.`analytics`.`orders`');

    expect(dialect.formatTableName({ db: 'analytics', name: 'orders' })).toBe('`analytics`.`orders`');
    expect(dialect.formatTableName({ name: 'orders' })).toBe('`orders`');
  });

  it('maps native BigQuery types to normalized types and scan dimensions', () => {
    // Native type -> normalized type.
    expect(dialect.mapDataType('INT64')).toBe('BIGINT');
    expect(dialect.mapDataType('STRUCT')).toBe('JSON');
    expect(dialect.mapDataType('GEOGRAPHY')).toBe('GEOGRAPHY');

    // Native type -> scan dimension.
    expect(dialect.mapToDimensionType('TIMESTAMP')).toBe('time');
    expect(dialect.mapToDimensionType('NUMERIC')).toBe('number');
    expect(dialect.mapToDimensionType('BOOL')).toBe('boolean');
    expect(dialect.mapToDimensionType('JSON')).toBe('string');
  });

  it('generates sampling, cardinality, and distinct-value SQL', () => {
    const tableSample = dialect.generateSampleQuery(ordersTable, 5, ['id', 'status']);
    expect(tableSample).toBe('SELECT `id`, `status` FROM `p`.`d`.`orders` ORDER BY RAND() LIMIT 5');

    const columnSample = dialect.generateColumnSampleQuery(ordersTable, 'status', 10);
    expect(columnSample).toBe(
      "SELECT `status` FROM `p`.`d`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS STRING)) != '' ORDER BY RAND() LIMIT 10",
    );

    const cardinality = dialect.generateCardinalitySampleQuery(ordersTable, '`status`', 100);
    expect(cardinality).toContain('SELECT APPROX_COUNT_DISTINCT(val) AS cardinality');

    const distinctValues = dialect.generateDistinctValuesQuery(ordersTable, '`status`', 20);
    expect(distinctValues).toContain('SELECT DISTINCT CAST(`status` AS STRING) AS val');
  });

  it('rewrites colon parameters to BigQuery named parameters', () => {
    const withParams = dialect.prepareQuery('SELECT * FROM orders WHERE id = :id AND id_2 = :id_2', { id: 1, id_2: 2 });
    expect(withParams).toEqual({
      sql: 'SELECT * FROM orders WHERE id = @id AND id_2 = @id_2',
      params: { id: 1, id_2: 2 },
    });

    const withoutParams = dialect.prepareQuery('SELECT * FROM orders');
    expect(withoutParams).toEqual({ sql: 'SELECT * FROM orders', params: undefined });
  });

  it('keeps unsupported statistics explicit', () => {
    expect(dialect.generateColumnStatisticsQuery('analytics', 'orders')).toBeNull();
  });
});
|
||||
207
packages/cli/src/connectors/bigquery/dialect.ts
Normal file
207
packages/cli/src/connectors/bigquery/dialect.ts
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
type BigQueryTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL dialect helpers for BigQuery: identifier quoting, type mapping and generation of the SQL
 * fragments and queries used by the scan connector.
 *
 * Methods that take a `tableName` or `column` argument interpolate it verbatim; callers are
 * expected to pass already-quoted names unless a method says it quotes its input.
 */
export class KtxBigQueryDialect {
  readonly type = 'bigquery';

  /** Exact-match mapping from native type to scan dimension; consulted before the substring fallbacks. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    TIMESTAMP: 'time',
    DATETIME: 'time',
    DATE: 'time',
    TIME: 'time',
    INT64: 'number',
    INTEGER: 'number',
    FLOAT64: 'number',
    FLOAT: 'number',
    NUMERIC: 'number',
    BIGNUMERIC: 'number',
    STRING: 'string',
    BYTES: 'string',
    BOOL: 'boolean',
    BOOLEAN: 'boolean',
    // Listed explicitly: without it the `includes('INT')` fallback classifies INTERVAL as a number.
    INTERVAL: 'string',
  };

  /** Mapping from native BigQuery type to normalized type name. */
  private readonly dataTypeMappings: Record<string, string> = {
    STRING: 'VARCHAR',
    BYTES: 'VARBINARY',
    INTEGER: 'BIGINT',
    INT64: 'BIGINT',
    FLOAT: 'DOUBLE',
    FLOAT64: 'DOUBLE',
    NUMERIC: 'DECIMAL',
    BIGNUMERIC: 'DECIMAL',
    BOOLEAN: 'BOOLEAN',
    BOOL: 'BOOLEAN',
    TIMESTAMP: 'TIMESTAMP',
    DATE: 'DATE',
    TIME: 'TIME',
    DATETIME: 'DATETIME',
    GEOGRAPHY: 'GEOGRAPHY',
    JSON: 'JSON',
  };

  /**
   * Wraps an identifier in backticks, backslash-escaping embedded backticks and backslashes.
   *
   * Backslashes must be escaped too: an identifier ending in `\` would otherwise escape the
   * closing backtick and leave the quoted identifier unterminated.
   */
  quoteIdentifier(identifier: string): string {
    return `\`${identifier.replace(/[\\`]/g, '\\$&')}\``;
  }

  /** Formats a table reference as `catalog.db.name`, `db.name` or `name`, quoting each part. */
  formatTableName(table: BigQueryTableNameRef): string {
    if (table.catalog && table.db) {
      return `${this.quoteIdentifier(table.catalog)}.${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
    }
    if (table.db) {
      return `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`;
    }
    return this.quoteIdentifier(table.name);
  }

  /**
   * Maps a native BigQuery type to a normalized type name.
   * `RECORD` / `STRUCT` become `JSON`; unknown types are returned upper-cased and trimmed.
   */
  mapDataType(nativeType: string): string {
    const fieldType = nativeType.toUpperCase().trim();
    if (fieldType === 'RECORD' || fieldType === 'STRUCT') {
      return 'JSON';
    }
    return this.dataTypeMappings[fieldType] || fieldType;
  }

  /**
   * Maps a native type to a scan dimension: exact matches first, then substring heuristics,
   * defaulting to `string`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const normalizedType = nativeType.toUpperCase().trim();
    const exact = this.typeMappings[normalizedType];
    if (exact) {
      return exact;
    }
    if (normalizedType.includes('TIME') || normalizedType.includes('DATE')) {
      return 'time';
    }
    if (normalizedType.includes('INT') || normalizedType.includes('NUM') || normalizedType.includes('FLOAT')) {
      return 'number';
    }
    if (normalizedType.includes('BOOL')) {
      return 'boolean';
    }
    return 'string';
  }

  /** Builds a random row sample query; `columns` are quoted here, `tableName` is used verbatim. */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} ORDER BY RAND() LIMIT ${limit}`;
  }

  /** Builds a random sample of one column's non-null, non-blank values; `columnName` is quoted here. */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != '' ORDER BY RAND() LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to BigQuery `@name` parameters for every key in `params`.
   *
   * Parameter names are matched literally, so regular-expression metacharacters in a name
   * cannot match unrelated text.
   *
   * @returns The rewritten SQL and the parameters (`undefined` when none were given or `params` is empty).
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }

    let processedSql = sql;
    const processedParams: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(params)) {
      const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // A replacer function keeps `$` sequences in the key from being read as replacement patterns.
      processedSql = processedSql.replace(new RegExp(`:${escapedKey}\\b`, 'g'), () => `@${key}`);
      processedParams[key] = value;
    }
    return { sql: processedSql, params: Object.keys(processedParams).length > 0 ? processedParams : undefined };
  }

  /** Row-level random filter for a sampling fraction in (0, 1); empty string otherwise. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `RAND() < ${samplePct}`;
  }

  /** `TABLESAMPLE` clause for a sampling fraction in (0, 1); empty string otherwise. */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `TABLESAMPLE SYSTEM (${samplePct * 100} PERCENT)`;
  }

  /** `LIMIT` clause, with `OFFSET` only when a positive offset is given. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Expression counting NULLs in `column`. */
  getNullCountExpression(column: string): string {
    return `COUNTIF(${column} IS NULL)`;
  }

  /** Expression computing an approximate distinct count of `column`. */
  getDistinctCountExpression(column: string): string {
    return `APPROX_COUNT_DISTINCT(${column})`;
  }

  /** Approximate cardinality over the first `sampleSize` non-null values of a column. */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return this.cardinalityQuery(tableName, columnName, sampleSize, false);
  }

  /** Distinct non-null values of a column cast to STRING, ordered, capped at `limit`. */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return [
      '',
      `SELECT DISTINCT CAST(${columnName} AS STRING) AS val`,
      `FROM ${tableName}`,
      `WHERE ${columnName} IS NOT NULL`,
      'ORDER BY val',
      `LIMIT ${limit}`,
      '',
    ].join('\n');
  }

  /** Column statistics queries are not supported for BigQuery; always `null`. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Like `generateCardinalitySampleQuery`, but samples rows in random order. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return this.cardinalityQuery(tableName, columnName, sampleSize, true);
  }

  /** Truncates a time column to `granularity`, converting to `timezone` first when given. */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const bigQueryGranularity = granularity.toUpperCase();
    if (timezone) {
      return `DATE_TRUNC(DATETIME(${column}, '${timezone}'), ${bigQueryGranularity})`;
    }
    return `DATE_TRUNC(${column}, ${bigQueryGranularity})`;
  }

  /**
   * Buckets a time column into custom intervals counted from `origin` (default `1970-01-01`).
   *
   * Week intervals are expressed in days (`amount * 7`).
   *
   * @param interval - `"<amount> <unit>"`, e.g. `"2 week"`.
   * @throws Error when `interval` is not in that form.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `DATETIME(${column}, '${timezone}')` : column;
    const parsed = this.splitInterval(interval);

    let unit = parsed.unit;
    let amount = Number(parsed.amount);
    if (unit === 'WEEK') {
      unit = 'DAY';
      amount = amount * 7;
    }

    const originExpr = origin ? `TIMESTAMP '${origin}'` : `TIMESTAMP '1970-01-01'`;
    return `TIMESTAMP_ADD(${originExpr}, INTERVAL CAST(FLOOR(TIMESTAMP_DIFF(${col}, ${originExpr}, ${unit}) / ${amount}) * ${amount} AS INT64) ${unit})`;
  }

  /**
   * Converts `"<amount> <unit>"` into a BigQuery `INTERVAL` expression.
   *
   * @throws Error when `interval` is not in that form.
   */
  parseIntervalToSql(interval: string): string {
    const { amount, unit } = this.splitInterval(interval);
    return `INTERVAL ${amount} ${unit}`;
  }

  /** Splits `"<amount> <unit>"` into its parts (unit upper-cased), rejecting malformed input. */
  private splitInterval(interval: string): { amount: string; unit: string } {
    const [amount, unit] = interval.trim().split(/\s+/);
    if (!amount || !unit || !Number.isFinite(Number(amount))) {
      throw new Error(`Invalid interval "${interval}": expected "<amount> <unit>", for example "7 day"`);
    }
    return { amount, unit: unit.toUpperCase() };
  }

  /** Shared body of the two cardinality sample queries; `randomized` adds `ORDER BY RAND()`. */
  private cardinalityQuery(tableName: string, columnName: string, sampleSize: number, randomized: boolean): string {
    return [
      '',
      'WITH sampled AS (',
      `SELECT ${columnName} AS val`,
      `FROM ${tableName}`,
      `WHERE ${columnName} IS NOT NULL`,
      ...(randomized ? ['ORDER BY RAND()'] : []),
      `LIMIT ${sampleSize}`,
      ')',
      'SELECT APPROX_COUNT_DISTINCT(val) AS cardinality',
      'FROM sampled',
      '',
    ].join('\n');
  }
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
import {
|
||||
KtxBigQueryScanConnector,
|
||||
type KtxBigQueryClientFactory,
|
||||
type KtxBigQueryConnectionConfig,
|
||||
} from './connector.js';
|
||||
|
||||
/** Inputs for building the BigQuery live-database introspection port. */
interface CreateBigQueryLiveDatabaseIntrospectionOptions {
  /** Project connections keyed by connection id. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Optional client factory, forwarded to each connector that is created. */
  clientFactory?: KtxBigQueryClientFactory;
  /** Optional clock, forwarded to each connector that is created. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Creates a live-database introspection port backed by the BigQuery scan connector.
 *
 * Each `extractSchema` call builds a fresh connector for the requested connection, introspects
 * it, and always cleans the connector up afterwards, even when introspection fails.
 */
export function createBigQueryLiveDatabaseIntrospection(
  options: CreateBigQueryLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    const connector = new KtxBigQueryScanConnector({
      connectionId,
      connection: options.connections[connectionId] as KtxBigQueryConnectionConfig | undefined,
      clientFactory: options.clientFactory,
      now: options.now,
    });

    try {
      const scanInput = { connectionId, driver: 'bigquery' } as const;
      return await connector.introspect(scanInput, { runId: `bigquery-${connectionId}` });
    } finally {
      await connector.cleanup();
    }
  };

  return { extractSchema };
}
|
||||
281
packages/cli/src/connectors/clickhouse/connector.test.ts
Normal file
281
packages/cli/src/connectors/clickhouse/connector.test.ts
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { clickHouseClientConfigFromConfig, isKtxClickHouseConnectionConfig, KtxClickHouseScanConnector, type KtxClickHouseClientFactory } from '../../connectors/clickhouse/connector.js';
|
||||
import { createClickHouseLiveDatabaseIntrospection } from '../../connectors/clickhouse/live-database-introspection.js';
|
||||
|
||||
/** Wraps `payload` in a minimal result-set stand-in whose `json()` resolves to it. */
function result<T>(payload: T) {
  const json = async (): Promise<T> => payload;
  return { json };
}
|
||||
|
||||
/**
 * Builds a client factory whose client answers a fixed set of SQL statements
 * with canned responses and throws on anything else.
 *
 * Routes are evaluated top to bottom and the first match wins, so the more
 * specific sample query must stay ahead of the broader column-sample match.
 */
function fakeClientFactory(): KtxClickHouseClientFactory {
  type ColumnMeta = { name: string; type: string };
  type Route = [matches: (sql: string) => boolean, respond: () => { json(): Promise<unknown> }];

  // Compact-format payload; every canned response reports rows === data.length.
  const compact = (meta: ColumnMeta[], data: unknown[][]) => result({ meta, data, rows: data.length });
  const idAndEventName = () =>
    compact(
      [
        { name: 'id', type: 'UInt64' },
        { name: 'event_name', type: 'String' },
      ],
      [[10, 'signup']],
    );

  const routes: Route[] = [
    [
      (sql) => sql.includes('FROM system.tables'),
      () =>
        result([
          { name: 'events', engine: 'MergeTree', comment: 'Event stream' },
          { name: 'event_summary', engine: 'View', comment: '' },
        ]),
    ],
    [
      (sql) => sql.includes('FROM system.columns'),
      () =>
        result([
          { table: 'events', name: 'id', type: 'UInt64', comment: 'PK', is_in_primary_key: 1 },
          { table: 'events', name: 'event_name', type: 'LowCardinality(String)', comment: '', is_in_primary_key: 0 },
          { table: 'event_summary', name: 'event_name', type: 'String', comment: '', is_in_primary_key: 0 },
        ]),
    ],
    [
      (sql) => sql.includes('FROM system.parts') && sql.includes('GROUP BY table'),
      () => result([{ table: 'events', row_count: '2' }]),
    ],
    [(sql) => sql.includes('SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 1'), idAndEventName],
    [
      (sql) => sql.includes('SELECT `event_name` FROM `analytics`.`events`'),
      () => compact([{ name: 'event_name', type: 'String' }], [['signup'], ['purchase']]),
    ],
    [(sql) => sql.includes('COUNT(DISTINCT val)'), () => compact([{ name: 'cardinality', type: 'UInt64' }], [[2]])],
    [
      (sql) => sql.includes('SELECT DISTINCT toString(`event_name`) AS val'),
      () => compact([{ name: 'val', type: 'String' }], [['purchase'], ['signup']]),
    ],
    [(sql) => sql.includes('sum(rows) AS count'), () => compact([{ name: 'count', type: 'UInt64' }], [[2]])],
    [(sql) => sql.includes('FROM system.databases'), () => result([{ name: 'analytics' }, { name: 'warehouse' }])],
    [(sql) => sql.trim() === 'SELECT 1', () => compact([{ name: '1', type: 'UInt8' }], [[1]])],
    [
      (sql) =>
        sql.includes('select * from (select id, event_name from analytics.events) as ktx_query_result limit 1'),
      idAndEventName,
    ],
  ];

  const query = vi.fn(async (input: { query: string; format: string; query_params?: Record<string, unknown> }) => {
    const route = routes.find(([matches]) => matches(input.query));
    if (!route) {
      throw new Error(`Unexpected SQL: ${input.query}`);
    }
    return route[1]();
  });
  const close = vi.fn(async () => undefined);

  return {
    createClient: vi.fn(() => ({ query, close })),
  };
}
|
||||
|
||||
describe('KtxClickHouseScanConnector', () => {
  // Shared fixtures: every case targets the same fake "warehouse" connection.
  const fixedNow = () => new Date('2026-04-29T14:00:00.000Z');
  const scanContext = { runId: 'scan-run-1' };
  const eventsTable = { catalog: null, db: 'analytics', name: 'events' };
  const warehouseConnection = () => ({
    driver: 'clickhouse' as const,
    host: 'ch.example.test',
    database: 'analytics',
    username: 'reader',
    password: 'test-pass', // pragma: allowlist secret
  });

  it('resolves ClickHouse connection configuration safely', () => {
    const clickhouseLike = { driver: 'clickhouse', host: 'localhost', database: 'analytics' };
    const mysqlLike = { driver: 'mysql', host: 'localhost', database: 'analytics' };
    expect(isKtxClickHouseConnectionConfig(clickhouseLike)).toBe(true);
    expect(isKtxClickHouseConnectionConfig(mysqlLike)).toBe(false);

    const resolved = clickHouseClientConfigFromConfig({
      connectionId: 'warehouse',
      connection: { ...warehouseConnection(), port: 9440, ssl: true },
    });
    expect(resolved).toMatchObject({
      host: 'ch.example.test',
      port: 9440,
      database: 'analytics',
      username: 'reader',
      password: 'test-pass', // pragma: allowlist secret
      ssl: true,
    });
  });

  it('introspects schema, primary keys, comments, row counts, and views', async () => {
    const connector = new KtxClickHouseScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      clientFactory: fakeClientFactory(),
      now: fixedNow,
    });

    const snapshot = await connector.introspect({ connectionId: 'warehouse', driver: 'clickhouse' }, scanContext);

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'clickhouse',
      extractedAt: '2026-04-29T14:00:00.000Z',
      scope: { schemas: ['analytics'] },
      metadata: {
        database: 'analytics',
        host: 'ch.example.test',
        table_count: 2,
        total_columns: 3,
      },
    });

    const tableSummaries = snapshot.tables.map((table) => [
      table.name,
      table.kind,
      table.estimatedRows,
      table.comment,
    ]);
    expect(tableSummaries).toEqual([
      ['events', 'table', 2, 'Event stream'],
      ['event_summary', 'view', null, null],
    ]);

    expect(snapshot.tables.find((table) => table.name === 'events')?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'UInt64',
      normalizedType: 'UInt64',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
      comment: 'PK',
    });
    expect(snapshot.tables.find((table) => table.name === 'events')?.foreignKeys).toEqual([]);
  });

  it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
    const clientFactory = fakeClientFactory();
    const connector = new KtxClickHouseScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      clientFactory,
    });

    const tableSample = connector.sampleTable(
      { connectionId: 'warehouse', table: eventsTable, columns: ['id', 'event_name'], limit: 1 },
      scanContext,
    );
    await expect(tableSample).resolves.toEqual({
      headers: ['id', 'event_name'],
      rows: [[10, 'signup']],
      totalRows: 1,
    });

    const columnSample = connector.sampleColumn(
      { connectionId: 'warehouse', table: eventsTable, column: 'event_name', limit: 5 },
      scanContext,
    );
    await expect(columnSample).resolves.toMatchObject({
      values: ['signup', 'purchase'],
      nullCount: null,
      distinctCount: null,
    });

    const distinctValues = connector.getColumnDistinctValues(eventsTable, 'event_name', {
      maxCardinality: 5,
      limit: 10,
      sampleSize: 100,
    });
    await expect(distinctValues).resolves.toEqual({ values: ['purchase', 'signup'], cardinality: 2 });

    const readOnlySelect = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'select id, event_name from analytics.events', maxRows: 1 },
      scanContext,
    );
    await expect(readOnlySelect).resolves.toMatchObject({
      headers: ['id', 'event_name'],
      rows: [[10, 'signup']],
      totalRows: 1,
      rowCount: 1,
    });

    const rejectedWrite = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'delete from events' },
      scanContext,
    );
    await expect(rejectedWrite).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

    await expect(connector.getTableRowCount('events')).resolves.toBe(2);
    await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']);

    const stats = connector.columnStats(
      { connectionId: 'warehouse', table: eventsTable, column: 'event_name' },
      scanContext,
    );
    await expect(stats).resolves.toBeNull();

    await connector.cleanup();
  });

  it('adapts native ClickHouse snapshots to live-database introspection for local ingest', async () => {
    const introspection = createClickHouseLiveDatabaseIntrospection({
      connections: { warehouse: warehouseConnection() },
      clientFactory: fakeClientFactory(),
      now: fixedNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T14:00:00.000Z',
    });

    const expectedIdColumn = {
      name: 'id',
      nativeType: 'UInt64',
      normalizedType: 'UInt64',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
      comment: 'PK',
    };
    const expectedEventNameColumn = {
      name: 'event_name',
      nativeType: 'LowCardinality(String)',
      normalizedType: 'LowCardinality(String)',
      dimensionType: 'string',
      nullable: false,
      primaryKey: false,
      comment: null,
    };
    expect(snapshot.tables.find((table) => table.name === 'events')).toMatchObject({
      name: 'events',
      catalog: null,
      db: 'analytics',
      columns: [expectedIdColumn, expectedEventNameColumn],
      foreignKeys: [],
    });
  });
});
|
||||
533
packages/cli/src/connectors/clickhouse/connector.ts
Normal file
533
packages/cli/src/connectors/clickhouse/connector.ts
Normal file
|
|
@ -0,0 +1,533 @@
|
|||
import { createClient } from '@clickhouse/client';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { Agent as HttpsAgent } from 'node:https';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { KtxClickHouseDialect } from './dialect.js';
|
||||
|
||||
/**
 * Raw ClickHouse connection settings as supplied by the caller.
 * Every known field is optional and arbitrary extra keys are accepted.
 */
export interface KtxClickHouseConnectionConfig {
  /** Driver name; matched case-insensitively against `clickhouse`. */
  driver?: string;
  /** Connection URL; values parsed from it serve as defaults for the explicit fields below. */
  url?: string;
  host?: string;
  port?: number;
  database?: string;
  username?: string;
  /** Fallback consulted when `username` is not set. */
  user?: string;
  password?: string;
  ssl?: boolean;
  [key: string]: unknown;
}
|
||||
|
||||
/** Fully resolved connection settings, as produced by `clickHouseClientConfigFromConfig`. */
export interface KtxClickHouseResolvedClientConfig {
  host: string;
  port: number;
  /** When true the client URL uses `https`, otherwise `http`. */
  ssl: boolean;
  database: string;
  username: string;
  /** Absent when no password was configured; the client then receives an empty string. */
  password?: string;
}
|
||||
|
||||
/** Request shape handed to `KtxClickHouseClient.query`. */
interface ClickHouseQueryInput {
  /** SQL text to execute. */
  query: string;
  /** Response format requested for this query. */
  format: 'JSONCompact' | 'JSONEachRow';
  /** Values bound to `{name:Type}` placeholders in `query`. */
  query_params?: Record<string, unknown>;
}
|
||||
|
||||
/** Minimal result handle: the connector only reads the parsed JSON payload. */
interface ClickHouseResultSet {
  /** Resolves to the decoded response body; the shape depends on the requested format. */
  json(): Promise<unknown>;
}
|
||||
|
||||
/** The slice of a ClickHouse client that the connector relies on. */
export interface KtxClickHouseClient {
  /** Releases the client; called from the connector's `cleanup()`. */
  close(): Promise<void>;
  /** Runs a single query and returns a handle to its result. */
  query(input: ClickHouseQueryInput): Promise<ClickHouseResultSet>;
}
|
||||
|
||||
/** Creates ClickHouse clients; injectable so callers can substitute their own client. */
export interface KtxClickHouseClientFactory {
  /** Accepts the same configuration object as `createClient` from `@clickhouse/client`. */
  createClient(config: Parameters<typeof createClient>[0]): KtxClickHouseClient;
}
|
||||
|
||||
/** Host and port the client should actually connect to, as returned by an endpoint resolver. */
interface KtxClickHouseResolvedEndpoint {
  host: string;
  port: number;
  /** Optional teardown hook, awaited by the connector's `cleanup()`. */
  close?: () => Promise<void>;
}
|
||||
|
||||
/**
 * Hook that can substitute the host and port used for the client URL.
 * The connector calls it once, right before creating its client.
 */
export interface KtxClickHouseEndpointResolver {
  /**
   * @param input - The configured host and port plus the raw connection settings.
   * @returns The endpoint to connect to instead.
   */
  resolve(input: {
    host: string;
    port: number;
    connection: KtxClickHouseConnectionConfig;
  }): Promise<KtxClickHouseResolvedEndpoint>;
}
|
||||
|
||||
/** Constructor options for `KtxClickHouseScanConnector`. */
export interface KtxClickHouseScanConnectorOptions {
  /** Id of the single connection this connector serves. */
  connectionId: string;
  /** Raw connection settings; the constructor throws unless the driver is ClickHouse. */
  connection: KtxClickHouseConnectionConfig | undefined;
  /** Environment used to resolve `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Clock used for the snapshot's `extractedAt`; defaults to the current time. */
  now?: () => Date;
  /** Client factory override; defaults to one backed by `@clickhouse/client`. */
  clientFactory?: KtxClickHouseClientFactory;
  /** Optional hook that substitutes the host and port the client connects to. */
  endpointResolver?: KtxClickHouseEndpointResolver;
}
|
||||
|
||||
/** Read-only query input extended with optional query parameters. */
export interface KtxClickHouseReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  /** Parameter values handed to the dialect's `prepareQuery` together with the SQL. */
  params?: Record<string, unknown>;
}
|
||||
|
||||
/** Tuning knobs for `getColumnDistinctValues`. */
export interface KtxClickHouseColumnDistinctValuesOptions {
  /** Sample size for the cardinality query; defaults to 10000 when omitted. */
  sampleSize?: number;
  /** Above this cardinality no values are fetched and `values` comes back as `null`. */
  maxCardinality: number;
  /** Limit passed to the distinct-values query. */
  limit: number;
}
|
||||
|
||||
/** Outcome of `getColumnDistinctValues`. */
export interface KtxClickHouseColumnDistinctValuesResult {
  /** Cardinality reported by the sampled cardinality query. */
  cardinality: number;
  /** Stringified non-null distinct values, or `null` when the cardinality exceeded `maxCardinality`. */
  values: string[] | null;
}
|
||||
|
||||
/** One row of the `system.tables` query issued by `introspect`. */
interface ClickHouseTableRow {
  name: string;
  /** Table engine; `View` and `MaterializedView` are reported as views. */
  engine: string;
  /** Table comment; an empty string is surfaced as `null`. */
  comment: string;
}
|
||||
|
||||
/** One row of the `system.columns` query issued by `introspect`. */
interface ClickHouseColumnRow {
  /** Name of the table that owns the column. */
  table: string;
  name: string;
  /** Native ClickHouse type string, e.g. `LowCardinality(String)`. */
  type: string;
  /** Column comment; an empty string is surfaced as `null`. */
  comment: string;
  /** Equals `1` when the column belongs to the primary key. */
  is_in_primary_key: number;
}
|
||||
|
||||
/** Row-count row read from `system.parts`; numeric fields may arrive as strings or numbers. */
interface ClickHouseRowCountRow {
  table?: string;
  /** Row total under the `row_count` alias (per-table grouping in `introspect`). */
  row_count?: string | number;
  /** Row total under the `count` alias. */
  count?: string | number;
}
|
||||
|
||||
/** One row of the `system.databases` query issued by `listSchemas`. */
interface ClickHouseDatabaseRow {
  /** Database name. */
  name: string;
}
|
||||
|
||||
/** One row of the `system.tables` query issued by `listTables`. */
interface ClickHouseTableListRow {
  /** Database that owns the table; reported as the entry's `schema`. */
  database: string;
  name: string;
  /** Table engine; `View` and `MaterializedView` are reported as views. */
  engine: string;
}
|
||||
|
||||
/** Body of a `JSONCompact` response; every field is treated as optional by the connector. */
interface ClickHouseCompactResponse {
  /** Column descriptors, in result order. */
  meta?: { name: string; type: string }[];
  /** Rows as positional arrays. */
  data?: unknown[][];
  /** Reported row count; the connector falls back to `data.length` when it is absent. */
  rows?: number;
}
|
||||
|
||||
/** Default factory: delegates directly to `createClient` from `@clickhouse/client`. */
class DefaultClickHouseClientFactory implements KtxClickHouseClientFactory {
  createClient(config: Parameters<typeof createClient>[0]): KtxClickHouseClient {
    // Inside the method body the bare identifier refers to the imported function.
    const client: KtxClickHouseClient = createClient(config);
    return client;
  }
}
|
||||
|
||||
/**
 * Reads a string-valued setting from the connection config.
 *
 * @param connection - Connection settings, possibly undefined.
 * @param key - Setting to read.
 * @param env - Environment used to resolve `env:` references.
 * @returns The trimmed value with references resolved, or `undefined` when the
 *   setting is missing, not a string, or blank.
 */
function stringConfigValue(
  connection: KtxClickHouseConnectionConfig | undefined,
  key: keyof KtxClickHouseConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Resolves indirection prefixes in a configuration string.
 *
 * - `env:NAME` yields `env[NAME]`, or an empty string when the variable is unset.
 * - `file:PATH` yields the trimmed UTF-8 contents of the file; a leading `~`
 *   in `PATH` is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param value - Raw configuration value.
 * @param env - Environment consulted for `env:` references.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a `file:` path cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    const envName = value.slice('env:'.length);
    return env[envName] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator after `~` before resolving: path.resolve() treats a
    // segment starting with `/` as absolute and would discard the home
    // directory, turning `~/secret` into `/secret`.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/** Returns `value` when it is a finite number, otherwise `undefined`. */
function maybeNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Extracts connection settings from a URL.
 *
 * Username and password are URI-decoded; the database is the path with its
 * leading slashes removed. `ssl` is true for the `https:` protocol or when the
 * query string contains `ssl=true`. Parts missing from the URL come back as
 * `undefined`.
 *
 * @param url - URL text accepted by the `URL` constructor.
 * @returns The settings found in the URL.
 * @throws TypeError when `url` cannot be parsed.
 */
function parseClickHouseUrl(url: string): Partial<KtxClickHouseConnectionConfig> {
  const { hostname, port, pathname, username, password, protocol, searchParams } = new URL(url);
  const databaseName = pathname.replace(/^\/+/, '');
  const sslRequested = protocol === 'https:' || searchParams.get('ssl') === 'true';

  return {
    host: hostname,
    port: port ? Number(port) : undefined,
    database: databaseName || undefined,
    username: username ? decodeURIComponent(username) : undefined,
    password: password ? decodeURIComponent(password) : undefined,
    ssl: sslRequested,
  };
}
|
||||
|
||||
/** Classifies a table engine: `View` and `MaterializedView` are views, everything else is a table. */
function tableKind(engine: string): KtxSchemaTable['kind'] {
  switch (engine) {
    case 'View':
    case 'MaterializedView':
      return 'view';
    default:
      return 'table';
  }
}
|
||||
|
||||
/** True when the type string starts with `Nullable(` or `LowCardinality(Nullable(`. */
function isNullableClickHouseType(type: string): boolean {
  const nullablePrefixes = ['Nullable(', 'LowCardinality(Nullable('];
  return nullablePrefixes.some((prefix) => type.startsWith(prefix));
}
|
||||
|
||||
/**
 * Type guard: true when the connection's `driver`, compared case-insensitively,
 * is `clickhouse`. An undefined connection or a missing driver yields false.
 */
export function isKtxClickHouseConnectionConfig(
  connection: KtxClickHouseConnectionConfig | undefined,
): connection is KtxClickHouseConnectionConfig {
  const driverName = String(connection?.driver ?? '');
  return driverName.toLowerCase() === 'clickhouse';
}
|
||||
|
||||
/**
 * Resolves raw connection settings into the concrete values used to build a client.
 *
 * Values parsed from `url` act as defaults that explicit fields override.
 * Fallbacks: database `default`, username from `username` then `user` then
 * `default`, port `8123`. `ssl` is true only when the merged value is exactly `true`.
 *
 * @internal
 * @param input - Connection id (used in error messages), raw settings, and an
 *   optional environment that defaults to `process.env`.
 * @returns The resolved client configuration.
 * @throws Error when the driver is not ClickHouse or when no host can be determined.
 */
export function clickHouseClientConfigFromConfig(input: {
  connectionId: string;
  connection: KtxClickHouseConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxClickHouseResolvedClientConfig {
  // Captured before the guard so the message can still name the rejected driver.
  const driverLabel = input.connection?.driver ?? 'unknown';
  if (!isKtxClickHouseConnectionConfig(input.connection)) {
    throw new Error(`Native ClickHouse connector cannot run driver "${driverLabel}"`);
  }

  const env = input.env ?? process.env;
  const resolvedUrl = stringConfigValue(input.connection, 'url', env);
  const fromUrl = resolvedUrl ? parseClickHouseUrl(resolvedUrl) : {};
  const merged: KtxClickHouseConnectionConfig = { ...fromUrl, ...input.connection };
  const read = (key: keyof KtxClickHouseConnectionConfig) => stringConfigValue(merged, key, env);

  const host = read('host');
  const database = read('database') ?? 'default';
  const username = read('username') ?? read('user') ?? 'default';

  if (!host) {
    throw new Error(`Native ClickHouse connector requires connections.${input.connectionId}.host or url`);
  }

  const port = maybeNumber(merged.port) ?? 8123;
  const password = read('password');
  return { host, port, database, username, password, ssl: merged.ssl === true };
}
|
||||
|
||||
/**
 * Scan connector for a single ClickHouse connection.
 *
 * The client is created lazily on the first query and reused until
 * {@link KtxClickHouseScanConnector.cleanup} is called. Every statement is
 * passed through `assertReadOnlySql` before it is sent.
 */
export class KtxClickHouseScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'clickhouse' as const;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: false,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly connection: KtxClickHouseConnectionConfig;
  private readonly clientConfig: KtxClickHouseResolvedClientConfig;
  private readonly clientFactory: KtxClickHouseClientFactory;
  private readonly endpointResolver?: KtxClickHouseEndpointResolver;
  private readonly now: () => Date;
  private readonly dialect = new KtxClickHouseDialect();
  private client: KtxClickHouseClient | null = null;
  private resolvedEndpoint: KtxClickHouseResolvedEndpoint | null = null;

  /**
   * @param options - Connection id and settings plus optional collaborators.
   * @throws Error when the settings do not describe a usable ClickHouse connection.
   */
  constructor(options: KtxClickHouseScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.connection = options.connection ?? {};
    this.clientConfig = clickHouseClientConfigFromConfig({
      connectionId: options.connectionId,
      connection: options.connection,
      env: options.env,
    });
    this.clientFactory = options.clientFactory ?? new DefaultClickHouseClientFactory();
    this.endpointResolver = options.endpointResolver;
    this.now = options.now ?? (() => new Date());
    this.id = `clickhouse:${options.connectionId}`;
  }

  /** Runs `SELECT 1`; failures are reported in the result instead of being thrown. */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      await this.query('SELECT 1');
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Reads tables, columns and active-part row counts for the configured
   * database from the `system` tables and assembles a schema snapshot.
   *
   * @throws Error when `input.connectionId` is not this connector's connection.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const database = this.clientConfig.database;
    const tables = await this.queryEachRow<ClickHouseTableRow>(
      `
        SELECT name, engine, comment
        FROM system.tables
        WHERE database = {database:String}
          AND engine NOT IN ('Dictionary')
        ORDER BY name
      `,
      { database },
    );
    const columns = await this.queryEachRow<ClickHouseColumnRow>(
      `
        SELECT table, name, type, comment, is_in_primary_key
        FROM system.columns
        WHERE database = {database:String}
        ORDER BY table, position
      `,
      { database },
    );
    const rowCounts = await this.queryEachRow<ClickHouseRowCountRow>(
      `
        SELECT table, sum(rows) AS row_count
        FROM system.parts
        WHERE database = {database:String}
          AND active = 1
        GROUP BY table
      `,
      { database },
    );

    // Append to each table's bucket in place; rebuilding the array per column
    // would make grouping quadratic in the number of columns.
    const columnsByTable = new Map<string, ClickHouseColumnRow[]>();
    for (const column of columns) {
      const bucket = columnsByTable.get(column.table);
      if (bucket) {
        bucket.push(column);
      } else {
        columnsByTable.set(column.table, [column]);
      }
    }
    const rowCountByTable = new Map(rowCounts.map((row) => [String(row.table), Number(row.row_count ?? 0)]));
    const schemaTables = tables.map((table) =>
      this.toSchemaTable(table, columnsByTable.get(table.name) ?? [], rowCountByTable.get(table.name) ?? 0),
    );

    return {
      connectionId: this.connectionId,
      driver: 'clickhouse',
      extractedAt: this.now().toISOString(),
      scope: { schemas: [database] },
      metadata: {
        database,
        host: this.clientConfig.host,
        table_count: schemaTables.length,
        total_columns: schemaTables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables: schemaTables,
    };
  }

  /** Samples rows from a table using the dialect's sample query. */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns),
    );
    return { headers: result.headers, rows: result.rows, totalRows: result.totalRows };
  }

  /** Samples non-null values of one column; null and distinct counts are not computed. */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /** Always resolves to `null`; `capabilities.columnStats` is declared false. */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /**
   * Executes caller-supplied SQL after the read-only check and row limiting.
   *
   * @throws Error when the SQL is rejected by `assertReadOnlySql` or the
   *   connection id does not match.
   */
  async executeReadOnly(input: KtxClickHouseReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
    const prepared = this.dialect.prepareQuery(limitedSql, input.params);
    const result = await this.query(prepared.sql, prepared.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Estimates a column's cardinality from a sample and, when it does not
   * exceed `options.maxCardinality`, fetches the distinct values.
   *
   * @returns `null` when the cardinality query yields no numeric value;
   *   `values: null` when the cardinality exceeds the maximum.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxClickHouseColumnDistinctValuesOptions,
  ): Promise<KtxClickHouseColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityResult = await this.query(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    const cardinality = Number(cardinalityResult.rows[0]?.[0]);
    if (Number.isNaN(cardinality)) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesResult = await this.query(
      this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit),
    );
    return {
      values: valuesResult.rows.filter((row) => row[0] !== null).map((row) => String(row[0])),
      cardinality,
    };
  }

  /** Sums the rows of the table's active parts in the configured database. */
  async getTableRowCount(tableName: string): Promise<number> {
    const result = await this.query(
      `
        SELECT sum(rows) AS count
        FROM system.parts
        WHERE database = {database:String}
          AND table = {table:String}
          AND active = 1
      `,
      { database: this.clientConfig.database, table: tableName },
    );
    return Number(result.rows[0]?.[0] ?? 0);
  }

  /** Formats a table reference with the dialect's quoting rules. */
  qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes a single identifier with the dialect's quoting rules. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Lists database names, excluding `system` and the information-schema databases. */
  async listSchemas(): Promise<string[]> {
    const rows = await this.queryEachRow<ClickHouseDatabaseRow>(
      `
        SELECT name
        FROM system.databases
        WHERE name NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')
        ORDER BY name
      `,
    );
    return rows.map((row) => row.name);
  }

  /**
   * Lists tables and views in the given databases.
   *
   * @param schemas - Databases to inspect; defaults to the result of `listSchemas()`.
   */
  async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
    const filterSchemas = schemas ?? (await this.listSchemas());
    if (filterSchemas.length === 0) return [];
    const rows = await this.queryEachRow<ClickHouseTableListRow>(
      `
        SELECT database, name, engine
        FROM system.tables
        WHERE database IN ({schemas:Array(String)})
        ORDER BY database, name
      `,
      { schemas: filterSchemas },
    );
    return rows.map((row) => ({
      schema: row.database,
      name: row.name,
      kind: row.engine === 'View' || row.engine === 'MaterializedView' ? ('view' as const) : ('table' as const),
    }));
  }

  /**
   * Closes the client and the resolved endpoint, if any.
   *
   * Both references are cleared up front and the endpoint is closed in a
   * `finally`, so a failing `client.close()` can no longer leave the endpoint
   * open or leave stale handles behind for the next query.
   */
  async cleanup(): Promise<void> {
    const client = this.client;
    const endpoint = this.resolvedEndpoint;
    this.client = null;
    this.resolvedEndpoint = null;
    try {
      await client?.close();
    } finally {
      await endpoint?.close?.();
    }
  }

  /** Maps a `system.tables` row and its columns to a schema table; views carry no row estimate. */
  private toSchemaTable(
    table: ClickHouseTableRow,
    columns: ClickHouseColumnRow[],
    estimatedRows: number,
  ): KtxSchemaTable {
    const kind = tableKind(table.engine);
    return {
      catalog: null,
      db: this.clientConfig.database,
      name: table.name,
      kind,
      comment: table.comment || null,
      estimatedRows: kind === 'view' ? null : estimatedRows,
      columns: columns.map((column) => this.toSchemaColumn(column)),
      foreignKeys: [],
    };
  }

  /** Maps a `system.columns` row to a schema column. */
  private toSchemaColumn(column: ClickHouseColumnRow): KtxSchemaColumn {
    return {
      name: column.name,
      nativeType: column.type,
      normalizedType: this.dialect.mapDataType(column.type),
      dimensionType: this.dialect.mapToDimensionType(column.type),
      nullable: isNullableClickHouseType(column.type),
      primaryKey: column.is_in_primary_key === 1,
      comment: column.comment || null,
    };
  }

  /** Returns the cached client, creating it (and resolving the endpoint) on first use. */
  private async clientForQuery(): Promise<KtxClickHouseClient> {
    if (!this.client) {
      const config = { ...this.clientConfig };
      if (this.endpointResolver) {
        this.resolvedEndpoint = await this.endpointResolver.resolve({
          host: config.host,
          port: config.port,
          connection: this.connection,
        });
        config.host = this.resolvedEndpoint.host;
        config.port = this.resolvedEndpoint.port;
      }
      const protocol = config.ssl ? 'https' : 'http';
      const isProxied = config.host !== this.clientConfig.host;
      this.client = this.clientFactory.createClient({
        url: `${protocol}://${config.host}:${config.port}`,
        username: config.username,
        password: config.password ?? '',
        database: config.database,
        request_timeout: 30_000,
        clickhouse_settings: {
          output_format_json_quote_64bit_integers: 1,
        },
        // When the resolver substituted the host, keep the originally
        // configured host name as the TLS server name.
        ...(isProxied && config.ssl
          ? {
              http_agent: new HttpsAgent({
                servername: this.clientConfig.host,
                keepAlive: true,
              }),
            }
          : {}),
      });
    }
    return this.client;
  }

  /** Runs a read-only statement in `JSONEachRow` format and returns the decoded rows. */
  private async queryEachRow<T>(sql: string, params?: Record<string, unknown>): Promise<T[]> {
    const client = await this.clientForQuery();
    const resultSet = await client.query({
      query: assertReadOnlySql(sql),
      format: 'JSONEachRow',
      ...(params ? { query_params: params } : {}),
    });
    return (await resultSet.json()) as T[];
  }

  /** Runs a read-only statement in `JSONCompact` format and returns headers, types and rows. */
  private async query(sql: string, params?: Record<string, unknown>): Promise<Omit<KtxQueryResult, 'rowCount'>> {
    const client = await this.clientForQuery();
    const resultSet = await client.query({
      query: assertReadOnlySql(sql),
      format: 'JSONCompact',
      ...(params ? { query_params: params } : {}),
    });
    const response = (await resultSet.json()) as ClickHouseCompactResponse;
    const meta = response.meta ?? [];
    return {
      headers: meta.map((field) => field.name),
      headerTypes: meta.map((field) => field.type),
      rows: response.data ?? [],
      totalRows: response.rows ?? response.data?.length ?? 0,
    };
  }

  /** Throws unless `connectionId` is the connection this connector was created for. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`KTX ClickHouse connector ${this.id} cannot serve connection ${connectionId}`);
    }
  }
}
|
||||
49
packages/cli/src/connectors/clickhouse/dialect.test.ts
Normal file
49
packages/cli/src/connectors/clickhouse/dialect.test.ts
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxClickHouseDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the ClickHouse SQL dialect helper: quoting, type mapping, SQL builders, parameters.
describe('KtxClickHouseDialect', () => {
  const dialect = new KtxClickHouseDialect();
  const eventsTable = '`analytics`.`events`';

  it('quotes identifiers and formats database-qualified table names', () => {
    expect(dialect.quoteIdentifier('events')).toBe('`events`');
    // Embedded backticks are escaped by doubling.
    expect(dialect.quoteIdentifier('odd`name')).toBe('`odd``name`');

    const qualified = dialect.formatTableName({ catalog: null, db: 'analytics', name: 'events' });
    expect(qualified).toBe('`analytics`.`events`');

    const unqualified = dialect.formatTableName({ catalog: null, db: null, name: 'events' });
    expect(unqualified).toBe('`events`');
  });

  it('maps nullable and low-cardinality ClickHouse types to KTX dimension types', () => {
    expect(dialect.mapToDimensionType('Nullable(DateTime64(3))')).toBe('time');
    expect(dialect.mapToDimensionType('LowCardinality(Nullable(String))')).toBe('string');
    expect(dialect.mapToDimensionType('UInt64')).toBe('number');
    expect(dialect.mapToDimensionType('Decimal(18, 4)')).toBe('number');
    expect(dialect.mapToDimensionType('Bool')).toBe('boolean');
    expect(dialect.mapToDimensionType('IPv4')).toBe('string');
    // An empty native type falls back to the string dimension.
    expect(dialect.mapToDimensionType('')).toBe('string');
  });

  it('builds sampling, distinct-value, pagination, and time SQL', () => {
    const tableSample = dialect.generateSampleQuery(eventsTable, 25, ['id', 'event_name']);
    expect(tableSample).toBe('SELECT `id`, `event_name` FROM `analytics`.`events` LIMIT 25');

    const columnSample = dialect.generateColumnSampleQuery(eventsTable, 'event_name', 10);
    expect(columnSample).toBe(
      "SELECT `event_name` FROM `analytics`.`events` WHERE `event_name` IS NOT NULL AND trim(toString(`event_name`)) != '' LIMIT 10",
    );

    const distinctValues = dialect.generateDistinctValuesQuery(eventsTable, '`event_name`', 5);
    expect(distinctValues).toContain('SELECT DISTINCT toString(`event_name`) AS val');

    expect(dialect.getLimitOffsetClause(10, 20)).toBe('LIMIT 10 OFFSET 20');
    expect(dialect.getTimeTruncExpression('created_at', 'week')).toBe('toStartOfWeek(created_at, 1)');
  });

  it('prepares named parameters using ClickHouse typed placeholders', () => {
    const prepared = dialect.prepareQuery('select * from events where id = :id and event_name = :name', {
      id: 10,
      name: 'signup',
    });

    expect(prepared).toEqual({
      sql: 'select * from events where id = {id:Int64} and event_name = {name:String}',
      params: { id: 10, name: 'signup' },
    });
  });
});
|
||||
279
packages/cli/src/connectors/clickhouse/dialect.ts
Normal file
279
packages/cli/src/connectors/clickhouse/dialect.ts
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
/** Table reference accepted by `formatTableName`: `name` is required, `catalog` and `db` are optional. */
type ClickHouseTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL dialect helper for ClickHouse: identifier quoting, native-type classification,
 * and builders for the sampling, cardinality, pagination, and time-bucketing SQL
 * fragments used when scanning a ClickHouse database.
 *
 * Unless a method says otherwise, `tableName` and `column` arguments are inserted
 * into the SQL verbatim, so callers must pass already-quoted identifiers or expressions.
 */
export class KtxClickHouseDialect {
  readonly type = 'clickhouse';

  /** Lower-cased ClickHouse base type name (wrappers and parameters removed) to KTX dimension type. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    date: 'time',
    date32: 'time',
    datetime: 'time',
    datetime64: 'time',
    uint8: 'number',
    uint16: 'number',
    uint32: 'number',
    uint64: 'number',
    uint128: 'number',
    uint256: 'number',
    int8: 'number',
    int16: 'number',
    int32: 'number',
    int64: 'number',
    int128: 'number',
    int256: 'number',
    float32: 'number',
    float64: 'number',
    decimal: 'number',
    decimal32: 'number',
    decimal64: 'number',
    decimal128: 'number',
    decimal256: 'number',
    string: 'string',
    fixedstring: 'string',
    uuid: 'string',
    ipv4: 'string',
    ipv6: 'string',
    enum8: 'string',
    enum16: 'string',
    // "point" contains the substring "int"; list it explicitly so the substring
    // fallback in mapToDimensionType does not classify the geo type as a number.
    point: 'string',
    bool: 'boolean',
    boolean: 'boolean',
  };

  /**
   * Wraps an identifier in backticks.
   *
   * Backticks are escaped by doubling. Backslashes are doubled as well because
   * ClickHouse processes backslash escapes inside back-quoted identifiers, so a
   * raw backslash could otherwise swallow the closing backtick.
   */
  quoteIdentifier(identifier: string): string {
    const escaped = identifier.replace(/\\/g, '\\\\').replace(/`/g, '``');
    return `\`${escaped}\``;
  }

  /** Formats a table reference as `` `db`.`name` ``, or just `` `name` `` when `db` is empty or missing. */
  formatTableName(table: ClickHouseTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native ClickHouse type as a KTX dimension type.
   *
   * `Nullable(...)` and `LowCardinality(...)` wrappers and any type parameters are
   * removed first. Exact matches in `typeMappings` win; otherwise a substring
   * heuristic is applied, and anything unrecognised (or empty) maps to `'string'`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }

    let normalizedType = nativeType.toLowerCase().trim();
    // Covers Nullable(T), LowCardinality(T), and LowCardinality(Nullable(T)).
    normalizedType = this.unwrapClickHouseType(normalizedType, 'nullable');
    normalizedType = this.unwrapClickHouseType(normalizedType, 'lowcardinality');
    normalizedType = this.unwrapClickHouseType(normalizedType, 'nullable');
    if (normalizedType.includes('(')) {
      // Drop type parameters, e.g. "decimal(18, 4)" -> "decimal".
      normalizedType = normalizedType.split('(')[0] ?? normalizedType;
    }

    const mapped = this.typeMappings[normalizedType];
    if (mapped) {
      return mapped;
    }
    if (normalizedType.includes('date') || normalizedType.includes('time')) {
      return 'time';
    }
    if (
      normalizedType.includes('int') ||
      normalizedType.includes('float') ||
      normalizedType.includes('decimal')
    ) {
      return 'number';
    }
    if (normalizedType === 'bool' || normalizedType === 'boolean') {
      return 'boolean';
    }
    return 'string';
  }

  /** Builds `SELECT <columns|*> FROM <tableName> LIMIT <limit>`; column names are quoted here. */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /** Builds a query sampling non-null, non-blank values of one column; the column name is quoted here. */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND trim(toString(${quotedColumn})) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders into ClickHouse typed placeholders (`{name:Type}`).
   *
   * The SQL is rewritten in a single pass, so text inserted for one parameter is
   * never re-scanned for another. Placeholders preceded by a second colon are left
   * alone so that `expr::Type` casts are not mistaken for parameters. Longer names
   * are tried first so `:id2` is not matched as `:id`.
   *
   * @param sql - SQL text containing `:name` placeholders.
   * @param params - Values keyed by placeholder name.
   * @returns The rewritten SQL and only those parameters that were substituted;
   *   `params` is `undefined` when no parameters were supplied.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }

    const queryParams: Record<string, unknown> = {};
    const sortedKeys = Object.keys(params)
      .filter((key) => key.length > 0)
      .sort((a, b) => b.length - a.length);
    if (sortedKeys.length === 0) {
      return { sql, params: queryParams };
    }

    // Escape regex metacharacters so a key is always matched literally.
    const alternation = sortedKeys.map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|');
    const placeholderPattern = new RegExp(`(?<!:):(${alternation})\\b`, 'g');
    const parameterizedQuery = sql.replace(placeholderPattern, (_match: string, key: string) => {
      queryParams[key] = params[key];
      return `{${key}:${this.inferClickHouseType(params[key])}}`;
    });

    return { sql: parameterizedQuery, params: queryParams };
  }

  /** Returns a row-level random filter for a fraction strictly between 0 and 1, otherwise an empty string. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    // rand() yields a UInt32, so dividing by 2^32 - 1 scales it into [0, 1].
    return `rand() / 4294967295.0 < ${samplePct}`;
  }

  /** Always returns an empty string: this dialect emits no table-level sample clause. */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** Builds `LIMIT n`, adding `OFFSET m` only when a positive offset is given. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting NULLs in `column`. */
  getNullCountExpression(column: string): string {
    return `countIf(${column} IS NULL)`;
  }

  /** Aggregate expression counting distinct values of `column`. */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /** Counts distinct non-null values among the first `sampleSize` rows returned for the column. */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
    `;
  }

  /** Lists up to `limit` distinct non-null values of the column, as strings, in sorted order. */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT toString(${columnName}) AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /** Always returns `null`: this dialect provides no column-statistics query. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Like `generateCardinalitySampleQuery`, but samples rows in random order (`ORDER BY rand()`). */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY rand()
        LIMIT ${sampleSize}
      )
    `;
  }

  /**
   * Builds a `toStartOf*` expression truncating `column` to the given granularity.
   * Weeks use mode 1. The optional timezone is emitted as an escaped string literal.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const tz = timezone ? `, ${this.quoteStringLiteral(timezone)}` : '';
    switch (granularity) {
      case 'day':
        return `toStartOfDay(${column}${tz})`;
      case 'week':
        return `toStartOfWeek(${column}, 1${tz})`;
      case 'month':
        return `toStartOfMonth(${column}${tz})`;
      case 'quarter':
        return `toStartOfQuarter(${column}${tz})`;
      case 'year':
        return `toStartOfYear(${column}${tz})`;
    }
  }

  /**
   * Builds an expression bucketing `column` into fixed-size intervals counted from an origin.
   *
   * @param column - Column or expression to bucket (inserted verbatim).
   * @param interval - `"<amount> <unit>"`, e.g. `"15 minutes"` or `"3 months"`.
   * @param origin - Bucket origin; defaults to `1970-01-01`.
   * @param timezone - When given, the column is converted with `toTimezone` first.
   * @throws Error when the interval is malformed or its amount is not positive.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `toTimezone(${column}, ${this.quoteStringLiteral(timezone)})` : column;
    const { amount, unit } = this.parseInterval(interval);
    if (amount <= 0) {
      // A zero or negative bucket size would divide by zero or invert the buckets.
      throw new Error(`Invalid interval "${interval}": amount must be greater than zero`);
    }
    const originExpr = `toDateTime(${this.quoteStringLiteral(origin ? origin : '1970-01-01')})`;

    // Months, quarters, and years vary in length, so bucket them with calendar arithmetic.
    const calendarUnit = this.toClickHouseDateDiffUnit(unit);
    if (calendarUnit) {
      return `dateAdd(${calendarUnit}, intDiv(dateDiff(${calendarUnit}, ${originExpr}, ${col}), ${amount}) * ${amount}, ${originExpr})`;
    }

    const seconds = this.intervalToSeconds(amount, unit);
    return `addSeconds(${originExpr}, intDiv(toUInt64(dateDiff('second', ${originExpr}, ${col})), ${seconds}) * ${seconds})`;
  }

  /**
   * Converts `"<amount> <unit>"` into `INTERVAL <amount> <UNIT>`.
   *
   * @throws Error when the interval is malformed.
   */
  parseIntervalToSql(interval: string): string {
    const { amount, unit } = this.parseInterval(interval);
    return `INTERVAL ${amount} ${unit.toUpperCase()}`;
  }

  /** Splits `"<amount> <unit>"` into a finite amount and a lower-cased alphabetic unit; throws when malformed. */
  private parseInterval(interval: string): { amount: number; unit: string } {
    const [rawAmount, rawUnit] = interval.trim().split(/\s+/);
    const amount = rawAmount ? Number(rawAmount) : Number.NaN;
    if (!rawUnit || !Number.isFinite(amount) || !/^[A-Za-z]+$/.test(rawUnit)) {
      throw new Error(`Invalid interval "${interval}": expected "<amount> <unit>", e.g. "15 minutes"`);
    }
    return { amount, unit: rawUnit.toLowerCase() };
  }

  /** Renders a value as a single-quoted SQL string literal, escaping backslashes and quotes. */
  private quoteStringLiteral(value: string): string {
    return `'${value.replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;
  }

  /** Removes one `wrapper(...)` layer from an already lower-cased type name, if present. */
  private unwrapClickHouseType(value: string, wrapper: string): string {
    const prefix = `${wrapper}(`;
    return value.startsWith(prefix) && value.endsWith(')') ? value.slice(prefix.length, -1) : value;
  }

  /** Chooses the ClickHouse placeholder type for a JavaScript value; anything unrecognised is `String`. */
  private inferClickHouseType(value: unknown): string {
    if (value === null || value === undefined) {
      return 'String';
    }
    if (typeof value === 'boolean') {
      return 'Bool';
    }
    if (typeof value === 'number') {
      return Number.isInteger(value) ? 'Int64' : 'Float64';
    }
    if (value instanceof Date) {
      return 'DateTime';
    }
    return 'String';
  }

  /** Returns the quoted `dateDiff`/`dateAdd` unit for month, quarter, or year; `null` for other units. */
  private toClickHouseDateDiffUnit(unit: string): string | null {
    if (unit === 'month' || unit === 'months') {
      return "'month'";
    }
    if (unit === 'quarter' || unit === 'quarters') {
      return "'quarter'";
    }
    if (unit === 'year' || unit === 'years') {
      return "'year'";
    }
    return null;
  }

  /** Converts a fixed-length interval to seconds; unrecognised units are treated as days. */
  private intervalToSeconds(amount: number, unit: string): number {
    switch (unit) {
      case 'second':
      case 'seconds':
        return amount;
      case 'minute':
      case 'minutes':
        return amount * 60;
      case 'hour':
      case 'hours':
        return amount * 3600;
      case 'day':
      case 'days':
        return amount * 86400;
      case 'week':
      case 'weeks':
        return amount * 604800;
      default:
        return amount * 86400;
    }
  }
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
import {
|
||||
KtxClickHouseScanConnector,
|
||||
type KtxClickHouseClientFactory,
|
||||
type KtxClickHouseConnectionConfig,
|
||||
type KtxClickHouseEndpointResolver,
|
||||
} from './connector.js';
|
||||
|
||||
/** Inputs for `createClickHouseLiveDatabaseIntrospection`. */
interface CreateClickHouseLiveDatabaseIntrospectionOptions {
  /** Project connection configs keyed by connection id; looked up on every `extractSchema` call. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Optional client factory forwarded to the scan connector. */
  clientFactory?: KtxClickHouseClientFactory;
  /** Optional endpoint resolver forwarded to the scan connector. */
  endpointResolver?: KtxClickHouseEndpointResolver;
  /** Optional clock forwarded to the scan connector. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Adapts the ClickHouse scan connector to the live-database introspection port.
 *
 * Each `extractSchema` call builds a fresh connector for the requested connection,
 * runs `introspect`, and always calls `cleanup()` afterwards, whether or not
 * introspection succeeded.
 *
 * @param options - Connection configs plus optional connector collaborators.
 * @returns A port whose `extractSchema(connectionId)` resolves to the connector's snapshot.
 */
export function createClickHouseLiveDatabaseIntrospection(
  options: CreateClickHouseLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  return {
    async extractSchema(connectionId: string) {
      const { connections, clientFactory, endpointResolver, now } = options;
      // The entry may be missing; it is handed to the connector as-is.
      const connection = connections[connectionId] as KtxClickHouseConnectionConfig | undefined;
      const connector = new KtxClickHouseScanConnector({
        connectionId,
        connection,
        clientFactory,
        endpointResolver,
        now,
      });

      try {
        const scanInput = { connectionId, driver: 'clickhouse' };
        const scanContext = { runId: `clickhouse-${connectionId}` };
        return await connector.introspect(scanInput, scanContext);
      } finally {
        await connector.cleanup();
      }
    },
  };
}
|
||||
277
packages/cli/src/connectors/mysql/connector.test.ts
Normal file
277
packages/cli/src/connectors/mysql/connector.test.ts
Normal file
|
|
@ -0,0 +1,277 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { FieldPacket, RowDataPacket } from 'mysql2/promise';
|
||||
import { createMysqlLiveDatabaseIntrospection } from '../../connectors/mysql/live-database-introspection.js';
|
||||
import { isKtxMysqlConnectionConfig, KtxMysqlScanConnector, mysqlConnectionPoolConfigFromConfig, type KtxMysqlPoolFactory } from '../../connectors/mysql/connector.js';
|
||||
|
||||
/** Casts plain rows and field descriptors into the `[rows, fields]` tuple shape returned by `query`. */
function mysqlResult(rows: Record<string, unknown>[], fields: Array<{ name: string; type?: number }>): [RowDataPacket[], FieldPacket[]] {
  const typedRows = rows as RowDataPacket[];
  const typedFields = fields as FieldPacket[];
  return [typedRows, typedFields];
}
|
||||
|
||||
/**
 * Builds a pool factory whose connections answer a fixed set of SQL statements with canned results.
 *
 * Responders are tried in declaration order and the first match wins, so the order below
 * is significant. Any unmatched statement throws so unexpected queries fail loudly.
 */
function fakePoolFactory(): KtxMysqlPoolFactory {
  type QueryResult = [RowDataPacket[], FieldPacket[]];
  const ordersPreview = (): QueryResult =>
    mysqlResult([{ id: 10, status: 'paid' }], [{ name: 'id', type: 3 }, { name: 'status', type: 253 }]);

  const responders: Array<{ matches: (sql: string) => boolean; respond: () => QueryResult }> = [
    {
      matches: (sql) => sql.includes('INFORMATION_SCHEMA.TABLES'),
      respond: () =>
        mysqlResult(
          [
            { TABLE_NAME: 'customers', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'Customer table', TABLE_ROWS: 2 },
            { TABLE_NAME: 'orders', TABLE_TYPE: 'BASE TABLE', TABLE_COMMENT: 'InnoDB free: 1 kB; Order table', TABLE_ROWS: 2 },
            { TABLE_NAME: 'order_summary', TABLE_TYPE: 'VIEW', TABLE_COMMENT: '', TABLE_ROWS: null },
          ],
          [{ name: 'TABLE_NAME' }, { name: 'TABLE_TYPE' }, { name: 'TABLE_COMMENT' }, { name: 'TABLE_ROWS' }],
        ),
    },
    {
      matches: (sql) => sql.includes('INFORMATION_SCHEMA.COLUMNS'),
      respond: () =>
        mysqlResult(
          [
            { TABLE_NAME: 'customers', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: 'PK' },
            { TABLE_NAME: 'customers', COLUMN_NAME: 'name', DATA_TYPE: 'varchar', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
            { TABLE_NAME: 'orders', COLUMN_NAME: 'id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
            { TABLE_NAME: 'orders', COLUMN_NAME: 'customer_id', DATA_TYPE: 'int', IS_NULLABLE: 'NO', COLUMN_COMMENT: '' },
            { TABLE_NAME: 'orders', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' },
            { TABLE_NAME: 'order_summary', COLUMN_NAME: 'status', DATA_TYPE: 'varchar', IS_NULLABLE: 'YES', COLUMN_COMMENT: '' },
          ],
          [{ name: 'TABLE_NAME' }, { name: 'COLUMN_NAME' }, { name: 'DATA_TYPE' }, { name: 'IS_NULLABLE' }],
        ),
    },
    {
      matches: (sql) => sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes("CONSTRAINT_NAME = 'PRIMARY'"),
      respond: () =>
        mysqlResult([{ TABLE_NAME: 'customers', COLUMN_NAME: 'id' }, { TABLE_NAME: 'orders', COLUMN_NAME: 'id' }], []),
    },
    {
      matches: (sql) => sql.includes('INFORMATION_SCHEMA.KEY_COLUMN_USAGE') && sql.includes('REFERENCED_TABLE_NAME IS NOT NULL'),
      respond: () =>
        mysqlResult(
          [
            {
              TABLE_NAME: 'orders',
              COLUMN_NAME: 'customer_id',
              REFERENCED_TABLE_NAME: 'customers',
              REFERENCED_COLUMN_NAME: 'id',
              CONSTRAINT_NAME: 'orders_customer_id_fk',
            },
          ],
          [],
        ),
    },
    {
      matches: (sql) => sql.includes('SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 1'),
      respond: ordersPreview,
    },
    {
      matches: (sql) => sql.includes('select * from (select id, status from analytics.orders) as ktx_query_result limit 1'),
      respond: ordersPreview,
    },
    {
      matches: (sql) => sql.includes('SELECT `status` FROM `analytics`.`orders`'),
      respond: () => mysqlResult([{ status: 'paid' }, { status: 'open' }], [{ name: 'status', type: 253 }]),
    },
    {
      matches: (sql) => sql.includes('COUNT(DISTINCT val)'),
      respond: () => mysqlResult([{ cardinality: 2 }], [{ name: 'cardinality', type: 8 }]),
    },
    {
      matches: (sql) => sql.includes('SELECT DISTINCT CAST(`status` AS CHAR) AS val'),
      respond: () => mysqlResult([{ val: 'open' }, { val: 'paid' }], [{ name: 'val', type: 253 }]),
    },
    {
      matches: (sql) => sql.includes('COUNT(*) AS count'),
      respond: () => mysqlResult([{ count: 2 }], [{ name: 'count', type: 8 }]),
    },
    {
      matches: (sql) => sql.includes('INFORMATION_SCHEMA.SCHEMATA'),
      respond: () => mysqlResult([{ SCHEMA_NAME: 'analytics' }, { SCHEMA_NAME: 'warehouse' }], [{ name: 'SCHEMA_NAME' }]),
    },
    {
      matches: (sql) => sql.trim() === 'SELECT 1',
      respond: () => mysqlResult([{ '1': 1 }], [{ name: '1', type: 8 }]),
    },
  ];

  const query = vi.fn(async (sql: string, params?: unknown): Promise<QueryResult> => {
    const responder = responders.find((candidate) => candidate.matches(sql));
    if (!responder) {
      throw new Error(`Unexpected SQL: ${sql} params=${JSON.stringify(params)}`);
    }
    return responder.respond();
  });
  const release = vi.fn();
  const end = vi.fn(async () => undefined);

  return {
    createPool: vi.fn(() => ({
      getConnection: vi.fn(async () => ({ query, release })),
      end,
    })),
  };
}
|
||||
|
||||
// Tests for the native MySQL scan connector and its live-database introspection adapter.
describe('KtxMysqlScanConnector', () => {
  /** Fresh copy of the connection config shared by most tests. */
  const warehouseConnection = () => ({
    driver: 'mysql',
    host: 'db.example.test',
    database: 'analytics',
    username: 'reader',
    password: 'secret', // pragma: allowlist secret
  });
  const fixedNow = () => new Date('2026-04-29T12:00:00.000Z');
  const ordersTable = { catalog: null, db: 'analytics', name: 'orders' };

  it('resolves MySQL connection configuration safely', () => {
    expect(isKtxMysqlConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(true);
    expect(isKtxMysqlConnectionConfig({ driver: 'postgres', host: 'localhost', database: 'analytics' })).toBe(false);

    const poolConfig = mysqlConnectionPoolConfigFromConfig({
      connectionId: 'warehouse',
      connection: {
        driver: 'mysql',
        host: 'db.example.test',
        port: 3307,
        database: 'analytics',
        username: 'reader',
        password: 'secret', // pragma: allowlist secret
        ssl: true,
      },
    });

    expect(poolConfig).toMatchObject({
      host: 'db.example.test',
      port: 3307,
      database: 'analytics',
      user: 'reader',
      password: 'secret', // pragma: allowlist secret
      ssl: { rejectUnauthorized: false },
    });
  });

  it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
    const connector = new KtxMysqlScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      poolFactory: fakePoolFactory(),
      now: fixedNow,
    });

    const snapshot = await connector.introspect(
      { connectionId: 'warehouse', driver: 'mysql' },
      { runId: 'scan-run-1' },
    );

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'mysql',
      extractedAt: '2026-04-29T12:00:00.000Z',
      scope: { schemas: ['analytics'] },
      metadata: {
        database: 'analytics',
        host: 'db.example.test',
        table_count: 3,
        total_columns: 6,
      },
    });

    const tableSummaries = snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment]);
    expect(tableSummaries).toEqual([
      ['customers', 'table', 2, 'Customer table'],
      ['orders', 'table', 2, 'Order table'],
      ['order_summary', 'view', null, null],
    ]);

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'int',
      normalizedType: 'int',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
      comment: 'PK',
    });

    const orders = snapshot.tables.find((table) => table.name === 'orders');
    expect(orders?.foreignKeys).toEqual([
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: 'analytics',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: 'orders_customer_id_fk',
      },
    ]);
  });

  it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
    const poolFactory = fakePoolFactory();
    const connector = new KtxMysqlScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      poolFactory,
    });

    const tableSample = connector.sampleTable(
      { connectionId: 'warehouse', table: ordersTable, columns: ['id', 'status'], limit: 1 },
      { runId: 'scan-run-1' },
    );
    await expect(tableSample).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1 });

    const columnSample = connector.sampleColumn(
      { connectionId: 'warehouse', table: ordersTable, column: 'status', limit: 5 },
      { runId: 'scan-run-1' },
    );
    await expect(columnSample).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

    const distinctValues = connector.getColumnDistinctValues(
      { catalog: null, db: 'analytics', name: 'orders' },
      'status',
      { maxCardinality: 5, limit: 10, sampleSize: 100 },
    );
    await expect(distinctValues).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    const readOnlyResult = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'select id, status from analytics.orders', maxRows: 1 },
      { runId: 'scan-run-1' },
    );
    await expect(readOnlyResult).resolves.toMatchObject({
      headers: ['id', 'status'],
      rows: [[10, 'paid']],
      totalRows: 1,
      rowCount: 1,
    });

    // Statements that are not read-only must be rejected.
    const rejectedWrite = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'delete from orders' },
      { runId: 'scan-run-1' },
    );
    await expect(rejectedWrite).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

    await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
    await expect(connector.listSchemas()).resolves.toEqual(['analytics', 'warehouse']);

    const stats = connector.columnStats(
      { connectionId: 'warehouse', table: ordersTable, column: 'status' },
      { runId: 'scan-run-1' },
    );
    await expect(stats).resolves.toBeNull();

    await connector.cleanup();
  });

  it('adapts native MySQL snapshots to live-database introspection for local ingest', async () => {
    const introspection = createMysqlLiveDatabaseIntrospection({
      connections: { warehouse: warehouseConnection() },
      poolFactory: fakePoolFactory(),
      now: fixedNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T12:00:00.000Z',
    });

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers).toMatchObject({
      name: 'customers',
      catalog: null,
      db: 'analytics',
      columns: [
        {
          name: 'id',
          nativeType: 'int',
          normalizedType: 'int',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: 'PK',
        },
        {
          name: 'name',
          nativeType: 'varchar',
          normalizedType: 'varchar',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: null,
        },
      ],
      foreignKeys: [],
    });
  });
});
|
||||
587
packages/cli/src/connectors/mysql/connector.ts
Normal file
587
packages/cli/src/connectors/mysql/connector.ts
Normal file
|
|
@ -0,0 +1,587 @@
|
|||
import mysql, { type FieldPacket, type Pool, type RowDataPacket } from 'mysql2/promise';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxTableListEntry, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { KtxMysqlDialect } from './dialect.js';
|
||||
|
||||
/**
 * User-supplied MySQL connection settings.
 *
 * String fields read through `stringConfigValue` may hold a literal value or an
 * `env:NAME` / `file:PATH` reference.
 */
export interface KtxMysqlConnectionConfig {
  /** Driver name; compared case-insensitively against `mysql`. */
  driver?: string;
  host?: string;
  port?: number;
  database?: string;
  /** Login name; takes precedence over `user` when both are set. */
  username?: string;
  /** Alternative spelling of `username`. */
  user?: string;
  password?: string;
  /** Connection URL; fields parsed from it act as defaults that explicit fields override. */
  url?: string;
  ssl?: boolean | { rejectUnauthorized?: boolean };
  /** Additional, connector-agnostic keys are tolerated. */
  [key: string]: unknown;
}
|
||||
|
||||
/** Fully resolved settings handed to a `KtxMysqlPoolFactory` to create a pool. */
export interface KtxMysqlPoolConfig {
  host: string;
  port: number;
  database: string;
  user: string;
  password?: string;
  /** Maximum number of pooled connections. */
  connectionLimit: number;
  /** Always `true`. */
  waitForConnections: true;
  ssl?: { rejectUnauthorized: boolean };
}
|
||||
|
||||
/** Minimal surface of a pooled connection used by this connector. */
interface KtxMysqlConnection {
  /** Runs a statement and resolves to a `[rows, fields]` tuple. */
  query(sql: string, params?: unknown): Promise<[RowDataPacket[], FieldPacket[]]>;
  /** Returns the connection to its pool. */
  release(): void;
}
|
||||
|
||||
/** Minimal surface of a connection pool used by this connector. */
interface KtxMysqlPool {
  /** Acquires a connection from the pool. */
  getConnection(): Promise<KtxMysqlConnection>;
  /** Shuts the pool down. */
  end(): Promise<void>;
}
|
||||
|
||||
/** Creates pools from a resolved config; lets tests substitute a fake pool. */
export interface KtxMysqlPoolFactory {
  createPool(config: KtxMysqlPoolConfig): KtxMysqlPool;
}
|
||||
|
||||
/** Host and port produced by a `KtxMysqlEndpointResolver`. */
interface KtxMysqlResolvedEndpoint {
  host: string;
  port: number;
  /** Optional async teardown hook supplied by the resolver. */
  close?: () => Promise<void>;
}
|
||||
|
||||
/** Maps a configured host/port (plus the full connection config) to a resolved endpoint. */
export interface KtxMysqlEndpointResolver {
  resolve(input: { host: string; port: number; connection: KtxMysqlConnectionConfig }): Promise<KtxMysqlResolvedEndpoint>;
}
|
||||
|
||||
/** Constructor options for the MySQL scan connector. */
export interface KtxMysqlScanConnectorOptions {
  /** Id of the connection this connector serves. */
  connectionId: string;
  /** Connection settings; may be `undefined` when the id has no configured connection. */
  connection: KtxMysqlConnectionConfig | undefined;
  /** Optional pool factory override. */
  poolFactory?: KtxMysqlPoolFactory;
  /** Optional endpoint resolver. */
  endpointResolver?: KtxMysqlEndpointResolver;
  /** Optional environment variables override. */
  env?: NodeJS.ProcessEnv;
  /** Optional clock override. */
  now?: () => Date;
}
|
||||
|
||||
/** Read-only query input extended with optional bound parameters (named or positional). */
export interface KtxMysqlReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  params?: Record<string, unknown> | unknown[];
}
|
||||
|
||||
/** Options for a distinct-values lookup on a single column. */
export interface KtxMysqlColumnDistinctValuesOptions {
  /** Cardinality threshold for the lookup. */
  maxCardinality: number;
  /** Maximum number of values to return. */
  limit: number;
  /** Optional sample size for the cardinality estimate. */
  sampleSize?: number;
}
|
||||
|
||||
/** Result of a distinct-values lookup. */
export interface KtxMysqlColumnDistinctValuesResult {
  /** Distinct values as strings, or `null` when none are reported. */
  values: string[] | null;
  /** Cardinality measured for the column. */
  cardinality: number;
}
|
||||
|
||||
/** Row shape for the table-metadata query (columns named as in `INFORMATION_SCHEMA.TABLES`). */
interface MysqlTableRow extends RowDataPacket {
  TABLE_NAME: string;
  TABLE_TYPE: string;
  TABLE_COMMENT: string | null;
  TABLE_ROWS: number | null;
}
|
||||
|
||||
/** Row shape for the column-metadata query (columns named as in `INFORMATION_SCHEMA.COLUMNS`). */
interface MysqlColumnRow extends RowDataPacket {
  TABLE_NAME: string;
  COLUMN_NAME: string;
  DATA_TYPE: string;
  /** Nullability flag as a string. */
  IS_NULLABLE: string;
  COLUMN_COMMENT: string | null;
}
|
||||
|
||||
/** Row shape for the primary-key query: one row per (table, key column). */
interface MysqlPrimaryKeyRow extends RowDataPacket {
  TABLE_NAME: string;
  COLUMN_NAME: string;
}
|
||||
|
||||
/** Row shape for the foreign-key query: one row per referencing column. */
interface MysqlForeignKeyRow extends RowDataPacket {
  TABLE_NAME: string;
  COLUMN_NAME: string;
  REFERENCED_TABLE_NAME: string;
  REFERENCED_COLUMN_NAME: string;
  CONSTRAINT_NAME: string;
}
|
||||
|
||||
/** Row shape for the schema-listing query. */
interface MysqlSchemaRow extends RowDataPacket {
  SCHEMA_NAME: string;
}
|
||||
|
||||
/** Row shape for a table listing that includes the owning schema. */
interface MysqlTableListRow extends RowDataPacket {
  TABLE_SCHEMA: string;
  TABLE_NAME: string;
  TABLE_TYPE: string;
}
|
||||
|
||||
/** Row shape for aggregate queries that return a `count` or `cardinality` column. */
interface MysqlCountRow extends RowDataPacket {
  count?: unknown;
  cardinality?: unknown;
}
|
||||
|
||||
/** Row shape for distinct-value queries, which alias the value column as `val`. */
interface MysqlDistinctValueRow extends RowDataPacket {
  val: unknown;
}
|
||||
|
||||
/** Default pool factory: delegates to `mysql.createPool` from `mysql2/promise`. */
class DefaultMysqlPoolFactory implements KtxMysqlPoolFactory {
  createPool(config: KtxMysqlPoolConfig): KtxMysqlPool {
    const pool = mysql.createPool(config) as Pool;
    return pool;
  }
}
|
||||
|
||||
/**
 * Reads a string setting from the connection config and resolves any `env:` / `file:` reference.
 *
 * @param connection - Connection config, possibly `undefined`.
 * @param key - Setting to read.
 * @param env - Environment used to resolve `env:` references.
 * @returns The resolved value, or `undefined` when the setting is missing, not a string, or blank.
 */
function stringConfigValue(
  connection: KtxMysqlConnectionConfig | undefined,
  key: keyof KtxMysqlConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Resolves a config string that may point at an environment variable or a file.
 *
 * - `env:NAME` returns `env[NAME]`, or an empty string when it is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file. A leading `~`
 *   is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param value - Literal value or `env:` / `file:` reference.
 * @param env - Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    const envName = value.slice('env:'.length);
    return env[envName] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Strip the separator after "~" before resolving: path.resolve treats a segment
    // starting with "/" as absolute and would discard the home directory, turning
    // "~/secret" into "/secret".
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/** Returns `value` when it is a finite number; otherwise `undefined`. */
function maybeNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Extracts connection settings from a MySQL connection URL.
 *
 * Username, password, and database name are percent-decoded. SSL is enabled when
 * the `ssl` (or, failing that, `sslmode`) query parameter equals `true` or
 * `required`, compared case-insensitively.
 *
 * @param url - Connection URL, e.g. `mysql://user:pass@host:3306/db?ssl=true`.
 * @returns The parsed settings; parts absent from the URL are `undefined`, and `ssl` is always a boolean.
 * @throws TypeError when `url` cannot be parsed; URIError when a component has malformed percent-encoding.
 */
function parseMysqlUrl(url: string): Partial<KtxMysqlConnectionConfig> {
  const parsed = new URL(url);
  const sslParam = (parsed.searchParams.get('ssl') ?? parsed.searchParams.get('sslmode'))?.toLowerCase();
  const databasePath = parsed.pathname.replace(/^\/+/, '');
  return {
    host: parsed.hostname,
    port: parsed.port ? Number(parsed.port) : undefined,
    // Decode like username/password so names such as "my%20db" come back as "my db".
    database: databasePath ? decodeURIComponent(databasePath) : undefined,
    username: parsed.username ? decodeURIComponent(parsed.username) : undefined,
    password: parsed.password ? decodeURIComponent(parsed.password) : undefined,
    ssl: sslParam === 'true' || sslParam === 'required',
  };
}
|
||||
|
||||
/**
 * Strips the `InnoDB free: ...;` prefix from a table comment.
 *
 * @param comment - Raw table comment.
 * @returns The user-written part of the comment, or `null` when nothing meaningful remains.
 */
function cleanMySqlTableComment(comment: string | null): string | null {
  if (!comment) {
    return null;
  }
  if (!comment.startsWith('InnoDB free:')) {
    return comment;
  }
  // The user comment, when present, follows the first semicolon.
  const separatorIndex = comment.indexOf(';');
  if (separatorIndex < 0) {
    return null;
  }
  const remainder = comment.slice(separatorIndex + 1).trim();
  return remainder === '' ? null : remainder;
}
|
||||
|
||||
/** Groups rows by `TABLE_NAME`, preserving first-seen table order and row order within each table. */
function groupByTable<T extends { TABLE_NAME: string }>(rows: T[]): Map<string, T[]> {
  const byTable = new Map<string, T[]>();
  rows.forEach((row) => {
    const bucket = byTable.get(row.TABLE_NAME);
    if (bucket) {
      bucket.push(row);
    } else {
      byTable.set(row.TABLE_NAME, [row]);
    }
  });
  return byTable;
}
|
||||
|
||||
/** Collects, per `TABLE_NAME`, the set of `COLUMN_NAME`s found in the given primary-key rows. */
function primaryKeyMap(rows: MysqlPrimaryKeyRow[]): Map<string, Set<string>> {
  const keysByTable = new Map<string, Set<string>>();
  rows.forEach((row) => {
    let keyColumns = keysByTable.get(row.TABLE_NAME);
    if (keyColumns === undefined) {
      keyColumns = new Set<string>();
      keysByTable.set(row.TABLE_NAME, keyColumns);
    }
    keyColumns.add(row.COLUMN_NAME);
  });
  return keysByTable;
}
|
||||
|
||||
/**
 * Converts query parameters into the positional array handed to the driver.
 *
 * Arrays pass through untouched, objects contribute their values in `Object.values` order, and
 * missing parameters stay `undefined`.
 */
function queryParams(params: Record<string, unknown> | unknown[] | undefined): unknown[] | undefined {
  if (Array.isArray(params)) {
    return params;
  }
  return params ? Object.values(params) : undefined;
}
|
||||
|
||||
/**
 * Type guard that accepts a connection whose `driver`, stringified and lower-cased, is `mysql`.
 * A missing connection or missing driver is rejected.
 */
export function isKtxMysqlConnectionConfig(
  connection: KtxMysqlConnectionConfig | undefined,
): connection is KtxMysqlConnectionConfig {
  const driver = connection?.driver ?? '';
  return String(driver).toLowerCase() === 'mysql';
}
|
||||
|
||||
/**
 * Derives the MySQL pool settings for one configured connection.
 *
 * Fields parsed from the (optionally referenced) `url` act as defaults; fields set directly on the
 * connection override them. The port falls back to 3306, and the pool is always configured with
 * `connectionLimit: 10` and `waitForConnections: true`. `ssl: true` maps to
 * `{ rejectUnauthorized: false }`; an `ssl` object keeps its `rejectUnauthorized` value
 * (defaulting to `false`).
 *
 * @param input.connectionId - Connection key, used in error messages.
 * @param input.connection - Connection config; must use the `mysql` driver.
 * @param input.env - Environment used to resolve string references; defaults to `process.env`.
 * @throws When the driver is not MySQL, or when host, database, or user cannot be determined.
 *
 * @internal
 */
export function mysqlConnectionPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KtxMysqlConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxMysqlPoolConfig {
  const { connectionId, connection } = input;
  // Captured before the guard so the error can name whatever driver was supplied.
  const inputDriver = connection?.driver ?? 'unknown';
  if (!isKtxMysqlConnectionConfig(connection)) {
    throw new Error(`Native MySQL connector cannot run driver "${inputDriver}"`);
  }

  const env = input.env ?? process.env;
  const missing = (what: string): Error =>
    new Error(`Native MySQL connector requires connections.${connectionId}.${what}`);

  const referencedUrl = stringConfigValue(connection, 'url', env);
  const merged: KtxMysqlConnectionConfig = {
    ...(referencedUrl ? parseMysqlUrl(referencedUrl) : {}),
    ...connection,
  };

  // All three values are resolved before any of them is validated.
  const host = stringConfigValue(merged, 'host', env);
  const database = stringConfigValue(merged, 'database', env);
  const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);

  if (!host) {
    throw missing('host or url');
  }
  if (!database) {
    throw missing('database or url');
  }
  if (!user) {
    throw missing('username, user, or url');
  }

  const ssl =
    merged.ssl === true
      ? { rejectUnauthorized: false }
      : typeof merged.ssl === 'object'
        ? merged.ssl
        : undefined;
  const sslSettings = ssl ? { ssl: { rejectUnauthorized: ssl.rejectUnauthorized ?? false } } : {};

  return {
    host,
    port: maybeNumber(merged.port) ?? 3306,
    database,
    user,
    password: stringConfigValue(merged, 'password', env),
    connectionLimit: 10,
    waitForConnections: true,
    ...sslSettings,
  };
}
|
||||
|
||||
/**
 * Scan connector for MySQL.
 *
 * The pool configuration is derived once in the constructor; the pool itself (and, when an
 * endpoint resolver is supplied, the resolved endpoint) is created on the first query and released
 * by {@link KtxMysqlScanConnector.cleanup}.
 */
export class KtxMysqlScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'mysql' as const;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly connection: KtxMysqlConnectionConfig;
  private readonly poolConfig: KtxMysqlPoolConfig;
  private readonly poolFactory: KtxMysqlPoolFactory;
  private readonly endpointResolver?: KtxMysqlEndpointResolver;
  private readonly now: () => Date;
  private readonly dialect = new KtxMysqlDialect();
  private pool: KtxMysqlPool | null = null;
  private resolvedEndpoint: KtxMysqlResolvedEndpoint | null = null;

  /**
   * @throws When the connection config cannot be turned into pool settings
   *   (see `mysqlConnectionPoolConfigFromConfig`).
   */
  constructor(options: KtxMysqlScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.connection = options.connection ?? {};
    this.poolConfig = mysqlConnectionPoolConfigFromConfig({
      connectionId: options.connectionId,
      connection: options.connection,
      env: options.env,
    });
    this.poolFactory = options.poolFactory ?? new DefaultMysqlPoolFactory();
    this.endpointResolver = options.endpointResolver;
    this.now = options.now ?? (() => new Date());
    this.id = `mysql:${options.connectionId}`;
  }

  /** Runs `SELECT 1`; failures are reported in the result instead of being thrown. */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      await this.query('SELECT 1');
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Reads tables, views, columns, primary keys, and foreign keys of the configured database from
   * `INFORMATION_SCHEMA` and assembles them into a schema snapshot.
   *
   * @throws When `input.connectionId` does not match this connector.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const database = this.poolConfig.database;
    const tables = await this.queryRaw<MysqlTableRow>(
      `
        SELECT TABLE_NAME, TABLE_TYPE, TABLE_COMMENT, TABLE_ROWS
        FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_SCHEMA = ? AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
        ORDER BY TABLE_NAME
      `,
      [database],
    );
    const columns = await this.queryRaw<MysqlColumnRow>(
      `
        SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COLUMN_COMMENT
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = ?
        ORDER BY TABLE_NAME, ORDINAL_POSITION
      `,
      [database],
    );
    const primaryKeys = await this.queryRaw<MysqlPrimaryKeyRow>(
      `
        SELECT TABLE_NAME, COLUMN_NAME
        FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
        WHERE TABLE_SCHEMA = ?
          AND CONSTRAINT_NAME = 'PRIMARY'
        ORDER BY TABLE_NAME, ORDINAL_POSITION
      `,
      [database],
    );
    const foreignKeys = await this.queryRaw<MysqlForeignKeyRow>(
      `
        SELECT TABLE_NAME, COLUMN_NAME, REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME, CONSTRAINT_NAME
        FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
        WHERE TABLE_SCHEMA = ?
          AND REFERENCED_TABLE_NAME IS NOT NULL
        ORDER BY TABLE_NAME, COLUMN_NAME
      `,
      [database],
    );

    const columnsByTable = groupByTable(columns);
    const primaryKeysByTable = primaryKeyMap(primaryKeys);
    const foreignKeysByTable = groupByTable(foreignKeys);
    const schemaTables = tables.map((table) =>
      this.toSchemaTable(table, columnsByTable.get(table.TABLE_NAME) ?? [], primaryKeysByTable, foreignKeysByTable),
    );

    return {
      connectionId: this.connectionId,
      driver: 'mysql',
      extractedAt: this.now().toISOString(),
      scope: { schemas: [database] },
      metadata: {
        database,
        host: this.poolConfig.host,
        table_count: schemaTables.length,
        total_columns: schemaTables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables: schemaTables,
    };
  }

  /** Samples up to `input.limit` rows (optionally restricted to `input.columns`) from a table. */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
    return { headers: result.headers, rows: result.rows, totalRows: result.totalRows };
  }

  /**
   * Samples values of one column. Null values are dropped from the result; `nullCount` and
   * `distinctCount` are always reported as `null`.
   */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /** Always resolves to `null`, matching `columnStats: false` in {@link capabilities}. */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /**
   * Executes caller-supplied SQL after passing it through `assertReadOnlySql` and
   * `limitSqlForExecution`. Array parameters are forwarded as-is; named parameters are converted
   * to positional ones by the dialect.
   */
  async executeReadOnly(input: KtxMysqlReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
    const prepared = Array.isArray(input.params)
      ? { sql: limitedSql, params: input.params }
      : this.dialect.prepareQuery(limitedSql, input.params);
    const result = await this.query(prepared.sql, prepared.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Estimates a column's cardinality from a sample (default sample size 10000) and, when it does
   * not exceed `options.maxCardinality`, fetches up to `options.limit` distinct non-null values as
   * strings.
   *
   * @returns `null` when the cardinality query yields no usable number; `values: null` when the
   *   cardinality exceeds `options.maxCardinality`.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxMysqlColumnDistinctValuesOptions,
  ): Promise<KtxMysqlColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityRows = await this.queryRaw<MysqlCountRow>(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    const cardinality = Number(cardinalityRows[0]?.cardinality);
    if (Number.isNaN(cardinality)) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesRows = await this.queryRaw<MysqlDistinctValueRow>(
      this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit),
    );
    return {
      values: valuesRows.filter((row) => row.val !== null).map((row) => String(row.val)),
      cardinality,
    };
  }

  /** Returns `COUNT(*)` for the given (unqualified) table name, or 0 when no row comes back. */
  async getTableRowCount(tableName: string): Promise<number> {
    const rows = await this.queryRaw<MysqlCountRow>(
      `SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(tableName)}`,
    );
    return Number(rows[0]?.count ?? 0);
  }

  /** Formats a table reference as a quoted table name via the dialect. */
  qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes a single identifier via the dialect. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Lists schema names, excluding `information_schema`, `mysql`, `performance_schema`, and `sys`. */
  async listSchemas(): Promise<string[]> {
    const rows = await this.queryRaw<MysqlSchemaRow>(`
      SELECT SCHEMA_NAME
      FROM INFORMATION_SCHEMA.SCHEMATA
      WHERE SCHEMA_NAME NOT IN ('information_schema', 'mysql', 'performance_schema', 'sys')
      ORDER BY SCHEMA_NAME
    `);
    return rows.map((row) => row.SCHEMA_NAME);
  }

  /**
   * Lists base tables and views in the given schemas, or in every schema returned by
   * {@link listSchemas} when `schemas` is omitted. An empty schema list yields an empty result
   * without querying.
   */
  async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
    const filterSchemas = schemas ?? (await this.listSchemas());
    if (filterSchemas.length === 0) return [];
    const placeholders = filterSchemas.map(() => '?').join(', ');
    const rows = await this.queryRaw<MysqlTableListRow>(
      `
        SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE
        FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_SCHEMA IN (${placeholders})
          AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
        ORDER BY TABLE_SCHEMA, TABLE_NAME
      `,
      filterSchemas,
    );
    return rows.map((row) => ({
      schema: row.TABLE_SCHEMA,
      name: row.TABLE_NAME,
      kind: row.TABLE_TYPE === 'VIEW' ? ('view' as const) : ('table' as const),
    }));
  }

  /**
   * Ends the pool and closes the resolved endpoint, if either exists.
   *
   * Both references are cleared up front so a later query starts from a clean state, and the
   * endpoint is closed in a `finally` so a failing `pool.end()` cannot leak it.
   */
  async cleanup(): Promise<void> {
    const pool = this.pool;
    const endpoint = this.resolvedEndpoint;
    this.pool = null;
    this.resolvedEndpoint = null;
    try {
      if (pool) {
        await pool.end();
      }
    } finally {
      if (endpoint?.close) {
        await endpoint.close();
      }
    }
  }

  /** Converts one `INFORMATION_SCHEMA.TABLES` row plus its columns and keys into a schema table. */
  private toSchemaTable(
    table: MysqlTableRow,
    columns: MysqlColumnRow[],
    primaryKeysByTable: Map<string, Set<string>>,
    foreignKeysByTable: Map<string, MysqlForeignKeyRow[]>,
  ): KtxSchemaTable {
    const tableName = table.TABLE_NAME;
    const kind = table.TABLE_TYPE === 'VIEW' ? 'view' : 'table';
    // Views carry no row estimate; for tables a missing TABLE_ROWS counts as 0.
    const estimatedRows = kind === 'view' ? null : Number(table.TABLE_ROWS ?? 0);
    return {
      catalog: null,
      db: this.poolConfig.database,
      name: tableName,
      kind,
      comment: cleanMySqlTableComment(table.TABLE_COMMENT),
      // `Number.isFinite` rejects both `null` (views) and non-numeric TABLE_ROWS values.
      estimatedRows: Number.isFinite(estimatedRows) ? estimatedRows : null,
      columns: columns.map((column) => this.toSchemaColumn(column, primaryKeysByTable.get(tableName) ?? new Set())),
      foreignKeys: (foreignKeysByTable.get(tableName) ?? []).map((row) => this.toSchemaForeignKey(row)),
    };
  }

  /** Converts one `INFORMATION_SCHEMA.COLUMNS` row into a schema column. */
  private toSchemaColumn(column: MysqlColumnRow, primaryKeys: Set<string>): KtxSchemaColumn {
    return {
      name: column.COLUMN_NAME,
      nativeType: column.DATA_TYPE,
      normalizedType: this.dialect.mapDataType(column.DATA_TYPE),
      dimensionType: this.dialect.mapToDimensionType(column.DATA_TYPE),
      nullable: column.IS_NULLABLE === 'YES',
      primaryKey: primaryKeys.has(column.COLUMN_NAME),
      comment: column.COLUMN_COMMENT || null,
    };
  }

  /** Converts one foreign-key row; the target database is always the configured database. */
  private toSchemaForeignKey(row: MysqlForeignKeyRow): KtxSchemaForeignKey {
    return {
      fromColumn: row.COLUMN_NAME,
      toCatalog: null,
      toDb: this.poolConfig.database,
      toTable: row.REFERENCED_TABLE_NAME,
      toColumn: row.REFERENCED_COLUMN_NAME,
      constraintName: row.CONSTRAINT_NAME || null,
    };
  }

  /**
   * Returns the pool, creating it on first use. When an endpoint resolver is configured, the
   * resolved host and port replace the configured ones for the new pool.
   */
  private async poolForQuery(): Promise<KtxMysqlPool> {
    if (!this.pool) {
      const config = { ...this.poolConfig };
      if (this.endpointResolver) {
        this.resolvedEndpoint = await this.endpointResolver.resolve({
          host: config.host,
          port: config.port,
          connection: this.connection,
        });
        config.host = this.resolvedEndpoint.host;
        config.port = this.resolvedEndpoint.port;
      }
      this.pool = this.poolFactory.createPool(config);
    }
    return this.pool;
  }

  /** Runs SQL on a pooled connection and returns the raw rows; the connection is always released. */
  private async queryRaw<T extends RowDataPacket>(sql: string, params?: unknown): Promise<T[]> {
    const pool = await this.poolForQuery();
    const connection = await pool.getConnection();
    try {
      const [rows] = await connection.query(sql, params);
      return rows as T[];
    } finally {
      connection.release();
    }
  }

  /**
   * Runs SQL that must pass `assertReadOnlySql` and returns it in tabular form: header names,
   * stringified field types, and rows as arrays ordered like the headers.
   */
  private async query(
    sql: string,
    params?: Record<string, unknown> | unknown[],
  ): Promise<Omit<KtxQueryResult, 'rowCount'>> {
    const pool = await this.poolForQuery();
    const connection = await pool.getConnection();
    try {
      const [rows, fields] = await connection.query(assertReadOnlySql(sql), queryParams(params));
      const headers = fields.map((field) => field.name);
      const headerTypes = fields.map((field) => String(field.type ?? 'unknown'));
      return {
        headers,
        headerTypes,
        rows: rows.map((row) => headers.map((header) => row[header])),
        totalRows: rows.length,
      };
    } finally {
      connection.release();
    }
  }

  /** Guards against requests addressed to a different connection id. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`KTX MySQL connector ${this.id} cannot serve connection ${connectionId}`);
    }
  }
}
|
||||
49
packages/cli/src/connectors/mysql/dialect.test.ts
Normal file
49
packages/cli/src/connectors/mysql/dialect.test.ts
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxMysqlDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the SQL text and type mappings produced by KtxMysqlDialect.
describe('KtxMysqlDialect', () => {
  const dialect = new KtxMysqlDialect();

  it('quotes identifiers and formats database-qualified table names', () => {
    const quotedIdentifiers: Array<[string, string]> = [
      ['orders', '`orders`'],
      ['odd`name', '`odd``name`'],
    ];
    for (const [raw, quoted] of quotedIdentifiers) {
      expect(dialect.quoteIdentifier(raw)).toBe(quoted);
    }

    const qualified = dialect.formatTableName({ catalog: null, db: 'analytics', name: 'orders' });
    expect(qualified).toBe('`analytics`.`orders`');

    const unqualified = dialect.formatTableName({ catalog: null, db: null, name: 'orders' });
    expect(unqualified).toBe('`orders`');
  });

  it('maps native MySQL types to KTX dimension types', () => {
    const expectations: Array<[string, string]> = [
      ['tinyint(1)', 'boolean'],
      ['int', 'number'],
      ['decimal(10,2)', 'number'],
      ['timestamp', 'time'],
      ['varchar(255)', 'string'],
      ['json', 'string'],
      ['', 'string'],
    ];
    for (const [nativeType, dimensionType] of expectations) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('builds sampling, distinct-value, pagination, and time SQL', () => {
    const table = '`analytics`.`orders`';

    const sampleSql = dialect.generateSampleQuery(table, 25, ['id', 'status']);
    expect(sampleSql).toBe('SELECT `id`, `status` FROM `analytics`.`orders` LIMIT 25');

    const columnSampleSql = dialect.generateColumnSampleQuery(table, 'status', 10);
    expect(columnSampleSql).toBe(
      "SELECT `status` FROM `analytics`.`orders` WHERE `status` IS NOT NULL AND TRIM(CAST(`status` AS CHAR)) != '' LIMIT 10",
    );

    const distinctSql = dialect.generateDistinctValuesQuery(table, '`status`', 5);
    expect(distinctSql).toContain('SELECT DISTINCT CAST(`status` AS CHAR) AS val');

    expect(dialect.getLimitOffsetClause(10, 20)).toBe('LIMIT 10 OFFSET 20');
    expect(dialect.getTimeTruncExpression('created_at', 'month')).toBe("DATE_FORMAT(created_at, '%Y-%m-01')");
  });

  it('prepares named parameters in deterministic SQL placeholder order', () => {
    // Keys are deliberately listed in the opposite order to their placeholders.
    const prepared = dialect.prepareQuery('select * from orders where id = :id and status = :status', {
      status: 'paid',
      id: 10,
    });
    expect(prepared).toEqual({
      sql: 'select * from orders where id = ? and status = ?',
      params: [10, 'paid'],
    });
  });
});
|
||||
202
packages/cli/src/connectors/mysql/dialect.ts
Normal file
202
packages/cli/src/connectors/mysql/dialect.ts
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
/** Table reference accepted when formatting names: a required `name` plus optional `catalog` and `db`. */
type MysqlTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL text generation and type mapping for MySQL.
 *
 * Methods that take a `tableName`, or a `columnName`/`column` documented as "already quoted",
 * interpolate that text verbatim; callers are expected to pass output of {@link quoteIdentifier} /
 * {@link formatTableName} (or a trusted expression).
 */
export class KtxMysqlDialect {
  readonly type = 'mysql';

  /** Exact-match lookup from a lower-cased base type name to its dimension type. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    datetime: 'time',
    timestamp: 'time',
    date: 'time',
    time: 'time',
    year: 'time',
    tinyint: 'number',
    smallint: 'number',
    mediumint: 'number',
    int: 'number',
    integer: 'number',
    bigint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    double: 'number',
    real: 'number',
    varchar: 'string',
    char: 'string',
    text: 'string',
    tinytext: 'string',
    mediumtext: 'string',
    longtext: 'string',
    enum: 'string',
    set: 'string',
    json: 'string',
    bit: 'boolean',
    bool: 'boolean',
    boolean: 'boolean',
  };

  /** Wraps an identifier in backticks, doubling any backtick it contains. */
  quoteIdentifier(identifier: string): string {
    return `\`${identifier.replace(/`/g, '``')}\``;
  }

  /** Returns `` `db`.`name` `` when `table.db` is set, otherwise just the quoted name. */
  formatTableName(table: MysqlTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native type as `time`, `number`, `boolean`, or `string`.
   *
   * `tinyint(1)` is treated as boolean. Otherwise any parenthesized suffix is dropped, the base
   * name is looked up in the exact-match table, and substring heuristics act as a fallback.
   * Empty or unrecognized types map to `string`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    if (lower.includes('tinyint(1)')) {
      return 'boolean';
    }
    const parenAt = lower.indexOf('(');
    const normalized = parenAt === -1 ? lower : lower.slice(0, parenAt);
    // Own-property check: a plain index would also hit inherited members such as `constructor`.
    const mapped = Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)
      ? this.typeMappings[normalized]
      : undefined;
    if (mapped) {
      return mapped;
    }
    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    if (
      normalized.includes('int') ||
      normalized.includes('num') ||
      normalized.includes('dec') ||
      normalized.includes('float') ||
      normalized.includes('double')
    ) {
      return 'number';
    }
    if (normalized.includes('bit') || normalized === 'bool' || normalized === 'boolean') {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT <columns> FROM <table> LIMIT <limit>`; column names are quoted here and `*` is
   * used when no columns are given.
   *
   * @param tableName - Already-quoted table name.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /**
   * Builds a query sampling one column while skipping nulls and values that are blank once cast to
   * `CHAR` and trimmed.
   *
   * @param tableName - Already-quoted table name.
   * @param columnName - Unquoted column name; quoted here.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS CHAR)) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders into positional `?` placeholders, collecting the values in
   * placeholder order. Placeholders without a matching own key in `params` are left untouched.
   * Without `params` the SQL is returned as-is with `params: undefined`.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
    if (!params) {
      return { sql, params: undefined };
    }
    const values: unknown[] = [];
    const parameterizedQuery = sql.replace(/:([A-Za-z_][A-Za-z0-9_]*)\b/g, (placeholder, key: string) => {
      // Own keys only: `in` would also match inherited members such as `:toString`.
      if (!Object.prototype.hasOwnProperty.call(params, key)) {
        return placeholder;
      }
      values.push(params[key]);
      return '?';
    });
    return { sql: parameterizedQuery, params: values };
  }

  /** Returns `RAND() < pct` for a fraction strictly between 0 and 1, otherwise an empty string. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `RAND() < ${samplePct}`;
  }

  /** Always returns an empty string. */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** Returns `LIMIT n`, adding `OFFSET m` only for a positive offset. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** @param column - Already-quoted column or expression. */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** @param column - Already-quoted column or expression. */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Builds a query counting distinct non-null values among the first `sampleSize` rows returned.
   *
   * @param tableName - Already-quoted table name.
   * @param columnName - Already-quoted column name.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      ) AS sampled
    `;
  }

  /**
   * Builds a query returning up to `limit` distinct non-null values, cast to `CHAR` and sorted.
   *
   * @param tableName - Already-quoted table name.
   * @param columnName - Already-quoted column name.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT CAST(${columnName} AS CHAR) AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /** Always returns `null`. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /**
   * Same as {@link generateCardinalitySampleQuery}, but the sampled rows are picked with
   * `ORDER BY RAND()`.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RAND()
        LIMIT ${sampleSize}
      ) AS sampled
    `;
  }

  /**
   * Builds an expression truncating `column` to the start of the given granularity.
   *
   * @param column - Already-quoted column or expression.
   * @param timezone - When given, the column is first wrapped in
   *   `CONVERT_TZ(column, '+00:00', timezone)`.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = this.inTimezone(column, timezone);
    switch (granularity) {
      case 'day':
        return `DATE(${col})`;
      case 'week':
        return `DATE(${col} - INTERVAL WEEKDAY(${col}) DAY)`;
      case 'month':
        return `DATE_FORMAT(${col}, '%Y-%m-01')`;
      case 'quarter':
        return `MAKEDATE(YEAR(${col}), 1) + INTERVAL (QUARTER(${col}) - 1) QUARTER`;
      case 'year':
        return `DATE_FORMAT(${col}, '%Y-01-01')`;
    }
  }

  /**
   * Builds an expression bucketing `column` into `interval`-sized steps counted from `origin`
   * (default `1970-01-01`).
   *
   * @param interval - `"<amount> <unit>"`, e.g. `"15 minute"`.
   * @throws TypeError when `interval` lacks an amount or a unit.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = this.inTimezone(column, timezone);
    const { amount, unit } = this.splitInterval(interval);
    const originExpr = origin ? this.quoteStringLiteral(origin) : `'1970-01-01'`;
    return `DATE_ADD(${originExpr}, INTERVAL FLOOR(TIMESTAMPDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount} ${unit})`;
  }

  /**
   * Converts `"<amount> <unit>"` into `INTERVAL <amount> <UNIT>`.
   *
   * @throws TypeError when `interval` lacks an amount or a unit.
   */
  parseIntervalToSql(interval: string): string {
    const { amount, unit } = this.splitInterval(interval);
    return `INTERVAL ${amount} ${unit}`;
  }

  /** Wraps `column` in `CONVERT_TZ` from `+00:00` to `timezone`, or returns it as-is without one. */
  private inTimezone(column: string, timezone?: string): string {
    return timezone ? `CONVERT_TZ(${column}, '+00:00', ${this.quoteStringLiteral(timezone)})` : column;
  }

  /** Renders a single-quoted SQL string literal, escaping backslashes and single quotes. */
  private quoteStringLiteral(value: string): string {
    return `'${value.replace(/\\/g, '\\\\').replace(/'/g, "''")}'`;
  }

  /** Splits `"<amount> <unit>"` on whitespace and upper-cases the unit. */
  private splitInterval(interval: string): { amount: string; unit: string } {
    const [amount, unit] = interval.trim().split(/\s+/);
    if (!amount || !unit) {
      // Fail with a descriptive message instead of an opaque `undefined.toUpperCase()` error.
      throw new TypeError(`Invalid interval "${interval}": expected "<amount> <unit>", e.g. "15 minute"`);
    }
    return { amount, unit: unit.toUpperCase() };
  }
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
import {
|
||||
KtxMysqlScanConnector,
|
||||
type KtxMysqlConnectionConfig,
|
||||
type KtxMysqlEndpointResolver,
|
||||
type KtxMysqlPoolFactory,
|
||||
} from './connector.js';
|
||||
|
||||
/** Options for {@link createMysqlLiveDatabaseIntrospection}. */
interface CreateMysqlLiveDatabaseIntrospectionOptions {
  /** Project connections keyed by connection id; looked up on every `extractSchema` call. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Forwarded to each `KtxMysqlScanConnector` that is created. */
  poolFactory?: KtxMysqlPoolFactory;
  /** Forwarded to each `KtxMysqlScanConnector` that is created. */
  endpointResolver?: KtxMysqlEndpointResolver;
  /** Forwarded to each `KtxMysqlScanConnector` that is created. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Creates a live-database introspection port backed by the MySQL scan connector.
 *
 * Each `extractSchema` call builds a fresh connector for the requested connection id, introspects
 * it under the run id `mysql-<connectionId>`, and always cleans the connector up afterwards,
 * including when construction succeeded but introspection failed.
 */
export function createMysqlLiveDatabaseIntrospection(
  options: CreateMysqlLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  // Options are read at call time, so later changes to `options` are picked up.
  const connectorFor = (connectionId: string): KtxMysqlScanConnector =>
    new KtxMysqlScanConnector({
      connectionId,
      connection: options.connections[connectionId] as KtxMysqlConnectionConfig | undefined,
      poolFactory: options.poolFactory,
      endpointResolver: options.endpointResolver,
      now: options.now,
    });

  return {
    async extractSchema(connectionId: string) {
      const connector = connectorFor(connectionId);
      try {
        const snapshot = await connector.introspect(
          { connectionId, driver: 'mysql' },
          { runId: `mysql-${connectionId}` },
        );
        return snapshot;
      } finally {
        await connector.cleanup();
      }
    },
  };
}
|
||||
387
packages/cli/src/connectors/postgres/connector.test.ts
Normal file
387
packages/cli/src/connectors/postgres/connector.test.ts
Normal file
|
|
@ -0,0 +1,387 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createPostgresLiveDatabaseIntrospection } from '../../connectors/postgres/live-database-introspection.js';
|
||||
import { isKtxPostgresConnectionConfig, KtxPostgresScanConnector, postgresPoolConfigFromConfig, type KtxPostgresPoolFactory } from '../../connectors/postgres/connector.js';
|
||||
|
||||
/** Canned result returned by the fake pool for a matching SQL fragment. */
interface FakeQueryResult {
  /** Result rows keyed by column name. */
  rows: Record<string, unknown>[];
  /** Optional field metadata (column name and numeric type id). */
  fields?: Array<{ name: string; dataTypeID: number }>;
}
|
||||
|
||||
/**
 * Builds a pool factory whose clients answer queries from canned results.
 *
 * Incoming SQL is whitespace-normalized, then the first entry of `results` (in insertion order)
 * whose key occurs in that SQL supplies the response; SQL matching no key throws. All clients
 * share a single `query` mock.
 */
function fakePoolFactory(results: Map<string, FakeQueryResult>): KtxPostgresPoolFactory {
  const query = vi.fn(async (sql: string, params?: unknown[]) => {
    const normalized = sql.replace(/\s+/g, ' ').trim();
    const match = Array.from(results.entries()).find(([fragment]) => normalized.includes(fragment));
    if (!match) {
      throw new Error(`Unexpected SQL: ${normalized} params=${JSON.stringify(params ?? [])}`);
    }
    return match[1];
  });

  const connect = async () => ({ query, release: vi.fn() });

  return {
    createPool() {
      return {
        connect,
        end: vi.fn(async () => undefined),
      };
    },
  };
}
|
||||
|
||||
/**
 * Canned query results for the connector tests, keyed by a distinctive SQL fragment.
 *
 * Entry order is significant: the fake pool answers with the first key contained in the SQL.
 */
function metadataResults(): Map<string, FakeQueryResult> {
  const tableRows: FakeQueryResult = {
    rows: [
      { table_name: 'customers', table_kind: 'r', row_count: '2', table_comment: 'Customers' },
      { table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null },
      { table_name: 'recent_orders', table_kind: 'v', row_count: '0', table_comment: 'Recent orders' },
    ],
  };

  const columnRows: FakeQueryResult = {
    rows: [
      { table_name: 'customers', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
      { table_name: 'customers', column_name: 'name', data_type: 'text', is_nullable: false, column_comment: 'Name' },
      { table_name: 'orders', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
      { table_name: 'orders', column_name: 'customer_id', data_type: 'integer', is_nullable: false, column_comment: null },
      { table_name: 'orders', column_name: 'status', data_type: 'text', is_nullable: true, column_comment: null },
      { table_name: 'recent_orders', column_name: 'id', data_type: 'integer', is_nullable: true, column_comment: null },
    ],
  };

  const foreignKeyRows: FakeQueryResult = {
    rows: [
      {
        table_name: 'orders',
        column_name: 'customer_id',
        foreign_table_schema: 'public',
        foreign_table_name: 'customers',
        foreign_column_name: 'id',
        constraint_name: 'orders_customer_id_fkey',
      },
    ],
  };

  const primaryKeyRows: FakeQueryResult = {
    rows: [
      { table_name: 'customers', column_name: 'id' },
      { table_name: 'orders', column_name: 'id' },
    ],
  };

  const entries: Array<[string, FakeQueryResult]> = [
    ['FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n', tableRows],
    ['FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_class c', columnRows],
    ["tc.constraint_type = 'FOREIGN KEY'", foreignKeyRows],
    ["tc.constraint_type = 'PRIMARY KEY'", primaryKeyRows],
    ['SELECT "id" FROM "public"."orders" LIMIT 1', { rows: [{ id: 10 }], fields: [{ name: 'id', dataTypeID: 23 }] }],
    [
      'SELECT "status" FROM "public"."orders" WHERE "status" IS NOT NULL',
      { rows: [{ status: 'paid' }, { status: 'open' }], fields: [{ name: 'status', dataTypeID: 25 }] },
    ],
    ['COUNT(DISTINCT val) AS cardinality', { rows: [{ cardinality: '2' }] }],
    ['SELECT DISTINCT "status"::text AS val', { rows: [{ val: 'open' }, { val: 'paid' }] }],
    ['SELECT COUNT(*) AS count FROM "public"."orders"', { rows: [{ count: '3' }] }],
    ['FROM pg_stats s', { rows: [{ column_name: 'status', estimated_cardinality: '2' }] }],
    ['SELECT 1', { rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] }],
    ['SELECT schema_name FROM information_schema.schemata', { rows: [{ schema_name: 'public' }] }],
  ];

  return new Map<string, FakeQueryResult>(entries);
}
|
||||
|
||||
describe('KtxPostgresScanConnector', () => {
|
||||
it('resolves configuration safely', () => {
|
||||
expect(isKtxPostgresConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL' })).toBe(true);
|
||||
expect(isKtxPostgresConnectionConfig({ driver: 'postgresql', host: 'db', database: 'analytics' })).toBe(true);
|
||||
expect(isKtxPostgresConnectionConfig({ driver: 'mysql', host: 'db' })).toBe(false);
|
||||
expect(
|
||||
postgresPoolConfigFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'postgres',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
password: 'test-password', // pragma: allowlist secret
|
||||
schemas: ['analytics', 'public'],
|
||||
ssl: true,
|
||||
rejectUnauthorized: false,
|
||||
},
|
||||
}),
|
||||
).toMatchObject({
|
||||
host: 'db.example.test',
|
||||
port: 5432,
|
||||
database: 'analytics',
|
||||
user: 'reader',
|
||||
password: 'test-password', // pragma: allowlist secret
|
||||
options: '-c search_path=analytics,public',
|
||||
ssl: { rejectUnauthorized: false },
|
||||
});
|
||||
const libpqPreferConfig = postgresPoolConfigFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'postgres',
|
||||
url: 'env:DEMO_DATABASE_URL',
|
||||
},
|
||||
env: {
|
||||
DEMO_DATABASE_URL: 'postgresql://reader@demo.example.test:5432/demo?sslmode=prefer',
|
||||
},
|
||||
});
|
||||
expect(libpqPreferConfig).toMatchObject({
|
||||
host: 'demo.example.test',
|
||||
port: 5432,
|
||||
database: 'demo',
|
||||
user: 'reader',
|
||||
});
|
||||
expect(libpqPreferConfig).not.toHaveProperty('connectionString');
|
||||
expect(libpqPreferConfig).not.toHaveProperty('ssl');
|
||||
expect(
|
||||
postgresPoolConfigFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
connection: { driver: 'postgres', host: 'db.example.test', database: 'analytics', username: 'reader' },
|
||||
}),
|
||||
).toMatchObject({
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
user: 'reader',
|
||||
});
|
||||
});
|
||||
|
||||
it('introspects schemas, tables, views, primary keys, comments, row counts, and foreign keys', async () => {
|
||||
const connector = new KtxPostgresScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'postgres',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
password: 'test-password', // pragma: allowlist secret
|
||||
schema: 'public',
|
||||
},
|
||||
poolFactory: fakePoolFactory(metadataResults()),
|
||||
now: () => new Date('2026-04-29T10:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await connector.introspect(
|
||||
{ connectionId: 'warehouse', driver: 'postgres' },
|
||||
{ runId: 'scan-run-1' },
|
||||
);
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
driver: 'postgres',
|
||||
extractedAt: '2026-04-29T10:00:00.000Z',
|
||||
scope: { schemas: ['public'] },
|
||||
metadata: {
|
||||
database: 'analytics',
|
||||
schemas: ['public'],
|
||||
host: 'db.example.test',
|
||||
table_count: 3,
|
||||
total_columns: 6,
|
||||
},
|
||||
});
|
||||
expect(snapshot.tables.map((table) => [table.db, table.name, table.kind, table.estimatedRows])).toEqual([
|
||||
['public', 'customers', 'table', 2],
|
||||
['public', 'orders', 'table', 3],
|
||||
['public', 'recent_orders', 'view', null],
|
||||
]);
|
||||
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
|
||||
{
|
||||
fromColumn: 'customer_id',
|
||||
toCatalog: null,
|
||||
toDb: 'public',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
constraintName: 'orders_customer_id_fkey',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('runs samples, distinct values, statistics, read-only SQL, and schema listing', async () => {
|
||||
const connector = new KtxPostgresScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'postgres',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
password: 'test-password', // pragma: allowlist secret
|
||||
schema: 'public',
|
||||
},
|
||||
poolFactory: fakePoolFactory(metadataResults()),
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.sampleTable(
|
||||
{ connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, columns: ['id'], limit: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toEqual({ headers: ['id'], headerTypes: ['integer'], rows: [[10]], totalRows: 1 });
|
||||
|
||||
await expect(
|
||||
connector.sampleColumn(
|
||||
{ connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, column: 'status', limit: 5 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
|
||||
|
||||
await expect(
|
||||
connector.getColumnDistinctValues(
|
||||
{ catalog: null, db: 'public', name: 'orders' },
|
||||
'status',
|
||||
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
|
||||
),
|
||||
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
|
||||
|
||||
await expect(connector.getColumnStatistics({ catalog: null, db: 'public', name: 'orders' })).resolves.toEqual({
|
||||
cardinalityByColumn: new Map([['status', 2]]),
|
||||
});
|
||||
await expect(connector.getTableRowCount({ db: 'public', name: 'orders' })).resolves.toBe(3);
|
||||
await expect(connector.listSchemas()).resolves.toEqual(['public']);
|
||||
await expect(connector.testConnection()).resolves.toEqual({ success: true });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
|
||||
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
|
||||
});
|
||||
|
||||
it('adapts native PostgreSQL snapshots to live-database introspection for local ingest', async () => {
|
||||
const introspection = createPostgresLiveDatabaseIntrospection({
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
password: 'test-password', // pragma: allowlist secret
|
||||
schema: 'public',
|
||||
},
|
||||
},
|
||||
poolFactory: fakePoolFactory(metadataResults()),
|
||||
now: () => new Date('2026-04-29T10:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await introspection.extractSchema('warehouse');
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
extractedAt: '2026-04-29T10:00:00.000Z',
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
|
||||
name: 'customers',
|
||||
catalog: null,
|
||||
db: 'public',
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'integer',
|
||||
normalizedType: 'integer',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: null,
|
||||
},
|
||||
{
|
||||
name: 'name',
|
||||
nativeType: 'text',
|
||||
normalizedType: 'text',
|
||||
dimensionType: 'string',
|
||||
nullable: false,
|
||||
primaryKey: false,
|
||||
comment: 'Name',
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
});
|
||||
});
|
||||
|
||||
it('does not end the pool before introspection completes', async () => {
|
||||
let endCalled = false;
|
||||
const endAwarePoolFactory: KtxPostgresPoolFactory = {
|
||||
createPool() {
|
||||
const inner = fakePoolFactory(metadataResults()).createPool({
|
||||
max: 1,
|
||||
idleTimeoutMillis: 1,
|
||||
connectionTimeoutMillis: 1,
|
||||
});
|
||||
return {
|
||||
async connect() {
|
||||
if (endCalled) {
|
||||
throw new Error('Cannot use a pool after calling end on the pool');
|
||||
}
|
||||
return inner.connect();
|
||||
},
|
||||
async end() {
|
||||
endCalled = true;
|
||||
return inner.end();
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
const introspection = createPostgresLiveDatabaseIntrospection({
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'postgres',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
password: 'test-password', // pragma: allowlist secret
|
||||
schema: 'public',
|
||||
},
|
||||
},
|
||||
poolFactory: endAwarePoolFactory,
|
||||
now: () => new Date('2026-04-29T10:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await introspection.extractSchema('warehouse');
|
||||
expect(snapshot.tables.length).toBeGreaterThan(0);
|
||||
expect(endCalled).toBe(true);
|
||||
});
|
||||
|
||||
it('attaches an error listener to the pg pool', async () => {
|
||||
const on = vi.fn();
|
||||
const poolFactory: KtxPostgresPoolFactory = {
|
||||
createPool() {
|
||||
return {
|
||||
on,
|
||||
async connect() {
|
||||
return {
|
||||
query: vi.fn(async () => ({ rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] })),
|
||||
release: vi.fn(),
|
||||
};
|
||||
},
|
||||
end: vi.fn(async () => undefined),
|
||||
};
|
||||
},
|
||||
};
|
||||
const connector = new KtxPostgresScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'postgres',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
password: 'test-password', // pragma: allowlist secret
|
||||
},
|
||||
poolFactory,
|
||||
});
|
||||
|
||||
await expect(connector.testConnection()).resolves.toEqual({ success: true });
|
||||
|
||||
expect(on).toHaveBeenCalledWith('error', expect.any(Function));
|
||||
});
|
||||
});
|
||||
742
packages/cli/src/connectors/postgres/connector.ts
Normal file
742
packages/cli/src/connectors/postgres/connector.ts
Normal file
|
|
@ -0,0 +1,742 @@
|
|||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { Pool } from 'pg';
|
||||
import { KtxPostgresDialect } from './dialect.js';
|
||||
|
||||
/**
 * Maps the numeric `dataTypeID` reported for a result field to a
 * human-readable PostgreSQL type name. Entries are listed in ascending OID
 * order; OIDs missing from this table have no mapping here.
 */
const PG_OID_TYPE_MAP: Record<number, string> = {
  // Scalars
  16: 'boolean',
  20: 'bigint',
  21: 'smallint',
  23: 'integer',
  25: 'text',
  114: 'json',
  700: 'real',
  701: 'double precision',
  // Arrays
  1007: 'integer[]',
  1009: 'text[]',
  1016: 'bigint[]',
  // Character / temporal / numeric
  1043: 'varchar',
  1082: 'date',
  1114: 'timestamp',
  1184: 'timestamptz',
  1700: 'numeric',
  // Identifiers and documents
  2950: 'uuid',
  3802: 'jsonb',
};
|
||||
|
||||
export interface KtxPostgresConnectionConfig {
|
||||
driver?: string;
|
||||
host?: string;
|
||||
port?: number;
|
||||
database?: string;
|
||||
username?: string;
|
||||
user?: string;
|
||||
password?: string;
|
||||
url?: string;
|
||||
schema?: string;
|
||||
schemas?: string[];
|
||||
ssl?: boolean;
|
||||
sslmode?: string;
|
||||
sslMode?: string;
|
||||
rejectUnauthorized?: boolean;
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
export interface KtxPostgresPoolConfig {
|
||||
host?: string;
|
||||
port?: number;
|
||||
database?: string;
|
||||
user?: string;
|
||||
password?: string;
|
||||
connectionString?: string;
|
||||
max: number;
|
||||
idleTimeoutMillis: number;
|
||||
connectionTimeoutMillis: number;
|
||||
options?: string;
|
||||
ssl?: { rejectUnauthorized: boolean };
|
||||
}
|
||||
|
||||
interface KtxPostgresQueryResult {
|
||||
fields?: Array<{ name: string; dataTypeID: number }>;
|
||||
rows: Record<string, unknown>[];
|
||||
}
|
||||
|
||||
interface KtxPostgresClient {
|
||||
query(sql: string, params?: unknown[]): Promise<KtxPostgresQueryResult>;
|
||||
release(): void;
|
||||
}
|
||||
|
||||
interface KtxPostgresPool {
|
||||
connect(): Promise<KtxPostgresClient>;
|
||||
end(): Promise<void>;
|
||||
on?(event: 'error', listener: (error: Error) => void): void;
|
||||
}
|
||||
|
||||
export interface KtxPostgresPoolFactory {
|
||||
createPool(config: KtxPostgresPoolConfig): KtxPostgresPool;
|
||||
}
|
||||
|
||||
interface KtxPostgresResolvedEndpoint {
|
||||
host: string;
|
||||
port: number;
|
||||
close?: () => Promise<void>;
|
||||
}
|
||||
|
||||
export interface KtxPostgresEndpointResolver {
|
||||
resolve(input: {
|
||||
host: string;
|
||||
port: number;
|
||||
connection: KtxPostgresConnectionConfig;
|
||||
}): Promise<KtxPostgresResolvedEndpoint>;
|
||||
}
|
||||
|
||||
export interface KtxPostgresScanConnectorOptions {
|
||||
connectionId: string;
|
||||
connection: KtxPostgresConnectionConfig | undefined;
|
||||
poolFactory?: KtxPostgresPoolFactory;
|
||||
endpointResolver?: KtxPostgresEndpointResolver;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
now?: () => Date;
|
||||
}
|
||||
|
||||
export interface KtxPostgresReadOnlyQueryInput extends KtxReadOnlyQueryInput {
|
||||
params?: Record<string, unknown> | unknown[];
|
||||
}
|
||||
|
||||
export interface KtxPostgresColumnDistinctValuesOptions {
|
||||
maxCardinality: number;
|
||||
limit: number;
|
||||
sampleSize?: number;
|
||||
}
|
||||
|
||||
export interface KtxPostgresColumnDistinctValuesResult {
|
||||
values: string[] | null;
|
||||
cardinality: number;
|
||||
}
|
||||
|
||||
export interface KtxPostgresColumnStatisticsResult {
|
||||
cardinalityByColumn: Map<string, number>;
|
||||
}
|
||||
|
||||
export interface KtxPostgresTableSampleResult extends KtxTableSampleResult {
|
||||
headerTypes?: string[];
|
||||
}
|
||||
|
||||
type PostgresTableRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
interface PostgresTableRow {
|
||||
table_name: string;
|
||||
table_kind: string;
|
||||
row_count: unknown;
|
||||
table_comment: string | null;
|
||||
}
|
||||
|
||||
interface PostgresColumnRow {
|
||||
table_name: string;
|
||||
column_name: string;
|
||||
data_type: string;
|
||||
is_nullable: boolean;
|
||||
column_comment: string | null;
|
||||
}
|
||||
|
||||
interface PostgresPrimaryKeyRow {
|
||||
table_name: string;
|
||||
column_name: string;
|
||||
}
|
||||
|
||||
interface PostgresForeignKeyRow {
|
||||
table_name: string;
|
||||
column_name: string;
|
||||
foreign_table_schema: string | null;
|
||||
foreign_table_name: string;
|
||||
foreign_column_name: string;
|
||||
constraint_name: string | null;
|
||||
}
|
||||
|
||||
interface PostgresSchemaRow {
|
||||
schema_name: string;
|
||||
}
|
||||
|
||||
interface PostgresTableListRow {
|
||||
schema_name: string;
|
||||
table_name: string;
|
||||
table_kind: string;
|
||||
}
|
||||
|
||||
interface PostgresCountRow {
|
||||
count?: unknown;
|
||||
cardinality?: unknown;
|
||||
}
|
||||
|
||||
interface PostgresDistinctValueRow {
|
||||
val: unknown;
|
||||
}
|
||||
|
||||
interface PostgresStatsRow {
|
||||
column_name: string;
|
||||
estimated_cardinality: unknown;
|
||||
}
|
||||
|
||||
/** Pool factory used when the caller does not inject one; it hands the config straight to `pg`'s `Pool`. */
class DefaultPostgresPoolFactory implements KtxPostgresPoolFactory {
  /**
   * @param config Pool settings passed unchanged to the `Pool` constructor.
   * @returns A newly constructed `Pool`.
   */
  createPool(config: KtxPostgresPoolConfig): KtxPostgresPool {
    const pool = new Pool(config);
    return pool;
  }
}
|
||||
|
||||
/**
 * Buckets rows by their `table_name`.
 *
 * @param rows Rows carrying a `table_name` property.
 * @returns A map from table name to the rows for that table, in input order.
 */
function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string, T[]> {
  const byTable = new Map<string, T[]>();
  rows.forEach((entry) => {
    const bucket = byTable.get(entry.table_name);
    if (bucket === undefined) {
      byTable.set(entry.table_name, [entry]);
    } else {
      bucket.push(entry);
    }
  });
  return byTable;
}
|
||||
|
||||
/**
 * Collects primary-key column names per table.
 *
 * @param rows One row per (table, primary-key column) pair.
 * @returns A map from table name to the set of its primary-key column names.
 */
function primaryKeyMap(rows: PostgresPrimaryKeyRow[]): Map<string, Set<string>> {
  return rows.reduce((keysByTable, { table_name: tableName, column_name: columnName }) => {
    const known = keysByTable.get(tableName);
    if (known === undefined) {
      keysByTable.set(tableName, new Set<string>([columnName]));
    } else {
      known.add(columnName);
    }
    return keysByTable;
  }, new Map<string, Set<string>>());
}
|
||||
|
||||
/**
 * Converts object-shaped result rows into positional arrays.
 *
 * @param result Query result; `fields` gives the column order and may be absent.
 * @returns One array per row with values ordered by `fields`. When `fields`
 *   is missing, every row becomes an empty array.
 */
function queryRows(result: KtxPostgresQueryResult): unknown[][] {
  const columnNames: string[] = [];
  for (const field of result.fields ?? []) {
    columnNames.push(field.name);
  }
  const matrix: unknown[][] = [];
  for (const record of result.rows) {
    matrix.push(columnNames.map((name) => record[name]));
  }
  return matrix;
}
|
||||
|
||||
/**
 * Coerces a value with `Number()` and keeps it only when the result is finite.
 *
 * @param value Any value, e.g. a numeric string from a query result.
 * @returns The coerced number, or `null` when coercion yields `NaN` or ±Infinity.
 */
function finiteNumber(value: unknown): number | null {
  const numeric = Number(value);
  if (!Number.isFinite(numeric)) {
    return null;
  }
  return numeric;
}
|
||||
|
||||
/**
 * Reads a string setting from the connection config and resolves any
 * reference it contains.
 *
 * @param connection Connection config, possibly undefined.
 * @param key Config key to read.
 * @param env Environment passed on to `resolveStringReference`.
 * @returns The resolved value of the trimmed setting, or `undefined` when the
 *   setting is absent, not a string, or blank after trimming.
 */
function stringConfigValue(
  connection: KtxPostgresConnectionConfig | undefined,
  key: keyof KtxPostgresConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Resolves a config string that may point at an environment variable or file.
 *
 * - `env:NAME` returns `env[NAME]`, or `''` when the variable is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file. A leading `~`
 *   is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param value Raw config value.
 * @param env Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a `file:` path cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separators after `~` before resolving: `resolve(home, '/x')`
    // treats '/x' as absolute and discards the home directory, which made
    // `file:~/x` read `/x` instead of `<home>/x`.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Accepts a value only when it is already a finite number; no coercion.
 *
 * @param value Candidate value.
 * @returns The same number, or `undefined` for non-numbers, `NaN` and ±Infinity.
 */
function numberValue(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Splits a PostgreSQL connection URL into discrete connection fields.
 *
 * @param url Connection URL, e.g. `postgresql://user:pw@host:5432/db?sslmode=prefer`.
 * @returns Host, port, database, username and password (each `undefined`
 *   when absent from the URL), plus `sslmode` only when the query string
 *   carries a non-empty value for it.
 * @throws TypeError when `url` cannot be parsed by `URL`; URIError when the
 *   username or password contains malformed percent-encoding.
 */
function parsePostgresUrl(url: string): Partial<KtxPostgresConnectionConfig> {
  const parsed = new URL(url);
  const sslmode = parsed.searchParams.get('sslmode') ?? undefined;

  // The path is percent-encoded just like the credentials, so decode it too;
  // otherwise a database called "my db" would be looked up as "my%20db".
  const rawDatabase = parsed.pathname.replace(/^\/+/, '');
  let database: string | undefined;
  if (rawDatabase) {
    try {
      database = decodeURIComponent(rawDatabase);
    } catch {
      // Malformed escape sequence: keep the raw text rather than failing.
      database = rawDatabase;
    }
  }

  return {
    host: parsed.hostname,
    port: parsed.port ? Number(parsed.port) : undefined,
    database,
    username: parsed.username ? decodeURIComponent(parsed.username) : undefined,
    password: parsed.password ? decodeURIComponent(parsed.password) : undefined,
    ...(sslmode ? { sslmode } : {}),
  };
}
|
||||
|
||||
/**
 * Reads the SSL mode from `sslmode` (preferred) or `sslMode`.
 *
 * @param connection Connection config.
 * @returns The trimmed, lower-cased mode, or `undefined` when the chosen
 *   value is not a string or is blank.
 */
function normalizedSslMode(connection: KtxPostgresConnectionConfig): string | undefined {
  const candidate = connection.sslmode ?? connection.sslMode;
  if (typeof candidate !== 'string') {
    return undefined;
  }
  const cleaned = candidate.trim();
  return cleaned.length === 0 ? undefined : cleaned.toLowerCase();
}
|
||||
|
||||
/**
 * Determines which schemas a connection targets.
 *
 * Precedence: the usable entries of `schemas`, then `schema`, then `'public'`.
 * Entries of `schemas` that are not non-empty strings are dropped. If that
 * leaves nothing, the function falls through to `schema` / `'public'`
 * instead of returning an empty list.
 *
 * @param connection Connection config.
 * @returns A non-empty list of schema names.
 */
function schemasFromConnection(connection: KtxPostgresConnectionConfig): string[] {
  if (Array.isArray(connection.schemas)) {
    const configured = connection.schemas.filter(
      (schema): schema is string => typeof schema === 'string' && schema.length > 0,
    );
    // Check the length AFTER filtering so `schemas: ['']` does not yield [].
    if (configured.length > 0) {
      return configured;
    }
  }
  return typeof connection.schema === 'string' && connection.schema.length > 0 ? [connection.schema] : ['public'];
}
|
||||
|
||||
/**
 * Lists the schemas for the session search path: the connection's schemas,
 * with `'public'` appended when it is not already among them.
 *
 * @param connection Connection config.
 * @returns Schema names in search-path order.
 */
function searchPathSchemasFromConnection(connection: KtxPostgresConnectionConfig): string[] {
  const searchPath = schemasFromConnection(connection);
  if (searchPath.indexOf('public') !== -1) {
    return searchPath;
  }
  return searchPath.concat('public');
}
|
||||
|
||||
/**
 * Type guard: is this connection configured for the PostgreSQL driver?
 *
 * @param connection Connection config, possibly undefined.
 * @returns `true` when `driver`, compared case-insensitively, is `postgres`
 *   or `postgresql`.
 */
export function isKtxPostgresConnectionConfig(
  connection: KtxPostgresConnectionConfig | undefined,
): connection is KtxPostgresConnectionConfig {
  const normalized = String(connection?.driver ?? '').toLowerCase();
  switch (normalized) {
    case 'postgres':
    case 'postgresql':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Builds pool settings from a connection config entry.
 *
 * Fields parsed from a referenced `url` act as defaults; fields set directly
 * on the connection override them. The URL is passed through as
 * `connectionString` unless the SSL mode is `prefer` or `disable`. In that
 * case discrete host/port/database/user/password fields are emitted instead
 * (port defaults to 5432) and no `ssl` option is set.
 *
 * @param input.connectionId Connection name, used in error messages.
 * @param input.connection Raw connection config.
 * @param input.env Environment for reference resolution; defaults to `process.env`.
 * @returns Pool configuration including pool sizing, timeouts and a
 *   `search_path` option.
 * @throws Error when the driver is not PostgreSQL, or when no `url` is given
 *   and host, database or user is missing.
 * @internal
 */
export function postgresPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KtxPostgresConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxPostgresPoolConfig {
  const { connection, connectionId } = input;
  if (!isKtxPostgresConnectionConfig(connection)) {
    const inputDriver = connection?.driver ?? 'unknown';
    throw new Error(`Native PostgreSQL connector cannot run driver "${inputDriver}"`);
  }

  const env = input.env ?? process.env;
  const referencedUrl = stringConfigValue(connection, 'url', env);
  // Explicit connection fields are spread last so they win over URL parts.
  const merged: KtxPostgresConnectionConfig = {
    ...(referencedUrl ? parsePostgresUrl(referencedUrl) : {}),
    ...connection,
  };
  const host = stringConfigValue(merged, 'host', env);
  const database = stringConfigValue(merged, 'database', env);
  const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);
  const password = stringConfigValue(merged, 'password', env);
  const sslmode = normalizedSslMode(merged);

  // A URL is allowed to stand in for any of the discrete required fields.
  if (!referencedUrl) {
    if (!host) {
      throw new Error(`Native PostgreSQL connector requires connections.${connectionId}.host or url`);
    }
    if (!database) {
      throw new Error(`Native PostgreSQL connector requires connections.${connectionId}.database or url`);
    }
    if (!user) {
      throw new Error(`Native PostgreSQL connector requires connections.${connectionId}.username, user, or url`);
    }
  }

  const sslModeOptsOut = sslmode === 'prefer' || sslmode === 'disable';
  const target =
    referencedUrl && !sslModeOptsOut
      ? { connectionString: referencedUrl }
      : { host, port: numberValue(merged.port) ?? 5432, database, user, password };
  const config: KtxPostgresPoolConfig = {
    max: 10,
    idleTimeoutMillis: 30_000,
    connectionTimeoutMillis: 10_000,
    ...target,
  };

  const searchPathSchemas = searchPathSchemasFromConnection(merged);
  if (searchPathSchemas.length > 0) {
    config.options = `-c search_path=${searchPathSchemas.join(',')}`;
  }

  if (merged.ssl && !sslModeOptsOut) {
    config.ssl = { rejectUnauthorized: merged.rejectUnauthorized ?? true };
  }
  return config;
}
|
||||
|
||||
export class KtxPostgresScanConnector implements KtxScanConnector {
|
||||
readonly id: string;
|
||||
readonly driver = 'postgres' as const;
|
||||
readonly capabilities = createKtxConnectorCapabilities({
|
||||
tableSampling: true,
|
||||
columnSampling: true,
|
||||
columnStats: true,
|
||||
readOnlySql: true,
|
||||
nestedAnalysis: true,
|
||||
formalForeignKeys: true,
|
||||
estimatedRowCounts: true,
|
||||
});
|
||||
|
||||
private readonly connectionId: string;
|
||||
private readonly connection: KtxPostgresConnectionConfig;
|
||||
private readonly poolConfig: KtxPostgresPoolConfig;
|
||||
private readonly poolFactory: KtxPostgresPoolFactory;
|
||||
private readonly endpointResolver?: KtxPostgresEndpointResolver;
|
||||
private readonly now: () => Date;
|
||||
private readonly dialect = new KtxPostgresDialect();
|
||||
private pool: KtxPostgresPool | null = null;
|
||||
private lastIdlePoolError: Error | null = null;
|
||||
private resolvedEndpoint: KtxPostgresResolvedEndpoint | null = null;
|
||||
|
||||
constructor(options: KtxPostgresScanConnectorOptions) {
|
||||
this.connectionId = options.connectionId;
|
||||
this.connection = options.connection ?? {};
|
||||
this.poolConfig = postgresPoolConfigFromConfig({
|
||||
connectionId: options.connectionId,
|
||||
connection: options.connection,
|
||||
env: options.env,
|
||||
});
|
||||
this.poolFactory = options.poolFactory ?? new DefaultPostgresPoolFactory();
|
||||
this.endpointResolver = options.endpointResolver;
|
||||
this.now = options.now ?? (() => new Date());
|
||||
this.id = `postgres:${options.connectionId}`;
|
||||
}
|
||||
|
||||
/**
 * Probes the connection by running `SELECT 1`.
 *
 * @returns `{ success: true }` when the query succeeds; otherwise
 *   `{ success: false, error }` with the failure message. Never rejects on a
 *   query failure.
 */
async testConnection(): Promise<{ success: boolean; error?: string }> {
  try {
    await this.query('SELECT 1');
  } catch (cause) {
    const message = cause instanceof Error ? cause.message : String(cause);
    return { success: false, error: message };
  }
  return { success: true };
}
|
||||
|
||||
/**
 * Produces a schema snapshot covering every configured schema.
 *
 * Schemas are loaded one after another, in configuration order.
 *
 * @param input Scan input; its `connectionId` is checked via `assertConnection`.
 * @param _ctx Scan context (unused).
 * @returns Snapshot holding the tables plus summary metadata (database, host,
 *   schemas, table and column counts).
 */
async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
  this.assertConnection(input.connectionId);

  const schemas = schemasFromConnection(this.connection);
  const tables: KtxSchemaTable[] = [];
  let totalColumns = 0;
  for (const schema of schemas) {
    for (const table of await this.loadSchemaTables(schema)) {
      tables.push(table);
      totalColumns += table.columns.length;
    }
  }

  const extractedAt = this.now().toISOString();
  return {
    connectionId: this.connectionId,
    driver: 'postgres',
    extractedAt,
    scope: { schemas },
    metadata: {
      database: this.poolConfig.database ?? this.connection.database ?? null,
      schemas,
      host: this.poolConfig.host ?? this.connection.host ?? null,
      table_count: tables.length,
      total_columns: totalColumns,
    },
    tables,
  };
}
|
||||
|
||||
/**
 * Fetches sample rows from a table.
 *
 * @param input Table reference, optional column list and row limit; its
 *   `connectionId` is checked via `assertConnection`.
 * @param _ctx Scan context (unused).
 * @returns Headers, header types, rows and total row count of the sample query.
 */
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxPostgresTableSampleResult> {
  this.assertConnection(input.connectionId);
  const sampleSql = this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns);
  const { headers, headerTypes, rows, totalRows } = await this.query(sampleSql);
  return { headers, headerTypes, rows, totalRows };
}
|
||||
|
||||
/**
 * Fetches sample values for one column.
 *
 * @param input Table reference, column name and limit; its `connectionId` is
 *   checked via `assertConnection`.
 * @param _ctx Scan context (unused).
 * @returns The first cell of every non-empty row whose first cell is not
 *   `null`. `nullCount` and `distinctCount` are always `null` here.
 */
async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
  this.assertConnection(input.connectionId);
  const tableName = this.qTableName(input.table);
  const sampleSql = this.dialect.generateColumnSampleQuery(tableName, input.column, input.limit);
  const { rows } = await this.query(sampleSql);
  const values = rows.flatMap((row) => (row.length > 0 && row[0] !== null ? [row[0]] : []));
  return { values, nullCount: null, distinctCount: null };
}
|
||||
|
||||
/**
 * Reports statistics for a single column.
 *
 * Only the distinct count is populated, taken from `getColumnStatistics`;
 * min, max, average and null count are always `null`.
 *
 * @param input Table reference and column name.
 * @param _ctx Scan context (unused).
 * @returns The stats object, or `null` when no cardinality is known for the column.
 */
async columnStats(input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
  const tableStats = await this.getColumnStatistics(input.table);
  const distinctCount = tableStats?.cardinalityByColumn.get(input.column);
  if (distinctCount === undefined) {
    return null;
  }
  return { min: null, max: null, average: null, nullCount: null, distinctCount };
}
|
||||
|
||||
/**
 * Runs caller-supplied SQL after passing it through `assertReadOnlySql` and
 * `limitSqlForExecution`.
 *
 * Array `params` are sent as-is; any other `params` value goes through the
 * dialect's `prepareQuery`.
 *
 * @param input SQL text, optional `maxRows` and optional parameters; its
 *   `connectionId` is checked via `assertConnection`.
 * @param _ctx Scan context (unused).
 * @returns The query result with `rowCount` set to the number of rows returned.
 */
async executeReadOnly(input: KtxPostgresReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
  this.assertConnection(input.connectionId);

  const readOnlySql = assertReadOnlySql(input.sql);
  const limitedSql = limitSqlForExecution(readOnlySql, input.maxRows);
  const boundParams = input.params;
  const prepared = Array.isArray(boundParams)
    ? { sql: limitedSql, params: boundParams }
    : this.dialect.prepareQuery(limitedSql, boundParams);

  const result = await this.query(prepared.sql, prepared.params);
  const rowCount = result.rows.length;
  return { ...result, rowCount };
}
|
||||
|
||||
/**
 * Measures a column's cardinality over a sample and, when it is low enough,
 * lists its distinct values.
 *
 * @param table Table reference.
 * @param columnName Column to inspect.
 * @param options `sampleSize` (default 10000) for the cardinality query,
 *   `maxCardinality` above which values are not listed, and `limit` for the
 *   distinct-values query.
 * @returns `null` when the cardinality cannot be read as a finite number;
 *   `{ values: [], cardinality: 0 }` for zero cardinality;
 *   `{ values: null, cardinality }` when it exceeds `maxCardinality`;
 *   otherwise the non-null distinct values as strings, with the cardinality.
 */
async getColumnDistinctValues(
  table: KtxTableRef,
  columnName: string,
  options: KtxPostgresColumnDistinctValuesOptions,
): Promise<KtxPostgresColumnDistinctValuesResult | null> {
  const sampleSize = options.sampleSize ?? 10000;
  const tableName = this.qTableName(table);
  const quotedColumn = this.dialect.quoteIdentifier(columnName);

  const cardinalitySql = this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize);
  const cardinalityRows = await this.queryRaw<PostgresCountRow>(cardinalitySql);
  const cardinality = finiteNumber(cardinalityRows[0]?.cardinality);

  if (cardinality === null) return null;
  if (cardinality === 0) return { values: [], cardinality: 0 };
  if (cardinality > options.maxCardinality) return { values: null, cardinality };

  const valuesSql = this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit);
  const valuesRows = await this.queryRaw<PostgresDistinctValueRow>(valuesSql);
  const values: string[] = [];
  for (const { val } of valuesRows) {
    if (val !== null) {
      values.push(String(val));
    }
  }
  return { values, cardinality };
}
|
||||
|
||||
/**
 * Loads estimated per-column cardinalities for a table.
 *
 * The schema is `table.db`, else the first configured schema, else `'public'`.
 *
 * @param table Table reference.
 * @returns A map from column name to estimated cardinality, or `null` when
 *   the dialect provides no statistics query or no row yields a finite number.
 */
async getColumnStatistics(table: KtxTableRef): Promise<KtxPostgresColumnStatisticsResult | null> {
  const schema = table.db ?? schemasFromConnection(this.connection)[0] ?? 'public';
  const statsSql = this.dialect.generateColumnStatisticsQuery(schema, table.name);
  if (!statsSql) {
    return null;
  }

  const statsRows = await this.queryRaw<PostgresStatsRow>(statsSql);
  const cardinalityByColumn = new Map<string, number>();
  statsRows.forEach(({ column_name: columnName, estimated_cardinality: estimate }) => {
    const cardinality = finiteNumber(estimate);
    if (cardinality === null) {
      return;
    }
    cardinalityByColumn.set(columnName, cardinality);
  });

  if (cardinalityByColumn.size === 0) {
    return null;
  }
  return { cardinalityByColumn };
}
|
||||
|
||||
/**
 * Counts the rows of a table with `SELECT COUNT(*)`.
 *
 * @param table A table reference, or a bare table name that is looked up in
 *   the first configured schema (falling back to `'public'`).
 * @returns The row count, or `0` when the result cannot be read as a finite number.
 */
async getTableRowCount(table: string | PostgresTableRef): Promise<number> {
  let target: PostgresTableRef;
  if (typeof table === 'string') {
    const defaultSchema = schemasFromConnection(this.connection)[0] ?? 'public';
    target = { catalog: null, db: defaultSchema, name: table };
  } else {
    target = table;
  }
  const countRows = await this.queryRaw<PostgresCountRow>(`SELECT COUNT(*) AS count FROM ${this.qTableName(target)}`);
  const counted = finiteNumber(countRows[0]?.count);
  return counted ?? 0;
}
|
||||
|
||||
qTableName(table: PostgresTableRef): string {
|
||||
return this.dialect.formatTableName(table);
|
||||
}
|
||||
|
||||
quoteIdentifier(identifier: string): string {
|
||||
return this.dialect.quoteIdentifier(identifier);
|
||||
}
|
||||
|
||||
async listSchemas(): Promise<string[]> {
|
||||
const rows = await this.queryRaw<PostgresSchemaRow>(`
|
||||
SELECT schema_name
|
||||
FROM information_schema.schemata
|
||||
WHERE schema_name <> 'information_schema'
|
||||
AND schema_name NOT LIKE 'pg_%'
|
||||
ORDER BY schema_name
|
||||
`);
|
||||
return rows.map((row) => row.schema_name);
|
||||
}
|
||||
|
||||
async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
|
||||
const filterSchemas = schemas ?? (await this.listSchemas());
|
||||
if (filterSchemas.length === 0) return [];
|
||||
const rows = await this.queryRaw<PostgresTableListRow>(
|
||||
`
|
||||
SELECT n.nspname AS schema_name, c.relname AS table_name, c.relkind AS table_kind
|
||||
FROM pg_catalog.pg_class c
|
||||
JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
|
||||
WHERE n.nspname = ANY($1)
|
||||
AND c.relkind IN ('r', 'v')
|
||||
ORDER BY n.nspname, c.relname
|
||||
`,
|
||||
[filterSchemas],
|
||||
);
|
||||
return rows.map((row) => ({
|
||||
schema: row.schema_name,
|
||||
name: row.table_name,
|
||||
kind: row.table_kind === 'v' ? ('view' as const) : ('table' as const),
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Releases the connection pool and any resolved endpoint.
 *
 * References are cleared before awaiting so a failed or overlapping call
 * cannot call `end()` twice on the same pool. The endpoint's `close` hook
 * runs in `finally`, so it is still invoked when `pool.end()` rejects.
 *
 * @returns Resolves once the pool has ended and the endpoint (if it has a
 *   `close` hook) has been closed.
 * @throws Re-throws a failure from `pool.end()` or `close()`; if both fail,
 *   the `close()` error is the one propagated.
 */
async cleanup(): Promise<void> {
  const pool = this.pool;
  this.pool = null;

  // As before, an endpoint without a `close` hook stays cached.
  const endpoint = this.resolvedEndpoint?.close ? this.resolvedEndpoint : null;
  if (endpoint) {
    this.resolvedEndpoint = null;
  }

  try {
    if (pool) {
      await pool.end();
    }
  } finally {
    if (endpoint?.close) {
      await endpoint.close();
    }
  }
}
|
||||
|
||||
/**
 * Loads every ordinary table and view (`relkind` 'r' / 'v') of one schema
 * together with its columns, primary-key columns, and foreign keys.
 *
 * Four catalog queries are issued one after another; their rows are grouped
 * by table name and folded into one `KtxSchemaTable` per table row.
 *
 * @param schema - Schema name, bound as `$1` in every query.
 * @returns One entry per table/view, ordered by table name.
 */
private async loadSchemaTables(schema: string): Promise<KtxSchemaTable[]> {
  const tables = await this.queryRaw<PostgresTableRow>(
    `
    SELECT
      c.relname AS table_name,
      c.relkind AS table_kind,
      c.reltuples::bigint AS row_count,
      d.description AS table_comment
    FROM pg_catalog.pg_class c
    JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
    LEFT JOIN pg_catalog.pg_description d
      ON d.objoid = c.oid AND d.objsubid = 0
    WHERE n.nspname = $1
      AND c.relkind IN ('r', 'v')
    ORDER BY c.relname
    `,
    [schema],
  );
  const columns = await this.queryRaw<PostgresColumnRow>(
    `
    SELECT
      c.relname AS table_name,
      a.attname AS column_name,
      format_type(a.atttypid, a.atttypmod) AS data_type,
      NOT a.attnotnull AS is_nullable,
      d.description AS column_comment
    FROM pg_catalog.pg_attribute a
    JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
    JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
    LEFT JOIN pg_catalog.pg_description d
      ON d.objoid = c.oid AND d.objsubid = a.attnum
    WHERE n.nspname = $1
      AND c.relkind IN ('r', 'v')
      AND a.attnum > 0
      AND NOT a.attisdropped
    ORDER BY c.relname, a.attnum
    `,
    [schema],
  );
  // Constraint names are only unique per table, so key_column_usage must be
  // matched on table_name as well; otherwise two tables that reuse a
  // constraint name would receive each other's key columns.
  const primaryKeys = await this.queryRaw<PostgresPrimaryKeyRow>(
    `
    SELECT tc.table_name, kcu.column_name
    FROM information_schema.table_constraints tc
    JOIN information_schema.key_column_usage kcu
      ON tc.constraint_name = kcu.constraint_name
      AND tc.table_schema = kcu.table_schema
      AND tc.table_name = kcu.table_name
    WHERE tc.constraint_type = 'PRIMARY KEY'
      AND tc.table_schema = $1
    ORDER BY tc.table_name, kcu.ordinal_position
    `,
    [schema],
  );
  // constraint_column_usage.table_schema is the schema of the REFERENCED
  // table, so it must not be compared with the constraining table's schema:
  // doing so silently drops every cross-schema foreign key. The constraint's
  // own schema (constraint_schema) is the correct join key.
  const foreignKeys = await this.queryRaw<PostgresForeignKeyRow>(
    `
    SELECT
      tc.table_name,
      kcu.column_name,
      ccu.table_schema AS foreign_table_schema,
      ccu.table_name AS foreign_table_name,
      ccu.column_name AS foreign_column_name,
      tc.constraint_name
    FROM information_schema.table_constraints AS tc
    JOIN information_schema.key_column_usage AS kcu
      ON tc.constraint_name = kcu.constraint_name
      AND tc.table_schema = kcu.table_schema
      AND tc.table_name = kcu.table_name
    JOIN information_schema.constraint_column_usage AS ccu
      ON ccu.constraint_name = tc.constraint_name
      AND ccu.constraint_schema = tc.constraint_schema
    WHERE tc.constraint_type = 'FOREIGN KEY'
      AND tc.table_schema = $1
    ORDER BY tc.table_name, kcu.column_name
    `,
    [schema],
  );

  const columnsByTable = groupByTable(columns);
  const primaryKeysByTable = primaryKeyMap(primaryKeys);
  const foreignKeysByTable = groupByTable(foreignKeys);
  return tables.map((table) =>
    this.toSchemaTable(
      schema,
      table,
      columnsByTable.get(table.table_name) ?? [],
      primaryKeysByTable.get(table.table_name) ?? new Set<string>(),
      foreignKeysByTable.get(table.table_name) ?? [],
    ),
  );
}
|
||||
|
||||
/**
 * Assembles the `KtxSchemaTable` for one catalog row.
 *
 * Views (`table_kind === 'v'`) never report a row estimate; empty comments
 * are normalised to `null`.
 */
private toSchemaTable(
  schema: string,
  table: PostgresTableRow,
  columns: PostgresColumnRow[],
  primaryKeys: Set<string>,
  foreignKeys: PostgresForeignKeyRow[],
): KtxSchemaTable {
  const isView = table.table_kind === 'v';
  const estimatedRows = isView ? null : finiteNumber(table.row_count);
  const schemaColumns = columns.map((column) => this.toSchemaColumn(column, primaryKeys));
  const schemaForeignKeys = foreignKeys.map((foreignKey) => this.toSchemaForeignKey(foreignKey));
  return {
    catalog: null,
    db: schema,
    name: table.table_name,
    kind: isView ? 'view' : 'table',
    comment: table.table_comment || null,
    estimatedRows,
    columns: schemaColumns,
    foreignKeys: schemaForeignKeys,
  };
}
|
||||
|
||||
/**
 * Maps one column row to a `KtxSchemaColumn`, deriving the normalized and
 * dimension types from the dialect and flagging primary-key membership.
 */
private toSchemaColumn(column: PostgresColumnRow, primaryKeys: Set<string>): KtxSchemaColumn {
  const { column_name: name, data_type: nativeType } = column;
  return {
    name,
    nativeType,
    normalizedType: this.dialect.mapDataType(nativeType),
    dimensionType: this.dialect.mapToDimensionType(nativeType),
    nullable: column.is_nullable,
    primaryKey: primaryKeys.has(name),
    comment: column.column_comment || null,
  };
}
|
||||
|
||||
/** Maps one foreign-key row to a `KtxSchemaForeignKey`; the catalog is always `null`. */
private toSchemaForeignKey(row: PostgresForeignKeyRow): KtxSchemaForeignKey {
  const {
    column_name: fromColumn,
    foreign_table_schema: toDb,
    foreign_table_name: toTable,
    foreign_column_name: toColumn,
  } = row;
  return {
    fromColumn,
    toCatalog: null,
    toDb,
    toTable,
    toColumn,
    constraintName: row.constraint_name || null,
  };
}
|
||||
|
||||
/**
 * Returns the connection pool, creating it on first use.
 *
 * When an endpoint resolver is configured, it is asked for the effective
 * host/port first and the resolved endpoint is remembered on the instance.
 * Errors emitted by the pool are stored in `lastIdlePoolError`.
 */
private async getPool(): Promise<KtxPostgresPool> {
  if (this.pool) {
    return this.pool;
  }

  let config = { ...this.poolConfig };
  if (this.endpointResolver) {
    const endpoint = await this.endpointResolver.resolve({
      host: config.host ?? this.connection.host ?? 'localhost',
      port: config.port ?? numberValue(this.connection.port) ?? 5432,
      connection: this.connection,
    });
    this.resolvedEndpoint = endpoint;
    config = { ...config, host: endpoint.host, port: endpoint.port };
  }

  const pool = this.poolFactory.createPool(config);
  this.pool = pool;
  // `on` is optional on the pool type, hence the optional call.
  pool.on?.('error', (error) => {
    this.lastIdlePoolError = error;
  });
  return pool;
}
|
||||
|
||||
/**
 * Runs a statement on a pooled client and returns its rows cast to `T`.
 * The SQL is passed through unchanged; the client is always released.
 */
private async queryRaw<T>(sql: string, params?: unknown[]): Promise<T[]> {
  this.throwIdlePoolErrorIfPresent();
  const client = await (await this.getPool()).connect();
  try {
    const { rows } = await client.query(sql, params);
    return rows as T[];
  } finally {
    client.release();
  }
}
|
||||
|
||||
/**
 * Runs a statement that must pass `assertReadOnlySql` and shapes the result
 * as a `KtxQueryResult`.
 *
 * Only array-style parameters are forwarded to the driver; a record of
 * named parameters is not passed along here. Column types are looked up in
 * `PG_OID_TYPE_MAP`, falling back to `oid:<id>`.
 */
private async query(sql: string, params?: Record<string, unknown> | unknown[]): Promise<KtxQueryResult> {
  this.throwIdlePoolErrorIfPresent();
  const pool = await this.getPool();
  const client = await pool.connect();
  try {
    const readOnlySql = assertReadOnlySql(sql);
    const positionalParams = Array.isArray(params) ? params : undefined;
    const result = await client.query(readOnlySql, positionalParams);

    const headers = (result.fields ?? []).map((field) => field.name);
    const headerTypes = (result.fields ?? []).map(
      (field) => PG_OID_TYPE_MAP[field.dataTypeID] ?? `oid:${field.dataTypeID}`,
    );
    const rows = queryRows(result);
    const returnedRows = result.rows.length;
    return {
      headers,
      headerTypes,
      rows,
      totalRows: returnedRows,
      rowCount: returnedRows,
    };
  } finally {
    client.release();
  }
}
|
||||
|
||||
/**
 * Guards against using this connector for a different connection.
 *
 * @throws Error when `connectionId` is not the id this connector was built for.
 */
private assertConnection(connectionId: string): void {
  if (connectionId === this.connectionId) {
    return;
  }
  throw new Error(`PostgreSQL connector ${this.connectionId} cannot run scan for ${connectionId}`);
}
|
||||
|
||||
/**
 * Rethrows an error previously recorded in `lastIdlePoolError`, if any.
 * The stored error is cleared first, so it is surfaced only once.
 */
private throwIdlePoolErrorIfPresent(): void {
  const pending = this.lastIdlePoolError;
  if (pending) {
    this.lastIdlePoolError = null;
    throw pending;
  }
}
|
||||
}
|
||||
52
packages/cli/src/connectors/postgres/dialect.test.ts
Normal file
52
packages/cli/src/connectors/postgres/dialect.test.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxPostgresDialect } from './dialect.js';
|
||||
|
||||
describe('KtxPostgresDialect', () => {
  const postgres = new KtxPostgresDialect();
  // Pre-quoted table reference reused by the SQL-generation assertions.
  const ordersTable = '"public"."orders"';

  it('quotes identifiers and formats schema-qualified tables', () => {
    const quoted = postgres.quoteIdentifier('order"items');
    const qualified = postgres.formatTableName({ catalog: null, db: 'public', name: 'orders' });
    const unqualified = postgres.formatTableName({ catalog: null, db: null, name: 'orders' });

    expect(quoted).toBe('"order""items"');
    expect(qualified).toBe('"public"."orders"');
    expect(unqualified).toBe('"orders"');
  });

  it('maps native PostgreSQL types to KTX dimension types', () => {
    const expectations: Array<[nativeType: string, dimensionType: string]> = [
      ['timestamp with time zone', 'time'],
      ['numeric(12,2)', 'number'],
      ['uuid', 'string'],
      ['boolean', 'boolean'],
      ['jsonb', 'string'],
    ];
    for (const [nativeType, dimensionType] of expectations) {
      expect(postgres.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('generates sample, distinct-value, statistics, and time SQL', () => {
    const sampleSql = postgres.generateSampleQuery(ordersTable, 5, ['id', 'status']);
    expect(sampleSql).toBe('SELECT "id", "status" FROM "public"."orders" LIMIT 5');

    const columnSampleSql = postgres.generateColumnSampleQuery(ordersTable, 'status', 10);
    expect(columnSampleSql).toContain('TRIM(CAST("status" AS TEXT)) != \'\'');

    const distinctSql = postgres.generateDistinctValuesQuery(ordersTable, '"status"', 20);
    expect(distinctSql).toContain('SELECT DISTINCT "status"::text AS val');

    expect(postgres.generateColumnStatisticsQuery('public', 'orders')).toContain('FROM pg_stats s');
    expect(postgres.getTimeTruncExpression('"created_at"', 'month')).toBe('DATE_TRUNC(\'month\', "created_at")');
  });

  it('prepares named parameters with PostgreSQL positional parameters', () => {
    const simple = postgres.prepareQuery('select * from orders where id = :id and status = :status', {
      id: 1,
      status: 'paid',
    });
    expect(simple).toEqual({
      sql: 'select * from orders where id = $1 and status = $2',
      params: [1, 'paid'],
    });

    // One parameter name is a prefix of the other; each must keep its own index.
    const prefixed = postgres.prepareQuery('select :Client_Name_10, :Client_Name_1', {
      Client_Name_1: 'short',
      Client_Name_10: 'long',
    });
    expect(prefixed).toEqual({
      sql: 'select $2, $1',
      params: ['short', 'long'],
    });
  });
});
|
||||
213
packages/cli/src/connectors/postgres/dialect.ts
Normal file
213
packages/cli/src/connectors/postgres/dialect.ts
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
type PostgresTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL text helpers for PostgreSQL: identifier quoting, native-type
 * classification, and generation of the sampling / statistics / time-bucket
 * snippets used when scanning a database.
 *
 * Unless a method says it quotes its argument, table and column arguments
 * are interpolated verbatim and must already be quoted by the caller.
 */
export class KtxPostgresDialect {
  readonly type = 'postgresql';

  /** Exact (lower-cased, length/precision stripped) native type -> dimension type. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    timestamp: 'time',
    'timestamp without time zone': 'time',
    'timestamp with time zone': 'time',
    timestamptz: 'time',
    datetime: 'time',
    date: 'time',
    time: 'time',
    integer: 'number',
    int: 'number',
    int2: 'number',
    int4: 'number',
    int8: 'number',
    bigint: 'number',
    smallint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    float4: 'number',
    float8: 'number',
    'double precision': 'number',
    real: 'number',
    money: 'number',
    text: 'string',
    varchar: 'string',
    'character varying': 'string',
    char: 'string',
    character: 'string',
    uuid: 'string',
    json: 'string',
    jsonb: 'string',
    boolean: 'boolean',
    bool: 'boolean',
  };

  /** Wraps an identifier in double quotes, doubling any embedded double quote. */
  quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /** Formats `"db"."name"` when `db` is set, otherwise just `"name"`; `catalog` is not used. */
  formatTableName(table: PostgresTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native type as 'time', 'number', 'boolean' or 'string'.
   *
   * The type is lower-cased and anything from the first `(` on is dropped
   * (`numeric(12,2)` -> `numeric`). An exact table hit wins; otherwise
   * substring heuristics apply, and 'string' is the fallback.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    const normalized = lower.includes('(') ? lower.split('(')[0]!.trim() : lower;

    // Own-property check: a type literally named "constructor" or "toString"
    // must not resolve to a member inherited from Object.prototype.
    const mapped = Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)
      ? this.typeMappings[normalized]
      : undefined;
    if (mapped) {
      return mapped;
    }

    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    const numericHints = ['int', 'num', 'dec', 'float', 'double'];
    if (numericHints.some((hint) => normalized.includes(hint))) {
      return 'number';
    }
    if (normalized.includes('bool')) {
      return 'boolean';
    }
    return 'string';
  }

  /** Builds `SELECT <cols> FROM <table> LIMIT <n>`; column names are quoted, `*` when none are given. */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /** Samples one column (quoted here), skipping NULLs and values that are blank after trimming. */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS TEXT)) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to PostgreSQL positional `$n` parameters.
   *
   * Indices follow the key order of `params`. A colon that is itself
   * preceded by a colon is left alone, so `::type` casts survive even when a
   * parameter shares the type's name.
   *
   * @returns The rewritten SQL and the values in positional order; when
   *   `params` is omitted the SQL is returned untouched with no values.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
    if (!params) {
      return { sql, params: undefined };
    }
    const paramNames = Object.keys(params);
    const values: unknown[] = paramNames.map((name) => params[name]);
    const paramIndexMap = new Map<string, number>();
    paramNames.forEach((name, index) => {
      paramIndexMap.set(name, index + 1);
    });

    // Longest names first so a name is never consumed as the prefix of another.
    const sortedKeys = [...paramNames].sort((a, b) => b.length - a.length);
    let parameterizedQuery = sql;
    for (const name of sortedKeys) {
      // Escape the name so regex metacharacters in a key are matched literally.
      const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const placeholder = `$${paramIndexMap.get(name)}`;
      // A replacer function keeps "$n" from being read as a replacement pattern.
      parameterizedQuery = parameterizedQuery.replace(
        new RegExp(`(?<!:):${escapedName}\\b`, 'g'),
        () => placeholder,
      );
    }
    return { sql: parameterizedQuery, params: values };
  }

  /** Row-level sampling predicate; empty unless `0 < samplePct < 1`. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `RANDOM() < ${samplePct}`;
  }

  /** `TABLESAMPLE SYSTEM` clause with the fraction expressed as a percentage; empty unless `0 < samplePct < 1`. */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `TABLESAMPLE SYSTEM (${samplePct * 100})`;
  }

  /** `LIMIT n`, plus `OFFSET m` only for a positive offset. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting rows where `column` is NULL. */
  getNullCountExpression(column: string): string {
    return `COUNT(*) FILTER (WHERE ${column} IS NULL)`;
  }

  /** Aggregate expression counting distinct values of `column`. */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /** Counts distinct non-NULL values among the first `sampleSize` rows returned. */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /** Lists up to `limit` distinct non-NULL values of a column, cast to text and sorted. */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT ${columnName}::text AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /**
   * Builds a `pg_stats` query returning an estimated cardinality per column.
   * Negative `n_distinct` values are scaled by the table's `reltuples`.
   * Schema and table names are embedded as string literals with single
   * quotes doubled.
   */
  generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null {
    return `
      SELECT
        s.attname AS column_name,
        CASE
          WHEN s.n_distinct > 0 THEN s.n_distinct::bigint
          WHEN s.n_distinct < 0 THEN (-s.n_distinct * c.reltuples)::bigint
          ELSE NULL
        END AS estimated_cardinality
      FROM pg_stats s
      JOIN pg_class c ON c.relname = s.tablename
      JOIN pg_namespace n ON c.relnamespace = n.oid AND n.nspname = s.schemaname
      WHERE s.schemaname = '${schemaName.replace(/'/g, "''")}'
        AND s.tablename = '${tableName.replace(/'/g, "''")}'
        AND s.n_distinct IS NOT NULL
    `;
  }

  /** Like `generateCardinalitySampleQuery`, but the sampled rows are picked via `ORDER BY RANDOM()`. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RANDOM()
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /** `DATE_TRUNC` at the given granularity, optionally after shifting the column `AT TIME ZONE`. */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
    return `DATE_TRUNC('${granularity}', ${col})`;
  }

  /**
   * Buckets a timestamp into fixed-width intervals counted from `origin`
   * (default `1970-01-01`), optionally after shifting `AT TIME ZONE`.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
    const originExpr = origin ? `TIMESTAMP '${origin.replace(/'/g, "''")}'` : "TIMESTAMP '1970-01-01'";
    const intervalLiteral = interval.replace(/'/g, "''");
    return `${originExpr} + FLOOR(EXTRACT(EPOCH FROM (${col} - ${originExpr})) / EXTRACT(EPOCH FROM INTERVAL '${intervalLiteral}')) * INTERVAL '${intervalLiteral}'`;
  }

  /** Renders an `INTERVAL '...'` literal with single quotes doubled. */
  parseIntervalToSql(interval: string): string {
    return `INTERVAL '${interval.replace(/'/g, "''")}'`;
  }
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { KtxPostgresHistoricSqlQueryClient } from './historic-sql-query-client.js';
|
||||
import type { KtxPostgresPoolConfig, KtxPostgresPoolFactory } from './connector.js';
|
||||
|
||||
describe('KtxPostgresHistoricSqlQueryClient', () => {
  it('executes parameterized read-only SQL through the native Postgres connector pool', async () => {
    // Every statement that reaches the fake driver is recorded here.
    const recordedQueries: Array<{ sql: string; params?: unknown[] }> = [];
    const releaseSpy = vi.fn();
    const endSpy = vi.fn(async () => {});

    const poolFactory: KtxPostgresPoolFactory = {
      createPool(_config: KtxPostgresPoolConfig) {
        return {
          async connect() {
            return {
              async query(sql: string, params?: unknown[]) {
                recordedQueries.push({ sql, params });
                return {
                  fields: [{ name: 'answer', dataTypeID: 23 }],
                  rows: [{ answer: 42 }],
                };
              },
              release: releaseSpy,
            };
          },
          end: endSpy,
        };
      },
    };

    const client = new KtxPostgresHistoricSqlQueryClient({
      connectionId: 'warehouse',
      connection: {
        driver: 'postgres',
        url: 'postgresql://readonly:secret@pg.example.test/warehouse', // pragma: allowlist secret
      },
      poolFactory,
    });

    const outcome = await client.executeQuery('SELECT $1::int AS answer', [42]);
    expect(outcome).toEqual({
      headers: ['answer'],
      rows: [[42]],
      totalRows: 1,
    });
    expect(recordedQueries).toEqual([{ sql: 'SELECT $1::int AS answer', params: [42] }]);

    await client.cleanup();
    expect(releaseSpy).toHaveBeenCalledTimes(1);
    expect(endSpy).toHaveBeenCalledTimes(1);
  });
});
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import type { KtxPostgresQueryClient } from '../../context/ingest/adapters/historic-sql/types.js';
|
||||
import { KtxPostgresScanConnector, type KtxPostgresScanConnectorOptions } from './connector.js';
|
||||
|
||||
export type KtxPostgresHistoricSqlQueryClientOptions = KtxPostgresScanConnectorOptions;
|
||||
|
||||
/**
 * `KtxPostgresQueryClient` implementation that delegates to a
 * `KtxPostgresScanConnector` built from the same options.
 */
export class KtxPostgresHistoricSqlQueryClient implements KtxPostgresQueryClient {
  private readonly connectionId: string;
  private readonly connector: KtxPostgresScanConnector;

  constructor(options: KtxPostgresHistoricSqlQueryClientOptions) {
    this.connectionId = options.connectionId;
    this.connector = new KtxPostgresScanConnector(options);
  }

  /**
   * Executes `sql` through the connector's read-only path and returns only
   * the headers, rows and total row count of the result.
   */
  async executeQuery(
    sql: string,
    params?: unknown[],
  ): Promise<{ headers: string[]; rows: unknown[][]; totalRows: number }> {
    const request = { connectionId: this.connectionId, sql, params };
    // An empty placeholder is passed as the scan context.
    const { headers, rows, totalRows } = await this.connector.executeReadOnly(request, {} as never);
    return { headers, rows, totalRows };
  }

  /** Delegates to the underlying connector's cleanup. */
  async cleanup(): Promise<void> {
    await this.connector.cleanup();
  }
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
import {
|
||||
KtxPostgresScanConnector,
|
||||
type KtxPostgresConnectionConfig,
|
||||
type KtxPostgresEndpointResolver,
|
||||
type KtxPostgresPoolFactory,
|
||||
} from './connector.js';
|
||||
|
||||
/** Inputs for `createPostgresLiveDatabaseIntrospection`. */
interface CreatePostgresLiveDatabaseIntrospectionOptions {
  /** Connection configurations keyed by connection id. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Optional pool factory forwarded to each connector. */
  poolFactory?: KtxPostgresPoolFactory;
  /** Optional endpoint resolver forwarded to each connector. */
  endpointResolver?: KtxPostgresEndpointResolver;
  /** Optional clock forwarded to each connector. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Builds a `LiveDatabaseIntrospectionPort` backed by PostgreSQL.
 *
 * Each `extractSchema` call creates a dedicated connector for the requested
 * connection id, introspects it, and always cleans the connector up
 * afterwards, whether or not introspection succeeded.
 */
export function createPostgresLiveDatabaseIntrospection(
  options: CreatePostgresLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    // Options are read per call, not captured at factory time.
    const { connections, poolFactory, endpointResolver, now } = options;
    const connector = new KtxPostgresScanConnector({
      connectionId,
      connection: connections[connectionId] as KtxPostgresConnectionConfig | undefined,
      poolFactory,
      endpointResolver,
      now,
    });
    try {
      return await connector.introspect({ connectionId, driver: 'postgres' }, { runId: `postgres-${connectionId}` });
    } finally {
      await connector.cleanup();
    }
  };

  return { extractSchema };
}
|
||||
244
packages/cli/src/connectors/snowflake/connector.test.ts
Normal file
244
packages/cli/src/connectors/snowflake/connector.test.ts
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createSnowflakeLiveDatabaseIntrospection } from '../../connectors/snowflake/live-database-introspection.js';
|
||||
import { isKtxSnowflakeConnectionConfig, KtxSnowflakeScanConnector, snowflakeConnectionConfigFromConfig, type KtxSnowflakeDriver, type KtxSnowflakeDriverFactory } from '../../connectors/snowflake/connector.js';
|
||||
|
||||
/**
 * Builds a driver factory whose single fake driver answers a fixed set of
 * SQL statements and metadata calls; any other SQL makes `query` throw.
 */
function fakeDriverFactory(): KtxSnowflakeDriverFactory {
  // Checked in order: the first marker contained in the SQL wins, so the more
  // specific two-column SELECT must stay ahead of the single-column one.
  const cannedResponses = [
    {
      marker: 'TABLE_CONSTRAINTS',
      respond: () => ({ headers: ['TABLE_NAME', 'COLUMN_NAME'], rows: [['ORDERS', 'ID']], totalRows: 1, rowCount: 1 }),
    },
    {
      marker: 'SELECT "ID", "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"',
      respond: () => ({
        headers: ['ID', 'STATUS'],
        headerTypes: ['NUMBER', 'VARCHAR'],
        rows: [[1, 'paid']],
        totalRows: 1,
        rowCount: 1,
      }),
    },
    {
      marker: 'select * from (select ID, STATUS from ORDERS) as ktx_query_result limit 1',
      respond: () => ({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1, rowCount: 1 }),
    },
    {
      marker: 'SELECT "STATUS" FROM "ANALYTICS"."PUBLIC"."ORDERS"',
      respond: () => ({ headers: ['STATUS'], rows: [['paid'], ['open']], totalRows: 2, rowCount: 2 }),
    },
    {
      marker: 'COUNT(DISTINCT val)',
      respond: () => ({ headers: ['CARDINALITY'], rows: [[2]], totalRows: 1, rowCount: 1 }),
    },
    {
      marker: 'SELECT DISTINCT "STATUS"::VARCHAR AS val',
      respond: () => ({ headers: ['VAL'], rows: [['open'], ['paid']], totalRows: 2, rowCount: 2 }),
    },
  ];

  const driver: KtxSnowflakeDriver = {
    test: vi.fn(async () => ({ success: true })),
    query: vi.fn(async (sql: string) => {
      const canned = cannedResponses.find(({ marker }) => sql.includes(marker));
      if (!canned) {
        throw new Error(`Unexpected SQL: ${sql}`);
      }
      return canned.respond();
    }),
    getSchemaMetadata: vi.fn(async () => [
      {
        name: 'ORDERS',
        catalog: 'ANALYTICS',
        db: 'PUBLIC',
        rowCount: 12,
        comment: 'Orders',
        columns: [
          { name: 'ID', type: 'NUMBER(38,0)', nullable: false, comment: 'Primary key' },
          { name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null },
        ],
      },
      {
        name: 'ORDER_SUMMARY',
        catalog: 'ANALYTICS',
        db: 'PUBLIC',
        rowCount: 3,
        comment: null,
        columns: [{ name: 'STATUS', type: 'VARCHAR', nullable: true, comment: null }],
      },
    ]),
    listSchemas: vi.fn(async () => ['PUBLIC', 'MART']),
    listTables: vi.fn(async () => [
      { schema: 'PUBLIC', name: 'ORDERS', kind: 'table' as const },
      { schema: 'PUBLIC', name: 'ORDER_SUMMARY', kind: 'view' as const },
    ]),
    cleanup: vi.fn(async () => undefined),
  };

  return { createDriver: vi.fn(() => driver) };
}
|
||||
|
||||
describe('KtxSnowflakeScanConnector', () => {
  // Shared fixtures; the connection is rebuilt per use so tests never share an object.
  const fixedNow = () => new Date('2026-04-29T18:00:00.000Z');
  const ordersTable = { catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' };
  const passwordConnection = () => ({
    driver: 'snowflake' as const,
    authMethod: 'password' as const,
    account: 'acct',
    warehouse: 'WH',
    database: 'ANALYTICS',
    schema_name: 'PUBLIC',
    username: 'reader',
    password: 'fixture-pass', // pragma: allowlist secret
  });

  it('resolves Snowflake connection configuration safely', () => {
    const minimalSnowflake = {
      driver: 'snowflake',
      account: 'acct',
      warehouse: 'WH',
      database: 'ANALYTICS',
      username: 'reader',
    };
    expect(isKtxSnowflakeConnectionConfig(minimalSnowflake)).toBe(true);
    expect(isKtxSnowflakeConnectionConfig({ driver: 'bigquery' })).toBe(false);

    const resolved = snowflakeConnectionConfigFromConfig({
      connectionId: 'warehouse',
      connection: passwordConnection(),
    });
    expect(resolved).toMatchObject({
      account: 'acct',
      warehouse: 'WH',
      database: 'ANALYTICS',
      schemas: ['PUBLIC'],
      username: 'reader',
      authMethod: 'password',
    });
  });

  it('introspects schema, primary keys, comments, row counts, and dimensions', async () => {
    const connector = new KtxSnowflakeScanConnector({
      connectionId: 'warehouse',
      connection: passwordConnection(),
      driverFactory: fakeDriverFactory(),
      now: fixedNow,
    });

    const snapshot = await connector.introspect(
      { connectionId: 'warehouse', driver: 'snowflake' },
      { runId: 'scan-run-1' },
    );

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'snowflake',
      extractedAt: '2026-04-29T18:00:00.000Z',
      scope: { catalogs: ['ANALYTICS'], schemas: ['PUBLIC'] },
      metadata: {
        account: 'acct',
        warehouse: 'WH',
        database: 'ANALYTICS',
        schemas: ['PUBLIC'],
        table_count: 2,
        total_columns: 3,
      },
    });

    const ordersColumns = snapshot.tables.find((table) => table.name === 'ORDERS')?.columns;
    expect(ordersColumns).toEqual([
      {
        name: 'ID',
        nativeType: 'NUMBER(38,0)',
        normalizedType: 'NUMBER(38,0)',
        dimensionType: 'number',
        nullable: false,
        primaryKey: true,
        comment: 'Primary key',
      },
      {
        name: 'STATUS',
        nativeType: 'VARCHAR',
        normalizedType: 'VARCHAR',
        dimensionType: 'string',
        nullable: true,
        primaryKey: false,
        comment: null,
      },
    ]);
  });

  it('supports read-only query, sampling, distinct values, row counts, schema listing, and cleanup', async () => {
    const driverFactory = fakeDriverFactory();
    const connector = new KtxSnowflakeScanConnector({
      connectionId: 'warehouse',
      connection: passwordConnection(),
      driverFactory,
    });

    const tableSample = connector.sampleTable(
      {
        connectionId: 'warehouse',
        table: { ...ordersTable },
        limit: 1,
        columns: ['ID', 'STATUS'],
      },
      { runId: 'scan-run-1' },
    );
    await expect(tableSample).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], totalRows: 1 });

    const readOnly = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'select ID, STATUS from ORDERS', maxRows: 1 },
      { runId: 'scan-run-1' },
    );
    await expect(readOnly).resolves.toMatchObject({ headers: ['ID', 'STATUS'], rows: [[1, 'paid']], rowCount: 1 });

    const columnSample = connector.sampleColumn(
      {
        connectionId: 'warehouse',
        table: { ...ordersTable },
        column: 'STATUS',
        limit: 2,
      },
      { runId: 'scan-run-1' },
    );
    await expect(columnSample).resolves.toEqual({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

    const distinctValues = connector.getColumnDistinctValues({ ...ordersTable }, 'STATUS', {
      maxCardinality: 10,
      limit: 5,
    });
    await expect(distinctValues).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    await expect(connector.getTableRowCount('ORDERS')).resolves.toBe(12);
    await expect(connector.listSchemas()).resolves.toEqual(['PUBLIC', 'MART']);

    await connector.cleanup();
    // The fake factory hands out one driver; fetch it back from the mock to inspect it.
    const driver = (driverFactory.createDriver as ReturnType<typeof vi.fn>).mock.results[0]?.value as KtxSnowflakeDriver;
    expect(driver.cleanup).toHaveBeenCalledTimes(1);
  });

  it('converts a native snapshot into a live-database introspection snapshot', async () => {
    const introspection = createSnowflakeLiveDatabaseIntrospection({
      connections: {
        warehouse: passwordConnection(),
      },
      driverFactory: fakeDriverFactory(),
      now: fixedNow,
    });

    await expect(introspection.extractSchema('warehouse')).resolves.toMatchObject({
      connectionId: 'warehouse',
      metadata: { database: 'ANALYTICS', schemas: ['PUBLIC'] },
      tables: expect.arrayContaining([
        expect.objectContaining({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' }),
      ]),
    });
  });
});
|
||||
700
packages/cli/src/connectors/snowflake/connector.ts
Normal file
700
packages/cli/src/connectors/snowflake/connector.ts
Normal file
|
|
@ -0,0 +1,700 @@
|
|||
import { createPrivateKey } from 'node:crypto';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import * as snowflake from 'snowflake-sdk';
|
||||
import { KtxSnowflakeDialect } from './dialect.js';
|
||||
|
||||
/**
 * Snowflake connection settings as supplied by configuration. Every known
 * field is optional and additional keys are accepted.
 */
export interface KtxSnowflakeConnectionConfig {
  driver?: string;
  /** Authentication mode: password or RSA key. */
  authMethod?: 'password' | 'rsa';
  account?: string;
  warehouse?: string;
  database?: string;
  /** Single schema name. */
  schema_name?: string;
  /** List of schema names. */
  schema_names?: string[];
  username?: string;
  password?: string;
  privateKey?: string;
  passphrase?: string;
  role?: string;
  /** Any other keys are carried along untyped. */
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Resolved form of a Snowflake connection: the core fields are required and
 * the schema selection is always a list.
 */
export interface KtxSnowflakeResolvedConnectionConfig {
  authMethod: 'password' | 'rsa';
  account: string;
  warehouse: string;
  database: string;
  schemas: string[];
  username: string;
  password?: string;
  privateKey?: string;
  passphrase?: string;
  role?: string;
}
|
||||
|
||||
/** Column metadata as reported by a Snowflake driver. */
export interface KtxSnowflakeRawColumnMetadata {
  name: string;
  /** Native type string as reported by the driver. */
  type: string;
  nullable: boolean;
  comment: string | null;
}
|
||||
|
||||
/** Table metadata, including its columns, as reported by a Snowflake driver. */
export interface KtxSnowflakeRawTableMetadata {
  name: string;
  catalog: string;
  db: string;
  /** Row count, or `null` when none is reported. */
  rowCount: number | null;
  comment: string | null;
  columns: KtxSnowflakeRawColumnMetadata[];
}
|
||||
|
||||
/** Operations a Snowflake driver must provide to the scan connector. */
export interface KtxSnowflakeDriver {
  /** Connectivity check; reports failure via `success: false` and an optional message. */
  test(): Promise<{ success: boolean; error?: string }>;
  /** Runs a SQL statement with optional parameters. */
  query(sql: string, params?: unknown): Promise<KtxQueryResult>;
  /** Returns table metadata, optionally for a single schema. */
  getSchemaMetadata(schemaName?: string): Promise<KtxSnowflakeRawTableMetadata[]>;
  listSchemas(): Promise<string[]>;
  /** Lists tables, optionally restricted to the given schemas. */
  listTables(schemas?: string[]): Promise<KtxTableListEntry[]>;
  cleanup(): Promise<void>;
}
|
||||
|
||||
/** Creates a `KtxSnowflakeDriver` from a resolved connection configuration. */
export interface KtxSnowflakeDriverFactory {
  createDriver(input: {
    resolved: KtxSnowflakeResolvedConnectionConfig;
    /** Optional provider of extra SDK options. */
    sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
  }): KtxSnowflakeDriver;
}
|
||||
|
||||
/** Hook that contributes extra `snowflake-sdk` connection options per connection. */
export interface KtxSnowflakeSdkOptionsProvider {
  /**
   * @returns Options merged over the base connection options, plus an optional
   * `close` callback run during driver cleanup; `undefined` to contribute nothing.
   */
  resolve(input: {
    account: string;
    connection: KtxSnowflakeConnectionConfig;
  }): Promise<{ sdkOptions: Record<string, unknown>; close?: () => Promise<void> } | undefined>;
}
|
||||
|
||||
/** Construction options for the Snowflake scan connector. */
export interface KtxSnowflakeScanConnectorOptions {
  /** Identifier of the configured connection this connector serves. */
  connectionId: string;
  /** Raw connection settings; must describe a Snowflake driver. */
  connection: KtxSnowflakeConnectionConfig | undefined;
  /** Driver factory override; the SDK-backed factory is used when omitted. */
  driverFactory?: KtxSnowflakeDriverFactory;
  /** Optional source of extra SDK connection options. */
  sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
  /** Environment for `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Clock used for snapshot timestamps; defaults to `new Date()`. */
  now?: () => Date;
}
|
||||
|
||||
/** Read-only query request extended with Snowflake bind parameters. */
export interface KtxSnowflakeReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  /** Bind values; handed to the dialect's `prepareQuery` together with the SQL. */
  params?: Record<string, unknown>;
}
|
||||
|
||||
/** Tuning knobs for distinct-value extraction. */
export interface KtxSnowflakeColumnDistinctValuesOptions {
  /** Above this sampled cardinality, values are not fetched. */
  maxCardinality: number;
  /** Maximum number of distinct values to return. */
  limit: number;
  /** Rows sampled to estimate cardinality; 10000 when omitted. */
  sampleSize?: number;
}
|
||||
|
||||
/** Outcome of distinct-value extraction for one column. */
export interface KtxSnowflakeColumnDistinctValuesResult {
  /** Distinct values as strings, or `null` when the cardinality exceeded the allowed maximum. */
  values: string[] | null;
  /** Distinct count observed in the sample. */
  cardinality: number;
}
|
||||
|
||||
/** Type-name fragments that mark a result column as temporal (matched by substring, upper-cased). */
const DATE_TYPES = [
  'DATE',
  'TIMESTAMP',
  'TIMESTAMP_LTZ',
  'TIMESTAMP_NTZ',
  'TIMESTAMP_TZ',
  'TIME',
];
|
||||
|
||||
/**
 * Expands a configuration string that may point at an environment variable or a file.
 *
 * - `env:NAME` yields `env[NAME]`, or an empty string when the variable is unset.
 * - `file:PATH` yields the trimmed UTF-8 contents of the file; a leading `~` is
 *   expanded to the current user's home directory.
 * - Any other value is returned unchanged.
 *
 * @param value - Literal value or `env:` / `file:` reference.
 * @param env - Environment consulted for `env:` references.
 * @returns The resolved string.
 * @throws When a `file:` reference cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  const envPrefix = 'env:';
  const filePrefix = 'file:';
  if (value.startsWith(envPrefix)) {
    return env[value.slice(envPrefix.length)] ?? '';
  }
  if (value.startsWith(filePrefix)) {
    const rawPath = value.slice(filePrefix.length);
    // Strip the separator that follows `~` as well: `resolve()` treats a segment such as
    // `/keys/id.pem` as absolute and would otherwise discard the home directory.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Reads one connection setting as a non-empty string and expands any
 * `env:` / `file:` reference it contains.
 *
 * @param connection - Raw connection settings, possibly absent.
 * @param key - Setting to read.
 * @param env - Environment consulted for `env:` references.
 * @returns The resolved value, or `undefined` when the setting is missing,
 * not a string, or blank.
 */
function stringConfigValue(
  connection: KtxSnowflakeConnectionConfig | undefined,
  key: keyof KtxSnowflakeConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Determines which schemas a connection should scan.
 *
 * `schema_names` wins when it contains at least one non-blank string; each entry
 * is trimmed and then reference-expanded, the same way single-value settings are.
 * Otherwise the single `schema_name` setting is used, defaulting to `PUBLIC`.
 *
 * @param connection - Raw connection settings.
 * @param env - Environment consulted for `env:` references.
 * @returns A non-empty list of schema names.
 */
function schemaNames(connection: KtxSnowflakeConnectionConfig, env: NodeJS.ProcessEnv): string[] {
  if (Array.isArray(connection.schema_names)) {
    const listed = connection.schema_names
      // Config is untyped at runtime; ignore non-string entries instead of crashing on `.trim()`.
      .filter((schema): schema is string => typeof schema === 'string')
      .map((schema) => schema.trim())
      .filter((schema) => schema.length > 0)
      .map((schema) => resolveStringReference(schema, env));
    // A list holding only blanks must not produce an empty scan scope; fall through instead.
    if (listed.length > 0) {
      return listed;
    }
  }
  return [stringConfigValue(connection, 'schema_name', env) ?? 'PUBLIC'];
}
|
||||
|
||||
/**
 * Coerces a query cell to a finite number.
 *
 * @param value - Raw cell value.
 * @returns The numeric value, or `null` when the cell is `null`, `undefined`,
 * a blank string, or not convertible to a finite number.
 */
function firstNumber(value: unknown): number | null {
  // `Number(null)` and `Number('')` are both 0, which would disguise a missing value as zero.
  if (value === null || value === undefined) {
    return null;
  }
  if (typeof value === 'string' && value.trim().length === 0) {
    return null;
  }
  const numberValue = Number(value);
  return Number.isFinite(numberValue) ? numberValue : null;
}
|
||||
|
||||
/**
 * Converts one SDK cell value into a plain, JSON-friendly value.
 *
 * - Numbers and `Date`s in temporal columns become ISO-8601 strings.
 * - Strings shaped like a JSON object or array are parsed; unparsable ones are kept.
 * - Everything else is returned as is.
 *
 * @param value - Raw cell value.
 * @param columnType - Column type name reported by the SDK, if known.
 */
function normalizeSnowflakeValue(value: unknown, columnType?: string): unknown {
  if (columnType) {
    const upperType = columnType.toUpperCase();
    if (DATE_TYPES.some((temporal) => upperType.includes(temporal))) {
      if (typeof value === 'number') {
        return new Date(value).toISOString();
      }
      if (value instanceof Date) {
        return value.toISOString();
      }
    }
  }
  if (typeof value !== 'string') {
    return value;
  }
  const text = value.trim();
  const looksLikeObject = text.startsWith('{') && text.endsWith('}');
  const looksLikeArray = text.startsWith('[') && text.endsWith(']');
  if (!looksLikeObject && !looksLikeArray) {
    return value;
  }
  try {
    return JSON.parse(text) as unknown;
  } catch {
    // Not actually JSON; hand back the original, untrimmed text.
    return value;
  }
}
|
||||
|
||||
/**
 * Converts an arbitrary parameter value into something the SDK can bind.
 *
 * @param value - Caller-supplied parameter.
 * @returns Primitives and `null` unchanged, `undefined` as `null`, `Date`s as
 * ISO-8601 strings, and anything else stringified with `String()`.
 */
function toSnowflakeBind(value: unknown): snowflake.Bind {
  // A missing value must bind as SQL NULL, not as the literal text "undefined".
  if (value === undefined) {
    return null;
  }
  if (value === null || typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
    return value;
  }
  if (value instanceof Date) {
    return value.toISOString();
  }
  return String(value);
}
|
||||
|
||||
/**
 * Converts a positional parameter list into SDK binds.
 *
 * @param params - Positional parameters, if any.
 * @returns The converted list, or `undefined` when no parameters were given.
 */
function toSnowflakeBinds(params: unknown[] | undefined): snowflake.Binds | undefined {
  if (params == null) {
    return undefined;
  }
  return params.map((param) => toSnowflakeBind(param));
}
|
||||
|
||||
/**
 * Tells whether a connection is configured for the Snowflake driver.
 *
 * @param connection - Raw connection settings, possibly absent.
 * @returns True when `driver` equals `snowflake`, ignoring case.
 */
export function isKtxSnowflakeConnectionConfig(
  connection: KtxSnowflakeConnectionConfig | undefined,
): connection is KtxSnowflakeConnectionConfig {
  const driver = connection?.driver ?? '';
  return String(driver).toLowerCase() === 'snowflake';
}
|
||||
|
||||
/**
 * Validates raw connection settings and expands their references.
 *
 * @param input.connectionId - Connection identifier, used in error messages.
 * @param input.connection - Raw settings; must declare the Snowflake driver.
 * @param input.env - Environment for `env:` references; defaults to `process.env`.
 * @returns The resolved configuration.
 * @throws When the driver is not Snowflake or a required setting is missing.
 * @internal
 */
export function snowflakeConnectionConfigFromConfig(input: {
  connectionId: string;
  connection: KtxSnowflakeConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxSnowflakeResolvedConnectionConfig {
  const connection = input.connection;
  const inputDriver = connection?.driver ?? 'unknown';
  if (!isKtxSnowflakeConnectionConfig(connection)) {
    throw new Error(`Native Snowflake connector cannot run driver "${inputDriver}"`);
  }

  const env = input.env ?? process.env;
  const authMethod = connection.authMethod ?? 'password';
  const requirement = (field: string): Error =>
    new Error(`Native Snowflake connector requires connections.${input.connectionId}.${field}`);

  // All four settings are resolved before any of them is validated.
  const account = stringConfigValue(connection, 'account', env);
  const warehouse = stringConfigValue(connection, 'warehouse', env);
  const database = stringConfigValue(connection, 'database', env);
  const username = stringConfigValue(connection, 'username', env);
  if (!account) {
    throw requirement('account');
  }
  if (!warehouse) {
    throw requirement('warehouse');
  }
  if (!database) {
    throw requirement('database');
  }
  if (!username) {
    throw requirement('username');
  }

  const resolved: KtxSnowflakeResolvedConnectionConfig = {
    authMethod,
    account,
    warehouse,
    database,
    schemas: schemaNames(connection, env),
    username,
  };

  const role = stringConfigValue(connection, 'role', env);
  if (role) {
    resolved.role = role;
  }

  if (authMethod !== 'rsa') {
    resolved.password = stringConfigValue(connection, 'password', env);
    if (!resolved.password) {
      throw requirement('password');
    }
    return resolved;
  }

  resolved.privateKey = stringConfigValue(connection, 'privateKey', env);
  const passphrase = stringConfigValue(connection, 'passphrase', env);
  if (passphrase) {
    resolved.passphrase = passphrase;
  }
  if (!resolved.privateKey) {
    throw new Error(`Native Snowflake connector requires connections.${input.connectionId}.privateKey for RSA auth`);
  }
  return resolved;
}
|
||||
|
||||
/** Factory used when none is injected; produces the `snowflake-sdk` backed driver. */
class DefaultSnowflakeDriverFactory implements KtxSnowflakeDriverFactory {
  /** Creates a driver for the given resolved settings and optional SDK options provider. */
  createDriver(input: {
    resolved: KtxSnowflakeResolvedConnectionConfig;
    sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
  }): KtxSnowflakeDriver {
    const { resolved, sdkOptionsProvider } = input;
    return new SnowflakeSdkDriver(resolved, sdkOptionsProvider);
  }
}
|
||||
|
||||
/**
 * {@link KtxSnowflakeDriver} implemented on top of `snowflake-sdk`.
 *
 * Each public operation opens its own connection, applies the configured
 * role / warehouse / database / schema, runs, and destroys the connection.
 */
class SnowflakeSdkDriver implements KtxSnowflakeDriver {
  /** Disposers handed back by the SDK options provider; drained by {@link cleanup}. */
  private closeSdkOptions: Array<() => Promise<void>> = [];

  constructor(
    private readonly resolved: KtxSnowflakeResolvedConnectionConfig,
    private readonly sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider,
  ) {}

  /**
   * Runs `SELECT 1` against a fresh connection, giving up after 60 seconds.
   *
   * @returns `{ success: true }`, or `{ success: false, error }` on failure or timeout.
   */
  async test(): Promise<{ success: boolean; error?: string }> {
    const timeoutMs = 60_000;
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timedOut = new Promise<{ success: boolean; error: string }>((resolveTest) => {
      timer = setTimeout(
        () => resolveTest({ success: false, error: `Connection test timed out after ${timeoutMs / 1000}s` }),
        timeoutMs,
      );
    });
    try {
      return await Promise.race([this.runTest(), timedOut]);
    } finally {
      // A pending timer keeps the Node event loop alive; release it once the race is decided.
      clearTimeout(timer);
    }
  }

  /**
   * Executes one statement on a dedicated connection.
   *
   * @param sql - Statement text.
   * @param params - Positional binds; ignored unless it is an array.
   * @returns Headers, rows, and row counts (both counts equal the number of rows returned).
   */
  async query(sql: string, params?: unknown): Promise<KtxQueryResult> {
    let connection: snowflake.Connection | null = null;
    try {
      connection = await this.createConnection();
      const binds = Array.isArray(params) ? toSnowflakeBinds(params) : undefined;
      const result = await this.executeSnowflakeQuery(connection, sql, binds);
      return { ...result, totalRows: result.rows.length, rowCount: result.rows.length };
    } finally {
      if (connection) {
        await this.destroyConnection(connection);
      }
    }
  }

  /**
   * Reads table and column metadata for one schema from `INFORMATION_SCHEMA`.
   *
   * @param schemaName - Schema to describe; defaults to the first configured schema, else `PUBLIC`.
   */
  async getSchemaMetadata(schemaName = this.resolved.schemas[0] ?? 'PUBLIC'): Promise<KtxSnowflakeRawTableMetadata[]> {
    const tablesResult = await this.query(
      `
      SELECT TABLE_NAME, TABLE_TYPE, COMMENT, ROW_COUNT
      FROM INFORMATION_SCHEMA.TABLES
      WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
      ORDER BY TABLE_NAME
      `,
      [schemaName, this.resolved.database],
    );
    const columnsResult = await this.query(
      `
      SELECT TABLE_NAME, COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COMMENT, ORDINAL_POSITION
      FROM INFORMATION_SCHEMA.COLUMNS
      WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
      ORDER BY TABLE_NAME, ORDINAL_POSITION
      `,
      [schemaName, this.resolved.database],
    );

    // Group columns by table; rows arrive ordered by ordinal position within each table.
    const columnsByTable = new Map<string, KtxSnowflakeRawColumnMetadata[]>();
    for (const row of columnsResult.rows) {
      const tableName = String(row[0]);
      const columns = columnsByTable.get(tableName) ?? [];
      columns.push({
        name: String(row[1]),
        type: String(row[2]),
        nullable: row[3] === 'YES',
        comment: row[4] ? String(row[4]) : null,
      });
      columnsByTable.set(tableName, columns);
    }

    return tablesResult.rows.map((row) => ({
      name: String(row[0]),
      catalog: this.resolved.database,
      db: schemaName,
      rowCount: firstNumber(row[3]) ?? 0,
      comment: row[2] ? String(row[2]) : null,
      columns: columnsByTable.get(String(row[0])) ?? [],
    }));
  }

  /** Lists the schemas of the configured database, leaving out `INFORMATION_SCHEMA`. */
  async listSchemas(): Promise<string[]> {
    const result = await this.query(`SHOW SCHEMAS IN DATABASE ${this.quoteIdentifier(this.resolved.database)}`);
    // The schema name is read from the second column of the SHOW SCHEMAS output.
    return result.rows.map((row) => String(row[1])).filter((name) => name !== 'INFORMATION_SCHEMA');
  }

  /**
   * Lists tables and views.
   *
   * @param schemas - Schemas to inspect; every schema of the database when omitted.
   */
  async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
    const filterSchemas = schemas ?? (await this.listSchemas());
    if (filterSchemas.length === 0) return [];
    const entries: KtxTableListEntry[] = [];
    for (const schemaName of filterSchemas) {
      const result = await this.query(
        `
        SELECT TABLE_NAME, TABLE_TYPE
        FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_SCHEMA = ? AND TABLE_CATALOG = ?
        ORDER BY TABLE_NAME
        `,
        [schemaName, this.resolved.database],
      );
      for (const row of result.rows) {
        entries.push({
          schema: schemaName,
          name: String(row[0]),
          kind: String(row[1]) === 'VIEW' ? 'view' : 'table',
        });
      }
    }
    return entries;
  }

  /** Invokes and forgets every disposer collected from the SDK options provider. */
  async cleanup(): Promise<void> {
    const closers = this.closeSdkOptions;
    this.closeSdkOptions = [];
    await Promise.all(closers.map((close) => close()));
  }

  /** Connectivity probe without the timeout; connection and query errors become a failure result. */
  private async runTest(): Promise<{ success: boolean; error?: string }> {
    let connection: snowflake.Connection | null = null;
    try {
      connection = await this.createConnection();
      await this.executeSnowflakeQuery(connection, 'SELECT 1');
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    } finally {
      if (connection) {
        await this.destroyConnection(connection);
      }
    }
  }

  /**
   * Opens a connection and applies the session context.
   *
   * Options from the SDK options provider are spread over the base options, so
   * they override account, username, warehouse, database, schema, and role.
   */
  private async createConnection(): Promise<snowflake.Connection> {
    const patch = await this.sdkOptionsProvider?.resolve({
      account: this.resolved.account,
      connection: { ...this.resolved, driver: 'snowflake' },
    });
    if (patch?.close) {
      this.closeSdkOptions.push(patch.close);
    }
    const baseConfig: snowflake.ConnectionOptions = {
      account: this.resolved.account,
      username: this.resolved.username,
      warehouse: this.resolved.warehouse,
      database: this.resolved.database,
      schema: this.resolved.schemas[0] ?? 'PUBLIC',
      role: this.resolved.role,
      ...patch?.sdkOptions,
    };
    const connectionConfig: snowflake.ConnectionOptions =
      this.resolved.authMethod === 'rsa'
        ? { ...baseConfig, authenticator: 'SNOWFLAKE_JWT', privateKey: this.decryptPrivateKey() }
        : { ...baseConfig, password: this.resolved.password };
    const connection = snowflake.createConnection(connectionConfig);
    return new Promise((resolveConnection, rejectConnection) => {
      connection.connect((error, connected) => {
        if (error) {
          rejectConnection(error);
          return;
        }
        const resolvedConnection = connected ?? connection;
        this.setConnectionContext(resolvedConnection).then(
          () => resolveConnection(resolvedConnection),
          (contextError) => {
            // The session is unusable without its context; drop it before reporting.
            resolvedConnection.destroy(() => undefined);
            rejectConnection(contextError);
          },
        );
      });
    });
  }

  /** Issues `USE ROLE` (when configured), `USE WAREHOUSE`, `USE DATABASE`, and `USE SCHEMA`. */
  private async setConnectionContext(connection: snowflake.Connection): Promise<void> {
    if (this.resolved.role) {
      await this.executeSnowflakeQuery(connection, `USE ROLE ${this.quoteIdentifier(this.resolved.role)}`);
    }
    await this.executeSnowflakeQuery(connection, `USE WAREHOUSE ${this.quoteIdentifier(this.resolved.warehouse)}`);
    await this.executeSnowflakeQuery(connection, `USE DATABASE ${this.quoteIdentifier(this.resolved.database)}`);
    await this.executeSnowflakeQuery(
      connection,
      `USE SCHEMA ${this.quoteIdentifier(this.resolved.schemas[0] ?? 'PUBLIC')}`,
    );
  }

  /**
   * Promise wrapper around `connection.execute`.
   *
   * @returns Column names, column types, and rows as positional arrays of normalized values.
   */
  private async executeSnowflakeQuery(
    connection: snowflake.Connection,
    sqlText: string,
    binds?: snowflake.Binds,
  ): Promise<{ headers: string[]; headerTypes?: string[]; rows: unknown[][] }> {
    return new Promise((resolveQuery, rejectQuery) => {
      connection.execute({
        sqlText,
        binds,
        complete: (error, statement, rows) => {
          if (error) {
            rejectQuery(error);
            return;
          }
          const columns = statement.getColumns();
          const headers = columns ? columns.map((column) => column.getName()) : [];
          const headerTypes = columns ? columns.map((column) => column.getType()) : [];
          // Rows are read by column name and re-emitted in header order.
          const normalizedRows = rows
            ? rows.map((row) => headers.map((header, index) => normalizeSnowflakeValue(row[header], headerTypes[index])))
            : [];
          resolveQuery({ headers, headerTypes, rows: normalizedRows });
        },
      });
    });
  }

  /** Promise wrapper around `connection.destroy`. */
  private destroyConnection(connection: snowflake.Connection): Promise<void> {
    return new Promise((resolveDestroy, rejectDestroy) => {
      connection.destroy((error) => {
        if (error) {
          rejectDestroy(error);
          return;
        }
        resolveDestroy();
      });
    });
  }

  /**
   * Loads the configured PEM key (using the passphrase when present) and
   * re-exports it as PKCS#8 PEM.
   *
   * @throws When no private key is configured.
   */
  private decryptPrivateKey(): string {
    if (!this.resolved.privateKey) {
      throw new Error('Private key is required for RSA authentication');
    }
    const privateKeyObject = createPrivateKey({
      key: this.resolved.privateKey,
      format: 'pem',
      ...(this.resolved.passphrase ? { passphrase: this.resolved.passphrase } : {}),
    });
    return privateKeyObject.export({ format: 'pem', type: 'pkcs8' }) as string;
  }

  /** Wraps an identifier in double quotes, doubling embedded quotes so it cannot end the literal early. */
  private quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }
}
|
||||
|
||||
/**
 * Scan connector for Snowflake.
 *
 * Resolves the connection settings at construction time and creates its driver
 * lazily on first use. Column statistics and formal foreign keys are not provided.
 */
export class KtxSnowflakeScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'snowflake' as const;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: false,
    estimatedRowCounts: true,
  });

  private readonly resolved: KtxSnowflakeResolvedConnectionConfig;
  private readonly driverFactory: KtxSnowflakeDriverFactory;
  private readonly dialect = new KtxSnowflakeDialect();
  private readonly now: () => Date;
  private driverInstance: KtxSnowflakeDriver | null = null;

  /** @throws When the connection settings are not a valid Snowflake configuration. */
  constructor(private readonly options: KtxSnowflakeScanConnectorOptions) {
    this.resolved = snowflakeConnectionConfigFromConfig(options);
    this.driverFactory = options.driverFactory ?? new DefaultSnowflakeDriverFactory();
    this.now = options.now ?? (() => new Date());
    this.id = `snowflake:${options.connectionId}`;
  }

  /** Delegates the connectivity check to the driver. */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    const driver = this.getDriver();
    return driver.test();
  }

  /** Builds a schema snapshot covering every configured schema, in configuration order. */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const { account, warehouse, database, schemas } = this.resolved;

    const tables: KtxSchemaTable[] = [];
    for (const schemaName of schemas) {
      const rawTables = await this.getDriver().getSchemaMetadata(schemaName);
      const tableNames = rawTables.map(({ name }) => name);
      const primaryKeys = await this.primaryKeys(tableNames, schemaName);
      for (const rawTable of rawTables) {
        tables.push(this.toSchemaTable(rawTable, primaryKeys));
      }
    }

    let totalColumns = 0;
    for (const table of tables) {
      totalColumns += table.columns.length;
    }

    return {
      connectionId: this.options.connectionId,
      driver: 'snowflake',
      extractedAt: this.now().toISOString(),
      scope: { catalogs: [database], schemas },
      metadata: {
        account,
        warehouse,
        database,
        schemas,
        table_count: tables.length,
        total_columns: totalColumns,
      },
      tables,
    };
  }

  /** Samples rows from a table, optionally restricted to the given columns. */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
    this.assertConnection(input.connectionId);
    const tableName = this.qTableName(input.table);
    const sql = this.dialect.generateSampleQuery(tableName, input.limit, input.columns);
    const { headers, rows, totalRows } = await this.getDriver().query(sql);
    return { headers, rows, totalRows };
  }

  /** Samples values of one column; null and distinct counts are not computed. */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const tableName = this.qTableName(input.table);
    const sql = this.dialect.generateColumnSampleQuery(tableName, input.column, input.limit);
    const result = await this.getDriver().query(sql);
    const populatedRows = result.rows.filter((row) => row.length > 0 && row[0] !== null);
    return {
      values: populatedRows.map((row) => row[0]),
      nullCount: null,
      distinctCount: null,
    };
  }

  /** Always `null`: column statistics are not supported by this connector. */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /** Verifies the statement is read-only, applies the row limit, and executes it. */
  async executeReadOnly(input: KtxSnowflakeReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const readOnlySql = assertReadOnlySql(input.sql);
    const limitedSql = limitSqlForExecution(readOnlySql, input.maxRows);
    const { sql, params } = this.dialect.prepareQuery(limitedSql, input.params);
    return this.getDriver().query(sql, params);
  }

  /**
   * Estimates a column's cardinality from a sample and, when it is small enough,
   * fetches its distinct values.
   *
   * @returns `null` when no cardinality could be read; `values: null` when the
   * cardinality exceeds `options.maxCardinality`.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxSnowflakeColumnDistinctValuesOptions,
  ): Promise<KtxSnowflakeColumnDistinctValuesResult | null> {
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const sampleSize = options.sampleSize ?? 10000;

    const cardinalitySql = this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize);
    const cardinality = await this.singleNumber(cardinalitySql, 'CARDINALITY');
    if (cardinality === null) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }

    const distinctSql = this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit);
    const valueRows = await this.queryRaw<Record<string, unknown>>(distinctSql);
    const values = valueRows
      .map((row) => String(row.VAL ?? row.val))
      .filter((text) => text !== 'null');
    return { values, cardinality };
  }

  /** Row count the schema metadata reports for a table; 0 when the table or count is missing. */
  async getTableRowCount(tableName: string, schemaName = this.resolved.schemas[0] ?? 'PUBLIC'): Promise<number> {
    const tables = await this.getDriver().getSchemaMetadata(schemaName);
    const match = tables.find(({ name }) => name === tableName);
    return match?.rowCount ?? 0;
  }

  /** Fully quoted table name as produced by the dialect. */
  qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes a single identifier using the dialect's rules. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Lists schemas via the driver. */
  listSchemas(): Promise<string[]> {
    return this.getDriver().listSchemas();
  }

  /** Lists tables via the driver. */
  listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
    return this.getDriver().listTables(schemas);
  }

  /** Cleans up and discards the driver, if one was created. */
  async cleanup(): Promise<void> {
    const driver = this.driverInstance;
    if (!driver) {
      return;
    }
    await driver.cleanup();
    this.driverInstance = null;
  }

  /** Returns the driver, creating it on first use. */
  private getDriver(): KtxSnowflakeDriver {
    if (this.driverInstance) {
      return this.driverInstance;
    }
    const created = this.driverFactory.createDriver({
      resolved: this.resolved,
      sdkOptionsProvider: this.options.sdkOptionsProvider,
    });
    this.driverInstance = created;
    return created;
  }

  /**
   * Looks up primary-key columns for the tables of one schema.
   *
   * @returns A map holding an entry (possibly empty) for every requested table;
   * keys reported for other tables are ignored.
   */
  private async primaryKeys(tableNames: string[], schemaName: string): Promise<Map<string, Set<string>>> {
    if (tableNames.length === 0) {
      return new Map();
    }
    const result = await this.getDriver().query(
      `
      SELECT tc.TABLE_NAME, kcu.COLUMN_NAME
      FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
      JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
      ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
      AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
      AND tc.TABLE_CATALOG = kcu.TABLE_CATALOG
      WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
      AND tc.TABLE_SCHEMA = ?
      AND tc.TABLE_CATALOG = ?
      ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION
      `,
      [schemaName, this.resolved.database],
    );

    const grouped = new Map<string, Set<string>>();
    tableNames.forEach((tableName) => {
      grouped.set(tableName, new Set());
    });
    for (const [table, column] of result.rows) {
      grouped.get(String(table))?.add(String(column));
    }
    return grouped;
  }

  /** Converts raw table metadata into the snapshot shape; every entry is reported as a table without foreign keys. */
  private toSchemaTable(table: KtxSnowflakeRawTableMetadata, primaryKeys: Map<string, Set<string>>): KtxSchemaTable {
    const columns = table.columns.map((column) => this.toSchemaColumn(table.name, column, primaryKeys));
    return {
      catalog: table.catalog,
      db: table.db,
      name: table.name,
      kind: 'table',
      comment: table.comment,
      estimatedRows: table.rowCount,
      columns,
      foreignKeys: [],
    };
  }

  /** Converts raw column metadata into the snapshot shape, flagging primary-key membership. */
  private toSchemaColumn(
    tableName: string,
    column: KtxSnowflakeRawColumnMetadata,
    primaryKeys: Map<string, Set<string>>,
  ): KtxSchemaColumn {
    const { name, type, nullable, comment } = column;
    const keyColumns = primaryKeys.get(tableName);
    return {
      name,
      nativeType: type,
      normalizedType: this.dialect.mapDataType(type),
      dimensionType: this.dialect.mapToDimensionType(type),
      nullable,
      primaryKey: keyColumns?.has(name) ?? false,
      comment,
    };
  }

  /** Runs a query and returns each row as an object keyed by column header. */
  private async queryRaw<T extends Record<string, unknown>>(sql: string, params?: unknown): Promise<T[]> {
    const result = await this.getDriver().query(sql, params);
    return result.rows.map(
      (row) => Object.fromEntries(result.headers.map((header, index) => [header, row[index]])) as T,
    );
  }

  /** Reads one numeric cell from the first row, trying the header as given and then lower-cased. */
  private async singleNumber(sql: string, header: string): Promise<number | null> {
    const rows = await this.queryRaw<Record<string, unknown>>(sql);
    const firstRow = rows[0];
    return firstNumber(firstRow?.[header] ?? firstRow?.[header.toLowerCase()]);
  }

  /** @throws When asked to operate on a connection other than the one this connector was built for. */
  private assertConnection(connectionId: string): void {
    if (connectionId === this.options.connectionId) {
      return;
    }
    throw new Error(`Snowflake connector ${this.options.connectionId} cannot scan connection ${connectionId}`);
  }
}
|
||||
50
packages/cli/src/connectors/snowflake/dialect.test.ts
Normal file
50
packages/cli/src/connectors/snowflake/dialect.test.ts
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxSnowflakeDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the Snowflake dialect: identifier quoting, type mapping, and SQL generation.
describe('KtxSnowflakeDialect', () => {
  const dialect = new KtxSnowflakeDialect();
  const ordersTable = '"PUBLIC"."ORDERS"';
  const statusColumn = '"STATUS"';

  it('quotes identifiers and formats database.schema.table names', () => {
    // Embedded double quotes are doubled.
    expect(dialect.quoteIdentifier('order"items')).toBe('"order""items"');

    const fullyQualified = dialect.formatTableName({ catalog: 'ANALYTICS', db: 'PUBLIC', name: 'ORDERS' });
    expect(fullyQualified).toBe('"ANALYTICS"."PUBLIC"."ORDERS"');

    const schemaQualified = dialect.formatTableName({ db: 'PUBLIC', name: 'ORDERS' });
    expect(schemaQualified).toBe('"PUBLIC"."ORDERS"');

    const bare = dialect.formatTableName({ name: 'ORDERS' });
    expect(bare).toBe('"ORDERS"');
  });

  it('maps native Snowflake types to scan dimensions', () => {
    // Native types pass through unchanged.
    expect(dialect.mapDataType('NUMBER(38,0)')).toBe('NUMBER(38,0)');

    expect(dialect.mapToDimensionType('TIMESTAMP_NTZ')).toBe('time');
    expect(dialect.mapToDimensionType('NUMBER(38,0)')).toBe('number');
    expect(dialect.mapToDimensionType('BOOLEAN')).toBe('boolean');
    expect(dialect.mapToDimensionType('VARIANT')).toBe('string');
  });

  it('generates sampling and dictionary SQL', () => {
    const tableSample = dialect.generateSampleQuery(ordersTable, 5, ['ID', 'STATUS']);
    expect(tableSample).toBe('SELECT "ID", "STATUS" FROM "PUBLIC"."ORDERS" SAMPLE ROW (5 ROWS)');

    const columnSample = dialect.generateColumnSampleQuery(ordersTable, 'STATUS', 10);
    expect(columnSample).toBe(
      'SELECT "STATUS" FROM "PUBLIC"."ORDERS" WHERE "STATUS" IS NOT NULL AND TRIM(CAST("STATUS" AS STRING)) != \'\' LIMIT 10',
    );

    const cardinalitySql = dialect.generateCardinalitySampleQuery(ordersTable, statusColumn, 100);
    expect(cardinalitySql).toContain('SELECT COUNT(DISTINCT val) AS cardinality');

    const distinctSql = dialect.generateDistinctValuesQuery(ordersTable, statusColumn, 20);
    expect(distinctSql).toContain('SELECT DISTINCT "STATUS"::VARCHAR AS val');
  });

  it('passes Snowflake positional parameters as bind arrays', () => {
    const parameterized = 'SELECT * FROM ORDERS WHERE ID = ? AND STATUS = ?';
    expect(dialect.prepareQuery(parameterized, { id: 1, status: 'paid' })).toEqual({
      sql: parameterized,
      params: [1, 'paid'],
    });

    const plain = 'SELECT * FROM ORDERS';
    expect(dialect.prepareQuery(plain)).toEqual({ sql: plain, params: undefined });
  });

  it('keeps unsupported statistics explicit', () => {
    expect(dialect.generateColumnStatisticsQuery('PUBLIC', 'ORDERS')).toBeNull();
  });
});
|
||||
187
packages/cli/src/connectors/snowflake/dialect.ts
Normal file
187
packages/cli/src/connectors/snowflake/dialect.ts
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
type SnowflakeTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
export class KtxSnowflakeDialect {
|
||||
readonly type = 'snowflake';
|
||||
|
||||
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
|
||||
TIMESTAMP_NTZ: 'time',
|
||||
TIMESTAMP_LTZ: 'time',
|
||||
TIMESTAMP_TZ: 'time',
|
||||
TIMESTAMP: 'time',
|
||||
DATE: 'time',
|
||||
TIME: 'time',
|
||||
NUMBER: 'number',
|
||||
DECIMAL: 'number',
|
||||
NUMERIC: 'number',
|
||||
INT: 'number',
|
||||
INTEGER: 'number',
|
||||
BIGINT: 'number',
|
||||
SMALLINT: 'number',
|
||||
TINYINT: 'number',
|
||||
BYTEINT: 'number',
|
||||
FLOAT: 'number',
|
||||
FLOAT4: 'number',
|
||||
FLOAT8: 'number',
|
||||
DOUBLE: 'number',
|
||||
'DOUBLE PRECISION': 'number',
|
||||
REAL: 'number',
|
||||
VARCHAR: 'string',
|
||||
CHAR: 'string',
|
||||
CHARACTER: 'string',
|
||||
STRING: 'string',
|
||||
TEXT: 'string',
|
||||
BINARY: 'string',
|
||||
VARBINARY: 'string',
|
||||
BOOLEAN: 'boolean',
|
||||
VARIANT: 'string',
|
||||
OBJECT: 'string',
|
||||
ARRAY: 'string',
|
||||
};
|
||||
|
||||
/** Wraps an identifier in double quotes, doubling any double quote it contains. */
quoteIdentifier(identifier: string): string {
  const escaped = identifier.split('"').join('""');
  return `"${escaped}"`;
}
|
||||
|
||||
/**
 * Builds a quoted, dot-separated table name.
 *
 * The schema (`db`) is included when present; the database (`catalog`) is only
 * included when the schema is present as well.
 */
formatTableName(table: SnowflakeTableNameRef): string {
  const { catalog, db, name } = table;
  const parts: string[] = [];
  if (db) {
    if (catalog) {
      parts.push(catalog);
    }
    parts.push(db);
  }
  parts.push(name);
  return parts.map((part) => this.quoteIdentifier(part)).join('.');
}
|
||||
|
||||
/** Returns the native Snowflake type unchanged; no normalization is applied. */
mapDataType(nativeType: string): string {
  const normalizedType = nativeType;
  return normalizedType;
}
|
||||
|
||||
/**
 * Classifies a native type as `time`, `number`, `boolean`, or `string`.
 *
 * The type is upper-cased and anything from the first `(` onward is dropped. An
 * exact entry in the mapping table wins; otherwise the name is matched by
 * substring, and unknown or empty types fall back to `string`.
 */
mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
  if (!nativeType) {
    return 'string';
  }
  const upper = nativeType.toUpperCase().trim();
  const parenIndex = upper.indexOf('(');
  const normalized = parenIndex === -1 ? upper : upper.slice(0, parenIndex);

  const exact = this.typeMappings[normalized];
  if (exact) {
    return exact;
  }

  const mentions = (...fragments: string[]): boolean => fragments.some((fragment) => normalized.includes(fragment));
  if (mentions('TIME', 'DATE')) {
    return 'time';
  }
  if (mentions('INT', 'NUM', 'DEC', 'FLOAT', 'DOUBLE')) {
    return 'number';
  }
  if (mentions('BOOL')) {
    return 'boolean';
  }
  return 'string';
}
|
||||
|
||||
/**
 * Builds a `SAMPLE ROW (n ROWS)` query.
 *
 * @param tableName - Already-quoted table name.
 * @param limit - Number of rows to sample.
 * @param columns - Unquoted column names; all columns are selected when omitted or empty.
 */
generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
  let columnList = '*';
  if (columns && columns.length > 0) {
    columnList = columns.map((column) => this.quoteIdentifier(column)).join(', ');
  }
  return `SELECT ${columnList} FROM ${tableName} SAMPLE ROW (${limit} ROWS)`;
}
|
||||
|
||||
/**
 * Builds a query returning up to `limit` values of one column, skipping NULLs
 * and values that are blank once cast to a string.
 *
 * @param tableName - Already-quoted table name.
 * @param columnName - Unquoted column name.
 */
generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
  const quotedColumn = this.quoteIdentifier(columnName);
  const hasContent = `${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS STRING)) != ''`;
  return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${hasContent} LIMIT ${limit}`;
}
|
||||
|
||||
/**
 * Pairs the SQL text with positional parameters.
 *
 * Named parameters are flattened with `Object.values`, so their order is the
 * object's own property order; `params` is `undefined` when none were given.
 */
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
  if (!params) {
    return { sql, params: undefined };
  }
  return { sql, params: Object.values(params) };
}
|
||||
|
||||
/**
 * Returns a WHERE-clause predicate keeping roughly `samplePct` (a fraction in
 * the open interval 0..1) of rows, or `''` when the fraction is outside that
 * interval.
 */
getRandomSampleFilter(samplePct: number): string {
  const outOfRange = samplePct <= 0 || samplePct >= 1;
  return outOfRange ? '' : `UNIFORM(0::FLOAT, 1::FLOAT, RANDOM()) < ${samplePct}`;
}
|
||||
|
||||
/**
 * Returns a `SAMPLE (<percent>)` clause for a fraction in the open interval
 * 0..1 (converted to a percentage), or `''` when the fraction is outside it.
 */
getTableSampleClause(samplePct: number): string {
  const outOfRange = samplePct <= 0 || samplePct >= 1;
  return outOfRange ? '' : `SAMPLE (${samplePct * 100})`;
}
|
||||
|
||||
/** Returns `LIMIT n`, adding `OFFSET m` only when a positive offset is supplied. */
getLimitOffsetClause(limit: number, offset?: number): string {
  const limitClause = `LIMIT ${limit}`;
  if (offset !== undefined && offset > 0) {
    return `${limitClause} OFFSET ${offset}`;
  }
  return limitClause;
}
|
||||
|
||||
/** Returns an aggregate expression counting rows where `column` (interpolated as given) is NULL. */
getNullCountExpression(column: string): string {
  return 'COUNT_IF(' + column + ' IS NULL)';
}
|
||||
|
||||
/** Returns an approximate distinct-count aggregate over `column` (interpolated as given). */
getDistinctCountExpression(column: string): string {
  return 'APPROX_COUNT_DISTINCT(' + column + ')';
}
|
||||
|
||||
/**
 * Builds a query that counts distinct non-NULL values of a column among the
 * first `sampleSize` rows returned by the table (LIMIT, not a random sample).
 *
 * `tableName` and `columnName` are interpolated verbatim.
 * NOTE(review): presumably callers pass already-quoted identifiers — confirm.
 */
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
  return `
    WITH sampled AS (
    SELECT ${columnName} AS val
    FROM ${tableName}
    WHERE ${columnName} IS NOT NULL
    LIMIT ${sampleSize}
    )
    SELECT COUNT(DISTINCT val) AS cardinality
    FROM sampled
  `;
}
|
||||
|
||||
/**
 * Builds a query listing up to `limit` distinct non-NULL values of a column,
 * cast to VARCHAR and sorted ascending.
 *
 * `tableName` and `columnName` are interpolated verbatim.
 * NOTE(review): presumably callers pass already-quoted identifiers — confirm.
 */
generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
  return `
    SELECT DISTINCT ${columnName}::VARCHAR AS val
    FROM ${tableName}
    WHERE ${columnName} IS NOT NULL
    ORDER BY val
    LIMIT ${limit}
  `;
}
|
||||
|
||||
/** Always returns `null`: this dialect provides no column-statistics query; both arguments are ignored. */
generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
  return null;
}
|
||||
|
||||
/**
 * Builds a query that counts distinct non-NULL values of a column over a
 * `SAMPLE ROW (<sampleSize> ROWS)` sample of the table.
 *
 * The NULL filter is applied after sampling, so fewer than `sampleSize`
 * values may contribute. Identifiers are interpolated verbatim.
 */
generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
  return `
    WITH sampled AS (
    SELECT ${columnName} AS val
    FROM ${tableName} SAMPLE ROW (${sampleSize} ROWS)
    WHERE ${columnName} IS NOT NULL
    )
    SELECT COUNT(DISTINCT val) AS cardinality
    FROM sampled
  `;
}
|
||||
|
||||
/**
 * Returns a `DATE_TRUNC` expression for `column` at the given granularity.
 *
 * When `timezone` is supplied the column is first wrapped in
 * `CONVERT_TIMEZONE('UTC', <timezone>, <column>)`. Both `column` and
 * `timezone` are interpolated verbatim.
 */
getTimeTruncExpression(
  column: string,
  granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
  timezone?: string,
): string {
  let target = column;
  if (timezone) {
    target = `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})`;
  }
  return `DATE_TRUNC('${granularity}', ${target})`;
}
|
||||
|
||||
/**
 * Returns an expression that buckets `column` into fixed-size intervals
 * counted from `origin` (default `1970-01-01`).
 *
 * @param column Column expression, interpolated verbatim.
 * @param interval `"<amount> <unit>"`, e.g. `"15 minute"`; surrounding and
 *   repeated whitespace is tolerated.
 * @param origin Optional timestamp literal the buckets are aligned to.
 * @param timezone Optional target timezone; when set the column is first
 *   wrapped in `CONVERT_TIMEZONE('UTC', <timezone>, <column>)`.
 * @throws Error when `interval` does not contain both an amount and a unit,
 *   which would otherwise produce SQL containing `undefined` or an empty unit.
 */
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
  const target = timezone ? `CONVERT_TIMEZONE('UTC', '${timezone}', ${column})` : column;
  // Split on any whitespace run so " 15  minute " parses the same as "15 minute".
  const [amount, unit] = interval.trim().split(/\s+/);
  if (!amount || !unit) {
    throw new Error(`Invalid interval "${interval}": expected "<amount> <unit>"`);
  }
  const originExpr = origin ? `'${origin}'::TIMESTAMP` : `'1970-01-01'::TIMESTAMP`;
  return `DATEADD(${unit}, FLOOR(DATEDIFF(${unit}, ${originExpr}, ${target}) / ${amount}) * ${amount}, ${originExpr})`;
}
|
||||
|
||||
/** Wraps `interval` (interpolated verbatim) in an `INTERVAL '...'` literal. */
parseIntervalToSql(interval: string): string {
  return "INTERVAL '" + interval + "'";
}
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
import {
|
||||
KtxSnowflakeScanConnector,
|
||||
type KtxSnowflakeConnectionConfig,
|
||||
type KtxSnowflakeDriverFactory,
|
||||
type KtxSnowflakeSdkOptionsProvider,
|
||||
} from './connector.js';
|
||||
|
||||
/** Options accepted by `createSnowflakeLiveDatabaseIntrospection`. */
interface CreateSnowflakeLiveDatabaseIntrospectionOptions {
  /** Connection configs keyed by connection id; looked up with the id passed to `extractSchema`. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Forwarded unchanged to the scan connector. */
  driverFactory?: KtxSnowflakeDriverFactory;
  /** Forwarded unchanged to the scan connector. */
  sdkOptionsProvider?: KtxSnowflakeSdkOptionsProvider;
  /** Clock override forwarded to the scan connector. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Adapts the Snowflake scan connector to the live-database introspection port.
 *
 * Each `extractSchema` call builds a fresh connector for the requested
 * connection id, runs `introspect`, and always calls `cleanup()` afterwards,
 * whether introspection succeeded or threw.
 */
export function createSnowflakeLiveDatabaseIntrospection(
  options: CreateSnowflakeLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const port: LiveDatabaseIntrospectionPort = {
    async extractSchema(connectionId: string) {
      // The config is looked up per call; a missing id is passed through as undefined.
      const connection = options.connections[connectionId] as KtxSnowflakeConnectionConfig | undefined;
      const { driverFactory, sdkOptionsProvider, now } = options;
      const connector = new KtxSnowflakeScanConnector({
        connectionId,
        connection,
        driverFactory,
        sdkOptionsProvider,
        now,
      });
      try {
        const snapshot = await connector.introspect(
          { connectionId, driver: 'snowflake' },
          { runId: `snowflake-${connectionId}` },
        );
        return snapshot;
      } finally {
        await connector.cleanup();
      }
    },
  };
  return port;
}
|
||||
256
packages/cli/src/connectors/sqlite/connector.test.ts
Normal file
256
packages/cli/src/connectors/sqlite/connector.test.ts
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
import Database from 'better-sqlite3';
|
||||
import { writeFileSync } from 'node:fs';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { createSqliteLiveDatabaseIntrospection } from '../../connectors/sqlite/live-database-introspection.js';
|
||||
import { isKtxSqliteConnectionConfig, KtxSqliteScanConnector, sqliteDatabasePathFromConfig } from '../../connectors/sqlite/connector.js';
|
||||
|
||||
describe('KtxSqliteScanConnector', () => {
|
||||
let tempDir: string;
|
||||
let dbPath: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-connector-sqlite-'));
|
||||
dbPath = join(tempDir, 'warehouse.db');
|
||||
const db = new Database(dbPath);
|
||||
db.exec(`
|
||||
PRAGMA foreign_keys = ON;
|
||||
CREATE TABLE customers (
|
||||
id INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
tier TEXT
|
||||
);
|
||||
CREATE TABLE orders (
|
||||
id INTEGER PRIMARY KEY,
|
||||
customer_id INTEGER NOT NULL,
|
||||
status TEXT,
|
||||
total NUMERIC,
|
||||
created_at TEXT,
|
||||
FOREIGN KEY(customer_id) REFERENCES customers(id)
|
||||
);
|
||||
CREATE VIEW recent_orders AS SELECT id, customer_id, status FROM orders;
|
||||
INSERT INTO customers (id, name, tier) VALUES (1, 'Ada', 'enterprise'), (2, 'Grace', 'growth');
|
||||
INSERT INTO orders (id, customer_id, status, total, created_at)
|
||||
VALUES (10, 1, 'paid', 42.5, '2026-04-28'), (11, 2, 'open', 9.5, '2026-04-29');
|
||||
`);
|
||||
db.close();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('resolves SQLite path configuration safely', () => {
  const originalDatabaseUrl = process.env.KTX_SQLITE_TEST_URL;
  const pointerPath = join(tempDir, 'sqlite-path.txt');
  process.env.KTX_SQLITE_TEST_URL = `sqlite:${dbPath}`;
  writeFileSync(pointerPath, dbPath, 'utf-8');

  try {
    expect(isKtxSqliteConnectionConfig({ driver: 'sqlite', path: 'warehouse.db' })).toBe(true);
    expect(isKtxSqliteConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL' })).toBe(false);
    // Relative `path` is resolved against projectDir.
    expect(
      sqliteDatabasePathFromConfig({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite', path: 'warehouse.db' },
      }),
    ).toBe(dbPath);
    // `env:` reference holding a `sqlite:` URL.
    expect(
      sqliteDatabasePathFromConfig({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite', url: 'env:KTX_SQLITE_TEST_URL' },
      }),
    ).toBe(dbPath);
    // `file://` URL on the `url` key is a database URI, not a pointer file.
    expect(
      sqliteDatabasePathFromConfig({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite', url: `file://${dbPath}` },
      }),
    ).toBe(dbPath);
    // `file:` on the `path` key reads the database path from the pointer file.
    expect(
      sqliteDatabasePathFromConfig({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite', path: `file:${pointerPath}` },
      }),
    ).toBe(dbPath);
    // Absolute `path` is returned as-is (this slot previously repeated the relative-path case).
    expect(
      sqliteDatabasePathFromConfig({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite', path: dbPath },
      }),
    ).toBe(dbPath);
    // Unsupported keys such as `file_path` are not consulted.
    expect(() =>
      sqliteDatabasePathFromConfig({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite', file_path: 'warehouse.db' },
      }),
    ).toThrow('Native SQLite connector requires connections.warehouse.path or url');
  } finally {
    if (originalDatabaseUrl === undefined) {
      delete process.env.KTX_SQLITE_TEST_URL;
    } else {
      process.env.KTX_SQLITE_TEST_URL = originalDatabaseUrl;
    }
  }
});
|
||||
|
||||
it('introspects schema, primary keys, row counts, views, and foreign keys', async () => {
|
||||
const connector = new KtxSqliteScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: { driver: 'sqlite', path: dbPath },
|
||||
now: () => new Date('2026-04-29T10:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await connector.introspect(
|
||||
{ connectionId: 'warehouse', driver: 'sqlite' },
|
||||
{ runId: 'scan-run-1' },
|
||||
);
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
driver: 'sqlite',
|
||||
extractedAt: '2026-04-29T10:00:00.000Z',
|
||||
metadata: {
|
||||
file_path: dbPath,
|
||||
table_count: 3,
|
||||
total_columns: 11,
|
||||
},
|
||||
});
|
||||
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows])).toEqual([
|
||||
['customers', 'table', 2],
|
||||
['orders', 'table', 2],
|
||||
['recent_orders', 'view', null],
|
||||
]);
|
||||
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
|
||||
name: 'id',
|
||||
nativeType: 'INTEGER',
|
||||
normalizedType: 'INTEGER',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
|
||||
{
|
||||
fromColumn: 'customer_id',
|
||||
toCatalog: null,
|
||||
toDb: null,
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
constraintName: null,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('runs samples, distinct values, statistics, and read-only SQL', async () => {
|
||||
const connector = new KtxSqliteScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: { driver: 'sqlite', path: dbPath },
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.sampleTable(
|
||||
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, columns: ['id'], limit: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toEqual({ headers: ['id'], rows: [[10]], totalRows: 1 });
|
||||
|
||||
await expect(
|
||||
connector.sampleColumn(
|
||||
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status', limit: 5 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
|
||||
|
||||
await expect(
|
||||
connector.getColumnDistinctValues(
|
||||
{ catalog: null, db: null, name: 'orders' },
|
||||
'status',
|
||||
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
|
||||
),
|
||||
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly(
|
||||
{ connectionId: 'warehouse', sql: 'select id, status from orders order by id', maxRows: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
|
||||
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
|
||||
|
||||
await expect(
|
||||
connector.columnStats(
|
||||
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status' },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toBeNull();
|
||||
});
|
||||
|
||||
it('adapts native SQLite snapshots to live-database introspection for local ingest', async () => {
|
||||
const introspection = createSqliteLiveDatabaseIntrospection({
|
||||
projectDir: tempDir,
|
||||
connections: {
|
||||
warehouse: { driver: 'sqlite', path: 'warehouse.db' },
|
||||
},
|
||||
now: () => new Date('2026-04-29T10:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await introspection.extractSchema('warehouse');
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
extractedAt: '2026-04-29T10:00:00.000Z',
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
|
||||
name: 'customers',
|
||||
catalog: null,
|
||||
db: null,
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'INTEGER',
|
||||
normalizedType: 'INTEGER',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: null,
|
||||
},
|
||||
{
|
||||
name: 'name',
|
||||
nativeType: 'TEXT',
|
||||
normalizedType: 'TEXT',
|
||||
dimensionType: 'string',
|
||||
nullable: false,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
{
|
||||
name: 'tier',
|
||||
nativeType: 'TEXT',
|
||||
normalizedType: 'TEXT',
|
||||
dimensionType: 'string',
|
||||
nullable: true,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'orders')).toMatchObject({
|
||||
name: 'orders',
|
||||
catalog: null,
|
||||
db: null,
|
||||
foreignKeys: [{ fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }],
|
||||
});
|
||||
});
|
||||
});
|
||||
354
packages/cli/src/connectors/sqlite/connector.ts
Normal file
354
packages/cli/src/connectors/sqlite/connector.ts
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
import Database from 'better-sqlite3';
|
||||
import { existsSync, readFileSync, statSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { isAbsolute, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { normalizeQueryRows } from '../../context/connections/query-executor.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { KtxSqliteDialect } from './dialect.js';
|
||||
|
||||
/**
 * Connection settings understood by the native SQLite connector.
 *
 * The database location is read from `path` first and from `url` otherwise;
 * any other keys are tolerated and not read for path resolution.
 */
export interface KtxSqliteConnectionConfig {
  driver?: string;
  path?: string;
  url?: string;
  [key: string]: unknown;
}

/**
 * Inputs needed to resolve the database file path for one connection.
 *
 * @internal
 */
export interface SqliteDatabasePathInput {
  connectionId: string;
  /** Base directory for relative paths; the process working directory is used when omitted. */
  projectDir?: string;
  connection: KtxSqliteConnectionConfig | undefined;
}

/** Constructor options for `KtxSqliteScanConnector`. */
export interface KtxSqliteScanConnectorOptions extends SqliteDatabasePathInput {
  /** Clock override used for the snapshot's `extractedAt` timestamp. */
  now?: () => Date;
}

/** Read-only query input extended with optional bind parameters (named or positional). */
export interface KtxSqliteReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  params?: Record<string, unknown> | unknown[];
}

/** Tuning knobs for `KtxSqliteScanConnector.getColumnDistinctValues`. */
export interface KtxSqliteColumnDistinctValuesOptions {
  /** Above this cardinality the values themselves are not fetched. */
  maxCardinality: number;
  /** Maximum number of distinct values returned. */
  limit: number;
  /** Number of rows used for the cardinality estimate; defaults to 10000. */
  sampleSize?: number;
}

/** Result of a distinct-values lookup; `values` is `null` when cardinality exceeded the cap. */
export interface KtxSqliteColumnDistinctValuesResult {
  values: string[] | null;
  cardinality: number;
}

/** Row shape selected from `sqlite_master` during introspection. */
interface SqliteMasterRow {
  name: string;
  type: 'table' | 'view';
}

/** Row shape the connector reads from `PRAGMA table_info(...)`. */
interface SqliteTableInfoRow {
  cid: number;
  name: string;
  type: string;
  notnull: number;
  dflt_value: unknown;
  pk: number;
}

/** Row shape the connector reads from `PRAGMA foreign_key_list(...)`. */
interface SqliteForeignKeyRow {
  id: number;
  seq: number;
  table: string;
  from: string;
  to: string;
}
|
||||
|
||||
/**
 * Reads a string-valued config entry.
 *
 * Non-string and blank values yield `undefined`; anything else is trimmed and
 * passed through `resolveStringReference`.
 */
function stringConfigValue(
  connection: KtxSqliteConnectionConfig | undefined,
  key: keyof KtxSqliteConnectionConfig,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(key, trimmed);
}
|
||||
|
||||
/**
 * Resolves indirection prefixes on a connection config string.
 *
 * - `env:NAME` returns `process.env.NAME`, or `''` when the variable is unset.
 * - `file:PATH` (any key except `url`) returns the trimmed UTF-8 contents of
 *   PATH; a leading `~` is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @throws whatever `readFileSync` throws when a `file:` target cannot be read.
 */
function resolveStringReference(key: keyof KtxSqliteConnectionConfig, value: string): string {
  if (value.startsWith('env:')) {
    return process.env[value.slice('env:'.length)] ?? '';
  }
  // `file:` on the `url` key is SQLite's native URI form (e.g. `file:///db.sqlite`), not a
  // file-contents reference — skip the read so the URI passes through verbatim.
  if (key !== 'url' && value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator(s) after `~` before resolving: `resolve(home, '/x')` would treat
    // `/x` as absolute and discard the home directory entirely.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Converts a configured URL into a filesystem path.
 *
 * `file:` URLs go through `fileURLToPath`; `sqlite:` URLs yield their
 * percent-decoded pathname; any other string is returned unchanged.
 */
function sqlitePathFromUrl(url: string): string {
  const scheme = url.startsWith('file:') ? 'file' : url.startsWith('sqlite:') ? 'sqlite' : 'other';
  switch (scheme) {
    case 'file':
      return fileURLToPath(url);
    case 'sqlite': {
      const { pathname } = new URL(url);
      return decodeURIComponent(pathname);
    }
    default:
      return url;
  }
}
|
||||
|
||||
/**
 * Removes leading whitespace, `--` line comments, and block comments from a SQL string.
 *
 * Scanning stops at the first character that is neither whitespace nor the
 * start of a comment. An unterminated block comment is returned as-is from
 * its opening marker; an unterminated line comment consumes the rest of the input.
 */
function stripLeadingSqlComments(sql: string): string {
  const total = sql.length;
  let pos = 0;
  for (;;) {
    while (pos < total && /\s/.test(sql.charAt(pos))) {
      pos += 1;
    }
    if (sql.startsWith('--', pos)) {
      const newlineAt = sql.indexOf('\n', pos + 2);
      if (newlineAt === -1) {
        return '';
      }
      pos = newlineAt + 1;
    } else if (sql.startsWith('/*', pos)) {
      const closeAt = sql.indexOf('*/', pos + 2);
      if (closeAt === -1) {
        return sql.slice(pos);
      }
      pos = closeAt + 2;
    } else {
      return sql.slice(pos);
    }
  }
}
|
||||
|
||||
/** Type guard: true when the connection's `driver` is `sqlite` or `sqlite3`, compared case-insensitively. */
export function isKtxSqliteConnectionConfig(
  connection: KtxSqliteConnectionConfig | undefined,
): connection is KtxSqliteConnectionConfig {
  const normalizedDriver = String(connection?.driver ?? '').toLowerCase();
  return ['sqlite', 'sqlite3'].includes(normalizedDriver);
}
|
||||
|
||||
/**
 * Resolves the absolute database file path for a SQLite connection.
 *
 * `path` takes precedence over `url`; a relative result is resolved against
 * `projectDir`, or the process working directory when none is given.
 *
 * @throws Error when the driver is not SQLite or neither `path` nor `url` yields a value.
 * @internal
 */
export function sqliteDatabasePathFromConfig(input: SqliteDatabasePathInput): string {
  // Captured before the guard: in the failing branch the connection narrows to `undefined`.
  const inputDriver = input.connection?.driver ?? 'unknown';
  const connection = input.connection;
  if (!isKtxSqliteConnectionConfig(connection)) {
    throw new Error(`Native SQLite connector cannot run driver "${inputDriver}"`);
  }

  let configuredPath = stringConfigValue(connection, 'path');
  if (configuredPath === undefined) {
    configuredPath = sqlitePathFromUrl(stringConfigValue(connection, 'url') ?? '');
  }
  if (!configuredPath) {
    throw new Error(`Native SQLite connector requires connections.${input.connectionId}.path or url`);
  }

  if (isAbsolute(configuredPath)) {
    return configuredPath;
  }
  return resolve(input.projectDir ?? process.cwd(), configuredPath);
}
|
||||
|
||||
/**
 * Scan connector for a single SQLite database file.
 *
 * The file path is resolved once in the constructor. The database handle is
 * opened lazily, read-only, on first use and kept until `cleanup()` is called.
 */
export class KtxSqliteScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'sqlite' as const;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: false,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly dbPath: string;
  private readonly now: () => Date;
  private readonly dialect = new KtxSqliteDialect();
  private db: Database.Database | null = null;

  /** @throws Error when the connection config is not SQLite or has no usable path/url. */
  constructor(options: KtxSqliteScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.dbPath = sqliteDatabasePathFromConfig(options);
    this.now = options.now ?? (() => new Date());
    this.id = `sqlite:${options.connectionId}`;
  }

  /** Checks that the database file exists and answers `SELECT 1`; failures are reported, not thrown. */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      if (!existsSync(this.dbPath) || !statSync(this.dbPath).isFile()) {
        return { success: false, error: `File not found: ${this.dbPath}` };
      }
      this.database().prepare('SELECT 1').get();
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Reads every user table and view (names starting with the reserved
   * `sqlite_` prefix are skipped) into a schema snapshot, ordered by name.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const database = this.database();
    // `_` is a single-character wildcard in LIKE, so it must be escaped; otherwise a user
    // table such as `sqlitedata` would be filtered out along with the internal tables.
    const rawTables = database
      .prepare(
        `SELECT name, type FROM sqlite_master WHERE type IN ('table', 'view') AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\' ORDER BY name`,
      )
      .all() as SqliteMasterRow[];
    const tables = rawTables.map((table) => this.readTable(database, table));
    const fileStats = existsSync(this.dbPath) ? statSync(this.dbPath) : null;
    return {
      connectionId: this.connectionId,
      driver: 'sqlite',
      extractedAt: this.now().toISOString(),
      scope: {},
      metadata: {
        file_path: this.dbPath,
        file_size: fileStats ? fileStats.size : 0,
        table_count: tables.length,
        total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables,
    };
  }

  /** Returns up to `input.limit` rows of the table, optionally restricted to `input.columns`. */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
    return { headers: result.headers, rows: result.rows, totalRows: result.totalRows };
  }

  /** Returns up to `input.limit` non-null values of one column; null and distinct counts are not computed. */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /** Column statistics are not supported (see `capabilities.columnStats`); always resolves to `null`. */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /**
   * Runs a caller-supplied query after stripping leading comments and applying
   * the `maxRows` limit; the statement is rejected unless it passes the read-only check.
   */
  async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Estimates a column's cardinality over the first `sampleSize` rows
   * (default 10000) and, when it does not exceed `maxCardinality`, fetches up
   * to `limit` distinct values as strings.
   *
   * @returns `null` when the cardinality cannot be determined; `values: null`
   *   when the cardinality exceeds the cap.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxSqliteColumnDistinctValuesOptions,
  ): Promise<KtxSqliteColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityResult = this.query(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    if (cardinalityResult.rows.length === 0) {
      return null;
    }
    const cardinality = Number(cardinalityResult.rows[0][0]);
    if (Number.isNaN(cardinality)) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesResult = this.query(this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit));
    return {
      values: valuesResult.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => String(row[0])),
      cardinality,
    };
  }

  /** Returns the exact `COUNT(*)` of a table, or 0 when the query yields no row. */
  async getTableRowCount(tableName: string): Promise<number> {
    const result = this.query(`SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(tableName)}`);
    return Number(result.rows[0]?.[0] ?? 0);
  }

  /** Returns the quoted table name as formatted by the dialect. */
  qTableName(table: Pick<KtxTableRef, 'name'>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes an identifier using the dialect's rules. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Closes the database handle if one is open; safe to call repeatedly. */
  async cleanup(): Promise<void> {
    if (this.db) {
      this.db.close();
      this.db = null;
    }
  }

  /** Lazily opens the database read-only; the file must already exist. */
  private database(): Database.Database {
    if (!this.db) {
      this.db = new Database(this.dbPath, { readonly: true, fileMustExist: true });
    }
    return this.db;
  }

  /** Executes a statement that passes the read-only check and returns headers plus normalized rows. */
  private query(sql: string, params?: Record<string, unknown> | unknown[]): Omit<KtxQueryResult, 'rowCount'> {
    const statement = this.database().prepare(assertReadOnlySql(sql));
    const rows = (params ? statement.all(params) : statement.all()) as unknown[];
    return {
      headers: statement.columns().map((column) => column.name),
      rows: normalizeQueryRows(rows),
      totalRows: rows.length,
    };
  }

  /**
   * Reads columns, foreign keys, and (for tables only) an exact row count for
   * one `sqlite_master` entry. Views get `estimatedRows: null`.
   */
  private readTable(database: Database.Database, table: SqliteMasterRow): KtxSchemaTable {
    const columns = database
      .prepare(`PRAGMA table_info(${this.dialect.quoteIdentifier(table.name)})`)
      .all() as SqliteTableInfoRow[];
    const foreignKeys = database
      .prepare(`PRAGMA foreign_key_list(${this.dialect.quoteIdentifier(table.name)})`)
      .all() as SqliteForeignKeyRow[];
    const estimatedRows =
      table.type === 'table'
        ? Number(
            (
              database
                .prepare(`SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(table.name)}`)
                .get() as { count: unknown }
            ).count,
          )
        : null;
    return {
      catalog: null,
      db: null,
      name: table.name,
      kind: table.type,
      comment: null,
      estimatedRows,
      columns: columns.map((column) => ({
        name: column.name,
        nativeType: column.type,
        normalizedType: this.dialect.mapDataType(column.type),
        dimensionType: this.dialect.mapToDimensionType(column.type),
        // Primary-key columns are reported as non-nullable even without an explicit NOT NULL.
        nullable: column.notnull === 0 && column.pk === 0,
        primaryKey: column.pk > 0,
        comment: null,
      })),
      foreignKeys: this.mapForeignKeys(foreignKeys),
    };
  }

  /** Converts PRAGMA foreign-key rows to schema foreign keys, ordered by constraint id then column sequence. */
  private mapForeignKeys(rows: SqliteForeignKeyRow[]): KtxSchemaForeignKey[] {
    // Sort a copy so the caller's array is left untouched.
    return [...rows]
      .sort((a, b) => a.id - b.id || a.seq - b.seq)
      .map((row) => ({
        fromColumn: row.from,
        toCatalog: null,
        toDb: null,
        toTable: row.table,
        toColumn: row.to,
        constraintName: null,
      }));
  }

  /** @throws Error when asked to serve a connection other than the one it was built for. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`KTX SQLite connector ${this.id} cannot serve connection ${connectionId}`);
    }
  }
}
|
||||
33
packages/cli/src/connectors/sqlite/dialect.test.ts
Normal file
33
packages/cli/src/connectors/sqlite/dialect.test.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxSqliteDialect } from './dialect.js';
|
||||
|
||||
describe('KtxSqliteDialect', () => {
|
||||
const dialect = new KtxSqliteDialect();
|
||||
|
||||
it('quotes identifiers and formats single-file SQLite table names', () => {
|
||||
expect(dialect.quoteIdentifier('orders')).toBe('"orders"');
|
||||
expect(dialect.quoteIdentifier('weird"name')).toBe('"weird""name"');
|
||||
expect(dialect.formatTableName({ catalog: 'ignored', db: 'ignored', name: 'orders' })).toBe('"orders"');
|
||||
});
|
||||
|
||||
it('maps native SQLite types to KTX dimension types', () => {
|
||||
expect(dialect.mapToDimensionType('INTEGER')).toBe('number');
|
||||
expect(dialect.mapToDimensionType('numeric(10,2)')).toBe('number');
|
||||
expect(dialect.mapToDimensionType('timestamp')).toBe('time');
|
||||
expect(dialect.mapToDimensionType('VARCHAR(255)')).toBe('string');
|
||||
expect(dialect.mapToDimensionType('bool')).toBe('boolean');
|
||||
expect(dialect.mapToDimensionType('')).toBe('string');
|
||||
});
|
||||
|
||||
it('builds sampling and distinct-value SQL without host-specific state', () => {
|
||||
expect(dialect.generateSampleQuery('"orders"', 25, ['id', 'status'])).toBe(
|
||||
'SELECT "id", "status" FROM "orders" LIMIT 25',
|
||||
);
|
||||
expect(dialect.generateColumnSampleQuery('"orders"', 'status', 10)).toBe(
|
||||
'SELECT "status" FROM "orders" WHERE "status" IS NOT NULL AND TRIM(CAST("status" AS TEXT)) != \'\' LIMIT 10',
|
||||
);
|
||||
expect(dialect.generateDistinctValuesQuery('"orders"', '"status"', 5)).toContain(
|
||||
'SELECT DISTINCT CAST("status" AS TEXT) AS val',
|
||||
);
|
||||
});
|
||||
});
|
||||
177
packages/cli/src/connectors/sqlite/dialect.ts
Normal file
177
packages/cli/src/connectors/sqlite/dialect.ts
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
type SqliteTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
export class KtxSqliteDialect {
|
||||
readonly type = 'sqlite';
|
||||
|
||||
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
|
||||
DATETIME: 'time',
|
||||
DATE: 'time',
|
||||
TIMESTAMP: 'time',
|
||||
TIME: 'time',
|
||||
INTEGER: 'number',
|
||||
INT: 'number',
|
||||
REAL: 'number',
|
||||
NUMERIC: 'number',
|
||||
FLOAT: 'number',
|
||||
DOUBLE: 'number',
|
||||
TEXT: 'string',
|
||||
VARCHAR: 'string',
|
||||
CHAR: 'string',
|
||||
BLOB: 'string',
|
||||
BOOLEAN: 'boolean',
|
||||
BOOL: 'boolean',
|
||||
};
|
||||
|
||||
/** Wraps `identifier` in double quotes, doubling every embedded double quote. */
quoteIdentifier(identifier: string): string {
  const escaped = identifier.split('"').join('""');
  return '"' + escaped + '"';
}
|
||||
|
||||
/** Returns the quoted table name; any `catalog`/`db` on the reference is ignored. */
formatTableName(table: SqliteTableNameRef): string {
  const { name } = table;
  return this.quoteIdentifier(name);
}
|
||||
|
||||
/** Returns the native type name unchanged; this dialect applies no type normalization. */
mapDataType(nativeType: string): string {
  const normalizedType = nativeType;
  return normalizedType;
}
|
||||
|
||||
/**
 * Classifies a declared SQLite column type as a KTX dimension type.
 *
 * The type is upper-cased, trimmed, and cut at the first `(`. An exact hit in
 * `typeMappings` wins; otherwise substring heuristics apply in the order
 * time → number → boolean, with `'string'` as the fallback (also used for an
 * empty declaration).
 */
mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
  if (!nativeType) {
    return 'string';
  }

  const upper = nativeType.toUpperCase().trim();
  const parenIndex = upper.indexOf('(');
  // Trim again so declarations like `VARCHAR (255)` still hit the exact-match table.
  const normalized = (parenIndex === -1 ? upper : upper.slice(0, parenIndex)).trim();

  const mapped = this.typeMappings[normalized];
  if (mapped) {
    return mapped;
  }
  if (normalized.includes('TIME') || normalized.includes('DATE')) {
    return 'time';
  }
  if (
    normalized.includes('INT') ||
    normalized.includes('NUM') ||
    // `DECIMAL(p,s)` is numeric but contains none of the other fragments.
    normalized.includes('DEC') ||
    normalized.includes('REAL') ||
    normalized.includes('FLOAT') ||
    normalized.includes('DOUBLE')
  ) {
    return 'number';
  }
  if (normalized.includes('BOOL')) {
    return 'boolean';
  }
  return 'string';
}
|
||||
|
||||
/**
 * Builds a `SELECT ... LIMIT <limit>` over the table.
 *
 * `tableName` is interpolated as given; `columns`, when non-empty, are quoted
 * individually, otherwise `*` is selected.
 */
generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
  let projection = '*';
  if (columns && columns.length > 0) {
    projection = columns.map((name) => this.quoteIdentifier(name)).join(', ');
  }
  return `SELECT ${projection} FROM ${tableName} LIMIT ${limit}`;
}
|
||||
|
||||
generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
|
||||
const quoted = this.quoteIdentifier(columnName);
|
||||
return `SELECT ${quoted} FROM ${tableName} WHERE ${quoted} IS NOT NULL AND TRIM(CAST(${quoted} AS TEXT)) != '' LIMIT ${limit}`;
|
||||
}
|
||||
|
||||
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown } {
|
||||
return params ? { sql, params } : { sql };
|
||||
}
|
||||
|
||||
getRandomSampleFilter(samplePct: number): string {
|
||||
if (samplePct <= 0 || samplePct >= 1) {
|
||||
return '';
|
||||
}
|
||||
return `(RANDOM() % 100) < ${Math.round(samplePct * 100)}`;
|
||||
}
|
||||
|
||||
getTableSampleClause(_samplePct: number): string {
|
||||
return '';
|
||||
}
|
||||
|
||||
getLimitOffsetClause(limit: number, offset?: number): string {
|
||||
return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
|
||||
}
|
||||
|
||||
getNullCountExpression(column: string): string {
|
||||
return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
|
||||
}
|
||||
|
||||
getDistinctCountExpression(column: string): string {
|
||||
return `COUNT(DISTINCT ${column})`;
|
||||
}
|
||||
|
||||
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
return `
|
||||
WITH sampled AS (
|
||||
SELECT ${columnName} AS val
|
||||
FROM ${tableName}
|
||||
WHERE ${columnName} IS NOT NULL
|
||||
LIMIT ${sampleSize}
|
||||
)
|
||||
SELECT COUNT(DISTINCT val) AS cardinality
|
||||
FROM sampled
|
||||
`;
|
||||
}
|
||||
|
||||
generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
|
||||
return `
|
||||
SELECT DISTINCT CAST(${columnName} AS TEXT) AS val
|
||||
FROM ${tableName}
|
||||
WHERE ${columnName} IS NOT NULL
|
||||
ORDER BY val
|
||||
LIMIT ${limit}
|
||||
`;
|
||||
}
|
||||
|
||||
generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
return `
|
||||
WITH sampled AS (
|
||||
SELECT ${columnName} AS val
|
||||
FROM ${tableName}
|
||||
WHERE ${columnName} IS NOT NULL
|
||||
ORDER BY RANDOM()
|
||||
LIMIT ${sampleSize}
|
||||
)
|
||||
SELECT COUNT(DISTINCT val) AS cardinality
|
||||
FROM sampled
|
||||
`;
|
||||
}
|
||||
|
||||
getTimeTruncExpression(
|
||||
column: string,
|
||||
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
|
||||
_timezone?: string,
|
||||
): string {
|
||||
switch (granularity) {
|
||||
case 'day':
|
||||
return `DATE(${column})`;
|
||||
case 'week':
|
||||
return `DATE(${column}, 'weekday 0', '-6 days')`;
|
||||
case 'month':
|
||||
return `DATE(${column}, 'start of month')`;
|
||||
case 'quarter':
|
||||
return `DATE(${column}, 'start of month', '-' || ((CAST(STRFTIME('%m', ${column}) AS INTEGER) - 1) % 3) || ' months')`;
|
||||
case 'year':
|
||||
return `DATE(${column}, 'start of year')`;
|
||||
}
|
||||
}
|
||||
|
||||
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, _timezone?: string): string {
|
||||
const [amount, unit] = interval.split(' ');
|
||||
const originExpr = origin ? `julianday('${origin}')` : `julianday('1970-01-01')`;
|
||||
const unitDays = unit === 'day' ? 1 : unit === 'week' ? 7 : 30;
|
||||
const intervalDays = Number(amount) * unitDays;
|
||||
return `DATE(julianday('1970-01-01') + (CAST((julianday(${column}) - ${originExpr}) / ${intervalDays} AS INTEGER) * ${intervalDays}))`;
|
||||
}
|
||||
|
||||
parseIntervalToSql(interval: string): string {
|
||||
return `'${interval}'`;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
import { KtxSqliteScanConnector, type KtxSqliteConnectionConfig } from './connector.js';
|
||||
|
||||
/** Inputs for {@link createSqliteLiveDatabaseIntrospection}. */
export interface CreateSqliteLiveDatabaseIntrospectionOptions {
  /** Forwarded unchanged to the SQLite scan connector. */
  projectDir?: string;
  /** Connection configs keyed by connection id; looked up on every `extractSchema` call. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Optional clock, forwarded unchanged to the SQLite scan connector. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Builds a live-database introspection port backed by the SQLite scan connector.
 *
 * Each `extractSchema` call creates a fresh connector for the requested
 * connection id, introspects it, and always runs the connector's `cleanup()`
 * afterwards, whether introspection succeeded or threw.
 */
export function createSqliteLiveDatabaseIntrospection(
  options: CreateSqliteLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  return {
    async extractSchema(connectionId: string) {
      // Options are read per call so later changes to `options` are observed.
      const { connections, projectDir, now } = options;
      const sqliteConnector = new KtxSqliteScanConnector({
        connectionId,
        connection: connections[connectionId] as KtxSqliteConnectionConfig | undefined,
        projectDir,
        now,
      });
      const runId = `sqlite-${connectionId}`;
      try {
        return await sqliteConnector.introspect({ connectionId, driver: 'sqlite' }, { runId });
      } finally {
        await sqliteConnector.cleanup();
      }
    },
  };
}
|
||||
341
packages/cli/src/connectors/sqlserver/connector.test.ts
Normal file
341
packages/cli/src/connectors/sqlserver/connector.test.ts
Normal file
|
|
@ -0,0 +1,341 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createSqlServerLiveDatabaseIntrospection } from '../../connectors/sqlserver/live-database-introspection.js';
|
||||
import { isKtxSqlServerConnectionConfig, KtxSqlServerScanConnector, sqlServerConnectionPoolConfigFromConfig, type KtxSqlServerPoolFactory, type KtxSqlServerQueryResult } from '../../connectors/sqlserver/connector.js';
|
||||
|
||||
/**
 * Test helper: attaches a `columns` metadata map to `rows` (mutating and
 * returning the same array), declaring every listed column as `nvarchar`.
 */
function recordset<T extends Record<string, unknown>>(
  rows: T[],
  columnNames: string[],
): T[] & { columns: Record<string, { type: { declaration: string } }> } {
  const columns: Record<string, { type: { declaration: string } }> = Object.fromEntries(
    columnNames.map((columnName) => [columnName, { type: { declaration: 'nvarchar' } }]),
  );
  return Object.assign(rows, { columns });
}
|
||||
|
||||
/** Test helper: wraps rows and column names in the connector's query-result shape. */
function result<T extends Record<string, unknown>>(rows: T[], columnNames: string[]): KtxSqlServerQueryResult {
  const rowsWithColumns = recordset(rows, columnNames);
  return { recordset: rowsWithColumns };
}
|
||||
|
||||
/**
 * Test double for the SQL Server pool factory.
 *
 * `query` answers from an ordered list of canned responses: the first entry
 * whose matcher accepts the SQL text wins, and unrecognised SQL throws.
 * Every pool returned by `createPool` shares the same request and `close` mocks.
 */
function fakePoolFactory(): KtxSqlServerPoolFactory {
  interface CannedResponse {
    matches(sql: string): boolean;
    respond(): KtxSqlServerQueryResult;
  }

  // Order matters: earlier entries shadow later ones, as in an if-chain.
  const cannedResponses: CannedResponse[] = [
    {
      matches: (sql) => sql.includes('INFORMATION_SCHEMA.TABLES'),
      respond: () =>
        result(
          [
            { table_name: 'customers', table_type: 'BASE TABLE' },
            { table_name: 'orders', table_type: 'BASE TABLE' },
            { table_name: 'order_summary', table_type: 'VIEW' },
          ],
          ['table_name', 'table_type'],
        ),
    },
    {
      matches: (sql) => sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = 0'),
      respond: () =>
        result([{ table_name: 'customers', table_comment: 'Customer table' }], ['table_name', 'table_comment']),
    },
    {
      matches: (sql) => sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = c.column_id'),
      respond: () =>
        result(
          [{ table_name: 'customers', column_name: 'id', column_comment: 'PK' }],
          ['table_name', 'column_name', 'column_comment'],
        ),
    },
    {
      matches: (sql) => sql.includes('INFORMATION_SCHEMA.COLUMNS'),
      respond: () =>
        result(
          [
            { table_name: 'customers', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
            { table_name: 'customers', column_name: 'name', data_type: 'nvarchar', is_nullable: 'NO' },
            { table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
            { table_name: 'orders', column_name: 'customer_id', data_type: 'int', is_nullable: 'NO' },
            { table_name: 'orders', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
            { table_name: 'order_summary', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
          ],
          ['table_name', 'column_name', 'data_type', 'is_nullable'],
        ),
    },
    {
      matches: (sql) => sql.includes("CONSTRAINT_TYPE = 'PRIMARY KEY'"),
      respond: () =>
        result(
          [
            { table_name: 'customers', column_name: 'id' },
            { table_name: 'orders', column_name: 'id' },
          ],
          ['table_name', 'column_name'],
        ),
    },
    {
      matches: (sql) => sql.includes('REFERENTIAL_CONSTRAINTS'),
      respond: () =>
        result(
          [
            {
              table_name: 'orders',
              column_name: 'customer_id',
              referenced_table_schema: 'dbo',
              referenced_table_name: 'customers',
              referenced_column_name: 'id',
              constraint_name: 'orders_customer_id_fk',
            },
          ],
          [
            'table_name',
            'column_name',
            'referenced_table_schema',
            'referenced_table_name',
            'referenced_column_name',
            'constraint_name',
          ],
        ),
    },
    {
      matches: (sql) => sql.includes('sys.partitions') && sql.includes('GROUP BY t.name'),
      respond: () =>
        result(
          [
            { table_name: 'customers', row_count: 2 },
            { table_name: 'orders', row_count: 2 },
          ],
          ['table_name', 'row_count'],
        ),
    },
    {
      matches: (sql) => sql.includes('SELECT TOP 1 [id], [status] FROM [dbo].[orders]'),
      respond: () => result([{ id: 10, status: 'paid' }], ['id', 'status']),
    },
    {
      matches: (sql) =>
        sql.includes('SELECT TOP 1 * FROM (select id, status from dbo.orders) AS ktx_query_result'),
      respond: () => result([{ id: 10, status: 'paid' }], ['id', 'status']),
    },
    {
      matches: (sql) => sql.includes('SELECT TOP 5 [status] FROM [dbo].[orders]'),
      respond: () => result([{ status: 'paid' }, { status: 'open' }], ['status']),
    },
    {
      matches: (sql) => sql.includes('COUNT(DISTINCT val)'),
      respond: () => result([{ cardinality: 2 }], ['cardinality']),
    },
    {
      matches: (sql) => sql.includes('SELECT TOP 10 val'),
      respond: () => result([{ val: 'open' }, { val: 'paid' }], ['val']),
    },
    {
      matches: (sql) => sql.includes('SUM(p.rows) AS row_count') && sql.includes('t.name = @tableName'),
      respond: () => result([{ row_count: 2 }], ['row_count']),
    },
    {
      matches: (sql) => sql.includes('SELECT s.name AS schema_name'),
      respond: () => result([{ schema_name: 'dbo' }, { schema_name: 'sales' }], ['schema_name']),
    },
    {
      matches: (sql) => sql.trim() === 'SELECT 1',
      respond: () => result([{ ok: 1 }], ['ok']),
    },
  ];

  const query = vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
    const canned = cannedResponses.find((entry) => entry.matches(sql));
    if (!canned) {
      throw new Error(`Unexpected SQL: ${sql}`);
    }
    return canned.respond();
  });

  // `input` is chainable: it ignores its arguments and hands back the same request.
  const fakeRequest: { input(name: string, value: unknown): typeof fakeRequest; query: typeof query } = {
    input: vi.fn((_name: string, _value: unknown) => fakeRequest),
    query,
  };
  const close = vi.fn(async () => undefined);

  return {
    createPool: vi.fn(async () => ({
      request: () => fakeRequest,
      close,
    })),
  };
}
|
||||
|
||||
describe('KtxSqlServerScanConnector', () => {
  // Fixtures shared by the cases below.
  const scanContext = { runId: 'scan-run-1' };
  const ordersTable = { catalog: 'analytics', db: 'dbo', name: 'orders' };
  const frozenNow = () => new Date('2026-04-29T16:00:00.000Z');
  const dboConnection = () => ({
    driver: 'sqlserver',
    host: 'db.example.test',
    database: 'analytics',
    username: 'reader',
    schema: 'dbo',
  });

  it('resolves SQL Server connection configuration safely', () => {
    const sqlServerConfig = { driver: 'sqlserver', host: 'localhost', database: 'analytics' };
    const mysqlConfig = { driver: 'mysql', host: 'localhost', database: 'analytics' };
    expect(isKtxSqlServerConnectionConfig(sqlServerConfig)).toBe(true);
    expect(isKtxSqlServerConnectionConfig(mysqlConfig)).toBe(false);

    const poolConfig = sqlServerConnectionPoolConfigFromConfig({
      connectionId: 'warehouse',
      connection: {
        driver: 'sqlserver',
        host: 'db.example.test',
        port: 14330,
        database: 'analytics',
        username: 'reader',
        trustServerCertificate: false,
      },
    });
    expect(poolConfig).toMatchObject({
      server: 'db.example.test',
      port: 14330,
      database: 'analytics',
      user: 'reader',
      options: { encrypt: true, trustServerCertificate: false },
    });
  });

  it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
    const connector = new KtxSqlServerScanConnector({
      connectionId: 'warehouse',
      connection: dboConnection(),
      poolFactory: fakePoolFactory(),
      now: frozenNow,
    });

    const snapshot = await connector.introspect({ connectionId: 'warehouse', driver: 'sqlserver' }, scanContext);

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'sqlserver',
      extractedAt: '2026-04-29T16:00:00.000Z',
      scope: { catalogs: ['analytics'], schemas: ['dbo'] },
      metadata: {
        database: 'analytics',
        host: 'db.example.test',
        schemas: ['dbo'],
        table_count: 3,
        total_columns: 6,
      },
    });

    const tableSummaries = snapshot.tables.map((table) => [
      table.name,
      table.kind,
      table.estimatedRows,
      table.comment,
    ]);
    expect(tableSummaries).toEqual([
      ['customers', 'table', 2, 'Customer table'],
      ['orders', 'table', 2, null],
      ['order_summary', 'view', null, null],
    ]);

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'int',
      normalizedType: 'int',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
      comment: 'PK',
    });

    const orders = snapshot.tables.find((table) => table.name === 'orders');
    expect(orders?.foreignKeys).toEqual([
      {
        fromColumn: 'customer_id',
        toCatalog: 'analytics',
        toDb: 'dbo',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: 'orders_customer_id_fk',
      },
    ]);
  });

  it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
    const poolFactory = fakePoolFactory();
    const connector = new KtxSqlServerScanConnector({
      connectionId: 'warehouse',
      connection: dboConnection(),
      poolFactory,
    });

    const tableSample = connector.sampleTable(
      { connectionId: 'warehouse', table: ordersTable, columns: ['id', 'status'], limit: 1 },
      scanContext,
    );
    await expect(tableSample).resolves.toEqual({
      headers: ['id', 'status'],
      headerTypes: ['nvarchar', 'nvarchar'],
      rows: [[10, 'paid']],
      totalRows: 1,
    });

    const columnSample = connector.sampleColumn(
      { connectionId: 'warehouse', table: ordersTable, column: 'status', limit: 5 },
      scanContext,
    );
    await expect(columnSample).resolves.toMatchObject({
      values: ['paid', 'open'],
      nullCount: null,
      distinctCount: null,
    });

    const distinctValues = connector.getColumnDistinctValues(ordersTable, 'status', {
      maxCardinality: 5,
      limit: 10,
      sampleSize: 100,
    });
    await expect(distinctValues).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    const readOnlyQuery = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'select id, status from dbo.orders', maxRows: 1 },
      scanContext,
    );
    await expect(readOnlyQuery).resolves.toMatchObject({
      headers: ['id', 'status'],
      rows: [[10, 'paid']],
      totalRows: 1,
      rowCount: 1,
    });

    const mutatingQuery = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'delete from orders' },
      scanContext,
    );
    await expect(mutatingQuery).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

    await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
    await expect(connector.listSchemas()).resolves.toEqual(['dbo', 'sales']);

    const stats = connector.columnStats(
      { connectionId: 'warehouse', table: ordersTable, column: 'status' },
      scanContext,
    );
    await expect(stats).resolves.toBeNull();

    await connector.cleanup();
  });

  it('adapts native SQL Server snapshots to live-database introspection for local ingest', async () => {
    const introspection = createSqlServerLiveDatabaseIntrospection({
      connections: {
        warehouse: {
          driver: 'sqlserver',
          host: 'db.example.test',
          database: 'analytics',
          username: 'reader',
          schema: 'dbo',
        },
      },
      poolFactory: fakePoolFactory(),
      now: frozenNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T16:00:00.000Z',
    });

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers).toMatchObject({
      name: 'customers',
      catalog: 'analytics',
      db: 'dbo',
      columns: [
        {
          name: 'id',
          nativeType: 'int',
          normalizedType: 'int',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: 'PK',
        },
        {
          name: 'name',
          nativeType: 'nvarchar',
          normalizedType: 'nvarchar',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: null,
        },
      ],
      foreignKeys: [],
    });
  });
});
|
||||
710
packages/cli/src/connectors/sqlserver/connector.ts
Normal file
710
packages/cli/src/connectors/sqlserver/connector.ts
Normal file
|
|
@ -0,0 +1,710 @@
|
|||
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import sql from 'mssql';
|
||||
import { KtxSqlServerDialect } from './dialect.js';
|
||||
|
||||
/**
 * User-facing connection settings for the native SQL Server connector.
 *
 * String settings read through `stringConfigValue` may be literal values or
 * `env:NAME` / `file:PATH` references.
 */
export interface KtxSqlServerConnectionConfig {
  /** Must equal `sqlserver` (case-insensitive) for this connector to accept the config. */
  driver?: string;
  /** Server host; becomes `server` in the pool config. */
  host?: string;
  /** TCP port; only finite numbers are honoured, otherwise 1433 is used. */
  port?: number;
  database?: string;
  /** Login name; takes precedence over `user`. */
  username?: string;
  /** Fallback login name used when `username` is not set. */
  user?: string;
  password?: string;
  /** Connection URL; fields set explicitly on this object override values parsed from it. */
  url?: string;
  /** Single schema to scan; used when `schemas` is absent or empty. Defaults to `dbo`. */
  schema?: string;
  /** Schemas to scan; takes precedence over `schema` when non-empty. */
  schemas?: string[];
  /** Defaults to `true` when neither this field nor a URL supplies a value. */
  trustServerCertificate?: boolean;
  [key: string]: unknown;
}

/** Fully resolved settings handed to the pool factory. */
export interface KtxSqlServerPoolConfig {
  server: string;
  port: number;
  database: string;
  user: string;
  password?: string;
  /** Encryption is always requested. */
  options: { encrypt: true; trustServerCertificate: boolean };
  pool: { max: number; min: number; idleTimeoutMillis: number };
}

/**
 * Driver-independent query result: rows plus optional per-column type declarations.
 *
 * @internal
 */
export interface KtxSqlServerQueryResult {
  recordset?: Array<Record<string, unknown>> & { columns?: Record<string, { type?: { declaration?: string } }> };
}

/** Minimal request surface the connector needs: chainable `input` binding and `query`. */
interface KtxSqlServerRequest {
  input(name: string, value: unknown): KtxSqlServerRequest;
  query(query: string): Promise<KtxSqlServerQueryResult>;
}

/** Minimal pool surface the connector needs. */
export interface KtxSqlServerPool {
  request(): KtxSqlServerRequest;
  close(): Promise<void>;
}

/** Creates pools from a resolved config; replaceable in tests. */
export interface KtxSqlServerPoolFactory {
  createPool(config: KtxSqlServerPoolConfig): Promise<KtxSqlServerPool>;
}

/** Host/port produced by an endpoint resolver, with an optional teardown hook. */
interface KtxSqlServerResolvedEndpoint {
  host: string;
  port: number;
  close?: () => Promise<void>;
}

/** Maps the configured host/port to the endpoint that should be used. */
export interface KtxSqlServerEndpointResolver {
  resolve(input: {
    host: string;
    port: number;
    connection: KtxSqlServerConnectionConfig;
  }): Promise<KtxSqlServerResolvedEndpoint>;
}

/** Constructor options for the SQL Server scan connector. */
export interface KtxSqlServerScanConnectorOptions {
  connectionId: string;
  connection: KtxSqlServerConnectionConfig | undefined;
  /** Defaults to a factory backed by the `mssql` driver. */
  poolFactory?: KtxSqlServerPoolFactory;
  endpointResolver?: KtxSqlServerEndpointResolver;
  /** Environment used to resolve `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Clock used for snapshot timestamps; defaults to the current time. */
  now?: () => Date;
}

/** Read-only query input extended with optional named parameters. */
export interface KtxSqlServerReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  params?: Record<string, unknown>;
}

/** Tuning knobs for distinct-value discovery. */
export interface KtxSqlServerColumnDistinctValuesOptions {
  /** Above this cardinality the values themselves are not fetched. */
  maxCardinality: number;
  /** Maximum number of distinct values to return. */
  limit: number;
  /** Rows sampled when estimating cardinality; defaults to 10000. */
  sampleSize?: number;
}

/** Distinct-value discovery outcome; `values` is `null` when cardinality exceeded the maximum. */
export interface KtxSqlServerColumnDistinctValuesResult {
  values: string[] | null;
  cardinality: number;
}

/** Table sample result extended with optional per-header type names. */
interface KtxSqlServerTableSampleResult extends KtxTableSampleResult {
  headerTypes?: string[];
}
|
||||
|
||||
/**
 * Extracts a type's `declaration` string from driver column metadata.
 *
 * Functions are invoked (with no arguments) and their result is inspected
 * recursively; anything without a string `declaration`, or any failure while
 * doing so, yields `'unknown'`.
 */
function sqlTypeDeclaration(type: unknown): string {
  const fallback = 'unknown';
  if (typeof type === 'function') {
    try {
      const produced: unknown = type();
      return sqlTypeDeclaration(produced);
    } catch {
      return fallback;
    }
  }
  if (type === null || typeof type !== 'object' || !('declaration' in type)) {
    return fallback;
  }
  const { declaration } = type as { declaration?: unknown };
  return typeof declaration === 'string' ? declaration : fallback;
}
|
||||
|
||||
/**
 * Copies driver rows into a new array and attaches a `columns` map whose
 * entries carry only the normalised type declaration of each column.
 * Missing rows or columns are treated as empty.
 */
function sqlRecordset(
  rows: Array<Record<string, unknown>> | undefined,
  columns: Record<string, { type?: unknown }> | undefined,
): NonNullable<KtxSqlServerQueryResult['recordset']> {
  const copiedRows: Array<Record<string, unknown>> = [...(rows ?? [])];
  const declaredColumns = Object.fromEntries(
    Object.entries(columns ?? {}).map(([columnName, metadata]) => {
      const declaration = sqlTypeDeclaration(metadata.type);
      return [columnName, { type: { declaration } }];
    }),
  );
  return Object.assign(copiedRows, { columns: declaredColumns });
}
|
||||
|
||||
/** Pool factory backed by the `mssql` driver. */
class DefaultSqlServerPoolFactory implements KtxSqlServerPoolFactory {
  /**
   * Connects an `mssql` connection pool and exposes it through the narrow
   * pool/request interfaces used by the connector.
   */
  async createPool(config: KtxSqlServerPoolConfig): Promise<KtxSqlServerPool> {
    const connectedPool = await new sql.ConnectionPool(config as sql.config).connect();

    // Every call hands out a wrapper around a fresh driver request.
    const request = (): KtxSqlServerRequest => {
      const driverRequest = connectedPool.request();
      return {
        input(name: string, value: unknown) {
          driverRequest.input(name, value);
          return this;
        },
        async query(query: string) {
          const { recordset } = await driverRequest.query(query);
          return {
            recordset: sqlRecordset(recordset as Array<Record<string, unknown>> | undefined, recordset?.columns),
          };
        },
      };
    };

    return { request, close: () => connectedPool.close() };
  }
}
|
||||
|
||||
/**
 * Reads a setting from the connection config as a resolved string.
 *
 * @returns the trimmed value with any `env:` / `file:` reference resolved, or
 *   `undefined` when the setting is absent, not a string, or blank
 */
function stringConfigValue(
  connection: KtxSqlServerConnectionConfig | undefined,
  key: keyof KtxSqlServerConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  return trimmed.length > 0 ? resolveStringReference(trimmed, env) : undefined;
}
|
||||
|
||||
/**
 * Resolves an indirect configuration value.
 *
 * - `env:NAME` returns `env[NAME]`, or `''` when the variable is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file; a leading `~`
 *   in PATH is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @throws whatever `readFileSync` throws when a referenced file cannot be read
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Strip the separator that follows `~`: `resolve(home, '/x')` would treat
    // '/x' as absolute and discard the home directory entirely.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Splits a connection URL into connection-config fields.
 *
 * Empty URL components become `undefined`; username and password are
 * percent-decoded. `trustServerCertificate` is `true` only when the query
 * parameter is exactly `true`, and `false` otherwise (including when absent).
 *
 * @throws TypeError when `url` cannot be parsed by `URL`
 */
function parseSqlServerUrl(url: string): Partial<KtxSqlServerConnectionConfig> {
  const { hostname, port, pathname, username, password, searchParams } = new URL(url);
  const databaseName = pathname.replace(/^\/+/, '');
  return {
    host: hostname,
    port: port === '' ? undefined : Number(port),
    database: databaseName === '' ? undefined : databaseName,
    username: username === '' ? undefined : decodeURIComponent(username),
    password: password === '' ? undefined : decodeURIComponent(password),
    trustServerCertificate: searchParams.get('trustServerCertificate') === 'true',
  };
}
|
||||
|
||||
/** Returns `value` when it is a finite number, otherwise `undefined`. */
function maybeNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Determines which schemas to scan.
 *
 * A non-empty `schemas` array wins: blank entries are dropped and the rest are
 * resolved as string references. Otherwise the single `schema` setting is
 * used, falling back to `dbo`.
 */
function schemaNames(connection: KtxSqlServerConnectionConfig, env: NodeJS.ProcessEnv): string[] {
  const configured = connection.schemas;
  if (!Array.isArray(configured) || configured.length === 0) {
    const single = stringConfigValue(connection, 'schema', env);
    return [single ?? 'dbo'];
  }
  const nonBlank = configured.filter((schema) => schema.trim().length > 0);
  return nonBlank.map((schema) => resolveStringReference(schema, env));
}
|
||||
|
||||
/** Buckets rows by `table_name`, keeping first-seen table order and row order within each table. */
function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string, T[]> {
  return rows.reduce((byTable, row) => {
    const bucket = byTable.get(row.table_name);
    if (bucket === undefined) {
      byTable.set(row.table_name, [row]);
    } else {
      bucket.push(row);
    }
    return byTable;
  }, new Map<string, T[]>());
}
|
||||
|
||||
/**
 * Coerces a value with `Number()` and returns it when finite, else `null`.
 * Note that `Number()` coercion turns `null` and `''` into `0`.
 */
function firstNumber(value: unknown): number | null {
  const coerced = Number(value);
  if (Number.isFinite(coerced)) {
    return coerced;
  }
  return null;
}
|
||||
|
||||
/**
 * Validates SQL as read-only and optionally caps the number of returned rows.
 *
 * The text returned by `assertReadOnlySql` has trailing semicolons removed.
 * When `maxRows` is falsy (`undefined` or `0`) that text is returned as is;
 * otherwise it is wrapped as a derived table under `SELECT TOP <maxRows>`.
 *
 * NOTE(review): T-SQL does not allow a `WITH` CTE inside a derived table, so
 * wrapping may fail for `WITH` queries; confirm against the callers' usage.
 *
 * @throws Error when `maxRows` is truthy but not a positive integer, or
 *   whatever `assertReadOnlySql` throws for rejected SQL
 */
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
  const statement = assertReadOnlySql(sqlText).replace(/;+\s*$/, '');
  if (maxRows) {
    if (Number.isInteger(maxRows) && maxRows > 0) {
      return `SELECT TOP ${maxRows} * FROM (${statement}) AS ktx_query_result`;
    }
    throw new Error('maxRows must be a positive integer.');
  }
  return statement;
}
|
||||
|
||||
/**
 * Type guard: true when the connection's `driver` is `sqlserver`, compared
 * case-insensitively. A missing connection or driver yields `false`.
 */
export function isKtxSqlServerConnectionConfig(
  connection: KtxSqlServerConnectionConfig | undefined,
): connection is KtxSqlServerConnectionConfig {
  const driverName = String(connection?.driver ?? '');
  return driverName.toLowerCase() === 'sqlserver';
}
|
||||
|
||||
/**
 * Resolves a connection config into the pool config used to open a pool.
 *
 * Values parsed from `url` act as defaults; fields set directly on the
 * connection override them. Port falls back to 1433 and
 * `trustServerCertificate` to `true` when unset.
 *
 * @throws Error when the driver is not `sqlserver`, or when host, database or
 *   user cannot be determined
 * @internal
 */
export function sqlServerConnectionPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KtxSqlServerConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KtxSqlServerPoolConfig {
  const { connectionId, connection } = input;
  const inputDriver = connection?.driver ?? 'unknown';
  if (!isKtxSqlServerConnectionConfig(connection)) {
    throw new Error(`Native SQL Server connector cannot run driver "${inputDriver}"`);
  }

  const env = input.env ?? process.env;
  const referencedUrl = stringConfigValue(connection, 'url', env);
  const fromUrl = referencedUrl ? parseSqlServerUrl(referencedUrl) : {};
  const merged: KtxSqlServerConnectionConfig = { ...fromUrl, ...connection };

  // All three settings are resolved before any is validated.
  const server = stringConfigValue(merged, 'host', env);
  const database = stringConfigValue(merged, 'database', env);
  const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);

  const settingPrefix = `Native SQL Server connector requires connections.${connectionId}`;
  if (!server) {
    throw new Error(`${settingPrefix}.host or url`);
  }
  if (!database) {
    throw new Error(`${settingPrefix}.database or url`);
  }
  if (!user) {
    throw new Error(`${settingPrefix}.username, user, or url`);
  }

  const port = maybeNumber(merged.port) ?? 1433;
  const password = stringConfigValue(merged, 'password', env);
  const trustServerCertificate = merged.trustServerCertificate ?? true;

  return {
    server,
    port,
    database,
    user,
    password,
    options: { encrypt: true, trustServerCertificate },
    pool: { max: 10, min: 0, idleTimeoutMillis: 30000 },
  };
}
|
||||
|
||||
export class KtxSqlServerScanConnector implements KtxScanConnector {
|
||||
readonly id: string;
|
||||
readonly driver = 'sqlserver' as const;
|
||||
readonly capabilities = createKtxConnectorCapabilities({
|
||||
tableSampling: true,
|
||||
columnSampling: true,
|
||||
columnStats: false,
|
||||
readOnlySql: true,
|
||||
nestedAnalysis: false,
|
||||
formalForeignKeys: true,
|
||||
estimatedRowCounts: true,
|
||||
});
|
||||
|
||||
private readonly connectionId: string;
|
||||
private readonly connection: KtxSqlServerConnectionConfig;
|
||||
private readonly poolConfig: KtxSqlServerPoolConfig;
|
||||
private readonly schemas: string[];
|
||||
private readonly poolFactory: KtxSqlServerPoolFactory;
|
||||
private readonly endpointResolver?: KtxSqlServerEndpointResolver;
|
||||
private readonly now: () => Date;
|
||||
private readonly dialect = new KtxSqlServerDialect();
|
||||
private pool: KtxSqlServerPool | null = null;
|
||||
private resolvedEndpoint: KtxSqlServerResolvedEndpoint | null = null;
|
||||
|
||||
constructor(options: KtxSqlServerScanConnectorOptions) {
|
||||
this.connectionId = options.connectionId;
|
||||
this.connection = options.connection ?? {};
|
||||
const env = options.env ?? process.env;
|
||||
this.poolConfig = sqlServerConnectionPoolConfigFromConfig({
|
||||
connectionId: options.connectionId,
|
||||
connection: options.connection,
|
||||
env,
|
||||
});
|
||||
this.schemas = schemaNames(this.connection, env);
|
||||
this.poolFactory = options.poolFactory ?? new DefaultSqlServerPoolFactory();
|
||||
this.endpointResolver = options.endpointResolver;
|
||||
this.now = options.now ?? (() => new Date());
|
||||
this.id = `sqlserver:${options.connectionId}`;
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
try {
|
||||
await this.query('SELECT 1');
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
}
|
||||
}
|
||||
|
||||
async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const tables: KtxSchemaTable[] = [];
|
||||
for (const schemaName of this.schemas) {
|
||||
tables.push(...(await this.introspectSchema(schemaName)));
|
||||
}
|
||||
return {
|
||||
connectionId: this.connectionId,
|
||||
driver: 'sqlserver',
|
||||
extractedAt: this.now().toISOString(),
|
||||
scope: { catalogs: [this.poolConfig.database], schemas: this.schemas },
|
||||
metadata: {
|
||||
database: this.poolConfig.database,
|
||||
schemas: this.schemas,
|
||||
host: this.poolConfig.server,
|
||||
table_count: tables.length,
|
||||
total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
|
||||
},
|
||||
tables,
|
||||
};
|
||||
}
|
||||
|
||||
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxSqlServerTableSampleResult> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
|
||||
return { headers: result.headers, headerTypes: result.headerTypes, rows: result.rows, totalRows: result.totalRows };
|
||||
}
|
||||
|
||||
/**
 * Samples values of a single column using the dialect's column-sample query.
 *
 * Rows that are empty or whose first cell is `null` are dropped. Null and distinct
 * counts are not computed here and are always reported as `null`.
 *
 * @throws Error when `input.connectionId` is not the connection this connector serves.
 */
async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
  this.assertConnection(input.connectionId);
  const sampleSql = this.dialect.generateColumnSampleQuery(
    this.qTableName(input.table),
    input.column,
    input.limit,
  );
  const { rows } = await this.query(sampleSql);
  const values = rows.flatMap((row) => (row.length > 0 && row[0] !== null ? [row[0]] : []));
  return { values, nullCount: null, distinctCount: null };
}
|
||||
|
||||
/**
 * Column statistics are not provided by this connector.
 *
 * @returns Always `null`; both arguments are ignored.
 */
async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
  return null;
}
|
||||
|
||||
/**
 * Runs caller-supplied SQL through the read-only query path.
 *
 * The SQL is first capped via `limitSqlForSqlServerExecution(input.sql, input.maxRows)` and
 * then prepared by the dialect (named parameters are rewritten for SQL Server).
 *
 * @returns The query result with `rowCount` set to the number of returned rows.
 * @throws Error when `input.connectionId` is not the connection this connector serves.
 */
async executeReadOnly(input: KtxSqlServerReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
  this.assertConnection(input.connectionId);
  const cappedSql = limitSqlForSqlServerExecution(input.sql, input.maxRows);
  const { sql, params } = this.dialect.prepareQuery(cappedSql, input.params);
  const outcome = await this.query(sql, params);
  return { ...outcome, rowCount: outcome.rows.length };
}
|
||||
|
||||
/**
 * Estimates a column's cardinality from a sample and, when it is small enough,
 * returns its distinct non-null values as strings.
 *
 * @param table Table reference, formatted through the dialect.
 * @param columnName Unquoted column name.
 * @param options `sampleSize` (defaults to 10000), `maxCardinality` and `limit`.
 * @returns `null` when the cardinality is not a number; `{ values: [], cardinality: 0 }`
 *   for an empty sample; `{ values: null, cardinality }` when the cardinality exceeds
 *   `options.maxCardinality`; otherwise the distinct values with the cardinality.
 */
async getColumnDistinctValues(
  table: KtxTableRef,
  columnName: string,
  options: KtxSqlServerColumnDistinctValuesOptions,
): Promise<KtxSqlServerColumnDistinctValuesResult | null> {
  const qualifiedTable = this.qTableName(table);
  const quotedColumn = this.dialect.quoteIdentifier(columnName);
  const sampleSize = options.sampleSize ?? 10000;

  const cardinalitySql = this.dialect.generateCardinalitySampleQuery(qualifiedTable, quotedColumn, sampleSize);
  const cardinalityRows = await this.queryRaw<{ cardinality: unknown }>(cardinalitySql);
  const cardinality = Number(cardinalityRows[0]?.cardinality);

  if (Number.isNaN(cardinality)) {
    return null;
  }
  if (cardinality === 0) {
    return { values: [], cardinality: 0 };
  }
  if (cardinality > options.maxCardinality) {
    return { values: null, cardinality };
  }

  const valuesSql = this.dialect.generateDistinctValuesQuery(qualifiedTable, quotedColumn, options.limit);
  const valueRows = await this.queryRaw<{ val: unknown }>(valuesSql);
  const values: string[] = [];
  for (const row of valueRows) {
    if (row.val !== null) {
      values.push(String(row.val));
    }
  }
  return { values, cardinality };
}
|
||||
|
||||
/**
 * Reads a table's row count from `sys.partitions` (heap or clustered index partitions only).
 *
 * @param tableName Unquoted table name.
 * @param schemaName Schema to look in; defaults to the first configured schema, or `dbo`.
 * @returns The summed partition row count, or 0 when no numeric value is returned.
 */
async getTableRowCount(tableName: string, schemaName = this.schemas[0] ?? 'dbo'): Promise<number> {
  const rowCountSql = `
    SELECT SUM(p.rows) AS row_count
    FROM sys.tables t
    INNER JOIN sys.partitions p ON t.object_id = p.object_id
    INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
    WHERE s.name = @schemaName
      AND t.name = @tableName
      AND p.index_id IN (0, 1)
  `;
  const resultRows = await this.queryRaw<{ row_count: unknown }>(rowCountSql, { schemaName, tableName });
  const count = firstNumber(resultRows[0]?.row_count);
  return count ?? 0;
}
|
||||
|
||||
/**
 * Formats a table reference as a quoted table name by delegating to the dialect.
 *
 * @param table Table reference; `name` is required, `catalog` and `db` are optional.
 */
qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
  const formatted = this.dialect.formatTableName(table);
  return formatted;
}
|
||||
|
||||
/** Quotes a single identifier by delegating to the dialect. */
quoteIdentifier(identifier: string): string {
  const quoted = this.dialect.quoteIdentifier(identifier);
  return quoted;
}
|
||||
|
||||
/**
 * Lists schema names from `sys.schemas`, sorted by name, leaving out the built-in
 * system schemas and fixed database-role schemas named in the query.
 */
async listSchemas(): Promise<string[]> {
  const schemasSql = `
    SELECT s.name AS schema_name
    FROM sys.schemas s
    WHERE s.name NOT IN (
      'INFORMATION_SCHEMA', 'sys', 'guest',
      'db_owner', 'db_accessadmin', 'db_securityadmin', 'db_ddladmin',
      'db_backupoperator', 'db_datareader', 'db_datawriter',
      'db_denydatareader', 'db_denydatawriter'
    )
    ORDER BY s.name
  `;
  const schemaRows = await this.queryRaw<{ schema_name: string }>(schemasSql);
  return schemaRows.map(({ schema_name }) => schema_name);
}
|
||||
|
||||
/**
 * Lists user tables and views in the given schemas, sorted by schema then name.
 *
 * @param schemas Schemas to search; when omitted, every schema from `listSchemas()` is used.
 * @returns One entry per object; an empty array when there are no schemas to search.
 */
async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
  const targetSchemas = schemas ?? (await this.listSchemas());
  if (targetSchemas.length === 0) {
    return [];
  }

  // Bind each schema name as its own named parameter (@schema0, @schema1, ...).
  const params: Record<string, unknown> = {};
  const placeholders: string[] = [];
  targetSchemas.forEach((schema, index) => {
    const key = `schema${index}`;
    params[key] = schema;
    placeholders.push(`@${key}`);
  });

  const tablesSql = `
    SELECT s.name AS schema_name, o.name AS table_name, o.type_desc AS table_type
    FROM sys.objects o
    JOIN sys.schemas s ON o.schema_id = s.schema_id
    WHERE o.type IN ('U', 'V')
      AND s.name IN (${placeholders.join(', ')})
    ORDER BY s.name, o.name
  `;
  const objectRows = await this.queryRaw<{ schema_name: string; table_name: string; table_type: string }>(
    tablesSql,
    params,
  );

  return objectRows.map((row) => {
    const kind = row.table_type === 'VIEW' ? ('view' as const) : ('table' as const);
    return { schema: row.schema_name, name: row.table_name, kind };
  });
}
|
||||
|
||||
/**
 * Releases the connection pool and the resolved endpoint, if any.
 *
 * Both references are cleared before closing, so a failed close never leaves a
 * half-closed pool behind for later queries. The endpoint is closed in a `finally`
 * block so it is released even when closing the pool throws.
 *
 * @throws Whatever `pool.close()` or the endpoint's `close()` throws.
 */
async cleanup(): Promise<void> {
  const pool = this.pool;
  const endpoint = this.resolvedEndpoint;
  this.pool = null;
  this.resolvedEndpoint = null;

  try {
    if (pool) {
      await pool.close();
    }
  } finally {
    if (endpoint?.close) {
      await endpoint.close();
    }
  }
}
|
||||
|
||||
/**
 * Introspects one schema: tables and views, their columns, comments, primary keys,
 * foreign keys and estimated row counts.
 *
 * The metadata queries run one after another, in the order they appear below.
 *
 * @param schemaName Schema to introspect (bound as a query parameter).
 * @returns One entry per table or view, in table-name order. Views get
 *   `estimatedRows: null`; tables without a row-count entry get 0.
 */
private async introspectSchema(schemaName: string): Promise<KtxSchemaTable[]> {
  const tablesSql = `
    SELECT TABLE_NAME AS table_name, TABLE_TYPE AS table_type
    FROM INFORMATION_SCHEMA.TABLES
    WHERE TABLE_SCHEMA = @schemaName
      AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
    ORDER BY TABLE_NAME
  `;
  const tableRows = await this.queryRaw<{ table_name: string; table_type: string }>(tablesSql, { schemaName });

  const columnsSql = `
    SELECT TABLE_NAME AS table_name, COLUMN_NAME AS column_name, DATA_TYPE AS data_type, IS_NULLABLE AS is_nullable
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_SCHEMA = @schemaName
    ORDER BY TABLE_NAME, ORDINAL_POSITION
  `;
  const columnRows = await this.queryRaw<{
    table_name: string;
    column_name: string;
    data_type: string;
    is_nullable: string;
  }>(columnsSql, { schemaName });

  const tableComments = await this.tableComments(schemaName);
  const columnComments = await this.columnComments(schemaName);
  const primaryKeys = await this.primaryKeys(schemaName);
  const foreignKeys = await this.foreignKeys(schemaName);
  const rowCounts = await this.rowCounts(schemaName);

  const columnsByTable = groupByTable(columnRows);
  const foreignKeysByTable = groupByTable(foreignKeys);

  return tableRows.map((table) => {
    const tableName = table.table_name;
    const isView = table.table_type === 'VIEW';
    const ownColumns = columnsByTable.get(tableName) ?? [];
    const ownForeignKeys = foreignKeysByTable.get(tableName) ?? [];
    return {
      catalog: this.poolConfig.database,
      db: schemaName,
      name: tableName,
      kind: isView ? 'view' : 'table',
      comment: tableComments.get(tableName) ?? null,
      estimatedRows: isView ? null : (rowCounts.get(tableName) ?? 0),
      columns: ownColumns.map((column) =>
        this.toSchemaColumn(column, primaryKeys.get(tableName) ?? new Set(), columnComments),
      ),
      foreignKeys: ownForeignKeys.map((row) => this.toSchemaForeignKey(row)),
    };
  });
}
|
||||
|
||||
/**
 * Loads `MS_Description` extended properties attached to tables and views of a schema.
 *
 * The join is restricted to `ep.class = 1` (object or column properties) with
 * `minor_id = 0`, so only properties on the object itself are read. Without the class
 * filter, a property of another class that shares the same `major_id`/`minor_id`
 * pair could be picked up as a table comment.
 *
 * @param schemaName Schema to read comments for (bound as a query parameter).
 * @returns Map from table name to its comment text.
 */
private async tableComments(schemaName: string): Promise<Map<string, string>> {
  const rows = await this.queryRaw<{ table_name: string; table_comment: string }>(
    `
    SELECT o.name AS table_name, CAST(ep.value AS NVARCHAR(MAX)) AS table_comment
    FROM sys.objects o
    INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
    INNER JOIN sys.extended_properties ep ON ep.major_id = o.object_id
      AND ep.class = 1
      AND ep.minor_id = 0
      AND ep.name = 'MS_Description'
    WHERE s.name = @schemaName
      AND o.type IN ('U', 'V')
    `,
    { schemaName },
  );
  return new Map(rows.map((row) => [row.table_name, row.table_comment]));
}
|
||||
|
||||
/**
 * Loads `MS_Description` extended properties attached to columns of a schema's
 * tables and views.
 *
 * The join is restricted to `ep.class = 1` (object or column properties). Without
 * it, a description stored on an index (class 7, where `minor_id` is the index id)
 * of the same table would match the column whose `column_id` equals that index id
 * and could replace the real column comment.
 *
 * @param schemaName Schema to read comments for (bound as a query parameter).
 * @returns Map keyed by `"<table>.<column>"` with the comment text as value.
 */
private async columnComments(schemaName: string): Promise<Map<string, string>> {
  const rows = await this.queryRaw<{ table_name: string; column_name: string; column_comment: string }>(
    `
    SELECT o.name AS table_name, c.name AS column_name, CAST(ep.value AS NVARCHAR(MAX)) AS column_comment
    FROM sys.columns c
    INNER JOIN sys.objects o ON c.object_id = o.object_id
    INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
    INNER JOIN sys.extended_properties ep ON ep.major_id = c.object_id
      AND ep.class = 1
      AND ep.minor_id = c.column_id
      AND ep.name = 'MS_Description'
    WHERE s.name = @schemaName
      AND o.type IN ('U', 'V')
    `,
    { schemaName },
  );
  return new Map(rows.map((row) => [`${row.table_name}.${row.column_name}`, row.column_comment]));
}
|
||||
|
||||
/**
 * Collects the primary-key column names of every table in a schema.
 *
 * @param schemaName Schema to inspect (bound as a query parameter).
 * @returns Map from table name to the set of its primary-key column names.
 */
private async primaryKeys(schemaName: string): Promise<Map<string, Set<string>>> {
  const primaryKeySql = `
    SELECT tc.TABLE_NAME AS table_name, kcu.COLUMN_NAME AS column_name
    FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
    JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
      ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
      AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
    WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
      AND tc.TABLE_SCHEMA = @schemaName
    ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION
  `;
  const keyRows = await this.queryRaw<{ table_name: string; column_name: string }>(primaryKeySql, { schemaName });

  const byTable = new Map<string, Set<string>>();
  for (const { table_name: tableName, column_name: columnName } of keyRows) {
    let keyColumns = byTable.get(tableName);
    if (keyColumns === undefined) {
      keyColumns = new Set<string>();
      byTable.set(tableName, keyColumns);
    }
    keyColumns.add(columnName);
  }
  return byTable;
}
|
||||
|
||||
/**
 * Loads foreign-key column pairs for every table in a schema.
 *
 * Referencing and referenced columns are paired by joining both sides of
 * `KEY_COLUMN_USAGE` on equal `ORDINAL_POSITION`.
 *
 * @param schemaName Schema of the referencing tables (bound as a query parameter).
 * @returns One row per foreign-key column, ordered by table then column name.
 */
private async foreignKeys(schemaName: string): Promise<
  Array<{
    table_name: string;
    column_name: string;
    referenced_table_schema: string;
    referenced_table_name: string;
    referenced_column_name: string;
    constraint_name: string;
  }>
> {
  const foreignKeySql = `
    SELECT
      fk.TABLE_NAME AS table_name,
      fk.COLUMN_NAME AS column_name,
      pk.TABLE_SCHEMA AS referenced_table_schema,
      pk.TABLE_NAME AS referenced_table_name,
      pk.COLUMN_NAME AS referenced_column_name,
      fk.CONSTRAINT_NAME AS constraint_name
    FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc
    JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE fk
      ON fk.CONSTRAINT_CATALOG = rc.CONSTRAINT_CATALOG
      AND fk.CONSTRAINT_SCHEMA = rc.CONSTRAINT_SCHEMA
      AND fk.CONSTRAINT_NAME = rc.CONSTRAINT_NAME
    JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE pk
      ON pk.CONSTRAINT_CATALOG = rc.UNIQUE_CONSTRAINT_CATALOG
      AND pk.CONSTRAINT_SCHEMA = rc.UNIQUE_CONSTRAINT_SCHEMA
      AND pk.CONSTRAINT_NAME = rc.UNIQUE_CONSTRAINT_NAME
      AND pk.ORDINAL_POSITION = fk.ORDINAL_POSITION
    WHERE fk.TABLE_SCHEMA = @schemaName
    ORDER BY fk.TABLE_NAME, fk.COLUMN_NAME
  `;
  return this.queryRaw(foreignKeySql, { schemaName });
}
|
||||
|
||||
/**
 * Reads per-table row counts for a schema from `sys.partitions`
 * (heap or clustered index partitions only).
 *
 * @param schemaName Schema to inspect (bound as a query parameter).
 * @returns Map from table name to its row count; 0 when no numeric value is returned.
 */
private async rowCounts(schemaName: string): Promise<Map<string, number>> {
  const rowCountSql = `
    SELECT t.name AS table_name, SUM(p.rows) AS row_count
    FROM sys.tables t
    INNER JOIN sys.partitions p ON t.object_id = p.object_id
    INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
    WHERE s.name = @schemaName
      AND p.index_id IN (0, 1)
    GROUP BY t.name
  `;
  const countRows = await this.queryRaw<{ table_name: string; row_count: unknown }>(rowCountSql, { schemaName });

  const counts = new Map<string, number>();
  for (const row of countRows) {
    counts.set(row.table_name, firstNumber(row.row_count) ?? 0);
  }
  return counts;
}
|
||||
|
||||
/**
 * Converts one `INFORMATION_SCHEMA.COLUMNS` row into a schema column.
 *
 * @param column Raw column row.
 * @param primaryKeys Primary-key column names of the column's table.
 * @param comments Column comments keyed by `"<table>.<column>"`.
 */
private toSchemaColumn(
  column: { table_name: string; column_name: string; data_type: string; is_nullable: string },
  primaryKeys: Set<string>,
  comments: Map<string, string>,
): KtxSchemaColumn {
  const { table_name: tableName, column_name: columnName, data_type: nativeType } = column;
  const commentKey = `${tableName}.${columnName}`;
  return {
    name: columnName,
    nativeType,
    normalizedType: this.dialect.mapDataType(nativeType),
    dimensionType: this.dialect.mapToDimensionType(nativeType),
    nullable: column.is_nullable === 'YES',
    primaryKey: primaryKeys.has(columnName),
    comment: comments.get(commentKey) ?? null,
  };
}
|
||||
|
||||
/**
 * Converts one foreign-key row into a schema foreign key.
 *
 * The target catalog is always this connector's configured database. An empty
 * constraint name is reported as `null`.
 */
private toSchemaForeignKey(row: {
  column_name: string;
  referenced_table_schema: string;
  referenced_table_name: string;
  referenced_column_name: string;
  constraint_name: string;
}): KtxSchemaForeignKey {
  const {
    column_name: fromColumn,
    referenced_table_schema: toDb,
    referenced_table_name: toTable,
    referenced_column_name: toColumn,
  } = row;
  return {
    fromColumn,
    toCatalog: this.poolConfig.database,
    toDb,
    toTable,
    toColumn,
    constraintName: row.constraint_name || null,
  };
}
|
||||
|
||||
/**
 * Returns the connection pool, creating it on first use.
 *
 * When an endpoint resolver is configured, the host and port are resolved first and
 * the pool is created against the resolved endpoint; the resolved endpoint is kept so
 * `cleanup()` can close it.
 *
 * NOTE(review): two calls that overlap before the first pool exists would each create
 * a pool — confirm callers never query concurrently on a fresh connector.
 */
private async poolForQuery(): Promise<KtxSqlServerPool> {
  if (this.pool) {
    return this.pool;
  }

  const config = { ...this.poolConfig };
  if (this.endpointResolver) {
    const endpoint = await this.endpointResolver.resolve({
      host: config.server,
      port: config.port,
      connection: this.connection,
    });
    this.resolvedEndpoint = endpoint;
    config.server = endpoint.host;
    config.port = endpoint.port;
  }

  const pool = await this.poolFactory.createPool(config);
  this.pool = pool;
  return pool;
}
|
||||
|
||||
/**
 * Runs a query and returns its rows as plain objects.
 *
 * Unlike `query`, the SQL is sent as-is (no read-only assertion); in the visible code
 * it is used for connector-authored metadata queries and dialect-generated statements.
 *
 * @param query SQL text, optionally using `@name` parameters.
 * @param params Values bound by name to the request.
 * @returns The record set, or an empty array when none is returned.
 */
private async queryRaw<T extends Record<string, unknown>>(query: string, params?: Record<string, unknown>): Promise<T[]> {
  const pool = await this.poolForQuery();
  const request = pool.request();
  for (const [name, value] of Object.entries(params ?? {})) {
    request.input(name, value);
  }
  const { recordset } = await request.query(query);
  return (recordset ?? []) as T[];
}
|
||||
|
||||
/**
 * Runs a read-only query and returns it in tabular form.
 *
 * The SQL passes through `assertReadOnlySql` before execution. Headers come from the
 * record set's column metadata when present, otherwise from the keys of the first row.
 * A header without a type declaration in the metadata is typed `'unknown'`.
 *
 * @param query SQL text, optionally using `@name` parameters.
 * @param params Values bound by name to the request.
 * @returns Headers, header types, row arrays aligned with the headers, and the row total.
 */
private async query(query: string, params?: Record<string, unknown>): Promise<Omit<KtxQueryResult, 'rowCount'>> {
  const pool = await this.poolForQuery();
  const request = pool.request();
  for (const [name, value] of Object.entries(params ?? {})) {
    request.input(name, value);
  }

  const result = await request.query(assertReadOnlySql(query));
  const recordset = result.recordset ?? [];
  const columnMetadata = recordset.columns ?? {};

  let headers = Object.keys(columnMetadata);
  if (headers.length === 0) {
    headers = Object.keys(recordset[0] ?? {});
  }
  const headerTypes = headers.map((header) => columnMetadata[header]?.type?.declaration ?? 'unknown');
  const rows = recordset.map((row) => headers.map((header) => row[header]));

  return { headers, headerTypes, rows, totalRows: recordset.length };
}
|
||||
|
||||
/**
 * Guards against serving a request addressed to a different connection.
 *
 * @throws Error when `connectionId` differs from this connector's connection id.
 */
private assertConnection(connectionId: string): void {
  if (connectionId === this.connectionId) {
    return;
  }
  throw new Error(`KTX SQL Server connector ${this.id} cannot serve connection ${connectionId}`);
}
|
||||
}
|
||||
49
packages/cli/src/connectors/sqlserver/dialect.test.ts
Normal file
49
packages/cli/src/connectors/sqlserver/dialect.test.ts
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxSqlServerDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the SQL Server dialect's pure string builders.
describe('KtxSqlServerDialect', () => {
  const dialect = new KtxSqlServerDialect();
  const eventsTable = '[dbo].[events]';

  it('quotes identifiers and formats schema-qualified table names', () => {
    expect(dialect.quoteIdentifier('events')).toBe('[events]');
    // A closing bracket inside the name is doubled.
    expect(dialect.quoteIdentifier('odd]name')).toBe('[odd]]name]');

    // The catalog is not part of the formatted name.
    const qualified = dialect.formatTableName({ catalog: 'warehouse', db: 'dbo', name: 'events' });
    expect(qualified).toBe('[dbo].[events]');
    const bare = dialect.formatTableName({ catalog: null, db: null, name: 'events' });
    expect(bare).toBe('[events]');
  });

  it('maps SQL Server types to KTX dimension types', () => {
    const expectations: Array<[string, string]> = [
      ['datetime2', 'time'],
      ['decimal(18, 2)', 'number'],
      ['bigint', 'number'],
      ['bit', 'boolean'],
      ['uniqueidentifier', 'string'],
      ['', 'string'],
    ];
    for (const [nativeType, dimensionType] of expectations) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('builds sampling, distinct-value, pagination, and time SQL', () => {
    expect(dialect.generateSampleQuery(eventsTable, 25, ['id', 'event_name'])).toBe(
      'SELECT TOP 25 [id], [event_name] FROM [dbo].[events]',
    );
    expect(dialect.generateColumnSampleQuery(eventsTable, 'event_name', 10)).toBe(
      "SELECT TOP 10 [event_name] FROM [dbo].[events] WHERE [event_name] IS NOT NULL AND LTRIM(RTRIM(CAST([event_name] AS NVARCHAR(MAX)))) != ''",
    );
    expect(dialect.generateDistinctValuesQuery(eventsTable, '[event_name]', 5)).toContain('SELECT TOP 5 val');
    expect(dialect.getTopClause(10)).toBe('TOP 10');
    expect(dialect.getLimitOffsetClause(10, 20)).toBe('OFFSET 20 ROWS FETCH NEXT 10 ROWS ONLY');
    expect(dialect.getTimeTruncExpression('created_at', 'month')).toBe(
      'DATEFROMPARTS(YEAR(created_at), MONTH(created_at), 1)',
    );
  });

  it('prepares named parameters using SQL Server @ parameters', () => {
    const params = { id: 10, name: 'signup' };
    const prepared = dialect.prepareQuery('select * from events where id = :id and name = :name', params);
    expect(prepared).toEqual({
      sql: 'select * from events where id = @id and name = @name',
      params: { id: 10, name: 'signup' },
    });
  });
});
|
||||
201
packages/cli/src/connectors/sqlserver/dialect.ts
Normal file
201
packages/cli/src/connectors/sqlserver/dialect.ts
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
/** Table reference accepted by the dialect: `name` is required, `catalog` and `db` are optional. */
type SqlServerTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;

/**
 * SQL Server (T-SQL) dialect helpers: identifier quoting, native-type classification
 * and small SQL fragment generators.
 *
 * Every method is a pure string builder; nothing here talks to a database.
 * Methods that take a `column`/`columnName`/`tableName` argument (other than
 * `generateSampleQuery`'s `columns` and `generateColumnSampleQuery`'s `columnName`)
 * embed it verbatim, so callers must pass already-quoted identifiers or trusted expressions.
 */
export class KtxSqlServerDialect {
  readonly type = 'sqlserver';

  /**
   * Exact-match classification of SQL Server base type names.
   *
   * NOTE(review): `timestamp` is classified as `'time'` here, but in SQL Server it is a
   * synonym for `rowversion` (a binary version counter) — confirm this mapping is intended.
   */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    datetime: 'time',
    datetime2: 'time',
    date: 'time',
    time: 'time',
    datetimeoffset: 'time',
    smalldatetime: 'time',
    timestamp: 'time',
    int: 'number',
    bigint: 'number',
    smallint: 'number',
    tinyint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    real: 'number',
    money: 'number',
    smallmoney: 'number',
    varchar: 'string',
    nvarchar: 'string',
    char: 'string',
    nchar: 'string',
    text: 'string',
    ntext: 'string',
    uniqueidentifier: 'string',
    xml: 'string',
    bit: 'boolean',
  };

  /** Wraps an identifier in square brackets, doubling any `]` it contains. */
  quoteIdentifier(identifier: string): string {
    return `[${identifier.replace(/\]/g, ']]')}]`;
  }

  /**
   * Formats a table as `[schema].[table]`, or `[table]` when `db` is empty.
   * The catalog is not included in the output.
   */
  formatTableName(table: SqlServerTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native type as `'time'`, `'number'`, `'boolean'` or `'string'`.
   *
   * Any length/precision suffix such as `(18, 2)` is stripped first. An exact match in
   * the type table wins; otherwise substring heuristics apply, and anything unmatched
   * (including an empty type) is `'string'`.
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    const normalized = lower.includes('(') ? lower.split('(')[0]!.trim() : lower;

    // Own-property check: a plain index lookup would also hit Object.prototype members
    // (e.g. "constructor") and return a function instead of a dimension type.
    const exact = Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)
      ? this.typeMappings[normalized]
      : undefined;
    if (exact) {
      return exact;
    }

    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    const numericHints = ['int', 'num', 'dec', 'float', 'money'];
    if (numericHints.some((hint) => normalized.includes(hint))) {
      return 'number';
    }
    if (normalized.includes('bit')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT TOP <limit> <columns> FROM <table>`.
   *
   * @param tableName Already-quoted table name.
   * @param limit Maximum number of rows.
   * @param columns Unquoted column names; `*` is used when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT TOP ${limit} ${columnList} FROM ${tableName}`;
  }

  /**
   * Builds a sample query for one column that skips NULLs and values that are
   * empty after trimming.
   *
   * @param tableName Already-quoted table name.
   * @param columnName Unquoted column name.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT TOP ${limit} ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND LTRIM(RTRIM(CAST(${quotedColumn} AS NVARCHAR(MAX)))) != ''`;
  }

  /**
   * Rewrites `:name` placeholders to SQL Server `@name` parameters for every key in `params`.
   *
   * Keys are matched literally (regex metacharacters in a key are escaped) and must be
   * followed by a word boundary, so `:id` does not rewrite `:id2`.
   *
   * @returns The rewritten SQL with the same `params` object; when `params` is omitted
   *   the SQL is returned untouched with `params: undefined`.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }
    let parameterizedQuery = sql;
    for (const key of Object.keys(params)) {
      const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // Replacer function: avoids `$` sequences in the key being read as replacement patterns.
      parameterizedQuery = parameterizedQuery.replace(new RegExp(`:${escapedKey}\\b`, 'g'), () => `@${key}`);
    }
    return { sql: parameterizedQuery, params };
  }

  /**
   * Builds a row-level random filter keeping roughly `samplePct` of the rows.
   *
   * @param samplePct Fraction in (0, 1); anything else yields an empty string (no filter).
   */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    // Never round down to 0: `% 100 < 0` is false for every row, i.e. an empty sample.
    const threshold = Math.max(1, Math.round(samplePct * 100));
    return `ABS(CHECKSUM(NEWID())) % 100 < ${threshold}`;
  }

  /**
   * Builds a `TABLESAMPLE (<n> PERCENT)` clause.
   *
   * @param samplePct Fraction in (0, 1); anything else yields an empty string (no clause).
   */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    // Drop binary floating-point noise (0.07 * 100 === 7.000000000000001).
    const percent = Number((samplePct * 100).toFixed(6));
    return `TABLESAMPLE (${percent} PERCENT)`;
  }

  /**
   * Builds an `OFFSET … FETCH NEXT …` clause.
   *
   * Returns an empty string when `offset` is missing or not positive; `limit` is then
   * not applied by this clause (see `getTopClause`).
   */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `OFFSET ${offset} ROWS FETCH NEXT ${limit} ROWS ONLY` : '';
  }

  /** Builds `TOP <limit>`. */
  getTopClause(limit: number): string {
    return `TOP ${limit}`;
  }

  /** Builds an aggregate that counts rows where `column` is NULL. */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** Builds `COUNT(DISTINCT <column>)`. */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Builds a query that counts distinct non-null values among the first
   * `sampleSize` rows returned for the column.
   *
   * @param tableName Already-quoted table name.
   * @param columnName Already-quoted column name.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT TOP ${sampleSize} ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Builds a query returning up to `limit` distinct non-null values of a column,
   * cast to `NVARCHAR(MAX)` and ordered by value.
   *
   * @param tableName Already-quoted table name.
   * @param columnName Already-quoted column name.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT TOP ${limit} val
      FROM (
        SELECT DISTINCT CAST(${columnName} AS NVARCHAR(MAX)) AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
      ) AS distinct_vals
      ORDER BY val
    `;
  }

  /** No column-statistics query is provided for SQL Server; always returns `null`. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /**
   * Same as `generateCardinalitySampleQuery`, but the sampled rows are picked in
   * `ORDER BY NEWID()` order.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT TOP ${sampleSize} ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY NEWID()
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Builds an expression truncating `column` to the start of the given period.
   *
   * @param column Column expression, embedded verbatim.
   * @param granularity Period to truncate to.
   * @param timezone Optional time zone name; when given, the column is first converted
   *   with `AT TIME ZONE 'UTC' AT TIME ZONE '<timezone>'`.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = this.inTimezone(column, timezone);
    switch (granularity) {
      case 'day':
        return `CAST(${col} AS DATE)`;
      case 'week':
        return `DATEADD(WEEK, DATEDIFF(WEEK, 0, ${col}), 0)`;
      case 'month':
        return `DATEFROMPARTS(YEAR(${col}), MONTH(${col}), 1)`;
      case 'quarter':
        return `DATEFROMPARTS(YEAR(${col}), (DATEPART(QUARTER, ${col}) - 1) * 3 + 1, 1)`;
      case 'year':
        return `DATEFROMPARTS(YEAR(${col}), 1, 1)`;
    }
  }

  /**
   * Builds an expression truncating `column` to buckets of a custom interval,
   * counted from `origin`.
   *
   * @param column Column expression, embedded verbatim.
   * @param interval `"<amount> <datepart>"`, e.g. `"15 minute"`.
   * @param origin Bucket origin; defaults to `1970-01-01` when omitted or empty.
   * @param timezone Optional time zone name, applied as in `getTimeTruncExpression`.
   * @throws Error when `interval` is not a positive amount followed by a datepart word.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = this.inTimezone(column, timezone);
    // Both parts are embedded unquoted in the SQL, so accept only a number and a bare word.
    const parsed = /^\s*(\d+(?:\.\d+)?)\s+([A-Za-z_]+)\s*$/.exec(interval);
    if (!parsed || Number(parsed[1]) <= 0) {
      throw new Error(`Invalid interval "${interval}": expected "<positive amount> <datepart>"`);
    }
    const [, amount, unit] = parsed;
    const originExpr = origin ? this.quoteLiteral(origin) : `'1970-01-01'`;
    return `DATEADD(${unit}, (DATEDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount}, ${originExpr})`;
  }

  /** Returns the interval as a single-quoted SQL string literal. */
  parseIntervalToSql(interval: string): string {
    return this.quoteLiteral(interval);
  }

  /** Wraps `column` in the UTC-to-`timezone` conversion, or returns it as-is without a time zone. */
  private inTimezone(column: string, timezone?: string): string {
    return timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE ${this.quoteLiteral(timezone)}` : column;
  }

  /** Builds a single-quoted SQL string literal, doubling embedded single quotes. */
  private quoteLiteral(value: string): string {
    return `'${value.replace(/'/g, "''")}'`;
  }
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
import {
|
||||
KtxSqlServerScanConnector,
|
||||
type KtxSqlServerConnectionConfig,
|
||||
type KtxSqlServerEndpointResolver,
|
||||
type KtxSqlServerPoolFactory,
|
||||
} from './connector.js';
|
||||
|
||||
/** Options for `createSqlServerLiveDatabaseIntrospection`. */
interface CreateSqlServerLiveDatabaseIntrospectionOptions {
  /** Project connection configs, keyed by connection id. */
  connections: Record<string, KtxProjectConnectionConfig>;
  /** Optional pool factory handed to each connector. */
  poolFactory?: KtxSqlServerPoolFactory;
  /** Optional endpoint resolver handed to each connector. */
  endpointResolver?: KtxSqlServerEndpointResolver;
  /** Optional clock handed to each connector. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Creates a live-database introspection port backed by the SQL Server scan connector.
 *
 * Each `extractSchema` call builds a fresh connector for the requested connection,
 * introspects it, and always cleans the connector up afterwards, even when
 * introspection fails.
 *
 * @param options Connection configs plus optional connector dependencies.
 */
export function createSqlServerLiveDatabaseIntrospection(
  options: CreateSqlServerLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    const { connections, poolFactory, endpointResolver, now } = options;
    const connection = connections[connectionId] as KtxSqlServerConnectionConfig | undefined;
    const connector = new KtxSqlServerScanConnector({
      connectionId,
      connection,
      poolFactory,
      endpointResolver,
      now,
    });

    try {
      const scanInput = { connectionId, driver: 'sqlserver' };
      const scanContext = { runId: `sqlserver-${connectionId}` };
      return await connector.introspect(scanInput, scanContext);
    } finally {
      await connector.cleanup();
    }
  };

  return { extractSchema };
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '@ktx/context/project';
|
||||
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from './context/project/config.js';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { KtxPublicIngestProject, KtxPublicIngestTargetResult } from './public-ingest.js';
|
||||
import {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import type { KtxProgressPort, KtxProgressUpdateOptions } from '@ktx/context/scan';
|
||||
import type { KtxProgressPort, KtxProgressUpdateOptions } from './context/scan/types.js';
|
||||
import type { KtxCliIo } from './index.js';
|
||||
import type { KtxIngestProgressUpdate } from './ingest.js';
|
||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||
|
|
@ -444,17 +444,20 @@ export function renderContextBuildView(
|
|||
const ESC_K_RE = new RegExp(`${ESC.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\[K`, 'g');
|
||||
const ANSI_RE = /\x1b\[[0-9;]*m/g;
|
||||
|
||||
/**
 * Extracts a `[NN%] message` progress line from an output chunk.
 *
 * A leading carriage return, the sequences matched by `ESC_K_RE`, one trailing
 * newline and surrounding whitespace are removed before matching.
 *
 * @returns The normalized `[NN%] message` text, or `null` when the chunk is not a progress line.
 * @internal
 */
export function extractProgressMessage(chunk: string): string | null {
  const withoutLeadingCr = chunk.replace(/^\r/, '');
  const withoutEraseCodes = withoutLeadingCr.replace(ESC_K_RE, '');
  const cleaned = withoutEraseCodes.replace(/\n$/, '').trim();

  const progress = /^\[(\d+)%\]\s*(.+)$/.exec(cleaned);
  if (!progress) {
    return null;
  }
  const [, percent, message] = progress;
  return `[${percent}%] ${message}`;
}
|
||||
|
||||
/**
 * Pulls the table count out of a `N change(s) across M table(s)` scan summary.
 *
 * @returns `"M tables"` (always plural), or `null` when the output has no such summary.
 * @internal
 */
export function parseScanSummary(output: string): string | null {
  const summary = /(\d+) changes? across (\d+) tables?/.exec(output);
  if (summary === null) {
    return null;
  }
  const tableCount = summary[2];
  return `${tableCount} tables`;
}
|
||||
|
||||
/** @internal */
|
||||
export function parseIngestSummary(output: string): string | null {
|
||||
const savedMemory = output.match(/Saved memory: (.+)/);
|
||||
if (savedMemory) return savedMemory[1];
|
||||
|
|
@ -560,6 +563,7 @@ function collectSourceProgress(targets: ContextBuildTargetState[]): ContextBuild
|
|||
});
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
export function viewStateFromSourceProgress(
|
||||
sources: ContextBuildSourceProgressUpdate[],
|
||||
now: number,
|
||||
|
|
|
|||
27
packages/cli/src/context/connections/connection-type.ts
Normal file
27
packages/cli/src/context/connections/connection-type.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
import { z } from 'zod';
|
||||
|
||||
/** Accepted connection type identifiers, in declaration order. */
const CONNECTION_TYPES = [
  'POSTGRESQL',
  'SQLITE',
  'SQLSERVER',
  'BIGQUERY',
  'SNOWFLAKE',
  'CENTRALREACH',
  'EPIC',
  'CERNER',
  'ATHENA',
  'QUICKBOOKS',
  'WORKDAY',
  'REST',
  'S3',
  'SLACK',
  'METABASE',
  'LOOKER',
  'NOTION',
  'MYSQL',
  'CLICKHOUSE',
  'PLAIN',
  'BETTERSTACK',
] as const;

/** Zod schema that validates a connection type identifier. */
export const connectionTypeSchema = z.enum(CONNECTION_TYPES);

/** Union of all connection type identifiers accepted by `connectionTypeSchema`. */
export type ConnectionType = z.infer<typeof connectionTypeSchema>;
|
||||
30
packages/cli/src/context/connections/dialects.test.ts
Normal file
30
packages/cli/src/context/connections/dialects.test.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { getDialectForDriver } from './dialects.js';
|
||||
|
||||
// Unit tests for the driver-name to dialect lookup.
describe('getDialectForDriver', () => {
  // Expected quoting of the same logical table for each supported driver.
  const formattingCases = [
    ['postgres', '"public"."orders"'],
    ['postgresql', '"public"."orders"'],
    ['mysql', '`public`.`orders`'],
    ['clickhouse', '`public`.`orders`'],
    ['sqlite', '"orders"'],
    ['snowflake', '"analytics"."public"."orders"'],
    ['bigquery', '`analytics`.`public`.`orders`'],
    ['sqlserver', '[analytics].[public].[orders]'],
  ] as const;

  it.each(formattingCases)('formats table names for %s', (driver, expected) => {
    // Only these drivers are given a catalog part in this test.
    const usesCatalog = driver === 'snowflake' || driver === 'bigquery' || driver === 'sqlserver';
    const table = {
      catalog: usesCatalog ? 'analytics' : null,
      db: driver === 'sqlite' ? null : 'public',
      name: 'orders',
    };

    const dialect = getDialectForDriver(driver);

    expect(dialect.formatTableName(table)).toBe(expected);
  });

  it('throws with a supported-driver list for unknown drivers', () => {
    const lookupOracle = () => getDialectForDriver('oracle');
    expect(lookupOracle).toThrow(
      'Unsupported warehouse driver "oracle". Supported drivers: bigquery, clickhouse, mysql, postgres, postgresql, sqlite, sqlite3, snowflake, sqlserver',
    );
  });
});
|
||||
102
packages/cli/src/context/connections/dialects.ts
Normal file
102
packages/cli/src/context/connections/dialects.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js';
|
||||
|
||||
/** Driver names that have a dialect, including the `postgresql` and `sqlite3` aliases. */
type SupportedDriver =
  | 'bigquery'
  | 'clickhouse'
  | 'mysql'
  | 'postgres'
  | 'postgresql'
  | 'snowflake'
  | 'sqlite'
  | 'sqlite3'
  | 'sqlserver';

/** Minimal per-driver SQL dialect: identifier quoting, table naming and type classification. */
export interface KtxDialect {
  /** Driver name this dialect was created for. */
  readonly type: SupportedDriver;
  /** Quotes a single identifier for this driver. */
  quoteIdentifier(identifier: string): string;
  /** Formats a table reference as a quoted, dot-separated name. */
  formatTableName(table: KtxTableRef): string;
  /** Classifies a native column type into a dimension type. */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType;
}

/** Supported driver names, in the order they are listed to users. */
const supportedDrivers: SupportedDriver[] = [
  'bigquery',
  'clickhouse',
  'mysql',
  'postgres',
  'postgresql',
  'sqlite',
  'sqlite3',
  'snowflake',
  'sqlserver',
];
|
||||
|
||||
/** Wraps an identifier in double quotes, doubling any double quote it contains. */
function doubleQuoted(identifier: string): string {
  const escaped = identifier.split('"').join('""');
  return '"' + escaped + '"';
}
|
||||
|
||||
/** Wraps an identifier in backticks, doubling any backtick it contains. */
function backtickQuoted(identifier: string): string {
  const escaped = identifier.split('`').join('``');
  return '`' + escaped + '`';
}
|
||||
|
||||
/**
 * Wraps an identifier in backticks for BigQuery, backslash-escaping embedded
 * backslashes and backticks.
 *
 * Backslashes are escaped first: otherwise an identifier ending in `\` would
 * escape the closing backtick and leave the quoted name unterminated.
 */
function bigQueryQuoted(identifier: string): string {
  const escaped = identifier.replace(/\\/g, '\\\\').replace(/`/g, '\\`');
  return `\`${escaped}\``;
}
|
||||
|
||||
/** Wraps an identifier in square brackets, doubling any closing bracket it contains. */
function bracketQuoted(identifier: string): string {
  const escaped = identifier.split(']').join(']]');
  return '[' + escaped + ']';
}
|
||||
|
||||
/**
 * Classifies a native column type by substring heuristics on its lower-cased,
 * trimmed name. Checks run in order: time, number, boolean; anything else is `'string'`.
 *
 * NOTE(review): the `int` fragment also matches names such as `interval` and `point`,
 * which are therefore classified as `'number'` — confirm this is intended.
 */
function inferDimensionType(nativeType: string): KtxSchemaDimensionType {
  const normalized = nativeType.toLowerCase().trim();
  const mentions = (...fragments: string[]): boolean => fragments.some((fragment) => normalized.includes(fragment));

  if (mentions('date', 'time')) {
    return 'time';
  }
  if (mentions('int', 'num', 'dec', 'float', 'double', 'real')) {
    return 'number';
  }
  if (mentions('bool') || normalized === 'bit') {
    return 'boolean';
  }
  return 'string';
}
|
||||
|
||||
/**
 * Joins the quoted parts of a table reference with dots.
 *
 * When `sqlite` is true only `table.name` is used. Otherwise catalog, db and
 * name are used in that order, skipping any part that is falsy.
 */
function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string {
  let segments: string[];
  if (sqlite) {
    segments = [table.name];
  } else {
    segments = [];
    for (const candidate of [table.catalog, table.db, table.name]) {
      if (candidate) {
        segments.push(candidate);
      }
    }
  }
  return segments.map(quote).join('.');
}
|
||||
|
||||
/**
 * Builds a dialect from a driver name and its identifier-quoting function.
 *
 * @param type - Driver name stored on the dialect.
 * @param quote - Quoting function used for single identifiers and for each table-name part.
 * @param sqlite - When true, table names are rendered from `table.name` only.
 */
function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect {
  const formatTableName = (table: KtxTableRef): string => formatWithParts(table, quote, sqlite);
  const dialect: KtxDialect = {
    type,
    quoteIdentifier: quote,
    formatTableName,
    mapToDimensionType: inferDimensionType,
  };
  return dialect;
}
|
||||
|
||||
/**
 * Dialect registry keyed by driver name.
 *
 * Quoting per driver: double quotes for Postgres, SQLite and Snowflake;
 * doubled backticks for MySQL and ClickHouse; backslash-escaped backticks for
 * BigQuery; square brackets for SQL Server. The SQLite entries additionally
 * render table names from the bare table name only.
 */
const dialects: Record<SupportedDriver, KtxDialect> = {
  // Double-quoted identifiers.
  postgres: createDialect('postgres', doubleQuoted),
  postgresql: createDialect('postgresql', doubleQuoted),
  // Backtick-quoted identifiers.
  mysql: createDialect('mysql', backtickQuoted),
  clickhouse: createDialect('clickhouse', backtickQuoted),
  // SQLite: double-quoted, name-only table references.
  sqlite: createDialect('sqlite', doubleQuoted, true),
  sqlite3: createDialect('sqlite3', doubleQuoted, true),
  snowflake: createDialect('snowflake', doubleQuoted),
  bigquery: createDialect('bigquery', bigQueryQuoted),
  sqlserver: createDialect('sqlserver', bracketQuoted),
};
|
||||
|
||||
/**
 * Returns the dialect registered for a warehouse driver name.
 *
 * The name is lower-cased and trimmed before the lookup.
 *
 * @param driver - Driver name to look up.
 * @returns The registered dialect.
 * @throws Error naming the original driver and listing the supported drivers
 *   when no dialect is registered for it.
 */
export function getDialectForDriver(driver: string): KtxDialect {
  const normalized = driver.toLowerCase().trim();
  // Own-property check rather than `in`: `in` also matches keys inherited from
  // Object.prototype ("constructor", "toString", ...), which would hand back a
  // non-dialect value instead of throwing.
  if (Object.prototype.hasOwnProperty.call(dialects, normalized)) {
    return dialects[normalized as SupportedDriver];
  }
  throw new Error(`Unsupported warehouse driver "${driver}". Supported drivers: ${supportedDrivers.join(', ')}`);
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createDefaultLocalQueryExecutor } from './local-query-executor.js';
|
||||
|
||||
describe('createDefaultLocalQueryExecutor', () => {
  // Stub executor resolving with a one-row SELECT result whose header identifies the stub.
  function stubExecutor(header: string, cell: number) {
    return {
      execute: vi.fn(async () => ({
        headers: [header],
        rows: [[cell]],
        totalRows: 1,
        command: 'SELECT',
        rowCount: 1,
      })),
    };
  }

  it('dispatches postgres and sqlite drivers to their executors', async () => {
    const postgres = stubExecutor('pg', 1);
    const sqlite = stubExecutor('sqlite', 2);
    const executor = createDefaultLocalQueryExecutor({ postgres, sqlite });

    // Each driver must reach its own stub, identified by the header it returns.
    const postgresRun = executor.execute({
      connectionId: 'pg',
      connection: { driver: 'postgres' },
      sql: 'select 1',
    });
    await expect(postgresRun).resolves.toMatchObject({ headers: ['pg'] });

    const sqliteRun = executor.execute({
      connectionId: 'local',
      connection: { driver: 'sqlite' },
      sql: 'select 1',
    });
    await expect(sqliteRun).resolves.toMatchObject({ headers: ['sqlite'] });

    expect(postgres.execute).toHaveBeenCalledTimes(1);
    expect(sqlite.execute).toHaveBeenCalledTimes(1);
  });

  it('rejects unsupported local execution drivers', async () => {
    const executor = createDefaultLocalQueryExecutor({
      postgres: { execute: vi.fn() },
      sqlite: { execute: vi.fn() },
    });

    const unsupportedRun = executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'snowflake' },
      sql: 'select 1',
    });
    await expect(unsupportedRun).rejects.toThrow('No local query executor is configured for driver "snowflake".');
  });
});
|
||||
34
packages/cli/src/context/connections/local-query-executor.ts
Normal file
34
packages/cli/src/context/connections/local-query-executor.ts
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import { createPostgresQueryExecutor } from './postgres-query-executor.js';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
import { createSqliteQueryExecutor } from './sqlite-query-executor.js';
|
||||
|
||||
/** Optional executor overrides for `createDefaultLocalQueryExecutor`. */
export interface DefaultLocalQueryExecutorOptions {
  /** Executor for the `postgres`/`postgresql` drivers; a default one is created when omitted. */
  postgres?: KtxSqlQueryExecutorPort;

  /** Executor for the `sqlite`/`sqlite3` drivers; a default one is created when omitted. */
  sqlite?: KtxSqlQueryExecutorPort;
}
|
||||
|
||||
/** Returns the connection's driver as a lower-case string, or `''` when there is no connection or driver. */
function driverFor(input: KtxSqlQueryExecutionInput): string {
  const rawDriver = input.connection?.driver ?? '';
  return String(rawDriver).toLowerCase();
}
|
||||
|
||||
/**
 * Creates an executor that routes each query to a driver-specific executor.
 *
 * `postgres`/`postgresql` go to the Postgres executor and `sqlite`/`sqlite3`
 * to the SQLite executor. Executors not supplied in `options` are created
 * up front with their default factories.
 *
 * @param options - Optional executor overrides.
 * @returns An executor whose `execute` rejects with an Error for any other driver.
 */
export function createDefaultLocalQueryExecutor(options: DefaultLocalQueryExecutorOptions = {}): KtxSqlQueryExecutorPort {
  const postgres = options.postgres ?? createPostgresQueryExecutor();
  const sqlite = options.sqlite ?? createSqliteQueryExecutor();

  const execute = async (input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> => {
    switch (driverFor(input)) {
      case 'postgres':
      case 'postgresql':
        return postgres.execute(input);
      case 'sqlite':
      case 'sqlite3':
        return sqlite.execute(input);
      default:
        // Report the driver exactly as configured, not the lower-cased form.
        throw new Error(`No local query executor is configured for driver "${input.connection?.driver ?? 'unknown'}".`);
    }
  };

  return { execute };
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
localConnectionInfoFromConfig,
|
||||
localConnectionToWarehouseDescriptor,
|
||||
localConnectionTypeForConfig,
|
||||
} from './local-warehouse-descriptor.js';
|
||||
|
||||
describe('localConnectionToWarehouseDescriptor', () => {
  it('maps local Postgres URLs to canonical warehouse descriptors', () => {
    // Host and database are expected to come from the URL itself.
    const descriptor = localConnectionToWarehouseDescriptor('warehouse', {
      driver: 'postgres',
      url: 'postgresql://readonly@db.example.test/analytics',
    });

    expect(descriptor).toMatchObject({
      id: 'warehouse',
      connection_type: 'POSTGRESQL',
      host: 'db.example.test',
      database: 'analytics',
    });
  });

  it('maps BigQuery project and dataset from explicit fields', () => {
    const descriptor = localConnectionToWarehouseDescriptor('bq', {
      driver: 'bigquery',
      project_id: 'acme',
      dataset_id: 'warehouse',
    });

    expect(descriptor).toMatchObject({
      id: 'bq',
      connection_type: 'BIGQUERY',
      project_id: 'acme',
      dataset_id: 'warehouse',
    });
  });

  it('returns null for non-warehouse adapters', () => {
    const descriptor = localConnectionToWarehouseDescriptor('looker', {
      driver: 'looker',
      base_url: 'https://looker.example.com',
      client_id: 'client',
    });

    expect(descriptor).toBeNull();
  });
});
|
||||
|
||||
describe('local connection info helpers', () => {
  it('returns canonical warehouse connection types for local catalogs', () => {
    const postgresType = localConnectionTypeForConfig('warehouse', { driver: 'postgres' });
    expect(postgresType).toBe('POSTGRESQL');

    const bigQueryType = localConnectionTypeForConfig('bq', { driver: 'bigquery', project_id: 'acme' });
    expect(bigQueryType).toBe('BIGQUERY');

    const snowflakeType = localConnectionTypeForConfig('snowflake', { driver: 'snowflake' });
    expect(snowflakeType).toBe('SNOWFLAKE');
  });

  it('keeps non-warehouse adapter labels for display-only local connection surfaces', () => {
    // Drivers without a warehouse connection type fall back to the raw driver label.
    const metabaseType = localConnectionTypeForConfig('prod-metabase', {
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
    });
    expect(metabaseType).toBe('metabase');

    // A config without any driver is reported as 'unknown'.
    expect(localConnectionTypeForConfig('missing-driver', {} as never)).toBe('unknown');
  });

  it('builds nullable local connection info records', () => {
    const info = localConnectionInfoFromConfig('warehouse', { driver: 'postgres' });
    expect(info).toEqual({
      id: 'warehouse',
      name: 'warehouse',
      connectionType: 'POSTGRESQL',
    });

    expect(localConnectionInfoFromConfig('missing', undefined)).toBeNull();
  });
});
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
import type { KtxProjectConnectionConfig } from '../project/config.js';
|
||||
import type { ConnectionType } from './connection-type.js';
|
||||
|
||||
/** Warehouse-oriented view of a local connection config entry. */
export interface LocalWarehouseDescriptor {
  /** Connection id the descriptor was built for. */
  id: string;
  /** Canonical connection type derived from the config's driver. */
  connection_type: ConnectionType;
  /** Host from the config's `host` field, else parsed from its URL. */
  host?: string | null;
  /** Database from the config's `database` field, else parsed from its URL path. */
  database?: string | null;
  /** Value of the config's `account` field, if any. */
  account?: string | null;
  /** BigQuery project id from the config, else parsed from its URL. */
  project_id?: string | null;
  /** BigQuery dataset id from the config, else parsed from its URL. */
  dataset_id?: string | null;
  /** Shallow copy of the original connection config. */
  connection_params: Record<string, unknown>;
}
|
||||
|
||||
/** Minimal record describing a local connection. */
export interface LocalConnectionInfo {
  /** Connection id. */
  id: string;
  /** Display name; `localConnectionInfoFromConfig` sets it to the id. */
  name: string;
  /** Canonical warehouse connection type, else the raw driver label, else `'unknown'`. */
  connectionType: string;
}
|
||||
|
||||
/**
 * Maps lower-case driver names to canonical warehouse connection types.
 * Drivers missing from this map are treated as non-warehouse adapters.
 *
 * `sqlite3` is listed alongside `sqlite` so this map agrees with the dialect
 * registry and the local query executor, which both accept either spelling.
 */
const DRIVER_TO_CONNECTION_TYPE: Record<string, ConnectionType> = {
  postgres: 'POSTGRESQL',
  postgresql: 'POSTGRESQL',
  sqlite: 'SQLITE',
  sqlite3: 'SQLITE',
  sqlserver: 'SQLSERVER',
  mssql: 'SQLSERVER',
  mysql: 'MYSQL',
  clickhouse: 'CLICKHOUSE',
  snowflake: 'SNOWFLAKE',
  bigquery: 'BIGQUERY',
};
|
||||
|
||||
/**
 * Builds a warehouse descriptor from a local connection config.
 *
 * Explicit config fields (`host`, `database`, `account`, `project_id`,
 * `dataset_id`) always win; values parsed from `url` are only fallbacks.
 * URLs starting with `env:` or `file:` are not parsed (presumably they are
 * references resolved elsewhere — confirm against the config loader), and a
 * URL that fails to parse is ignored.
 *
 * @param id - Connection id copied onto the descriptor.
 * @param connection - Connection config, or `undefined` when the id is not configured.
 * @returns The descriptor, or `null` when there is no config or its driver has
 *   no warehouse connection type.
 */
export function localConnectionToWarehouseDescriptor(
  id: string,
  connection: KtxProjectConnectionConfig | undefined,
): LocalWarehouseDescriptor | null {
  if (!connection) {
    return null;
  }
  const driverKey = String(connection.driver ?? '').toLowerCase();
  // Own-property lookup: indexing the plain object directly would resolve
  // inherited keys such as "constructor" to a truthy non-ConnectionType value.
  const connectionType = Object.prototype.hasOwnProperty.call(DRIVER_TO_CONNECTION_TYPE, driverKey)
    ? DRIVER_TO_CONNECTION_TYPE[driverKey]
    : undefined;
  if (!connectionType) {
    return null;
  }

  const info: LocalWarehouseDescriptor = {
    id,
    connection_type: connectionType,
    connection_params: { ...connection },
  };
  const url = typeof connection.url === 'string' ? connection.url : null;
  if (url && !url.startsWith('env:') && !url.startsWith('file:')) {
    try {
      const parsed = new URL(url);
      info.host = parsed.hostname || null;
      if (parsed.pathname.length > 1) {
        const [first, second] = parsed.pathname.slice(1).split('/');
        // `||` rather than `??`: hostname and path segments are never nullish but
        // can be empty strings, which must fall through to the next candidate.
        if (connectionType === 'BIGQUERY') {
          // NOTE(review): for `bigquery://project/dataset` the dataset is the first
          // path segment, which this fallback does not use — confirm intended URL shape.
          info.project_id = stringField(connection.project_id) ?? (parsed.hostname || first || null);
          info.dataset_id = stringField(connection.dataset_id) ?? (second || null);
        } else {
          info.database = first || null;
        }
      }
    } catch {
      // Unparseable URL: best effort, fall back to the explicit host field only.
      info.host = stringField(connection.host);
    }
  }

  info.host = stringField(connection.host) ?? info.host ?? null;
  info.database = stringField(connection.database) ?? info.database ?? null;
  info.account = stringField(connection.account) ?? null;
  info.project_id = stringField(connection.project_id) ?? info.project_id ?? null;
  info.dataset_id = stringField(connection.dataset_id) ?? info.dataset_id ?? null;
  return info;
}
|
||||
|
||||
/**
 * Returns a connection-type label for a local connection config.
 *
 * @returns The canonical warehouse connection type when the driver maps to one;
 *   otherwise the trimmed driver string; otherwise `'unknown'`.
 */
export function localConnectionTypeForConfig(id: string, connection: KtxProjectConnectionConfig | undefined): string {
  const descriptor = localConnectionToWarehouseDescriptor(id, connection);
  if (descriptor !== null) {
    return descriptor.connection_type;
  }

  // Not a warehouse driver: fall back to whatever driver label the config carries.
  const rawDriver = connection?.driver;
  const label = typeof rawDriver === 'string' ? rawDriver.trim() : '';
  return label.length > 0 ? label : 'unknown';
}
|
||||
|
||||
/**
 * Builds a `LocalConnectionInfo` record for a connection config.
 *
 * @returns `null` when `connection` is missing; otherwise a record whose `name`
 *   equals `id` and whose `connectionType` comes from `localConnectionTypeForConfig`.
 */
export function localConnectionInfoFromConfig(
  id: string,
  connection: KtxProjectConnectionConfig | undefined,
): LocalConnectionInfo | null {
  return connection
    ? {
        id,
        name: id,
        connectionType: localConnectionTypeForConfig(id, connection),
      }
    : null;
}
|
||||
|
||||
/** Returns the trimmed string, or `null` when the value is not a string or is blank. */
function stringField(value: unknown): string | null {
  if (typeof value !== 'string') {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
||||
157
packages/cli/src/context/connections/notion-config.test.ts
Normal file
157
packages/cli/src/context/connections/notion-config.test.ts
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import {
|
||||
notionConnectionToPullConfig,
|
||||
parseNotionConnectionConfig,
|
||||
redactNotionConnectionConfig,
|
||||
resolveNotionAuthToken,
|
||||
} from './notion-config.js';
|
||||
|
||||
describe('standalone Notion connection config', () => {
  // Scratch directory for the token file, recreated for every test.
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-notion-config-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('parses selected-root Notion config with safe defaults', () => {
    const rawConfig = {
      driver: 'notion',
      auth_token_ref: 'env:NOTION_TOKEN',
      crawl_mode: 'selected_roots',
      root_page_ids: ['page-1'],
    };

    const parsed = parseNotionConnectionConfig(rawConfig);

    // Every omitted field must come back with its default.
    expect(parsed).toEqual({
      driver: 'notion',
      auth_token: null,
      auth_token_ref: 'env:NOTION_TOKEN',
      crawl_mode: 'selected_roots',
      root_page_ids: ['page-1'],
      root_database_ids: [],
      root_data_source_ids: [],
      max_pages_per_run: 1000,
      max_knowledge_creates_per_run: 25,
      max_knowledge_updates_per_run: 20,
    });
    expect(parsed).not.toHaveProperty('last_successful_cursor');
  });

  it('parses inline Notion auth tokens without requiring auth_token_ref', () => {
    const rawConfig = {
      driver: 'notion',
      auth_token: ' ntn_inline_token ',
      crawl_mode: 'selected_roots',
      root_page_ids: ['page-1'],
    };

    const parsed = parseNotionConnectionConfig(rawConfig);

    // The inline token is trimmed and the missing reference becomes null.
    expect(parsed).toMatchObject({
      driver: 'notion',
      auth_token: 'ntn_inline_token',
      auth_token_ref: null,
      crawl_mode: 'selected_roots',
      root_page_ids: ['page-1'],
    });
  });

  it('redacts token references from display output', () => {
    const parsed = parseNotionConnectionConfig({
      driver: 'notion',
      auth_token_ref: 'file:/Users/example/.config/notion-token',
      crawl_mode: 'all_accessible',
      max_pages_per_run: 80,
    });

    const redacted = redactNotionConnectionConfig(parsed);

    expect(redacted).toEqual({
      driver: 'notion',
      hasAuthToken: true,
      crawlMode: 'all_accessible',
      rootPageIds: [],
      rootDatabaseIds: [],
      rootDataSourceIds: [],
      maxPagesPerRun: 80,
      maxKnowledgeCreatesPerRun: 25,
      maxKnowledgeUpdatesPerRun: 20,
      warning: 'Anything accessible to this Notion integration can become organization knowledge.',
    });
  });

  it('requires at least one selected root in selected_roots mode', () => {
    const parseWithoutRoots = () =>
      parseNotionConnectionConfig({
        driver: 'notion',
        auth_token_ref: 'env:NOTION_TOKEN',
        crawl_mode: 'selected_roots',
      });

    expect(parseWithoutRoots).toThrow('selected_roots requires at least one root page, database, or data source id');
  });

  it('resolves env and file token references without exposing the reference in errors', async () => {
    const tokenPath = join(tempDir, 'notion-token.txt');
    await writeFile(tokenPath, 'ntn_file_token\n', 'utf-8');

    const fromEnv = resolveNotionAuthToken('env:NOTION_TOKEN', {
      env: { NOTION_TOKEN: 'ntn_env_token' },
    });
    await expect(fromEnv).resolves.toBe('ntn_env_token');

    // The trailing newline written to the token file must be stripped.
    await expect(resolveNotionAuthToken(`file:${tokenPath}`)).resolves.toBe('ntn_file_token');

    const fromMissingEnv = resolveNotionAuthToken('env:MISSING_NOTION_TOKEN', { env: {} });
    await expect(fromMissingEnv).rejects.toThrow('Notion token environment variable MISSING_NOTION_TOKEN is not set');
  });

  it('converts standalone config into adapter pull config', async () => {
    const parsed = parseNotionConnectionConfig({
      driver: 'notion',
      auth_token_ref: 'env:NOTION_TOKEN',
      crawl_mode: 'all_accessible',
      max_pages_per_run: 12,
      max_knowledge_creates_per_run: 2,
      max_knowledge_updates_per_run: 7,
      last_successful_cursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}',
    });

    const pullConfig = await notionConnectionToPullConfig(parsed, { env: { NOTION_TOKEN: 'ntn_env_token' } });

    // The cursor from the raw config is not carried over.
    expect(pullConfig).toEqual({
      authToken: 'ntn_env_token',
      crawlMode: 'all_accessible',
      rootPageIds: [],
      rootDatabaseIds: [],
      rootDataSourceIds: [],
      maxPagesPerRun: 12,
      maxKnowledgeCreatesPerRun: 2,
      maxKnowledgeUpdatesPerRun: 7,
      lastSuccessfulCursor: null,
    });
  });

  it('uses inline Notion auth_token when building adapter pull config', async () => {
    const parsed = parseNotionConnectionConfig({
      driver: 'notion',
      auth_token: 'ntn_inline_token',
      auth_token_ref: 'env:STALE_NOTION_TOKEN',
      crawl_mode: 'all_accessible',
    });
    // Neither the environment nor the file reader may be consulted for an inline token.
    const resolverOptions = {
      env: {},
      readTextFile: async () => {
        throw new Error('readTextFile should not be called for inline auth_token');
      },
    };

    const pullConfig = await notionConnectionToPullConfig(parsed, resolverOptions);

    expect(pullConfig.authToken).toBe('ntn_inline_token');
  });
});
|
||||
224
packages/cli/src/context/connections/notion-config.ts
Normal file
224
packages/cli/src/context/connections/notion-config.ts
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import {
|
||||
NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN,
|
||||
type NotionPullConfig,
|
||||
notionPullConfigSchema,
|
||||
} from '../ingest/adapters/notion/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../project/config.js';
|
||||
|
||||
/** Warning text attached to every redacted Notion connection config. */
const KTX_NOTION_ORG_KNOWLEDGE_WARNING = 'Anything accessible to this Notion integration can become organization knowledge.';

/** Crawl modes accepted in a Notion connection config. */
type KtxNotionCrawlMode = 'all_accessible' | 'selected_roots';

/** The `driver: 'notion'` member of the project connection config union, before normalization. */
type RawKtxNotionConnectionConfig = Extract<KtxProjectConnectionConfig, { driver: 'notion' }>;
|
||||
|
||||
/** Fields whose raw declarations are replaced by fully-resolved, non-optional ones. */
type NormalizedNotionConnectionFields = {
  auth_token: string | null;
  auth_token_ref: string | null;
  crawl_mode: KtxNotionCrawlMode;
  root_page_ids: string[];
  root_database_ids: string[];
  root_data_source_ids: string[];
  max_pages_per_run: number;
  max_knowledge_creates_per_run: number;
  max_knowledge_updates_per_run: number;
};

/**
 * Notion connection config after parsing: the raw config type with its auth,
 * crawl-mode, root-id and per-run-limit fields replaced by resolved values.
 */
export type KtxNotionConnectionConfig = Omit<RawKtxNotionConnectionConfig, keyof NormalizedNotionConnectionFields> & {
  driver: 'notion';
} & NormalizedNotionConnectionFields;
|
||||
|
||||
/**
 * Display-safe view of a Notion connection config: reports only whether
 * credentials are configured, never the token or its reference.
 *
 * @internal
 */
export interface RedactedKtxNotionConnectionConfig {
  driver: 'notion';
  /** True when an inline token or a token reference is configured. */
  hasAuthToken: boolean;
  crawlMode: KtxNotionCrawlMode;
  rootPageIds: string[];
  rootDatabaseIds: string[];
  rootDataSourceIds: string[];
  maxPagesPerRun: number;
  maxKnowledgeCreatesPerRun: number;
  maxKnowledgeUpdatesPerRun: number;
  /** Always the fixed organization-knowledge warning text. */
  warning: typeof KTX_NOTION_ORG_KNOWLEDGE_WARNING;
}
|
||||
|
||||
/** Injection points for Notion token resolution. */
interface ResolveNotionTokenOptions {
  /** Environment used for `env:` references; `process.env` is used when omitted. */
  env?: Record<string, string | undefined>;

  /** Reader used for `file:` references; a UTF-8 `readFile` is used when omitted. */
  readTextFile?: (path: string) => Promise<string>;
}
|
||||
|
||||
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
||||
|
||||
/**
 * Returns `value` typed as a record.
 *
 * @throws Error when `value` is not a plain (non-array, non-null) object.
 */
function record(value: unknown): Record<string, unknown> {
  if (isRecord(value)) {
    return value;
  }
  throw new Error('Notion connection config must be an object');
}
|
||||
|
||||
/** Returns the trimmed string, or `fallback` when the value is not a string or is blank. */
function stringValue(value: unknown, fallback: string): string {
  if (typeof value !== 'string') {
    return fallback;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : fallback;
}
|
||||
|
||||
/** Returns the trimmed string, or `null` when the value is not a string or is blank. */
function optionalString(value: unknown): string | null {
  if (typeof value !== 'string') {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
||||
|
||||
/**
 * Extracts the non-blank strings from a raw config value.
 *
 * Entries are returned trimmed, matching the other string helpers in this
 * file, so an id written with surrounding whitespace is not passed on with
 * that whitespace. Non-string and blank entries are dropped.
 *
 * @param value - Raw config value; anything that is not an array yields `[]`.
 * @returns The trimmed, non-empty string entries in their original order.
 */
function stringArray(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const result: string[] = [];
  for (const item of value) {
    if (typeof item !== 'string') {
      continue;
    }
    const trimmed = item.trim();
    if (trimmed.length > 0) {
      result.push(trimmed);
    }
  }
  return result;
}
|
||||
|
||||
/**
 * Returns `value` when it is an integer, or `fallback` when it is `undefined` or `null`.
 *
 * @param name - Field name used in the error message.
 * @throws Error when a value is present but is not an integer number.
 */
function integerWithFallback(value: unknown, fallback: number, name: string): number {
  const isMissing = value === undefined || value === null;
  if (isMissing) {
    return fallback;
  }
  if (typeof value === 'number' && Number.isInteger(value)) {
    return value;
  }
  throw new Error(`${name} must be an integer`);
}
|
||||
|
||||
/**
 * Like `integerWithFallback`, but additionally requires the result to lie in `[min, max]`.
 * The range check also applies to the fallback value.
 *
 * @throws Error when the value is not an integer or lies outside the inclusive range.
 */
function boundedInteger(value: unknown, fallback: number, name: string, min: number, max: number): number {
  const parsed = integerWithFallback(value, fallback, name);
  const withinRange = parsed >= min && parsed <= max;
  if (withinRange) {
    return parsed;
  }
  throw new Error(`${name} must be between ${min} and ${max}`);
}
|
||||
|
||||
/**
 * Validates a raw Notion connection config and fills in defaults.
 *
 * Validation order: object shape, `driver`, presence of `auth_token` or
 * `auth_token_ref`, reference scheme, `crawl_mode`, selected roots, then the
 * per-run limits. Defaults: `crawl_mode` is `selected_roots`,
 * `max_pages_per_run` is 1000 (allowed 1–10000), `max_knowledge_creates_per_run`
 * is `NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN` (allowed 0–25) and
 * `max_knowledge_updates_per_run` is 20 (allowed 0–100).
 *
 * Only the fields listed in the returned object are carried over from `raw`.
 *
 * @param raw - Untrusted config value.
 * @returns The normalized config.
 * @throws Error describing the first validation failure.
 */
export function parseNotionConnectionConfig(raw: unknown): KtxNotionConnectionConfig {
  const input = record(raw);
  if (input.driver !== 'notion') {
    throw new Error('Notion connection config requires driver: notion');
  }

  // Credentials: at least one of the inline token or the reference must be present.
  const authToken = optionalString(input.auth_token);
  const authTokenRef = optionalString(input.auth_token_ref);
  if (!authToken && !authTokenRef) {
    throw new Error('Notion connection config requires auth_token or auth_token_ref');
  }
  if (authTokenRef) {
    const hasKnownScheme = authTokenRef.startsWith('env:') || authTokenRef.startsWith('file:');
    if (!hasKnownScheme) {
      throw new Error('Notion auth_token_ref must use env:NAME or file:/path');
    }
  }

  const crawlMode = stringValue(input.crawl_mode, 'selected_roots');
  if (crawlMode !== 'selected_roots' && crawlMode !== 'all_accessible') {
    throw new Error(`Unsupported Notion crawl_mode: ${crawlMode}`);
  }

  const rootPageIds = stringArray(input.root_page_ids);
  const rootDatabaseIds = stringArray(input.root_database_ids);
  const rootDataSourceIds = stringArray(input.root_data_source_ids);
  const selectedRootCount = rootPageIds.length + rootDatabaseIds.length + rootDataSourceIds.length;
  if (crawlMode === 'selected_roots' && selectedRootCount === 0) {
    throw new Error('selected_roots requires at least one root page, database, or data source id');
  }

  // Limits are validated in this order so the first offending field is the one reported.
  const maxPagesPerRun = boundedInteger(input.max_pages_per_run, 1000, 'max_pages_per_run', 1, 10_000);
  const maxKnowledgeCreatesPerRun = boundedInteger(
    input.max_knowledge_creates_per_run,
    NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN,
    'max_knowledge_creates_per_run',
    0,
    25,
  );
  const maxKnowledgeUpdatesPerRun = boundedInteger(
    input.max_knowledge_updates_per_run,
    20,
    'max_knowledge_updates_per_run',
    0,
    100,
  );

  return {
    driver: 'notion',
    auth_token: authToken,
    auth_token_ref: authTokenRef,
    crawl_mode: crawlMode,
    root_page_ids: rootPageIds,
    root_database_ids: rootDatabaseIds,
    root_data_source_ids: rootDataSourceIds,
    max_pages_per_run: maxPagesPerRun,
    max_knowledge_creates_per_run: maxKnowledgeCreatesPerRun,
    max_knowledge_updates_per_run: maxKnowledgeUpdatesPerRun,
  };
}
|
||||
|
||||
/**
 * Produces the display-safe view of a parsed Notion config: the token and its
 * reference are reduced to a `hasAuthToken` flag, the remaining fields are
 * renamed to camelCase, and the fixed warning text is attached.
 *
 * @internal
 */
export function redactNotionConnectionConfig(config: KtxNotionConnectionConfig): RedactedKtxNotionConnectionConfig {
  const credential = config.auth_token ?? config.auth_token_ref;
  const redacted: RedactedKtxNotionConnectionConfig = {
    driver: 'notion',
    hasAuthToken: Boolean(credential),
    crawlMode: config.crawl_mode,
    rootPageIds: config.root_page_ids,
    rootDatabaseIds: config.root_database_ids,
    rootDataSourceIds: config.root_data_source_ids,
    maxPagesPerRun: config.max_pages_per_run,
    maxKnowledgeCreatesPerRun: config.max_knowledge_creates_per_run,
    maxKnowledgeUpdatesPerRun: config.max_knowledge_updates_per_run,
    warning: KTX_NOTION_ORG_KNOWLEDGE_WARNING,
  };
  return redacted;
}
|
||||
|
||||
/**
 * Expands a leading `~` or `~/` to the current user's home directory.
 * Any other path, including `~name/...`, is returned unchanged.
 */
function expandHome(path: string): string {
  const isHomeRelative = path === '~' || path.startsWith('~/');
  if (!isHomeRelative) {
    return path;
  }
  // Dropping the first two characters removes "~/" (and leaves "" for a bare "~").
  return resolve(homedir(), path.slice(2));
}
|
||||
|
||||
/**
 * Resolves a Notion token reference to the token itself.
 *
 * Supported references:
 * - `env:NAME` reads the variable from `options.env` (default `process.env`);
 * - `file:/path` reads the file via `options.readTextFile` (default UTF-8
 *   `readFile`), after expanding a leading `~`.
 *
 * In both cases the value is trimmed before it is checked, so a value that is
 * only whitespace is rejected instead of being returned as an empty token.
 *
 * @param authTokenRef - Reference in `env:NAME` or `file:/path` form.
 * @param options - Optional environment and file-reader overrides.
 * @returns The trimmed, non-empty token.
 * @throws Error when the variable is unset or blank, the file is blank, or the
 *   reference uses an unsupported scheme. Errors from the file reader propagate.
 *
 * @internal
 */
export async function resolveNotionAuthToken(
  authTokenRef: string,
  options: ResolveNotionTokenOptions = {},
): Promise<string> {
  if (authTokenRef.startsWith('env:')) {
    const envName = authTokenRef.slice('env:'.length);
    // Trim before the emptiness check, mirroring the file branch below.
    const value = (options.env ?? process.env)[envName]?.trim();
    if (!value) {
      throw new Error(`Notion token environment variable ${envName} is not set`);
    }
    return value;
  }
  if (authTokenRef.startsWith('file:')) {
    const path = expandHome(authTokenRef.slice('file:'.length));
    const readTextFile = options.readTextFile ?? ((filePath: string) => readFile(filePath, 'utf-8'));
    const value = (await readTextFile(path)).trim();
    if (!value) {
      throw new Error(`Notion token file is empty: ${path}`);
    }
    return value;
  }
  throw new Error('Notion auth_token_ref must use env:NAME or file:/path');
}
|
||||
|
||||
/**
 * Returns the token for a Notion connection: the inline `auth_token` when it
 * is set, otherwise the token resolved from `auth_token_ref`.
 *
 * A missing reference is resolved as `''`, which the resolver rejects.
 */
export async function resolveNotionConnectionAuthToken(
  config: Pick<KtxNotionConnectionConfig, 'auth_token' | 'auth_token_ref'>,
  options: ResolveNotionTokenOptions = {},
): Promise<string> {
  const inlineToken = config.auth_token;
  if (inlineToken !== null && inlineToken !== undefined) {
    return inlineToken;
  }
  const reference = config.auth_token_ref ?? '';
  return resolveNotionAuthToken(reference, options);
}
|
||||
|
||||
/**
 * Converts a parsed Notion connection config into the adapter's pull config.
 *
 * The token is resolved first; the remaining fields are renamed to camelCase
 * and the result is validated with `notionPullConfigSchema`.
 * `lastSuccessfulCursor` is always `null`.
 *
 * @param config - Parsed connection config.
 * @param options - Optional environment and file-reader overrides for token resolution.
 */
export async function notionConnectionToPullConfig(
  config: KtxNotionConnectionConfig,
  options: ResolveNotionTokenOptions = {},
): Promise<NotionPullConfig> {
  const authToken = await resolveNotionConnectionAuthToken(config, options);
  const candidate = {
    authToken,
    crawlMode: config.crawl_mode,
    rootPageIds: config.root_page_ids,
    rootDatabaseIds: config.root_database_ids,
    rootDataSourceIds: config.root_data_source_ids,
    maxPagesPerRun: config.max_pages_per_run,
    maxKnowledgeCreatesPerRun: config.max_knowledge_creates_per_run,
    maxKnowledgeUpdatesPerRun: config.max_knowledge_updates_per_run,
    lastSuccessfulCursor: null,
  };
  return notionPullConfigSchema.parse(candidate);
}
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createPostgresQueryExecutor } from './postgres-query-executor.js';
|
||||
|
||||
// Builds a fake pg client that records every `query` input in `calls`.
// BEGIN READ ONLY and COMMIT get empty control results; anything else gets a two-row SELECT result.
function makeClient() {
  const calls: unknown[] = [];
  const controlResult = (command: string) => ({ rows: [], fields: [], rowCount: null, command });

  const query = vi.fn(async (input: unknown) => {
    calls.push(input);
    switch (input) {
      case 'BEGIN READ ONLY':
        return controlResult('BEGIN');
      case 'COMMIT':
        return controlResult('COMMIT');
      default:
        return {
          rows: [
            ['paid', 2],
            ['open', 1],
          ],
          fields: [{ name: 'status' }, { name: 'order_count' }],
          rowCount: 2,
          command: 'SELECT',
        };
    }
  });

  const client = {
    connect: vi.fn(async () => undefined),
    query,
    end: vi.fn(async () => undefined),
  };
  return { client, calls };
}
|
||||
|
||||
describe('createPostgresQueryExecutor', () => {
  it('runs a read-only transaction in array row mode and closes the client', async () => {
    const { client, calls } = makeClient();
    const clientFactory = vi.fn(() => client);
    const executor = createPostgresQueryExecutor({ clientFactory });

    const result = await executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres', url: 'postgres://example/db' },
      sql: 'select status, count(*) as order_count from public.orders group by status',
      maxRows: 50,
    });

    // The user query must be wrapped between BEGIN READ ONLY and COMMIT.
    expect(client.connect).toHaveBeenCalledTimes(1);
    const [beginCall, queryCall, commitCall] = calls;
    expect(beginCall).toBe('BEGIN READ ONLY');
    expect(queryCall).toEqual({
      text: 'select * from (select status, count(*) as order_count from public.orders group by status) as ktx_query_result limit 50',
      rowMode: 'array',
    });
    expect(commitCall).toBe('COMMIT');
    expect(client.end).toHaveBeenCalledTimes(1);

    expect(result).toEqual({
      headers: ['status', 'order_count'],
      rows: [
        ['paid', 2],
        ['open', 1],
      ],
      totalRows: 2,
      command: 'SELECT',
      rowCount: 2,
    });
  });

  it('rolls back and closes the client when query execution fails', async () => {
    // Transaction-control statements succeed; the user query itself throws.
    const query = vi.fn(async (input: unknown) => {
      if (input === 'BEGIN READ ONLY' || input === 'ROLLBACK') {
        return { rows: [], fields: [], rowCount: null, command: String(input) };
      }
      throw new Error('syntax error');
    });
    const client = {
      connect: vi.fn(async () => undefined),
      query,
      end: vi.fn(async () => undefined),
    };
    const executor = createPostgresQueryExecutor({
      clientFactory: vi.fn(() => client),
    });

    const failingRun = executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres', url: 'postgres://example/db' },
      sql: 'select * from broken',
      maxRows: 10,
    });

    await expect(failingRun).rejects.toThrow('syntax error');
    expect(client.query).toHaveBeenCalledWith('ROLLBACK');
    expect(client.end).toHaveBeenCalledTimes(1);
  });

  it('requires a Postgres url', async () => {
    const executor = createPostgresQueryExecutor({ clientFactory: vi.fn() });

    const runWithoutUrl = executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres' },
      sql: 'select 1',
    });

    await expect(runWithoutUrl).rejects.toThrow('Local Postgres execution requires connections.warehouse.url');
  });
});
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
import { Client, type ClientConfig } from 'pg';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
import { limitSqlForExecution } from './read-only-sql.js';
|
||||
|
||||
/** The subset of a Postgres client that the executor uses; lets tests supply a fake. */
interface PgClientLike {
  /** Opens the connection. */
  connect(): Promise<unknown>;

  /** Runs a plain SQL string, or a query config that requests rows as arrays. */
  query(input: string | { text: string; rowMode: 'array' }): Promise<{
    fields: Array<{ name: string }>;
    rows: unknown[][];
    command: string;
    rowCount: number | null;
  }>;

  /** Closes the connection. */
  end(): Promise<void>;
}
|
||||
|
||||
/** Tuning and injection options for `createPostgresQueryExecutor`. */
interface PostgresQueryExecutorOptions {
  /** Passed to the client as `statement_timeout`; defaults to 30000. */
  statementTimeoutMs?: number;

  /** Passed to the client as `query_timeout`; defaults to 35000. */
  queryTimeoutMs?: number;

  /** Passed to the client as `connectionTimeoutMillis`; defaults to 5000. */
  connectionTimeoutMs?: number;

  /** Creates the client for each execution; defaults to constructing a `pg` `Client`. */
  clientFactory?: (config: ClientConfig) => PgClientLike;
}
|
||||
|
||||
/** Returns the connection's driver as a lower-case string, or `''` when there is no connection or driver. */
function connectionDriver(input: KtxSqlQueryExecutionInput): string {
  const rawDriver = input.connection?.driver ?? '';
  return String(rawDriver).toLowerCase();
}
|
||||
|
||||
/** Default client factory: constructs a `pg` `Client` from the given config. */
function createDefaultClient(config: ClientConfig): PgClientLike {
  const client = new Client(config);
  return client;
}
|
||||
|
||||
/**
 * Creates an executor that runs one query per call against Postgres.
 *
 * Each call creates a new client, connects, runs the row-limited SQL inside a
 * `BEGIN READ ONLY` ... `COMMIT` transaction with rows returned as arrays, and
 * always closes the client once connected. On failure a `ROLLBACK` is
 * attempted (its own failure is ignored) and the original error is rethrown.
 *
 * @param options - Timeouts and an optional client factory.
 * @returns An executor whose `execute` rejects when the connection's driver is
 *   not `postgres`/`postgresql` or when it has no non-blank `url`.
 */
export function createPostgresQueryExecutor(options: PostgresQueryExecutorOptions = {}): KtxSqlQueryExecutorPort {
  const clientFactory = options.clientFactory ?? createDefaultClient;

  const execute = async (input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> => {
    const driver = connectionDriver(input);
    const { connection } = input;

    const isPostgres = driver === 'postgres' || driver === 'postgresql';
    if (!isPostgres) {
      throw new Error(`Local Postgres execution cannot run driver "${connection?.driver ?? 'unknown'}".`);
    }
    const url = connection?.url;
    if (typeof url !== 'string' || url.trim().length === 0) {
      throw new Error(`Local Postgres execution requires connections.${input.connectionId}.url.`);
    }

    const client = clientFactory({
      connectionString: url,
      statement_timeout: options.statementTimeoutMs ?? 30_000,
      query_timeout: options.queryTimeoutMs ?? 35_000,
      connectionTimeoutMillis: options.connectionTimeoutMs ?? 5_000,
      application_name: 'ktx-local-query',
    });
    await client.connect();

    try {
      await client.query('BEGIN READ ONLY');
      const limitedSql = limitSqlForExecution(input.sql, input.maxRows);
      const result = await client.query({ text: limitedSql, rowMode: 'array' });
      await client.query('COMMIT');

      const headers = result.fields.map((field) => field.name);
      return {
        headers,
        rows: result.rows,
        totalRows: result.rows.length,
        command: result.command,
        rowCount: result.rowCount,
      };
    } catch (error) {
      // Best-effort rollback; the original failure is what the caller needs to see.
      await client.query('ROLLBACK').catch(() => undefined);
      throw error;
    } finally {
      await client.end();
    }
  };

  return { execute };
}
|
||||
25
packages/cli/src/context/connections/query-executor.ts
Normal file
25
packages/cli/src/context/connections/query-executor.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
|
||||
/** Everything an executor needs to run one SQL statement against a configured connection. */
export interface KtxSqlQueryExecutionInput {
  /** Key of the connection in the project config; used in error messages (`connections.<id>...`). */
  connectionId: string;
  /** Optional project directory; an executor may use it to resolve relative paths. */
  projectDir?: string;
  /** The connection's configuration entry, or `undefined` when none was found. */
  connection: KtxProjectConnectionConfig | undefined;
  /** The SQL text to execute. */
  sql: string;
  /** Optional cap on the number of rows to return. */
  maxRows?: number;
}
|
||||
|
||||
/** Tabular result of a query: column names plus rows in positional (array) form. */
export interface KtxSqlQueryExecutionResult {
  /** Column names, in result order. */
  headers: string[];
  /** One array of cell values per row. */
  rows: unknown[][];
  /** Number of rows contained in `rows`. */
  totalRows: number;
  /** Command tag reported for the statement (e.g. `SELECT`). */
  command: string;
  /** Row count as reported by the driver; may be `null`. */
  rowCount: number | null;
}
|
||||
|
||||
/** Port implemented by each driver-specific SQL executor. */
export interface KtxSqlQueryExecutorPort {
  /** Runs `input.sql` against `input.connection` and resolves with the tabular result. */
  execute(input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult>;
}
|
||||
|
||||
/**
 * Converts driver rows into positional form.
 *
 * Rows that are already arrays are returned as-is (same reference); any other
 * row is treated as an object and replaced by its property values in key order.
 */
export function normalizeQueryRows(rows: unknown[]): unknown[][] {
  const toCells = (row: unknown): unknown[] => {
    if (Array.isArray(row)) {
      return row;
    }
    return Object.values(row as Record<string, unknown>);
  };

  return rows.map((row) => toCells(row));
}
|
||||
30
packages/cli/src/context/connections/read-only-sql.test.ts
Normal file
30
packages/cli/src/context/connections/read-only-sql.test.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from './read-only-sql.js';
|
||||
|
||||
describe('assertReadOnlySql', () => {
  it('allows select and with queries', () => {
    const plainSelect = 'select * from orders';
    expect(assertReadOnlySql(plainSelect)).toBe(plainSelect);

    const cteQuery = 'with paid as (select * from orders) select * from paid';
    expect(assertReadOnlySql(cteQuery)).toContain('with paid');
  });

  it('rejects mutating statements before opening a database connection', () => {
    const expectedMessage = 'Only read-only SELECT/WITH queries can be executed locally';

    // One DML and one DDL statement; both must be refused with the same message.
    for (const mutatingSql of ['delete from orders', 'create table x(id int)']) {
      expect(() => assertReadOnlySql(mutatingSql)).toThrow(expectedMessage);
    }
  });
});
|
||||
|
||||
describe('limitSqlForExecution', () => {
  it('wraps compiled SQL and strips trailing semicolons', () => {
    const limited = limitSqlForExecution('select * from public.orders; ', 25);

    expect(limited).toBe('select * from (select * from public.orders) as ktx_query_result limit 25');
  });

  it('returns the trimmed SQL when no maxRows value is provided', () => {
    const unlimited = limitSqlForExecution('select * from orders; ', undefined);

    expect(unlimited).toBe('select * from orders');
  });
});
|
||||
22
packages/cli/src/context/connections/read-only-sql.ts
Normal file
22
packages/cli/src/context/connections/read-only-sql.ts
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
/** Leading keywords of statements that can change data, schema, or privileges. */
const MUTATING_SQL =
  /^\s*(insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh)\b/i;

/** A statement must start with one of these keywords to be accepted. */
const READ_SQL = /^\s*(select|with)\b/i;

/** One or more statement terminators (each optionally followed by whitespace) at the very end. */
const TRAILING_TERMINATORS = /(?:;\s*)+$/;

/**
 * Validates that `sql` is a single SELECT/WITH statement.
 *
 * Besides checking the leading keyword, any `;` that is not a trailing
 * terminator is rejected. Without that check, input such as
 * `select 1; commit; delete from t` passes the keyword test and could be sent
 * to the database as several statements, or break out of the sub-select
 * wrapper built by `limitSqlForExecution`. The check is deliberately
 * conservative: it does not parse string literals or comments, so a query
 * containing a `;` inside a literal is refused as well.
 *
 * @param sql SQL text to validate.
 * @returns The input with surrounding whitespace removed (a trailing `;` is kept).
 * @throws Error when the statement is not a single read-only SELECT/WITH query.
 */
export function assertReadOnlySql(sql: string): string {
  const trimmed = sql.trim();

  // MUTATING_SQL cannot match once READ_SQL has matched; it is kept as a second line of defence.
  if (!READ_SQL.test(trimmed) || MUTATING_SQL.test(trimmed)) {
    throw new Error('Only read-only SELECT/WITH queries can be executed locally.');
  }

  const withoutTerminators = trimmed.replace(TRAILING_TERMINATORS, '');
  if (withoutTerminators.includes(';')) {
    throw new Error(
      'Only read-only SELECT/WITH queries can be executed locally. Multiple statements are not allowed.',
    );
  }

  return trimmed;
}

/**
 * Validates `sql` and, when a row cap is given, wraps it in
 * `select * from (...) as ktx_query_result limit N`.
 *
 * @param sql SQL text; must satisfy `assertReadOnlySql`.
 * @param maxRows Row cap. `undefined` (or `0`, kept for backward compatibility)
 *   means "no cap" and returns the validated SQL without trailing terminators.
 * @returns The SQL to send to the database.
 * @throws Error when `sql` is not read-only, or when `maxRows` is not a
 *   positive integer (this now includes `NaN`, which used to disable the cap silently).
 */
export function limitSqlForExecution(sql: string, maxRows: number | undefined): string {
  const statement = assertReadOnlySql(sql).replace(TRAILING_TERMINATORS, '');

  if (maxRows == null || maxRows === 0) {
    return statement;
  }
  if (!Number.isInteger(maxRows) || maxRows <= 0) {
    throw new Error('maxRows must be a positive integer.');
  }

  // A `--` comment on the last line would otherwise swallow the wrapper's closing
  // parenthesis and LIMIT clause; a newline ends the comment and is plain whitespace otherwise.
  const closing = statement.includes('--') ? '\n)' : ')';
  return `select * from (${statement}${closing} as ktx_query_result limit ${maxRows}`;
}
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { writeFileSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import Database from 'better-sqlite3';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { createSqliteQueryExecutor, sqliteDatabasePathFromConnection } from './sqlite-query-executor.js';
|
||||
|
||||
describe('createSqliteQueryExecutor', () => {
  let tempDir: string;
  let dbPath: string;

  // Each test runs against a throwaway SQLite file seeded with three orders (two paid, one open).
  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-sqlite-query-'));
    dbPath = join(tempDir, 'warehouse.db');

    const seedDb = new Database(dbPath);
    seedDb.exec(`
      CREATE TABLE orders (
        id INTEGER PRIMARY KEY,
        status TEXT NOT NULL,
        amount INTEGER NOT NULL
      );
      INSERT INTO orders (status, amount) VALUES
        ('paid', 20),
        ('paid', 30),
        ('open', 10);
    `);
    seedDb.close();
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('executes read-only SELECT SQL against a relative SQLite path', async () => {
    const executor = createSqliteQueryExecutor();

    const result = await executor.execute({
      connectionId: 'warehouse',
      projectDir: tempDir,
      connection: { driver: 'sqlite', path: 'warehouse.db' },
      sql: 'select status, count(*) as order_count from orders group by status order by status',
      maxRows: 10,
    });

    const expectedRows = [
      ['open', 1],
      ['paid', 2],
    ];
    expect(result).toEqual({
      headers: ['status', 'order_count'],
      rows: expectedRows,
      totalRows: 2,
      command: 'SELECT',
      rowCount: 2,
    });
  });

  it('supports file urls for SQLite database paths', async () => {
    const resolved = sqliteDatabasePathFromConnection({
      connectionId: 'warehouse',
      projectDir: tempDir,
      connection: { driver: 'sqlite', url: `file://${dbPath}` },
      sql: 'select 1',
    });

    expect(resolved).toBe(dbPath);
  });

  it('resolves file references for SQLite path fields', async () => {
    // The `path` field points at a text file whose content is the real database path.
    const pointerPath = join(tempDir, 'sqlite-path.txt');
    writeFileSync(pointerPath, dbPath, 'utf-8');

    const resolved = sqliteDatabasePathFromConnection({
      connectionId: 'warehouse',
      projectDir: tempDir,
      connection: { driver: 'sqlite', path: `file:${pointerPath}` },
      sql: 'select 1',
    });

    expect(resolved).toBe(dbPath);
  });

  it('resolves env references for SQLite database urls', async () => {
    const previousUrl = process.env.KTX_SQLITE_TEST_URL;
    process.env.KTX_SQLITE_TEST_URL = `sqlite:${dbPath}`;

    try {
      const resolved = sqliteDatabasePathFromConnection({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite', url: 'env:KTX_SQLITE_TEST_URL' },
        sql: 'select 1',
      });

      expect(resolved).toBe(dbPath);
    } finally {
      // Put the variable back exactly as it was, including "not set at all".
      if (previousUrl === undefined) {
        delete process.env.KTX_SQLITE_TEST_URL;
      } else {
        process.env.KTX_SQLITE_TEST_URL = previousUrl;
      }
    }
  });

  it('rejects mutating SQL before opening the database', async () => {
    const executor = createSqliteQueryExecutor();

    await expect(
      executor.execute({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite', path: 'warehouse.db' },
        sql: 'delete from orders',
      }),
    ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
  });

  it('requires a SQLite driver and a database path', async () => {
    const executor = createSqliteQueryExecutor();

    // Wrong driver.
    await expect(
      executor.execute({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'postgres', path: 'warehouse.db' },
        sql: 'select 1',
      }),
    ).rejects.toThrow('Local SQLite execution cannot run driver "postgres"');

    // Right driver, but neither `path` nor `url`.
    await expect(
      executor.execute({
        connectionId: 'warehouse',
        projectDir: tempDir,
        connection: { driver: 'sqlite' },
        sql: 'select 1',
      }),
    ).rejects.toThrow('Local SQLite execution requires connections.warehouse.path or connections.warehouse.url');
  });
});
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
import { isAbsolute, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import Database from 'better-sqlite3';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import type {
|
||||
KtxSqlQueryExecutionInput,
|
||||
KtxSqlQueryExecutionResult,
|
||||
KtxSqlQueryExecutorPort,
|
||||
} from './query-executor.js';
|
||||
import { normalizeQueryRows } from './query-executor.js';
|
||||
import { limitSqlForExecution } from './read-only-sql.js';
|
||||
|
||||
/** Loosely typed connection entry as read by the helpers below; `undefined` when no connection is configured. */
type SqliteConnectionConfig = Record<string, unknown> | undefined;
|
||||
|
||||
/**
 * Returns the connection's `driver` as a lower-cased string.
 * A missing connection or driver yields the empty string.
 */
function connectionDriver(input: KtxSqlQueryExecutionInput): string {
  const rawDriver = input.connection?.driver ?? '';
  return String(rawDriver).toLowerCase();
}
|
||||
|
||||
/**
 * Reads `connection[key]` when it is a non-blank string and resolves any
 * `env:` / `file:` reference in it.
 *
 * @returns `undefined` when the key is absent, not a string, or blank;
 *   otherwise whatever `resolveStringReference` returns for the trimmed value.
 */
function stringConfigValue(connection: SqliteConnectionConfig, key: string): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }

  const trimmed = raw.trim();
  return trimmed.length > 0 ? resolveStringReference(key, trimmed) : undefined;
}
|
||||
|
||||
/**
 * Resolves indirections in a connection setting.
 *
 * - `env:NAME` returns the value of environment variable `NAME`, or `''` when unset.
 * - `file:PATH` (for every key except `url`, where `file:` is a real URL scheme)
 *   returns the trimmed content of the file at `PATH`. A leading `~` is expanded
 *   to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param key Name of the setting being resolved.
 * @param value Trimmed setting value.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(key: string, value: string): string {
  if (value.startsWith('env:')) {
    return process.env[value.slice('env:'.length)] ?? '';
  }

  if (key !== 'url' && value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator that follows `~`: resolve(home, '/x') would otherwise
    // discard the home directory and return '/x'.
    const pointerPath = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(pointerPath, 'utf-8').trim();
  }

  return value;
}
|
||||
|
||||
/**
 * Extracts a filesystem path from a SQLite connection URL.
 *
 * - `file:` URLs are converted with `fileURLToPath`.
 * - `sqlite:` URLs yield their percent-decoded pathname; when the pathname is
 *   empty the URL is returned untouched.
 * - Any other value is returned untouched.
 */
function sqlitePathFromUrl(url: string): string {
  if (url.startsWith('file:')) {
    return fileURLToPath(url);
  }
  if (!url.startsWith('sqlite:')) {
    return url;
  }

  const { pathname } = new URL(url);
  return pathname.length > 0 ? decodeURIComponent(pathname) : url;
}
|
||||
|
||||
/**
 * Determines the absolute path of the SQLite database file for a connection.
 *
 * `path` wins over `url` when both resolve to a non-empty value. A setting
 * that resolves to an empty string (for example an `env:` reference to an
 * unset variable) counts as absent, so the other setting is used instead of
 * an empty path that would resolve to the project directory itself.
 * Relative results are resolved against `input.projectDir`, falling back to
 * the current working directory.
 *
 * @throws Error when the driver is not `sqlite`/`sqlite3`, or when neither
 *   `path` nor `url` yields a value.
 * @internal
 */
export function sqliteDatabasePathFromConnection(input: KtxSqlQueryExecutionInput): string {
  const driver = connectionDriver(input);
  if (driver !== 'sqlite' && driver !== 'sqlite3') {
    throw new Error(`Local SQLite execution cannot run driver "${input.connection?.driver ?? 'unknown'}".`);
  }

  const pathValue = stringConfigValue(input.connection, 'path');
  const urlValue = stringConfigValue(input.connection, 'url');

  let candidate: string;
  if (pathValue) {
    candidate = pathValue;
  } else if (urlValue) {
    candidate = sqlitePathFromUrl(urlValue);
  } else {
    throw new Error(
      `Local SQLite execution requires connections.${input.connectionId}.path or connections.${input.connectionId}.url.`,
    );
  }

  return isAbsolute(candidate) ? candidate : resolve(input.projectDir ?? process.cwd(), candidate);
}
|
||||
|
||||
/**
 * Creates an executor that runs one read-only statement against a SQLite file.
 *
 * The SQL is validated (and optionally row-limited) before the database is
 * opened. The file is opened with `readonly: true, fileMustExist: true` and
 * closed again once the statement has run, whether it succeeded or not.
 */
export function createSqliteQueryExecutor(): KtxSqlQueryExecutorPort {
  return {
    async execute(input: KtxSqlQueryExecutionInput): Promise<KtxSqlQueryExecutionResult> {
      const sql = limitSqlForExecution(input.sql, input.maxRows);
      const dbPath = sqliteDatabasePathFromConnection(input);
      const db = new Database(dbPath, { readonly: true, fileMustExist: true });
      try {
        // Raw mode returns each row as an array. Object rows would merge columns that
        // share a name (e.g. `select a.id, b.id ...`), leaving rows shorter than `headers`.
        const statement = db.prepare(sql).raw(true);
        const rows = statement.all() as unknown[];
        return {
          headers: statement.columns().map((column) => column.name),
          rows: normalizeQueryRows(rows),
          totalRows: rows.length,
          command: 'SELECT',
          rowCount: rows.length,
        };
      } finally {
        db.close();
      }
    },
  };
}
|
||||
34
packages/cli/src/context/core/config-reference.test.ts
Normal file
34
packages/cli/src/context/core/config-reference.test.ts
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { resolveKtxConfigReference, resolveKtxHomePath } from './config-reference.js';
|
||||
|
||||
describe('KTX config references', () => {
  it('resolves env references without returning empty values', () => {
    const reference = 'env:AI_GATEWAY_API_KEY';

    // Surrounding whitespace is trimmed; blank or missing variables resolve to undefined.
    expect(resolveKtxConfigReference(reference, { AI_GATEWAY_API_KEY: ' gateway-key ' })).toBe('gateway-key');
    expect(resolveKtxConfigReference(reference, { AI_GATEWAY_API_KEY: ' ' })).toBeUndefined();
    expect(resolveKtxConfigReference(reference, {})).toBeUndefined();
  });

  it('resolves file references and trims file content', async () => {
    const dir = join(tmpdir(), `ktx-config-reference-${process.pid}`);
    const keyPath = join(dir, 'gateway-key.txt');
    await mkdir(dir, { recursive: true });
    await writeFile(keyPath, 'file-gateway-key\n', 'utf8');

    const resolved = resolveKtxConfigReference(`file:${keyPath}`, {});

    expect(resolved).toBe('file-gateway-key');
  });

  it('returns literal values unchanged after trimming blank-only values', () => {
    expect(resolveKtxConfigReference('provider/model', {})).toBe('provider/model');

    for (const blank of [' ', undefined]) {
      expect(resolveKtxConfigReference(blank, {})).toBeUndefined();
    }
  });

  it('resolves home-prefixed paths', () => {
    const expanded = resolveKtxHomePath('~/ktx/key.txt');

    expect(expanded).toContain('/ktx/key.txt');
  });
});
|
||||
37
packages/cli/src/context/core/config-reference.ts
Normal file
37
packages/cli/src/context/core/config-reference.ts
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
|
||||
/**
 * Expands a leading `~` to the current user's home directory and returns an
 * absolute path.
 *
 * - `~` alone returns the home directory.
 * - `~/rest` returns `rest` resolved inside the home directory.
 * - Anything else (including `~name`) is resolved like a regular path.
 *
 * @internal
 */
export function resolveKtxHomePath(path: string): string {
  const isBareTilde = path === '~';
  const isHomeRelative = path.startsWith('~/');

  if (!isBareTilde && !isHomeRelative) {
    return resolve(path);
  }
  return isBareTilde ? homedir() : resolve(homedir(), path.slice(2));
}
|
||||
|
||||
/**
 * Resolves a config value that may be a literal, an `env:NAME` reference, or a
 * `file:PATH` reference.
 *
 * @param value Raw config value; an empty or missing value resolves to `undefined`.
 * @param env Environment used for `env:` lookups.
 * @returns The trimmed resolved value, or `undefined` when it is missing or blank.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
export function resolveKtxConfigReference(value: string | undefined, env: NodeJS.ProcessEnv): string | undefined {
  if (!value) {
    return undefined;
  }

  // Shared tail of every branch: trim, and map blank results to undefined.
  const nonBlank = (text: string | undefined): string | undefined => {
    const trimmed = text?.trim() ?? '';
    return trimmed.length > 0 ? trimmed : undefined;
  };

  if (value.startsWith('env:')) {
    const envName = value.slice('env:'.length).trim();
    return nonBlank(env[envName]);
  }

  if (value.startsWith('file:')) {
    const filePath = resolveKtxHomePath(value.slice('file:'.length).trim());
    return nonBlank(readFileSync(filePath, 'utf8'));
  }

  return nonBlank(value);
}
|
||||
42
packages/cli/src/context/core/config.ts
Normal file
42
packages/cli/src/context/core/config.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
/** Filesystem locations used by the core services; every field is optional. */
interface KtxStorageConfig {
  /** Explicit config directory; when unset, `resolveConfigDir` derives one from `homeDir`. */
  configDir?: string;
  /** Base directory for derived locations; the resolvers fall back to `/tmp` when unset. */
  homeDir?: string;
  /** Explicit worktrees directory; when unset, `resolveWorktreesDir` derives one from `homeDir`. */
  worktreesDir?: string;
}
|
||||
|
||||
/** Git identity settings plus optional bootstrap-commit overrides. */
interface KtxGitConfig {
  /** Git user name. */
  userName: string;
  /** Git user e-mail. */
  userEmail: string;
  // NOTE(review): the three bootstrap* fields presumably describe the initial commit of a
  // freshly created config repository — confirm against GitService.
  bootstrapMessage?: string;
  bootstrapAuthor?: string;
  bootstrapAuthorEmail?: string;
}
|
||||
|
||||
/** Top-level configuration for the core services: storage locations and git settings. */
export interface KtxCoreConfig {
  /** Where config data and worktrees live. */
  storage: KtxStorageConfig;
  /** Git identity and bootstrap settings. */
  git: KtxGitConfig;
}
|
||||
|
||||
/** Minimal logging surface with four severity levels. */
export interface KtxLogger {
  /** Verbose diagnostic output. */
  debug(message: string): void;
  /** Regular informational output. */
  log(message: string): void;
  /** Something unexpected that does not stop the operation. */
  warn(message: string): void;
  /** A failure, optionally with the underlying error value. */
  error(message: string, error?: unknown): void;
}
|
||||
|
||||
/** Shared do-nothing handler used for every level of `noopLogger`. */
const discardLogEntry = (): undefined => undefined;

/** Logger that silently drops every message. */
export const noopLogger: KtxLogger = {
  debug: discardLogEntry,
  log: discardLogEntry,
  warn: discardLogEntry,
  error: discardLogEntry,
};
|
||||
|
||||
/**
 * Returns the config directory: `storage.configDir` when set, otherwise
 * `<homeDir>/ktx/config` with `homeDir` defaulting to `/tmp`.
 */
export function resolveConfigDir(config: KtxCoreConfig): string {
  const { configDir, homeDir } = config.storage;
  return configDir ?? `${homeDir ?? '/tmp'}/ktx/config`;
}
|
||||
|
||||
/**
 * Returns the worktrees directory: `storage.worktreesDir` when set, otherwise
 * `<homeDir>/.worktrees` with `homeDir` defaulting to `/tmp`.
 */
export function resolveWorktreesDir(config: KtxCoreConfig): string {
  const { worktreesDir, homeDir } = config.storage;
  return worktreesDir ?? `${homeDir ?? '/tmp'}/.worktrees`;
}
|
||||
5
packages/cli/src/context/core/embedding.ts
Normal file
5
packages/cli/src/context/core/embedding.ts
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/** Port for an embedding provider. */
export interface KtxEmbeddingPort {
  // NOTE(review): presumably the largest `texts` array accepted by one bulk call — confirm with implementations.
  maxBatchSize: number;
  /** Computes the embedding vector for a single text. */
  computeEmbedding(text: string): Promise<number[]>;
  /** Computes one embedding vector per input text. */
  computeEmbeddingsBulk(texts: string[]): Promise<number[][]>;
}
|
||||
43
packages/cli/src/context/core/file-store.ts
Normal file
43
packages/cli/src/context/core/file-store.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
/** Result of a write or delete; implementations may attach extra fields. */
export interface KtxFileWriteResult {
  /** Hash of the commit created by the operation, when there is one. */
  commitHash?: string | null;
  [key: string]: unknown;
}
|
||||
|
||||
/** Result of reading a file; implementations may attach extra fields. */
export interface KtxFileReadResult {
  /** The file's text content. */
  content: string;
  [key: string]: unknown;
}
|
||||
|
||||
/** Result of listing a directory. */
export interface KtxFileListResult {
  /** The listed file paths. */
  files: string[];
}
|
||||
|
||||
/** One revision in a file's history; every field is optional and extra fields are allowed. */
export interface KtxFileHistoryEntry {
  /** Commit hash of the revision. */
  sha?: string;
  /** Commit message. */
  message?: string;
  /** Commit author. */
  author?: string;
  /** Commit date, either as a string or as a `Date`. */
  date?: string | Date;
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Port for a versioned file store.
 *
 * @typeParam TSelf Type returned by `forWorktree`, normally the implementing class itself.
 */
export interface KtxFileStorePort<TSelf = unknown> {
  /**
   * Writes `content` to `path` and records the change with the given author and message.
   * NOTE(review): `skipLock` presumably bypasses the store's write lock — confirm with implementations.
   */
  writeFile(
    path: string,
    content: string,
    author: string,
    authorEmail: string,
    commitMessage: string,
    options?: { skipLock?: boolean },
  ): Promise<KtxFileWriteResult>;

  /** Reads the file at `path`. */
  readFile(path: string): Promise<KtxFileReadResult>;

  /** Deletes the file at `path`; the declared result may be `null`. */
  deleteFile(
    path: string,
    author: string,
    authorEmail: string,
    commitMessage: string,
    options?: { skipLock?: boolean },
  ): Promise<KtxFileWriteResult | null>;

  /** Lists files under `path`, optionally recursing into sub-directories. */
  listFiles(path: string, recursive?: boolean): Promise<KtxFileListResult>;

  /**
   * Returns the history of `path`.
   * NOTE(review): the `| unknown` member widens the declared type to `unknown`, so callers
   * must narrow the result themselves.
   */
  getFileHistory(path: string): Promise<KtxFileHistoryEntry[] | unknown>;

  /** Returns a store bound to the worktree at `workdir`. */
  forWorktree(workdir: string): TSelf;
}
|
||||
29
packages/cli/src/context/core/git-env.ts
Normal file
29
packages/cli/src/context/core/git-env.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import { simpleGit, type SimpleGit } from 'simple-git';
|
||||
|
||||
/**
 * Environment variables stripped before git is spawned.
 * NOTE(review): judging by the name, these are the variables a surrounding git hook or
 * editor session would leak into child git processes — confirm the intent.
 */
const GIT_HOOK_ENV_KEYS = [
  // Repository / index / object-store location overrides.
  'GIT_ALTERNATE_OBJECT_DIRECTORIES',
  'GIT_DIR',
  'GIT_INDEX_FILE',
  'GIT_OBJECT_DIRECTORY',
  'GIT_PREFIX',
  'GIT_QUARANTINE_PATH',
  'GIT_WORK_TREE',
  // Editor, pager and executable-path settings.
  'GIT_EDITOR',
  'GIT_EXEC_PATH',
  'GIT_PAGER',
  'PAGER',
  'VISUAL',
  'EDITOR',
] as const;

/**
 * Returns a shallow copy of `env` without any of the `GIT_HOOK_ENV_KEYS`.
 * The object passed in is left untouched.
 */
function sanitizedGitEnv(env: NodeJS.ProcessEnv = process.env): NodeJS.ProcessEnv {
  const cleaned: NodeJS.ProcessEnv = { ...env };
  GIT_HOOK_ENV_KEYS.forEach((blockedKey) => {
    delete cleaned[blockedKey];
  });
  return cleaned;
}
|
||||
|
||||
/**
 * Creates a `simple-git` instance rooted at `baseDir` whose child processes
 * run with the sanitized environment from `sanitizedGitEnv()`.
 */
export function createSimpleGit(baseDir: string): SimpleGit {
  const git = simpleGit({ baseDir, unsafe: { allowUnsafeAskPass: true } });
  return git.env(sanitizedGitEnv());
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { SimpleGit } from 'simple-git';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { createSimpleGit } from './git-env.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
describe('GitService.assertWorktreeClean', () => {
  let workdir: string;
  let git: SimpleGit;
  let gitService: GitService;

  // Fresh repository with a single commit; the service is pointed at it directly.
  beforeEach(async () => {
    workdir = await mkdtemp(join(tmpdir(), 'gitsvc-clean-'));
    git = createSimpleGit(workdir);
    await git.init();
    await git.addConfig('user.email', 't@test');
    await git.addConfig('user.name', 'Test');
    await writeFile(join(workdir, 'init'), 'init');
    await git.add('.');
    await git.commit('init');

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: workdir, homeDir: workdir },
      git: { userName: 'Test', userEmail: 't@test' },
    };
    gitService = new GitService(coreConfig);
    // Overwrite the service's git handle and config dir so it operates on the temp repository.
    Object.assign(gitService, { git, configDir: workdir });
  });

  afterEach(async () => rm(workdir, { recursive: true, force: true }));

  it('does not throw on a clean worktree', async () => {
    await expect(gitService.assertWorktreeClean()).resolves.toBeUndefined();
  });

  // Each of these marker files under .git/ must be reported by name.
  for (const marker of ['MERGE_HEAD', 'CHERRY_PICK_HEAD', 'REVERT_HEAD']) {
    it(`throws when ${marker} exists`, async () => {
      await writeFile(join(workdir, '.git', marker), 'deadbeef\n');
      await expect(gitService.assertWorktreeClean()).rejects.toThrow(new RegExp(marker));
    });
  }

  it('throws when sequencer/todo exists (interrupted multi-commit revert/cherry-pick)', async () => {
    const sequencerDir = join(workdir, '.git', 'sequencer');
    await mkdir(sequencerDir, { recursive: true });
    await writeFile(join(sequencerDir, 'todo'), 'pick deadbeef foo\n');
    await expect(gitService.assertWorktreeClean()).rejects.toThrow(/sequencer/);
  });

  it('throws when the index has unmerged paths', async () => {
    const sharedFile = join(workdir, 'shared');

    // Branch "a" and branch "b" both add `shared` with different content.
    await git.checkoutLocalBranch('a');
    await writeFile(sharedFile, 'A version');
    await git.add('.');
    await git.commit('a');
    // The default branch name depends on the local git configuration.
    await git.checkout('master').catch(() => git.checkout('main'));
    await git.checkoutLocalBranch('b');
    await writeFile(sharedFile, 'B version');
    await git.add('.');
    await git.commit('b');

    // The merge is expected to conflict; its failure is deliberately ignored.
    await git.raw(['merge', 'a']).catch(() => undefined);

    await expect(gitService.assertWorktreeClean()).rejects.toThrow();
  });
});
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { mkdir, mkdtemp, readdir, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { SimpleGit } from 'simple-git';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { createSimpleGit } from './git-env.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
describe('GitService.deleteDirectories', () => {
  let workdir: string;
  let git: SimpleGit;
  let gitService: GitService;

  // Fresh repository with a single commit; the service is pointed at it directly.
  beforeEach(async () => {
    workdir = await mkdtemp(join(tmpdir(), 'gitsvc-dd-'));
    git = createSimpleGit(workdir);
    await git.init();
    await git.addConfig('user.email', 't@test');
    await git.addConfig('user.name', 'Test');
    await writeFile(join(workdir, 'keep'), 'k');
    await git.add('.');
    await git.commit('init');

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: workdir, homeDir: workdir },
      git: { userName: 'Test', userEmail: 't@test' },
    };
    gitService = new GitService(coreConfig);
    // Overwrite the service's git handle and config dir so it operates on the temp repository.
    Object.assign(gitService, { git, configDir: workdir });
  });

  afterEach(async () => rm(workdir, { recursive: true, force: true }));

  it('removes multiple directories in a single commit', async () => {
    for (const name of ['a', 'b', 'c']) {
      const dir = join(workdir, name);
      await mkdir(dir, { recursive: true });
      await writeFile(join(dir, 'f.txt'), name);
    }
    await git.add('.');
    await git.commit('seed 3 dirs');
    const commitsBefore = (await git.log()).total;

    const result = await gitService.deleteDirectories(['a', 'b'], 'gc: drop a+b', 'System User', 'system@example.com');
    expect(result.commitHash).toBeTruthy();

    const remaining = await readdir(workdir);
    expect(remaining).not.toContain('a');
    expect(remaining).not.toContain('b');
    expect(remaining).toContain('c');

    // Both removals must land in exactly one new commit.
    const commitsAfter = (await git.log()).total;
    expect(commitsAfter).toBe(commitsBefore + 1);
  });

  // NOTE: despite the title, the assertion below pins an empty-string hash, not `null`.
  it('no-ops and returns a null hash when the input list is empty', async () => {
    const result = await gitService.deleteDirectories([], 'empty', 'X', 'x@example.com');

    expect(result.commitHash).toBe('');
    expect(result.created).toBe(false);
  });

  it('ignores paths that have already been deleted — commits only the remaining ones', async () => {
    const staleDir = join(workdir, 'stale');
    await mkdir(staleDir, { recursive: true });
    await writeFile(join(staleDir, 'x'), 'x');
    await git.add('.');
    await git.commit('seed stale');

    // 'missing' never existed; only 'stale' can actually be removed.
    const result = await gitService.deleteDirectories(
      ['stale', 'missing'],
      'gc: drop stale + missing',
      'System User',
      'system@example.com',
    );

    expect(result.commitHash).toBeTruthy();
    const remaining = await readdir(workdir);
    expect(remaining).not.toContain('stale');
  });
});
|
||||
45
packages/cli/src/context/core/git.service.patch.test.ts
Normal file
45
packages/cli/src/context/core/git.service.patch.test.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
/**
 * Creates a temp home directory, builds a `GitService` whose config dir is
 * `<home>/config`, and runs its `onModuleInit()`.
 *
 * @returns The home dir, the config dir, and the initialised service.
 */
async function makeGit() {
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-'));
  const configDir = join(homeDir, 'config');

  const gitSettings = {
    userName: 'System User',
    userEmail: 'system@example.com',
    bootstrapMessage: 'init',
    bootstrapAuthor: 'system',
    bootstrapAuthorEmail: 'system@example.com',
  };
  const git = new GitService({ storage: { configDir, homeDir }, git: gitSettings });
  await git.onModuleInit();

  return { homeDir, configDir, git };
}
|
||||
|
||||
describe('GitService patch helpers', () => {
  it('collects binary-safe no-rename patches and applies them with --3way --index', async () => {
    const { homeDir, configDir, git } = await makeGit();
    const pageRelPath = 'wiki/global/page.md';
    const pageAbsPath = join(configDir, pageRelPath);
    const author = 'System User';
    const authorEmail = 'system@example.com';

    // Base revision: the page with its old content.
    await mkdir(join(configDir, 'wiki/global'), { recursive: true });
    await writeFile(pageAbsPath, 'old\n');
    await git.commitFiles([pageRelPath], 'add page', author, authorEmail);
    const base = await git.revParseHead();

    // Edited revision, exported as a patch relative to the base.
    await writeFile(pageAbsPath, 'new\n');
    await git.commitFiles([pageRelPath], 'edit page', author, authorEmail);
    const patchPath = join(homeDir, 'proposal.patch');
    await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath);

    // Apply the patch in a separate worktree created at the base revision.
    const targetDir = join(homeDir, 'target');
    await git.addWorktree(targetDir, 'target', base);
    const targetGit = git.forWorktree(targetDir);
    await targetGit.applyPatchFile3WayIndex(patchPath);
    await targetGit.commitStaged('apply proposal', author, authorEmail);

    await expect(readFile(join(targetDir, pageRelPath), 'utf-8')).resolves.toBe('new\n');
  });
});
|
||||
56
packages/cli/src/context/core/git.service.reset-hard.test.ts
Normal file
56
packages/cli/src/context/core/git.service.reset-hard.test.ts
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { SimpleGit } from 'simple-git';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { createSimpleGit } from './git-env.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
describe('GitService.resetHardTo', () => {
  let workdir: string;
  let git: SimpleGit;
  let gitService: GitService;

  /** Current HEAD commit hash of the temp repository. */
  const headSha = async (): Promise<string> => (await git.revparse(['HEAD'])).trim();

  /** File content, or `null` when the file cannot be read. */
  const readOrNull = (name: string): Promise<string | null> =>
    readFile(join(workdir, name), 'utf-8').catch(() => null);

  // Fresh repository with a single commit; the service is pointed at it directly.
  beforeEach(async () => {
    workdir = await mkdtemp(join(tmpdir(), 'gitsvc-reset-'));
    git = createSimpleGit(workdir);
    await git.init();
    await git.addConfig('user.email', 't@test');
    await git.addConfig('user.name', 'Test');
    await writeFile(join(workdir, 'init'), 'init');
    await git.add('.');
    await git.commit('init');

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: workdir, homeDir: workdir },
      git: { userName: 'Test', userEmail: 't@test' },
    };
    gitService = new GitService(coreConfig);
    // Overwrite the service's git handle and config dir so it operates on the temp repository.
    Object.assign(gitService, { git, configDir: workdir });
  });

  afterEach(async () => rm(workdir, { recursive: true, force: true }));

  it('rewinds HEAD to the target SHA, removing later commits and their files', async () => {
    const baseSha = await headSha();

    // Two follow-up commits, each adding one file.
    await writeFile(join(workdir, 'a'), 'a1');
    await git.add('.');
    await git.commit('a');
    await writeFile(join(workdir, 'b'), 'b1');
    await git.add('.');
    await git.commit('b');

    await gitService.resetHardTo(baseSha);

    expect(await headSha()).toBe(baseSha);
    expect(await readOrNull('a')).toBeNull();
    expect(await readOrNull('b')).toBeNull();
  });

  it('is a no-op when target SHA equals current HEAD', async () => {
    const sha = await headSha();

    await gitService.resetHardTo(sha);

    expect(await headSha()).toBe(sha);
  });
});
|
||||
450
packages/cli/src/context/core/git.service.test.ts
Normal file
450
packages/cli/src/context/core/git.service.test.ts
Normal file
|
|
@ -0,0 +1,450 @@
|
|||
import { mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
// These tests drive a real git repo inside a temp directory — simple-git shells out to the
|
||||
// system `git` binary. They are fast enough to run as unit tests and catch real issues that
|
||||
// would be invisible with mocked git.
|
||||
describe('GitService', () => {
|
||||
let service: GitService;
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'git-service-spec-'));
|
||||
|
||||
const coreConfig: KtxCoreConfig = {
|
||||
storage: { configDir: tempDir, homeDir: tempDir },
|
||||
git: {
|
||||
userName: 'Test User',
|
||||
userEmail: 'test@example.com',
|
||||
bootstrapMessage: 'Initialize test config repo',
|
||||
bootstrapAuthor: 'test-system',
|
||||
bootstrapAuthorEmail: 'system@example.com',
|
||||
},
|
||||
};
|
||||
|
||||
service = new GitService(coreConfig);
|
||||
await service.onModuleInit();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
const writeAndCommit = async (filePath: string, content: string, message = 'msg') => {
|
||||
await writeFile(join(tempDir, filePath), content, 'utf-8');
|
||||
return service.commitFile(filePath, message, 'Test', 'test@example.com');
|
||||
};
|
||||
|
||||
// Covers what onModuleInit() leaves behind in a fresh repository.
describe('cold-start bootstrap commit', () => {
  it('writes an empty commit on init so HEAD always resolves', async () => {
    // The outer beforeEach has already run onModuleInit() against an empty temp dir.
    await expect(service.revParseHead()).resolves.toMatch(/^[0-9a-f]{40}$/);
  });

  it('does not double-commit when re-initialized', async () => {
    const headBefore = await service.revParseHead();
    await service.onModuleInit();
    await expect(service.revParseHead()).resolves.toBe(headBefore);
  });

  it('keeps git auto-maintenance attached for deterministic cleanup', async () => {
    const gitConfigText = await readFile(join(tempDir, '.git', 'config'), 'utf-8');

    expect(gitConfigText).toMatch(/\[gc]\n\s+autoDetach = false/);
    expect(gitConfigText).toMatch(/\[maintenance]\n\s+autoDetach = false/);
  });

  it('initializes when release automation sets GIT_ASKPASS', async () => {
    const releaseEnvDir = await mkdtemp(join(tmpdir(), 'git-service-release-env-'));
    const previousAskPass = process.env.GIT_ASKPASS;

    // Put GIT_ASKPASS back exactly as it was, including the "was unset" case.
    const restoreAskPass = (): void => {
      if (previousAskPass !== undefined) {
        process.env.GIT_ASKPASS = previousAskPass;
        return;
      }
      delete process.env.GIT_ASKPASS;
    };

    process.env.GIT_ASKPASS = 'echo';

    try {
      const releaseEnvConfig: KtxCoreConfig = {
        storage: { configDir: releaseEnvDir, homeDir: releaseEnvDir },
        git: {
          userName: 'Test User',
          userEmail: 'test@example.com',
          bootstrapMessage: 'Initialize test config repo',
          bootstrapAuthor: 'test-system',
          bootstrapAuthorEmail: 'system@example.com',
        },
      };
      const releaseEnvService = new GitService(releaseEnvConfig);

      await expect(releaseEnvService.onModuleInit()).resolves.toBeUndefined();
    } finally {
      restoreAskPass();
      await rm(releaseEnvDir, { recursive: true, force: true });
    }
  });
});
|
||||
|
||||
// `created` reports whether commitFile actually produced a new commit.
describe('commitFile `created` flag', () => {
  it('is true for a real commit', async () => {
    const { created } = await writeAndCommit('a.md', '# Hello');
    expect(created).toBe(true);
  });

  it('is false on a no-op write (content unchanged)', async () => {
    await writeAndCommit('a.md', '# Hello');
    // Same content again: there is nothing new to commit.
    const { created } = await writeAndCommit('a.md', '# Hello', 'unused');
    expect(created).toBe(false);
  });
});
|
||||
|
||||
// Round-trips notes attached to commits through addNote() and getNote().
describe('addNote / getNote', () => {
  /** Commits a fixed file and returns the hash of that commit. */
  const commitSample = async (): Promise<string> => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    return commitHash;
  };

  it('attaches a note and reads it back', async () => {
    const hash = await commitSample();
    await service.addNote(hash, 'Rich message from LLM');
    await expect(service.getNote(hash)).resolves.toBe('Rich message from LLM');
  });

  it('returns undefined when no note exists', async () => {
    const hash = await commitSample();
    await expect(service.getNote(hash)).resolves.toBeUndefined();
  });

  it('overwrites an existing note (idempotent retries)', async () => {
    const hash = await commitSample();
    await service.addNote(hash, 'First');
    await service.addNote(hash, 'Second');
    await expect(service.getNote(hash)).resolves.toBe('Second');
  });

  it('skips empty/whitespace messages silently', async () => {
    const hash = await commitSample();
    await service.addNote(hash, ' ');
    await expect(service.getNote(hash)).resolves.toBeUndefined();
  });
});
|
||||
|
||||
// The newest history entry should expose the commit's note as `enhancedMessage`.
describe('getFileHistory', () => {
  it('surfaces enhancedMessage when a note is present', async () => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');
    await service.addNote(commitHash, 'Note body');

    const [latest] = await service.getFileHistory('a.md');
    expect(latest?.enhancedMessage).toBe('Note body');
  });

  it('leaves enhancedMessage undefined when no note is attached', async () => {
    await writeAndCommit('a.md', '# Hello');

    const [latest] = await service.getFileHistory('a.md');
    expect(latest?.enhancedMessage).toBeUndefined();
  });
});
|
||||
|
||||
// getCommitDiff(hash, path) should return the patch text for one file in one commit.
describe('getCommitDiff', () => {
  it('returns the patch scoped to the requested path', async () => {
    const { commitHash } = await writeAndCommit('a.md', '# Hello');

    const patch = await service.getCommitDiff(commitHash, 'a.md');

    for (const fragment of ['diff --git', 'Hello']) {
      expect(patch).toContain(fragment);
    }
  });

  it('handles the repository initial commit without throwing', async () => {
    const { commitHash } = await writeAndCommit('first.md', 'first');

    const pendingDiff = service.getCommitDiff(commitHash, 'first.md');

    await expect(pendingDiff).resolves.toBeDefined();
  });
});
|
||||
|
||||
// squashTo(preHead, opts) collapses the commits made after `preHead` into a single commit.
describe('squashTo', () => {
  const SYSTEM_NAME = 'System User';
  const SYSTEM_EMAIL = 'system@example.com';

  /** Writes a file and commits it as the system author that squashTo is invoked with. */
  const writeAsSystem = async (filePath: string, content: string, message = 'msg') => {
    await writeFile(join(tempDir, filePath), content, 'utf-8');
    return service.commitFile(filePath, message, SYSTEM_NAME, SYSTEM_EMAIL);
  };

  /** Runs squashTo with the system author identity and the given commit message. */
  const squashAsSystem = (preHead: string, message: string) =>
    service.squashTo(preHead, { message, author: SYSTEM_NAME, authorEmail: SYSTEM_EMAIL });

  it('collapses 3 commits after preHead into a single commit', async () => {
    const { commitHash: preHead } = await writeAsSystem('a.md', 'v1');

    await writeAsSystem('b.md', 'b', 'add b');
    await writeAsSystem('c.md', 'c', 'add c');
    await writeAsSystem('a.md', 'v2', 'update a');

    const result = await squashAsSystem(preHead, 'Ingest: bundle 3 writes');

    expect(result.squashed).toBe(true);
    expect(result.squashedCount).toBe(3);
    expect(result.commitHash).toBeTruthy();
    expect(result.commitHash).not.toBe(preHead);

    const squashHash = result.commitHash;
    if (!squashHash) {
      throw new Error('Expected squash commit hash');
    }

    // The squashed commit should preserve the final tree state.
    await expect(service.getFileAtCommit('a.md', squashHash)).resolves.toBe('v2');
    await expect(service.getFileAtCommit('b.md', squashHash)).resolves.toBe('b');
  });

  it('is a no-op when preHead equals HEAD', async () => {
    const { commitHash: preHead } = await writeAsSystem('a.md', 'v1');

    const result = await squashAsSystem(preHead, 'nothing to squash');

    expect(result.squashed).toBe(false);
    expect(result.commitHash).toBe(preHead);
  });

  it('skips squash when a foreign-author commit sits between preHead and HEAD', async () => {
    const { commitHash: preHead } = await writeAsSystem('a.md', 'v1');

    await writeAsSystem('b.md', 'from us', 'ours');
    // Foreign commit: writeAndCommit uses a different author identity.
    await writeAndCommit('c.md', 'from someone else', 'foreign');
    await writeAsSystem('d.md', 'ours again', 'ours 2');

    const result = await squashAsSystem(preHead, 'should be skipped');

    expect(result.squashed).toBe(false);
    expect(result.reason).toContain('foreign');
    expect(result.squashedCount).toBe(3);
  });

  it('returns cleanly when preHead is empty (no starting commit)', async () => {
    const result = await squashAsSystem('', 'would have squashed');

    expect(result.squashed).toBe(false);
    expect(result.commitHash).toBeNull();
  });
});
|
||||
|
||||
// Covers addWorktree / removeWorktree / deleteBranch against a real repository.
describe('worktree lifecycle', () => {
  // macOS canonicalizes tmp paths (/var/folders → /private/var/folders) when git
  // returns them from `worktree list`. Resolve through realpath() before comparing.
  const canonicalSiblingPath = async (suffix: string): Promise<string> => {
    const parent = await realpath(join(tempDir, '..'));
    return join(parent, `wt-${Date.now()}-${suffix}`);
  };

  // Worktrees are created next to tempDir, not inside it, so the outer afterEach never
  // removes them. Both steps are best-effort because the test may already have removed
  // the worktree itself.
  const disposeWorktree = async (wtDir: string): Promise<void> => {
    await service.removeWorktree(wtDir).catch(() => undefined);
    await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
  };

  it('addWorktree creates a branch + directory at the given startSha', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    const wtDir = await canonicalSiblingPath('add');
    try {
      await service.addWorktree(wtDir, 'session/alpha', commitHash);
      const list = await service.listWorktrees();
      expect(list.find((e) => e.path === wtDir && e.branch === 'refs/heads/session/alpha')).toBeTruthy();
    } finally {
      // Runs even when an assertion fails, so no worktree is left behind in the tmp dir.
      await disposeWorktree(wtDir);
    }
  });

  it('removeWorktree detaches the worktree entry', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    const wtDir = await canonicalSiblingPath('rm');
    try {
      await service.addWorktree(wtDir, 'session/beta', commitHash);
      await service.removeWorktree(wtDir);
      const list = await service.listWorktrees();
      expect(list.find((e) => e.path === wtDir)).toBeFalsy();
    } finally {
      await disposeWorktree(wtDir);
    }
  });

  it('deleteBranch removes a branch ref', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');
    const wtDir = await canonicalSiblingPath('br');
    try {
      await service.addWorktree(wtDir, 'session/gamma', commitHash);
      await service.removeWorktree(wtDir);
      await service.deleteBranch('session/gamma', true);
      const branches = await (service as unknown as { git: import('simple-git').SimpleGit }).git.branchLocal();
      expect(branches.all).not.toContain('session/gamma');
    } finally {
      await disposeWorktree(wtDir);
    }
  });
});
|
||||
|
||||
// forWorktree(dir) should return a GitService whose operations run inside that worktree.
describe('forWorktree', () => {
  /**
   * Adds a worktree next to tempDir, runs `body` against it, and always disposes of it.
   *
   * The worktree lives outside tempDir, so the outer afterEach would not remove it; the
   * `finally` keeps a failing assertion from leaving it behind. The parent directory is
   * resolved through realpath() so the path matches what git reports on macOS.
   */
  const withWorktree = async (
    suffix: string,
    branch: string,
    startSha: string,
    body: (wtDir: string) => Promise<void>,
  ): Promise<void> => {
    const parent = await realpath(join(tempDir, '..'));
    const wtDir = join(parent, `wt-${Date.now()}-${suffix}`);
    try {
      await service.addWorktree(wtDir, branch, startSha);
      await body(wtDir);
    } finally {
      await service.removeWorktree(wtDir).catch(() => undefined);
      await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
    }
  };

  it('returns a GitService whose operations run inside the given worktree', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');

    await withWorktree('fw', 'session/delta', commitHash, async (wtDir) => {
      const scoped = service.forWorktree(wtDir);
      expect(await scoped.revParseHead()).toBe(commitHash);
    });
  });

  it('serializes concurrent commits from scoped services targeting the same worktree', async () => {
    const { commitHash } = await writeAndCommit('seed.md', 'seed');

    await withWorktree('fw-concurrent', 'session/concurrent', commitHash, async (wtDir) => {
      const first = service.forWorktree(wtDir);
      const second = service.forWorktree(wtDir);
      await writeFile(join(wtDir, 'a.md'), 'a\n', 'utf-8');
      await writeFile(join(wtDir, 'b.md'), 'b\n', 'utf-8');

      // Both commits are started before either is awaited.
      const [a, b] = await Promise.all([
        first.commitFile('a.md', 'add a', 'System User', 'system@example.com'),
        second.commitFile('b.md', 'add b', 'System User', 'system@example.com'),
      ]);

      expect(a.commitHash).toMatch(/^[0-9a-f]{40}$/);
      expect(b.commitHash).toMatch(/^[0-9a-f]{40}$/);
      await expect(first.getFileAtCommit('a.md', a.commitHash)).resolves.toBe('a\n');
      await expect(second.getFileAtCommit('b.md', b.commitHash)).resolves.toBe('b\n');
    });
  });
});
|
||||
|
||||
// squashMergeIntoMain(branch, author, email, message) should land a session branch on main
// as one commit, or report a conflict.
describe('squashMergeIntoMain', () => {
  const SYSTEM_NAME = 'System User';
  const SYSTEM_EMAIL = 'system@example.com';

  /**
   * Adds a session worktree next to tempDir, runs `body` against it, and always disposes of it.
   *
   * The worktree lives outside tempDir, so the outer afterEach would not remove it; the
   * `finally` keeps a failing assertion from leaving it behind. The parent directory is
   * resolved through realpath() so the path matches what git reports on macOS.
   */
  const withSessionWorktree = async (
    suffix: string,
    branch: string,
    startSha: string,
    body: (wtDir: string) => Promise<void>,
  ): Promise<void> => {
    const parent = await realpath(join(tempDir, '..'));
    const wtDir = join(parent, `wt-${Date.now()}-${suffix}`);
    try {
      await service.addWorktree(wtDir, branch, startSha);
      await body(wtDir);
    } finally {
      await service.removeWorktree(wtDir).catch(() => undefined);
      await rm(wtDir, { recursive: true, force: true }).catch(() => undefined);
    }
  };

  /** Writes `content` to `fileName` inside the worktree and commits it there as the system author. */
  const commitInWorktree = async (
    wtDir: string,
    fileName: string,
    content: string,
    message: string,
  ): Promise<void> => {
    await writeFile(join(wtDir, fileName), content, 'utf-8');
    await service.forWorktree(wtDir).commitFile(fileName, message, SYSTEM_NAME, SYSTEM_EMAIL);
  };

  /** Status of the main checkout, read through the service's underlying simple-git instance. */
  const mainStatus = () => (service as unknown as { git: import('simple-git').SimpleGit }).git.status();

  it('merges a session branch as one commit on main, returning the new SHA + touched paths', async () => {
    const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');

    await withSessionWorktree('sm', 'session/happy', baseSha, async (wtDir) => {
      await commitInWorktree(wtDir, 'a.yaml', 'one: 1\n', 'wip a');
      await commitInWorktree(wtDir, 'b.yaml', 'two: 2\n', 'wip b');

      const result = await service.squashMergeIntoMain(
        'session/happy',
        SYSTEM_NAME,
        SYSTEM_EMAIL,
        'Memory capture: 2 files [chat=abcd1234]',
      );

      expect(result.ok).toBe(true);
      if (!result.ok) {
        throw new Error('unreachable');
      }
      expect(result.squashSha).toMatch(/^[0-9a-f]{40}$/);
      // Sort a copy: Array.prototype.sort would otherwise reorder the returned array in place.
      expect([...result.touchedPaths].sort()).toEqual(['a.yaml', 'b.yaml']);

      const mainHead = await service.revParseHead();
      expect(mainHead).toBe(result.squashSha);
      expect(mainHead).not.toBe(baseSha);
    });
  });

  it('returns ok with empty touchedPaths when the session branch has no diff vs main', async () => {
    const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');

    await withSessionWorktree('sm-empty', 'session/empty', baseSha, async () => {
      const result = await service.squashMergeIntoMain(
        'session/empty',
        SYSTEM_NAME,
        SYSTEM_EMAIL,
        'should be a no-op',
      );

      expect(result.ok).toBe(true);
      if (!result.ok) {
        throw new Error('unreachable');
      }
      expect(result.touchedPaths).toEqual([]);
      expect(result.squashSha).toBe(baseSha);
    });
  });

  it('returns conflict=true and leaves main clean when session+main touched same file differently', async () => {
    await writeAndCommit('shared.yaml', 'base\n');
    const base = await service.revParseHead();
    if (!base) {
      throw new Error('no base head');
    }

    await withSessionWorktree('conf', 'session/conf', base, async (wtDir) => {
      await commitInWorktree(wtDir, 'shared.yaml', 'session-edit\n', 'session edit');

      // Main edits the same file a different way, after the session branched.
      await writeAndCommit('shared.yaml', 'main-edit\n');

      const result = await service.squashMergeIntoMain(
        'session/conf',
        SYSTEM_NAME,
        SYSTEM_EMAIL,
        'Memory capture: 1 file [chat=dead1234]',
      );

      expect(result.ok).toBe(false);
      if (result.ok) {
        throw new Error('unreachable');
      }
      expect(result.conflict).toBe(true);
      expect(result.conflictPaths).toContain('shared.yaml');

      const status = await mainStatus();
      expect(status.isClean()).toBe(true);
    });
  });

  it('reports untracked files that would be overwritten by the squash merge', async () => {
    const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed');

    await withSessionWorktree('untracked', 'session/untracked', baseSha, async (wtDir) => {
      await commitInWorktree(wtDir, 'knowledge.md', 'session version\n', 'session write');
      // Same path exists in the main checkout, but untracked.
      await writeFile(join(tempDir, 'knowledge.md'), 'untracked local version\n', 'utf-8');

      const result = await service.squashMergeIntoMain(
        'session/untracked',
        SYSTEM_NAME,
        SYSTEM_EMAIL,
        'Memory capture: 1 file [chat=untracked]',
      );

      expect(result.ok).toBe(false);
      if (result.ok) {
        throw new Error('unreachable');
      }
      expect(result.conflict).toBe(true);
      expect(result.conflictPaths).toEqual(['knowledge.md']);

      const status = await mainStatus();
      expect(status.not_added).toContain('knowledge.md');
    });
  });
});
|
||||
});
|
||||
1062
packages/cli/src/context/core/git.service.ts
Normal file
1062
packages/cli/src/context/core/git.service.ts
Normal file
File diff suppressed because it is too large
Load diff
48
packages/cli/src/context/core/redaction.ts
Normal file
48
packages/cli/src/context/core/redaction.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
/** @internal */
export const REDACTED_KTX_CREDENTIAL_VALUE = '<redacted>';

/** A field whose name contains any of these fragments (case-insensitive) is treated as sensitive. */
const SENSITIVE_FIELD_NAME = /(password|secret|token|api[_-]?key|private[_-]?key|passphrase|credential|authorization|url)/i;

/** Matches `scheme://user:password@`; group 2 is the password, groups 1 and 3 are kept as-is. */
const URL_CREDENTIAL_PATTERN = /([a-z][a-z0-9+.-]*:\/\/[^:\s/@]+:)([^@\s/]+)(@)/gi;

/** True for plain (non-null, non-array) objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** True when `key` looks like the name of a secret-bearing field. */
function isSensitiveField(key: string): boolean {
  return SENSITIVE_FIELD_NAME.test(key);
}

/**
 * Redacts a single value according to the name of the field it is stored under.
 *
 * @param key - Name of the field holding `value`.
 * @param value - The value to inspect.
 * @returns The redaction placeholder when `key` is sensitive (whatever the value's shape);
 *   otherwise arrays are redacted element by element under the same key, records are
 *   redacted field by field, and any other value is returned unchanged.
 */
export function redactKtxSensitiveValue(key: string, value: unknown): unknown {
  if (isSensitiveField(key)) {
    return REDACTED_KTX_CREDENTIAL_VALUE;
  }
  if (Array.isArray(value)) {
    return value.map((item) => redactKtxSensitiveValue(key, item));
  }
  if (isRecord(value)) {
    return redactKtxSensitiveMetadata(value);
  }
  return value;
}

/**
 * Returns a redacted copy of `metadata`; the input object is not modified.
 *
 * Array-valued fields keep their array shape and each element is redacted under the
 * field's own name. For a sensitive field every element becomes the placeholder,
 * including record elements, so `secrets: [{ name, value }]` cannot leak `value`.
 *
 * @param metadata - Record to redact.
 * @returns A new record with the same keys and redacted values.
 */
export function redactKtxSensitiveMetadata(metadata: Record<string, unknown>): Record<string, unknown> {
  // Object.fromEntries defines every key as an own data property, so an own `__proto__`
  // key (as produced by JSON.parse) is preserved instead of changing the prototype.
  return Object.fromEntries(
    Object.entries(metadata).map(([key, value]): [string, unknown] => [
      key,
      Array.isArray(value)
        ? value.map((item) => redactKtxSensitiveValue(key, item))
        : redactKtxSensitiveValue(key, value),
    ]),
  );
}

/**
 * Replaces the password of every `scheme://user:password@` occurrence in free text.
 *
 * @param value - Text that may contain URLs with embedded credentials.
 * @returns The text with each matched password replaced by the redaction placeholder.
 */
export function redactKtxSensitiveText(value: string): string {
  return value.replace(URL_CREDENTIAL_PATTERN, `$1${REDACTED_KTX_CREDENTIAL_VALUE}$3`);
}
|
||||
124
packages/cli/src/context/core/session-worktree.service.test.ts
Normal file
124
packages/cli/src/context/core/session-worktree.service.test.ts
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
import { mkdtemp, realpath, rm, stat } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { KtxCoreConfig } from './config.js';
|
||||
import { GitService } from './git.service.js';
|
||||
import { SessionWorktreeService, type WorktreeConfigPort } from './session-worktree.service.js';
|
||||
|
||||
interface TestWorktreeConfig extends WorktreeConfigPort<TestWorktreeConfig> {
|
||||
workdir?: string;
|
||||
}
|
||||
|
||||
// SessionWorktreeService glues a real GitService to a scoped config adapter.
|
||||
describe('SessionWorktreeService', () => {
  let sessionService: SessionWorktreeService<TestWorktreeConfig>;
  let gitService: GitService;
  let homeDir: string;

  /** HEAD of the main repository; throws when it cannot be resolved. */
  const requireHead = async (): Promise<string> => {
    const sha = await gitService.revParseHead();
    if (!sha) {
      throw new Error('no base sha');
    }
    return sha;
  };

  beforeEach(async () => {
    const rawHome = await mkdtemp(join(tmpdir(), 'sws-spec-'));
    homeDir = await realpath(rawHome);

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: homeDir, homeDir },
      git: {
        userName: 'System User',
        userEmail: 'system@example.com',
        bootstrapMessage: 'Initialize test config repo',
        bootstrapAuthor: 'test-system',
        bootstrapAuthorEmail: 'system@example.com',
      },
    };

    gitService = new GitService(coreConfig);
    await gitService.onModuleInit();

    // Stub config port: each scoped config records its workdir and shares the same factory.
    const configService: TestWorktreeConfig = {
      forWorktree: vi.fn(
        (workdir: string): TestWorktreeConfig => ({ workdir, forWorktree: configService.forWorktree }),
      ),
    };

    sessionService = new SessionWorktreeService({ coreConfig, gitService, configService });
  });

  afterEach(async () => {
    await rm(homeDir, { recursive: true, force: true });
  });

  describe('create', () => {
    it('creates a worktree + branch and returns scoped services', async () => {
      const baseSha = await requireHead();

      const session = await sessionService.create('chat-abc', baseSha);

      expect(session.workdir).toBe(join(homeDir, '.worktrees', 'session-chat-abc'));
      expect(session.branch).toBe('session/chat-abc');
      expect(session.baseSha).toBe(baseSha);
      const dirStats = await stat(session.workdir);
      expect(dirStats.isDirectory()).toBe(true);

      // Scoped git instance reports the worktree's HEAD (= baseSha at creation time).
      await expect(session.git.revParseHead()).resolves.toBe(baseSha);

      const worktrees = await gitService.listWorktrees();
      expect(worktrees.find((entry) => entry.path === session.workdir)).toBeTruthy();
    });

    it('appends a timestamp suffix when the primary dir already exists', async () => {
      const baseSha = await requireHead();

      const first = await sessionService.create('chat-dup', baseSha);
      const second = await sessionService.create('chat-dup', baseSha);

      expect(first.workdir).not.toBe(second.workdir);
      expect(second.branch).toMatch(/^session\/chat-dup-\d+$/);
    });
  });

  describe('cleanup', () => {
    it('success removes the worktree dir and deletes the branch', async () => {
      const baseSha = await requireHead();

      const session = await sessionService.create('chat-cleanup-ok', baseSha);
      await sessionService.cleanup(session, 'success');

      const worktrees = await gitService.listWorktrees();
      expect(worktrees.find((entry) => entry.path === session.workdir)).toBeFalsy();
      await expect(stat(session.workdir)).rejects.toThrow();
    });

    it('conflict keeps the worktree and writes a sentinel file', async () => {
      const baseSha = await requireHead();

      const session = await sessionService.create('chat-cleanup-conflict', baseSha);
      await sessionService.cleanup(session, 'conflict', { conflictPaths: ['shared.yaml'] });

      // Dir still exists.
      await expect(stat(session.workdir)).resolves.toBeTruthy();

      const { readFile } = await import('node:fs/promises');
      const sentinelText = await readFile(join(session.workdir, '.ktx-outcome'), 'utf-8');
      const sentinel = JSON.parse(sentinelText);
      expect(sentinel.outcome).toBe('conflict');
      expect(sentinel.chatId).toBe('chat-cleanup-conflict');
      expect(sentinel.conflictPaths).toEqual(['shared.yaml']);
      expect(typeof sentinel.at).toBe('string');
    });
  });
});
|
||||
113
packages/cli/src/context/core/session-worktree.service.ts
Normal file
113
packages/cli/src/context/core/session-worktree.service.ts
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import { mkdir, stat, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { noopLogger, resolveWorktreesDir, type KtxCoreConfig, type KtxLogger } from './config.js';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
/**
 * How a session ended. `SessionWorktreeService.cleanup` removes the worktree and its branch
 * for `'success'` and `'empty'`; for any other outcome it keeps the worktree and writes a
 * `.ktx-outcome` sentinel file into it.
 */
export type SessionOutcome = 'success' | 'empty' | 'conflict' | 'crash';

/** JSON document written to `.ktx-outcome` inside a worktree that is kept after cleanup. */
type SentinelPayload = {
  outcome: SessionOutcome;
  /** ISO-8601 timestamp taken when the sentinel is written. */
  at: string;
  chatId: string;
  baseSha: string;
  /** Present only when the caller passed conflict paths to `cleanup`. */
  conflictPaths?: string[];
};

/** A config object that can produce a counterpart of itself for a given worktree directory. */
export interface WorktreeConfigPort<TConfig> {
  forWorktree(workdir: string): TConfig;
}

/** Handle returned by `SessionWorktreeService.create`. */
export interface SessionWorktree<TConfig> {
  /** The session key that `create` was called with. */
  chatId: string;
  /** Directory of the worktree. */
  workdir: string;
  /** Branch name passed to `addWorktree` for this session. */
  branch: string;
  /** Commit the worktree was started from. */
  baseSha: string;
  createdAt: Date;
  /** Result of `gitService.forWorktree(workdir)`. */
  git: GitService;
  /** Result of `configService.forWorktree(workdir)`. */
  config: TConfig;
}

/** Collaborators required by `SessionWorktreeService`. */
export interface SessionWorktreeServiceDeps<TConfig extends WorktreeConfigPort<TConfig>> {
  coreConfig: KtxCoreConfig;
  gitService: GitService;
  configService: TConfig;
  /** Optional; the service falls back to `noopLogger` when omitted. */
  logger?: KtxLogger;
}
|
||||
|
||||
/**
 * Creates one git worktree per session under the configured worktrees directory and
 * disposes of it (or marks it with a sentinel file) once the session ends.
 */
export class SessionWorktreeService<TConfig extends WorktreeConfigPort<TConfig> = WorktreeConfigPort<never>> {
  private readonly logger: KtxLogger;
  private readonly worktreesRoot: string;

  constructor(private readonly deps: SessionWorktreeServiceDeps<TConfig>) {
    this.logger = deps.logger ?? noopLogger;
    this.worktreesRoot = resolveWorktreesDir(deps.coreConfig);
  }

  /**
   * Adds a worktree and branch for `sessionKey`, starting at `baseSha`.
   *
   * The directory is `session-<key>` and the branch `session/<key>`. When that directory
   * already exists, a `Date.now()` suffix is appended to both names and a warning is logged.
   *
   * @returns The session handle, including a GitService and config scoped to the worktree.
   */
  async create(sessionKey: string, baseSha: string): Promise<SessionWorktree<TConfig>> {
    await mkdir(this.worktreesRoot, { recursive: true });

    let label = sessionKey;
    try {
      await stat(join(this.worktreesRoot, `session-${label}`));
      // stat resolved, so the primary directory is taken: switch to a suffixed name.
      const stamp = Date.now().toString();
      label = `${sessionKey}-${stamp}`;
      this.logger.warn(`session worktree collision for key=${sessionKey}; using suffix ${stamp}`);
    } catch {
      // no collision: primary name is free
    }

    const workdir = join(this.worktreesRoot, `session-${label}`);
    const branch = `session/${label}`;
    const { gitService, configService } = this.deps;

    await gitService.addWorktree(workdir, branch, baseSha);

    const createdAt = new Date();
    const git = gitService.forWorktree(workdir);
    const config = configService.forWorktree(workdir);
    return { chatId: sessionKey, workdir, branch, baseSha, createdAt, git, config };
  }

  /**
   * Disposes of a session according to how it ended.
   *
   * `'success'` and `'empty'` remove the worktree and force-delete its branch. Every other
   * outcome keeps the worktree and writes a `.ktx-outcome` JSON sentinel into it. Failures
   * in either path are logged as warnings and not rethrown.
   */
  async cleanup(
    session: SessionWorktree<TConfig>,
    outcome: SessionOutcome,
    extra?: { conflictPaths?: string[] },
  ): Promise<void> {
    const keepsWorktree = outcome !== 'success' && outcome !== 'empty';
    if (keepsWorktree) {
      await this.writeSentinel(session, outcome, extra?.conflictPaths);
      return;
    }

    try {
      await this.deps.gitService.removeWorktree(session.workdir);
      await this.deps.gitService.deleteBranch(session.branch, true);
    } catch (error) {
      this.logger.warn(`cleanup(${outcome}) failed for ${session.chatId}: ${SessionWorktreeService.describe(error)}`);
    }
  }

  /** Writes the `.ktx-outcome` sentinel into the session's worktree; a write failure is only logged. */
  private async writeSentinel(
    session: SessionWorktree<TConfig>,
    outcome: SessionOutcome,
    conflictPaths: string[] | undefined,
  ): Promise<void> {
    const payload: SentinelPayload = {
      outcome,
      at: new Date().toISOString(),
      chatId: session.chatId,
      baseSha: session.baseSha,
    };
    if (conflictPaths) {
      payload.conflictPaths = conflictPaths;
    }

    const sentinelPath = join(session.workdir, '.ktx-outcome');
    try {
      await writeFile(sentinelPath, JSON.stringify(payload, null, 2), 'utf-8');
    } catch (error) {
      this.logger.warn(
        `cleanup(${outcome}) failed to write sentinel for ${session.chatId}: ${SessionWorktreeService.describe(error)}`,
      );
    }
  }

  /** Message of an Error, or the string form of any other thrown value. */
  private static describe(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }
}
|
||||
339
packages/cli/src/context/daemon/semantic-layer-compute.test.ts
Normal file
339
packages/cli/src/context/daemon/semantic-layer-compute.test.ts
Normal file
|
|
@ -0,0 +1,339 @@
|
|||
import { once } from 'node:events';
|
||||
import { createServer } from 'node:http';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createHttpSemanticLayerComputePort, createPythonSemanticLayerComputePort } from './semantic-layer-compute.js';
|
||||
|
||||
const source = {
|
||||
name: 'orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'number' }],
|
||||
joins: [],
|
||||
measures: [{ name: 'order_count', expr: 'count(*)' }],
|
||||
};
|
||||
|
||||
const sourceGenerationInput = {
|
||||
tables: [
|
||||
{
|
||||
name: 'orders',
|
||||
db: 'public',
|
||||
comment: 'Orders table',
|
||||
columns: [
|
||||
{ name: 'id', type: 'integer', primaryKey: true, nullable: false, comment: 'Order ID' },
|
||||
{ name: 'customer_id', type: 'integer' },
|
||||
{ name: 'amount', type: 'decimal', comment: 'Order amount' },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'customers',
|
||||
db: 'public',
|
||||
columns: [
|
||||
{ name: 'id', type: 'integer', primaryKey: true },
|
||||
{ name: 'email', type: 'varchar' },
|
||||
],
|
||||
},
|
||||
],
|
||||
links: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'customer_id',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
relationshipType: 'MANY_TO_ONE',
|
||||
},
|
||||
],
|
||||
dialect: 'postgres',
|
||||
};
|
||||
|
||||
const sourceGenerationDaemonPayload = {
|
||||
tables: [
|
||||
{
|
||||
name: 'orders',
|
||||
db: 'public',
|
||||
comment: 'Orders table',
|
||||
columns: [
|
||||
{ name: 'id', type: 'integer', primary_key: true, nullable: false, comment: 'Order ID' },
|
||||
{ name: 'customer_id', type: 'integer' },
|
||||
{ name: 'amount', type: 'decimal', comment: 'Order amount' },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'customers',
|
||||
db: 'public',
|
||||
columns: [
|
||||
{ name: 'id', type: 'integer', primary_key: true },
|
||||
{ name: 'email', type: 'varchar' },
|
||||
],
|
||||
},
|
||||
],
|
||||
links: [
|
||||
{
|
||||
from_table: 'orders',
|
||||
from_column: 'customer_id',
|
||||
to_table: 'customers',
|
||||
to_column: 'id',
|
||||
relationship_type: 'MANY_TO_ONE',
|
||||
},
|
||||
],
|
||||
dialect: 'postgres',
|
||||
};
|
||||
|
||||
const sourceGenerationDaemonResponse = {
|
||||
source_count: 2,
|
||||
sources: [
|
||||
{
|
||||
name: 'orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [{ name: 'id', type: 'number' }],
|
||||
joins: [
|
||||
{
|
||||
to: 'customers',
|
||||
on: 'customer_id = customers.id',
|
||||
relationship: 'many_to_one',
|
||||
},
|
||||
],
|
||||
measures: [{ name: 'record_count', expr: 'count(id)' }],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
// Exercises the stdio-backed port with an injected `runJson` mock, so no Python
// process is spawned: each test checks the subcommand name, the snake_case wire
// payload, and the camelCase result handed back to the caller.
describe('createPythonSemanticLayerComputePort', () => {
  it('calls the semantic-query stdio command', async () => {
    const daemonReply = {
      sql: 'select count(*) from public.orders',
      dialect: 'postgres',
      columns: [{ name: 'orders.order_count' }],
      plan: { sources_used: ['orders'] },
    };
    const runJson = vi.fn(async () => daemonReply);
    const port = createPythonSemanticLayerComputePort({ runJson });

    const result = await port.query({
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });

    expect(result).toEqual({
      sql: 'select count(*) from public.orders',
      dialect: 'postgres',
      columns: [{ name: 'orders.order_count' }],
      plan: { sources_used: ['orders'] },
    });
    expect(runJson).toHaveBeenCalledWith('semantic-query', {
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });
  });

  it('calls the semantic-validate stdio command', async () => {
    const daemonReply = {
      valid: true,
      errors: [],
      warnings: [],
      per_source_warnings: {},
    };
    const runJson = vi.fn(async () => daemonReply);
    const port = createPythonSemanticLayerComputePort({ runJson });

    const result = await port.validateSources({
      sources: [source],
      dialect: 'postgres',
      recentlyTouched: ['orders'],
    });

    // `per_source_warnings` comes back to the caller as `perSourceWarnings`.
    expect(result).toEqual({
      valid: true,
      errors: [],
      warnings: [],
      perSourceWarnings: {},
    });
    // `recentlyTouched` goes over the wire as `recently_touched`.
    expect(runJson).toHaveBeenCalledWith('semantic-validate', {
      sources: [source],
      dialect: 'postgres',
      recently_touched: ['orders'],
    });
  });

  it('calls the semantic-generate-sources stdio command', async () => {
    const runJson = vi.fn(async () => sourceGenerationDaemonResponse);
    const port = createPythonSemanticLayerComputePort({ runJson });

    const result = await port.generateSources(sourceGenerationInput);

    expect(result).toEqual({
      sourceCount: 2,
      sources: sourceGenerationDaemonResponse.sources,
    });
    expect(runJson).toHaveBeenCalledWith('semantic-generate-sources', sourceGenerationDaemonPayload);
  });
});
|
||||
|
||||
// Covers the HTTP-backed port twice over: with an injected `requestJson` mock
// (endpoint paths + wire payloads) and against a real loopback HTTP server
// (the default POST transport).
describe('createHttpSemanticLayerComputePort', () => {
  interface RecordedRequest {
    url: string | undefined;
    body: unknown;
  }

  /**
   * Starts a loopback HTTP server that records each request (URL + parsed JSON
   * body) and answers every request with `responseBody`, runs `run` against it,
   * then waits until the server has fully closed.
   */
  async function withRecordingDaemon(
    responseBody: unknown,
    run: (baseUrl: string, requests: RecordedRequest[]) => Promise<void>,
  ): Promise<void> {
    const requests: RecordedRequest[] = [];
    const server = createServer((request, response) => {
      const chunks: Buffer[] = [];
      request.on('data', (chunk: Buffer) => chunks.push(chunk));
      request.on('end', () => {
        requests.push({
          url: request.url,
          body: JSON.parse(Buffer.concat(chunks).toString('utf8')),
        });
        // `connection: close` keeps the client from parking an idle keep-alive
        // socket, which would otherwise delay the awaited server shutdown below.
        response.writeHead(200, { 'content-type': 'application/json', connection: 'close' });
        response.end(JSON.stringify(responseBody));
      });
    });

    server.listen(0, '127.0.0.1');
    await once(server, 'listening');
    try {
      const address = server.address();
      if (!address || typeof address === 'string') {
        throw new Error('expected TCP server address');
      }
      await run(`http://127.0.0.1:${address.port}`, requests);
    } finally {
      // Await the shutdown so the listening socket never outlives the test.
      const closed = once(server, 'close');
      server.close();
      await closed;
    }
  }

  it('calls semantic query and validate HTTP endpoints through an injected runner', async () => {
    const requestJson = vi.fn(async (path: string) => {
      if (path === '/semantic-layer/query') {
        return {
          sql: 'select count(*) from public.orders',
          dialect: 'postgres',
          columns: [{ name: 'orders.order_count' }],
          plan: { sources_used: ['orders'] },
        };
      }
      return {
        valid: true,
        errors: [],
        warnings: [],
        per_source_warnings: {},
      };
    });
    const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson });

    await expect(
      port.query({
        sources: [source],
        dialect: 'postgres',
        query: { measures: ['orders.order_count'], dimensions: [] },
      }),
    ).resolves.toEqual({
      sql: 'select count(*) from public.orders',
      dialect: 'postgres',
      columns: [{ name: 'orders.order_count' }],
      plan: { sources_used: ['orders'] },
    });

    await expect(
      port.validateSources({
        sources: [source],
        dialect: 'postgres',
        recentlyTouched: ['orders'],
      }),
    ).resolves.toEqual({
      valid: true,
      errors: [],
      warnings: [],
      perSourceWarnings: {},
    });

    expect(requestJson).toHaveBeenNthCalledWith(1, '/semantic-layer/query', {
      sources: [source],
      dialect: 'postgres',
      query: { measures: ['orders.order_count'], dimensions: [] },
    });
    expect(requestJson).toHaveBeenNthCalledWith(2, '/semantic-layer/validate', {
      sources: [source],
      dialect: 'postgres',
      recently_touched: ['orders'],
    });
  });

  it('calls the semantic source-generation HTTP endpoint through an injected runner', async () => {
    const requestJson = vi.fn(async () => sourceGenerationDaemonResponse);
    const port = createHttpSemanticLayerComputePort({ baseUrl: 'http://127.0.0.1:8765/', requestJson });

    await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
      sourceCount: 2,
      sources: sourceGenerationDaemonResponse.sources,
    });

    expect(requestJson).toHaveBeenCalledWith('/semantic-layer/generate-sources', sourceGenerationDaemonPayload);
  });

  it('posts JSON to a running HTTP daemon endpoint', async () => {
    const queryResponse = {
      sql: 'select count(*) from public.orders',
      dialect: 'postgres',
      columns: [{ name: 'orders.order_count' }],
      plan: { sources_used: ['orders'] },
    };

    await withRecordingDaemon(queryResponse, async (baseUrl, requests) => {
      const port = createHttpSemanticLayerComputePort({ baseUrl });

      await expect(
        port.query({
          sources: [source],
          dialect: 'postgres',
          query: { measures: ['orders.order_count'], dimensions: [] },
        }),
      ).resolves.toMatchObject({
        sql: 'select count(*) from public.orders',
        dialect: 'postgres',
      });

      expect(requests).toEqual([
        {
          url: '/semantic-layer/query',
          body: {
            sources: [source],
            dialect: 'postgres',
            query: { measures: ['orders.order_count'], dimensions: [] },
          },
        },
      ]);
    });
  });

  it('posts source-generation JSON to a running HTTP daemon endpoint', async () => {
    await withRecordingDaemon(sourceGenerationDaemonResponse, async (baseUrl, requests) => {
      const port = createHttpSemanticLayerComputePort({ baseUrl });

      await expect(port.generateSources(sourceGenerationInput)).resolves.toEqual({
        sourceCount: 2,
        sources: sourceGenerationDaemonResponse.sources,
      });

      expect(requests).toEqual([
        {
          url: '/semantic-layer/generate-sources',
          body: sourceGenerationDaemonPayload,
        },
      ]);
    });
  });
});
|
||||
314
packages/cli/src/context/daemon/semantic-layer-compute.ts
Normal file
314
packages/cli/src/context/daemon/semantic-layer-compute.ts
Normal file
|
|
@ -0,0 +1,314 @@
|
|||
import { request as httpRequest } from 'node:http';
|
||||
import { request as httpsRequest } from 'node:https';
|
||||
import { URL } from 'node:url';
|
||||
import { spawn } from 'node:child_process';
|
||||
import type { ResolvedSemanticLayerSource, SemanticLayerQueryInput } from '../sl/types.js';
|
||||
|
||||
interface KtxSemanticLayerComputeQueryResult {
|
||||
sql: string;
|
||||
dialect: string;
|
||||
columns: Array<Record<string, unknown>>;
|
||||
plan: Record<string, unknown>;
|
||||
}
|
||||
|
||||
interface KtxSemanticLayerComputeValidationResult {
|
||||
valid: boolean;
|
||||
errors: string[];
|
||||
warnings: string[];
|
||||
perSourceWarnings: Record<string, string[]>;
|
||||
}
|
||||
|
||||
interface KtxSemanticLayerSourceGenerationColumnInput {
|
||||
name: string;
|
||||
type: string;
|
||||
primaryKey?: boolean;
|
||||
nullable?: boolean;
|
||||
comment?: string | null;
|
||||
}
|
||||
|
||||
interface KtxSemanticLayerSourceGenerationTableInput {
|
||||
name: string;
|
||||
catalog?: string | null;
|
||||
db?: string | null;
|
||||
comment?: string | null;
|
||||
columns: KtxSemanticLayerSourceGenerationColumnInput[];
|
||||
}
|
||||
|
||||
interface KtxSemanticLayerSourceGenerationLinkInput {
|
||||
fromTable: string;
|
||||
fromColumn: string;
|
||||
toTable: string;
|
||||
toColumn: string;
|
||||
relationshipType: string;
|
||||
}
|
||||
|
||||
interface KtxSemanticLayerSourceGenerationInput {
|
||||
tables: KtxSemanticLayerSourceGenerationTableInput[];
|
||||
links: KtxSemanticLayerSourceGenerationLinkInput[];
|
||||
dialect?: string;
|
||||
}
|
||||
|
||||
interface KtxSemanticLayerSourceGenerationResult {
|
||||
sources: Array<Record<string, unknown>>;
|
||||
sourceCount: number;
|
||||
}
|
||||
|
||||
export interface KtxSemanticLayerComputePort {
|
||||
/**
|
||||
* Callers must pass sources sanitized through toResolvedWire. The Python
|
||||
* daemon rejects authoring-only fields such as usage and inherits_columns_from.
|
||||
*/
|
||||
query(input: {
|
||||
sources: ResolvedSemanticLayerSource[];
|
||||
query: SemanticLayerQueryInput;
|
||||
dialect: string;
|
||||
}): Promise<KtxSemanticLayerComputeQueryResult>;
|
||||
/**
|
||||
* Callers must pass sources sanitized through toResolvedWire. The Python
|
||||
* daemon rejects authoring-only fields such as usage and inherits_columns_from.
|
||||
*/
|
||||
validateSources(input: {
|
||||
sources: ResolvedSemanticLayerSource[];
|
||||
dialect: string;
|
||||
recentlyTouched?: string[];
|
||||
}): Promise<KtxSemanticLayerComputeValidationResult>;
|
||||
generateSources(input: KtxSemanticLayerSourceGenerationInput): Promise<KtxSemanticLayerSourceGenerationResult>;
|
||||
}
|
||||
|
||||
type KtxDaemonCommand = 'semantic-query' | 'semantic-validate' | 'semantic-generate-sources';
|
||||
|
||||
type KtxDaemonJsonRunner = (
|
||||
subcommand: KtxDaemonCommand,
|
||||
payload: Record<string, unknown>,
|
||||
) => Promise<Record<string, unknown>>;
|
||||
|
||||
type KtxDaemonHttpJsonRunner = (path: string, payload: Record<string, unknown>) => Promise<Record<string, unknown>>;
|
||||
|
||||
export interface PythonSemanticLayerComputeOptions {
|
||||
command?: string;
|
||||
args?: string[];
|
||||
cwd?: string;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
runJson?: KtxDaemonJsonRunner;
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
export interface HttpSemanticLayerComputeOptions {
|
||||
baseUrl: string;
|
||||
requestJson?: KtxDaemonHttpJsonRunner;
|
||||
}
|
||||
|
||||
/**
 * Parses daemon output and guarantees the result is a plain JSON object.
 *
 * @param raw Text produced by the daemon (stdout or an HTTP response body).
 * @param subcommand Subcommand or endpoint path, used only to label errors.
 * @returns The parsed object.
 * @throws Error when `raw` is empty or not valid JSON, or when the parsed value is
 *   not an object (`null`, arrays and primitives are rejected).
 */
function parseJsonObject(raw: string, subcommand: string): Record<string, unknown> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    // A bare SyntaxError ("Unexpected end of JSON input") gives no hint about
    // which daemon call produced the bad output, so re-label it here.
    const detail = raw.trim() === '' ? 'empty output' : error instanceof Error ? error.message : String(error);
    throw new Error(`ktx-daemon ${subcommand} returned invalid JSON: ${detail}`);
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`ktx-daemon ${subcommand} returned non-object JSON`);
  }
  return parsed as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Builds a runner that executes one daemon subcommand per call as a child process.
 *
 * Each call spawns `command [...args, subcommand]`, writes the JSON payload plus a
 * trailing newline to the child's stdin, and collects stdout/stderr until the
 * process closes.
 *
 * @param options Command line plus optional working directory and environment
 *   overrides (merged on top of `process.env`).
 * @returns A runner that resolves with the JSON object printed on stdout and
 *   rejects when the process cannot be spawned, exits non-zero, is terminated by
 *   a signal, or prints something that is not a JSON object.
 */
function runProcessJson(
  options: Required<Pick<PythonSemanticLayerComputeOptions, 'command' | 'args'>> &
    Pick<PythonSemanticLayerComputeOptions, 'cwd' | 'env'>,
): KtxDaemonJsonRunner {
  return async (subcommand: KtxDaemonCommand, payload: Record<string, unknown>): Promise<Record<string, unknown>> =>
    new Promise((resolve, reject) => {
      const child = spawn(options.command, [...options.args, subcommand], {
        cwd: options.cwd,
        env: { ...process.env, ...options.env },
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      const stdout: Buffer[] = [];
      const stderr: Buffer[] = [];

      child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk));
      child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk));
      child.on('error', reject);
      // A child that exits (or never starts) before draining stdin raises EPIPE on
      // the pipe. With no listener that becomes an uncaught exception; the 'close'
      // and 'error' handlers already report the real failure, so swallow it here.
      child.stdin.on('error', () => {});
      child.on('close', (code, signal) => {
        const stdoutText = Buffer.concat(stdout).toString('utf8').trim();
        const stderrText = Buffer.concat(stderr).toString('utf8').trim();
        if (code !== 0) {
          // `code` is null when the process was terminated by a signal.
          const exitReason = code === null ? `terminated by signal ${signal}` : `exit code ${code}`;
          reject(new Error(`ktx-daemon ${subcommand} failed: ${stderrText || exitReason}`));
          return;
        }
        try {
          resolve(parseJsonObject(stdoutText, subcommand));
        } catch (error) {
          reject(error);
        }
      });
      child.stdin.end(`${JSON.stringify(payload)}\n`);
    });
}
|
||||
|
||||
/** Returns `baseUrl` with exactly one trailing slash appended when it has none. */
function normalizedBaseUrl(baseUrl: string): string {
  if (baseUrl.endsWith('/')) {
    return baseUrl;
  }
  return baseUrl + '/';
}
|
||||
|
||||
/**
 * Builds a runner that POSTs JSON payloads to the daemon at `baseUrl`.
 *
 * The transport (`node:http` or `node:https`) is picked from the resolved URL's
 * protocol. The returned runner resolves with the parsed JSON object from a 2xx
 * response and rejects on transport errors, non-2xx statuses (the message carries
 * the status code and response text), or a body that is not a JSON object.
 *
 * @param baseUrl Daemon base URL, with or without a trailing slash.
 */
function postJson(baseUrl: string): KtxDaemonHttpJsonRunner {
  return async (path, payload) =>
    new Promise((resolve, reject) => {
      // Strip the leading slash so `path` resolves beneath any path prefix in
      // `baseUrl` instead of replacing it.
      const target = new URL(path.replace(/^\//, ''), normalizedBaseUrl(baseUrl));
      const body = JSON.stringify(payload);
      const client = target.protocol === 'https:' ? httpsRequest : httpRequest;
      const request = client(
        target,
        {
          method: 'POST',
          headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            'content-length': Buffer.byteLength(body),
          },
        },
        (response) => {
          const chunks: Buffer[] = [];
          response.on('data', (chunk: Buffer) => chunks.push(chunk));
          // Once the response has started, a dropped connection is reported on the
          // response stream and 'end' never fires; without this listener the
          // promise would stay pending forever.
          response.on('error', reject);
          response.on('end', () => {
            const text = Buffer.concat(chunks).toString('utf8');
            const statusCode = response.statusCode ?? 0;
            if (statusCode < 200 || statusCode >= 300) {
              reject(new Error(`ktx-daemon HTTP ${path} failed with ${statusCode}: ${text}`));
              return;
            }
            try {
              resolve(parseJsonObject(text, path));
            } catch (error) {
              reject(error);
            }
          });
        },
      );
      request.on('error', reject);
      request.end(body);
    });
}
|
||||
|
||||
/** Keeps only the string items of `value`; anything that is not an array yields `[]`. */
function stringArray(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  const strings: string[] = [];
  for (const item of value) {
    if (typeof item === 'string') {
      strings.push(item);
    }
  }
  return strings;
}
|
||||
|
||||
/** Returns `value` itself when it is a non-null, non-array object; otherwise a fresh `{}`. */
function recordValue(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Keeps only the plain-object items of `value` (drops `null`, primitives and
 * nested arrays); anything that is not an array yields `[]`.
 */
function recordArray(value: unknown): Array<Record<string, unknown>> {
  const records: Array<Record<string, unknown>> = [];
  if (!Array.isArray(value)) {
    return records;
  }
  for (const item of value) {
    const isPlainObject = item !== null && typeof item === 'object' && !Array.isArray(item);
    if (isPlainObject) {
      records.push(item as Record<string, unknown>);
    }
  }
  return records;
}
|
||||
|
||||
/**
 * Converts the camelCase source-generation input into the snake_case payload sent
 * to the daemon.
 *
 * Optional table and column fields are emitted only when they are not `undefined`
 * (an explicit `null` is passed through). `dialect` defaults to `'postgres'`.
 */
function sourceGenerationPayload(input: KtxSemanticLayerSourceGenerationInput): Record<string, unknown> {
  const tables = input.tables.map((table) => {
    const wireTable: Record<string, unknown> = { name: table.name };
    if (table.catalog !== undefined) wireTable.catalog = table.catalog;
    if (table.db !== undefined) wireTable.db = table.db;
    if (table.comment !== undefined) wireTable.comment = table.comment;
    wireTable.columns = table.columns.map((column) => {
      const wireColumn: Record<string, unknown> = { name: column.name, type: column.type };
      if (column.primaryKey !== undefined) wireColumn.primary_key = column.primaryKey;
      if (column.nullable !== undefined) wireColumn.nullable = column.nullable;
      if (column.comment !== undefined) wireColumn.comment = column.comment;
      return wireColumn;
    });
    return wireTable;
  });

  const links = input.links.map(({ fromTable, fromColumn, toTable, toColumn, relationshipType }) => ({
    from_table: fromTable,
    from_column: fromColumn,
    to_table: toTable,
    to_column: toColumn,
    relationship_type: relationshipType,
  }));

  return { tables, links, dialect: input.dialect ?? 'postgres' };
}
|
||||
|
||||
/**
 * Normalizes the daemon's source-generation response.
 *
 * `sources` keeps only object entries of `raw.sources`. `sourceCount` is the
 * daemon-reported `source_count` when that is a number, otherwise the number of
 * sources kept.
 */
function sourceGenerationResult(raw: Record<string, unknown>): KtxSemanticLayerSourceGenerationResult {
  const sources = recordArray(raw.sources);
  const reportedCount = raw.source_count;
  const sourceCount = typeof reportedCount === 'number' ? reportedCount : sources.length;
  return { sources, sourceCount };
}
|
||||
|
||||
/**
 * Creates a semantic-layer compute port that talks to the daemon through a
 * JSON-in/JSON-out runner, one subcommand per call.
 *
 * Unless `options.runJson` is supplied, each call spawns
 * `options.command` (default `python`) with `options.args` (default
 * `-m ktx_daemon`) followed by the subcommand name.
 *
 * Responses are normalized defensively: a missing or non-string `sql` becomes `''`,
 * a missing `dialect` falls back to the requested one, and list/object fields of
 * the wrong shape become empty.
 */
export function createPythonSemanticLayerComputePort(
  options: PythonSemanticLayerComputeOptions = {},
): KtxSemanticLayerComputePort {
  const runJson =
    options.runJson ??
    runProcessJson({
      command: options.command ?? 'python',
      args: options.args ?? ['-m', 'ktx_daemon'],
      cwd: options.cwd,
      env: options.env,
    });

  const query: KtxSemanticLayerComputePort['query'] = async ({ sources, dialect, query: semanticQuery }) => {
    const reply = await runJson('semantic-query', { sources, dialect, query: semanticQuery });
    return {
      sql: typeof reply.sql === 'string' ? reply.sql : '',
      dialect: typeof reply.dialect === 'string' ? reply.dialect : dialect,
      columns: recordArray(reply.columns),
      plan: recordValue(reply.plan),
    };
  };

  const validateSources: KtxSemanticLayerComputePort['validateSources'] = async ({
    sources,
    dialect,
    recentlyTouched,
  }) => {
    const reply = await runJson('semantic-validate', { sources, dialect, recently_touched: recentlyTouched });
    return {
      valid: reply.valid === true,
      errors: stringArray(reply.errors),
      warnings: stringArray(reply.warnings),
      perSourceWarnings: recordValue(reply.per_source_warnings) as Record<string, string[]>,
    };
  };

  const generateSources: KtxSemanticLayerComputePort['generateSources'] = async (input) =>
    sourceGenerationResult(await runJson('semantic-generate-sources', sourceGenerationPayload(input)));

  return { query, validateSources, generateSources };
}
|
||||
|
||||
/**
 * Creates a semantic-layer compute port that talks to the daemon over HTTP.
 *
 * Requests go to `/semantic-layer/query`, `/semantic-layer/validate` and
 * `/semantic-layer/generate-sources`. Unless `options.requestJson` is supplied,
 * payloads are POSTed as JSON relative to `options.baseUrl`.
 *
 * Responses are normalized defensively: a missing or non-string `sql` becomes `''`,
 * a missing `dialect` falls back to the requested one, and list/object fields of
 * the wrong shape become empty.
 *
 * @internal
 */
export function createHttpSemanticLayerComputePort(
  options: HttpSemanticLayerComputeOptions,
): KtxSemanticLayerComputePort {
  const requestJson = options.requestJson ?? postJson(options.baseUrl);

  const query: KtxSemanticLayerComputePort['query'] = async ({ sources, dialect, query: semanticQuery }) => {
    const reply = await requestJson('/semantic-layer/query', { sources, dialect, query: semanticQuery });
    return {
      sql: typeof reply.sql === 'string' ? reply.sql : '',
      dialect: typeof reply.dialect === 'string' ? reply.dialect : dialect,
      columns: recordArray(reply.columns),
      plan: recordValue(reply.plan),
    };
  };

  const validateSources: KtxSemanticLayerComputePort['validateSources'] = async ({
    sources,
    dialect,
    recentlyTouched,
  }) => {
    const reply = await requestJson('/semantic-layer/validate', {
      sources,
      dialect,
      recently_touched: recentlyTouched,
    });
    return {
      valid: reply.valid === true,
      errors: stringArray(reply.errors),
      warnings: stringArray(reply.warnings),
      perSourceWarnings: recordValue(reply.per_source_warnings) as Record<string, string[]>,
    };
  };

  const generateSources: KtxSemanticLayerComputePort['generateSources'] = async (input) =>
    sourceGenerationResult(await requestJson('/semantic-layer/generate-sources', sourceGenerationPayload(input)));

  return { query, validateSources, generateSources };
}
|
||||
196
packages/cli/src/context/index-sync/reindex.test.ts
Normal file
196
packages/cli/src/context/index-sync/reindex.test.ts
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
||||
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../../context/project/project.js';
|
||||
import { SqliteKnowledgeIndex } from '../wiki/sqlite-knowledge-index.js';
|
||||
import { reindexLocalIndexes } from './reindex.js';
|
||||
|
||||
/**
 * Deterministic in-memory embedding port for tests: every text maps to the
 * two-dimensional vector `[text.length, 1]`.
 */
class FakeEmbeddingPort implements KtxEmbeddingPort {
  readonly maxBatchSize = 8;

  /** Embeds a single text as `[length, 1]`. */
  async computeEmbedding(text: string): Promise<number[]> {
    const vector = [text.length, 1];
    return vector;
  }

  /** Embeds each text independently, preserving input order. */
  async computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const vectors: number[][] = [];
    for (const text of texts) {
      vectors.push([text.length, 1]);
    }
    return vectors;
  }
}
|
||||
|
||||
/** Initializes a project in `tempDir` (with `force: true`) and returns it freshly loaded from disk. */
async function createProject(tempDir: string): Promise<KtxLocalProject> {
  const projectDir = tempDir;
  await initKtxProject({ projectDir, force: true });
  const project = await loadKtxProject({ projectDir });
  return project;
}
|
||||
|
||||
describe('reindexLocalIndexes', () => {
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(join(tmpdir(), 'ktx-reindex-'));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('returns an empty summary when no wiki or semantic-layer directories exist', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await rm(join(project.projectDir, 'wiki'), { recursive: true, force: true });
|
||||
await rm(join(project.projectDir, 'semantic-layer'), { recursive: true, force: true });
|
||||
|
||||
await expect(reindexLocalIndexes(project, { force: false, embeddingService: null })).resolves.toMatchObject({
|
||||
scopes: [],
|
||||
totals: { scanned: 0, updated: 0, deleted: 0, embeddingsRecomputed: 0, embeddingsFailed: 0 },
|
||||
force: false,
|
||||
embeddingsAvailable: false,
|
||||
});
|
||||
});
|
||||
|
||||
it('discovers empty directories as zero-row scopes', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await mkdir(join(project.projectDir, 'wiki/user/local'), { recursive: true });
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
|
||||
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
|
||||
expect(summary.scopes.map((scope) => scope.label)).toEqual(['global', 'user/local', 'warehouse']);
|
||||
expect(summary.totals.scanned).toBe(0);
|
||||
});
|
||||
|
||||
it('indexes mixed wiki and SL sources and reports totals', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await writeFile(
|
||||
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
|
||||
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
const summary = await reindexLocalIndexes(project, {
|
||||
force: false,
|
||||
embeddingService: new FakeEmbeddingPort(),
|
||||
});
|
||||
|
||||
expect(summary.scopes).toHaveLength(2);
|
||||
expect(summary.totals).toMatchObject({ scanned: 2, updated: 2, deleted: 0, embeddingsRecomputed: 2 });
|
||||
expect(summary.embeddingsAvailable).toBe(true);
|
||||
});
|
||||
|
||||
it('does not report unchanged lexical-only rows as updated on repeated runs', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await writeFile(
|
||||
join(project.projectDir, 'semantic-layer/warehouse/orders.yaml'),
|
||||
'name: orders\ntable: public.orders\ngrain: [id]\ncolumns:\n - name: id\n type: number\njoins: []\nmeasures: []\n',
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
const first = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
expect(first.totals).toMatchObject({
|
||||
scanned: 2,
|
||||
updated: 2,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
|
||||
const second = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
|
||||
expect(second.totals).toMatchObject({
|
||||
scanned: 2,
|
||||
updated: 0,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
});
|
||||
expect(second.scopes.map((scope) => [scope.label, scope.updated])).toEqual([
|
||||
['global', 0],
|
||||
['warehouse', 0],
|
||||
]);
|
||||
});
|
||||
|
||||
it('force clears stale rows before rebuilding each discovered scope', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
const wikiIndex = new SqliteKnowledgeIndex({ dbPath: join(project.projectDir, '.ktx/db.sqlite') });
|
||||
wikiIndex.sync([
|
||||
{
|
||||
path: 'wiki/global/stale.md',
|
||||
key: 'stale',
|
||||
scope: 'GLOBAL',
|
||||
scopeId: null,
|
||||
summary: 'Stale',
|
||||
content: 'Stale content',
|
||||
tags: [],
|
||||
embedding: [1, 0],
|
||||
},
|
||||
]);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
|
||||
const summary = await reindexLocalIndexes(project, {
|
||||
force: true,
|
||||
embeddingService: new FakeEmbeddingPort(),
|
||||
});
|
||||
|
||||
expect(summary.force).toBe(true);
|
||||
expect(summary.totals).toMatchObject({ scanned: 1, updated: 1, deleted: 0 });
|
||||
expect(wikiIndex.search('Stale', 10)).toEqual([]);
|
||||
});
|
||||
|
||||
it('captures a per-scope error and continues other scopes', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await writeFile(join(project.projectDir, 'semantic-layer/warehouse/broken.yaml'), 'not: [valid', 'utf-8');
|
||||
|
||||
const summary = await reindexLocalIndexes(project, { force: false, embeddingService: null });
|
||||
|
||||
expect(summary.scopes.find((scope) => scope.label === 'global')?.error).toBeUndefined();
|
||||
expect(summary.scopes.find((scope) => scope.label === 'warehouse')?.error).toContain('YAML');
|
||||
});
|
||||
|
||||
it('marks a scope errored when configured embeddings fail', async () => {
|
||||
const project = await createProject(tempDir);
|
||||
await writeFile(
|
||||
join(project.projectDir, 'wiki/global/revenue.md'),
|
||||
'---\nsummary: Revenue\nusage_mode: auto\n---\n\nPaid orders.\n',
|
||||
'utf-8',
|
||||
);
|
||||
const embeddingService: KtxEmbeddingPort = {
|
||||
maxBatchSize: 8,
|
||||
async computeEmbedding() {
|
||||
throw new Error('embedding provider unavailable');
|
||||
},
|
||||
async computeEmbeddingsBulk() {
|
||||
throw new Error('embedding provider unavailable');
|
||||
},
|
||||
};
|
||||
|
||||
const summary = await reindexLocalIndexes(project, { force: false, embeddingService });
|
||||
|
||||
expect(summary.scopes[0]).toMatchObject({
|
||||
label: 'global',
|
||||
embeddingsFailed: 1,
|
||||
error: '1 embedding recomputation failed',
|
||||
});
|
||||
});
|
||||
});
|
||||
166
packages/cli/src/context/index-sync/reindex.ts
Normal file
166
packages/cli/src/context/index-sync/reindex.ts
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
import { readdir, stat } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import { ktxLocalStateDbPath } from '../../context/project/local-state-db.js';
|
||||
import type { KtxLocalProject } from '../../context/project/project.js';
|
||||
import { loadLocalSlSourceRecords } from '../../context/sl/local-sl.js';
|
||||
import { SlSearchService } from '../../context/sl/sl-search.service.js';
|
||||
import { SqliteSlSourcesIndex } from '../../context/sl/sqlite-sl-sources-index.js';
|
||||
import { KnowledgeWikiService } from '../../context/wiki/knowledge-wiki.service.js';
|
||||
import { SqliteKnowledgeIndex } from '../../context/wiki/sqlite-knowledge-index.js';
|
||||
import type { ReindexOptions, ReindexScopeResult, ReindexSummary, ReindexWorkResult } from './types.js';
|
||||
|
||||
type DiscoveredScope =
|
||||
| { kind: 'wiki'; scope: 'GLOBAL'; scopeId: null; label: 'global' }
|
||||
| { kind: 'wiki'; scope: 'USER'; scopeId: string; label: `user/${string}` }
|
||||
| { kind: 'sl'; connectionId: string; label: string };
|
||||
|
||||
const ZERO: ReindexWorkResult = {
|
||||
scanned: 0,
|
||||
updated: 0,
|
||||
deleted: 0,
|
||||
embeddingsRecomputed: 0,
|
||||
embeddingsFailed: 0,
|
||||
};
|
||||
|
||||
/**
 * Reports whether `path` exists and is a directory. Any `stat` failure (missing
 * path, permission error, ...) is treated as "not a directory".
 */
async function directoryExists(path: string): Promise<boolean> {
  const stats = await stat(path).catch(() => null);
  if (stats === null) {
    return false;
  }
  return stats.isDirectory();
}
|
||||
|
||||
/**
 * Lists the names of the immediate sub-directories of `path`, sorted with
 * `localeCompare`.
 *
 * @returns `[]` when `path` does not exist (ENOENT).
 * @throws Any other `readdir` failure is rethrown unchanged.
 */
async function childDirectories(path: string): Promise<string[]> {
  const entries = await readdir(path, { withFileTypes: true }).catch((error: unknown) => {
    if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
      return null;
    }
    throw error;
  });
  if (entries === null) {
    return [];
  }

  const names: string[] = [];
  for (const entry of entries) {
    if (entry.isDirectory()) {
      names.push(entry.name);
    }
  }
  names.sort((left, right) => left.localeCompare(right));
  return names;
}
|
||||
|
||||
/**
 * Walks the project directory and returns every scope that can be reindexed,
 * in a fixed order: the global wiki (if `wiki/global` is a directory), then one
 * scope per directory under `wiki/user`, then one scope per directory under
 * `semantic-layer` except the reserved `_schema` directory.
 */
async function discoverReindexScopes(project: KtxLocalProject): Promise<DiscoveredScope[]> {
  const { projectDir } = project;
  const hasGlobalWiki = await directoryExists(join(projectDir, 'wiki/global'));
  const userIds = await childDirectories(join(projectDir, 'wiki/user'));
  const connectionIds = await childDirectories(join(projectDir, 'semantic-layer'));

  const globalScopes: DiscoveredScope[] = hasGlobalWiki
    ? [{ kind: 'wiki', scope: 'GLOBAL', scopeId: null, label: 'global' }]
    : [];
  const userScopes = userIds.map(
    (userId): DiscoveredScope => ({ kind: 'wiki', scope: 'USER', scopeId: userId, label: `user/${userId}` }),
  );
  const semanticLayerScopes = connectionIds
    .filter((connectionId) => connectionId !== '_schema')
    .map((connectionId): DiscoveredScope => ({ kind: 'sl', connectionId, label: connectionId }));

  return [...globalScopes, ...userScopes, ...semanticLayerScopes];
}
|
||||
|
||||
/**
 * Renders a thrown value as a single line of text.
 *
 * `Error` instances yield their message, prefixed with the error name when that
 * name is set and is something other than the generic `Error`. Anything else is
 * passed through `String()`.
 */
function errorMessage(error: unknown): string {
  if (error instanceof Error) {
    const { name, message } = error;
    const isGenericName = !name || name === 'Error';
    return isGenericName ? message : `${name}: ${message}`;
  }
  return String(error);
}
|
||||
|
||||
/**
 * Adds two sets of work counters field by field and returns a new object;
 * neither argument is modified. Extra properties on the inputs are ignored.
 */
function addTotals(left: ReindexWorkResult, right: ReindexWorkResult): ReindexWorkResult {
  const sum = (field: keyof ReindexWorkResult): number => left[field] + right[field];
  return {
    scanned: sum('scanned'),
    updated: sum('updated'),
    deleted: sum('deleted'),
    embeddingsRecomputed: sum('embeddingsRecomputed'),
    embeddingsFailed: sum('embeddingsFailed'),
  };
}
|
||||
|
||||
/**
 * Whole milliseconds elapsed since `startedAt`, a `process.hrtime.bigint()`
 * reading in nanoseconds. The fractional millisecond is truncated.
 */
function durationSince(startedAt: bigint): number {
  const elapsedNanoseconds = process.hrtime.bigint() - startedAt;
  return Number(elapsedNanoseconds / 1_000_000n);
}
|
||||
|
||||
/**
 * Builds the per-scope error text for failed embedding recomputations, or
 * `undefined` when nothing failed. The noun is pluralised unless exactly one failed.
 */
function embeddingFailureError(work: ReindexWorkResult): string | undefined {
  const failed = work.embeddingsFailed;
  if (failed === 0) {
    return undefined;
  }
  const plural = failed === 1 ? '' : 's';
  return `${failed} embedding recomputation${plural} failed`;
}
|
||||
|
||||
/**
 * Rebuilds the local wiki and semantic-layer search indexes for every scope
 * discovered under the project directory.
 *
 * Scopes are processed one after another. A failure inside one scope is
 * recorded as that scope's `error` (with zeroed counters) and does not stop the
 * remaining scopes. With `options.force`, a scope's index is cleared before it
 * is rebuilt and its `deleted` counter is reported as 0. When an embedding
 * service is configured and some embeddings failed, the scope also carries an
 * `error` describing how many failed.
 *
 * @param project Local project whose directory, file store and git handle are used.
 * @param options Force flag and the embedding service (or `null`).
 * @returns Per-scope results, summed totals, the state DB path (relative to the
 *   project directory when that is non-empty) and overall timing.
 */
export async function reindexLocalIndexes(
  project: KtxLocalProject,
  options: ReindexOptions,
): Promise<ReindexSummary> {
  const startedAt = process.hrtime.bigint();
  const { force, embeddingService } = options;
  const dbPath = ktxLocalStateDbPath(project);
  const scopes = await discoverReindexScopes(project);
  const wikiIndex = new SqliteKnowledgeIndex({ dbPath });
  const slIndex = new SqliteSlSourcesIndex({ dbPath });
  const wikiService = new KnowledgeWikiService(project.fileStore, embeddingService, wikiIndex, project.git);
  const slService = new SlSearchService(embeddingService, slIndex);
  const results: ReindexScopeResult[] = [];

  for (const scope of scopes) {
    const scopeStartedAt = process.hrtime.bigint();

    // Identifying fields shared by the success and the failure result of this scope.
    const identity: Pick<ReindexScopeResult, 'kind' | 'label' | 'scope' | 'scopeId' | 'connectionId'> =
      scope.kind === 'wiki'
        ? {
            kind: scope.kind,
            label: scope.label,
            scope: scope.scope === 'GLOBAL' ? 'global' : 'user',
            scopeId: scope.scopeId,
          }
        : { kind: scope.kind, label: scope.label, connectionId: scope.connectionId };

    try {
      let work: ReindexWorkResult;
      if (scope.kind === 'wiki') {
        if (force) {
          // NOTE(review): not awaited, unlike slIndex.clear below — confirm this call is synchronous.
          wikiIndex.clear(scope.scope, scope.scopeId);
        }
        work = await wikiService.syncIndex(scope.scope, scope.scopeId);
      } else {
        if (force) {
          await slIndex.clear(scope.connectionId);
        }
        const records = await loadLocalSlSourceRecords(project, { connectionId: scope.connectionId });
        const sources = records.map((record) => record.source);
        work = await slService.indexSources(scope.connectionId, sources);
      }

      results.push({
        ...identity,
        ...work,
        // After a forced clear, the reported deletion count is reset to 0.
        ...(force ? { deleted: 0 } : {}),
        ...(embeddingService && work.embeddingsFailed > 0 ? { error: embeddingFailureError(work) } : {}),
        durationMs: durationSince(scopeStartedAt),
      });
    } catch (error) {
      results.push({
        ...identity,
        ...ZERO,
        durationMs: durationSince(scopeStartedAt),
        error: errorMessage(error),
      });
    }
  }

  return {
    scopes: results,
    totals: results.reduce(addTotals, ZERO),
    dbPath: relative(project.projectDir, dbPath) || dbPath,
    force: options.force,
    embeddingsAvailable: options.embeddingService !== null,
    durationMs: durationSince(startedAt),
  };
}
|
||||
33
packages/cli/src/context/index-sync/types.ts
Normal file
33
packages/cli/src/context/index-sync/types.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import type { KtxEmbeddingPort } from '../../context/core/embedding.js';
|
||||
|
||||
/** Caller-supplied settings for a reindex run. */
export interface ReindexOptions {
  /** Whether to rebuild from scratch rather than sync incrementally. */
  force: boolean;
  /** Embedding backend, or `null` when no embedding service is available. */
  embeddingService: KtxEmbeddingPort | null;
}

/** Work counters produced by indexing one scope, or summed over several. */
export interface ReindexWorkResult {
  scanned: number;
  updated: number;
  deleted: number;
  embeddingsRecomputed: number;
  embeddingsFailed: number;
}

/** Outcome of reindexing a single wiki or semantic-layer (`sl`) scope. */
export interface ReindexScopeResult extends ReindexWorkResult {
  kind: 'wiki' | 'sl';
  /** Human-readable scope name. */
  label: string;
  /** Wiki scope type, when applicable. */
  scope?: 'global' | 'user';
  /** Wiki scope id, when applicable; may be `null`. */
  scopeId?: string | null;
  /** Connection id, when applicable. */
  connectionId?: string;
  durationMs: number;
  /** Failure description, when something went wrong for this scope. */
  error?: string;
}

/** Result of a whole reindex run. */
export interface ReindexSummary {
  scopes: ReindexScopeResult[];
  /** Work counters for the run as a whole. */
  totals: ReindexWorkResult;
  /** Location of the index database. */
  dbPath: string;
  force: boolean;
  /** Whether an embedding service was available for the run. */
  embeddingsAvailable: boolean;
  durationMs: number;
}
|
||||
42
packages/cli/src/context/ingest/action-identity.test.ts
Normal file
42
packages/cli/src/context/ingest/action-identity.test.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { actionTargetConnectionId, memoryActionIdentity } from './action-identity.js';
|
||||
|
||||
// Covers how memory actions are keyed: SL actions follow their own target
// connection (falling back to the run connection), wiki actions always use the
// run connection.
describe('memory action target identity', () => {
  it('keys SL actions by target connection and wiki actions by run connection', () => {
    const slWithExplicitTarget = memoryActionIdentity(
      { target: 'sl', type: 'created', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
      'looker-run',
    );
    expect(slWithExplicitTarget).toBe('sl:warehouse-b:orders');

    const slWithoutTarget = memoryActionIdentity(
      { target: 'sl', type: 'created', key: 'orders', detail: '' },
      'warehouse-a',
    );
    expect(slWithoutTarget).toBe('sl:warehouse-a:orders');

    const wikiWithIgnoredTarget = memoryActionIdentity(
      {
        target: 'wiki',
        type: 'created',
        key: 'wiki/global/orders.md',
        detail: '',
        targetConnectionId: 'ignored',
      },
      'looker-run',
    );
    expect(wikiWithIgnoredTarget).toBe('wiki:looker-run:wiki/global/orders.md');
  });

  it('resolves action target connection only for SL actions', () => {
    const slConnection = actionTargetConnectionId(
      { target: 'sl', type: 'updated', key: 'orders', detail: '', targetConnectionId: 'warehouse-b' },
      'looker-run',
    );
    expect(slConnection).toBe('warehouse-b');

    const wikiConnection = actionTargetConnectionId(
      { target: 'wiki', type: 'updated', key: 'orders', detail: '' },
      'looker-run',
    );
    expect(wikiConnection).toBe('looker-run');
  });
});
|
||||
9
packages/cli/src/context/ingest/action-identity.ts
Normal file
9
packages/cli/src/context/ingest/action-identity.ts
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
import type { MemoryAction } from '../../context/memory/types.js';
|
||||
|
||||
/**
 * Resolves the connection an action applies to. Only `sl` actions may carry
 * their own `targetConnectionId`; when it is absent, or for any other target,
 * the run's connection id is used.
 */
export function actionTargetConnectionId(action: MemoryAction, runConnectionId: string): string {
  if (action.target !== 'sl') {
    return runConnectionId;
  }
  return action.targetConnectionId ?? runConnectionId;
}
|
||||
|
||||
/**
 * Builds the identity string `<target>:<connectionId>:<key>` for an action,
 * using the connection resolved by `actionTargetConnectionId`.
 */
export function memoryActionIdentity(action: MemoryAction, runConnectionId: string): string {
  const connectionId = actionTargetConnectionId(action, runConnectionId);
  return `${action.target}:${connectionId}:${action.key}`;
}
|
||||
|
|
@ -0,0 +1,214 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { parseDbtSchemaFile, parseDbtSchemaFiles } from './parse-schema.js';
|
||||
|
||||
describe('dbt descriptions schema parser', () => {
|
||||
it('resolves shared dbt vars and defaults before parsing schema YAML', () => {
|
||||
const result = parseDbtSchemaFile(
|
||||
`
|
||||
version: 2
|
||||
sources:
|
||||
- name: raw
|
||||
database: "{{ var('database') }}"
|
||||
schema: "{{ var('schema', 'fallback_schema') }}"
|
||||
tables:
|
||||
- name: orders
|
||||
identifier: fct_orders
|
||||
description: "Orders from {{ var('database') }}"
|
||||
columns:
|
||||
- name: customer_id
|
||||
description: "Customer id"
|
||||
tests:
|
||||
- relationships:
|
||||
to: ref('customers')
|
||||
field: id
|
||||
models:
|
||||
- name: "{{ var('model_name', 'orders_model') }}"
|
||||
schema: "{{ var('model_schema') }}"
|
||||
columns:
|
||||
- name: id
|
||||
description: "Order id"
|
||||
`,
|
||||
{ path: 'models/schema.yml', variables: new Map([['database', 'analytics'], ['model_schema', 'mart']]) },
|
||||
);
|
||||
|
||||
expect(result.tables).toEqual([
|
||||
{
|
||||
name: 'fct_orders',
|
||||
description: 'Orders from analytics',
|
||||
database: 'analytics',
|
||||
schema: 'fallback_schema',
|
||||
columns: [
|
||||
{
|
||||
name: 'customer_id',
|
||||
description: 'Customer id',
|
||||
dataType: null,
|
||||
dataTests: [{ name: 'relationships', package: 'dbt', kwargs: { to: "ref('customers')", field: 'id' } }],
|
||||
},
|
||||
],
|
||||
resourceType: 'source',
|
||||
},
|
||||
{
|
||||
name: 'orders_model',
|
||||
description: null,
|
||||
database: null,
|
||||
schema: 'mart',
|
||||
columns: [{ name: 'id', description: 'Order id', dataType: null }],
|
||||
resourceType: 'model',
|
||||
},
|
||||
]);
|
||||
expect(result.relationships).toEqual([
|
||||
{
|
||||
fromTable: 'fct_orders',
|
||||
fromColumn: 'customer_id',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
fromSchema: 'fallback_schema',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('deduplicates tables by database schema and name while merging columns', () => {
|
||||
const result = parseDbtSchemaFiles([
|
||||
{
|
||||
path: 'models/a.yml',
|
||||
content: `
|
||||
version: 2
|
||||
models:
|
||||
- name: orders
|
||||
description: Orders
|
||||
columns:
|
||||
- name: id
|
||||
description: Primary key
|
||||
`,
|
||||
},
|
||||
{
|
||||
path: 'models/b.yml',
|
||||
content: `
|
||||
version: 2
|
||||
models:
|
||||
- name: orders
|
||||
columns:
|
||||
- name: status
|
||||
description: Status
|
||||
- name: id
|
||||
data_type: integer
|
||||
`,
|
||||
},
|
||||
]);
|
||||
|
||||
expect(result.tables).toEqual([
|
||||
{
|
||||
name: 'orders',
|
||||
description: 'Orders',
|
||||
database: null,
|
||||
schema: null,
|
||||
resourceType: 'model',
|
||||
columns: [
|
||||
{ name: 'id', description: 'Primary key', dataType: 'integer' },
|
||||
{ name: 'status', description: 'Status', dataType: null },
|
||||
],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('returns an empty result for malformed YAML and preserves unresolved Jinja text', () => {
|
||||
expect(parseDbtSchemaFile('{{{{ invalid yaml', { path: 'broken.yml' })).toEqual({
|
||||
projectName: null,
|
||||
dbtVersion: null,
|
||||
tables: [],
|
||||
relationships: [],
|
||||
});
|
||||
|
||||
const unresolved = parseDbtSchemaFile(
|
||||
`
|
||||
version: 2
|
||||
models:
|
||||
- name: "{{ var('missing_model') }}"
|
||||
`,
|
||||
{ variables: new Map() },
|
||||
);
|
||||
expect(unresolved.tables[0]?.name).toBe("{{ var('missing_model') }}");
|
||||
});
|
||||
|
||||
it('extracts data tests, constraints, enum values, tags, and freshness', () => {
|
||||
const result = parseDbtSchemaFile(`
|
||||
version: 2
|
||||
sources:
|
||||
- name: raw
|
||||
schema: jaffle
|
||||
tags: ["raw"]
|
||||
tables:
|
||||
- name: customers
|
||||
tags: ["core"]
|
||||
loaded_at_field: updated_at
|
||||
freshness:
|
||||
warn_after: { count: 12, period: hour }
|
||||
columns:
|
||||
- name: id
|
||||
tests:
|
||||
- not_null
|
||||
- unique
|
||||
- name: status
|
||||
data_tests:
|
||||
- accepted_values:
|
||||
values: ['active', 'inactive']
|
||||
models:
|
||||
- name: orders
|
||||
tags: ["finance"]
|
||||
loaded_at_field: run_at
|
||||
columns:
|
||||
- name: status
|
||||
data_tests:
|
||||
- dbt_utils.expression_is_true:
|
||||
expression: "status is not null"
|
||||
- accepted_values: ['placed', 'shipped']
|
||||
`);
|
||||
|
||||
const customers = result.tables.find((table) => table.name === 'customers');
|
||||
expect(customers?.tagsDbt).toEqual(['raw', 'core']);
|
||||
expect(customers?.freshnessDbt?.loadedAtField).toBe('updated_at');
|
||||
expect(customers?.freshnessDbt?.raw).toBeDefined();
|
||||
const id = customers?.columns.find((column) => column.name === 'id');
|
||||
expect(id?.constraints?.dbt).toEqual({ not_null: true, unique: true });
|
||||
const status = customers?.columns.find((column) => column.name === 'status');
|
||||
expect(status?.enumValuesDbt).toEqual(['active', 'inactive']);
|
||||
|
||||
const orders = result.tables.find((table) => table.name === 'orders');
|
||||
expect(orders?.tagsDbt).toEqual(['finance']);
|
||||
expect(orders?.freshnessDbt?.loadedAtField).toBe('run_at');
|
||||
const ordersStatus = orders?.columns.find((column) => column.name === 'status');
|
||||
expect(ordersStatus?.enumValuesDbt).toEqual(['placed', 'shipped']);
|
||||
expect(ordersStatus?.dataTests).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({ package: 'dbt_utils', name: 'expression_is_true' }),
|
||||
expect.objectContaining({ package: 'dbt', name: 'accepted_values' }),
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it('parses relationships from model column data tests', () => {
|
||||
const result = parseDbtSchemaFile(`
|
||||
version: 2
|
||||
models:
|
||||
- name: orders
|
||||
schema: public
|
||||
columns:
|
||||
- name: customer_id
|
||||
data_tests:
|
||||
- relationships:
|
||||
arguments:
|
||||
to: "ref('customers')"
|
||||
field: id
|
||||
`);
|
||||
|
||||
expect(result.relationships).toEqual([
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'customer_id',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
fromSchema: 'public',
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,649 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { parse as parseYaml } from 'yaml';
|
||||
import { type KtxLogger, noopLogger } from '../../../../context/core/config.js';
|
||||
import { resolveJinjaVariables } from '../../dbt-shared/project-vars.js';
|
||||
|
||||
/** A column extracted from a dbt schema file. */
interface DbtParsedColumn {
  name: string;
  description: string | null;
  dataType: string | null;
  /** Data tests declared on the column, when any were found. */
  dataTests?: DbtDataTestRef[];
  constraints?: DbtColumnConstraints;
  /** Values collected from `accepted_values` tests. */
  enumValuesDbt?: string[];
}

/** Reference to a dbt data test, split into package and test name. */
interface DbtDataTestRef {
  name: string;
  package: string;
  kwargs?: Record<string, unknown>;
}

/** Column constraints derived from dbt `not_null` / `unique` tests. */
interface DbtColumnConstraints {
  dbt: {
    not_null?: boolean;
    unique?: boolean;
  };
}

/** A relationship derived from a dbt `relationships` test. */
interface DbtParsedRelationship {
  fromTable: string;
  fromColumn: string;
  toTable: string;
  toColumn: string;
  fromSchema?: string;
  toSchema?: string;
  description?: string;
}

/** A table extracted from a dbt `sources` or `models` entry. */
interface DbtParsedTable {
  name: string;
  description: string | null;
  database: string | null;
  schema: string | null;
  columns: DbtParsedColumn[];
  resourceType?: 'source' | 'model';
  tagsDbt?: string[];
  freshnessDbt?: {
    raw?: unknown;
    loadedAtField?: string | null;
  };
}

/** Combined outcome of parsing one or more dbt schema files. */
export interface DbtSchemaParseResult {
  projectName: string | null;
  dbtVersion: string | null;
  tables: DbtParsedTable[];
  relationships: DbtParsedRelationship[];
}

/** A schema file to parse: its text and the path used in log messages. */
export interface DbtSchemaFile {
  content: string;
  path: string;
}

/** Optional inputs for parsing a single schema file. */
interface ParseDbtSchemaOptions {
  path?: string;
  /** dbt variables substituted into the YAML text before it is parsed. */
  variables?: Map<string, string>;
  projectName?: string | null;
  logger?: KtxLogger;
}

/** Top-level shape read from a dbt schema YAML document. */
interface DbtSchemaYaml {
  version?: number;
  sources?: DbtSchemaSource[];
  models?: DbtSchemaModel[];
}

/** A `sources:` entry. */
interface DbtSchemaSource {
  name: string;
  description?: string;
  database?: string;
  schema?: string;
  tags?: string[];
  tables?: DbtSchemaTable[];
}

/** A table nested under a source. */
interface DbtSchemaTable {
  name: string;
  description?: string;
  identifier?: string;
  tags?: string[];
  loaded_at_field?: string;
  freshness?: unknown;
  columns?: DbtSchemaColumn[];
}

/** A `models:` entry. */
interface DbtSchemaModel {
  name: string;
  description?: string;
  database?: string;
  schema?: string;
  tags?: string[];
  loaded_at_field?: string;
  freshness?: unknown;
  columns?: DbtSchemaColumn[];
}

/** A column entry; tests may appear under `data_tests` or the older `tests` key. */
interface DbtSchemaColumn {
  name: string;
  description?: string;
  data_type?: string;
  data_tests?: DbtSchemaDataTest[];
  tests?: DbtSchemaDataTest[];
}

/** A data test entry: either a bare test name or a mapping keyed by test name. */
type DbtSchemaDataTest =
  | string
  | {
      relationships?: {
        to?: string;
        field?: string;
        arguments?: { to?: string; field?: string };
      };
      not_null?: unknown;
      unique?: unknown;
      accepted_values?: { values?: unknown } | unknown;
      [key: string]: unknown;
    };
|
||||
|
||||
/**
 * Parses a single dbt schema YAML document with a fresh parser, using
 * `options.logger` when given and the no-op logger otherwise.
 *
 * @internal
 */
export function parseDbtSchemaFile(content: string, options: ParseDbtSchemaOptions = {}): DbtSchemaParseResult {
  const parser = new DbtSchemaParser(options.logger ?? noopLogger);
  return parser.parseFile(content, options);
}
|
||||
|
||||
/**
 * Parses several dbt schema files with one parser and returns the combined,
 * de-duplicated tables and relationships.
 *
 * @param files Schema files (content plus path) to parse.
 * @param variables dbt variables made available to every file.
 * @param options Project name to stamp on the result (defaults to `null`) and an optional logger.
 */
export function parseDbtSchemaFiles(
  files: DbtSchemaFile[],
  variables?: Map<string, string>,
  options: { projectName?: string | null; logger?: KtxLogger } = {},
): DbtSchemaParseResult {
  const parser = new DbtSchemaParser(options.logger ?? noopLogger);
  const projectName = options.projectName ?? null;
  return parser.parseFiles(files, variables, projectName);
}
|
||||
|
||||
|
||||
class DbtSchemaParser {
|
||||
constructor(private readonly logger: KtxLogger) {}
|
||||
|
||||
/**
 * Parses one schema YAML document into tables and relationships.
 *
 * dbt variables are substituted first when `options.variables` is given;
 * variables that stay unresolved are logged as a warning. YAML that fails to
 * parse, or that does not parse to an object, produces the empty result.
 */
parseFile(yamlContent: string, options: ParseDbtSchemaOptions = {}): DbtSchemaParseResult {
  const { path, variables } = options;
  const projectName = options.projectName ?? null;
  this.logger.debug(`Parsing schema file: ${path ?? 'unknown'}`);

  const resolved = variables
    ? resolveJinjaVariables(yamlContent, variables)
    : { content: yamlContent, unresolvedVars: [] };
  if (resolved.unresolvedVars.length > 0) {
    this.logger.warn(`Unresolved dbt variables in ${path ?? 'schema file'}: ${resolved.unresolvedVars.join(', ')}`);
  }

  let schema: DbtSchemaYaml;
  try {
    schema = parseYaml(resolved.content) as DbtSchemaYaml;
  } catch (error) {
    this.logger.warn(`Failed to parse YAML${path ? ` at ${path}` : ''}: ${error}`);
    return this.emptyResult(projectName);
  }

  const isObject = Boolean(schema) && typeof schema === 'object';
  if (!isObject) {
    return this.emptyResult(projectName);
  }

  return {
    projectName,
    dbtVersion: null,
    tables: [...this.parseSources(schema.sources), ...this.parseModels(schema.models)],
    relationships: [
      ...this.parseSourceRelationships(schema.sources),
      ...this.parseModelRelationships(schema.models),
    ],
  };
}
|
||||
|
||||
/**
 * Parses every file in order with the same variables and project name, then
 * de-duplicates the concatenated tables and relationships.
 */
parseFiles(
  files: DbtSchemaFile[],
  variables?: Map<string, string>,
  projectName: string | null = null,
): DbtSchemaParseResult {
  const parsed = files.map((file) => this.parseFile(file.content, { path: file.path, variables, projectName }));
  const allTables = parsed.flatMap((result) => result.tables);
  const allRelationships = parsed.flatMap((result) => result.relationships);

  return {
    projectName,
    dbtVersion: null,
    tables: this.deduplicateTables(allTables),
    relationships: this.deduplicateRelationships(allRelationships),
  };
}
|
||||
|
||||
/**
 * Turns `sources:` entries into parsed tables. A table's name is its
 * `identifier` when present, otherwise its `name`; its schema is the source's
 * `schema`, falling back to the source name. Source tags come before table tags.
 */
private parseSources(sources: DbtSchemaSource[] | undefined): DbtParsedTable[] {
  if (!Array.isArray(sources)) {
    return [];
  }

  return sources.flatMap((source) => {
    const schema = source.schema ?? source.name;
    const database = source.database ?? null;
    const sourceTags = this.normalizeTagList(source.tags);
    if (!Array.isArray(source.tables)) {
      return [];
    }

    return source.tables.map((table): DbtParsedTable => {
      const tagsDbt = this.mergeTagsDbt(sourceTags, this.normalizeTagList(table.tags));
      const freshnessDbt = this.buildFreshnessDbt(table.freshness, table.loaded_at_field);
      return {
        name: table.identifier ?? table.name,
        description: this.normalizeDescription(table.description),
        database,
        schema,
        columns: this.parseColumns(table.columns),
        resourceType: 'source',
        ...(tagsDbt ? { tagsDbt } : {}),
        ...(freshnessDbt ? { freshnessDbt } : {}),
      };
    });
  });
}
|
||||
|
||||
/** Turns `models:` entries into parsed tables, skipping entries without a name. */
private parseModels(models: DbtSchemaModel[] | undefined): DbtParsedTable[] {
  if (!Array.isArray(models)) {
    return [];
  }

  const namedModels = models.filter((model) => Boolean(model.name));
  return namedModels.map((model): DbtParsedTable => {
    const tagsDbt = this.mergeTagsDbt(this.normalizeTagList(model.tags));
    const freshnessDbt = this.buildFreshnessDbt(model.freshness, model.loaded_at_field);
    return {
      name: model.name,
      description: this.normalizeDescription(model.description),
      database: model.database ?? null,
      schema: model.schema ?? null,
      columns: this.parseColumns(model.columns),
      resourceType: 'model',
      ...(tagsDbt ? { tagsDbt } : {}),
      ...(freshnessDbt ? { freshnessDbt } : {}),
    };
  });
}
|
||||
|
||||
/**
 * Converts column entries into parsed columns. Tests are read from
 * `data_tests`, falling back to `tests`; the test-derived fields are only
 * attached when they are non-empty.
 */
private parseColumns(columns: DbtSchemaColumn[] | undefined): DbtParsedColumn[] {
  if (!Array.isArray(columns)) {
    return [];
  }

  return columns.map((column) => {
    const tests = this.parseDataTests(column.data_tests ?? column.tests);
    const parsed: DbtParsedColumn = {
      name: column.name,
      description: this.normalizeDescription(column.description),
      dataType: column.data_type ?? null,
    };
    if (tests.refs.length > 0) {
      parsed.dataTests = tests.refs;
    }
    if (tests.constraints) {
      parsed.constraints = tests.constraints;
    }
    if (tests.enumValues.length > 0) {
      parsed.enumValuesDbt = tests.enumValues;
    }
    return parsed;
  });
}
|
||||
|
||||
/**
 * Extracts test references, `not_null` / `unique` constraints and
 * `accepted_values` enum values from a column's test list.
 *
 * The list comes from user-authored YAML, so it is validated defensively: a
 * value that is not an array yields an empty result, and entries that are
 * neither a string nor a plain mapping (for example `null` from an empty list
 * item, numbers, or nested lists) are skipped instead of crashing the parse.
 *
 * @param tests Raw `data_tests` / `tests` value of a column.
 * @returns `refs` for every recognised test, `constraints` when `not_null` or
 *   `unique` was seen (otherwise `undefined`), and the collected `enumValues`.
 */
private parseDataTests(tests: DbtSchemaDataTest[] | undefined): {
  refs: DbtDataTestRef[];
  constraints: DbtColumnConstraints | undefined;
  enumValues: string[];
} {
  const refs: DbtDataTestRef[] = [];
  const dbt: { not_null?: boolean; unique?: boolean } = {};
  const enumValues: string[] = [];
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  if (!Array.isArray(tests)) {
    return { refs, constraints: undefined, enumValues };
  }

  for (const test of tests) {
    if (typeof test === 'string') {
      // Bare test name, optionally qualified as `package.test`.
      const parsed = this.parseTestNameString(test);
      refs.push(parsed);
      if (parsed.package === 'dbt' && parsed.name === 'not_null') {
        dbt.not_null = true;
      }
      if (parsed.package === 'dbt' && parsed.name === 'unique') {
        dbt.unique = true;
      }
      continue;
    }

    if (!isRecord(test)) {
      // Malformed list item (null, number, nested list): nothing to extract.
      continue;
    }

    for (const [key, value] of Object.entries(test)) {
      // Test arguments are only kept when they are a plain mapping.
      const kwargs = isRecord(value) ? { kwargs: value } : {};

      if (key === 'relationships') {
        refs.push({ name: 'relationships', package: 'dbt', ...kwargs });
        continue;
      }
      if (key === 'not_null') {
        refs.push({ name: 'not_null', package: 'dbt' });
        dbt.not_null = true;
        continue;
      }
      if (key === 'unique') {
        refs.push({ name: 'unique', package: 'dbt' });
        dbt.unique = true;
        continue;
      }
      if (key === 'accepted_values') {
        if (Array.isArray(value)) {
          // Shorthand form: `accepted_values: [a, b]`.
          enumValues.push(...value.map((item) => String(item)));
          refs.push({ name: 'accepted_values', package: 'dbt', kwargs: { values: value } });
          continue;
        }
        if (isRecord(value)) {
          const values = value.values;
          if (Array.isArray(values)) {
            enumValues.push(...values.map((item) => String(item)));
          }
          refs.push({ name: 'accepted_values', package: 'dbt', kwargs: value });
          continue;
        }
        // Any other value falls through and is recorded as a generic test.
      }
      refs.push({ ...this.parseTestNameString(key), ...kwargs });
    }
  }

  const constraints = dbt.not_null || dbt.unique ? { dbt } : undefined;
  return { refs, constraints, enumValues };
}
|
||||
|
||||
/**
 * Splits a test name at its first dot into package and name. A name without a
 * dot belongs to the built-in `dbt` package.
 */
private parseTestNameString(value: string): { name: string; package: string } {
  const firstDot = value.indexOf('.');
  if (firstDot === -1) {
    return { package: 'dbt', name: value };
  }
  return { package: value.slice(0, firstDot), name: value.slice(firstDot + 1) };
}
|
||||
|
||||
/**
 * Collects relationships declared by column tests on source tables. The
 * originating table is named by `identifier` (or `name`) and attributed to the
 * source's `schema` (or the source name).
 */
private parseSourceRelationships(sources: DbtSchemaSource[] | undefined): DbtParsedRelationship[] {
  const found: DbtParsedRelationship[] = [];
  if (!Array.isArray(sources)) {
    return found;
  }

  for (const source of sources) {
    if (!Array.isArray(source.tables)) {
      continue;
    }
    const schema = source.schema ?? source.name;

    for (const table of source.tables) {
      if (!Array.isArray(table.columns)) {
        continue;
      }
      const tableName = table.identifier ?? table.name;

      for (const column of table.columns) {
        for (const test of column.data_tests ?? column.tests ?? []) {
          const relationship = this.parseRelationshipTest(test, tableName, column.name, schema);
          if (relationship) {
            found.push(relationship);
          }
        }
      }
    }
  }

  return found;
}
|
||||
|
||||
/**
 * Collects relationships declared by column tests on models. Models without a
 * name or without a column list are skipped.
 */
private parseModelRelationships(models: DbtSchemaModel[] | undefined): DbtParsedRelationship[] {
  const found: DbtParsedRelationship[] = [];
  if (!Array.isArray(models)) {
    return found;
  }

  for (const model of models) {
    if (!model.name || !Array.isArray(model.columns)) {
      continue;
    }

    for (const column of model.columns) {
      for (const test of column.data_tests ?? column.tests ?? []) {
        const relationship = this.parseRelationshipTest(test, model.name, column.name, model.schema ?? undefined);
        if (relationship) {
          found.push(relationship);
        }
      }
    }
  }

  return found;
}
|
||||
|
||||
/**
 * Converts one data test entry into a relationship, or returns `null` when the
 * entry is not a usable `relationships` test.
 *
 * The target and field are read from the top level of the test and, failing
 * that, from its nested `arguments` mapping. Entries that are not mappings
 * (strings, `null`, numbers) and tests whose `to` is missing or not a string
 * are skipped rather than allowed to throw, since the input is user-authored YAML.
 *
 * @param test Raw test entry from a column's test list.
 * @param fromTable Table that owns the tested column.
 * @param fromColumn Tested column.
 * @param fromSchema Schema of the owning table; omitted from the result when empty.
 */
private parseRelationshipTest(
  test: DbtSchemaDataTest,
  fromTable: string,
  fromColumn: string,
  fromSchema?: string,
): DbtParsedRelationship | null {
  if (test === null || typeof test !== 'object' || !test.relationships) {
    return null;
  }

  const relationship = test.relationships;
  const toRef = relationship.to ?? relationship.arguments?.to;
  const toColumn = relationship.field ?? relationship.arguments?.field;

  if (typeof toRef !== 'string' || !toRef || !toColumn) {
    this.logger.debug(`Skipping incomplete relationship test for ${fromTable}.${fromColumn}`);
    return null;
  }

  const toTable = this.parseRef(toRef);
  if (!toTable) {
    this.logger.debug(`Could not parse ref: ${toRef}`);
    return null;
  }

  return {
    fromTable,
    fromColumn,
    toTable,
    toColumn,
    ...(fromSchema ? { fromSchema } : {}),
  };
}
|
||||
|
||||
/**
 * Extracts the referenced table name from a dbt `ref(...)` or `source(...)`
 * expression.
 *
 * Supported forms:
 * - `ref('model')`
 * - `ref('package', 'model')`, where the model is the second argument
 * - either `ref` form followed by keyword arguments such as `v=2` / `version=2`
 * - `source('source_name', 'table')`, where the table is the second argument
 *
 * @returns The model or table name, or `null` when the text matches none of these forms.
 */
private parseRef(refString: string): string | null {
  // Optional leading package argument, then the captured model name, then
  // optional trailing keyword arguments up to the closing parenthesis.
  const refMatch = refString.match(/ref\s*\(\s*(?:['"][^'"]+['"]\s*,\s*)?['"]([^'"]+)['"]\s*(?:,[^)]*)?\)/);
  if (refMatch) {
    return refMatch[1] ?? null;
  }

  const sourceMatch = refString.match(/source\s*\(\s*['"][^'"]+['"]\s*,\s*['"]([^'"]+)['"]\s*\)/);
  if (sourceMatch) {
    return sourceMatch[1] ?? null;
  }

  return null;
}
|
||||
|
||||
/**
 * Trims a description and maps "nothing useful" to `null`.
 *
 * The value comes from untyped YAML, so anything that is not a string (a
 * missing key, a number, a mapping, ...) is treated as no description instead
 * of failing on `.trim()`.
 */
private normalizeDescription(description: string | undefined): string | null {
  if (typeof description !== 'string') {
    return null;
  }
  const trimmed = description.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
||||
|
||||
/**
 * Normalizes a dbt `tags` value to a list of strings.
 *
 * dbt accepts either a single string or a list for `tags`; a non-empty single
 * string becomes a one-element list. Missing or otherwise unusable values
 * yield an empty list, and list items are stringified.
 */
private normalizeTagList(tags: string[] | string | undefined): string[] {
  if (typeof tags === 'string') {
    return tags.length > 0 ? [tags] : [];
  }
  if (!Array.isArray(tags)) {
    return [];
  }
  return tags.map((tag) => String(tag));
}
|
||||
|
||||
/**
 * Concatenates tag lists, keeping only the first occurrence of each tag and
 * preserving first-seen order. Returns `undefined` when no tags remain.
 */
private mergeTagsDbt(...lists: Array<string[] | undefined>): string[] | undefined {
  // A Set keeps insertion order, so first occurrences define the result order.
  const unique = new Set<string>();
  for (const list of lists) {
    for (const tag of list ?? []) {
      unique.add(tag);
    }
  }
  return unique.size > 0 ? Array.from(unique) : undefined;
}
|
||||
|
||||
/**
 * Builds the freshness metadata for a table.
 *
 * - Neither a freshness block nor a usable `loaded_at_field`: `undefined`.
 * - Freshness block present: `{ raw, loadedAtField }`, with `loadedAtField`
 *   set to `null` when the field is missing or blank.
 * - Only `loaded_at_field` present: `{ loadedAtField }`.
 *
 * A blank or non-string `loaded_at_field` is treated as absent in every branch,
 * so the result never carries an empty-string field name.
 */
private buildFreshnessDbt(freshness: unknown, loadedAtField: string | undefined): DbtParsedTable['freshnessDbt'] {
  const trimmed = typeof loadedAtField === 'string' ? loadedAtField.trim() : '';
  const loadedAt = trimmed.length > 0 ? trimmed : null;
  const hasFreshness = freshness !== undefined && freshness !== null;

  if (!hasFreshness) {
    return loadedAt ? { loadedAtField: loadedAt } : undefined;
  }
  return { raw: freshness, loadedAtField: loadedAt };
}
|
||||
|
||||
/**
 * Collapses tables that share the same database, schema and name (compared
 * case-insensitively). The first occurrence wins for scalar fields; a later
 * duplicate only fills in a missing description and contributes its columns,
 * tags and freshness through the merge helpers.
 */
private deduplicateTables(tables: DbtParsedTable[]): DbtParsedTable[] {
  const byKey = new Map<string, DbtParsedTable>();

  for (const table of tables) {
    const key = `${table.database ?? ''}.${table.schema ?? ''}.${table.name}`.toLowerCase();
    const previous = byKey.get(key);

    const merged: DbtParsedTable = previous
      ? {
          ...previous,
          description: previous.description ?? table.description,
          columns: this.mergeColumns(previous.columns, table.columns),
          tagsDbt: this.mergeTagsDbt(previous.tagsDbt, table.tagsDbt),
          freshnessDbt: this.mergeFreshnessDbt(previous.freshnessDbt, table.freshnessDbt),
        }
      : table;
    byKey.set(key, merged);
  }

  return Array.from(byKey.values());
}
|
||||
|
||||
/**
 * Merges two column lists by lower-cased column name.
 *
 * Columns only present in `incoming` are appended. For columns present in both, the existing
 * column wins for `description` and `dataType` (incoming fills gaps), while data tests,
 * constraints and enum values are combined through their merge helpers.
 *
 * @param existing - Columns already collected for the table.
 * @param incoming - Columns from a later definition of the same table.
 * @returns Merged columns in order of first appearance.
 */
private mergeColumns(existing: DbtParsedColumn[], incoming: DbtParsedColumn[]): DbtParsedColumn[] {
  // Seed the index from the existing columns; a repeated name keeps its first position
  // but takes the later value, exactly as sequential `set` calls would.
  const byName = new Map<string, DbtParsedColumn>(
    existing.map((column): [string, DbtParsedColumn] => [column.name.toLowerCase(), column]),
  );

  for (const next of incoming) {
    const nameKey = next.name.toLowerCase();
    const current = byName.get(nameKey);

    if (current) {
      byName.set(nameKey, {
        ...current,
        description: current.description ?? next.description,
        dataType: current.dataType ?? next.dataType,
        dataTests: this.mergeDbtDataTests(current.dataTests, next.dataTests),
        constraints: this.mergeDbtConstraints(current.constraints, next.constraints),
        enumValuesDbt: this.mergeStringList(current.enumValuesDbt, next.enumValuesDbt),
      });
    } else {
      byName.set(nameKey, next);
    }
  }

  return [...byName.values()];
}
|
||||
|
||||
/**
 * Removes repeated relationships, keeping the first occurrence of each.
 *
 * Two relationships are the same when their `fromTable.fromColumn->toTable.toColumn`
 * signature matches case-insensitively.
 *
 * @param relationships - Parsed relationships, possibly containing repeats.
 * @returns The distinct relationships in their original order.
 */
private deduplicateRelationships(relationships: DbtParsedRelationship[]): DbtParsedRelationship[] {
  const emitted = new Set<string>();

  return relationships.filter((edge) => {
    const signature = `${edge.fromTable}.${edge.fromColumn}->${edge.toTable}.${edge.toColumn}`.toLowerCase();
    if (emitted.has(signature)) {
      return false;
    }
    emitted.add(signature);
    return true;
  });
}
|
||||
|
||||
/**
 * Combines the freshness payloads of two definitions of the same table.
 *
 * The existing payload takes precedence field by field: `raw` falls back to the incoming value
 * only when the existing one is `undefined`, and `loadedAtField` falls back when the existing
 * one is `null` or `undefined`.
 *
 * @param existing - Freshness already recorded for the table, if any.
 * @param incoming - Freshness from the later definition, if any.
 * @returns `undefined` when both inputs are missing; otherwise an object holding only the
 *   fields that resolved to something other than `undefined`.
 */
private mergeFreshnessDbt(
  existing?: DbtParsedTable['freshnessDbt'],
  incoming?: DbtParsedTable['freshnessDbt'],
): DbtParsedTable['freshnessDbt'] {
  if (!(existing || incoming)) {
    return undefined;
  }

  const raw = existing?.raw === undefined ? incoming?.raw : existing.raw;
  const loadedAtField = existing?.loadedAtField ?? incoming?.loadedAtField;

  return {
    ...(raw === undefined ? {} : { raw }),
    ...(loadedAtField === undefined ? {} : { loadedAtField }),
  };
}
|
||||
|
||||
/**
 * Unions the `not_null` / `unique` constraint flags of two column definitions.
 *
 * @param existing - Constraints already recorded for the column, if any.
 * @param incoming - Constraints from the later definition, if any.
 * @returns `undefined` when neither flag is set on either side; otherwise a `dbt` object that
 *   contains only the flags that are set, each as `true`.
 */
private mergeDbtConstraints(
  existing?: DbtColumnConstraints,
  incoming?: DbtColumnConstraints,
): DbtColumnConstraints | undefined {
  const notNull = Boolean(existing?.dbt.not_null || incoming?.dbt.not_null);
  const unique = Boolean(existing?.dbt.unique || incoming?.dbt.unique);

  if (!(notNull || unique)) {
    return undefined;
  }

  return {
    dbt: {
      ...(notNull ? { not_null: true } : {}),
      ...(unique ? { unique: true } : {}),
    },
  };
}
|
||||
|
||||
/**
 * Merges two optional string lists, dropping repeated values and keeping first-seen order.
 *
 * @returns The combined list, or `undefined` when both inputs are missing or empty.
 */
private mergeStringList(existing?: string[], incoming?: string[]): string[] | undefined {
  const values = new Set<string>([...(existing ?? []), ...(incoming ?? [])]);
  return values.size > 0 ? Array.from(values) : undefined;
}
|
||||
|
||||
/**
 * Merges the data tests of two definitions of the same column.
 *
 * Tests are identified by `package:name`, plus a short hash of their kwargs when any are
 * present. When the same identity occurs more than once, the later occurrence replaces the
 * earlier one but keeps its position.
 *
 * @param existing - Tests already recorded for the column, if any.
 * @param incoming - Tests from the later definition, if any.
 * @returns A copy of whichever side is non-empty, the de-duplicated union when both are,
 *   or `undefined` when both are missing or empty.
 */
private mergeDbtDataTests(existing?: DbtDataTestRef[], incoming?: DbtDataTestRef[]): DbtDataTestRef[] | undefined {
  if (!existing?.length) {
    return incoming?.length ? [...incoming] : undefined;
  }
  if (!incoming?.length) {
    return [...existing];
  }

  // JSON.stringify follows property insertion order, so `{a, b}` and `{b, a}` would hash
  // differently. Sorting object keys (recursively, through the replacer) makes the identity
  // independent of the order in which kwargs were written. Array order is preserved.
  const sortObjectKeys = (_key: string, value: unknown): unknown =>
    value !== null && typeof value === 'object' && !Array.isArray(value)
      ? Object.fromEntries(Object.entries(value).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0)))
      : value;

  const tests = new Map<string, DbtDataTestRef>();
  for (const test of [...existing, ...incoming]) {
    const kwargsKey =
      test.kwargs && Object.keys(test.kwargs).length > 0
        ? `:${createHash('sha256').update(JSON.stringify(test.kwargs, sortObjectKeys)).digest('hex').slice(0, 16)}`
        : '';
    tests.set(`${test.package}:${test.name}${kwargsKey}`, test);
  }
  return [...tests.values()];
}
|
||||
|
||||
/**
 * Produces a parse result with no tables and no relationships.
 *
 * @param projectName - Project name to carry into the result, or `null` when unknown.
 * @returns A result with `dbtVersion` set to `null` and empty `tables` / `relationships`.
 */
private emptyResult(projectName: string | null): DbtSchemaParseResult {
  const result: DbtSchemaParseResult = { projectName, dbtVersion: null, tables: [], relationships: [] };
  return result;
}
|
||||
}
|
||||
36
packages/cli/src/context/ingest/adapters/dbt/chunk.test.ts
Normal file
36
packages/cli/src/context/ingest/adapters/dbt/chunk.test.ts
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { chunkDbtProject } from './chunk.js';
|
||||
|
||||
describe('chunkDbtProject', () => {
  /** Generates `count` model schema paths: `models/m0.yml`, `models/m1.yml`, … */
  const modelYamlPaths = (count: number): string[] =>
    Array.from({ length: count }, (_, index) => `models/m${index}.yml`);

  /** Builds a diff set in which only `modified` is populated. */
  const diffSet = (modified: string[]) => ({ added: [], modified, deleted: [], unchanged: [] });

  it('caps peerFileIndex when the project has very many yaml files', () => {
    // 201 model files plus dbt_project.yml leaves every unit with 201 peers, one above the cap.
    const allPaths = ['dbt_project.yml', ...modelYamlPaths(201)].sort();

    const result = chunkDbtProject({ allPaths });
    const first = result.workUnits[0];

    expect(first).toBeDefined();
    expect(first?.peerFileIndex).toHaveLength(200);
    expect(first?.notes).toMatch(/capped at 200/);
  });

  it('keeps large-project model work units when dbt_project.yml changes', () => {
    const allPaths = ['dbt_project.yml', ...modelYamlPaths(30)].sort();
    const changed = diffSet(['dbt_project.yml']);

    const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: changed });

    expect(workUnits).toHaveLength(30);
    expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']);
    expect(workUnits[0]?.dependencyPaths).toContain('dbt_project.yml');
  });

  it('keeps large-project model work units when non-model yaml peers change', () => {
    const allPaths = ['dbt_project.yml', 'seeds/seed_properties.yml', ...modelYamlPaths(30)].sort();
    const changed = diffSet(['seeds/seed_properties.yml']);

    const { workUnits } = chunkDbtProject({ allPaths }, { diffSet: changed });

    expect(workUnits).toHaveLength(30);
    expect(workUnits[0]?.rawFiles).toEqual(['models/m0.yml']);
    expect(workUnits[0]?.dependencyPaths).toContain('seeds/seed_properties.yml');
  });
});
|
||||
130
packages/cli/src/context/ingest/adapters/dbt/chunk.ts
Normal file
130
packages/cli/src/context/ingest/adapters/dbt/chunk.ts
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
import type { ChunkResult, DiffSet, WorkUnit } from '../../types.js';
|
||||
import type { ParsedDbtProject } from './parse.js';
|
||||
|
||||
/** Optional inputs for `chunkDbtProject`. */
interface ChunkOptions {
  /**
   * Changes since the previous run. When present, first-run work units are filtered through
   * `applyDiffSet`; when absent, every first-run work unit is returned.
   */
  diffSet?: DiffSet;
}
|
||||
|
||||
/**
 * Path prefix that marks a file as a model schema file.
 *
 * In projects with more than 25 YAML files, work units are emitted per model file and their
 * `rawFiles` only ever name paths under this prefix. YAML elsewhere (for example under `seeds/`
 * or in custom layouts) is still listed in `peerFileIndex`, or handled by the small-project and
 * no-models fallbacks; v1 does not emit one work unit per non-model file.
 */
const MODELS_PREFIX = 'models/';

/**
 * Upper bound on `peerFileIndex` entries per work unit. The index is only a hint (agents may
 * not read those paths), so it is capped to limit prompt size.
 */
const MAX_PEER_FILE_INDEX = 200;
|
||||
|
||||
/**
 * Finds the project file among the staged paths.
 *
 * @param allPaths - Paths of the staged project files.
 * @returns `'dbt_project.yml'` if listed, else `'dbt_project.yaml'` if listed, else `undefined`.
 */
function projectYamlPath(allPaths: string[]): string | undefined {
  // Candidates are ordered by preference: the `.yml` spelling wins when both are present.
  const candidates = ['dbt_project.yml', 'dbt_project.yaml'];
  return candidates.find((candidate) => allPaths.includes(candidate));
}
|
||||
|
||||
/**
 * Selects the paths that live under `models/`.
 *
 * Backslashes are treated as separators for the prefix check only; the returned entries are
 * the original, unmodified path strings.
 *
 * @param allPaths - Paths of the staged project files.
 * @returns The matching paths in default string sort order (a new array).
 */
function modelRelativePaths(allPaths: string[]): string[] {
  const underModels: string[] = [];
  for (const path of allPaths) {
    const posixPath = path.replace(/\\/g, '/');
    if (posixPath.startsWith(MODELS_PREFIX)) {
      underModels.push(path);
    }
  }
  return underModels.sort();
}
|
||||
|
||||
/**
 * Derives a work-unit key from a model file path.
 *
 * @param mf - Model file path, e.g. `models/marts/orders.yml`.
 * @returns `dbt-` followed by the lower-cased path without its `.yml` / `.yaml` extension,
 *   with every run of non-alphanumeric characters collapsed to a single `-` and no leading or
 *   trailing `-`.
 */
function unitKeyForModelFile(mf: string): string {
  const withoutExtension = mf.replace(/\.(ya?ml)$/i, '');
  const posixPath = withoutExtension.replace(/\\/g, '/');
  const dashed = posixPath.replace(/[^a-zA-Z0-9]+/g, '-');
  const slug = dashed.replace(/^-+|-+$/g, '');
  return 'dbt-' + slug.toLowerCase();
}
|
||||
|
||||
/**
 * Builds the work units for a run with no diff information.
 *
 * - No paths: no work units.
 * - At most 25 paths: a single `dbt-all` unit holding every path.
 * - More than 25 paths but none under `models/`: a single `dbt-all` unit holding every path,
 *   with the project file as its dependency when one was found.
 * - Otherwise: one unit per model file. Each unit lists every other path (sorted, capped at
 *   `MAX_PEER_FILE_INDEX`) as `peerFileIndex` and depends on the project file when present.
 *
 * @param allPaths - Paths of the staged project files. Not modified.
 * @param dbtDep - Path of the project file, if the project has one.
 * @returns The work units in the order described above.
 */
function emitFirstRunWorkUnits(allPaths: string[], dbtDep: string | undefined): WorkUnit[] {
  // Projects up to this size are handled as one work unit.
  const SINGLE_UNIT_MAX_FILES = 25;

  if (allPaths.length === 0) {
    return [];
  }

  if (allPaths.length <= SINGLE_UNIT_MAX_FILES) {
    return [
      {
        unitKey: 'dbt-all',
        displayLabel: 'dbt project (all yaml)',
        rawFiles: [...allPaths],
        peerFileIndex: [],
        dependencyPaths: [],
        notes: 'dbt project — all YAML in one WorkUnit (≤25 files)',
      },
    ];
  }

  const modelFiles = modelRelativePaths(allPaths);
  if (modelFiles.length === 0) {
    return [
      {
        unitKey: 'dbt-all',
        displayLabel: 'dbt project (all yaml, no models/**)',
        rawFiles: [...allPaths],
        peerFileIndex: [],
        dependencyPaths: dbtDep ? [dbtDep] : [],
        notes: 'dbt: no models/**/*.yml — single slice with dbt_project as dependency if present',
      },
    ];
  }

  // Loop-invariant work, done once instead of once per model file: filtering a sorted copy
  // yields the same order as sorting each filtered list, and project-file membership does
  // not depend on the model being processed.
  const sortedPaths = [...allPaths].sort();
  const projectFile = dbtDep && allPaths.includes(dbtDep) ? dbtDep : undefined;

  return modelFiles.map((mf) => {
    const allPeers = sortedPaths.filter((p) => p !== mf);
    const truncated = allPeers.length > MAX_PEER_FILE_INDEX;
    const peerFileIndex = truncated ? allPeers.slice(0, MAX_PEER_FILE_INDEX) : allPeers;
    const dependencyPaths = projectFile !== undefined && mf !== projectFile ? [projectFile] : [];
    const notes = truncated
      ? `dbt model schema slice (peer index capped at ${MAX_PEER_FILE_INDEX} of ${allPeers.length} paths)`
      : 'dbt model schema slice';
    return {
      unitKey: unitKeyForModelFile(mf),
      displayLabel: `dbt ${mf}`,
      rawFiles: [mf],
      peerFileIndex,
      dependencyPaths,
      notes,
    };
  });
}
|
||||
|
||||
/**
 * Narrows first-run work units to those affected by a diff.
 *
 * A unit is kept when any of its raw files, dependencies or indexed peer files was added or
 * modified. For a kept unit:
 * - `rawFiles` becomes the touched raw files, or all raw files when none of them was touched
 *   (the unit was kept because of a dependency or peer change);
 * - `dependencyPaths` becomes the union of the original dependencies, the untouched raw files
 *   (only when some raw file was touched) and the touched peer files.
 * Both lists are returned sorted.
 *
 * Only peers present in `peerFileIndex` are considered, so a changed peer that was cut off by
 * the index cap does not keep a unit.
 *
 * @param firstRunUnits - Work units produced for a full run. Neither the array nor the units
 *   are modified.
 * @param diffSet - Added, modified and deleted paths since the previous run.
 * @returns The kept units, plus an `eviction` entry listing the deleted paths (sorted) when
 *   there are any.
 */
function applyDiffSet(firstRunUnits: WorkUnit[], diffSet: DiffSet): ChunkResult {
  const touched = new Set([...diffSet.added, ...diffSet.modified]);
  const kept: WorkUnit[] = [];

  for (const wu of firstRunUnits) {
    const touchedRawFiles = wu.rawFiles.filter((p) => touched.has(p));
    const touchedDependencies = wu.dependencyPaths.filter((p) => touched.has(p));
    const touchedPeerFiles = wu.peerFileIndex.filter((p) => touched.has(p));
    if (touchedRawFiles.length === 0 && touchedDependencies.length === 0 && touchedPeerFiles.length === 0) {
      continue;
    }

    const hasTouchedRaw = touchedRawFiles.length > 0;
    // Copy before sorting: without the copy, the fallback branch would sort the input
    // unit's own `rawFiles` array in place.
    const rawFiles = hasTouchedRaw ? touchedRawFiles : [...wu.rawFiles];
    // Raw files that did not change stay available to the unit as dependencies.
    const unchangedRaw = hasTouchedRaw ? wu.rawFiles.filter((p) => !touched.has(p)) : [];
    const combinedDeps = new Set<string>([...wu.dependencyPaths, ...unchangedRaw, ...touchedPeerFiles]);

    kept.push({
      ...wu,
      rawFiles: rawFiles.sort(),
      dependencyPaths: [...combinedDeps].sort(),
    });
  }

  const eviction = diffSet.deleted.length > 0 ? { deletedRawPaths: [...diffSet.deleted].sort() } : undefined;
  return { workUnits: kept, eviction };
}
|
||||
|
||||
/**
 * Splits a parsed dbt project into work units.
 *
 * @param project - Parsed project; only `allPaths` is read here.
 * @param opts - Optional diff set. Without one, every first-run work unit is returned; with
 *   one, the first-run units are narrowed by `applyDiffSet`.
 * @returns The work units, plus eviction data when a diff set with deletions was supplied.
 */
export function chunkDbtProject(project: ParsedDbtProject, opts: ChunkOptions = {}): ChunkResult {
  const { allPaths } = project;
  const { diffSet } = opts;

  const firstRun = emitFirstRunWorkUnits(allPaths, projectYamlPath(allPaths));
  return diffSet ? applyDiffSet(firstRun, diffSet) : { workUnits: firstRun };
}
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { SourceAdapter } from '../../types.js';
|
||||
import { DbtSourceAdapter } from './dbt.adapter.js';
|
||||
|
||||
describe('DbtSourceAdapter', () => {
  let stagedDir: string;
  let adapter: SourceAdapter;

  /** Writes a UTF-8 file at `relativePath` inside the staged directory. */
  const writeStaged = async (relativePath: string, contents: string): Promise<void> => {
    await writeFile(join(stagedDir, relativePath), contents, 'utf-8');
  };

  beforeEach(async () => {
    // Each test gets its own scratch directory and a default-configured adapter.
    stagedDir = await mkdtemp(join(tmpdir(), 'dbt-adapter-'));
    adapter = new DbtSourceAdapter();
  });

  afterEach(async () => {
    await rm(stagedDir, { recursive: true, force: true });
  });

  it('declares the expected source key and skill list', () => {
    expect(adapter.source).toBe('dbt');
    expect(adapter.skillNames).toEqual(['dbt_ingest']);
  });

  it('detects a staged dbt project root (dbt_project.yml)', async () => {
    await writeStaged('dbt_project.yml', "name: 'jaffle'\nversion: '1.0.0'\n");

    const detected = await adapter.detect(stagedDir);

    expect(detected).toBe(true);
  });

  it('chunk: dbt_project.yml + models/a.yml yields one WU (≤25 files)', async () => {
    await writeStaged('dbt_project.yml', "name: 'jaffle'\n");
    await mkdir(join(stagedDir, 'models'), { recursive: true });
    await writeStaged('models/a.yml', 'version: 2\nmodels:\n  - name: orders\n    description: Orders\n');

    const result = await adapter.chunk(stagedDir);

    expect(result.workUnits).toHaveLength(1);
    expect(result.workUnits[0].unitKey).toBe('dbt-all');
    expect(result.parseArtifacts).toMatchObject({
      projectName: 'jaffle',
      tables: [{ name: 'orders', description: 'Orders' }],
    });
  });

  it('implements fetch() for git-backed dbt source setup', () => {
    expect(adapter.fetch).toBeTypeOf('function');
  });

  it('reports mapped warehouse targets for bundle SL discovery', async () => {
    // The same id is configured twice; the adapter is expected to report it once.
    adapter = new DbtSourceAdapter({ targetConnectionIds: ['postgres-warehouse', 'postgres-warehouse'] });

    const targets = adapter.listTargetConnectionIds?.(stagedDir);

    await expect(targets).resolves.toEqual(['postgres-warehouse']);
  });
});
|
||||
53
packages/cli/src/context/ingest/adapters/dbt/dbt.adapter.ts
Normal file
53
packages/cli/src/context/ingest/adapters/dbt/dbt.adapter.ts
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import { join } from 'node:path';
|
||||
import type { ChunkResult, DiffSet, SourceAdapter } from '../../types.js';
|
||||
import type { FetchContext } from '../../types.js';
|
||||
import { loadProjectInfo } from '../../dbt-shared/project-vars.js';
|
||||
import { loadDbtSchemaFiles } from '../../dbt-shared/schema-files.js';
|
||||
import { parseDbtSchemaFiles } from '../dbt-descriptions/parse-schema.js';
|
||||
import { chunkDbtProject } from './chunk.js';
|
||||
import { detectDbtStagedDir } from './detect.js';
|
||||
import { fetchDbtRepo, type DbtPullConfig } from './fetch.js';
|
||||
import { parseDbtStagedDir } from './parse.js';
|
||||
|
||||
/** Construction options for `DbtSourceAdapter`. */
interface DbtSourceAdapterOptions {
  /** Base directory for the per-connection git cache; `fetch` falls back to `.ktx/cache`. */
  homeDir?: string;
  /** Connection ids reported by `listTargetConnectionIds` (de-duplicated and sorted there). */
  targetConnectionIds?: string[];
}
|
||||
|
||||
/**
 * Source adapter for dbt projects: detects a staged project, fetches one from git, and chunks
 * its YAML into work units together with the parsed schema artifacts.
 */
export class DbtSourceAdapter implements SourceAdapter {
  readonly source = 'dbt' as const;
  /** Runner merges: ingest_triage, sl_capture, wiki_capture (see ingest-bundle.runner.ts) */
  readonly skillNames: string[] = ['dbt_ingest'];

  constructor(private readonly options: DbtSourceAdapterOptions = {}) {}

  /** Resolves to `true` when `stagedDir` contains a dbt project file. */
  detect(stagedDir: string): Promise<boolean> {
    return detectDbtStagedDir(stagedDir);
  }

  /**
   * Lists the configured target connection ids, without repeats and in `localeCompare` order.
   * The staged directory is not consulted.
   */
  async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
    return [...new Set(this.options.targetConnectionIds ?? [])].sort((left, right) => left.localeCompare(right));
  }

  /**
   * Clones or updates the configured repository and stages its YAML files.
   *
   * The git cache lives under `<homeDir or .ktx/cache>/dbt/<connectionId>`.
   *
   * @param pullConfig - Expected to be a `DbtPullConfig`; validated at runtime.
   * @param stagedDir - Directory that receives the copied YAML files.
   * @param ctx - Fetch context; only `connectionId` is read.
   * @throws Error `dbt fetch requires repoUrl` when `repoUrl` is missing, empty or not a string.
   */
  async fetch(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void> {
    // `pullConfig` arrives as `unknown`, so check the one required field at runtime instead of
    // trusting a cast: a non-string `repoUrl` would otherwise be handed to the git layer.
    const candidate = pullConfig as Partial<DbtPullConfig> | null | undefined;
    if (typeof candidate?.repoUrl !== 'string' || candidate.repoUrl.length === 0) {
      throw new Error('dbt fetch requires repoUrl');
    }
    await fetchDbtRepo({
      config: pullConfig as DbtPullConfig,
      cacheDir: join(this.options.homeDir ?? '.ktx/cache', 'dbt', ctx.connectionId),
      stagedDir,
    });
  }

  /**
   * Chunks the staged project into work units and attaches the parsed schema artifacts.
   *
   * @param stagedDir - Directory holding the staged project files.
   * @param diffSet - Optional changes since the previous run, forwarded to `chunkDbtProject`.
   */
  async chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult> {
    const project = await parseDbtStagedDir(stagedDir);
    const projectInfo = await loadProjectInfo(stagedDir);
    const schemaFiles = await loadDbtSchemaFiles(stagedDir);
    const parseArtifacts = parseDbtSchemaFiles(schemaFiles, projectInfo.variables, {
      projectName: projectInfo.projectName,
    });
    return { ...chunkDbtProject(project, { diffSet }), parseArtifacts };
  }
}
|
||||
12
packages/cli/src/context/ingest/adapters/dbt/detect.ts
Normal file
12
packages/cli/src/context/ingest/adapters/dbt/detect.ts
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
import { access } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
|
||||
/**
 * Reports whether a directory looks like a dbt project root.
 *
 * @param stagedDir - Directory to inspect.
 * @returns `true` when `dbt_project.yml` or `dbt_project.yaml` is accessible directly inside
 *   `stagedDir`, `false` otherwise. Access errors are treated as "not present".
 */
export async function detectDbtStagedDir(stagedDir: string): Promise<boolean> {
  const isPresent = async (fileName: string): Promise<boolean> => {
    try {
      await access(join(stagedDir, fileName));
      return true;
    } catch {
      // Any failure to access the file counts as absence; detection is best-effort.
      return false;
    }
  };

  // Checked one after the other; the `.yaml` spelling is only probed when `.yml` is missing.
  return (await isPresent('dbt_project.yml')) || (await isPresent('dbt_project.yaml'));
}
|
||||
38
packages/cli/src/context/ingest/adapters/dbt/fetch.test.ts
Normal file
38
packages/cli/src/context/ingest/adapters/dbt/fetch.test.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { fetchDbtRepo } from './fetch.js';
|
||||
|
||||
describe('fetchDbtRepo', () => {
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-dbt-fetch-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('copies dbt yaml files from a fetched repo subpath into staged dir', async () => {
    const cacheDir = join(tempDir, 'cache');
    const stagedDir = join(tempDir, 'staged');
    const projectDir = join(cacheDir, 'analytics');

    // Pre-populate the cache as if the repository had already been cloned there.
    await mkdir(join(projectDir, 'models'), { recursive: true });
    await writeFile(join(projectDir, 'dbt_project.yml'), 'name: analytics\n', 'utf-8');
    await writeFile(join(projectDir, 'models', 'orders.yml'), 'models: []\n', 'utf-8');

    // The git step is stubbed out; only the copy into the staged directory runs for real.
    const cloneOrPull = vi.fn(async () => ({ commitHash: 'abc123' }));

    const outcome = fetchDbtRepo({
      config: { repoUrl: 'https://github.com/acme/dbt.git', path: 'analytics' },
      cacheDir,
      stagedDir,
      deps: { cloneOrPull },
    });
    await expect(outcome).resolves.toEqual({ commitHash: 'abc123', filesCopied: 2 });

    const stagedProject = readFile(join(stagedDir, 'dbt_project.yml'), 'utf-8');
    await expect(stagedProject).resolves.toContain('analytics');
    const stagedModel = readFile(join(stagedDir, 'models', 'orders.yml'), 'utf-8');
    await expect(stagedModel).resolves.toContain('models');
  });
});
|
||||
60
packages/cli/src/context/ingest/adapters/dbt/fetch.ts
Normal file
60
packages/cli/src/context/ingest/adapters/dbt/fetch.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import { access, copyFile, mkdir, readdir } from 'node:fs/promises';
|
||||
import { dirname, join, relative } from 'node:path';
|
||||
import { cloneOrPull, sanitizeRepoError } from '../../repo-fetch.js';
|
||||
|
||||
/** Where and how to pull a dbt project from git. */
export interface DbtPullConfig {
  /** Repository to clone or update. */
  repoUrl: string;
  /** Branch to check out; `fetchDbtRepo` uses `main` when omitted. */
  branch?: string;
  /** Sub-directory of the repository to copy YAML from; the repository root when omitted. */
  path?: string;
  /** Token handed to the git step and to error sanitization. */
  authToken?: string | null;
}
|
||||
|
||||
/** Inputs for `fetchDbtRepo`. */
export interface FetchDbtRepoParams {
  /** Repository location and credentials. */
  config: DbtPullConfig;
  /** Directory that holds the git checkout. */
  cacheDir: string;
  /** Directory that receives the copied YAML files. */
  stagedDir: string;
  /** Overridable collaborators; each falls back to the real implementation when omitted. */
  deps?: {
    cloneOrPull?: typeof cloneOrPull;
  };
}
|
||||
|
||||
/**
 * Clones or updates a dbt repository into the cache, then copies its YAML files to staging.
 *
 * @param params - Repository config, cache and staging directories, and optional overrides.
 * @returns The checked-out commit hash and the number of files copied.
 * @throws Error whose message is the original failure passed through `sanitizeRepoError`
 *   together with the auth token.
 */
export async function fetchDbtRepo(params: FetchDbtRepoParams): Promise<{ commitHash: string; filesCopied: number }> {
  const { config, cacheDir, stagedDir, deps } = params;

  try {
    const runCloneOrPull = deps?.cloneOrPull ?? cloneOrPull;
    const { commitHash } = await runCloneOrPull({
      repoUrl: config.repoUrl,
      authToken: config.authToken,
      cacheDir,
      branch: config.branch ?? 'main',
    });

    // Copy from the configured sub-directory when there is one, else from the checkout root.
    let sourceRoot = cacheDir;
    if (config.path) {
      sourceRoot = join(cacheDir, config.path);
    }

    return { commitHash, filesCopied: await copyYamlFilesRecursive(sourceRoot, stagedDir) };
  } catch (error) {
    throw new Error(sanitizeRepoError(error, config.authToken));
  }
}
|
||||
|
||||
/**
 * Copies every `.yml` / `.yaml` file under `sourceRoot` into `destRoot`, keeping relative paths.
 *
 * Anything inside a `.git` directory is skipped. `sourceRoot` may be the root of a git
 * checkout, where ref files for branches named like `fix.yml` would otherwise be copied as if
 * they were project YAML.
 *
 * @param sourceRoot - Directory to walk recursively.
 * @param destRoot - Directory to copy into; created (with parents) when missing.
 * @returns The number of files copied, or `0` when `sourceRoot` is not accessible (in which
 *   case `destRoot` is not created).
 */
async function copyYamlFilesRecursive(sourceRoot: string, destRoot: string): Promise<number> {
  try {
    await access(sourceRoot);
  } catch {
    // A missing or unreadable source is treated as "nothing to copy", not as an error.
    return 0;
  }

  const yamlFile = /\.ya?ml$/i;
  const pathSeparator = /[\\/]/;

  await mkdir(destRoot, { recursive: true });
  const entries = await readdir(sourceRoot, { withFileTypes: true, recursive: true });
  let copied = 0;
  for (const entry of entries) {
    if (!entry.isFile() || !yamlFile.test(entry.name)) {
      continue;
    }
    const absSrc = join(entry.parentPath, entry.name);
    const rel = relative(sourceRoot, absSrc);
    if (rel.split(pathSeparator).includes('.git')) {
      // Git metadata, not project content.
      continue;
    }
    const dest = join(destRoot, rel);
    await mkdir(dirname(dest), { recursive: true });
    await copyFile(absSrc, dest);
    copied += 1;
  }
  return copied;
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { normalizeDbtPath } from './parse.js';
|
||||
|
||||
describe('normalizeDbtPath', () => {
  it('normalizes Windows separators to POSIX separators', () => {
    const windowsPath = 'models\\marts\\orders.yml';

    const normalized = normalizeDbtPath(windowsPath);

    expect(normalized).toBe('models/marts/orders.yml');
  });
});
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue