mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. - Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. * refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
This commit is contained in:
parent
a1cfb03d73
commit
2366b00301
1002 changed files with 2286 additions and 12051 deletions
341
packages/cli/src/connectors/sqlserver/connector.test.ts
Normal file
341
packages/cli/src/connectors/sqlserver/connector.test.ts
Normal file
|
|
@ -0,0 +1,341 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createSqlServerLiveDatabaseIntrospection } from '../../connectors/sqlserver/live-database-introspection.js';
|
||||
import { isKtxSqlServerConnectionConfig, KtxSqlServerScanConnector, sqlServerConnectionPoolConfigFromConfig, type KtxSqlServerPoolFactory, type KtxSqlServerQueryResult } from '../../connectors/sqlserver/connector.js';
|
||||
|
||||
function recordset<T extends Record<string, unknown>>(
|
||||
rows: T[],
|
||||
columnNames: string[],
|
||||
): T[] & { columns: Record<string, { type: { declaration: string } }> } {
|
||||
const withColumns = rows as T[] & { columns: Record<string, { type: { declaration: string } }> };
|
||||
withColumns.columns = Object.fromEntries(columnNames.map((name) => [name, { type: { declaration: 'nvarchar' } }]));
|
||||
return withColumns;
|
||||
}
|
||||
|
||||
function result<T extends Record<string, unknown>>(rows: T[], columnNames: string[]): KtxSqlServerQueryResult {
|
||||
return { recordset: recordset(rows, columnNames) };
|
||||
}
|
||||
|
||||
function fakePoolFactory(): KtxSqlServerPoolFactory {
|
||||
const query = vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
|
||||
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
|
||||
return result(
|
||||
[
|
||||
{ table_name: 'customers', table_type: 'BASE TABLE' },
|
||||
{ table_name: 'orders', table_type: 'BASE TABLE' },
|
||||
{ table_name: 'order_summary', table_type: 'VIEW' },
|
||||
],
|
||||
['table_name', 'table_type'],
|
||||
);
|
||||
}
|
||||
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = 0')) {
|
||||
return result([{ table_name: 'customers', table_comment: 'Customer table' }], [
|
||||
'table_name',
|
||||
'table_comment',
|
||||
]);
|
||||
}
|
||||
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = c.column_id')) {
|
||||
return result([{ table_name: 'customers', column_name: 'id', column_comment: 'PK' }], [
|
||||
'table_name',
|
||||
'column_name',
|
||||
'column_comment',
|
||||
]);
|
||||
}
|
||||
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
|
||||
return result(
|
||||
[
|
||||
{ table_name: 'customers', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
|
||||
{ table_name: 'customers', column_name: 'name', data_type: 'nvarchar', is_nullable: 'NO' },
|
||||
{ table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
|
||||
{ table_name: 'orders', column_name: 'customer_id', data_type: 'int', is_nullable: 'NO' },
|
||||
{ table_name: 'orders', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
|
||||
{ table_name: 'order_summary', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
|
||||
],
|
||||
['table_name', 'column_name', 'data_type', 'is_nullable'],
|
||||
);
|
||||
}
|
||||
if (sql.includes("CONSTRAINT_TYPE = 'PRIMARY KEY'")) {
|
||||
return result(
|
||||
[
|
||||
{ table_name: 'customers', column_name: 'id' },
|
||||
{ table_name: 'orders', column_name: 'id' },
|
||||
],
|
||||
['table_name', 'column_name'],
|
||||
);
|
||||
}
|
||||
if (sql.includes('REFERENTIAL_CONSTRAINTS')) {
|
||||
return result(
|
||||
[
|
||||
{
|
||||
table_name: 'orders',
|
||||
column_name: 'customer_id',
|
||||
referenced_table_schema: 'dbo',
|
||||
referenced_table_name: 'customers',
|
||||
referenced_column_name: 'id',
|
||||
constraint_name: 'orders_customer_id_fk',
|
||||
},
|
||||
],
|
||||
[
|
||||
'table_name',
|
||||
'column_name',
|
||||
'referenced_table_schema',
|
||||
'referenced_table_name',
|
||||
'referenced_column_name',
|
||||
'constraint_name',
|
||||
],
|
||||
);
|
||||
}
|
||||
if (sql.includes('sys.partitions') && sql.includes('GROUP BY t.name')) {
|
||||
return result(
|
||||
[
|
||||
{ table_name: 'customers', row_count: 2 },
|
||||
{ table_name: 'orders', row_count: 2 },
|
||||
],
|
||||
['table_name', 'row_count'],
|
||||
);
|
||||
}
|
||||
if (sql.includes('SELECT TOP 1 [id], [status] FROM [dbo].[orders]')) {
|
||||
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
|
||||
}
|
||||
if (sql.includes('SELECT TOP 1 * FROM (select id, status from dbo.orders) AS ktx_query_result')) {
|
||||
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
|
||||
}
|
||||
if (sql.includes('SELECT TOP 5 [status] FROM [dbo].[orders]')) {
|
||||
return result([{ status: 'paid' }, { status: 'open' }], ['status']);
|
||||
}
|
||||
if (sql.includes('COUNT(DISTINCT val)')) {
|
||||
return result([{ cardinality: 2 }], ['cardinality']);
|
||||
}
|
||||
if (sql.includes('SELECT TOP 10 val')) {
|
||||
return result([{ val: 'open' }, { val: 'paid' }], ['val']);
|
||||
}
|
||||
if (sql.includes('SUM(p.rows) AS row_count') && sql.includes('t.name = @tableName')) {
|
||||
return result([{ row_count: 2 }], ['row_count']);
|
||||
}
|
||||
if (sql.includes('SELECT s.name AS schema_name')) {
|
||||
return result([{ schema_name: 'dbo' }, { schema_name: 'sales' }], ['schema_name']);
|
||||
}
|
||||
if (sql.trim() === 'SELECT 1') {
|
||||
return result([{ ok: 1 }], ['ok']);
|
||||
}
|
||||
throw new Error(`Unexpected SQL: ${sql}`);
|
||||
});
|
||||
const request: { input(name: string, value: unknown): typeof request; query: typeof query } = {
|
||||
input: vi.fn((_key: string, _value: unknown) => request),
|
||||
query,
|
||||
};
|
||||
const close = vi.fn(async () => undefined);
|
||||
return {
|
||||
createPool: vi.fn(async () => ({
|
||||
request: () => request,
|
||||
close,
|
||||
})),
|
||||
};
|
||||
}
|
||||
|
||||
describe('KtxSqlServerScanConnector', () => {
|
||||
it('resolves SQL Server connection configuration safely', () => {
|
||||
expect(
|
||||
isKtxSqlServerConnectionConfig({
|
||||
driver: 'sqlserver',
|
||||
host: 'localhost',
|
||||
database: 'analytics',
|
||||
}),
|
||||
).toBe(true);
|
||||
expect(isKtxSqlServerConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(false);
|
||||
expect(
|
||||
sqlServerConnectionPoolConfigFromConfig({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'sqlserver',
|
||||
host: 'db.example.test',
|
||||
port: 14330,
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
trustServerCertificate: false,
|
||||
},
|
||||
}),
|
||||
).toMatchObject({
|
||||
server: 'db.example.test',
|
||||
port: 14330,
|
||||
database: 'analytics',
|
||||
user: 'reader',
|
||||
options: { encrypt: true, trustServerCertificate: false },
|
||||
});
|
||||
});
|
||||
|
||||
it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
|
||||
const connector = new KtxSqlServerScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'sqlserver',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
schema: 'dbo',
|
||||
},
|
||||
poolFactory: fakePoolFactory(),
|
||||
now: () => new Date('2026-04-29T16:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await connector.introspect(
|
||||
{ connectionId: 'warehouse', driver: 'sqlserver' },
|
||||
{ runId: 'scan-run-1' },
|
||||
);
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
driver: 'sqlserver',
|
||||
extractedAt: '2026-04-29T16:00:00.000Z',
|
||||
scope: { catalogs: ['analytics'], schemas: ['dbo'] },
|
||||
metadata: {
|
||||
database: 'analytics',
|
||||
host: 'db.example.test',
|
||||
schemas: ['dbo'],
|
||||
table_count: 3,
|
||||
total_columns: 6,
|
||||
},
|
||||
});
|
||||
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([
|
||||
['customers', 'table', 2, 'Customer table'],
|
||||
['orders', 'table', 2, null],
|
||||
['order_summary', 'view', null, null],
|
||||
]);
|
||||
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
|
||||
name: 'id',
|
||||
nativeType: 'int',
|
||||
normalizedType: 'int',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: 'PK',
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
|
||||
{
|
||||
fromColumn: 'customer_id',
|
||||
toCatalog: 'analytics',
|
||||
toDb: 'dbo',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
constraintName: 'orders_customer_id_fk',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
|
||||
const poolFactory = fakePoolFactory();
|
||||
const connector = new KtxSqlServerScanConnector({
|
||||
connectionId: 'warehouse',
|
||||
connection: {
|
||||
driver: 'sqlserver',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
schema: 'dbo',
|
||||
},
|
||||
poolFactory,
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.sampleTable(
|
||||
{
|
||||
connectionId: 'warehouse',
|
||||
table: { catalog: 'analytics', db: 'dbo', name: 'orders' },
|
||||
columns: ['id', 'status'],
|
||||
limit: 1,
|
||||
},
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toEqual({
|
||||
headers: ['id', 'status'],
|
||||
headerTypes: ['nvarchar', 'nvarchar'],
|
||||
rows: [[10, 'paid']],
|
||||
totalRows: 1,
|
||||
});
|
||||
|
||||
await expect(
|
||||
connector.sampleColumn(
|
||||
{ connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status', limit: 5 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
|
||||
|
||||
await expect(
|
||||
connector.getColumnDistinctValues(
|
||||
{ catalog: 'analytics', db: 'dbo', name: 'orders' },
|
||||
'status',
|
||||
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
|
||||
),
|
||||
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly(
|
||||
{ connectionId: 'warehouse', sql: 'select id, status from dbo.orders', maxRows: 1 },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
|
||||
|
||||
await expect(
|
||||
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
|
||||
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
|
||||
|
||||
await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
|
||||
await expect(connector.listSchemas()).resolves.toEqual(['dbo', 'sales']);
|
||||
await expect(
|
||||
connector.columnStats(
|
||||
{ connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status' },
|
||||
{ runId: 'scan-run-1' },
|
||||
),
|
||||
).resolves.toBeNull();
|
||||
|
||||
await connector.cleanup();
|
||||
});
|
||||
|
||||
it('adapts native SQL Server snapshots to live-database introspection for local ingest', async () => {
|
||||
const introspection = createSqlServerLiveDatabaseIntrospection({
|
||||
connections: {
|
||||
warehouse: {
|
||||
driver: 'sqlserver',
|
||||
host: 'db.example.test',
|
||||
database: 'analytics',
|
||||
username: 'reader',
|
||||
schema: 'dbo',
|
||||
},
|
||||
},
|
||||
poolFactory: fakePoolFactory(),
|
||||
now: () => new Date('2026-04-29T16:00:00.000Z'),
|
||||
});
|
||||
|
||||
const snapshot = await introspection.extractSchema('warehouse');
|
||||
|
||||
expect(snapshot).toMatchObject({
|
||||
connectionId: 'warehouse',
|
||||
extractedAt: '2026-04-29T16:00:00.000Z',
|
||||
});
|
||||
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
|
||||
name: 'customers',
|
||||
catalog: 'analytics',
|
||||
db: 'dbo',
|
||||
columns: [
|
||||
{
|
||||
name: 'id',
|
||||
nativeType: 'int',
|
||||
normalizedType: 'int',
|
||||
dimensionType: 'number',
|
||||
nullable: false,
|
||||
primaryKey: true,
|
||||
comment: 'PK',
|
||||
},
|
||||
{
|
||||
name: 'name',
|
||||
nativeType: 'nvarchar',
|
||||
normalizedType: 'nvarchar',
|
||||
dimensionType: 'string',
|
||||
nullable: false,
|
||||
primaryKey: false,
|
||||
comment: null,
|
||||
},
|
||||
],
|
||||
foreignKeys: [],
|
||||
});
|
||||
});
|
||||
});
|
||||
710
packages/cli/src/connectors/sqlserver/connector.ts
Normal file
710
packages/cli/src/connectors/sqlserver/connector.ts
Normal file
|
|
@ -0,0 +1,710 @@
|
|||
import { assertReadOnlySql } from '../../context/connections/read-only-sql.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import sql from 'mssql';
|
||||
import { KtxSqlServerDialect } from './dialect.js';
|
||||
|
||||
export interface KtxSqlServerConnectionConfig {
|
||||
driver?: string;
|
||||
host?: string;
|
||||
port?: number;
|
||||
database?: string;
|
||||
username?: string;
|
||||
user?: string;
|
||||
password?: string;
|
||||
url?: string;
|
||||
schema?: string;
|
||||
schemas?: string[];
|
||||
trustServerCertificate?: boolean;
|
||||
[key: string]: unknown;
|
||||
}
|
||||
|
||||
export interface KtxSqlServerPoolConfig {
|
||||
server: string;
|
||||
port: number;
|
||||
database: string;
|
||||
user: string;
|
||||
password?: string;
|
||||
options: { encrypt: true; trustServerCertificate: boolean };
|
||||
pool: { max: number; min: number; idleTimeoutMillis: number };
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
export interface KtxSqlServerQueryResult {
|
||||
recordset?: Array<Record<string, unknown>> & { columns?: Record<string, { type?: { declaration?: string } }> };
|
||||
}
|
||||
|
||||
interface KtxSqlServerRequest {
|
||||
input(name: string, value: unknown): KtxSqlServerRequest;
|
||||
query(query: string): Promise<KtxSqlServerQueryResult>;
|
||||
}
|
||||
|
||||
export interface KtxSqlServerPool {
|
||||
request(): KtxSqlServerRequest;
|
||||
close(): Promise<void>;
|
||||
}
|
||||
|
||||
export interface KtxSqlServerPoolFactory {
|
||||
createPool(config: KtxSqlServerPoolConfig): Promise<KtxSqlServerPool>;
|
||||
}
|
||||
|
||||
interface KtxSqlServerResolvedEndpoint {
|
||||
host: string;
|
||||
port: number;
|
||||
close?: () => Promise<void>;
|
||||
}
|
||||
|
||||
export interface KtxSqlServerEndpointResolver {
|
||||
resolve(input: {
|
||||
host: string;
|
||||
port: number;
|
||||
connection: KtxSqlServerConnectionConfig;
|
||||
}): Promise<KtxSqlServerResolvedEndpoint>;
|
||||
}
|
||||
|
||||
export interface KtxSqlServerScanConnectorOptions {
|
||||
connectionId: string;
|
||||
connection: KtxSqlServerConnectionConfig | undefined;
|
||||
poolFactory?: KtxSqlServerPoolFactory;
|
||||
endpointResolver?: KtxSqlServerEndpointResolver;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
now?: () => Date;
|
||||
}
|
||||
|
||||
export interface KtxSqlServerReadOnlyQueryInput extends KtxReadOnlyQueryInput {
|
||||
params?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
export interface KtxSqlServerColumnDistinctValuesOptions {
|
||||
maxCardinality: number;
|
||||
limit: number;
|
||||
sampleSize?: number;
|
||||
}
|
||||
|
||||
export interface KtxSqlServerColumnDistinctValuesResult {
|
||||
values: string[] | null;
|
||||
cardinality: number;
|
||||
}
|
||||
|
||||
interface KtxSqlServerTableSampleResult extends KtxTableSampleResult {
|
||||
headerTypes?: string[];
|
||||
}
|
||||
|
||||
function sqlTypeDeclaration(type: unknown): string {
|
||||
if (typeof type === 'function') {
|
||||
try {
|
||||
return sqlTypeDeclaration(type());
|
||||
} catch {
|
||||
return 'unknown';
|
||||
}
|
||||
}
|
||||
if (typeof type === 'object' && type !== null && 'declaration' in type) {
|
||||
const declaration = (type as { declaration?: unknown }).declaration;
|
||||
return typeof declaration === 'string' ? declaration : 'unknown';
|
||||
}
|
||||
return 'unknown';
|
||||
}
|
||||
|
||||
function sqlRecordset(
|
||||
rows: Array<Record<string, unknown>> | undefined,
|
||||
columns: Record<string, { type?: unknown }> | undefined,
|
||||
): NonNullable<KtxSqlServerQueryResult['recordset']> {
|
||||
const recordset = [...(rows ?? [])] as NonNullable<KtxSqlServerQueryResult['recordset']>;
|
||||
recordset.columns = Object.fromEntries(
|
||||
Object.entries(columns ?? {}).map(([name, metadata]) => [
|
||||
name,
|
||||
{ type: { declaration: sqlTypeDeclaration(metadata.type) } },
|
||||
]),
|
||||
);
|
||||
return recordset;
|
||||
}
|
||||
|
||||
class DefaultSqlServerPoolFactory implements KtxSqlServerPoolFactory {
|
||||
async createPool(config: KtxSqlServerPoolConfig): Promise<KtxSqlServerPool> {
|
||||
const pool = await new sql.ConnectionPool(config as sql.config).connect();
|
||||
return {
|
||||
request() {
|
||||
const request = pool.request();
|
||||
return {
|
||||
input(name: string, value: unknown) {
|
||||
request.input(name, value);
|
||||
return this;
|
||||
},
|
||||
async query(query: string) {
|
||||
const result = await request.query(query);
|
||||
return {
|
||||
recordset: sqlRecordset(result.recordset as Array<Record<string, unknown>> | undefined, result.recordset?.columns),
|
||||
};
|
||||
},
|
||||
};
|
||||
},
|
||||
close: () => pool.close(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
function stringConfigValue(
|
||||
connection: KtxSqlServerConnectionConfig | undefined,
|
||||
key: keyof KtxSqlServerConnectionConfig,
|
||||
env: NodeJS.ProcessEnv,
|
||||
): string | undefined {
|
||||
const value = connection?.[key];
|
||||
return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined;
|
||||
}
|
||||
|
||||
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
|
||||
if (value.startsWith('env:')) {
|
||||
return env[value.slice('env:'.length)] ?? '';
|
||||
}
|
||||
if (value.startsWith('file:')) {
|
||||
const rawPath = value.slice('file:'.length);
|
||||
const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath;
|
||||
return readFileSync(path, 'utf-8').trim();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function parseSqlServerUrl(url: string): Partial<KtxSqlServerConnectionConfig> {
|
||||
const parsed = new URL(url);
|
||||
return {
|
||||
host: parsed.hostname,
|
||||
port: parsed.port ? Number(parsed.port) : undefined,
|
||||
database: parsed.pathname.replace(/^\/+/, '') || undefined,
|
||||
username: parsed.username ? decodeURIComponent(parsed.username) : undefined,
|
||||
password: parsed.password ? decodeURIComponent(parsed.password) : undefined,
|
||||
trustServerCertificate: parsed.searchParams.get('trustServerCertificate') === 'true',
|
||||
};
|
||||
}
|
||||
|
||||
function maybeNumber(value: unknown): number | undefined {
|
||||
return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
|
||||
}
|
||||
|
||||
function schemaNames(connection: KtxSqlServerConnectionConfig, env: NodeJS.ProcessEnv): string[] {
|
||||
if (Array.isArray(connection.schemas) && connection.schemas.length > 0) {
|
||||
return connection.schemas.filter((schema) => schema.trim().length > 0).map((schema) => resolveStringReference(schema, env));
|
||||
}
|
||||
return [stringConfigValue(connection, 'schema', env) ?? 'dbo'];
|
||||
}
|
||||
|
||||
function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string, T[]> {
|
||||
const grouped = new Map<string, T[]>();
|
||||
for (const row of rows) {
|
||||
const values = grouped.get(row.table_name) ?? [];
|
||||
values.push(row);
|
||||
grouped.set(row.table_name, values);
|
||||
}
|
||||
return grouped;
|
||||
}
|
||||
|
||||
function firstNumber(value: unknown): number | null {
|
||||
const numberValue = Number(value);
|
||||
return Number.isFinite(numberValue) ? numberValue : null;
|
||||
}
|
||||
|
||||
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
|
||||
const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, '');
|
||||
if (!maxRows) {
|
||||
return trimmed;
|
||||
}
|
||||
if (!Number.isInteger(maxRows) || maxRows <= 0) {
|
||||
throw new Error('maxRows must be a positive integer.');
|
||||
}
|
||||
return `SELECT TOP ${maxRows} * FROM (${trimmed}) AS ktx_query_result`;
|
||||
}
|
||||
|
||||
export function isKtxSqlServerConnectionConfig(
|
||||
connection: KtxSqlServerConnectionConfig | undefined,
|
||||
): connection is KtxSqlServerConnectionConfig {
|
||||
return String(connection?.driver ?? '').toLowerCase() === 'sqlserver';
|
||||
}
|
||||
|
||||
/** @internal */
|
||||
export function sqlServerConnectionPoolConfigFromConfig(input: {
|
||||
connectionId: string;
|
||||
connection: KtxSqlServerConnectionConfig | undefined;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
}): KtxSqlServerPoolConfig {
|
||||
const inputDriver = input.connection?.driver ?? 'unknown';
|
||||
if (!isKtxSqlServerConnectionConfig(input.connection)) {
|
||||
throw new Error(`Native SQL Server connector cannot run driver "${inputDriver}"`);
|
||||
}
|
||||
|
||||
const env = input.env ?? process.env;
|
||||
const referencedUrl = stringConfigValue(input.connection, 'url', env);
|
||||
const urlConfig = referencedUrl ? parseSqlServerUrl(referencedUrl) : {};
|
||||
const merged: KtxSqlServerConnectionConfig = { ...urlConfig, ...input.connection };
|
||||
const server = stringConfigValue(merged, 'host', env);
|
||||
const database = stringConfigValue(merged, 'database', env);
|
||||
const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);
|
||||
|
||||
if (!server) {
|
||||
throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.host or url`);
|
||||
}
|
||||
if (!database) {
|
||||
throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.database or url`);
|
||||
}
|
||||
if (!user) {
|
||||
throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.username, user, or url`);
|
||||
}
|
||||
|
||||
return {
|
||||
server,
|
||||
port: maybeNumber(merged.port) ?? 1433,
|
||||
database,
|
||||
user,
|
||||
password: stringConfigValue(merged, 'password', env),
|
||||
options: { encrypt: true, trustServerCertificate: merged.trustServerCertificate ?? true },
|
||||
pool: { max: 10, min: 0, idleTimeoutMillis: 30000 },
|
||||
};
|
||||
}
|
||||
|
||||
export class KtxSqlServerScanConnector implements KtxScanConnector {
|
||||
readonly id: string;
|
||||
readonly driver = 'sqlserver' as const;
|
||||
readonly capabilities = createKtxConnectorCapabilities({
|
||||
tableSampling: true,
|
||||
columnSampling: true,
|
||||
columnStats: false,
|
||||
readOnlySql: true,
|
||||
nestedAnalysis: false,
|
||||
formalForeignKeys: true,
|
||||
estimatedRowCounts: true,
|
||||
});
|
||||
|
||||
private readonly connectionId: string;
|
||||
private readonly connection: KtxSqlServerConnectionConfig;
|
||||
private readonly poolConfig: KtxSqlServerPoolConfig;
|
||||
private readonly schemas: string[];
|
||||
private readonly poolFactory: KtxSqlServerPoolFactory;
|
||||
private readonly endpointResolver?: KtxSqlServerEndpointResolver;
|
||||
private readonly now: () => Date;
|
||||
private readonly dialect = new KtxSqlServerDialect();
|
||||
private pool: KtxSqlServerPool | null = null;
|
||||
private resolvedEndpoint: KtxSqlServerResolvedEndpoint | null = null;
|
||||
|
||||
constructor(options: KtxSqlServerScanConnectorOptions) {
|
||||
this.connectionId = options.connectionId;
|
||||
this.connection = options.connection ?? {};
|
||||
const env = options.env ?? process.env;
|
||||
this.poolConfig = sqlServerConnectionPoolConfigFromConfig({
|
||||
connectionId: options.connectionId,
|
||||
connection: options.connection,
|
||||
env,
|
||||
});
|
||||
this.schemas = schemaNames(this.connection, env);
|
||||
this.poolFactory = options.poolFactory ?? new DefaultSqlServerPoolFactory();
|
||||
this.endpointResolver = options.endpointResolver;
|
||||
this.now = options.now ?? (() => new Date());
|
||||
this.id = `sqlserver:${options.connectionId}`;
|
||||
}
|
||||
|
||||
async testConnection(): Promise<{ success: boolean; error?: string }> {
|
||||
try {
|
||||
await this.query('SELECT 1');
|
||||
return { success: true };
|
||||
} catch (error) {
|
||||
return { success: false, error: error instanceof Error ? error.message : String(error) };
|
||||
}
|
||||
}
|
||||
|
||||
async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const tables: KtxSchemaTable[] = [];
|
||||
for (const schemaName of this.schemas) {
|
||||
tables.push(...(await this.introspectSchema(schemaName)));
|
||||
}
|
||||
return {
|
||||
connectionId: this.connectionId,
|
||||
driver: 'sqlserver',
|
||||
extractedAt: this.now().toISOString(),
|
||||
scope: { catalogs: [this.poolConfig.database], schemas: this.schemas },
|
||||
metadata: {
|
||||
database: this.poolConfig.database,
|
||||
schemas: this.schemas,
|
||||
host: this.poolConfig.server,
|
||||
table_count: tables.length,
|
||||
total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
|
||||
},
|
||||
tables,
|
||||
};
|
||||
}
|
||||
|
||||
async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxSqlServerTableSampleResult> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
|
||||
return { headers: result.headers, headerTypes: result.headerTypes, rows: result.rows, totalRows: result.totalRows };
|
||||
}
|
||||
|
||||
async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const result = await this.query(
|
||||
this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
|
||||
);
|
||||
const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
|
||||
return { values, nullCount: null, distinctCount: null };
|
||||
}
|
||||
|
||||
async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
|
||||
return null;
|
||||
}
|
||||
|
||||
async executeReadOnly(input: KtxSqlServerReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
|
||||
this.assertConnection(input.connectionId);
|
||||
const limitedSql = limitSqlForSqlServerExecution(input.sql, input.maxRows);
|
||||
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
|
||||
const result = await this.query(prepared.sql, prepared.params);
|
||||
return { ...result, rowCount: result.rows.length };
|
||||
}
|
||||
|
||||
async getColumnDistinctValues(
|
||||
table: KtxTableRef,
|
||||
columnName: string,
|
||||
options: KtxSqlServerColumnDistinctValuesOptions,
|
||||
): Promise<KtxSqlServerColumnDistinctValuesResult | null> {
|
||||
const tableName = this.qTableName(table);
|
||||
const quotedColumn = this.dialect.quoteIdentifier(columnName);
|
||||
const cardinalityRows = await this.queryRaw<{ cardinality: unknown }>(
|
||||
this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, options.sampleSize ?? 10000),
|
||||
);
|
||||
const cardinality = Number(cardinalityRows[0]?.cardinality);
|
||||
if (Number.isNaN(cardinality)) {
|
||||
return null;
|
||||
}
|
||||
if (cardinality === 0) {
|
||||
return { values: [], cardinality: 0 };
|
||||
}
|
||||
if (cardinality > options.maxCardinality) {
|
||||
return { values: null, cardinality };
|
||||
}
|
||||
const valuesRows = await this.queryRaw<{ val: unknown }>(
|
||||
this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit),
|
||||
);
|
||||
return { values: valuesRows.filter((row) => row.val !== null).map((row) => String(row.val)), cardinality };
|
||||
}
|
||||
|
||||
async getTableRowCount(tableName: string, schemaName = this.schemas[0] ?? 'dbo'): Promise<number> {
|
||||
const rows = await this.queryRaw<{ row_count: unknown }>(
|
||||
`
|
||||
SELECT SUM(p.rows) AS row_count
|
||||
FROM sys.tables t
|
||||
INNER JOIN sys.partitions p ON t.object_id = p.object_id
|
||||
INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
|
||||
WHERE s.name = @schemaName
|
||||
AND t.name = @tableName
|
||||
AND p.index_id IN (0, 1)
|
||||
`,
|
||||
{ schemaName, tableName },
|
||||
);
|
||||
return firstNumber(rows[0]?.row_count) ?? 0;
|
||||
}
|
||||
|
||||
qTableName(table: Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>): string {
|
||||
return this.dialect.formatTableName(table);
|
||||
}
|
||||
|
||||
quoteIdentifier(identifier: string): string {
|
||||
return this.dialect.quoteIdentifier(identifier);
|
||||
}
|
||||
|
||||
async listSchemas(): Promise<string[]> {
|
||||
const rows = await this.queryRaw<{ schema_name: string }>(`
|
||||
SELECT s.name AS schema_name
|
||||
FROM sys.schemas s
|
||||
WHERE s.name NOT IN (
|
||||
'INFORMATION_SCHEMA', 'sys', 'guest',
|
||||
'db_owner', 'db_accessadmin', 'db_securityadmin', 'db_ddladmin',
|
||||
'db_backupoperator', 'db_datareader', 'db_datawriter',
|
||||
'db_denydatareader', 'db_denydatawriter'
|
||||
)
|
||||
ORDER BY s.name
|
||||
`);
|
||||
return rows.map((row) => row.schema_name);
|
||||
}
|
||||
|
||||
async listTables(schemas?: string[]): Promise<KtxTableListEntry[]> {
|
||||
const filterSchemas = schemas ?? (await this.listSchemas());
|
||||
if (filterSchemas.length === 0) return [];
|
||||
const params: Record<string, unknown> = {};
|
||||
const placeholders = filterSchemas.map((s, i) => {
|
||||
params[`schema${i}`] = s;
|
||||
return `@schema${i}`;
|
||||
});
|
||||
const rows = await this.queryRaw<{ schema_name: string; table_name: string; table_type: string }>(
|
||||
`
|
||||
SELECT s.name AS schema_name, o.name AS table_name, o.type_desc AS table_type
|
||||
FROM sys.objects o
|
||||
JOIN sys.schemas s ON o.schema_id = s.schema_id
|
||||
WHERE o.type IN ('U', 'V')
|
||||
AND s.name IN (${placeholders.join(', ')})
|
||||
ORDER BY s.name, o.name
|
||||
`,
|
||||
params,
|
||||
);
|
||||
return rows.map((row) => ({
|
||||
schema: row.schema_name,
|
||||
name: row.table_name,
|
||||
kind: row.table_type === 'VIEW' ? ('view' as const) : ('table' as const),
|
||||
}));
|
||||
}
|
||||
|
||||
async cleanup(): Promise<void> {
|
||||
if (this.pool) {
|
||||
await this.pool.close();
|
||||
this.pool = null;
|
||||
}
|
||||
if (this.resolvedEndpoint?.close) {
|
||||
await this.resolvedEndpoint.close();
|
||||
this.resolvedEndpoint = null;
|
||||
}
|
||||
}
|
||||
|
||||
private async introspectSchema(schemaName: string): Promise<KtxSchemaTable[]> {
|
||||
const tables = await this.queryRaw<{ table_name: string; table_type: string }>(
|
||||
`
|
||||
SELECT TABLE_NAME AS table_name, TABLE_TYPE AS table_type
|
||||
FROM INFORMATION_SCHEMA.TABLES
|
||||
WHERE TABLE_SCHEMA = @schemaName
|
||||
AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
|
||||
ORDER BY TABLE_NAME
|
||||
`,
|
||||
{ schemaName },
|
||||
);
|
||||
const columns = await this.queryRaw<{
|
||||
table_name: string;
|
||||
column_name: string;
|
||||
data_type: string;
|
||||
is_nullable: string;
|
||||
}>(
|
||||
`
|
||||
SELECT TABLE_NAME AS table_name, COLUMN_NAME AS column_name, DATA_TYPE AS data_type, IS_NULLABLE AS is_nullable
|
||||
FROM INFORMATION_SCHEMA.COLUMNS
|
||||
WHERE TABLE_SCHEMA = @schemaName
|
||||
ORDER BY TABLE_NAME, ORDINAL_POSITION
|
||||
`,
|
||||
{ schemaName },
|
||||
);
|
||||
const tableComments = await this.tableComments(schemaName);
|
||||
const columnComments = await this.columnComments(schemaName);
|
||||
const primaryKeys = await this.primaryKeys(schemaName);
|
||||
const foreignKeys = await this.foreignKeys(schemaName);
|
||||
const rowCounts = await this.rowCounts(schemaName);
|
||||
const columnsByTable = groupByTable(columns);
|
||||
const foreignKeysByTable = groupByTable(foreignKeys);
|
||||
|
||||
return tables.map((table) => ({
|
||||
catalog: this.poolConfig.database,
|
||||
db: schemaName,
|
||||
name: table.table_name,
|
||||
kind: table.table_type === 'VIEW' ? 'view' : 'table',
|
||||
comment: tableComments.get(table.table_name) ?? null,
|
||||
estimatedRows: table.table_type === 'VIEW' ? null : rowCounts.get(table.table_name) ?? 0,
|
||||
columns: (columnsByTable.get(table.table_name) ?? []).map((column) =>
|
||||
this.toSchemaColumn(column, primaryKeys.get(table.table_name) ?? new Set(), columnComments),
|
||||
),
|
||||
foreignKeys: (foreignKeysByTable.get(table.table_name) ?? []).map((row) => this.toSchemaForeignKey(row)),
|
||||
}));
|
||||
}
|
||||
|
||||
private async tableComments(schemaName: string): Promise<Map<string, string>> {
|
||||
const rows = await this.queryRaw<{ table_name: string; table_comment: string }>(
|
||||
`
|
||||
SELECT o.name AS table_name, CAST(ep.value AS NVARCHAR(MAX)) AS table_comment
|
||||
FROM sys.objects o
|
||||
INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
|
||||
INNER JOIN sys.extended_properties ep ON ep.major_id = o.object_id
|
||||
AND ep.minor_id = 0
|
||||
AND ep.name = 'MS_Description'
|
||||
WHERE s.name = @schemaName
|
||||
AND o.type IN ('U', 'V')
|
||||
`,
|
||||
{ schemaName },
|
||||
);
|
||||
return new Map(rows.map((row) => [row.table_name, row.table_comment]));
|
||||
}
|
||||
|
||||
private async columnComments(schemaName: string): Promise<Map<string, string>> {
|
||||
const rows = await this.queryRaw<{ table_name: string; column_name: string; column_comment: string }>(
|
||||
`
|
||||
SELECT o.name AS table_name, c.name AS column_name, CAST(ep.value AS NVARCHAR(MAX)) AS column_comment
|
||||
FROM sys.columns c
|
||||
INNER JOIN sys.objects o ON c.object_id = o.object_id
|
||||
INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
|
||||
INNER JOIN sys.extended_properties ep ON ep.major_id = c.object_id
|
||||
AND ep.minor_id = c.column_id
|
||||
AND ep.name = 'MS_Description'
|
||||
WHERE s.name = @schemaName
|
||||
AND o.type IN ('U', 'V')
|
||||
`,
|
||||
{ schemaName },
|
||||
);
|
||||
return new Map(rows.map((row) => [`${row.table_name}.${row.column_name}`, row.column_comment]));
|
||||
}
|
||||
|
||||
private async primaryKeys(schemaName: string): Promise<Map<string, Set<string>>> {
|
||||
const rows = await this.queryRaw<{ table_name: string; column_name: string }>(
|
||||
`
|
||||
SELECT tc.TABLE_NAME AS table_name, kcu.COLUMN_NAME AS column_name
|
||||
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
|
||||
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
|
||||
ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
|
||||
AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
|
||||
WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
|
||||
AND tc.TABLE_SCHEMA = @schemaName
|
||||
ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION
|
||||
`,
|
||||
{ schemaName },
|
||||
);
|
||||
const grouped = new Map<string, Set<string>>();
|
||||
for (const row of rows) {
|
||||
const columns = grouped.get(row.table_name) ?? new Set<string>();
|
||||
columns.add(row.column_name);
|
||||
grouped.set(row.table_name, columns);
|
||||
}
|
||||
return grouped;
|
||||
}
|
||||
|
||||
private async foreignKeys(schemaName: string): Promise<
|
||||
Array<{
|
||||
table_name: string;
|
||||
column_name: string;
|
||||
referenced_table_schema: string;
|
||||
referenced_table_name: string;
|
||||
referenced_column_name: string;
|
||||
constraint_name: string;
|
||||
}>
|
||||
> {
|
||||
return this.queryRaw(
|
||||
`
|
||||
SELECT
|
||||
fk.TABLE_NAME AS table_name,
|
||||
fk.COLUMN_NAME AS column_name,
|
||||
pk.TABLE_SCHEMA AS referenced_table_schema,
|
||||
pk.TABLE_NAME AS referenced_table_name,
|
||||
pk.COLUMN_NAME AS referenced_column_name,
|
||||
fk.CONSTRAINT_NAME AS constraint_name
|
||||
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc
|
||||
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE fk
|
||||
ON fk.CONSTRAINT_CATALOG = rc.CONSTRAINT_CATALOG
|
||||
AND fk.CONSTRAINT_SCHEMA = rc.CONSTRAINT_SCHEMA
|
||||
AND fk.CONSTRAINT_NAME = rc.CONSTRAINT_NAME
|
||||
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE pk
|
||||
ON pk.CONSTRAINT_CATALOG = rc.UNIQUE_CONSTRAINT_CATALOG
|
||||
AND pk.CONSTRAINT_SCHEMA = rc.UNIQUE_CONSTRAINT_SCHEMA
|
||||
AND pk.CONSTRAINT_NAME = rc.UNIQUE_CONSTRAINT_NAME
|
||||
AND pk.ORDINAL_POSITION = fk.ORDINAL_POSITION
|
||||
WHERE fk.TABLE_SCHEMA = @schemaName
|
||||
ORDER BY fk.TABLE_NAME, fk.COLUMN_NAME
|
||||
`,
|
||||
{ schemaName },
|
||||
);
|
||||
}
|
||||
|
||||
private async rowCounts(schemaName: string): Promise<Map<string, number>> {
|
||||
const rows = await this.queryRaw<{ table_name: string; row_count: unknown }>(
|
||||
`
|
||||
SELECT t.name AS table_name, SUM(p.rows) AS row_count
|
||||
FROM sys.tables t
|
||||
INNER JOIN sys.partitions p ON t.object_id = p.object_id
|
||||
INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
|
||||
WHERE s.name = @schemaName
|
||||
AND p.index_id IN (0, 1)
|
||||
GROUP BY t.name
|
||||
`,
|
||||
{ schemaName },
|
||||
);
|
||||
return new Map(rows.map((row) => [row.table_name, firstNumber(row.row_count) ?? 0]));
|
||||
}
|
||||
|
||||
private toSchemaColumn(
|
||||
column: { table_name: string; column_name: string; data_type: string; is_nullable: string },
|
||||
primaryKeys: Set<string>,
|
||||
comments: Map<string, string>,
|
||||
): KtxSchemaColumn {
|
||||
return {
|
||||
name: column.column_name,
|
||||
nativeType: column.data_type,
|
||||
normalizedType: this.dialect.mapDataType(column.data_type),
|
||||
dimensionType: this.dialect.mapToDimensionType(column.data_type),
|
||||
nullable: column.is_nullable === 'YES',
|
||||
primaryKey: primaryKeys.has(column.column_name),
|
||||
comment: comments.get(`${column.table_name}.${column.column_name}`) ?? null,
|
||||
};
|
||||
}
|
||||
|
||||
private toSchemaForeignKey(row: {
|
||||
column_name: string;
|
||||
referenced_table_schema: string;
|
||||
referenced_table_name: string;
|
||||
referenced_column_name: string;
|
||||
constraint_name: string;
|
||||
}): KtxSchemaForeignKey {
|
||||
return {
|
||||
fromColumn: row.column_name,
|
||||
toCatalog: this.poolConfig.database,
|
||||
toDb: row.referenced_table_schema,
|
||||
toTable: row.referenced_table_name,
|
||||
toColumn: row.referenced_column_name,
|
||||
constraintName: row.constraint_name || null,
|
||||
};
|
||||
}
|
||||
|
||||
private async poolForQuery(): Promise<KtxSqlServerPool> {
|
||||
if (!this.pool) {
|
||||
const config = { ...this.poolConfig };
|
||||
if (this.endpointResolver) {
|
||||
this.resolvedEndpoint = await this.endpointResolver.resolve({
|
||||
host: config.server,
|
||||
port: config.port,
|
||||
connection: this.connection,
|
||||
});
|
||||
config.server = this.resolvedEndpoint.host;
|
||||
config.port = this.resolvedEndpoint.port;
|
||||
}
|
||||
this.pool = await this.poolFactory.createPool(config);
|
||||
}
|
||||
return this.pool;
|
||||
}
|
||||
|
||||
private async queryRaw<T extends Record<string, unknown>>(query: string, params?: Record<string, unknown>): Promise<T[]> {
|
||||
const pool = await this.poolForQuery();
|
||||
const request = pool.request();
|
||||
if (params) {
|
||||
for (const [key, value] of Object.entries(params)) {
|
||||
request.input(key, value);
|
||||
}
|
||||
}
|
||||
const result = await request.query(query);
|
||||
return (result.recordset ?? []) as T[];
|
||||
}
|
||||
|
||||
private async query(query: string, params?: Record<string, unknown>): Promise<Omit<KtxQueryResult, 'rowCount'>> {
|
||||
const pool = await this.poolForQuery();
|
||||
const request = pool.request();
|
||||
if (params) {
|
||||
for (const [key, value] of Object.entries(params)) {
|
||||
request.input(key, value);
|
||||
}
|
||||
}
|
||||
const result = await request.query(assertReadOnlySql(query));
|
||||
const recordset = result.recordset ?? [];
|
||||
const columnMetadata = recordset.columns ?? {};
|
||||
const metadataHeaders = Object.keys(columnMetadata);
|
||||
const headers = metadataHeaders.length > 0 ? metadataHeaders : Object.keys(recordset[0] ?? {});
|
||||
const headerTypes = headers.map((header) => columnMetadata[header]?.type?.declaration ?? 'unknown');
|
||||
return {
|
||||
headers,
|
||||
headerTypes,
|
||||
rows: recordset.map((row) => headers.map((header) => row[header])),
|
||||
totalRows: recordset.length,
|
||||
};
|
||||
}
|
||||
|
||||
private assertConnection(connectionId: string): void {
|
||||
if (connectionId !== this.connectionId) {
|
||||
throw new Error(`KTX SQL Server connector ${this.id} cannot serve connection ${connectionId}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
49
packages/cli/src/connectors/sqlserver/dialect.test.ts
Normal file
49
packages/cli/src/connectors/sqlserver/dialect.test.ts
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxSqlServerDialect } from './dialect.js';
|
||||
|
||||
describe('KtxSqlServerDialect', () => {
|
||||
const dialect = new KtxSqlServerDialect();
|
||||
|
||||
it('quotes identifiers and formats schema-qualified table names', () => {
|
||||
expect(dialect.quoteIdentifier('events')).toBe('[events]');
|
||||
expect(dialect.quoteIdentifier('odd]name')).toBe('[odd]]name]');
|
||||
expect(dialect.formatTableName({ catalog: 'warehouse', db: 'dbo', name: 'events' })).toBe('[dbo].[events]');
|
||||
expect(dialect.formatTableName({ catalog: null, db: null, name: 'events' })).toBe('[events]');
|
||||
});
|
||||
|
||||
it('maps SQL Server types to KTX dimension types', () => {
|
||||
expect(dialect.mapToDimensionType('datetime2')).toBe('time');
|
||||
expect(dialect.mapToDimensionType('decimal(18, 2)')).toBe('number');
|
||||
expect(dialect.mapToDimensionType('bigint')).toBe('number');
|
||||
expect(dialect.mapToDimensionType('bit')).toBe('boolean');
|
||||
expect(dialect.mapToDimensionType('uniqueidentifier')).toBe('string');
|
||||
expect(dialect.mapToDimensionType('')).toBe('string');
|
||||
});
|
||||
|
||||
it('builds sampling, distinct-value, pagination, and time SQL', () => {
|
||||
expect(dialect.generateSampleQuery('[dbo].[events]', 25, ['id', 'event_name'])).toBe(
|
||||
'SELECT TOP 25 [id], [event_name] FROM [dbo].[events]',
|
||||
);
|
||||
expect(dialect.generateColumnSampleQuery('[dbo].[events]', 'event_name', 10)).toBe(
|
||||
"SELECT TOP 10 [event_name] FROM [dbo].[events] WHERE [event_name] IS NOT NULL AND LTRIM(RTRIM(CAST([event_name] AS NVARCHAR(MAX)))) != ''",
|
||||
);
|
||||
expect(dialect.generateDistinctValuesQuery('[dbo].[events]', '[event_name]', 5)).toContain('SELECT TOP 5 val');
|
||||
expect(dialect.getTopClause(10)).toBe('TOP 10');
|
||||
expect(dialect.getLimitOffsetClause(10, 20)).toBe('OFFSET 20 ROWS FETCH NEXT 10 ROWS ONLY');
|
||||
expect(dialect.getTimeTruncExpression('created_at', 'month')).toBe(
|
||||
'DATEFROMPARTS(YEAR(created_at), MONTH(created_at), 1)',
|
||||
);
|
||||
});
|
||||
|
||||
it('prepares named parameters using SQL Server @ parameters', () => {
|
||||
expect(
|
||||
dialect.prepareQuery('select * from events where id = :id and name = :name', {
|
||||
id: 10,
|
||||
name: 'signup',
|
||||
}),
|
||||
).toEqual({
|
||||
sql: 'select * from events where id = @id and name = @name',
|
||||
params: { id: 10, name: 'signup' },
|
||||
});
|
||||
});
|
||||
});
|
||||
201
packages/cli/src/connectors/sqlserver/dialect.ts
Normal file
201
packages/cli/src/connectors/sqlserver/dialect.ts
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
type SqlServerTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
export class KtxSqlServerDialect {
|
||||
readonly type = 'sqlserver';
|
||||
|
||||
private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
|
||||
datetime: 'time',
|
||||
datetime2: 'time',
|
||||
date: 'time',
|
||||
time: 'time',
|
||||
datetimeoffset: 'time',
|
||||
smalldatetime: 'time',
|
||||
timestamp: 'time',
|
||||
int: 'number',
|
||||
bigint: 'number',
|
||||
smallint: 'number',
|
||||
tinyint: 'number',
|
||||
decimal: 'number',
|
||||
numeric: 'number',
|
||||
float: 'number',
|
||||
real: 'number',
|
||||
money: 'number',
|
||||
smallmoney: 'number',
|
||||
varchar: 'string',
|
||||
nvarchar: 'string',
|
||||
char: 'string',
|
||||
nchar: 'string',
|
||||
text: 'string',
|
||||
ntext: 'string',
|
||||
uniqueidentifier: 'string',
|
||||
xml: 'string',
|
||||
bit: 'boolean',
|
||||
};
|
||||
|
||||
quoteIdentifier(identifier: string): string {
|
||||
return `[${identifier.replace(/\]/g, ']]')}]`;
|
||||
}
|
||||
|
||||
formatTableName(table: SqlServerTableNameRef): string {
|
||||
return table.db
|
||||
? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
|
||||
: this.quoteIdentifier(table.name);
|
||||
}
|
||||
|
||||
mapDataType(nativeType: string): string {
|
||||
return nativeType;
|
||||
}
|
||||
|
||||
mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
|
||||
if (!nativeType) {
|
||||
return 'string';
|
||||
}
|
||||
const lower = nativeType.toLowerCase().trim();
|
||||
const normalized = lower.includes('(') ? lower.split('(')[0]! : lower;
|
||||
if (this.typeMappings[normalized]) {
|
||||
return this.typeMappings[normalized];
|
||||
}
|
||||
if (normalized.includes('time') || normalized.includes('date')) {
|
||||
return 'time';
|
||||
}
|
||||
if (
|
||||
normalized.includes('int') ||
|
||||
normalized.includes('num') ||
|
||||
normalized.includes('dec') ||
|
||||
normalized.includes('float') ||
|
||||
normalized.includes('money')
|
||||
) {
|
||||
return 'number';
|
||||
}
|
||||
if (normalized.includes('bit')) {
|
||||
return 'boolean';
|
||||
}
|
||||
return 'string';
|
||||
}
|
||||
|
||||
generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
|
||||
const columnList =
|
||||
columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
|
||||
return `SELECT TOP ${limit} ${columnList} FROM ${tableName}`;
|
||||
}
|
||||
|
||||
generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
|
||||
const quotedColumn = this.quoteIdentifier(columnName);
|
||||
return `SELECT TOP ${limit} ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND LTRIM(RTRIM(CAST(${quotedColumn} AS NVARCHAR(MAX)))) != ''`;
|
||||
}
|
||||
|
||||
prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
|
||||
if (!params) {
|
||||
return { sql, params: undefined };
|
||||
}
|
||||
let parameterizedQuery = sql;
|
||||
for (const key of Object.keys(params)) {
|
||||
parameterizedQuery = parameterizedQuery.replace(new RegExp(`:${key}\\b`, 'g'), `@${key}`);
|
||||
}
|
||||
return { sql: parameterizedQuery, params };
|
||||
}
|
||||
|
||||
getRandomSampleFilter(samplePct: number): string {
|
||||
if (samplePct <= 0 || samplePct >= 1) {
|
||||
return '';
|
||||
}
|
||||
return `ABS(CHECKSUM(NEWID())) % 100 < ${Math.round(samplePct * 100)}`;
|
||||
}
|
||||
|
||||
getTableSampleClause(samplePct: number): string {
|
||||
if (samplePct <= 0 || samplePct >= 1) {
|
||||
return '';
|
||||
}
|
||||
return `TABLESAMPLE (${samplePct * 100} PERCENT)`;
|
||||
}
|
||||
|
||||
getLimitOffsetClause(limit: number, offset?: number): string {
|
||||
return offset !== undefined && offset > 0 ? `OFFSET ${offset} ROWS FETCH NEXT ${limit} ROWS ONLY` : '';
|
||||
}
|
||||
|
||||
getTopClause(limit: number): string {
|
||||
return `TOP ${limit}`;
|
||||
}
|
||||
|
||||
getNullCountExpression(column: string): string {
|
||||
return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
|
||||
}
|
||||
|
||||
getDistinctCountExpression(column: string): string {
|
||||
return `COUNT(DISTINCT ${column})`;
|
||||
}
|
||||
|
||||
generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
return `
|
||||
WITH sampled AS (
|
||||
SELECT TOP ${sampleSize} ${columnName} AS val
|
||||
FROM ${tableName}
|
||||
WHERE ${columnName} IS NOT NULL
|
||||
)
|
||||
SELECT COUNT(DISTINCT val) AS cardinality
|
||||
FROM sampled
|
||||
`;
|
||||
}
|
||||
|
||||
generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
|
||||
return `
|
||||
SELECT TOP ${limit} val
|
||||
FROM (
|
||||
SELECT DISTINCT CAST(${columnName} AS NVARCHAR(MAX)) AS val
|
||||
FROM ${tableName}
|
||||
WHERE ${columnName} IS NOT NULL
|
||||
) AS distinct_vals
|
||||
ORDER BY val
|
||||
`;
|
||||
}
|
||||
|
||||
generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
|
||||
return null;
|
||||
}
|
||||
|
||||
generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
|
||||
return `
|
||||
WITH sampled AS (
|
||||
SELECT TOP ${sampleSize} ${columnName} AS val
|
||||
FROM ${tableName}
|
||||
WHERE ${columnName} IS NOT NULL
|
||||
ORDER BY NEWID()
|
||||
)
|
||||
SELECT COUNT(DISTINCT val) AS cardinality
|
||||
FROM sampled
|
||||
`;
|
||||
}
|
||||
|
||||
getTimeTruncExpression(
|
||||
column: string,
|
||||
granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
|
||||
timezone?: string,
|
||||
): string {
|
||||
const col = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column;
|
||||
switch (granularity) {
|
||||
case 'day':
|
||||
return `CAST(${col} AS DATE)`;
|
||||
case 'week':
|
||||
return `DATEADD(WEEK, DATEDIFF(WEEK, 0, ${col}), 0)`;
|
||||
case 'month':
|
||||
return `DATEFROMPARTS(YEAR(${col}), MONTH(${col}), 1)`;
|
||||
case 'quarter':
|
||||
return `DATEFROMPARTS(YEAR(${col}), (DATEPART(QUARTER, ${col}) - 1) * 3 + 1, 1)`;
|
||||
case 'year':
|
||||
return `DATEFROMPARTS(YEAR(${col}), 1, 1)`;
|
||||
}
|
||||
}
|
||||
|
||||
getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
|
||||
const col = timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${timezone}'` : column;
|
||||
const [amount, unit] = interval.split(' ');
|
||||
const originExpr = origin ? `'${origin}'` : `'1970-01-01'`;
|
||||
return `DATEADD(${unit}, (DATEDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount}, ${originExpr})`;
|
||||
}
|
||||
|
||||
parseIntervalToSql(interval: string): string {
|
||||
return `'${interval}'`;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
import {
|
||||
KtxSqlServerScanConnector,
|
||||
type KtxSqlServerConnectionConfig,
|
||||
type KtxSqlServerEndpointResolver,
|
||||
type KtxSqlServerPoolFactory,
|
||||
} from './connector.js';
|
||||
|
||||
interface CreateSqlServerLiveDatabaseIntrospectionOptions {
|
||||
connections: Record<string, KtxProjectConnectionConfig>;
|
||||
poolFactory?: KtxSqlServerPoolFactory;
|
||||
endpointResolver?: KtxSqlServerEndpointResolver;
|
||||
now?: () => Date;
|
||||
}
|
||||
|
||||
export function createSqlServerLiveDatabaseIntrospection(
|
||||
options: CreateSqlServerLiveDatabaseIntrospectionOptions,
|
||||
): LiveDatabaseIntrospectionPort {
|
||||
return {
|
||||
async extractSchema(connectionId: string) {
|
||||
const connection = options.connections[connectionId] as KtxSqlServerConnectionConfig | undefined;
|
||||
const connector = new KtxSqlServerScanConnector({
|
||||
connectionId,
|
||||
connection,
|
||||
poolFactory: options.poolFactory,
|
||||
endpointResolver: options.endpointResolver,
|
||||
now: options.now,
|
||||
});
|
||||
try {
|
||||
return await connector.introspect(
|
||||
{ connectionId, driver: 'sqlserver' },
|
||||
{ runId: `sqlserver-${connectionId}` },
|
||||
);
|
||||
} finally {
|
||||
await connector.cleanup();
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue