Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,358 @@
import { describe, expect, it, vi } from 'vitest';
import {
createSqlServerLiveDatabaseIntrospection,
isKloSqlServerConnectionConfig,
KloSqlServerScanConnector,
sqlServerConnectionPoolConfigFromConfig,
type KloSqlServerPoolFactory,
type KloSqlServerQueryResult,
} from './index.js';
/**
 * Decorates `rows` with the `columns` metadata property that a SQL Server
 * recordset carries, reporting every listed column as `nvarchar`.
 *
 * The input array itself is mutated and returned (no copy is made).
 *
 * @param rows - Row objects to turn into a recordset.
 * @param columnNames - Column names to describe in the metadata.
 * @returns The same array instance, now carrying `columns`.
 */
function recordset<T extends Record<string, unknown>>(
  rows: T[],
  columnNames: string[],
): T[] & { columns: Record<string, { type: { declaration: string } }> } {
  const describeColumn = (name: string): [string, { type: { declaration: string } }] => [
    name,
    { type: { declaration: 'nvarchar' } },
  ];
  return Object.assign(rows, { columns: Object.fromEntries(columnNames.map(describeColumn)) });
}
/**
 * Wraps rows in the query-result envelope the connector expects, attaching
 * column metadata through `recordset`.
 *
 * @param rows - Rows the fake query should return.
 * @param columnNames - Names of the columns to describe.
 */
function result<T extends Record<string, unknown>>(rows: T[], columnNames: string[]): KloSqlServerQueryResult {
  const rowsWithMetadata = recordset(rows, columnNames);
  return { recordset: rowsWithMetadata };
}
/**
 * Builds an in-memory pool factory for the tests below.
 *
 * Every pool it creates hands out the same request object. Its `query` mock
 * inspects the SQL text with substring checks, tried top to bottom (first
 * match wins), and returns a canned recordset; unrecognised SQL throws so
 * that an unexpected query fails the test loudly. `input` ignores its
 * arguments and returns the request for chaining.
 */
function fakePoolFactory(): KloSqlServerPoolFactory {
const query = vi.fn(async (sql: string): Promise<KloSqlServerQueryResult> => {
// Table/view listing.
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return result(
[
{ table_name: 'customers', table_type: 'BASE TABLE' },
{ table_name: 'orders', table_type: 'BASE TABLE' },
{ table_name: 'order_summary', table_type: 'VIEW' },
],
['table_name', 'table_type'],
);
}
// Table-level MS_Description comments (minor_id = 0).
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = 0')) {
return result([{ table_name: 'customers', table_comment: 'Customer table' }], [
'table_name',
'table_comment',
]);
}
// Column-level MS_Description comments (minor_id = column_id).
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = c.column_id')) {
return result([{ table_name: 'customers', column_name: 'id', column_comment: 'PK' }], [
'table_name',
'column_name',
'column_comment',
]);
}
// Column listing for all three relations (six columns in total).
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return result(
[
{ table_name: 'customers', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'customers', column_name: 'name', data_type: 'nvarchar', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'customer_id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
{ table_name: 'order_summary', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
],
['table_name', 'column_name', 'data_type', 'is_nullable'],
);
}
// Primary-key columns.
if (sql.includes("CONSTRAINT_TYPE = 'PRIMARY KEY'")) {
return result(
[
{ table_name: 'customers', column_name: 'id' },
{ table_name: 'orders', column_name: 'id' },
],
['table_name', 'column_name'],
);
}
// Foreign keys: orders.customer_id -> dbo.customers.id.
if (sql.includes('REFERENTIAL_CONSTRAINTS')) {
return result(
[
{
table_name: 'orders',
column_name: 'customer_id',
referenced_table_schema: 'dbo',
referenced_table_name: 'customers',
referenced_column_name: 'id',
constraint_name: 'orders_customer_id_fk',
},
],
[
'table_name',
'column_name',
'referenced_table_schema',
'referenced_table_name',
'referenced_column_name',
'constraint_name',
],
);
}
// Per-schema row counts (the grouped sys.partitions query).
if (sql.includes('sys.partitions') && sql.includes('GROUP BY t.name')) {
return result(
[
{ table_name: 'customers', row_count: 2 },
{ table_name: 'orders', row_count: 2 },
],
['table_name', 'row_count'],
);
}
// Table sample of [id], [status] limited to one row.
if (sql.includes('SELECT TOP 1 [id], [status] FROM [dbo].[orders]')) {
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
}
// User SQL wrapped by the connector's maxRows limiter.
if (sql.includes('SELECT TOP 1 * FROM (select id, status from dbo.orders) AS klo_query_result')) {
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
}
// Single-column sample.
if (sql.includes('SELECT TOP 5 [status] FROM [dbo].[orders]')) {
return result([{ status: 'paid' }, { status: 'open' }], ['status']);
}
// Cardinality probe used before fetching distinct values.
if (sql.includes('COUNT(DISTINCT val)')) {
return result([{ cardinality: 2 }], ['cardinality']);
}
// Distinct values listing.
if (sql.includes('SELECT TOP 10 val')) {
return result([{ val: 'open' }, { val: 'paid' }], ['val']);
}
// Single-table row count (parameterised by @tableName).
if (sql.includes('SUM(p.rows) AS row_count') && sql.includes('t.name = @tableName')) {
return result([{ row_count: 2 }], ['row_count']);
}
// Schema listing.
if (sql.includes('SELECT s.name AS schema_name')) {
return result([{ schema_name: 'dbo' }, { schema_name: 'sales' }], ['schema_name']);
}
// Connectivity probe.
if (sql.trim() === 'SELECT 1') {
return result([{ ok: 1 }], ['ok']);
}
// Anything else is a query the suite did not anticipate.
throw new Error(`Unexpected SQL: ${sql}`);
});
// One request object shared by every pool.request() call; input() is chainable.
const request: { input(name: string, value: unknown): typeof request; query: typeof query } = {
input: vi.fn((_key: string, _value: unknown) => request),
query,
};
const close = vi.fn(async () => undefined);
return {
createPool: vi.fn(async () => ({
request: () => request,
close,
})),
};
}
// End-to-end behaviour of the SQL Server connector against the fake pool factory above.
describe('KloSqlServerScanConnector', () => {
// Driver detection, pool-config mapping, and the mandatory `readonly: true` guard.
it('resolves SQL Server connection configuration safely', () => {
expect(
isKloSqlServerConnectionConfig({
driver: 'sqlserver',
host: 'localhost',
database: 'analytics',
readonly: true,
}),
).toBe(true);
expect(isKloSqlServerConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(false);
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
port: 14330,
database: 'analytics',
username: 'reader',
trustServerCertificate: false,
readonly: true,
},
}),
).toMatchObject({
server: 'db.example.test',
port: 14330,
database: 'analytics',
user: 'reader',
options: { encrypt: true, trustServerCertificate: false },
});
// A connection that is not explicitly read-only must be rejected.
expect(() =>
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { driver: 'sqlserver', host: 'db.example.test', database: 'analytics', readonly: false },
}),
).toThrow('Native SQL Server connector requires connections.warehouse.readonly: true');
});
// Full snapshot: tables and views, columns, comments, primary/foreign keys, row counts.
it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
const connector = new KloSqlServerScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
readonly: true,
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T16:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlserver' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'sqlserver',
extractedAt: '2026-04-29T16:00:00.000Z',
scope: { catalogs: ['analytics'], schemas: ['dbo'] },
metadata: {
database: 'analytics',
host: 'db.example.test',
schemas: ['dbo'],
table_count: 3,
total_columns: 6,
},
});
// Views report no row estimate; tables without a comment report null.
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([
['customers', 'table', 2, 'Customer table'],
['orders', 'table', 2, null],
['order_summary', 'view', null, null],
]);
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
});
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
{
fromColumn: 'customer_id',
toCatalog: 'analytics',
toDb: 'dbo',
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fk',
},
]);
});
// Exercises each data-access method once, then releases the pool.
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
const poolFactory = fakePoolFactory();
const connector = new KloSqlServerScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
readonly: true,
},
poolFactory,
});
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: 'analytics', db: 'dbo', name: 'orders' },
columns: ['id', 'status'],
limit: 1,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({
headers: ['id', 'status'],
headerTypes: ['nvarchar', 'nvarchar'],
rows: [[10, 'paid']],
totalRows: 1,
});
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: 'analytics', db: 'dbo', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from dbo.orders', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
// Write statements must be refused before reaching the database.
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
await expect(connector.listSchemas()).resolves.toEqual(['dbo', 'sales']);
// Column statistics are not supported by this connector.
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
await connector.cleanup();
});
// The live-database adapter should expose the same snapshot shape per table.
it('adapts native SQL Server snapshots to live-database introspection for local ingest', async () => {
const introspection = createSqlServerLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
readonly: true,
},
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T16:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T16:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: 'analytics',
db: 'dbo',
columns: [
{
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
},
{
name: 'name',
nativeType: 'nvarchar',
normalizedType: 'nvarchar',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
});
});

View file

@ -0,0 +1,701 @@
import { assertReadOnlySql } from '@klo/context/connections';
import {
createKloConnectorCapabilities,
type KloColumnSampleInput,
type KloColumnSampleResult,
type KloColumnStatsInput,
type KloColumnStatsResult,
type KloQueryResult,
type KloReadOnlyQueryInput,
type KloScanConnector,
type KloScanContext,
type KloScanInput,
type KloSchemaColumn,
type KloSchemaForeignKey,
type KloSchemaSnapshot,
type KloSchemaTable,
type KloTableRef,
type KloTableSampleInput,
type KloTableSampleResult,
} from '@klo/context/scan';
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import sql from 'mssql';
import { KloSqlServerDialect } from './dialect.js';
/**
 * User-supplied settings for one SQL Server connection.
 *
 * String settings that are read through `stringConfigValue` (host, database,
 * username, user, password, url, schema) may be literal values or
 * `env:NAME` / `file:path` references.
 */
export interface KloSqlServerConnectionConfig {
/** Must equal "sqlserver" (case-insensitive) for this connector to accept the connection. */
driver?: string;
/** Server host name; required unless `url` supplies it. */
host?: string;
/** TCP port; 1433 is used when no finite number is provided. */
port?: number;
/** Database name; required unless `url` supplies it. */
database?: string;
/** Login name; takes precedence over `user` when both are set. */
username?: string;
/** Alternative spelling of `username`. */
user?: string;
/** Login password. */
password?: string;
/** Connection URL; fields set explicitly on this object override values parsed from it. */
url?: string;
/** Single schema to scan when `schemas` is absent or empty; falls back to "dbo". */
schema?: string;
/** Schemas to scan; used instead of `schema` when non-empty. */
schemas?: string[];
/** Whether to trust the server certificate; treated as true when nothing supplies a value. */
trustServerCertificate?: boolean;
/** Must be exactly `true`; the connector refuses any other value. */
readonly?: boolean;
/** Additional keys are accepted and left uninterpreted by the code in this file. */
[key: string]: unknown;
}
/**
 * Fully resolved pool settings produced by `sqlServerConnectionPoolConfigFromConfig`
 * and handed to a `KloSqlServerPoolFactory`.
 */
export interface KloSqlServerPoolConfig {
/** Host to connect to. */
server: string;
/** TCP port to connect to. */
port: number;
/** Database to open. */
database: string;
/** Login name. */
user: string;
/** Login password, when one was configured. */
password?: string;
/** Transport options; encryption is always on. */
options: { encrypt: true; trustServerCertificate: boolean };
/** Pool sizing and idle timeout (milliseconds). */
pool: { max: number; min: number; idleTimeoutMillis: number };
}
/** Minimal shape of a query result consumed by the connector. */
export interface KloSqlServerQueryResult {
/** Result rows; the array may also carry `columns` metadata keyed by column name with a type declaration string. */
recordset?: Array<Record<string, unknown>> & { columns?: Record<string, { type?: { declaration?: string } }> };
}
/** A single request obtained from a pool: bind named inputs, then run one query. */
interface KloSqlServerRequest {
/** Binds a named parameter and returns a request so calls can be chained. */
input(name: string, value: unknown): KloSqlServerRequest;
/** Executes the SQL text and resolves with its result. */
query(query: string): Promise<KloSqlServerQueryResult>;
}
/** Connection pool abstraction the connector depends on. */
export interface KloSqlServerPool {
/** Creates a request against this pool. */
request(): KloSqlServerRequest;
/** Closes the pool. */
close(): Promise<void>;
}
/** Creates pools from resolved settings; injectable so tests can supply a fake. */
export interface KloSqlServerPoolFactory {
/** Resolves with a pool for the given settings. */
createPool(config: KloSqlServerPoolConfig): Promise<KloSqlServerPool>;
}
/** Host and port the pool should actually connect to, as returned by an endpoint resolver. */
interface KloSqlServerResolvedEndpoint {
/** Host that replaces the configured server. */
host: string;
/** Port that replaces the configured port. */
port: number;
/** Optional teardown hook; the connector calls it from `cleanup()`. */
close?: () => Promise<void>;
}
/**
 * Hook that can substitute the host and port before a pool is created.
 * NOTE(review): presumably intended for tunnels or proxies — confirm with callers.
 */
export interface KloSqlServerEndpointResolver {
/** Receives the configured host, port, and raw connection settings; resolves with the endpoint to use. */
resolve(input: {
host: string;
port: number;
connection: KloSqlServerConnectionConfig;
}): Promise<KloSqlServerResolvedEndpoint>;
}
/** Constructor options for `KloSqlServerScanConnector`. */
export interface KloSqlServerScanConnectorOptions {
/** Identifier of the connection this connector serves. */
connectionId: string;
/** Connection settings; validated in the constructor. */
connection: KloSqlServerConnectionConfig | undefined;
/** Pool factory override; a factory backed by `mssql` is used when omitted. */
poolFactory?: KloSqlServerPoolFactory;
/** Optional hook to rewrite the host and port before connecting. */
endpointResolver?: KloSqlServerEndpointResolver;
/** Environment used to resolve `env:` references; defaults to `process.env`. */
env?: NodeJS.ProcessEnv;
/** Clock used for snapshot timestamps; defaults to the current time. */
now?: () => Date;
}
/** Read-only query input extended with optional named parameters. */
export interface KloSqlServerReadOnlyQueryInput extends KloReadOnlyQueryInput {
/** Named parameter values; passed to the dialect's `prepareQuery` alongside the SQL. */
params?: Record<string, unknown>;
}
/** Tuning knobs for `getColumnDistinctValues`. */
export interface KloSqlServerColumnDistinctValuesOptions {
/** Above this measured cardinality the values are not fetched. */
maxCardinality: number;
/** Limit passed to the distinct-values query. */
limit: number;
/** Sample size passed to the cardinality query; 10000 when omitted. */
sampleSize?: number;
}
/** Outcome of `getColumnDistinctValues`. */
export interface KloSqlServerColumnDistinctValuesResult {
/** Distinct non-null values as strings, or null when the cardinality exceeded the allowed maximum. */
values: string[] | null;
/** Cardinality reported by the cardinality query. */
cardinality: number;
}
/** Table sample result extended with per-column type declarations. */
interface KloSqlServerTableSampleResult extends KloTableSampleResult {
/** Type declaration for each header, in header order. */
headerTypes?: string[];
}
/**
 * Extracts the textual type declaration from driver column-type metadata.
 *
 * The metadata may be an object with a string `declaration`, or a function
 * that produces one when called with no arguments. Anything else, including
 * a function that throws, yields `'unknown'`.
 *
 * @param type - Column type metadata of unknown shape.
 * @returns The declaration string, or `'unknown'` when none can be read.
 */
function sqlTypeDeclaration(type: unknown): string {
  const fallback = 'unknown';

  if (typeof type === 'function') {
    let resolved: string;
    try {
      // Call the function without arguments and inspect what it returns.
      resolved = sqlTypeDeclaration(type());
    } catch {
      resolved = fallback;
    }
    return resolved;
  }

  if (type === null || typeof type !== 'object' || !('declaration' in type)) {
    return fallback;
  }

  const { declaration } = type as { declaration?: unknown };
  return typeof declaration === 'string' ? declaration : fallback;
}
/**
 * Copies driver rows into a fresh array and attaches normalised column
 * metadata, reducing each column's type to a plain declaration string.
 *
 * @param rows - Rows returned by the driver; `undefined` is treated as no rows.
 * @param columns - Driver column metadata keyed by column name.
 * @returns A new array of the rows carrying a `columns` property.
 */
function sqlRecordset(
  rows: Array<Record<string, unknown>> | undefined,
  columns: Record<string, { type?: unknown }> | undefined,
): NonNullable<KloSqlServerQueryResult['recordset']> {
  const copiedRows = Array.from(rows ?? []) as NonNullable<KloSqlServerQueryResult['recordset']>;
  const normalisedColumns = Object.entries(columns ?? {}).map(
    ([name, metadata]): [string, { type: { declaration: string } }] => {
      const declaration = sqlTypeDeclaration(metadata.type);
      return [name, { type: { declaration } }];
    },
  );
  copiedRows.columns = Object.fromEntries(normalisedColumns);
  return copiedRows;
}
/**
 * Production pool factory backed by the `mssql` package.
 *
 * It connects a `ConnectionPool` and exposes it through the narrow
 * `KloSqlServerPool` interface, converting each driver result with
 * `sqlRecordset`.
 */
class DefaultSqlServerPoolFactory implements KloSqlServerPoolFactory {
  /**
   * Opens a connected pool for the given settings.
   *
   * @param config - Resolved pool settings.
   * @returns A wrapper exposing `request()` and `close()` for the connected pool.
   */
  async createPool(config: KloSqlServerPoolConfig): Promise<KloSqlServerPool> {
    const connectedPool = await new sql.ConnectionPool(config as sql.config).connect();

    const wrapRequest = (): KloSqlServerRequest => {
      const driverRequest = connectedPool.request();
      const wrapped: KloSqlServerRequest = {
        input: (name: string, value: unknown) => {
          driverRequest.input(name, value);
          return wrapped;
        },
        query: async (query: string) => {
          const driverResult = await driverRequest.query(query);
          const driverRows = driverResult.recordset as Array<Record<string, unknown>> | undefined;
          return { recordset: sqlRecordset(driverRows, driverResult.recordset?.columns) };
        },
      };
      return wrapped;
    };

    return {
      request: wrapRequest,
      close: () => connectedPool.close(),
    };
  }
}
/**
 * Reads one setting as a trimmed string and resolves any `env:` / `file:`
 * reference it contains.
 *
 * @param connection - Connection settings, possibly undefined.
 * @param key - Setting to read.
 * @param env - Environment used for `env:` references.
 * @returns The resolved string, or `undefined` when the setting is missing,
 *   not a string, or blank.
 */
function stringConfigValue(
  connection: KloSqlServerConnectionConfig | undefined,
  key: keyof KloSqlServerConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const rawValue = connection?.[key];
  if (typeof rawValue !== 'string') {
    return undefined;
  }
  const trimmedValue = rawValue.trim();
  if (trimmedValue.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmedValue, env);
}
/**
 * Resolves an indirect configuration value.
 *
 * - `env:NAME` yields the value of `NAME` in `env`, or `''` when it is unset.
 * - `file:PATH` yields the trimmed UTF-8 contents of the file at `PATH`. A
 *   bare `~`, or a leading `~/` (`~\`), is expanded to the current user's
 *   home directory.
 * - Anything else is returned unchanged.
 *
 * @param value - Literal value or reference.
 * @param env - Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a referenced file cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    const isHomeRelative = rawPath === '~' || rawPath.startsWith('~/') || rawPath.startsWith('~\\');
    // The separator after `~` must be stripped before resolving: path.resolve()
    // discards every earlier segment once it meets an absolute one, so
    // resolve(home, '/x') would point at '/x' instead of '<home>/x'.
    const path = isHomeRelative ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, '')) : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
/**
 * Splits a connection URL into individual connection settings.
 *
 * The user name, password, and database (the path without leading slashes)
 * are percent-decoded; a component with a malformed escape is returned as
 * written rather than raising `URIError`. Empty components become
 * `undefined`.
 *
 * `trustServerCertificate` is always a boolean: `true` only when the query
 * string contains `trustServerCertificate=true`, otherwise `false`.
 *
 * @param url - Connection URL, e.g. `sqlserver://user:pass@host:1433/db`.
 * @returns The settings found in the URL.
 * @throws TypeError when `url` cannot be parsed by the `URL` constructor.
 */
function parseSqlServerUrl(url: string): Partial<KloSqlServerConnectionConfig> {
  const parsed = new URL(url);
  // URL keeps these components percent-encoded, so decode them here.
  const decodeComponent = (component: string): string | undefined => {
    if (component.length === 0) {
      return undefined;
    }
    try {
      return decodeURIComponent(component);
    } catch {
      // Malformed escape sequence: fall back to the raw text.
      return component;
    }
  };
  return {
    host: parsed.hostname,
    port: parsed.port ? Number(parsed.port) : undefined,
    database: decodeComponent(parsed.pathname.replace(/^\/+/, '')),
    username: decodeComponent(parsed.username),
    password: decodeComponent(parsed.password),
    trustServerCertificate: parsed.searchParams.get('trustServerCertificate') === 'true',
  };
}
/**
 * Passes through finite numbers and rejects everything else.
 *
 * @param value - Candidate value of any type.
 * @returns `value` when it is a finite number, otherwise `undefined`.
 */
function maybeNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
/**
 * Determines which schemas the connector should scan.
 *
 * Entries of `connection.schemas` that are strings and not blank are trimmed
 * and resolved (`env:` / `file:` references are supported). When that leaves
 * nothing — the list is missing, empty, or contains only blank or non-string
 * entries — the single `schema` setting is used, defaulting to `dbo`.
 *
 * @param connection - Connection settings.
 * @param env - Environment used for `env:` references.
 * @returns At least one schema name.
 */
function schemaNames(connection: KloSqlServerConnectionConfig, env: NodeJS.ProcessEnv): string[] {
  if (Array.isArray(connection.schemas)) {
    const configured = connection.schemas
      // Guard against non-string entries coming from loosely typed config files.
      .filter((schema): schema is string => typeof schema === 'string')
      // Trim before resolving, as stringConfigValue does for single values, so
      // stray whitespace does not end up in the schema name or hide a reference prefix.
      .map((schema) => schema.trim())
      .filter((schema) => schema.length > 0)
      .map((schema) => resolveStringReference(schema, env));
    if (configured.length > 0) {
      return configured;
    }
  }
  return [stringConfigValue(connection, 'schema', env) ?? 'dbo'];
}
/**
 * Buckets rows by their `table_name`.
 *
 * Keys appear in the order each table is first seen, and rows keep their
 * input order within a bucket.
 *
 * @param rows - Rows carrying a `table_name`.
 * @returns Map from table name to that table's rows.
 */
function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string, T[]> {
  return rows.reduce((byTable, row) => {
    const bucket = byTable.get(row.table_name);
    if (bucket === undefined) {
      byTable.set(row.table_name, [row]);
    } else {
      bucket.push(row);
    }
    return byTable;
  }, new Map<string, T[]>());
}
/**
 * Coerces a value to a finite number using `Number()`.
 *
 * Note that `Number(null)` and `Number('')` are `0`, so those inputs yield
 * `0`, not `null`.
 *
 * @param value - Value to coerce (for example a numeric string from the driver).
 * @returns The finite number, or `null` when coercion gives NaN or ±Infinity.
 */
function firstNumber(value: unknown): number | null {
  const coerced = Number(value);
  if (!Number.isFinite(coerced)) {
    return null;
  }
  return coerced;
}
/**
 * Validates that `sqlText` is read-only and optionally caps the number of
 * rows it can return.
 *
 * Trailing semicolons are removed. When `maxRows` is given, the statement is
 * wrapped as a derived table under `SELECT TOP <maxRows> *`.
 *
 * NOTE(review): T-SQL does not accept a `WITH` clause, or an `ORDER BY`
 * without TOP/OFFSET, inside a derived table, so such statements will
 * presumably be rejected by the server when `maxRows` is set — confirm.
 *
 * @param sqlText - Statement supplied by the caller.
 * @param maxRows - Optional row cap; must be a positive integer when present.
 * @returns The statement to execute.
 * @throws Error when the statement is not read-only (raised by
 *   `assertReadOnlySql`) or when `maxRows` is not a positive integer.
 */
function limitSqlForSqlServerExecution(sqlText: string, maxRows: number | undefined): string {
  const trimmed = assertReadOnlySql(sqlText).replace(/;+\s*$/, '');
  // Only an absent cap means "no limit". A falsy check would also let 0 and
  // NaN slip past the validation below and silently run without any cap.
  if (maxRows == null) {
    return trimmed;
  }
  if (!Number.isInteger(maxRows) || maxRows <= 0) {
    throw new Error('maxRows must be a positive integer.');
  }
  return `SELECT TOP ${maxRows} * FROM (${trimmed}) AS klo_query_result`;
}
/**
 * Tells whether a connection is configured for the SQL Server driver.
 *
 * @param connection - Connection settings, possibly undefined.
 * @returns `true` when `driver` equals "sqlserver", ignoring case.
 */
export function isKloSqlServerConnectionConfig(connection: KloSqlServerConnectionConfig | undefined): boolean {
  const driverName = connection?.driver ?? '';
  return String(driverName).toLowerCase() === 'sqlserver';
}
/**
 * Validates connection settings and converts them into pool settings.
 *
 * Values parsed from `url` act as defaults; fields set directly on the
 * connection override them. `username` is preferred over `user`. The port
 * falls back to 1433 and `trustServerCertificate` to `true` when no value is
 * available. Encryption is always enabled.
 *
 * @param input - Connection id (used in error messages), the connection
 *   settings, and an optional environment for `env:` references
 *   (defaults to `process.env`).
 * @returns Pool settings ready for a `KloSqlServerPoolFactory`.
 * @throws Error when the driver is not SQL Server, when `readonly` is not
 *   `true`, or when host, database, or user cannot be determined.
 */
export function sqlServerConnectionPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KloSqlServerConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KloSqlServerPoolConfig {
  if (!isKloSqlServerConnectionConfig(input.connection)) {
    throw new Error(`Native SQL Server connector cannot run driver "${input.connection?.driver ?? 'unknown'}"`);
  }
  if (input.connection?.readonly !== true) {
    throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.readonly: true`);
  }

  const env = input.env ?? process.env;
  const lookup = (
    source: KloSqlServerConnectionConfig,
    key: keyof KloSqlServerConnectionConfig,
  ): string | undefined => stringConfigValue(source, key, env);

  // URL-derived values go first so explicit fields win the merge.
  const connectionUrl = lookup(input.connection, 'url');
  const merged: KloSqlServerConnectionConfig = {
    ...(connectionUrl ? parseSqlServerUrl(connectionUrl) : {}),
    ...input.connection,
  };

  const server = lookup(merged, 'host');
  const database = lookup(merged, 'database');
  const user = lookup(merged, 'username') ?? lookup(merged, 'user');

  if (!server) {
    throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.host or url`);
  }
  if (!database) {
    throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.database or url`);
  }
  if (!user) {
    throw new Error(`Native SQL Server connector requires connections.${input.connectionId}.username, user, or url`);
  }

  const port = maybeNumber(merged.port) ?? 1433;
  const password = lookup(merged, 'password');
  const trustServerCertificate = merged.trustServerCertificate ?? true;
  return {
    server,
    port,
    database,
    user,
    password,
    options: { encrypt: true, trustServerCertificate },
    pool: { max: 10, min: 0, idleTimeoutMillis: 30000 },
  };
}
export class KloSqlServerScanConnector implements KloScanConnector {
readonly id: string;
readonly driver = 'sqlserver' as const;
readonly capabilities = createKloConnectorCapabilities({
tableSampling: true,
columnSampling: true,
columnStats: false,
readOnlySql: true,
nestedAnalysis: false,
formalForeignKeys: true,
estimatedRowCounts: true,
});
private readonly connectionId: string;
private readonly connection: KloSqlServerConnectionConfig;
private readonly poolConfig: KloSqlServerPoolConfig;
private readonly schemas: string[];
private readonly poolFactory: KloSqlServerPoolFactory;
private readonly endpointResolver?: KloSqlServerEndpointResolver;
private readonly now: () => Date;
private readonly dialect = new KloSqlServerDialect();
private pool: KloSqlServerPool | null = null;
private resolvedEndpoint: KloSqlServerResolvedEndpoint | null = null;
constructor(options: KloSqlServerScanConnectorOptions) {
this.connectionId = options.connectionId;
this.connection = options.connection ?? {};
const env = options.env ?? process.env;
this.poolConfig = sqlServerConnectionPoolConfigFromConfig({
connectionId: options.connectionId,
connection: options.connection,
env,
});
this.schemas = schemaNames(this.connection, env);
this.poolFactory = options.poolFactory ?? new DefaultSqlServerPoolFactory();
this.endpointResolver = options.endpointResolver;
this.now = options.now ?? (() => new Date());
this.id = `sqlserver:${options.connectionId}`;
}
async testConnection(): Promise<{ success: boolean; error?: string }> {
try {
await this.query('SELECT 1');
return { success: true };
} catch (error) {
return { success: false, error: error instanceof Error ? error.message : String(error) };
}
}
async introspect(input: KloScanInput, _ctx: KloScanContext): Promise<KloSchemaSnapshot> {
this.assertConnection(input.connectionId);
const tables: KloSchemaTable[] = [];
for (const schemaName of this.schemas) {
tables.push(...(await this.introspectSchema(schemaName)));
}
return {
connectionId: this.connectionId,
driver: 'sqlserver',
extractedAt: this.now().toISOString(),
scope: { catalogs: [this.poolConfig.database], schemas: this.schemas },
metadata: {
database: this.poolConfig.database,
schemas: this.schemas,
host: this.poolConfig.server,
table_count: tables.length,
total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
},
tables,
};
}
async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise<KloSqlServerTableSampleResult> {
this.assertConnection(input.connectionId);
const result = await this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
return { headers: result.headers, headerTypes: result.headerTypes, rows: result.rows, totalRows: result.totalRows };
}
async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise<KloColumnSampleResult> {
this.assertConnection(input.connectionId);
const result = await this.query(
this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
);
const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
return { values, nullCount: null, distinctCount: null };
}
async columnStats(_input: KloColumnStatsInput, _ctx: KloScanContext): Promise<KloColumnStatsResult | null> {
return null;
}
async executeReadOnly(input: KloSqlServerReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
this.assertConnection(input.connectionId);
const limitedSql = limitSqlForSqlServerExecution(input.sql, input.maxRows);
const prepared = this.dialect.prepareQuery(limitedSql, input.params);
const result = await this.query(prepared.sql, prepared.params);
return { ...result, rowCount: result.rows.length };
}
async getColumnDistinctValues(
table: KloTableRef,
columnName: string,
options: KloSqlServerColumnDistinctValuesOptions,
): Promise<KloSqlServerColumnDistinctValuesResult | null> {
const tableName = this.qTableName(table);
const quotedColumn = this.dialect.quoteIdentifier(columnName);
const cardinalityRows = await this.queryRaw<{ cardinality: unknown }>(
this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, options.sampleSize ?? 10000),
);
const cardinality = Number(cardinalityRows[0]?.cardinality);
if (Number.isNaN(cardinality)) {
return null;
}
if (cardinality === 0) {
return { values: [], cardinality: 0 };
}
if (cardinality > options.maxCardinality) {
return { values: null, cardinality };
}
const valuesRows = await this.queryRaw<{ val: unknown }>(
this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit),
);
return { values: valuesRows.filter((row) => row.val !== null).map((row) => String(row.val)), cardinality };
}
async getTableRowCount(tableName: string, schemaName = this.schemas[0] ?? 'dbo'): Promise<number> {
const rows = await this.queryRaw<{ row_count: unknown }>(
`
SELECT SUM(p.rows) AS row_count
FROM sys.tables t
INNER JOIN sys.partitions p ON t.object_id = p.object_id
INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
WHERE s.name = @schemaName
AND t.name = @tableName
AND p.index_id IN (0, 1)
`,
{ schemaName, tableName },
);
return firstNumber(rows[0]?.row_count) ?? 0;
}
qTableName(table: Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>): string {
return this.dialect.formatTableName(table);
}
quoteIdentifier(identifier: string): string {
return this.dialect.quoteIdentifier(identifier);
}
async listSchemas(): Promise<string[]> {
const rows = await this.queryRaw<{ schema_name: string }>(`
SELECT s.name AS schema_name
FROM sys.schemas s
WHERE s.name NOT IN (
'INFORMATION_SCHEMA', 'sys', 'guest',
'db_owner', 'db_accessadmin', 'db_securityadmin', 'db_ddladmin',
'db_backupoperator', 'db_datareader', 'db_datawriter',
'db_denydatareader', 'db_denydatawriter'
)
ORDER BY s.name
`);
return rows.map((row) => row.schema_name);
}
async cleanup(): Promise<void> {
if (this.pool) {
await this.pool.close();
this.pool = null;
}
if (this.resolvedEndpoint?.close) {
await this.resolvedEndpoint.close();
this.resolvedEndpoint = null;
}
}
private async introspectSchema(schemaName: string): Promise<KloSchemaTable[]> {
const tables = await this.queryRaw<{ table_name: string; table_type: string }>(
`
SELECT TABLE_NAME AS table_name, TABLE_TYPE AS table_type
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = @schemaName
AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
ORDER BY TABLE_NAME
`,
{ schemaName },
);
const columns = await this.queryRaw<{
table_name: string;
column_name: string;
data_type: string;
is_nullable: string;
}>(
`
SELECT TABLE_NAME AS table_name, COLUMN_NAME AS column_name, DATA_TYPE AS data_type, IS_NULLABLE AS is_nullable
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = @schemaName
ORDER BY TABLE_NAME, ORDINAL_POSITION
`,
{ schemaName },
);
const tableComments = await this.tableComments(schemaName);
const columnComments = await this.columnComments(schemaName);
const primaryKeys = await this.primaryKeys(schemaName);
const foreignKeys = await this.foreignKeys(schemaName);
const rowCounts = await this.rowCounts(schemaName);
const columnsByTable = groupByTable(columns);
const foreignKeysByTable = groupByTable(foreignKeys);
return tables.map((table) => ({
catalog: this.poolConfig.database,
db: schemaName,
name: table.table_name,
kind: table.table_type === 'VIEW' ? 'view' : 'table',
comment: tableComments.get(table.table_name) ?? null,
estimatedRows: table.table_type === 'VIEW' ? null : rowCounts.get(table.table_name) ?? 0,
columns: (columnsByTable.get(table.table_name) ?? []).map((column) =>
this.toSchemaColumn(column, primaryKeys.get(table.table_name) ?? new Set(), columnComments),
),
foreignKeys: (foreignKeysByTable.get(table.table_name) ?? []).map((row) => this.toSchemaForeignKey(row)),
}));
}
private async tableComments(schemaName: string): Promise<Map<string, string>> {
const rows = await this.queryRaw<{ table_name: string; table_comment: string }>(
`
SELECT o.name AS table_name, CAST(ep.value AS NVARCHAR(MAX)) AS table_comment
FROM sys.objects o
INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
INNER JOIN sys.extended_properties ep ON ep.major_id = o.object_id
AND ep.minor_id = 0
AND ep.name = 'MS_Description'
WHERE s.name = @schemaName
AND o.type IN ('U', 'V')
`,
{ schemaName },
);
return new Map(rows.map((row) => [row.table_name, row.table_comment]));
}
private async columnComments(schemaName: string): Promise<Map<string, string>> {
const rows = await this.queryRaw<{ table_name: string; column_name: string; column_comment: string }>(
`
SELECT o.name AS table_name, c.name AS column_name, CAST(ep.value AS NVARCHAR(MAX)) AS column_comment
FROM sys.columns c
INNER JOIN sys.objects o ON c.object_id = o.object_id
INNER JOIN sys.schemas s ON o.schema_id = s.schema_id
INNER JOIN sys.extended_properties ep ON ep.major_id = c.object_id
AND ep.minor_id = c.column_id
AND ep.name = 'MS_Description'
WHERE s.name = @schemaName
AND o.type IN ('U', 'V')
`,
{ schemaName },
);
return new Map(rows.map((row) => [`${row.table_name}.${row.column_name}`, row.column_comment]));
}
private async primaryKeys(schemaName: string): Promise<Map<string, Set<string>>> {
const rows = await this.queryRaw<{ table_name: string; column_name: string }>(
`
SELECT tc.TABLE_NAME AS table_name, kcu.COLUMN_NAME AS column_name
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
AND tc.TABLE_SCHEMA = @schemaName
ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION
`,
{ schemaName },
);
const grouped = new Map<string, Set<string>>();
for (const row of rows) {
const columns = grouped.get(row.table_name) ?? new Set<string>();
columns.add(row.column_name);
grouped.set(row.table_name, columns);
}
return grouped;
}
private async foreignKeys(schemaName: string): Promise<
Array<{
table_name: string;
column_name: string;
referenced_table_schema: string;
referenced_table_name: string;
referenced_column_name: string;
constraint_name: string;
}>
> {
return this.queryRaw(
`
SELECT
fk.TABLE_NAME AS table_name,
fk.COLUMN_NAME AS column_name,
pk.TABLE_SCHEMA AS referenced_table_schema,
pk.TABLE_NAME AS referenced_table_name,
pk.COLUMN_NAME AS referenced_column_name,
fk.CONSTRAINT_NAME AS constraint_name
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE fk
ON fk.CONSTRAINT_CATALOG = rc.CONSTRAINT_CATALOG
AND fk.CONSTRAINT_SCHEMA = rc.CONSTRAINT_SCHEMA
AND fk.CONSTRAINT_NAME = rc.CONSTRAINT_NAME
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE pk
ON pk.CONSTRAINT_CATALOG = rc.UNIQUE_CONSTRAINT_CATALOG
AND pk.CONSTRAINT_SCHEMA = rc.UNIQUE_CONSTRAINT_SCHEMA
AND pk.CONSTRAINT_NAME = rc.UNIQUE_CONSTRAINT_NAME
AND pk.ORDINAL_POSITION = fk.ORDINAL_POSITION
WHERE fk.TABLE_SCHEMA = @schemaName
ORDER BY fk.TABLE_NAME, fk.COLUMN_NAME
`,
{ schemaName },
);
}
private async rowCounts(schemaName: string): Promise<Map<string, number>> {
const rows = await this.queryRaw<{ table_name: string; row_count: unknown }>(
`
SELECT t.name AS table_name, SUM(p.rows) AS row_count
FROM sys.tables t
INNER JOIN sys.partitions p ON t.object_id = p.object_id
INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
WHERE s.name = @schemaName
AND p.index_id IN (0, 1)
GROUP BY t.name
`,
{ schemaName },
);
return new Map(rows.map((row) => [row.table_name, firstNumber(row.row_count) ?? 0]));
}
private toSchemaColumn(
column: { table_name: string; column_name: string; data_type: string; is_nullable: string },
primaryKeys: Set<string>,
comments: Map<string, string>,
): KloSchemaColumn {
return {
name: column.column_name,
nativeType: column.data_type,
normalizedType: this.dialect.mapDataType(column.data_type),
dimensionType: this.dialect.mapToDimensionType(column.data_type),
nullable: column.is_nullable === 'YES',
primaryKey: primaryKeys.has(column.column_name),
comment: comments.get(`${column.table_name}.${column.column_name}`) ?? null,
};
}
private toSchemaForeignKey(row: {
column_name: string;
referenced_table_schema: string;
referenced_table_name: string;
referenced_column_name: string;
constraint_name: string;
}): KloSchemaForeignKey {
return {
fromColumn: row.column_name,
toCatalog: this.poolConfig.database,
toDb: row.referenced_table_schema,
toTable: row.referenced_table_name,
toColumn: row.referenced_column_name,
constraintName: row.constraint_name || null,
};
}
private async poolForQuery(): Promise<KloSqlServerPool> {
if (!this.pool) {
const config = { ...this.poolConfig };
if (this.endpointResolver) {
this.resolvedEndpoint = await this.endpointResolver.resolve({
host: config.server,
port: config.port,
connection: this.connection,
});
config.server = this.resolvedEndpoint.host;
config.port = this.resolvedEndpoint.port;
}
this.pool = await this.poolFactory.createPool(config);
}
return this.pool;
}
/**
 * Executes `query` as given on a fresh request from the pool and returns the raw rows.
 *
 * @param query SQL text passed to the request unchanged.
 * @param params Optional named inputs; each entry is bound via `request.input(name, value)`.
 * @returns The result's recordset cast to `T[]`, or an empty array when there is none.
 */
private async queryRaw<T extends Record<string, unknown>>(query: string, params?: Record<string, unknown>): Promise<T[]> {
  const request = (await this.poolForQuery()).request();

  Object.entries(params ?? {}).forEach(([name, value]) => {
    request.input(name, value);
  });

  const { recordset } = await request.query(query);
  return (recordset ?? []) as T[];
}
/**
 * Executes a statement that must pass `assertReadOnlySql` and returns it in tabular form.
 *
 * @param query SQL text; it is passed through `assertReadOnlySql` and the value that
 *   helper returns is what gets executed (presumably the helper throws for statements
 *   that are not read-only — confirm against its definition).
 * @param params Optional named inputs; each entry is bound via `request.input(name, value)`.
 * @returns Headers, per-header type declarations (`'unknown'` when the driver reports
 *   none), rows as positional arrays aligned with `headers`, and the number of rows.
 */
private async query(query: string, params?: Record<string, unknown>): Promise<Omit<KloQueryResult, 'rowCount'>> {
  // Validate before touching the pool: a rejected statement should fail immediately,
  // without triggering endpoint resolution or pool creation, and should not be masked
  // by a connection error.
  const readOnlySql = assertReadOnlySql(query);

  const pool = await this.poolForQuery();
  const request = pool.request();
  if (params) {
    for (const [key, value] of Object.entries(params)) {
      request.input(key, value);
    }
  }

  const result = await request.query(readOnlySql);
  const recordset = result.recordset ?? [];

  // Prefer driver-provided column metadata; fall back to the first row's keys when absent.
  const columnMetadata = recordset.columns ?? {};
  const metadataHeaders = Object.keys(columnMetadata);
  const headers = metadataHeaders.length > 0 ? metadataHeaders : Object.keys(recordset[0] ?? {});
  const headerTypes = headers.map((header) => columnMetadata[header]?.type?.declaration ?? 'unknown');

  return {
    headers,
    headerTypes,
    rows: recordset.map((row) => headers.map((header) => row[header])),
    totalRows: recordset.length,
  };
}
/**
 * Guards against using this connector for a connection other than its own.
 *
 * @throws Error when `connectionId` differs from `this.connectionId`.
 */
private assertConnection(connectionId: string): void {
  const servesConnection = connectionId === this.connectionId;
  if (servesConnection) {
    return;
  }
  throw new Error(`KLO SQL Server connector ${this.id} cannot serve connection ${connectionId}`);
}
}

View file

@ -0,0 +1,49 @@
import { describe, expect, it } from 'vitest';
import { KloSqlServerDialect } from './dialect.js';
describe('KloSqlServerDialect', () => {
  const dialect = new KloSqlServerDialect();
  const eventsTable = '[dbo].[events]';

  it('quotes identifiers and formats schema-qualified table names', () => {
    // A closing bracket inside an identifier is escaped by doubling it.
    const quotingCases: Array<[string, string]> = [
      ['events', '[events]'],
      ['odd]name', '[odd]]name]'],
    ];
    for (const [identifier, quoted] of quotingCases) {
      expect(dialect.quoteIdentifier(identifier)).toBe(quoted);
    }

    // The catalog never appears in the formatted name; only schema and table do.
    const schemaQualified = dialect.formatTableName({ catalog: 'warehouse', db: 'dbo', name: 'events' });
    expect(schemaQualified).toBe(eventsTable);
    const unqualified = dialect.formatTableName({ catalog: null, db: null, name: 'events' });
    expect(unqualified).toBe('[events]');
  });

  it('maps SQL Server types to KLO dimension types', () => {
    const mappingCases: Array<[string, string]> = [
      ['datetime2', 'time'],
      ['decimal(18, 2)', 'number'],
      ['bigint', 'number'],
      ['bit', 'boolean'],
      ['uniqueidentifier', 'string'],
      ['', 'string'],
    ];
    for (const [nativeType, dimensionType] of mappingCases) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('builds sampling, distinct-value, pagination, and time SQL', () => {
    const sampleSql = dialect.generateSampleQuery(eventsTable, 25, ['id', 'event_name']);
    expect(sampleSql).toBe('SELECT TOP 25 [id], [event_name] FROM [dbo].[events]');

    const columnSampleSql = dialect.generateColumnSampleQuery(eventsTable, 'event_name', 10);
    expect(columnSampleSql).toBe(
      "SELECT TOP 10 [event_name] FROM [dbo].[events] WHERE [event_name] IS NOT NULL AND LTRIM(RTRIM(CAST([event_name] AS NVARCHAR(MAX)))) != ''",
    );

    const distinctSql = dialect.generateDistinctValuesQuery(eventsTable, '[event_name]', 5);
    expect(distinctSql).toContain('SELECT TOP 5 val');

    expect(dialect.getTopClause(10)).toBe('TOP 10');
    expect(dialect.getLimitOffsetClause(10, 20)).toBe('OFFSET 20 ROWS FETCH NEXT 10 ROWS ONLY');

    const monthBucket = dialect.getTimeTruncExpression('created_at', 'month');
    expect(monthBucket).toBe('DATEFROMPARTS(YEAR(created_at), MONTH(created_at), 1)');
  });

  it('prepares named parameters using SQL Server @ parameters', () => {
    const namedParams = { id: 10, name: 'signup' };
    const prepared = dialect.prepareQuery('select * from events where id = :id and name = :name', namedParams);

    expect(prepared).toEqual({
      sql: 'select * from events where id = @id and name = @name',
      params: { id: 10, name: 'signup' },
    });
  });
});

View file

@ -0,0 +1,201 @@
import type { KloSchemaDimensionType, KloTableRef } from '@klo/context/scan';
// Table reference accepted by `formatTableName`: `name` is required, while `catalog`
// and `db` may be omitted entirely.
type SqlServerTableNameRef = Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>;
/**
 * T-SQL dialect helper for SQL Server: quotes identifiers, classifies native column
 * types into KLO dimension types, and builds SQL fragments and queries for sampling,
 * counting and time bucketing.
 *
 * Identifier handling is not uniform across methods: `generateSampleQuery` (its column
 * list) and `generateColumnSampleQuery` quote the column names they receive, while every
 * other generator interpolates `tableName` / `columnName` / `column` verbatim, so those
 * arguments must already be quoted by the caller.
 */
export class KloSqlServerDialect {
  readonly type = 'sqlserver';

  /** Exact (lower-cased, length-stripped) native type name -> dimension type. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    datetime: 'time',
    datetime2: 'time',
    date: 'time',
    time: 'time',
    datetimeoffset: 'time',
    smalldatetime: 'time',
    // In SQL Server `timestamp` is the deprecated synonym of `rowversion`: an
    // auto-incrementing binary(8) counter that carries no date or time. It is mapped
    // explicitly so the substring fallback on "time" below cannot classify it as temporal.
    timestamp: 'string',
    rowversion: 'string',
    int: 'number',
    bigint: 'number',
    smallint: 'number',
    tinyint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    real: 'number',
    money: 'number',
    smallmoney: 'number',
    varchar: 'string',
    nvarchar: 'string',
    char: 'string',
    nchar: 'string',
    text: 'string',
    ntext: 'string',
    uniqueidentifier: 'string',
    xml: 'string',
    bit: 'boolean',
  };

  /** Wraps an identifier in square brackets, doubling any `]` it contains. */
  quoteIdentifier(identifier: string): string {
    return `[${identifier.replace(/\]/g, ']]')}]`;
  }

  /**
   * Formats a table reference as `[schema].[table]`, or `[table]` when `db` is empty.
   * The `catalog` part of the reference is not emitted.
   */
  formatTableName(table: SqlServerTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native type name as `'time'`, `'number'`, `'boolean'` or `'string'`.
   *
   * The name is lower-cased and any `(...)` length/precision suffix is dropped; exact
   * matches in the mapping table win, then substring heuristics apply, and everything
   * else (including an empty name) is `'string'`.
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    // Trim again after cutting the suffix so "decimal (18, 2)" still hits the exact table.
    const normalized = (lower.split('(')[0] ?? lower).trim();

    // Own-property lookup only: a plain index would also find inherited keys such as
    // "constructor" and return a function instead of a dimension type.
    const mapped = Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)
      ? this.typeMappings[normalized]
      : undefined;
    if (mapped !== undefined) {
      return mapped;
    }

    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    if (
      normalized.includes('int') ||
      normalized.includes('num') ||
      normalized.includes('dec') ||
      normalized.includes('float') ||
      normalized.includes('money')
    ) {
      return 'number';
    }
    if (normalized.includes('bit')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT TOP <limit> ... FROM <tableName>`.
   *
   * @param tableName Already-quoted table expression (interpolated verbatim).
   * @param columns Unquoted column names; each is quoted here. `*` is used when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT TOP ${limit} ${columnList} FROM ${tableName}`;
  }

  /**
   * Builds a query sampling up to `limit` values of one column, skipping NULLs and values
   * that are empty after trimming.
   *
   * @param tableName Already-quoted table expression (interpolated verbatim).
   * @param columnName Unquoted column name; it is quoted here.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT TOP ${limit} ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND LTRIM(RTRIM(CAST(${quotedColumn} AS NVARCHAR(MAX)))) != ''`;
  }

  /**
   * Rewrites `:name` placeholders into SQL Server `@name` parameters for every key of `params`.
   *
   * @returns The rewritten SQL together with the same `params` object; when `params` is
   *   missing the SQL is returned untouched with `params: undefined`.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: Record<string, unknown> } {
    if (!params) {
      return { sql, params: undefined };
    }
    let parameterizedQuery = sql;
    for (const key of Object.keys(params)) {
      // The key is escaped so characters such as `$` (legal in T-SQL identifiers) are
      // matched literally. `(?!\w)` keeps `:id` from matching inside `:id_2`.
      const placeholder = new RegExp(`:${this.escapeRegExp(key)}(?!\\w)`, 'g');
      // A replacer function avoids `$`-patterns in the key being expanded by `replace`.
      parameterizedQuery = parameterizedQuery.replace(placeholder, () => `@${key}`);
    }
    return { sql: parameterizedQuery, params };
  }

  /**
   * Returns a row-level random filter keeping roughly `samplePct` (a fraction in (0, 1))
   * of the rows, or `''` when the fraction is outside that open interval or not a number.
   */
  getRandomSampleFilter(samplePct: number): string {
    if (!(samplePct > 0 && samplePct < 1)) {
      return '';
    }
    // Never round a positive fraction down to 0, which would filter out every row.
    const threshold = Math.max(1, Math.round(samplePct * 100));
    // The modulo is taken before ABS: ABS(CHECKSUM(...)) raises an arithmetic overflow
    // when CHECKSUM returns the minimum int, whereas the remainder is always in -99..99.
    return `ABS(CHECKSUM(NEWID()) % 100) < ${threshold}`;
  }

  /**
   * Returns a `TABLESAMPLE (<n> PERCENT)` clause for a fraction in (0, 1), or `''` when
   * the fraction is outside that open interval or not a number.
   */
  getTableSampleClause(samplePct: number): string {
    if (!(samplePct > 0 && samplePct < 1)) {
      return '';
    }
    // Strip binary floating-point noise (0.07 * 100 === 7.000000000000001).
    const percent = Number((samplePct * 100).toPrecision(12));
    return `TABLESAMPLE (${percent} PERCENT)`;
  }

  /**
   * Returns `OFFSET <offset> ROWS FETCH NEXT <limit> ROWS ONLY` when `offset` is positive.
   * Without a positive offset it returns `''`, i.e. `limit` is not applied by this clause.
   */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `OFFSET ${offset} ROWS FETCH NEXT ${limit} ROWS ONLY` : '';
  }

  /** Returns `TOP <limit>`. */
  getTopClause(limit: number): string {
    return `TOP ${limit}`;
  }

  /** Aggregate expression counting NULLs of `column` (interpolated verbatim). */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** Aggregate expression counting distinct values of `column` (interpolated verbatim). */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Builds a query returning, as `cardinality`, the number of distinct non-NULL values
   * among the first `sampleSize` rows returned for the column (no ordering is specified).
   * `tableName` and `columnName` are interpolated verbatim.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
WITH sampled AS (
SELECT TOP ${sampleSize} ${columnName} AS val
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
)
SELECT COUNT(DISTINCT val) AS cardinality
FROM sampled
`;
  }

  /**
   * Builds a query returning up to `limit` distinct non-NULL values of the column, cast to
   * `NVARCHAR(MAX)` and ordered ascending, under the alias `val`.
   * `tableName` and `columnName` are interpolated verbatim.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
SELECT TOP ${limit} val
FROM (
SELECT DISTINCT CAST(${columnName} AS NVARCHAR(MAX)) AS val
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
) AS distinct_vals
ORDER BY val
`;
  }

  /** Always returns `null`: this dialect provides no column-statistics query. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /**
   * Same as `generateCardinalitySampleQuery`, but the sampled rows are chosen by
   * `ORDER BY NEWID()` rather than left to the engine's default order.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
WITH sampled AS (
SELECT TOP ${sampleSize} ${columnName} AS val
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
ORDER BY NEWID()
)
SELECT COUNT(DISTINCT val) AS cardinality
FROM sampled
`;
  }

  /**
   * Builds an expression truncating `column` to the start of the given granularity.
   *
   * @param column Column expression, interpolated verbatim.
   * @param timezone Optional time zone name; when given, the column is rewritten as
   *   `<column> AT TIME ZONE 'UTC' AT TIME ZONE '<timezone>'` before truncation.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = this.withTimeZone(column, timezone);
    switch (granularity) {
      case 'day':
        return `CAST(${col} AS DATE)`;
      case 'week':
        return `DATEADD(WEEK, DATEDIFF(WEEK, 0, ${col}), 0)`;
      case 'month':
        return `DATEFROMPARTS(YEAR(${col}), MONTH(${col}), 1)`;
      case 'quarter':
        return `DATEFROMPARTS(YEAR(${col}), (DATEPART(QUARTER, ${col}) - 1) * 3 + 1, 1)`;
      case 'year':
        return `DATEFROMPARTS(YEAR(${col}), 1, 1)`;
    }
  }

  /**
   * Builds an expression bucketing `column` into fixed intervals counted from `origin`.
   *
   * @param interval `"<amount> <unit>"`, whitespace-separated. Both parts are
   *   interpolated verbatim, so `unit` must be a datepart name T-SQL accepts.
   * @param origin Bucket anchor used as a string literal; defaults to `1970-01-01`.
   * @param timezone Optional time zone name, applied as in `getTimeTruncExpression`.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = this.withTimeZone(column, timezone);
    // Tolerate leading/trailing or repeated whitespace between amount and unit.
    const [amount, unit] = interval.trim().split(/\s+/);
    const originExpr = origin ? `'${this.escapeLiteral(origin)}'` : `'1970-01-01'`;
    return `DATEADD(${unit}, (DATEDIFF(${unit}, ${originExpr}, ${col}) / ${amount}) * ${amount}, ${originExpr})`;
  }

  /** Returns the interval as a single-quoted SQL string literal. */
  parseIntervalToSql(interval: string): string {
    return `'${this.escapeLiteral(interval)}'`;
  }

  /** Appends the UTC -> `timezone` conversion to a column expression when a zone is given. */
  private withTimeZone(column: string, timezone?: string): string {
    return timezone ? `${column} AT TIME ZONE 'UTC' AT TIME ZONE '${this.escapeLiteral(timezone)}'` : column;
  }

  /** Doubles single quotes so the value cannot terminate a quoted T-SQL string literal. */
  private escapeLiteral(value: string): string {
    return value.replace(/'/g, "''");
  }

  /** Escapes characters that have a special meaning inside a regular expression. */
  private escapeRegExp(value: string): string {
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}

View file

@ -0,0 +1,17 @@
// Public entry point of the package: re-exports the dialect, the scan connector with its
// helper functions and option/result types, and the live-database introspection factory.
export { KloSqlServerDialect } from './dialect.js';
export {
isKloSqlServerConnectionConfig,
KloSqlServerScanConnector,
sqlServerConnectionPoolConfigFromConfig,
type KloSqlServerColumnDistinctValuesOptions,
type KloSqlServerColumnDistinctValuesResult,
type KloSqlServerConnectionConfig,
type KloSqlServerEndpointResolver,
type KloSqlServerPool,
type KloSqlServerPoolConfig,
type KloSqlServerPoolFactory,
type KloSqlServerQueryResult,
type KloSqlServerReadOnlyQueryInput,
type KloSqlServerScanConnectorOptions,
} from './connector.js';
export { createSqlServerLiveDatabaseIntrospection } from './live-database-introspection.js';

View file

@ -0,0 +1,40 @@
import type { LiveDatabaseIntrospectionPort } from '@klo/context/ingest';
import type { KloProjectConnectionConfig } from '@klo/context/project';
import {
KloSqlServerScanConnector,
type KloSqlServerConnectionConfig,
type KloSqlServerEndpointResolver,
type KloSqlServerPoolFactory,
} from './connector.js';
/** Options for `createSqlServerLiveDatabaseIntrospection`. */
interface CreateSqlServerLiveDatabaseIntrospectionOptions {
/** Connection configs keyed by connection id; looked up with the id passed to `extractSchema`. */
connections: Record<string, KloProjectConnectionConfig>;
/** Optional pool factory, forwarded unchanged to the scan connector. */
poolFactory?: KloSqlServerPoolFactory;
/** Optional endpoint resolver, forwarded unchanged to the scan connector. */
endpointResolver?: KloSqlServerEndpointResolver;
/** Optional clock, forwarded unchanged to the scan connector (presumably for timestamps — confirm in connector). */
now?: () => Date;
}
/**
 * Creates a `LiveDatabaseIntrospectionPort` backed by SQL Server scan connectors.
 *
 * Every `extractSchema` call builds a new `KloSqlServerScanConnector` for the requested
 * connection id, runs `introspect` with driver `'sqlserver'` and run id
 * `sqlserver-<connectionId>`, and always calls the connector's `cleanup()` afterwards,
 * whether introspection succeeded or threw.
 *
 * @param options Connection configs plus optional collaborators handed to each connector.
 */
export function createSqlServerLiveDatabaseIntrospection(
  options: CreateSqlServerLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  return {
    async extractSchema(connectionId: string) {
      // Options are read on every call, not captured when the port is created.
      const { connections, poolFactory, endpointResolver, now } = options;
      // The config is not validated here; an unknown id reaches the connector as `undefined`.
      const connection = connections[connectionId] as KloSqlServerConnectionConfig | undefined;

      const connector = new KloSqlServerScanConnector({
        connectionId,
        connection,
        poolFactory,
        endpointResolver,
        now,
      });

      try {
        const schema = await connector.introspect(
          { connectionId, driver: 'sqlserver' },
          { runId: `sqlserver-${connectionId}` },
        );
        return schema;
      } finally {
        await connector.cleanup();
      }
    },
  };
}

View file

@ -0,0 +1,12 @@
import { describe, expect, it } from 'vitest';
describe('@klo/connector-sqlserver package exports', () => {
  it('exports public connector APIs during package bootstrap', async () => {
    // Import lazily so that loading the entry point is itself part of the test.
    const {
      KloSqlServerDialect,
      KloSqlServerScanConnector,
      createSqlServerLiveDatabaseIntrospection,
      sqlServerConnectionPoolConfigFromConfig,
    } = await import('./index.js');

    const publicApis = [
      KloSqlServerDialect,
      KloSqlServerScanConnector,
      createSqlServerLiveDatabaseIntrospection,
      sqlServerConnectionPoolConfigFromConfig,
    ];
    for (const api of publicApis) {
      expect(api).toBeTypeOf('function');
    }
  });
});