mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-22 08:38:08 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
48
packages/connector-postgres/package.json
Normal file
48
packages/connector-postgres/package.json
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
{
|
||||
"name": "@klo/connector-postgres",
|
||||
"version": "0.0.0-private",
|
||||
"description": "PostgreSQL connector package for KLO scan interfaces",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"engines": {
|
||||
"node": ">=22.0.0"
|
||||
},
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js",
|
||||
"default": "./dist/index.js"
|
||||
},
|
||||
"./package.json": "./package.json"
|
||||
},
|
||||
"files": [
|
||||
"dist"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsc -p tsconfig.json",
|
||||
"test": "vitest run",
|
||||
"type-check": "tsc -p tsconfig.json --noEmit"
|
||||
},
|
||||
"dependencies": {
|
||||
"@klo/context": "workspace:*",
|
||||
"pg": "^8.19.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^24.3.0",
|
||||
"@types/pg": "^8.16.0",
|
||||
"typescript": "^5.9.3",
|
||||
"vitest": "^4.0.18"
|
||||
},
|
||||
"license": "Apache-2.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/kaelio/ktx.git",
|
||||
"directory": "packages/connector-postgres"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/kaelio/ktx/issues"
|
||||
},
|
||||
"homepage": "https://github.com/kaelio/ktx#readme"
|
||||
}
|
||||
342
packages/connector-postgres/src/connector.test.ts
Normal file
342
packages/connector-postgres/src/connector.test.ts
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
createPostgresLiveDatabaseIntrospection,
|
||||
isKloPostgresConnectionConfig,
|
||||
KloPostgresScanConnector,
|
||||
postgresPoolConfigFromConfig,
|
||||
type KloPostgresPoolFactory,
|
||||
} from './index.js';
|
||||
|
||||
/** Canned result returned by the fake pool for one SQL fragment. */
interface FakeQueryResult {
  /** Result rows keyed by column name. */
  rows: Record<string, unknown>[];
  /** Optional column descriptors (name plus PostgreSQL type OID). */
  fields?: { name: string; dataTypeID: number }[];
}
|
||||
|
||||
/**
 * Builds a pool factory whose clients answer queries from a lookup table.
 *
 * The incoming SQL is whitespace-normalized and matched against the map keys
 * in insertion order; the first key contained in the SQL wins. Unmatched SQL
 * raises an error that includes the statement and its parameters.
 */
function fakePoolFactory(results: Map<string, FakeQueryResult>): KloPostgresPoolFactory {
  const query = vi.fn(async (sql: string, params?: unknown[]) => {
    const flattened = sql.replace(/\s+/g, ' ').trim();
    const hit = [...results].find(([fragment]) => flattened.includes(fragment));
    if (hit === undefined) {
      throw new Error(`Unexpected SQL: ${flattened} params=${JSON.stringify(params ?? [])}`);
    }
    return hit[1];
  });

  // Every pool shares the same `query` mock; each connect() gets a fresh release spy.
  const createPool = () => ({
    connect: async () => ({ query, release: vi.fn() }),
    end: vi.fn(async () => undefined),
  });

  return { createPool };
}
|
||||
|
||||
/**
 * Canned catalog and sample query results for a small `public` schema
 * (customers, orders, and the recent_orders view).
 *
 * Entry order matters: the fake pool returns the first entry whose key is
 * contained in the executed SQL, so broad keys such as `SELECT 1` stay last.
 */
function metadataResults(): Map<string, FakeQueryResult> {
  const tables: FakeQueryResult = {
    rows: [
      { table_name: 'customers', table_kind: 'r', row_count: '2', table_comment: 'Customers' },
      { table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null },
      { table_name: 'recent_orders', table_kind: 'v', row_count: '0', table_comment: 'Recent orders' },
    ],
  };

  const columns: FakeQueryResult = {
    rows: [
      { table_name: 'customers', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
      { table_name: 'customers', column_name: 'name', data_type: 'text', is_nullable: false, column_comment: 'Name' },
      { table_name: 'orders', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
      { table_name: 'orders', column_name: 'customer_id', data_type: 'integer', is_nullable: false, column_comment: null },
      { table_name: 'orders', column_name: 'status', data_type: 'text', is_nullable: true, column_comment: null },
      { table_name: 'recent_orders', column_name: 'id', data_type: 'integer', is_nullable: true, column_comment: null },
    ],
  };

  const foreignKeys: FakeQueryResult = {
    rows: [
      {
        table_name: 'orders',
        column_name: 'customer_id',
        foreign_table_schema: 'public',
        foreign_table_name: 'customers',
        foreign_column_name: 'id',
        constraint_name: 'orders_customer_id_fkey',
      },
    ],
  };

  const primaryKeys: FakeQueryResult = {
    rows: [
      { table_name: 'customers', column_name: 'id' },
      { table_name: 'orders', column_name: 'id' },
    ],
  };

  const entries: [string, FakeQueryResult][] = [
    ['FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n', tables],
    ['FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_class c', columns],
    ["tc.constraint_type = 'FOREIGN KEY'", foreignKeys],
    ["tc.constraint_type = 'PRIMARY KEY'", primaryKeys],
    ['SELECT "id" FROM "public"."orders" LIMIT 1', { rows: [{ id: 10 }], fields: [{ name: 'id', dataTypeID: 23 }] }],
    [
      'SELECT "status" FROM "public"."orders" WHERE "status" IS NOT NULL',
      { rows: [{ status: 'paid' }, { status: 'open' }], fields: [{ name: 'status', dataTypeID: 25 }] },
    ],
    ['COUNT(DISTINCT val) AS cardinality', { rows: [{ cardinality: '2' }] }],
    ['SELECT DISTINCT "status"::text AS val', { rows: [{ val: 'open' }, { val: 'paid' }] }],
    ['SELECT COUNT(*) AS count FROM "public"."orders"', { rows: [{ count: '3' }] }],
    ['FROM pg_stats s', { rows: [{ column_name: 'status', estimated_cardinality: '2' }] }],
    ['SELECT 1', { rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] }],
    ['SELECT schema_name FROM information_schema.schemata', { rows: [{ schema_name: 'public' }] }],
  ];

  return new Map<string, FakeQueryResult>(entries);
}
|
||||
|
||||
describe('KloPostgresScanConnector', () => {
  /** Returns a fresh copy of the read-only connection config used by most cases. */
  const warehouseConnection = () => ({
    driver: 'postgres',
    host: 'db.example.test',
    database: 'analytics',
    username: 'reader',
    password: 'test-password', // pragma: allowlist secret
    schema: 'public',
    readonly: true,
  });

  /** Fixed clock so `extractedAt` is deterministic. */
  const frozenNow = () => new Date('2026-04-29T10:00:00.000Z');

  /** Fresh reference to the `public.orders` table. */
  const ordersRef = () => ({ catalog: null, db: 'public', name: 'orders' });

  const scanContext = { runId: 'scan-run-1' };

  it('resolves configuration safely', () => {
    expect(isKloPostgresConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL', readonly: true })).toBe(true);
    expect(isKloPostgresConnectionConfig({ driver: 'postgresql', host: 'db', database: 'analytics' })).toBe(true);
    expect(isKloPostgresConnectionConfig({ driver: 'mysql', host: 'db' })).toBe(false);

    const poolConfig = postgresPoolConfigFromConfig({
      connectionId: 'warehouse',
      connection: {
        driver: 'postgres',
        host: 'db.example.test',
        database: 'analytics',
        username: 'reader',
        password: 'test-password', // pragma: allowlist secret
        schemas: ['analytics', 'public'],
        readonly: true,
        ssl: true,
        rejectUnauthorized: false,
      },
    });
    expect(poolConfig).toMatchObject({
      host: 'db.example.test',
      port: 5432,
      database: 'analytics',
      user: 'reader',
      password: 'test-password', // pragma: allowlist secret
      options: '-c search_path=analytics,public',
      ssl: { rejectUnauthorized: false },
    });

    // A connection without `readonly: true` must be rejected outright.
    const buildWithoutReadonly = () =>
      postgresPoolConfigFromConfig({
        connectionId: 'warehouse',
        connection: { driver: 'postgres', host: 'db.example.test', database: 'analytics', username: 'reader' },
      });
    expect(buildWithoutReadonly).toThrow('Native PostgreSQL connector requires connections.warehouse.readonly: true');
  });

  it('introspects schemas, tables, views, primary keys, comments, row counts, and foreign keys', async () => {
    const connector = new KloPostgresScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      poolFactory: fakePoolFactory(metadataResults()),
      now: frozenNow,
    });

    const snapshot = await connector.introspect({ connectionId: 'warehouse', driver: 'postgres' }, scanContext);

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'postgres',
      extractedAt: '2026-04-29T10:00:00.000Z',
      scope: { schemas: ['public'] },
      metadata: {
        database: 'analytics',
        schemas: ['public'],
        host: 'db.example.test',
        table_count: 3,
        total_columns: 6,
      },
    });

    const tableSummaries = snapshot.tables.map((table) => [table.db, table.name, table.kind, table.estimatedRows]);
    expect(tableSummaries).toEqual([
      ['public', 'customers', 'table', 2],
      ['public', 'orders', 'table', 3],
      ['public', 'recent_orders', 'view', null],
    ]);

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'integer',
      normalizedType: 'integer',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
    });

    const orders = snapshot.tables.find((table) => table.name === 'orders');
    expect(orders?.foreignKeys).toEqual([
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: 'public',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: 'orders_customer_id_fkey',
      },
    ]);
  });

  it('runs samples, distinct values, statistics, read-only SQL, and schema listing', async () => {
    const connector = new KloPostgresScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      poolFactory: fakePoolFactory(metadataResults()),
    });

    const tableSample = connector.sampleTable(
      { connectionId: 'warehouse', table: ordersRef(), columns: ['id'], limit: 1 },
      scanContext,
    );
    await expect(tableSample).resolves.toEqual({ headers: ['id'], headerTypes: ['integer'], rows: [[10]], totalRows: 1 });

    const columnSample = connector.sampleColumn(
      { connectionId: 'warehouse', table: ordersRef(), column: 'status', limit: 5 },
      scanContext,
    );
    await expect(columnSample).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

    const distinctValues = connector.getColumnDistinctValues(ordersRef(), 'status', {
      maxCardinality: 5,
      limit: 10,
      sampleSize: 100,
    });
    await expect(distinctValues).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    await expect(connector.getColumnStatistics(ordersRef())).resolves.toEqual({
      cardinalityByColumn: new Map([['status', 2]]),
    });
    await expect(connector.getTableRowCount({ db: 'public', name: 'orders' })).resolves.toBe(3);
    await expect(connector.listSchemas()).resolves.toEqual(['public']);
    await expect(connector.testConnection()).resolves.toEqual({ success: true });

    // Mutating statements must be refused before they reach the pool.
    const mutation = connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, scanContext);
    await expect(mutation).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
  });

  it('adapts native PostgreSQL snapshots to live-database introspection for local ingest', async () => {
    const introspection = createPostgresLiveDatabaseIntrospection({
      connections: { warehouse: warehouseConnection() },
      poolFactory: fakePoolFactory(metadataResults()),
      now: frozenNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T10:00:00.000Z',
    });

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers).toMatchObject({
      name: 'customers',
      catalog: null,
      db: 'public',
      columns: [
        {
          name: 'id',
          nativeType: 'integer',
          normalizedType: 'integer',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: null,
        },
        {
          name: 'name',
          nativeType: 'text',
          normalizedType: 'text',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: 'Name',
        },
      ],
      foreignKeys: [],
    });
  });

  it('does not end the pool before introspection completes', async () => {
    let endCalled = false;

    // Wraps the fake pool so that connecting after end() fails the way a real pool would.
    const endAwarePoolFactory: KloPostgresPoolFactory = {
      createPool() {
        const inner = fakePoolFactory(metadataResults()).createPool({
          max: 1,
          idleTimeoutMillis: 1,
          connectionTimeoutMillis: 1,
        });
        return {
          async connect() {
            if (endCalled) {
              throw new Error('Cannot use a pool after calling end on the pool');
            }
            return inner.connect();
          },
          async end() {
            endCalled = true;
            return inner.end();
          },
        };
      },
    };

    const introspection = createPostgresLiveDatabaseIntrospection({
      connections: { warehouse: warehouseConnection() },
      poolFactory: endAwarePoolFactory,
      now: frozenNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');
    expect(snapshot.tables.length).toBeGreaterThan(0);
    expect(endCalled).toBe(true);
  });
});
|
||||
707
packages/connector-postgres/src/connector.ts
Normal file
707
packages/connector-postgres/src/connector.ts
Normal file
|
|
@ -0,0 +1,707 @@
|
|||
import { readFileSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { resolve } from 'node:path';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '@klo/context/connections';
|
||||
import {
|
||||
createKloConnectorCapabilities,
|
||||
type KloColumnSampleInput,
|
||||
type KloColumnSampleResult,
|
||||
type KloColumnStatsInput,
|
||||
type KloColumnStatsResult,
|
||||
type KloQueryResult,
|
||||
type KloReadOnlyQueryInput,
|
||||
type KloScanConnector,
|
||||
type KloScanContext,
|
||||
type KloScanInput,
|
||||
type KloSchemaColumn,
|
||||
type KloSchemaForeignKey,
|
||||
type KloSchemaSnapshot,
|
||||
type KloSchemaTable,
|
||||
type KloTableRef,
|
||||
type KloTableSampleInput,
|
||||
type KloTableSampleResult,
|
||||
} from '@klo/context/scan';
|
||||
import { Pool } from 'pg';
|
||||
import { KloPostgresDialect } from './dialect.js';
|
||||
|
||||
/**
 * Display names for the PostgreSQL type OIDs this connector recognizes,
 * keyed by the `dataTypeID` reported for a result field.
 */
const PG_OID_TYPE_MAP: Record<number, string> = {
  // Scalars
  16: 'boolean',
  20: 'bigint',
  21: 'smallint',
  23: 'integer',
  25: 'text',
  114: 'json',
  700: 'real',
  701: 'double precision',
  // Arrays
  1007: 'integer[]',
  1009: 'text[]',
  1016: 'bigint[]',
  // Strings, temporal, numeric, and identifiers
  1043: 'varchar',
  1082: 'date',
  1114: 'timestamp',
  1184: 'timestamptz',
  1700: 'numeric',
  2950: 'uuid',
  3802: 'jsonb',
};
|
||||
|
||||
/**
 * User-facing connection settings for the native PostgreSQL connector.
 *
 * The string settings read by `postgresPoolConfigFromConfig` (`url`, `host`,
 * `database`, `username`, `user`, `password`) may be literal values or
 * `env:NAME` / `file:PATH` references.
 */
export interface KloPostgresConnectionConfig {
  /** Must be `postgres` or `postgresql` (case-insensitive) for this connector. */
  driver?: string;
  host?: string;
  /** Defaults to 5432 when no URL is given and no finite number is set. */
  port?: number;
  database?: string;
  /** Preferred over `user` when both are present. */
  username?: string;
  user?: string;
  password?: string;
  /** Connection URL; when set it is passed to the pool as the connection string. */
  url?: string;
  /** Single schema to scan; used only when `schemas` is absent or empty. */
  schema?: string;
  /** Schemas to scan; falls back to `schema`, then to `public`. */
  schemas?: string[];
  /** Enables TLS when truthy. */
  ssl?: boolean;
  /** Certificate verification for TLS; treated as `true` when omitted. */
  rejectUnauthorized?: boolean;
  /** Must be exactly `true`, otherwise pool configuration is refused. */
  readonly?: boolean;
  /** Additional keys are accepted and ignored by the code visible here. */
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Settings handed to the pool factory. Either `connectionString` or the
 * discrete `host` / `port` / `database` / `user` / `password` fields are
 * populated by `postgresPoolConfigFromConfig`.
 */
export interface KloPostgresPoolConfig {
  host?: string;
  port?: number;
  database?: string;
  user?: string;
  password?: string;
  /** Full connection URL; set instead of the discrete fields above. */
  connectionString?: string;
  /** Pool size limit. */
  max: number;
  idleTimeoutMillis: number;
  connectionTimeoutMillis: number;
  /** Startup options string, e.g. `-c search_path=...`. */
  options?: string;
  /** Present only when TLS is enabled. */
  ssl?: { rejectUnauthorized: boolean };
}
|
||||
|
||||
/** Minimal shape of a query result consumed by this connector. */
interface KloPostgresQueryResult {
  /** Column descriptors in result order (name plus type OID), when provided. */
  fields?: { name: string; dataTypeID: number }[];
  /** Rows keyed by column name. */
  rows: Record<string, unknown>[];
}

/** A checked-out connection that can run SQL and must be released afterwards. */
interface KloPostgresClient {
  query(sql: string, params?: unknown[]): Promise<KloPostgresQueryResult>;
  release(): void;
}

/** The subset of a connection pool used by this connector. */
interface KloPostgresPool {
  connect(): Promise<KloPostgresClient>;
  end(): Promise<void>;
}

/** Seam for creating pools, so tests can substitute an in-memory fake. */
export interface KloPostgresPoolFactory {
  createPool(config: KloPostgresPoolConfig): KloPostgresPool;
}
|
||||
|
||||
/** Host and port to connect to, with an optional teardown hook. */
interface KloPostgresResolvedEndpoint {
  host: string;
  port: number;
  /** Invoked during connector cleanup when present. */
  close?: () => Promise<void>;
}

/**
 * Hook that maps the configured host/port to the endpoint actually used.
 * NOTE(review): presumably intended for tunnels or proxies — confirm with callers.
 */
export interface KloPostgresEndpointResolver {
  resolve(input: {
    host: string;
    port: number;
    connection: KloPostgresConnectionConfig;
  }): Promise<KloPostgresResolvedEndpoint>;
}

/** Constructor options for `KloPostgresScanConnector`. */
export interface KloPostgresScanConnectorOptions {
  /** Identifier of the connection; also used in error messages and the connector id. */
  connectionId: string;
  /** Raw connection settings; validated when the connector is constructed. */
  connection: KloPostgresConnectionConfig | undefined;
  /** Overrides the default `pg` pool factory. */
  poolFactory?: KloPostgresPoolFactory;
  endpointResolver?: KloPostgresEndpointResolver;
  /** Environment used for `env:` references; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Clock override; defaults to `() => new Date()`. */
  now?: () => Date;
}
|
||||
|
||||
/** Read-only query input extended with optional bind parameters. */
export interface KloPostgresReadOnlyQueryInput extends KloReadOnlyQueryInput {
  /** Positional parameters (array) or named parameters (record). */
  params?: Record<string, unknown> | unknown[];
}

/** Tuning knobs for `getColumnDistinctValues`. */
export interface KloPostgresColumnDistinctValuesOptions {
  /** Above this sampled cardinality the values are not fetched. */
  maxCardinality: number;
  /** Limit passed to the distinct-values query. */
  limit: number;
  /** Sample size for the cardinality estimate; 10000 when omitted. */
  sampleSize?: number;
}

/** Outcome of `getColumnDistinctValues`. */
export interface KloPostgresColumnDistinctValuesResult {
  /** Distinct values as strings, or `null` when cardinality exceeded the maximum. */
  values: string[] | null;
  cardinality: number;
}

/** Outcome of `getColumnStatistics`. */
export interface KloPostgresColumnStatisticsResult {
  /** Estimated cardinality keyed by column name. */
  cardinalityByColumn: Map<string, number>;
}

/** Table sample result extended with per-column type names. */
export interface KloPostgresTableSampleResult extends KloTableSampleResult {
  headerTypes?: string[];
}

/** Table reference where only `name` is required; `catalog` and `db` are optional. */
type PostgresTableRef = Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/** One row of the table-listing catalog query. */
interface PostgresTableRow {
  table_name: string;
  /** Single-letter relation kind as reported by the catalog query (e.g. `r`, `v`). */
  table_kind: string;
  /** Row-count estimate; arrives untyped and is normalized by the caller. */
  row_count: unknown;
  table_comment: string | null;
}

/** One row of the column-listing catalog query. */
interface PostgresColumnRow {
  table_name: string;
  column_name: string;
  data_type: string;
  is_nullable: boolean;
  column_comment: string | null;
}

/** One primary-key column of a table. */
interface PostgresPrimaryKeyRow {
  table_name: string;
  column_name: string;
}

/** One foreign-key column and the column it references. */
interface PostgresForeignKeyRow {
  table_name: string;
  column_name: string;
  foreign_table_schema: string | null;
  foreign_table_name: string;
  foreign_column_name: string;
  constraint_name: string | null;
}

/** One row of the schema-listing query. */
interface PostgresSchemaRow {
  schema_name: string;
}

/** Row shape shared by the `COUNT(*)` and sampled-cardinality queries. */
interface PostgresCountRow {
  count?: unknown;
  cardinality?: unknown;
}

/** One row of the distinct-values query. */
interface PostgresDistinctValueRow {
  val: unknown;
}

/** One row of the column-statistics query. */
interface PostgresStatsRow {
  column_name: string;
  estimated_cardinality: unknown;
}
|
||||
|
||||
/** Production pool factory: wraps the `pg` driver's `Pool`. */
class DefaultPostgresPoolFactory implements KloPostgresPoolFactory {
  /** Creates a new `pg` pool from the given configuration. */
  createPool(config: KloPostgresPoolConfig): KloPostgresPool {
    const pool = new Pool(config);
    return pool;
  }
}
|
||||
|
||||
/**
 * Buckets rows by their `table_name`, keeping both the first-seen order of
 * tables and the original order of rows within each table.
 */
function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string, T[]> {
  const byTable = new Map<string, T[]>();
  rows.forEach((row) => {
    const bucket = byTable.get(row.table_name);
    if (bucket === undefined) {
      byTable.set(row.table_name, [row]);
    } else {
      bucket.push(row);
    }
  });
  return byTable;
}
|
||||
|
||||
/** Collects the primary-key column names of each table into a set per table. */
function primaryKeyMap(rows: PostgresPrimaryKeyRow[]): Map<string, Set<string>> {
  return rows.reduce((keysByTable, { table_name: tableName, column_name: columnName }) => {
    const known = keysByTable.get(tableName);
    if (known === undefined) {
      keysByTable.set(tableName, new Set<string>([columnName]));
    } else {
      known.add(columnName);
    }
    return keysByTable;
  }, new Map<string, Set<string>>());
}
|
||||
|
||||
/**
 * Converts keyed result rows into positional arrays ordered by `fields`.
 * Without `fields` every row becomes an empty array.
 */
function queryRows(result: KloPostgresQueryResult): unknown[][] {
  const columnNames: string[] = [];
  for (const field of result.fields ?? []) {
    columnNames.push(field.name);
  }
  return result.rows.map((record) => columnNames.map((columnName) => record[columnName]));
}
|
||||
|
||||
/**
 * Normalizes a loosely typed database value to a finite number.
 *
 * Accepts numbers, bigints, and non-blank numeric strings (the driver reports
 * `bigint`/`numeric` columns as strings). Everything else — in particular
 * `null`, `undefined`, and empty strings — yields `null`. A plain `Number(value)`
 * would coerce `null` and `''` to `0` and make a missing value look like a zero.
 *
 * @param value Raw value taken from a result row.
 * @returns The finite numeric value, or `null` when none can be derived.
 */
function finiteNumber(value: unknown): number | null {
  let parsed: number;
  if (typeof value === 'number') {
    parsed = value;
  } else if (typeof value === 'bigint') {
    parsed = Number(value);
  } else if (typeof value === 'string' && value.trim().length > 0) {
    parsed = Number(value);
  } else {
    return null;
  }
  return Number.isFinite(parsed) ? parsed : null;
}
|
||||
|
||||
/**
 * Reads a string setting from the connection config and resolves any
 * `env:` / `file:` reference it contains.
 *
 * @returns The resolved value, or `undefined` when the setting is missing,
 *   not a string, or blank after trimming.
 */
function stringConfigValue(
  connection: KloPostgresConnectionConfig | undefined,
  key: keyof KloPostgresConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  return trimmed.length === 0 ? undefined : resolveStringReference(trimmed, env);
}
|
||||
|
||||
/**
 * Resolves an indirect configuration value.
 *
 * - `env:NAME` returns the environment variable `NAME`, or `''` when unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of `PATH`. A leading `~`
 *   is replaced with the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param value Literal value or `env:` / `file:` reference.
 * @param env Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws If a referenced file cannot be read (error from `readFileSync`).
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator after `~` before resolving: `resolve(home, '/x')` would
    // treat `/x` as absolute and discard the home directory entirely.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/** Returns `value` when it is a finite number; otherwise `undefined` (no coercion). */
function numberValue(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Splits a PostgreSQL connection URL into discrete connection settings.
 *
 * Username, password, and database name are percent-decoded. A component that
 * is not valid percent-encoding is returned as written instead of throwing.
 * Components absent from the URL are `undefined`.
 *
 * @param url Connection URL such as `postgres://user:pass@host:5432/db`.
 * @returns Host, port, database, username, and password found in the URL.
 * @throws TypeError if `url` cannot be parsed by `URL`.
 */
function parsePostgresUrl(url: string): Partial<KloPostgresConnectionConfig> {
  const parsed = new URL(url);
  const decode = (component: string): string | undefined => {
    if (!component) {
      return undefined;
    }
    try {
      return decodeURIComponent(component);
    } catch {
      // Malformed escape (e.g. a bare `%`): keep the literal text.
      return component;
    }
  };
  return {
    host: parsed.hostname,
    port: parsed.port ? Number(parsed.port) : undefined,
    database: decode(parsed.pathname.replace(/^\/+/, '')),
    username: decode(parsed.username),
    password: decode(parsed.password),
  };
}
|
||||
|
||||
/**
 * Determines which schemas to scan.
 *
 * Precedence: usable entries of `schemas`, then `schema`, then `public`.
 * Entries of `schemas` that are not non-empty strings are ignored; if none
 * remain, the fallback chain applies rather than returning an empty list
 * (which would make introspection scan nothing).
 *
 * @param connection Connection settings.
 * @returns A non-empty list of schema names.
 */
function schemasFromConnection(connection: KloPostgresConnectionConfig): string[] {
  if (Array.isArray(connection.schemas)) {
    const listed = connection.schemas.filter(
      (schema): schema is string => typeof schema === 'string' && schema.length > 0,
    );
    if (listed.length > 0) {
      return listed;
    }
  }
  return typeof connection.schema === 'string' && connection.schema.length > 0 ? [connection.schema] : ['public'];
}
|
||||
|
||||
/**
 * Schemas for the session `search_path`: the scan schemas, with `public`
 * appended when it is not already among them.
 */
function searchPathSchemasFromConnection(connection: KloPostgresConnectionConfig): string[] {
  const scanSchemas = schemasFromConnection(connection);
  if (scanSchemas.includes('public')) {
    return scanSchemas;
  }
  return [...scanSchemas, 'public'];
}
|
||||
|
||||
/**
 * Tells whether a connection declares a PostgreSQL driver.
 *
 * @returns `true` when `driver` is `postgres` or `postgresql`, ignoring case.
 */
export function isKloPostgresConnectionConfig(connection: KloPostgresConnectionConfig | undefined): boolean {
  const declaredDriver = String(connection?.driver ?? '').toLowerCase();
  return ['postgres', 'postgresql'].includes(declaredDriver);
}
|
||||
|
||||
/**
 * Validates a connection entry and converts it into pool settings.
 *
 * A configured `url` is passed through as `connectionString`; otherwise the
 * discrete host/port/database/user/password settings are used (port defaults
 * to 5432). The session `search_path` is set through `options`, and TLS is
 * enabled when `ssl` is truthy (certificate verification on unless
 * `rejectUnauthorized` says otherwise).
 *
 * @throws Error if the driver is not PostgreSQL, `readonly` is not `true`,
 *   or host / database / user are missing while no URL is configured.
 */
export function postgresPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KloPostgresConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KloPostgresPoolConfig {
  const { connectionId, connection } = input;
  const requirement = (what: string): Error =>
    new Error(`Native PostgreSQL connector requires connections.${connectionId}.${what}`);

  if (!isKloPostgresConnectionConfig(connection)) {
    throw new Error(`Native PostgreSQL connector cannot run driver "${connection?.driver ?? 'unknown'}"`);
  }
  if (connection?.readonly !== true) {
    throw requirement('readonly: true');
  }

  const env = input.env ?? process.env;
  const referencedUrl = stringConfigValue(connection, 'url', env);
  const fromUrl = referencedUrl ? parsePostgresUrl(referencedUrl) : {};
  // Explicit settings take precedence over values parsed from the URL.
  const merged: KloPostgresConnectionConfig = { ...fromUrl, ...connection };

  const host = stringConfigValue(merged, 'host', env);
  const database = stringConfigValue(merged, 'database', env);
  const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env);
  const password = stringConfigValue(merged, 'password', env);

  if (!referencedUrl) {
    if (!host) {
      throw requirement('host or url');
    }
    if (!database) {
      throw requirement('database or url');
    }
    if (!user) {
      throw requirement('username, user, or url');
    }
  }

  const target = referencedUrl
    ? { connectionString: referencedUrl }
    : { host, port: numberValue(merged.port) ?? 5432, database, user, password };
  const config: KloPostgresPoolConfig = {
    max: 10,
    idleTimeoutMillis: 30_000,
    connectionTimeoutMillis: 10_000,
    ...target,
  };

  const searchPathSchemas = searchPathSchemasFromConnection(merged);
  if (searchPathSchemas.length > 0) {
    config.options = `-c search_path=${searchPathSchemas.join(',')}`;
  }
  if (merged.ssl) {
    config.ssl = { rejectUnauthorized: merged.rejectUnauthorized ?? true };
  }
  return config;
}
|
||||
|
||||
export class KloPostgresScanConnector implements KloScanConnector {
|
||||
readonly id: string;
|
||||
readonly driver = 'postgres' as const;
|
||||
readonly capabilities = createKloConnectorCapabilities({
|
||||
tableSampling: true,
|
||||
columnSampling: true,
|
||||
columnStats: true,
|
||||
readOnlySql: true,
|
||||
nestedAnalysis: true,
|
||||
formalForeignKeys: true,
|
||||
estimatedRowCounts: true,
|
||||
});
|
||||
|
||||
private readonly connectionId: string;
|
||||
private readonly connection: KloPostgresConnectionConfig;
|
||||
private readonly poolConfig: KloPostgresPoolConfig;
|
||||
private readonly poolFactory: KloPostgresPoolFactory;
|
||||
private readonly endpointResolver?: KloPostgresEndpointResolver;
|
||||
private readonly now: () => Date;
|
||||
private readonly dialect = new KloPostgresDialect();
|
||||
private pool: KloPostgresPool | null = null;
|
||||
private resolvedEndpoint: KloPostgresResolvedEndpoint | null = null;
|
||||
|
||||
/**
 * Validates the connection settings eagerly (throws on invalid config) and
 * stores collaborators. No pool is opened here.
 */
constructor(options: KloPostgresScanConnectorOptions) {
  const { connectionId, connection, env } = options;
  this.connectionId = connectionId;
  this.connection = connection ?? {};
  this.poolConfig = postgresPoolConfigFromConfig({ connectionId, connection, env });
  this.poolFactory = options.poolFactory ?? new DefaultPostgresPoolFactory();
  this.endpointResolver = options.endpointResolver;
  this.now = options.now ?? (() => new Date());
  this.id = `postgres:${connectionId}`;
}
|
||||
|
||||
/**
 * Probes the database with `SELECT 1`.
 *
 * @returns `{ success: true }`, or `{ success: false, error }` carrying the
 *   failure message; this method does not reject.
 */
async testConnection(): Promise<{ success: boolean; error?: string }> {
  try {
    await this.query('SELECT 1');
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { success: false, error: message };
  }
  return { success: true };
}
|
||||
|
||||
/**
 * Produces a schema snapshot covering every configured schema.
 *
 * Schemas are loaded one after another; their tables are concatenated in
 * schema order.
 */
async introspect(input: KloScanInput, _ctx: KloScanContext): Promise<KloSchemaSnapshot> {
  this.assertConnection(input.connectionId);

  const schemas = schemasFromConnection(this.connection);
  const tables: KloSchemaTable[] = [];
  for (const schemaName of schemas) {
    tables.push(...(await this.loadSchemaTables(schemaName)));
  }

  let totalColumns = 0;
  for (const table of tables) {
    totalColumns += table.columns.length;
  }

  return {
    connectionId: this.connectionId,
    driver: 'postgres',
    extractedAt: this.now().toISOString(),
    scope: { schemas },
    metadata: {
      database: this.poolConfig.database ?? this.connection.database ?? null,
      schemas,
      host: this.poolConfig.host ?? this.connection.host ?? null,
      table_count: tables.length,
      total_columns: totalColumns,
    },
    tables,
  };
}
|
||||
|
||||
/** Fetches up to `input.limit` rows of the requested columns from a table. */
async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise<KloPostgresTableSampleResult> {
  this.assertConnection(input.connectionId);
  const sampleSql = this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns);
  const { headers, headerTypes, rows, totalRows } = await this.query(sampleSql);
  return { headers, headerTypes, rows, totalRows };
}
|
||||
|
||||
/**
 * Samples values of a single column, dropping empty rows and `null` values.
 * Null and distinct counts are not computed here and are reported as `null`.
 */
async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise<KloColumnSampleResult> {
  this.assertConnection(input.connectionId);
  const sampleSql = this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit);
  const { rows } = await this.query(sampleSql);
  const populated = rows.filter((row) => row.length > 0 && row[0] !== null);
  return {
    values: populated.map((row) => row[0]),
    nullCount: null,
    distinctCount: null,
  };
}
|
||||
|
||||
/**
 * Reports the estimated distinct count for one column.
 *
 * @returns Stats with only `distinctCount` populated, or `null` when no
 *   estimate exists for the column.
 */
async columnStats(input: KloColumnStatsInput, _ctx: KloScanContext): Promise<KloColumnStatsResult | null> {
  const tableStats = await this.getColumnStatistics(input.table);
  const distinctCount = tableStats?.cardinalityByColumn.get(input.column);
  if (distinctCount === undefined) {
    return null;
  }
  return { min: null, max: null, average: null, nullCount: null, distinctCount };
}
|
||||
|
||||
/**
 * Runs caller-supplied SQL after it passes the read-only check and has the
 * row limit applied. Array params are passed through as-is; any other
 * params value goes through the dialect's `prepareQuery`.
 */
async executeReadOnly(input: KloPostgresReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
  this.assertConnection(input.connectionId);

  const readOnlySql = assertReadOnlySql(input.sql);
  const limitedSql = limitSqlForExecution(readOnlySql, input.maxRows);
  const { params } = input;
  const statement = Array.isArray(params)
    ? { sql: limitedSql, params }
    : this.dialect.prepareQuery(limitedSql, params);

  const outcome = await this.query(statement.sql, statement.params);
  return { ...outcome, rowCount: outcome.rows.length };
}
|
||||
|
||||
/**
 * Estimates a column's cardinality from a sample and, when it is small
 * enough, fetches the distinct values.
 *
 * @returns `null` when no cardinality could be determined; `values: []` for
 *   cardinality 0; `values: null` when cardinality exceeds
 *   `options.maxCardinality`; otherwise the non-null distinct values as strings.
 */
async getColumnDistinctValues(
  table: KloTableRef,
  columnName: string,
  options: KloPostgresColumnDistinctValuesOptions,
): Promise<KloPostgresColumnDistinctValuesResult | null> {
  const sampleSize = options.sampleSize ?? 10000;
  const tableName = this.qTableName(table);
  const quotedColumn = this.dialect.quoteIdentifier(columnName);

  const cardinalitySql = this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize);
  const [firstRow] = await this.queryRaw<PostgresCountRow>(cardinalitySql);
  const cardinality = finiteNumber(firstRow?.cardinality);

  if (cardinality === null) {
    return null;
  }
  if (cardinality === 0) {
    return { values: [], cardinality: 0 };
  }
  if (cardinality > options.maxCardinality) {
    return { values: null, cardinality };
  }

  const valuesSql = this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit);
  const valueRows = await this.queryRaw<PostgresDistinctValueRow>(valuesSql);
  const values: string[] = [];
  for (const { val } of valueRows) {
    if (val !== null) {
      values.push(String(val));
    }
  }
  return { values, cardinality };
}
|
||||
|
||||
/**
 * Reads per-column cardinality estimates for a table using the dialect's
 * column-statistics query.
 *
 * The schema is `table.db` when set, otherwise the first schema derived from
 * the connection, otherwise `public`.
 *
 * @param table - Table whose column statistics are requested.
 * @returns A map of column name to estimated cardinality, or `null` when the
 *   dialect produces no query or no row yields a finite estimate.
 */
async getColumnStatistics(table: KloTableRef): Promise<KloPostgresColumnStatisticsResult | null> {
  const schema = table.db ?? schemasFromConnection(this.connection)[0] ?? 'public';
  const statsSql = this.dialect.generateColumnStatisticsQuery(schema, table.name);
  if (!statsSql) {
    return null;
  }

  const statsRows = await this.queryRaw<PostgresStatsRow>(statsSql);
  const cardinalityByColumn = new Map<string, number>();
  for (const statsRow of statsRows) {
    const estimate = finiteNumber(statsRow.estimated_cardinality);
    if (estimate === null) {
      // Skip columns without a usable estimate.
      continue;
    }
    cardinalityByColumn.set(statsRow.column_name, estimate);
  }

  if (cardinalityByColumn.size === 0) {
    return null;
  }
  return { cardinalityByColumn };
}
|
||||
|
||||
/**
 * Counts the rows of a table with `SELECT COUNT(*)`.
 *
 * @param table - Either a table reference, or a bare table name that is
 *   resolved against the first schema derived from the connection (falling
 *   back to `public`).
 * @returns The exact row count, or 0 when the result is not a finite number.
 */
async getTableRowCount(table: string | PostgresTableRef): Promise<number> {
  let tableRef: PostgresTableRef;
  if (typeof table === 'string') {
    const defaultSchema = schemasFromConnection(this.connection)[0] ?? 'public';
    tableRef = { catalog: null, db: defaultSchema, name: table };
  } else {
    tableRef = table;
  }

  const countRows = await this.queryRaw<PostgresCountRow>(`SELECT COUNT(*) AS count FROM ${this.qTableName(tableRef)}`);
  const count = finiteNumber(countRows[0]?.count);
  return count ?? 0;
}
|
||||
|
||||
/**
 * Returns the table name formatted by the dialect's `formatTableName`.
 *
 * @param table - Table reference to format.
 */
qTableName(table: PostgresTableRef): string {
  const formatted = this.dialect.formatTableName(table);
  return formatted;
}
|
||||
|
||||
/**
 * Quotes a single identifier by delegating to the dialect.
 *
 * @param identifier - Raw identifier.
 */
quoteIdentifier(identifier: string): string {
  const quoted = this.dialect.quoteIdentifier(identifier);
  return quoted;
}
|
||||
|
||||
/**
 * Lists the schemas of the connected database, excluding
 * `information_schema` and schemas whose name starts with the literal
 * prefix `pg_`.
 *
 * In a LIKE pattern `_` matches any single character, so an unescaped
 * `'pg_%'` would also hide ordinary schemas such as `pgboss`. The underscore
 * is therefore escaped with an explicit ESCAPE character.
 *
 * @returns Schema names ordered alphabetically.
 */
async listSchemas(): Promise<string[]> {
  const rows = await this.queryRaw<PostgresSchemaRow>(`
    SELECT schema_name
    FROM information_schema.schemata
    WHERE schema_name <> 'information_schema'
      AND schema_name NOT LIKE 'pg!_%' ESCAPE '!'
    ORDER BY schema_name
  `);
  return rows.map((row) => row.schema_name);
}
|
||||
|
||||
/**
 * Releases the connection pool and any resolved endpoint.
 *
 * Both references are cleared before awaiting, so a repeated or overlapping
 * call never ends the same pool twice. The endpoint is closed in a `finally`
 * block so that a failing `pool.end()` cannot leave it open; the pool error
 * (if any) still propagates to the caller.
 */
async cleanup(): Promise<void> {
  const pool = this.pool;
  const endpoint = this.resolvedEndpoint;
  this.pool = null;
  this.resolvedEndpoint = null;

  try {
    if (pool) {
      await pool.end();
    }
  } finally {
    if (endpoint?.close) {
      await endpoint.close();
    }
  }
}
|
||||
|
||||
/**
 * Loads tables and views of one schema together with their columns, primary
 * keys and foreign keys, and assembles them into `KloSchemaTable` objects.
 *
 * Four catalog queries are run one after another (relations, columns,
 * primary keys, foreign keys); results are grouped by table name and merged.
 *
 * Constraint names are only unique per table in PostgreSQL, so the key
 * queries also join on `table_name`. The foreign-key query links
 * `constraint_column_usage` through `constraint_schema`, because its
 * `table_schema` column is the schema of the referenced table and would
 * otherwise drop cross-schema foreign keys.
 *
 * NOTE(review): for composite foreign keys the `constraint_column_usage`
 * join still yields one row per (local column, referenced column) pair.
 *
 * @param schema - Schema name, bound as `$1` in every query.
 * @returns One entry per relation of kind `r` (table) or `v` (view).
 */
private async loadSchemaTables(schema: string): Promise<KloSchemaTable[]> {
  const tables = await this.queryRaw<PostgresTableRow>(
    `
      SELECT
        c.relname AS table_name,
        c.relkind AS table_kind,
        c.reltuples::bigint AS row_count,
        d.description AS table_comment
      FROM pg_catalog.pg_class c
      JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
      LEFT JOIN pg_catalog.pg_description d
        ON d.objoid = c.oid AND d.objsubid = 0
      WHERE n.nspname = $1
        AND c.relkind IN ('r', 'v')
      ORDER BY c.relname
    `,
    [schema],
  );
  const columns = await this.queryRaw<PostgresColumnRow>(
    `
      SELECT
        c.relname AS table_name,
        a.attname AS column_name,
        format_type(a.atttypid, a.atttypmod) AS data_type,
        NOT a.attnotnull AS is_nullable,
        d.description AS column_comment
      FROM pg_catalog.pg_attribute a
      JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
      JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
      LEFT JOIN pg_catalog.pg_description d
        ON d.objoid = c.oid AND d.objsubid = a.attnum
      WHERE n.nspname = $1
        AND c.relkind IN ('r', 'v')
        AND a.attnum > 0
        AND NOT a.attisdropped
      ORDER BY c.relname, a.attnum
    `,
    [schema],
  );
  const primaryKeys = await this.queryRaw<PostgresPrimaryKeyRow>(
    `
      SELECT tc.table_name, kcu.column_name
      FROM information_schema.table_constraints tc
      JOIN information_schema.key_column_usage kcu
        ON tc.constraint_name = kcu.constraint_name
        AND tc.table_schema = kcu.table_schema
        AND tc.table_name = kcu.table_name
      WHERE tc.constraint_type = 'PRIMARY KEY'
        AND tc.table_schema = $1
      ORDER BY tc.table_name, kcu.ordinal_position
    `,
    [schema],
  );
  const foreignKeys = await this.queryRaw<PostgresForeignKeyRow>(
    `
      SELECT
        tc.table_name,
        kcu.column_name,
        ccu.table_schema AS foreign_table_schema,
        ccu.table_name AS foreign_table_name,
        ccu.column_name AS foreign_column_name,
        tc.constraint_name
      FROM information_schema.table_constraints AS tc
      JOIN information_schema.key_column_usage AS kcu
        ON tc.constraint_name = kcu.constraint_name
        AND tc.table_schema = kcu.table_schema
        AND tc.table_name = kcu.table_name
      JOIN information_schema.constraint_column_usage AS ccu
        ON ccu.constraint_name = tc.constraint_name
        AND ccu.constraint_schema = tc.constraint_schema
      WHERE tc.constraint_type = 'FOREIGN KEY'
        AND tc.table_schema = $1
      ORDER BY tc.table_name, kcu.column_name
    `,
    [schema],
  );

  // Index the detail rows by table so each relation can pick up its own.
  const columnsByTable = groupByTable(columns);
  const primaryKeysByTable = primaryKeyMap(primaryKeys);
  const foreignKeysByTable = groupByTable(foreignKeys);
  return tables.map((table) =>
    this.toSchemaTable(
      schema,
      table,
      columnsByTable.get(table.table_name) ?? [],
      primaryKeysByTable.get(table.table_name) ?? new Set<string>(),
      foreignKeysByTable.get(table.table_name) ?? [],
    ),
  );
}
|
||||
|
||||
/**
 * Converts one catalog relation row plus its detail rows into a
 * `KloSchemaTable`.
 *
 * Relation kind `v` becomes `view`; everything else is reported as `table`.
 * Views never carry a row estimate. For tables, a negative estimate is
 * reported as `null`: PostgreSQL stores `reltuples = -1` for relations that
 * have never been vacuumed or analyzed, which means "unknown", not a count.
 *
 * @param schema - Schema the relation belongs to.
 * @param table - Relation row (name, kind, row estimate, comment).
 * @param columns - Column rows of this relation.
 * @param primaryKeys - Names of the primary-key columns of this relation.
 * @param foreignKeys - Foreign-key rows of this relation.
 */
private toSchemaTable(
  schema: string,
  table: PostgresTableRow,
  columns: PostgresColumnRow[],
  primaryKeys: Set<string>,
  foreignKeys: PostgresForeignKeyRow[],
): KloSchemaTable {
  const kind = table.table_kind === 'v' ? 'view' : 'table';
  const rowEstimate = kind === 'view' ? null : finiteNumber(table.row_count);
  return {
    catalog: null,
    db: schema,
    name: table.table_name,
    kind,
    comment: table.table_comment || null,
    estimatedRows: rowEstimate !== null && rowEstimate >= 0 ? rowEstimate : null,
    columns: columns.map((column) => this.toSchemaColumn(column, primaryKeys)),
    foreignKeys: foreignKeys.map((foreignKey) => this.toSchemaForeignKey(foreignKey)),
  };
}
|
||||
|
||||
/**
 * Maps a catalog column row to a `KloSchemaColumn`.
 *
 * The native type is passed through the dialect twice: once for the
 * normalized type and once for the dimension type. Empty comments become
 * `null`.
 *
 * @param column - Column row from the catalog query.
 * @param primaryKeys - Primary-key column names of the owning table.
 */
private toSchemaColumn(column: PostgresColumnRow, primaryKeys: Set<string>): KloSchemaColumn {
  const { column_name: name, data_type: nativeType } = column;
  const normalizedType = this.dialect.mapDataType(nativeType);
  const dimensionType = this.dialect.mapToDimensionType(nativeType);
  return {
    name,
    nativeType,
    normalizedType,
    dimensionType,
    nullable: column.is_nullable,
    primaryKey: primaryKeys.has(name),
    comment: column.column_comment || null,
  };
}
|
||||
|
||||
/**
 * Maps a foreign-key row to a `KloSchemaForeignKey`.
 *
 * The target catalog is always `null`; an empty constraint name becomes
 * `null`.
 *
 * @param row - Foreign-key row from the catalog query.
 */
private toSchemaForeignKey(row: PostgresForeignKeyRow): KloSchemaForeignKey {
  const {
    column_name: fromColumn,
    foreign_table_schema: toDb,
    foreign_table_name: toTable,
    foreign_column_name: toColumn,
  } = row;
  const constraintName = row.constraint_name || null;
  return { fromColumn, toCatalog: null, toDb, toTable, toColumn, constraintName };
}
|
||||
|
||||
/**
 * Returns the connection pool, creating it on first use.
 *
 * When an endpoint resolver is configured it is asked for the host/port to
 * connect to (seeded with the pool config, then the connection, then
 * `localhost`/5432); the resolved endpoint is remembered so `cleanup` can
 * close it, and its host/port override the pool config.
 *
 * NOTE(review): two calls that overlap while the resolver is awaited would
 * each appear to build a pool — confirm whether callers can run concurrently.
 */
private async getPool(): Promise<KloPostgresPool> {
  if (this.pool) {
    return this.pool;
  }

  let config = { ...this.poolConfig };
  if (this.endpointResolver) {
    const requestedHost = config.host ?? this.connection.host ?? 'localhost';
    const requestedPort = config.port ?? numberValue(this.connection.port) ?? 5432;
    const endpoint = await this.endpointResolver.resolve({
      host: requestedHost,
      port: requestedPort,
      connection: this.connection,
    });
    this.resolvedEndpoint = endpoint;
    config = { ...config, host: endpoint.host, port: endpoint.port };
  }

  const pool = this.poolFactory.createPool(config);
  this.pool = pool;
  return pool;
}
|
||||
|
||||
/**
 * Executes SQL on a pooled client and returns the raw rows.
 *
 * No read-only validation is applied here. The client is always released,
 * including when the query throws.
 *
 * @param sql - Statement to execute.
 * @param params - Optional positional parameters.
 * @returns The result rows, cast to `T[]` without runtime validation.
 */
private async queryRaw<T>(sql: string, params?: unknown[]): Promise<T[]> {
  const client = await (await this.getPool()).connect();
  try {
    const { rows } = await client.query(sql, params);
    return rows as T[];
  } finally {
    client.release();
  }
}
|
||||
|
||||
/**
 * Executes a read-only statement on a pooled client and shapes the outcome
 * as a `KloQueryResult`.
 *
 * The SQL is passed through `assertReadOnlySql` before execution. Only array
 * parameters are forwarded to the driver; any other value is sent as
 * `undefined`. Header types come from `PG_OID_TYPE_MAP`, falling back to
 * `oid:<id>` for unmapped type OIDs.
 *
 * @param sql - Statement to execute.
 * @param params - Positional parameters (non-array values are ignored).
 */
private async query(sql: string, params?: Record<string, unknown> | unknown[]): Promise<KloQueryResult> {
  const pool = await this.getPool();
  const client = await pool.connect();
  try {
    const positionalParams = Array.isArray(params) ? params : undefined;
    const result = await client.query(assertReadOnlySql(sql), positionalParams);
    const fields = result.fields ?? [];
    const returnedRows = result.rows.length;
    return {
      headers: fields.map((field) => field.name),
      headerTypes: fields.map((field) => PG_OID_TYPE_MAP[field.dataTypeID] ?? `oid:${field.dataTypeID}`),
      rows: queryRows(result),
      totalRows: returnedRows,
      rowCount: returnedRows,
    };
  } finally {
    // Always hand the client back to the pool, even on failure.
    client.release();
  }
}
|
||||
|
||||
/**
 * Guards against using this connector for a different connection.
 *
 * @param connectionId - Connection id carried by the incoming request.
 * @throws Error when the id differs from the connector's own connection id.
 */
private assertConnection(connectionId: string): void {
  if (connectionId === this.connectionId) {
    return;
  }
  throw new Error(`PostgreSQL connector ${this.connectionId} cannot run scan for ${connectionId}`);
}
|
||||
}
|
||||
52
packages/connector-postgres/src/dialect.test.ts
Normal file
52
packages/connector-postgres/src/dialect.test.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KloPostgresDialect } from './dialect.js';
|
||||
|
||||
// Unit tests for the PostgreSQL dialect's pure SQL-building helpers.
describe('KloPostgresDialect', () => {
  const dialect = new KloPostgresDialect();
  const ordersTable = '"public"."orders"';

  it('quotes identifiers and formats schema-qualified tables', () => {
    const quoted = dialect.quoteIdentifier('order"items');
    const qualified = dialect.formatTableName({ catalog: null, db: 'public', name: 'orders' });
    const unqualified = dialect.formatTableName({ catalog: null, db: null, name: 'orders' });

    expect(quoted).toBe('"order""items"');
    expect(qualified).toBe('"public"."orders"');
    expect(unqualified).toBe('"orders"');
  });

  it('maps native PostgreSQL types to KLO dimension types', () => {
    // Pairs of native type name and the dimension type it must map to.
    const expectations = [
      ['timestamp with time zone', 'time'],
      ['numeric(12,2)', 'number'],
      ['uuid', 'string'],
      ['boolean', 'boolean'],
      ['jsonb', 'string'],
    ] as const;

    for (const [nativeType, dimensionType] of expectations) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('generates sample, distinct-value, statistics, and time SQL', () => {
    const sampleSql = dialect.generateSampleQuery(ordersTable, 5, ['id', 'status']);
    expect(sampleSql).toBe('SELECT "id", "status" FROM "public"."orders" LIMIT 5');

    const columnSampleSql = dialect.generateColumnSampleQuery(ordersTable, 'status', 10);
    expect(columnSampleSql).toContain('TRIM(CAST("status" AS TEXT)) != \'\'');

    const distinctSql = dialect.generateDistinctValuesQuery(ordersTable, '"status"', 20);
    expect(distinctSql).toContain('SELECT DISTINCT "status"::text AS val');

    const statisticsSql = dialect.generateColumnStatisticsQuery('public', 'orders');
    expect(statisticsSql).toContain('FROM pg_stats s');

    const truncSql = dialect.getTimeTruncExpression('"created_at"', 'month');
    expect(truncSql).toBe('DATE_TRUNC(\'month\', "created_at")');
  });

  it('prepares named parameters with PostgreSQL positional parameters', () => {
    const simple = dialect.prepareQuery('select * from orders where id = :id and status = :status', {
      id: 1,
      status: 'paid',
    });
    expect(simple).toEqual({
      sql: 'select * from orders where id = $1 and status = $2',
      params: [1, 'paid'],
    });

    // One parameter name is a prefix of the other; each must keep its own slot.
    const prefixed = dialect.prepareQuery('select :Client_Name_10, :Client_Name_1', {
      Client_Name_1: 'short',
      Client_Name_10: 'long',
    });
    expect(prefixed).toEqual({
      sql: 'select $2, $1',
      params: ['short', 'long'],
    });
  });
});
|
||||
213
packages/connector-postgres/src/dialect.ts
Normal file
213
packages/connector-postgres/src/dialect.ts
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
import type { KloSchemaDimensionType, KloTableRef } from '@klo/context/scan';
|
||||
|
||||
/** Table reference with a required `name` and optional `catalog` / `db` parts. */
type PostgresTableNameRef = Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL text helpers for PostgreSQL: identifier quoting, type classification,
 * named-parameter conversion and generation of sampling / statistics /
 * time-bucketing SQL fragments. All methods are pure string builders; nothing
 * here talks to a database.
 */
export class KloPostgresDialect {
  readonly type = 'postgresql';

  /** Exact lower-case native type names (without modifiers) and their dimension type. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    timestamp: 'time',
    'timestamp without time zone': 'time',
    'timestamp with time zone': 'time',
    timestamptz: 'time',
    datetime: 'time',
    date: 'time',
    time: 'time',
    integer: 'number',
    int: 'number',
    int2: 'number',
    int4: 'number',
    int8: 'number',
    bigint: 'number',
    smallint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    float4: 'number',
    float8: 'number',
    'double precision': 'number',
    real: 'number',
    money: 'number',
    text: 'string',
    varchar: 'string',
    'character varying': 'string',
    char: 'string',
    character: 'string',
    uuid: 'string',
    json: 'string',
    jsonb: 'string',
    boolean: 'boolean',
    bool: 'boolean',
  };

  /** Wraps an identifier in double quotes, doubling any embedded double quote. */
  quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /**
   * Formats a table as `"schema"."name"`, or just `"name"` when `db` is not
   * set. The `catalog` part is not used.
   */
  formatTableName(table: PostgresTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a native type name as `time`, `number`, `boolean` or `string`.
   *
   * The name is lower-cased and any `(…)` modifier is dropped before an exact
   * lookup in the type table. Names that are not listed fall back to prefix
   * heuristics. The heuristics are anchored at the start of the name so that
   * user-defined types which merely contain a keyword (`status_enum`,
   * `point`, `update_type`) are classified as `string` rather than as
   * numbers or times.
   *
   * @param nativeType - Type name as reported by the database; empty yields `string`.
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    const normalized = lower.includes('(') ? lower.split('(')[0]!.trim() : lower;

    // Own-property check: a type literally named "constructor" must not hit Object.prototype.
    const exact = Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)
      ? this.typeMappings[normalized]
      : undefined;
    if (exact) {
      return exact;
    }

    if (/^(?:time|date)/.test(normalized)) {
      return 'time';
    }
    // (small|big)?int keeps array forms such as "smallint[]" numeric.
    if (/^(?:(?:small|big)?int|num|dec|float|double)/.test(normalized)) {
      return 'number';
    }
    if (/^bool/.test(normalized)) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT <columns> FROM <table> LIMIT <limit>`.
   *
   * @param tableName - Already formatted/quoted table name.
   * @param limit - Row limit, interpolated as-is.
   * @param columns - Unquoted column names; `*` is selected when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /**
   * Builds a query sampling one column, skipping NULLs and values whose text
   * form is blank after trimming.
   *
   * @param tableName - Already formatted/quoted table name.
   * @param columnName - Unquoted column name; quoted here.
   * @param limit - Row limit, interpolated as-is.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS TEXT)) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders to PostgreSQL positional parameters.
   *
   * Positions follow the key order of `params`, and every value is included
   * in the returned array whether or not its placeholder occurs in the SQL.
   * A placeholder directly preceded by another colon is left alone so that
   * casts such as `amount::int` survive even when a parameter is called
   * `int`. Parameter names are regex-escaped before matching.
   *
   * @param sql - SQL text containing `:name` placeholders.
   * @param params - Named values; when omitted the SQL is returned untouched.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
    if (!params) {
      return { sql, params: undefined };
    }
    const paramNames = Object.keys(params);
    const values: unknown[] = paramNames.map((name) => params[name]);
    const positions = new Map<string, number>();
    paramNames.forEach((name, index) => {
      positions.set(name, index + 1);
    });

    // Longest names first, so a name never consumes part of a longer one.
    const sortedNames = [...paramNames].sort((a, b) => b.length - a.length);
    let parameterizedQuery = sql;
    for (const name of sortedNames) {
      const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      const placeholder = `$${positions.get(name)}`;
      // Replacer function: the "$n" text must never be read as a replacement pattern.
      parameterizedQuery = parameterizedQuery.replace(
        new RegExp(`(?<!:):${escapedName}\\b`, 'g'),
        () => placeholder,
      );
    }
    return { sql: parameterizedQuery, params: values };
  }

  /** Returns `RANDOM() < pct` for a fraction strictly between 0 and 1, otherwise an empty string. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `RANDOM() < ${samplePct}`;
  }

  /**
   * Returns a `TABLESAMPLE SYSTEM (…)` clause with the fraction converted to
   * a percentage, or an empty string when the fraction is not strictly
   * between 0 and 1.
   */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `TABLESAMPLE SYSTEM (${samplePct * 100})`;
  }

  /** Returns `LIMIT n`, with `OFFSET m` appended only when the offset is positive. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting rows where `column` (used verbatim) is NULL. */
  getNullCountExpression(column: string): string {
    return `COUNT(*) FILTER (WHERE ${column} IS NULL)`;
  }

  /** Aggregate expression counting distinct values of `column` (used verbatim). */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Builds a query counting distinct non-null values among the first
   * `sampleSize` rows returned for the column.
   *
   * @param tableName - Already formatted/quoted table name.
   * @param columnName - Column expression, interpolated verbatim (callers pass it quoted).
   * @param sampleSize - Number of rows to sample.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Builds a query returning up to `limit` distinct non-null values of the
   * column, cast to text and ordered.
   *
   * @param tableName - Already formatted/quoted table name.
   * @param columnName - Column expression, interpolated verbatim (callers pass it quoted).
   * @param limit - Maximum number of values.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT ${columnName}::text AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /**
   * Builds a query reading per-column distinct-value estimates from
   * `pg_stats`. Positive `n_distinct` is used as-is; negative values are
   * scaled by the table's `reltuples`. Schema and table names are embedded as
   * string literals with single quotes doubled.
   *
   * @returns The SQL text (this implementation never returns `null`).
   */
  generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null {
    return `
      SELECT
        s.attname AS column_name,
        CASE
          WHEN s.n_distinct > 0 THEN s.n_distinct::bigint
          WHEN s.n_distinct < 0 THEN (-s.n_distinct * c.reltuples)::bigint
          ELSE NULL
        END AS estimated_cardinality
      FROM pg_stats s
      JOIN pg_class c ON c.relname = s.tablename
      JOIN pg_namespace n ON c.relnamespace = n.oid AND n.nspname = s.schemaname
      WHERE s.schemaname = '${schemaName.replace(/'/g, "''")}'
        AND s.tablename = '${tableName.replace(/'/g, "''")}'
        AND s.n_distinct IS NOT NULL
    `;
  }

  /**
   * Same as `generateCardinalitySampleQuery`, but the sampled rows are chosen
   * with `ORDER BY RANDOM()` before the limit is applied.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RANDOM()
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Builds a `DATE_TRUNC` expression for the given granularity.
   *
   * @param column - Column expression, interpolated verbatim.
   * @param granularity - Truncation unit.
   * @param timezone - When given, the column is first converted with
   *   `AT TIME ZONE` (single quotes in the zone name are doubled).
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
    return `DATE_TRUNC('${granularity}', ${col})`;
  }

  /**
   * Builds an expression that buckets a timestamp into fixed-width intervals
   * counted from an origin (default `1970-01-01`), using epoch arithmetic.
   *
   * @param column - Column expression, interpolated verbatim.
   * @param interval - Interval literal text, e.g. `15 minutes`.
   * @param origin - Optional origin timestamp literal text.
   * @param timezone - Optional zone applied with `AT TIME ZONE`.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = timezone ? `(${column} AT TIME ZONE '${timezone.replace(/'/g, "''")}')` : column;
    const originExpr = origin ? `TIMESTAMP '${origin.replace(/'/g, "''")}'` : "TIMESTAMP '1970-01-01'";
    const safeInterval = interval.replace(/'/g, "''");
    return `${originExpr} + FLOOR(EXTRACT(EPOCH FROM (${col} - ${originExpr})) / EXTRACT(EPOCH FROM INTERVAL '${safeInterval}')) * INTERVAL '${safeInterval}'`;
  }

  /** Wraps interval text in an `INTERVAL '…'` literal, doubling single quotes. */
  parseIntervalToSql(interval: string): string {
    return `INTERVAL '${interval.replace(/'/g, "''")}'`;
  }
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { KloPostgresHistoricSqlQueryClient } from './historic-sql-query-client.js';
|
||||
import type { KloPostgresPoolConfig, KloPostgresPoolFactory } from './connector.js';
|
||||
|
||||
// Verifies the historic-SQL client against an in-memory fake pool.
describe('KloPostgresHistoricSqlQueryClient', () => {
  it('executes parameterized read-only SQL through the native Postgres connector pool', async () => {
    const recordedQueries: Array<{ sql: string; params?: unknown[] }> = [];
    const release = vi.fn();
    const end = vi.fn(async () => {});

    // Fake client: records every statement and answers with a single int4 column.
    const fakeClient = {
      async query(sql: string, params?: unknown[]) {
        recordedQueries.push({ sql, params });
        return {
          fields: [{ name: 'answer', dataTypeID: 23 }],
          rows: [{ answer: 42 }],
        };
      },
      release,
    };
    const poolFactory: KloPostgresPoolFactory = {
      createPool(_config: KloPostgresPoolConfig) {
        return {
          async connect() {
            return fakeClient;
          },
          end,
        };
      },
    };

    const client = new KloPostgresHistoricSqlQueryClient({
      connectionId: 'warehouse',
      connection: {
        driver: 'postgres',
        readonly: true,
        url: 'postgresql://readonly:secret@pg.example.test/warehouse', // pragma: allowlist secret
      },
      poolFactory,
    });

    const outcome = client.executeQuery('SELECT $1::int AS answer', [42]);
    await expect(outcome).resolves.toEqual({
      headers: ['answer'],
      rows: [[42]],
      totalRows: 1,
    });
    expect(recordedQueries).toEqual([{ sql: 'SELECT $1::int AS answer', params: [42] }]);

    // Cleanup must end the pool; the client was released once by the query.
    await client.cleanup();
    expect(release).toHaveBeenCalledTimes(1);
    expect(end).toHaveBeenCalledTimes(1);
  });
});
|
||||
37
packages/connector-postgres/src/historic-sql-query-client.ts
Normal file
37
packages/connector-postgres/src/historic-sql-query-client.ts
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import type { KloPostgresQueryClient } from '@klo/context/ingest';
|
||||
import { KloPostgresScanConnector, type KloPostgresScanConnectorOptions } from './connector.js';
|
||||
|
||||
/** Construction options for the historic-SQL client; identical to the scan connector's options. */
export type KloPostgresHistoricSqlQueryClientOptions = KloPostgresScanConnectorOptions;
|
||||
|
||||
/**
 * `KloPostgresQueryClient` implementation that runs statements through a
 * dedicated `KloPostgresScanConnector` built from the same options.
 */
export class KloPostgresHistoricSqlQueryClient implements KloPostgresQueryClient {
  private readonly connectionId: string;
  private readonly connector: KloPostgresScanConnector;

  /**
   * @param options - Connector options; `connectionId` is also kept to
   *   address the connector on each query.
   */
  constructor(options: KloPostgresHistoricSqlQueryClientOptions) {
    this.connector = new KloPostgresScanConnector(options);
    this.connectionId = options.connectionId;
  }

  /**
   * Executes SQL via the connector's `executeReadOnly` and returns only the
   * headers, rows and total row count of its result.
   *
   * @param sql - Statement to run.
   * @param params - Optional positional parameters.
   */
  async executeQuery(
    sql: string,
    params?: unknown[],
  ): Promise<{ headers: string[]; rows: unknown[][]; totalRows: number }> {
    const request = { connectionId: this.connectionId, sql, params };
    // An empty placeholder is passed as the scan context argument.
    const { headers, rows, totalRows } = await this.connector.executeReadOnly(request, {} as never);
    return { headers, rows, totalRows };
  }

  /** Releases the underlying connector's resources. */
  async cleanup(): Promise<void> {
    await this.connector.cleanup();
  }
}
|
||||
21
packages/connector-postgres/src/index.ts
Normal file
21
packages/connector-postgres/src/index.ts
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
export { KloPostgresDialect } from './dialect.js';
|
||||
export {
|
||||
isKloPostgresConnectionConfig,
|
||||
KloPostgresScanConnector,
|
||||
postgresPoolConfigFromConfig,
|
||||
type KloPostgresColumnDistinctValuesOptions,
|
||||
type KloPostgresColumnDistinctValuesResult,
|
||||
type KloPostgresColumnStatisticsResult,
|
||||
type KloPostgresConnectionConfig,
|
||||
type KloPostgresEndpointResolver,
|
||||
type KloPostgresPoolConfig,
|
||||
type KloPostgresPoolFactory,
|
||||
type KloPostgresReadOnlyQueryInput,
|
||||
type KloPostgresScanConnectorOptions,
|
||||
type KloPostgresTableSampleResult,
|
||||
} from './connector.js';
|
||||
export {
|
||||
KloPostgresHistoricSqlQueryClient,
|
||||
type KloPostgresHistoricSqlQueryClientOptions,
|
||||
} from './historic-sql-query-client.js';
|
||||
export { createPostgresLiveDatabaseIntrospection } from './live-database-introspection.js';
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '@klo/context/ingest';
|
||||
import type { KloProjectConnectionConfig } from '@klo/context/project';
|
||||
import {
|
||||
KloPostgresScanConnector,
|
||||
type KloPostgresConnectionConfig,
|
||||
type KloPostgresEndpointResolver,
|
||||
type KloPostgresPoolFactory,
|
||||
} from './connector.js';
|
||||
|
||||
/** Inputs for `createPostgresLiveDatabaseIntrospection`. */
interface CreatePostgresLiveDatabaseIntrospectionOptions {
  /** Project connection configs, keyed by connection id. */
  connections: Record<string, KloProjectConnectionConfig>;
  /** Optional pool factory forwarded to each connector. */
  poolFactory?: KloPostgresPoolFactory;
  /** Optional endpoint resolver forwarded to each connector. */
  endpointResolver?: KloPostgresEndpointResolver;
  /** Optional clock forwarded to each connector. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Creates a `LiveDatabaseIntrospectionPort` backed by PostgreSQL scan
 * connectors.
 *
 * Each `extractSchema` call builds a fresh connector for the requested
 * connection id, runs `introspect` with a run id of `postgres-<connectionId>`
 * and always cleans the connector up afterwards.
 *
 * NOTE(review): the connection looked up by id is cast, not validated, and
 * may be `undefined` for an unknown id — confirm the connector handles that.
 *
 * @param options - Connection configs plus optional connector collaborators.
 */
export function createPostgresLiveDatabaseIntrospection(
  options: CreatePostgresLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const { connections, poolFactory, endpointResolver, now } = options;

  async function extractSchema(connectionId: string) {
    const connection = connections[connectionId] as KloPostgresConnectionConfig | undefined;
    const connector = new KloPostgresScanConnector({
      connectionId,
      connection,
      poolFactory,
      endpointResolver,
      now,
    });
    try {
      return await connector.introspect({ connectionId, driver: 'postgres' }, { runId: `postgres-${connectionId}` });
    } finally {
      await connector.cleanup();
    }
  }

  return { extractSchema };
}
|
||||
13
packages/connector-postgres/src/package-exports.test.ts
Normal file
13
packages/connector-postgres/src/package-exports.test.ts
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
// Smoke test: the package entry point exposes every public runtime symbol.
describe('@klo/connector-postgres package exports', () => {
  it('exports the connector, dialect, and live-database adapter', async () => {
    const {
      KloPostgresDialect,
      KloPostgresScanConnector,
      KloPostgresHistoricSqlQueryClient,
      createPostgresLiveDatabaseIntrospection,
      isKloPostgresConnectionConfig,
      postgresPoolConfigFromConfig,
    } = await import('./index.js');

    expect(KloPostgresDialect).toBeTypeOf('function');
    expect(KloPostgresScanConnector).toBeTypeOf('function');
    expect(KloPostgresHistoricSqlQueryClient).toBeTypeOf('function');
    expect(createPostgresLiveDatabaseIntrospection).toBeTypeOf('function');
    expect(isKloPostgresConnectionConfig).toBeTypeOf('function');
    expect(postgresPoolConfigFromConfig).toBeTypeOf('function');
  });
});
|
||||
9
packages/connector-postgres/tsconfig.json
Normal file
9
packages/connector-postgres/tsconfig.json
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"extends": "../../tsconfig.base.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src"
|
||||
},
|
||||
"include": ["src/**/*.ts"],
|
||||
"exclude": ["dist", "node_modules"]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue