Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,342 @@
import { describe, expect, it, vi } from 'vitest';
import {
createPostgresLiveDatabaseIntrospection,
isKloPostgresConnectionConfig,
KloPostgresScanConnector,
postgresPoolConfigFromConfig,
type KloPostgresPoolFactory,
} from './index.js';
/** Canned result handed back by the fake pool's `query` for a matching SQL fragment. */
interface FakeQueryResult {
/** Result rows, keyed by column name. */
rows: Record<string, unknown>[];
/** Optional column descriptors: the column name plus a numeric `dataTypeID`. */
fields?: Array<{ name: string; dataTypeID: number }>;
}
/**
 * Builds a pool factory whose clients answer queries from canned results.
 *
 * Incoming SQL is whitespace-normalised and matched against the map keys in
 * insertion order; the first key contained in the SQL wins. SQL that matches
 * no key makes the query reject, so unexpected statements fail the test.
 *
 * @param results SQL fragment -> canned result to return for it.
 * @returns A factory whose pools all share one mocked `query` function.
 */
function fakePoolFactory(results: Map<string, FakeQueryResult>): KloPostgresPoolFactory {
  const collapseWhitespace = (sql: string): string => sql.replace(/\s+/g, ' ').trim();

  const query = vi.fn(async (sql: string, params?: unknown[]) => {
    const normalized = collapseWhitespace(sql);
    const match = Array.from(results.entries()).find(([fragment]) => normalized.includes(fragment));
    if (match === undefined) {
      throw new Error(`Unexpected SQL: ${normalized} params=${JSON.stringify(params ?? [])}`);
    }
    return match[1];
  });

  return {
    createPool: () => ({
      // Every checkout gets its own `release` mock but shares the `query` mock.
      connect: async () => ({ query, release: vi.fn() }),
      end: vi.fn(async () => undefined),
    }),
  };
}
/**
 * Canned catalog, sampling, and statistics results for a small `public` schema
 * (`customers`, `orders`, and the `recent_orders` view).
 *
 * Keys are SQL fragments. Their insertion order matters because the fake pool
 * returns the first key that the executed SQL contains.
 */
function metadataResults(): Map<string, FakeQueryResult> {
  const tables: FakeQueryResult = {
    rows: [
      { table_name: 'customers', table_kind: 'r', row_count: '2', table_comment: 'Customers' },
      { table_name: 'orders', table_kind: 'r', row_count: '3', table_comment: null },
      { table_name: 'recent_orders', table_kind: 'v', row_count: '0', table_comment: 'Recent orders' },
    ],
  };

  const columns: FakeQueryResult = {
    rows: [
      { table_name: 'customers', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
      { table_name: 'customers', column_name: 'name', data_type: 'text', is_nullable: false, column_comment: 'Name' },
      { table_name: 'orders', column_name: 'id', data_type: 'integer', is_nullable: false, column_comment: null },
      { table_name: 'orders', column_name: 'customer_id', data_type: 'integer', is_nullable: false, column_comment: null },
      { table_name: 'orders', column_name: 'status', data_type: 'text', is_nullable: true, column_comment: null },
      { table_name: 'recent_orders', column_name: 'id', data_type: 'integer', is_nullable: true, column_comment: null },
    ],
  };

  const foreignKeys: FakeQueryResult = {
    rows: [
      {
        table_name: 'orders',
        column_name: 'customer_id',
        foreign_table_schema: 'public',
        foreign_table_name: 'customers',
        foreign_column_name: 'id',
        constraint_name: 'orders_customer_id_fkey',
      },
    ],
  };

  const primaryKeys: FakeQueryResult = {
    rows: [
      { table_name: 'customers', column_name: 'id' },
      { table_name: 'orders', column_name: 'id' },
    ],
  };

  const orderIdSample: FakeQueryResult = { rows: [{ id: 10 }], fields: [{ name: 'id', dataTypeID: 23 }] };
  const orderStatusSample: FakeQueryResult = {
    rows: [{ status: 'paid' }, { status: 'open' }],
    fields: [{ name: 'status', dataTypeID: 25 }],
  };
  const statusCardinality: FakeQueryResult = { rows: [{ cardinality: '2' }] };
  const statusDistinctValues: FakeQueryResult = { rows: [{ val: 'open' }, { val: 'paid' }] };
  const orderRowCount: FakeQueryResult = { rows: [{ count: '3' }] };
  const columnStatistics: FakeQueryResult = { rows: [{ column_name: 'status', estimated_cardinality: '2' }] };
  const ping: FakeQueryResult = { rows: [{ '?column?': 1 }], fields: [{ name: '?column?', dataTypeID: 23 }] };
  const schemaList: FakeQueryResult = { rows: [{ schema_name: 'public' }] };

  return new Map<string, FakeQueryResult>([
    ['FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n', tables],
    ['FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_class c', columns],
    ["tc.constraint_type = 'FOREIGN KEY'", foreignKeys],
    ["tc.constraint_type = 'PRIMARY KEY'", primaryKeys],
    ['SELECT "id" FROM "public"."orders" LIMIT 1', orderIdSample],
    ['SELECT "status" FROM "public"."orders" WHERE "status" IS NOT NULL', orderStatusSample],
    ['COUNT(DISTINCT val) AS cardinality', statusCardinality],
    ['SELECT DISTINCT "status"::text AS val', statusDistinctValues],
    ['SELECT COUNT(*) AS count FROM "public"."orders"', orderRowCount],
    ['FROM pg_stats s', columnStatistics],
    ['SELECT 1', ping],
    ['SELECT schema_name FROM information_schema.schemata', schemaList],
  ]);
}
describe('KloPostgresScanConnector', () => {
  /** Fixed clock so `extractedAt` is deterministic. */
  const fixedNow = (): Date => new Date('2026-04-29T10:00:00.000Z');

  /** Fresh copy of the read-only `warehouse` connection used by most tests. */
  function warehouseConnection() {
    return {
      driver: 'postgres',
      host: 'db.example.test',
      database: 'analytics',
      username: 'reader',
      password: 'test-password', // pragma: allowlist secret
      schema: 'public',
      readonly: true,
    };
  }

  it('resolves configuration safely', () => {
    const urlBased = isKloPostgresConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL', readonly: true });
    const longDriverName = isKloPostgresConnectionConfig({ driver: 'postgresql', host: 'db', database: 'analytics' });
    const otherDriver = isKloPostgresConnectionConfig({ driver: 'mysql', host: 'db' });
    expect(urlBased).toBe(true);
    expect(longDriverName).toBe(true);
    expect(otherDriver).toBe(false);

    const poolConfig = postgresPoolConfigFromConfig({
      connectionId: 'warehouse',
      connection: {
        driver: 'postgres',
        host: 'db.example.test',
        database: 'analytics',
        username: 'reader',
        password: 'test-password', // pragma: allowlist secret
        schemas: ['analytics', 'public'],
        readonly: true,
        ssl: true,
        rejectUnauthorized: false,
      },
    });
    expect(poolConfig).toMatchObject({
      host: 'db.example.test',
      port: 5432,
      database: 'analytics',
      user: 'reader',
      password: 'test-password', // pragma: allowlist secret
      options: '-c search_path=analytics,public',
      ssl: { rejectUnauthorized: false },
    });

    // A connection that does not opt in to read-only mode must be rejected.
    const buildWithoutReadonly = () =>
      postgresPoolConfigFromConfig({
        connectionId: 'warehouse',
        connection: { driver: 'postgres', host: 'db.example.test', database: 'analytics', username: 'reader' },
      });
    expect(buildWithoutReadonly).toThrow('Native PostgreSQL connector requires connections.warehouse.readonly: true');
  });

  it('introspects schemas, tables, views, primary keys, comments, row counts, and foreign keys', async () => {
    const connector = new KloPostgresScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      poolFactory: fakePoolFactory(metadataResults()),
      now: fixedNow,
    });

    const snapshot = await connector.introspect(
      { connectionId: 'warehouse', driver: 'postgres' },
      { runId: 'scan-run-1' },
    );

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'postgres',
      extractedAt: '2026-04-29T10:00:00.000Z',
      scope: { schemas: ['public'] },
      metadata: {
        database: 'analytics',
        schemas: ['public'],
        host: 'db.example.test',
        table_count: 3,
        total_columns: 6,
      },
    });

    const tableSummaries = snapshot.tables.map((table) => [table.db, table.name, table.kind, table.estimatedRows]);
    expect(tableSummaries).toEqual([
      ['public', 'customers', 'table', 2],
      ['public', 'orders', 'table', 3],
      ['public', 'recent_orders', 'view', null],
    ]);

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'integer',
      normalizedType: 'integer',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
    });

    const orders = snapshot.tables.find((table) => table.name === 'orders');
    expect(orders?.foreignKeys).toEqual([
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: 'public',
        toTable: 'customers',
        toColumn: 'id',
        constraintName: 'orders_customer_id_fkey',
      },
    ]);
  });

  it('runs samples, distinct values, statistics, read-only SQL, and schema listing', async () => {
    const connector = new KloPostgresScanConnector({
      connectionId: 'warehouse',
      connection: warehouseConnection(),
      poolFactory: fakePoolFactory(metadataResults()),
    });

    const tableSample = connector.sampleTable(
      { connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, columns: ['id'], limit: 1 },
      { runId: 'scan-run-1' },
    );
    await expect(tableSample).resolves.toEqual({
      headers: ['id'],
      headerTypes: ['integer'],
      rows: [[10]],
      totalRows: 1,
    });

    const columnSample = connector.sampleColumn(
      { connectionId: 'warehouse', table: { catalog: null, db: 'public', name: 'orders' }, column: 'status', limit: 5 },
      { runId: 'scan-run-1' },
    );
    await expect(columnSample).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

    const distinctValues = connector.getColumnDistinctValues(
      { catalog: null, db: 'public', name: 'orders' },
      'status',
      { maxCardinality: 5, limit: 10, sampleSize: 100 },
    );
    await expect(distinctValues).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    const statistics = connector.getColumnStatistics({ catalog: null, db: 'public', name: 'orders' });
    await expect(statistics).resolves.toEqual({
      cardinalityByColumn: new Map([['status', 2]]),
    });

    await expect(connector.getTableRowCount({ db: 'public', name: 'orders' })).resolves.toBe(3);
    await expect(connector.listSchemas()).resolves.toEqual(['public']);
    await expect(connector.testConnection()).resolves.toEqual({ success: true });

    // Anything other than SELECT/WITH must be refused before reaching the pool.
    const destructive = connector.executeReadOnly(
      { connectionId: 'warehouse', sql: 'delete from orders' },
      { runId: 'scan-run-1' },
    );
    await expect(destructive).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
  });

  it('adapts native PostgreSQL snapshots to live-database introspection for local ingest', async () => {
    const introspection = createPostgresLiveDatabaseIntrospection({
      connections: { warehouse: warehouseConnection() },
      poolFactory: fakePoolFactory(metadataResults()),
      now: fixedNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T10:00:00.000Z',
    });

    const customers = snapshot.tables.find((table) => table.name === 'customers');
    expect(customers).toMatchObject({
      name: 'customers',
      catalog: null,
      db: 'public',
      columns: [
        {
          name: 'id',
          nativeType: 'integer',
          normalizedType: 'integer',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: null,
        },
        {
          name: 'name',
          nativeType: 'text',
          normalizedType: 'text',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: 'Name',
        },
      ],
      foreignKeys: [],
    });
  });

  it('does not end the pool before introspection completes', async () => {
    let poolEnded = false;

    // Wraps the fake pool so that checking out a client after `end()` fails,
    // mirroring the error text used in the original fixture.
    const endAwarePoolFactory: KloPostgresPoolFactory = {
      createPool: () => {
        const inner = fakePoolFactory(metadataResults()).createPool({
          max: 1,
          idleTimeoutMillis: 1,
          connectionTimeoutMillis: 1,
        });
        return {
          connect: async () => {
            if (poolEnded) {
              throw new Error('Cannot use a pool after calling end on the pool');
            }
            return inner.connect();
          },
          end: async () => {
            poolEnded = true;
            return inner.end();
          },
        };
      },
    };

    const introspection = createPostgresLiveDatabaseIntrospection({
      connections: { warehouse: warehouseConnection() },
      poolFactory: endAwarePoolFactory,
      now: fixedNow,
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot.tables.length).toBeGreaterThan(0);
    expect(poolEnded).toBe(true);
  });
});

View file

@ -0,0 +1,707 @@
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { assertReadOnlySql, limitSqlForExecution } from '@klo/context/connections';
import {
createKloConnectorCapabilities,
type KloColumnSampleInput,
type KloColumnSampleResult,
type KloColumnStatsInput,
type KloColumnStatsResult,
type KloQueryResult,
type KloReadOnlyQueryInput,
type KloScanConnector,
type KloScanContext,
type KloScanInput,
type KloSchemaColumn,
type KloSchemaForeignKey,
type KloSchemaSnapshot,
type KloSchemaTable,
type KloTableRef,
type KloTableSampleInput,
type KloTableSampleResult,
} from '@klo/context/scan';
import { Pool } from 'pg';
import { KloPostgresDialect } from './dialect.js';
/**
 * Display names for the PostgreSQL type OIDs reported in a result's `fields`.
 * OIDs missing from this table are rendered as `oid:<n>` by the query helper.
 */
const PG_OID_TYPE_MAP: Record<number, string> = {
  // Scalar types, ordered by OID.
  16: 'boolean',
  20: 'bigint',
  21: 'smallint',
  23: 'integer',
  25: 'text',
  114: 'json',
  700: 'real',
  701: 'double precision',
  1043: 'varchar',
  1082: 'date',
  1114: 'timestamp',
  1184: 'timestamptz',
  1700: 'numeric',
  2950: 'uuid',
  3802: 'jsonb',

  // Array types.
  1007: 'integer[]',
  1009: 'text[]',
  1016: 'bigint[]',
};
/**
 * User-supplied connection block for the native PostgreSQL connector.
 *
 * String settings read through `stringConfigValue` (`url`, `host`, `database`,
 * `username`, `user`, `password`) may be literal values, `env:NAME` references,
 * or `file:PATH` references.
 */
export interface KloPostgresConnectionConfig {
/** Driver name; `postgres` and `postgresql` are accepted, case-insensitively. */
driver?: string;
/** Server host; required unless `url` is given. */
host?: string;
/** Server port; 5432 is used when this is not a finite number. */
port?: number;
/** Database name; required unless `url` is given. */
database?: string;
/** Login role; takes precedence over `user`. */
username?: string;
/** Alternative spelling of `username`. */
user?: string;
/** Login password. */
password?: string;
/** Connection URL; when present it is passed to the pool as `connectionString`. */
url?: string;
/** Single schema to scan; used only when `schemas` yields nothing. */
schema?: string;
/** Schemas to scan; takes precedence over `schema`. */
schemas?: string[];
/** Enables TLS on the pool when truthy. */
ssl?: boolean;
/** TLS certificate verification flag; treated as `true` when omitted. */
rejectUnauthorized?: boolean;
/** Must be exactly `true`, otherwise building the pool config throws. */
readonly?: boolean;
/** Additional, connector-agnostic settings are tolerated. */
[key: string]: unknown;
}
/**
 * Settings passed to the pool factory. `postgresPoolConfigFromConfig` fills in
 * either `connectionString` or the discrete host/port/database/user/password
 * fields, never both.
 */
export interface KloPostgresPoolConfig {
host?: string;
port?: number;
database?: string;
user?: string;
password?: string;
/** Full connection URL, set when the connection was configured through `url`. */
connectionString?: string;
/** Pool size limit. */
max: number;
/** Idle timeout in milliseconds. */
idleTimeoutMillis: number;
/** Connection timeout in milliseconds. */
connectionTimeoutMillis: number;
/** Extra startup options; carries the `-c search_path=...` setting. */
options?: string;
/** TLS settings; present only when the connection enables `ssl`. */
ssl?: { rejectUnauthorized: boolean };
}
/** The subset of a driver query result that the connector reads. */
interface KloPostgresQueryResult {
/** Column descriptors in result order; may be absent. */
fields?: Array<{ name: string; dataTypeID: number }>;
/** Result rows keyed by column name. */
rows: Record<string, unknown>[];
}
/** A checked-out client; the connector releases it after every query. */
interface KloPostgresClient {
query(sql: string, params?: unknown[]): Promise<KloPostgresQueryResult>;
release(): void;
}
/** The subset of a connection pool that the connector uses. */
interface KloPostgresPool {
connect(): Promise<KloPostgresClient>;
end(): Promise<void>;
}
/** Seam for swapping the real pool for a fake one (used by the tests). */
export interface KloPostgresPoolFactory {
createPool(config: KloPostgresPoolConfig): KloPostgresPool;
}
/** Host and port the pool should actually connect to. */
interface KloPostgresResolvedEndpoint {
host: string;
port: number;
/** Optional teardown hook; the connector awaits it during `cleanup()`. */
close?: () => Promise<void>;
}
/**
 * Optional hook consulted when the connector creates its pool. The returned
 * host and port replace the configured ones in the pool config.
 */
export interface KloPostgresEndpointResolver {
resolve(input: {
host: string;
port: number;
connection: KloPostgresConnectionConfig;
}): Promise<KloPostgresResolvedEndpoint>;
}
/** Constructor options for `KloPostgresScanConnector`. */
export interface KloPostgresScanConnectorOptions {
/** Connection id; scan inputs carrying a different id are rejected. */
connectionId: string;
/** Connection block; validated by `postgresPoolConfigFromConfig` at construction. */
connection: KloPostgresConnectionConfig | undefined;
/** Pool factory override; defaults to a factory that creates a `pg` `Pool`. */
poolFactory?: KloPostgresPoolFactory;
/** Optional endpoint resolver consulted before the pool is created. */
endpointResolver?: KloPostgresEndpointResolver;
/** Environment used to resolve `env:` references; defaults to `process.env`. */
env?: NodeJS.ProcessEnv;
/** Clock used for `extractedAt`; defaults to `() => new Date()`. */
now?: () => Date;
}
/** Read-only query input extended with optional bind parameters. */
export interface KloPostgresReadOnlyQueryInput extends KloReadOnlyQueryInput {
/**
 * An array is passed to the driver unchanged; any other value is handed to the
 * dialect's `prepareQuery` together with the SQL.
 */
params?: Record<string, unknown> | unknown[];
}
/** Options for `getColumnDistinctValues`. */
export interface KloPostgresColumnDistinctValuesOptions {
/** Above this cardinality the values are not fetched. */
maxCardinality: number;
/** Limit passed to the distinct-values query. */
limit: number;
/** Sample size for the cardinality query; 10000 when omitted. */
sampleSize?: number;
}
/** Result of `getColumnDistinctValues`. */
export interface KloPostgresColumnDistinctValuesResult {
/** Distinct values as strings, or `null` when `cardinality` exceeded `maxCardinality`. */
values: string[] | null;
cardinality: number;
}
/** Result of `getColumnStatistics`. */
export interface KloPostgresColumnStatisticsResult {
/** Column name -> estimated cardinality. */
cardinalityByColumn: Map<string, number>;
}
/** Table sample that additionally reports a type name per header. */
export interface KloPostgresTableSampleResult extends KloTableSampleResult {
headerTypes?: string[];
}
/** Table reference where only `name` is mandatory; `catalog` and `db` are optional. */
type PostgresTableRef = Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>;
/** Row shape of the table-listing catalog query. */
interface PostgresTableRow {
table_name: string;
/** `pg_class.relkind`; `'v'` is treated as a view, anything else as a table. */
table_kind: string;
/** `reltuples::bigint`; typed `unknown` and converted with `finiteNumber`. */
row_count: unknown;
table_comment: string | null;
}
/** Row shape of the column-listing catalog query. */
interface PostgresColumnRow {
table_name: string;
column_name: string;
/** Output of `format_type(...)`. */
data_type: string;
is_nullable: boolean;
column_comment: string | null;
}
/** Row shape of the primary-key query: one row per key column. */
interface PostgresPrimaryKeyRow {
table_name: string;
column_name: string;
}
/** Row shape of the foreign-key query: one row per referencing column. */
interface PostgresForeignKeyRow {
table_name: string;
column_name: string;
foreign_table_schema: string | null;
foreign_table_name: string;
foreign_column_name: string;
constraint_name: string | null;
}
/** Row shape of the schema-listing query. */
interface PostgresSchemaRow {
schema_name: string;
}
/** Row shape shared by the row-count (`count`) and cardinality (`cardinality`) queries. */
interface PostgresCountRow {
count?: unknown;
cardinality?: unknown;
}
/** Row shape of the distinct-values query. */
interface PostgresDistinctValueRow {
val: unknown;
}
/** Row shape of the column-statistics query. */
interface PostgresStatsRow {
column_name: string;
estimated_cardinality: unknown;
}
/** Production pool factory: creates a `pg` `Pool` from the given config. */
class DefaultPostgresPoolFactory implements KloPostgresPoolFactory {
/** Creates the pool; no other setup is performed here. */
createPool(config: KloPostgresPoolConfig): KloPostgresPool {
return new Pool(config);
}
}
/**
 * Buckets catalog rows by their `table_name`.
 *
 * @param rows Rows carrying a `table_name` column.
 * @returns Table name -> rows for that table, in first-seen table order with
 *   each bucket keeping the input row order.
 */
function groupByTable<T extends { table_name: string }>(rows: T[]): Map<string, T[]> {
  return rows.reduce((byTable, row) => {
    const bucket = byTable.get(row.table_name);
    if (bucket === undefined) {
      byTable.set(row.table_name, [row]);
    } else {
      bucket.push(row);
    }
    return byTable;
  }, new Map<string, T[]>());
}
/**
 * Collects primary-key column names per table.
 *
 * @param rows One row per primary-key column.
 * @returns Table name -> set of its primary-key column names.
 */
function primaryKeyMap(rows: PostgresPrimaryKeyRow[]): Map<string, Set<string>> {
  const keyColumnsByTable = new Map<string, Set<string>>();
  rows.forEach(({ table_name: tableName, column_name: columnName }) => {
    const known = keyColumnsByTable.get(tableName);
    if (known === undefined) {
      keyColumnsByTable.set(tableName, new Set<string>([columnName]));
      return;
    }
    known.add(columnName);
  });
  return keyColumnsByTable;
}
/**
 * Converts keyed result rows into positional rows ordered like `result.fields`.
 *
 * When the result has no `fields`, every row becomes an empty array because
 * there are no headers to project.
 */
function queryRows(result: KloPostgresQueryResult): unknown[][] {
  const columnNames: string[] = [];
  for (const field of result.fields ?? []) {
    columnNames.push(field.name);
  }

  const positionalRows: unknown[][] = [];
  for (const record of result.rows) {
    positionalRows.push(columnNames.map((columnName) => record[columnName]));
  }
  return positionalRows;
}
/**
 * Converts a driver value to a finite number, or `null` when it carries no
 * usable number.
 *
 * Only numbers, bigints, and non-blank numeric strings are accepted. A bare
 * `Number(value)` would turn `null`, `''`, and `false` into `0`, reporting a
 * missing count as zero instead of "unknown".
 *
 * @param value Raw value from a result row.
 * @returns The finite number, or `null` for missing or non-numeric input.
 */
function finiteNumber(value: unknown): number | null {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : null;
  }
  if (typeof value === 'bigint') {
    return Number(value);
  }
  if (typeof value === 'string') {
    if (value.trim().length === 0) {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}
/**
 * Reads a string setting from the connection block and resolves any `env:` or
 * `file:` reference it contains.
 *
 * @param connection Connection block, possibly undefined.
 * @param key Setting to read.
 * @param env Environment used for `env:` references.
 * @returns The resolved value, or `undefined` when the setting is missing,
 *   not a string, or blank.
 */
function stringConfigValue(
  connection: KloPostgresConnectionConfig | undefined,
  key: keyof KloPostgresConnectionConfig,
  env: NodeJS.ProcessEnv,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(trimmed, env);
}
/**
 * Resolves an indirect configuration value.
 *
 * - `env:NAME` returns the environment variable, or `''` when it is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of the file. A leading `~`
 *   is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param value Raw setting value.
 * @param env Environment used for `env:` lookups.
 * @returns The resolved string.
 * @throws If a `file:` reference cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator after `~` before resolving: `path.resolve` discards
    // earlier segments when a later one is absolute, so `resolve(home, '/x')`
    // would read `/x` from the filesystem root instead of `<home>/x`.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
/**
 * Returns `value` when it is a finite number and `undefined` otherwise.
 * Numeric strings are not converted.
 */
function numberValue(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
/**
 * Extracts discrete connection settings from a PostgreSQL connection URL.
 *
 * The user name, password, and database name are percent-decoded so that
 * values containing reserved characters (for example `my%20db`) come back in
 * their literal form.
 *
 * @param url Connection URL such as `postgres://user:pass@host:5432/db`.
 * @returns The parts present in the URL; absent parts are `undefined`.
 * @throws If `url` is not a valid URL or contains a malformed percent escape.
 */
function parsePostgresUrl(url: string): Partial<KloPostgresConnectionConfig> {
  const parsed = new URL(url);
  const databasePath = parsed.pathname.replace(/^\/+/, '');
  return {
    host: parsed.hostname,
    port: parsed.port ? Number(parsed.port) : undefined,
    database: databasePath ? decodeURIComponent(databasePath) : undefined,
    username: parsed.username ? decodeURIComponent(parsed.username) : undefined,
    password: parsed.password ? decodeURIComponent(parsed.password) : undefined,
  };
}
/**
 * Determines which schemas to scan.
 *
 * Precedence: the usable entries of `schemas` (non-empty strings, duplicates
 * removed, order kept), then `schema`, then `public`. The fallback also applies
 * when `schemas` is non-empty but contains no usable entry, so the result is
 * never an empty list.
 *
 * @param connection Connection block.
 * @returns At least one schema name.
 */
function schemasFromConnection(connection: KloPostgresConnectionConfig): string[] {
  if (Array.isArray(connection.schemas)) {
    const usable = connection.schemas.filter(
      (schema): schema is string => typeof schema === 'string' && schema.length > 0,
    );
    if (usable.length > 0) {
      // De-duplicate so a repeated entry does not scan the same schema twice.
      return Array.from(new Set(usable));
    }
  }
  return typeof connection.schema === 'string' && connection.schema.length > 0 ? [connection.schema] : ['public'];
}
/**
 * Schemas for the session `search_path`: the schemas to scan, with `public`
 * appended when it is not already among them.
 */
function searchPathSchemasFromConnection(connection: KloPostgresConnectionConfig): string[] {
  const configured = schemasFromConnection(connection);
  if (configured.includes('public')) {
    return configured;
  }
  return configured.concat('public');
}
/**
 * Tells whether a connection block targets PostgreSQL.
 *
 * @param connection Connection block, possibly undefined.
 * @returns `true` when `driver` is `postgres` or `postgresql`, compared
 *   case-insensitively; `false` otherwise, including for a missing block.
 */
export function isKloPostgresConnectionConfig(connection: KloPostgresConnectionConfig | undefined): boolean {
  const normalizedDriver = String(connection?.driver ?? '').toLowerCase();
  return ['postgres', 'postgresql'].includes(normalizedDriver);
}
/**
 * Validates a connection block and converts it into pool settings.
 *
 * The block must name a PostgreSQL driver and set `readonly: true`. When a
 * `url` is configured it is passed through as `connectionString`; otherwise
 * `host`, `database`, and a user name are required and the port defaults to
 * 5432. The result always carries a `search_path` option and, when `ssl` is
 * enabled, TLS settings.
 *
 * @param input.connectionId Id used in error messages.
 * @param input.connection Connection block to convert.
 * @param input.env Environment for `env:` references; defaults to `process.env`.
 * @returns Pool settings ready for the pool factory.
 * @throws If the driver is not PostgreSQL, `readonly` is not `true`, or a
 *   required setting is missing.
 */
export function postgresPoolConfigFromConfig(input: {
  connectionId: string;
  connection: KloPostgresConnectionConfig | undefined;
  env?: NodeJS.ProcessEnv;
}): KloPostgresPoolConfig {
  const { connectionId, connection } = input;

  if (!isKloPostgresConnectionConfig(connection)) {
    throw new Error(`Native PostgreSQL connector cannot run driver "${connection?.driver ?? 'unknown'}"`);
  }
  if (connection?.readonly !== true) {
    throw new Error(`Native PostgreSQL connector requires connections.${connectionId}.readonly: true`);
  }

  const env = input.env ?? process.env;
  const referencedUrl = stringConfigValue(connection, 'url', env);

  // Explicit settings win over whatever the URL carries.
  const merged: KloPostgresConnectionConfig = {
    ...(referencedUrl ? parsePostgresUrl(referencedUrl) : {}),
    ...connection,
  };
  const setting = (key: keyof KloPostgresConnectionConfig): string | undefined =>
    stringConfigValue(merged, key, env);

  const host = setting('host');
  const database = setting('database');
  const user = setting('username') ?? setting('user');
  const password = setting('password');

  // Discrete settings are mandatory only when no URL supplies them.
  if (!referencedUrl) {
    if (!host) {
      throw new Error(`Native PostgreSQL connector requires connections.${connectionId}.host or url`);
    }
    if (!database) {
      throw new Error(`Native PostgreSQL connector requires connections.${connectionId}.database or url`);
    }
    if (!user) {
      throw new Error(`Native PostgreSQL connector requires connections.${connectionId}.username, user, or url`);
    }
  }

  const target = referencedUrl
    ? { connectionString: referencedUrl }
    : { host, port: numberValue(merged.port) ?? 5432, database, user, password };

  const config: KloPostgresPoolConfig = {
    max: 10,
    idleTimeoutMillis: 30_000,
    connectionTimeoutMillis: 10_000,
    ...target,
  };

  const searchPathSchemas = searchPathSchemasFromConnection(merged);
  if (searchPathSchemas.length > 0) {
    config.options = `-c search_path=${searchPathSchemas.join(',')}`;
  }

  if (merged.ssl) {
    config.ssl = { rejectUnauthorized: merged.rejectUnauthorized ?? true };
  }

  return config;
}
/**
 * Read-only scan connector for PostgreSQL.
 *
 * The connector validates its connection block at construction, creates its
 * pool lazily on the first query, and checks out and releases one client per
 * query. Call `cleanup()` to end the pool and close any resolved endpoint.
 */
export class KloPostgresScanConnector implements KloScanConnector {
  /** Connector id in the form `postgres:<connectionId>`. */
  readonly id: string;
  readonly driver = 'postgres' as const;
  readonly capabilities = createKloConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: true,
    readOnlySql: true,
    nestedAnalysis: true,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });
  private readonly connectionId: string;
  private readonly connection: KloPostgresConnectionConfig;
  private readonly poolConfig: KloPostgresPoolConfig;
  private readonly poolFactory: KloPostgresPoolFactory;
  private readonly endpointResolver?: KloPostgresEndpointResolver;
  private readonly now: () => Date;
  private readonly dialect = new KloPostgresDialect();
  private pool: KloPostgresPool | null = null;
  /** In-flight pool creation, shared by concurrent callers of `getPool()`. */
  private poolCreation: Promise<KloPostgresPool> | null = null;
  private resolvedEndpoint: KloPostgresResolvedEndpoint | null = null;

  /**
   * @param options Connector options; see `KloPostgresScanConnectorOptions`.
   * @throws If the connection block fails `postgresPoolConfigFromConfig` validation.
   */
  constructor(options: KloPostgresScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.connection = options.connection ?? {};
    this.poolConfig = postgresPoolConfigFromConfig({
      connectionId: options.connectionId,
      connection: options.connection,
      env: options.env,
    });
    this.poolFactory = options.poolFactory ?? new DefaultPostgresPoolFactory();
    this.endpointResolver = options.endpointResolver;
    this.now = options.now ?? (() => new Date());
    this.id = `postgres:${options.connectionId}`;
  }

  /**
   * Runs `SELECT 1` to check connectivity.
   *
   * @returns `{ success: true }`, or `{ success: false, error }` carrying the failure message.
   */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      await this.query('SELECT 1');
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Builds a schema snapshot of every configured schema, loading them one after another.
   *
   * @throws If `input.connectionId` does not match this connector.
   */
  async introspect(input: KloScanInput, _ctx: KloScanContext): Promise<KloSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const schemas = schemasFromConnection(this.connection);
    const allTables: KloSchemaTable[] = [];
    for (const schema of schemas) {
      const tables = await this.loadSchemaTables(schema);
      allTables.push(...tables);
    }
    return {
      connectionId: this.connectionId,
      driver: 'postgres',
      extractedAt: this.now().toISOString(),
      scope: { schemas },
      metadata: {
        database: this.poolConfig.database ?? this.connection.database ?? null,
        schemas,
        host: this.poolConfig.host ?? this.connection.host ?? null,
        table_count: allTables.length,
        total_columns: allTables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables: allTables,
    };
  }

  /**
   * Samples rows from a table using the dialect's sample query.
   *
   * @throws If `input.connectionId` does not match this connector.
   */
  async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise<KloPostgresTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns),
    );
    return {
      headers: result.headers,
      headerTypes: result.headerTypes,
      rows: result.rows,
      totalRows: result.totalRows,
    };
  }

  /**
   * Samples values of one column. Empty rows and rows whose first cell is
   * `null` are dropped; `nullCount` and `distinctCount` are not computed.
   *
   * @throws If `input.connectionId` does not match this connector.
   */
  async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise<KloColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = await this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /**
   * Reports the estimated distinct count for one column, taken from
   * `getColumnStatistics`. All other statistics are `null`.
   *
   * @returns The stats, or `null` when no estimate exists for the column.
   */
  async columnStats(input: KloColumnStatsInput, _ctx: KloScanContext): Promise<KloColumnStatsResult | null> {
    const stats = await this.getColumnStatistics(input.table);
    const value = stats?.cardinalityByColumn.get(input.column);
    return value === undefined
      ? null
      : { min: null, max: null, average: null, nullCount: null, distinctCount: value };
  }

  /**
   * Executes caller-supplied SQL after checking that it is read-only and
   * applying the row limit.
   *
   * Array `params` are passed to the driver as-is; any other `params` value is
   * handed to the dialect's `prepareQuery`.
   *
   * @throws If `input.connectionId` does not match, or the SQL is rejected by `assertReadOnlySql`.
   */
  async executeReadOnly(input: KloPostgresReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
    this.assertConnection(input.connectionId);
    const limitedSql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows);
    const prepared = Array.isArray(input.params)
      ? { sql: limitedSql, params: input.params }
      : this.dialect.prepareQuery(limitedSql, input.params);
    const result = await this.query(prepared.sql, prepared.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Fetches the distinct values of a column when its sampled cardinality is
   * small enough.
   *
   * @returns `null` when the cardinality is not a number; `values: []` for
   *   cardinality 0; `values: null` when the cardinality exceeds
   *   `options.maxCardinality`; otherwise the non-null values as strings.
   */
  async getColumnDistinctValues(
    table: KloTableRef,
    columnName: string,
    options: KloPostgresColumnDistinctValuesOptions,
  ): Promise<KloPostgresColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityRows = await this.queryRaw<PostgresCountRow>(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    const cardinality = finiteNumber(cardinalityRows[0]?.cardinality);
    if (cardinality === null) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesRows = await this.queryRaw<PostgresDistinctValueRow>(
      this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit),
    );
    return {
      values: valuesRows.filter((row) => row.val !== null).map((row) => String(row.val)),
      cardinality,
    };
  }

  /**
   * Loads estimated per-column cardinalities for a table.
   *
   * @returns The estimates, or `null` when the dialect produces no statistics
   *   query or no column has a numeric estimate.
   */
  async getColumnStatistics(table: KloTableRef): Promise<KloPostgresColumnStatisticsResult | null> {
    const schema = table.db ?? schemasFromConnection(this.connection)[0] ?? 'public';
    const sql = this.dialect.generateColumnStatisticsQuery(schema, table.name);
    if (!sql) {
      return null;
    }
    const rows = await this.queryRaw<PostgresStatsRow>(sql);
    const cardinalityByColumn = new Map<string, number>();
    for (const row of rows) {
      const cardinality = finiteNumber(row.estimated_cardinality);
      if (cardinality !== null) {
        cardinalityByColumn.set(row.column_name, cardinality);
      }
    }
    return cardinalityByColumn.size > 0 ? { cardinalityByColumn } : null;
  }

  /**
   * Counts the rows of a table with `COUNT(*)`.
   *
   * @param table Table name (resolved against the first configured schema) or a table reference.
   * @returns The row count, or 0 when the result is not numeric.
   */
  async getTableRowCount(table: string | PostgresTableRef): Promise<number> {
    const tableRef =
      typeof table === 'string'
        ? { catalog: null, db: schemasFromConnection(this.connection)[0] ?? 'public', name: table }
        : table;
    const rows = await this.queryRaw<PostgresCountRow>(`SELECT COUNT(*) AS count FROM ${this.qTableName(tableRef)}`);
    return finiteNumber(rows[0]?.count) ?? 0;
  }

  /** Formats a table reference as a quoted, optionally schema-qualified name. */
  qTableName(table: PostgresTableRef): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes an identifier using the dialect's rules. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Lists schema names, excluding `information_schema` and names matching `pg_%`. */
  async listSchemas(): Promise<string[]> {
    const rows = await this.queryRaw<PostgresSchemaRow>(`
      SELECT schema_name
      FROM information_schema.schemata
      WHERE schema_name <> 'information_schema'
        AND schema_name NOT LIKE 'pg_%'
      ORDER BY schema_name
    `);
    return rows.map((row) => row.schema_name);
  }

  /**
   * Ends the pool and closes the resolved endpoint.
   *
   * Both references are detached before awaiting, and the endpoint is closed in
   * a `finally` block, so a failing `pool.end()` no longer leaves the endpoint
   * open.
   */
  async cleanup(): Promise<void> {
    const pool = this.pool;
    const endpoint = this.resolvedEndpoint;
    this.pool = null;
    this.resolvedEndpoint = null;
    try {
      if (pool) {
        await pool.end();
      }
    } finally {
      if (endpoint?.close) {
        await endpoint.close();
      }
    }
  }

  /**
   * Loads tables and views of one schema together with their columns, primary
   * keys, and foreign keys.
   *
   * NOTE(review): the key queries read `information_schema` views, which
   * PostgreSQL filters by ownership and privileges; a role holding only SELECT
   * may see fewer constraints than exist. Confirm against the target roles.
   */
  private async loadSchemaTables(schema: string): Promise<KloSchemaTable[]> {
    const tables = await this.queryRaw<PostgresTableRow>(
      `
      SELECT
        c.relname AS table_name,
        c.relkind AS table_kind,
        c.reltuples::bigint AS row_count,
        d.description AS table_comment
      FROM pg_catalog.pg_class c
      JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
      LEFT JOIN pg_catalog.pg_description d
        ON d.objoid = c.oid AND d.objsubid = 0
      WHERE n.nspname = $1
        AND c.relkind IN ('r', 'v')
      ORDER BY c.relname
      `,
      [schema],
    );
    const columns = await this.queryRaw<PostgresColumnRow>(
      `
      SELECT
        c.relname AS table_name,
        a.attname AS column_name,
        format_type(a.atttypid, a.atttypmod) AS data_type,
        NOT a.attnotnull AS is_nullable,
        d.description AS column_comment
      FROM pg_catalog.pg_attribute a
      JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
      JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
      LEFT JOIN pg_catalog.pg_description d
        ON d.objoid = c.oid AND d.objsubid = a.attnum
      WHERE n.nspname = $1
        AND c.relkind IN ('r', 'v')
        AND a.attnum > 0
        AND NOT a.attisdropped
      ORDER BY c.relname, a.attnum
      `,
      [schema],
    );
    // Constraint names are unique per table, not per schema, so the key-column
    // join also matches on table_name to avoid mixing same-named constraints.
    const primaryKeys = await this.queryRaw<PostgresPrimaryKeyRow>(
      `
      SELECT tc.table_name, kcu.column_name
      FROM information_schema.table_constraints tc
      JOIN information_schema.key_column_usage kcu
        ON tc.constraint_name = kcu.constraint_name
        AND tc.table_schema = kcu.table_schema
        AND tc.table_name = kcu.table_name
      WHERE tc.constraint_type = 'PRIMARY KEY'
        AND tc.table_schema = $1
      ORDER BY tc.table_name, kcu.ordinal_position
      `,
      [schema],
    );
    // constraint_column_usage.table_schema is the schema of the REFERENCED
    // table, so the join uses constraint_schema; joining on table_schema would
    // silently drop foreign keys that point into another schema.
    const foreignKeys = await this.queryRaw<PostgresForeignKeyRow>(
      `
      SELECT
        tc.table_name,
        kcu.column_name,
        ccu.table_schema AS foreign_table_schema,
        ccu.table_name AS foreign_table_name,
        ccu.column_name AS foreign_column_name,
        tc.constraint_name
      FROM information_schema.table_constraints AS tc
      JOIN information_schema.key_column_usage AS kcu
        ON tc.constraint_name = kcu.constraint_name
        AND tc.table_schema = kcu.table_schema
        AND tc.table_name = kcu.table_name
      JOIN information_schema.constraint_column_usage AS ccu
        ON ccu.constraint_name = tc.constraint_name
        AND ccu.constraint_schema = tc.constraint_schema
      WHERE tc.constraint_type = 'FOREIGN KEY'
        AND tc.table_schema = $1
      ORDER BY tc.table_name, kcu.column_name
      `,
      [schema],
    );
    const columnsByTable = groupByTable(columns);
    const primaryKeysByTable = primaryKeyMap(primaryKeys);
    const foreignKeysByTable = groupByTable(foreignKeys);
    return tables.map((table) =>
      this.toSchemaTable(
        schema,
        table,
        columnsByTable.get(table.table_name) ?? [],
        primaryKeysByTable.get(table.table_name) ?? new Set<string>(),
        foreignKeysByTable.get(table.table_name) ?? [],
      ),
    );
  }

  /** Assembles one snapshot table from its catalog rows. */
  private toSchemaTable(
    schema: string,
    table: PostgresTableRow,
    columns: PostgresColumnRow[],
    primaryKeys: Set<string>,
    foreignKeys: PostgresForeignKeyRow[],
  ): KloSchemaTable {
    const kind = table.table_kind === 'v' ? 'view' : 'table';
    const rowEstimate = kind === 'view' ? null : finiteNumber(table.row_count);
    return {
      catalog: null,
      db: schema,
      name: table.table_name,
      kind,
      comment: table.table_comment || null,
      // pg_class.reltuples is -1 for a table that has never been analysed;
      // report that as "unknown" rather than as a negative row count.
      estimatedRows: rowEstimate !== null && rowEstimate < 0 ? null : rowEstimate,
      columns: columns.map((column) => this.toSchemaColumn(column, primaryKeys)),
      foreignKeys: foreignKeys.map((foreignKey) => this.toSchemaForeignKey(foreignKey)),
    };
  }

  /** Maps a catalog column row to a snapshot column. */
  private toSchemaColumn(column: PostgresColumnRow, primaryKeys: Set<string>): KloSchemaColumn {
    return {
      name: column.column_name,
      nativeType: column.data_type,
      normalizedType: this.dialect.mapDataType(column.data_type),
      dimensionType: this.dialect.mapToDimensionType(column.data_type),
      nullable: column.is_nullable,
      primaryKey: primaryKeys.has(column.column_name),
      comment: column.column_comment || null,
    };
  }

  /** Maps a foreign-key row to a snapshot foreign key. */
  private toSchemaForeignKey(row: PostgresForeignKeyRow): KloSchemaForeignKey {
    return {
      fromColumn: row.column_name,
      toCatalog: null,
      toDb: row.foreign_table_schema,
      toTable: row.foreign_table_name,
      toColumn: row.foreign_column_name,
      constraintName: row.constraint_name || null,
    };
  }

  /**
   * Returns the pool, creating it on first use.
   *
   * Creation is shared between concurrent callers: without that, two callers
   * awaiting the endpoint resolver at the same time would each resolve an
   * endpoint and build a pool, and all but the last would never be closed.
   */
  private async getPool(): Promise<KloPostgresPool> {
    if (this.pool) {
      return this.pool;
    }
    if (!this.poolCreation) {
      // Clear the marker on success and failure so a failed attempt can be retried.
      this.poolCreation = this.openPool().finally(() => {
        this.poolCreation = null;
      });
    }
    return this.poolCreation;
  }

  /** Resolves the endpoint (when a resolver is configured) and creates the pool. */
  private async openPool(): Promise<KloPostgresPool> {
    let config = { ...this.poolConfig };
    if (this.endpointResolver) {
      const endpoint = await this.endpointResolver.resolve({
        host: config.host ?? this.connection.host ?? 'localhost',
        port: config.port ?? numberValue(this.connection.port) ?? 5432,
        connection: this.connection,
      });
      this.resolvedEndpoint = endpoint;
      config = { ...config, host: endpoint.host, port: endpoint.port };
    }
    const pool = this.poolFactory.createPool(config);
    this.pool = pool;
    return pool;
  }

  /** Runs connector-generated SQL and returns the raw rows; the client is always released. */
  private async queryRaw<T>(sql: string, params?: unknown[]): Promise<T[]> {
    const pool = await this.getPool();
    const client = await pool.connect();
    try {
      const result = await client.query(sql, params);
      return result.rows as T[];
    } finally {
      client.release();
    }
  }

  /**
   * Runs SQL after a read-only check and returns it as headers plus positional
   * rows. Only array `params` are forwarded to the driver. Type OIDs missing
   * from `PG_OID_TYPE_MAP` are reported as `oid:<n>`.
   */
  private async query(sql: string, params?: Record<string, unknown> | unknown[]): Promise<KloQueryResult> {
    const pool = await this.getPool();
    const client = await pool.connect();
    try {
      const result = await client.query(assertReadOnlySql(sql), Array.isArray(params) ? params : undefined);
      return {
        headers: (result.fields ?? []).map((field) => field.name),
        headerTypes: (result.fields ?? []).map((field) => PG_OID_TYPE_MAP[field.dataTypeID] ?? `oid:${field.dataTypeID}`),
        rows: queryRows(result),
        totalRows: result.rows.length,
        rowCount: result.rows.length,
      };
    } finally {
      client.release();
    }
  }

  /** Rejects requests addressed to a different connection id. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`PostgreSQL connector ${this.connectionId} cannot run scan for ${connectionId}`);
    }
  }
}

View file

@ -0,0 +1,52 @@
import { describe, expect, it } from 'vitest';
import { KloPostgresDialect } from './dialect.js';
describe('KloPostgresDialect', () => {
  const dialect = new KloPostgresDialect();
  const ordersTable = '"public"."orders"';

  it('quotes identifiers and formats schema-qualified tables', () => {
    // Embedded double quotes are doubled inside the quoted identifier.
    const quoted = dialect.quoteIdentifier('order"items');
    expect(quoted).toBe('"order""items"');

    const qualified = dialect.formatTableName({ catalog: null, db: 'public', name: 'orders' });
    const unqualified = dialect.formatTableName({ catalog: null, db: null, name: 'orders' });
    expect(qualified).toBe('"public"."orders"');
    expect(unqualified).toBe('"orders"');
  });

  it('maps native PostgreSQL types to KLO dimension types', () => {
    const expectations: Array<[string, string]> = [
      ['timestamp with time zone', 'time'],
      ['numeric(12,2)', 'number'],
      ['uuid', 'string'],
      ['boolean', 'boolean'],
      ['jsonb', 'string'],
    ];
    for (const [nativeType, dimensionType] of expectations) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('generates sample, distinct-value, statistics, and time SQL', () => {
    const sampleSql = dialect.generateSampleQuery(ordersTable, 5, ['id', 'status']);
    expect(sampleSql).toBe('SELECT "id", "status" FROM "public"."orders" LIMIT 5');

    const columnSampleSql = dialect.generateColumnSampleQuery(ordersTable, 'status', 10);
    expect(columnSampleSql).toContain('TRIM(CAST("status" AS TEXT)) != \'\'');

    const distinctSql = dialect.generateDistinctValuesQuery(ordersTable, '"status"', 20);
    expect(distinctSql).toContain('SELECT DISTINCT "status"::text AS val');

    const statisticsSql = dialect.generateColumnStatisticsQuery('public', 'orders');
    expect(statisticsSql).toContain('FROM pg_stats s');

    const truncated = dialect.getTimeTruncExpression('"created_at"', 'month');
    expect(truncated).toBe('DATE_TRUNC(\'month\', "created_at")');
  });

  it('prepares named parameters with PostgreSQL positional parameters', () => {
    const simple = dialect.prepareQuery('select * from orders where id = :id and status = :status', {
      id: 1,
      status: 'paid',
    });
    expect(simple).toEqual({
      sql: 'select * from orders where id = $1 and status = $2',
      params: [1, 'paid'],
    });

    // One parameter name is a prefix of the other; each must map to its own slot.
    const prefixed = dialect.prepareQuery('select :Client_Name_10, :Client_Name_1', {
      Client_Name_1: 'short',
      Client_Name_10: 'long',
    });
    expect(prefixed).toEqual({
      sql: 'select $2, $1',
      params: ['short', 'long'],
    });
  });
});

View file

@ -0,0 +1,213 @@
import type { KloSchemaDimensionType, KloTableRef } from '@klo/context/scan';
/** Table reference accepted for name formatting: `name` is required, `catalog` and `db` may be omitted. */
type PostgresTableNameRef = Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>;
/**
 * SQL-text helpers for PostgreSQL: identifier quoting, native-type to
 * dimension-type mapping, and generators for sampling, cardinality,
 * statistics and time-bucketing expressions.
 *
 * Unless a method says otherwise, `tableName` / `columnName` / `column`
 * arguments are interpolated verbatim, so callers must pass already-quoted
 * SQL fragments.
 */
export class KloPostgresDialect {
  readonly type = 'postgresql';

  /** Exact (lower-case, length-stripped) native type names and their dimension types. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    timestamp: 'time',
    'timestamp without time zone': 'time',
    'timestamp with time zone': 'time',
    timestamptz: 'time',
    datetime: 'time',
    date: 'time',
    time: 'time',
    integer: 'number',
    int: 'number',
    int2: 'number',
    int4: 'number',
    int8: 'number',
    bigint: 'number',
    smallint: 'number',
    decimal: 'number',
    numeric: 'number',
    float: 'number',
    float4: 'number',
    float8: 'number',
    'double precision': 'number',
    real: 'number',
    money: 'number',
    text: 'string',
    varchar: 'string',
    'character varying': 'string',
    char: 'string',
    character: 'string',
    uuid: 'string',
    json: 'string',
    jsonb: 'string',
    boolean: 'boolean',
    bool: 'boolean',
  };

  /** Wraps an identifier in double quotes, doubling any embedded double quote. */
  quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /**
   * Formats a table reference as `"db"."name"`, or just `"name"` when `db` is
   * empty or missing. The catalog is not part of the output.
   */
  formatTableName(table: PostgresTableNameRef): string {
    return table.db
      ? `${this.quoteIdentifier(table.db)}.${this.quoteIdentifier(table.name)}`
      : this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Maps a native PostgreSQL type name to a dimension type.
   *
   * The name is lower-cased, trimmed and stripped of any `(...)` suffix, then
   * looked up in the exact-match table. Unknown names fall back to substring
   * heuristics and finally to `'string'`.
   *
   * @param nativeType - Native type name, e.g. `numeric(12,2)`.
   * @returns The dimension type; `'string'` for empty or unrecognised input.
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    const lower = nativeType.toLowerCase().trim();
    const parenIndex = lower.indexOf('(');
    const normalized = parenIndex >= 0 ? lower.slice(0, parenIndex).trim() : lower;

    // Own-property check: a type named e.g. `constructor` must not resolve to
    // a member inherited from Object.prototype.
    const exact = Object.prototype.hasOwnProperty.call(this.typeMappings, normalized)
      ? this.typeMappings[normalized]
      : undefined;
    if (exact) {
      return exact;
    }

    if (normalized.includes('time') || normalized.includes('date')) {
      return 'time';
    }
    const numericHints = ['int', 'num', 'dec', 'float', 'double'];
    if (numericHints.some((hint) => normalized.includes(hint))) {
      return 'number';
    }
    if (normalized.includes('bool')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT <columns> FROM <table> LIMIT <limit>`.
   *
   * @param tableName - Already-quoted table expression.
   * @param limit - Row limit.
   * @param columns - Unquoted column names (quoted here); `*` when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /**
   * Builds a query sampling non-null values of one column whose text form is
   * not blank.
   *
   * @param tableName - Already-quoted table expression.
   * @param columnName - Unquoted column name (quoted here).
   * @param limit - Row limit.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quotedColumn = this.quoteIdentifier(columnName);
    return `SELECT ${quotedColumn} FROM ${tableName} WHERE ${quotedColumn} IS NOT NULL AND TRIM(CAST(${quotedColumn} AS TEXT)) != '' LIMIT ${limit}`;
  }

  /**
   * Rewrites `:name` placeholders into PostgreSQL positional parameters.
   *
   * Positions follow the key order of `params` (`$1` for the first key, and
   * so on), and every value is returned even if its placeholder never occurs.
   * A placeholder must be followed by a word boundary, so `:id` does not match
   * inside `:id_2`. Text after a second colon is left alone, so `::date` casts
   * survive even when a parameter is called `date`.
   *
   * @param sql - Statement containing `:name` placeholders.
   * @param params - Named values; when omitted the SQL is returned untouched.
   * @returns The rewritten SQL and the positional values.
   */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown[] } {
    if (!params) {
      return { sql, params: undefined };
    }
    const paramNames = Object.keys(params);
    const values: unknown[] = paramNames.map((name) => params[name]);

    const positionByName = new Map<string, number>();
    paramNames.forEach((name, index) => positionByName.set(name, index + 1));

    // Longest names first so the alternation prefers the most specific name.
    // Names are regex-escaped so characters like `.` or `$` match literally.
    const alternation = paramNames
      .filter((name) => name.length > 0)
      .sort((a, b) => b.length - a.length)
      .map((name) => name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
      .join('|');
    if (alternation === '') {
      return { sql, params: values };
    }

    // (?<!:) skips the second colon of a `::type` cast.
    const placeholder = new RegExp(`(?<!:):(${alternation})\\b`, 'g');
    // A replacer function is used so `$1` is never read as a capture-group reference.
    const parameterizedQuery = sql.replace(placeholder, (_match, name: string) => `$${positionByName.get(name)}`);
    return { sql: parameterizedQuery, params: values };
  }

  /** Returns `RANDOM() < pct` for a fraction strictly between 0 and 1, otherwise an empty string. */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `RANDOM() < ${samplePct}`;
  }

  /**
   * Returns a `TABLESAMPLE SYSTEM (...)` clause for a fraction strictly between
   * 0 and 1 (converted to a percentage), otherwise an empty string.
   */
  getTableSampleClause(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    return `TABLESAMPLE SYSTEM (${samplePct * 100})`;
  }

  /** Returns `LIMIT n`, adding `OFFSET m` only when a positive offset is given. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Returns an aggregate expression counting rows where `column` is NULL. */
  getNullCountExpression(column: string): string {
    return `COUNT(*) FILTER (WHERE ${column} IS NULL)`;
  }

  /** Returns an aggregate expression counting distinct values of `column`. */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Builds a query counting distinct non-null values among the first
   * `sampleSize` rows returned for the column (no explicit ordering).
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
WITH sampled AS (
SELECT ${columnName} AS val
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
LIMIT ${sampleSize}
)
SELECT COUNT(DISTINCT val) AS cardinality
FROM sampled
`;
  }

  /** Builds a query returning up to `limit` distinct non-null values of the column, cast to text and ordered. */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
SELECT DISTINCT ${columnName}::text AS val
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
ORDER BY val
LIMIT ${limit}
`;
  }

  /**
   * Builds a query reading per-column cardinality estimates from `pg_stats`.
   *
   * A positive `n_distinct` is used directly; a negative one is treated as a
   * fraction of `pg_class.reltuples`. Schema and table names are embedded as
   * escaped string literals.
   *
   * @param schemaName - Unquoted schema name.
   * @param tableName - Unquoted table name.
   */
  generateColumnStatisticsQuery(schemaName: string, tableName: string): string | null {
    return `
SELECT
s.attname AS column_name,
CASE
WHEN s.n_distinct > 0 THEN s.n_distinct::bigint
WHEN s.n_distinct < 0 THEN (-s.n_distinct * c.reltuples)::bigint
ELSE NULL
END AS estimated_cardinality
FROM pg_stats s
JOIN pg_class c ON c.relname = s.tablename
JOIN pg_namespace n ON c.relnamespace = n.oid AND n.nspname = s.schemaname
WHERE s.schemaname = ${this.quoteStringLiteral(schemaName)}
AND s.tablename = ${this.quoteStringLiteral(tableName)}
AND s.n_distinct IS NOT NULL
`;
  }

  /** Same as {@link generateCardinalitySampleQuery}, but samples rows in `RANDOM()` order. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
WITH sampled AS (
SELECT ${columnName} AS val
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
ORDER BY RANDOM()
LIMIT ${sampleSize}
)
SELECT COUNT(DISTINCT val) AS cardinality
FROM sampled
`;
  }

  /**
   * Returns `DATE_TRUNC('<granularity>', <column>)`, applying
   * `AT TIME ZONE '<timezone>'` to the column first when a timezone is given.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    timezone?: string,
  ): string {
    return `DATE_TRUNC('${granularity}', ${this.withTimeZone(column, timezone)})`;
  }

  /**
   * Returns an expression flooring `column` to multiples of `interval`
   * counted from `origin` (default `1970-01-01`).
   *
   * @param column - Already-quoted column expression.
   * @param interval - Interval text, embedded as an escaped literal.
   * @param origin - Optional origin timestamp text.
   * @param timezone - Optional timezone applied to the column first.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, timezone?: string): string {
    const col = this.withTimeZone(column, timezone);
    const originExpr = origin ? `TIMESTAMP ${this.quoteStringLiteral(origin)}` : "TIMESTAMP '1970-01-01'";
    const intervalExpr = this.parseIntervalToSql(interval);
    return `${originExpr} + FLOOR(EXTRACT(EPOCH FROM (${col} - ${originExpr})) / EXTRACT(EPOCH FROM ${intervalExpr})) * ${intervalExpr}`;
  }

  /** Returns `INTERVAL '<interval>'` with embedded single quotes doubled. */
  parseIntervalToSql(interval: string): string {
    return `INTERVAL ${this.quoteStringLiteral(interval)}`;
  }

  /** Wraps a value in single quotes, doubling any embedded single quote. */
  private quoteStringLiteral(value: string): string {
    return `'${value.replace(/'/g, "''")}'`;
  }

  /** Applies `AT TIME ZONE` to a column expression when a timezone is given. */
  private withTimeZone(column: string, timezone?: string): string {
    return timezone ? `(${column} AT TIME ZONE ${this.quoteStringLiteral(timezone)})` : column;
  }
}

View file

@ -0,0 +1,50 @@
import { describe, expect, it, vi } from 'vitest';
import { KloPostgresHistoricSqlQueryClient } from './historic-sql-query-client.js';
import type { KloPostgresPoolConfig, KloPostgresPoolFactory } from './connector.js';
// Verifies that the historic-SQL client forwards SQL and parameters to the pool and cleans up after itself.
describe('KloPostgresHistoricSqlQueryClient', () => {
  it('executes parameterized read-only SQL through the native Postgres connector pool', async () => {
    const recordedQueries: Array<{ sql: string; params?: unknown[] }> = [];
    const release = vi.fn();
    const end = vi.fn(async () => {});

    // Fake client: records every statement and answers with one fixed int4 row.
    const fakeClient = {
      async query(sql: string, params?: unknown[]) {
        recordedQueries.push({ sql, params });
        return {
          fields: [{ name: 'answer', dataTypeID: 23 }],
          rows: [{ answer: 42 }],
        };
      },
      release,
    };
    const poolFactory: KloPostgresPoolFactory = {
      createPool(_config: KloPostgresPoolConfig) {
        return {
          async connect() {
            return fakeClient;
          },
          end,
        };
      },
    };

    const client = new KloPostgresHistoricSqlQueryClient({
      connectionId: 'warehouse',
      connection: {
        driver: 'postgres',
        readonly: true,
        url: 'postgresql://readonly:secret@pg.example.test/warehouse', // pragma: allowlist secret
      },
      poolFactory,
    });

    const pending = client.executeQuery('SELECT $1::int AS answer', [42]);
    await expect(pending).resolves.toEqual({
      headers: ['answer'],
      rows: [[42]],
      totalRows: 1,
    });
    expect(recordedQueries).toEqual([{ sql: 'SELECT $1::int AS answer', params: [42] }]);

    await client.cleanup();
    expect(release).toHaveBeenCalledTimes(1);
    expect(end).toHaveBeenCalledTimes(1);
  });
});

View file

@ -0,0 +1,37 @@
import type { KloPostgresQueryClient } from '@klo/context/ingest';
import { KloPostgresScanConnector, type KloPostgresScanConnectorOptions } from './connector.js';
/** Options for the historic-SQL query client; an alias of the scan connector's options. */
export type KloPostgresHistoricSqlQueryClientOptions = KloPostgresScanConnectorOptions;
/**
 * Query client that runs SQL through its own `KloPostgresScanConnector`,
 * using the connector's read-only execution path.
 */
export class KloPostgresHistoricSqlQueryClient implements KloPostgresQueryClient {
  private readonly connectionId: string;
  private readonly connector: KloPostgresScanConnector;

  /**
   * @param options - Connector options; the connector is built from them and
   *   `connectionId` is reused for every query.
   */
  constructor(options: KloPostgresHistoricSqlQueryClientOptions) {
    this.connectionId = options.connectionId;
    this.connector = new KloPostgresScanConnector(options);
  }

  /**
   * Executes a statement via the connector's `executeReadOnly`.
   *
   * @param sql - Statement text.
   * @param params - Optional positional parameters.
   * @returns The headers, rows and total row count of the connector's result.
   */
  async executeQuery(
    sql: string,
    params?: unknown[],
  ): Promise<{ headers: string[]; rows: unknown[][]; totalRows: number }> {
    const request = { connectionId: this.connectionId, sql, params };
    // An empty object stands in for the connector's second (context) argument.
    const { headers, rows, totalRows } = await this.connector.executeReadOnly(request, {} as never);
    return { headers, rows, totalRows };
  }

  /** Delegates to the underlying connector's `cleanup`. */
  async cleanup(): Promise<void> {
    await this.connector.cleanup();
  }
}

View file

@ -0,0 +1,21 @@
export { KloPostgresDialect } from './dialect.js';
export {
isKloPostgresConnectionConfig,
KloPostgresScanConnector,
postgresPoolConfigFromConfig,
type KloPostgresColumnDistinctValuesOptions,
type KloPostgresColumnDistinctValuesResult,
type KloPostgresColumnStatisticsResult,
type KloPostgresConnectionConfig,
type KloPostgresEndpointResolver,
type KloPostgresPoolConfig,
type KloPostgresPoolFactory,
type KloPostgresReadOnlyQueryInput,
type KloPostgresScanConnectorOptions,
type KloPostgresTableSampleResult,
} from './connector.js';
export {
KloPostgresHistoricSqlQueryClient,
type KloPostgresHistoricSqlQueryClientOptions,
} from './historic-sql-query-client.js';
export { createPostgresLiveDatabaseIntrospection } from './live-database-introspection.js';

View file

@ -0,0 +1,37 @@
import type { LiveDatabaseIntrospectionPort } from '@klo/context/ingest';
import type { KloProjectConnectionConfig } from '@klo/context/project';
import {
KloPostgresScanConnector,
type KloPostgresConnectionConfig,
type KloPostgresEndpointResolver,
type KloPostgresPoolFactory,
} from './connector.js';
/** Options accepted by `createPostgresLiveDatabaseIntrospection`. */
interface CreatePostgresLiveDatabaseIntrospectionOptions {
// Project connection configs keyed by connection id; looked up per `extractSchema` call.
connections: Record<string, KloProjectConnectionConfig>;
// Optional pool factory, passed through to each connector that is created.
poolFactory?: KloPostgresPoolFactory;
// Optional endpoint resolver, passed through to each connector that is created.
endpointResolver?: KloPostgresEndpointResolver;
// Optional clock, passed through to each connector that is created.
now?: () => Date;
}
/**
 * Creates a live-database introspection port backed by PostgreSQL scan connectors.
 *
 * Each `extractSchema` call builds a fresh connector for the requested
 * connection id, runs `introspect` on it, and always calls `cleanup` on the
 * connector afterwards, whether introspection succeeded or threw.
 *
 * @param options - Connection configs plus optional connector collaborators.
 * @returns A port exposing `extractSchema`.
 */
export function createPostgresLiveDatabaseIntrospection(
  options: CreatePostgresLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    // Options are read on every call so later changes to them are picked up.
    const { connections, poolFactory, endpointResolver, now } = options;
    const connector = new KloPostgresScanConnector({
      connectionId,
      connection: connections[connectionId] as KloPostgresConnectionConfig | undefined,
      poolFactory,
      endpointResolver,
      now,
    });
    try {
      const scanInput = { connectionId, driver: 'postgres' } as const;
      return await connector.introspect(scanInput, { runId: `postgres-${connectionId}` });
    } finally {
      await connector.cleanup();
    }
  };
  return { extractSchema };
}

View file

@ -0,0 +1,13 @@
import { describe, expect, it } from 'vitest';
// Smoke test: the package entry point exposes its public classes and factory functions.
describe('@klo/connector-postgres package exports', () => {
  it('exports the connector, dialect, and live-database adapter', async () => {
    const entryPoint = await import('./index.js');
    const expectedFunctions = [
      'KloPostgresDialect',
      'KloPostgresScanConnector',
      'KloPostgresHistoricSqlQueryClient',
      'createPostgresLiveDatabaseIntrospection',
      'isKloPostgresConnectionConfig',
      'postgresPoolConfigFromConfig',
    ] as const;
    for (const exportName of expectedFunctions) {
      expect(entryPoint[exportName]).toBeTypeOf('function');
    }
  });
});