Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,255 @@
import Database from 'better-sqlite3';
import { writeFileSync } from 'node:fs';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import {
createSqliteLiveDatabaseIntrospection,
isKloSqliteConnectionConfig,
KloSqliteScanConnector,
sqliteDatabasePathFromConfig,
} from './index.js';
// Integration tests for the native SQLite connector. Every test runs against a
// throwaway database file that is created in `beforeEach` and deleted in `afterEach`.
describe('KloSqliteScanConnector', () => {
// Per-test temp directory and the database file created inside it.
let tempDir: string;
let dbPath: string;
// Fixture: two tables linked by a foreign key (orders.customer_id -> customers.id),
// one view over `orders`, and two rows in each table.
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'klo-connector-sqlite-'));
dbPath = join(tempDir, 'warehouse.db');
const db = new Database(dbPath);
db.exec(`
PRAGMA foreign_keys = ON;
CREATE TABLE customers (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
tier TEXT
);
CREATE TABLE orders (
id INTEGER PRIMARY KEY,
customer_id INTEGER NOT NULL,
status TEXT,
total NUMERIC,
created_at TEXT,
FOREIGN KEY(customer_id) REFERENCES customers(id)
);
CREATE VIEW recent_orders AS SELECT id, customer_id, status FROM orders;
INSERT INTO customers (id, name, tier) VALUES (1, 'Ada', 'enterprise'), (2, 'Grace', 'growth');
INSERT INTO orders (id, customer_id, status, total, created_at)
VALUES (10, 1, 'paid', 42.5, '2026-04-28'), (11, 2, 'open', 9.5, '2026-04-29');
`);
// Close the writer handle so the file on disk is complete before a test opens it.
db.close();
});
// Remove the temp directory (and the database in it) after every test.
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Covers each supported way of pointing at the database file, plus the readonly guard.
it('resolves SQLite path configuration safely', () => {
// Remember any pre-existing value so the `finally` block can restore it.
const originalDatabaseUrl = process.env.KLO_SQLITE_TEST_URL;
// A "pointer" file whose contents are the real database path.
const pointerPath = join(tempDir, 'sqlite-path.txt');
process.env.KLO_SQLITE_TEST_URL = `sqlite:${dbPath}`;
writeFileSync(pointerPath, dbPath, 'utf-8');
try {
// Driver detection: only the sqlite driver is accepted.
expect(isKloSqliteConnectionConfig({ driver: 'sqlite', path: 'warehouse.db', readonly: true })).toBe(true);
expect(isKloSqliteConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL', readonly: true })).toBe(
false,
);
// Relative `path` is resolved against `projectDir`.
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: 'warehouse.db', readonly: true },
}),
).toBe(dbPath);
// `url` given as an `env:` reference to a `sqlite:` URL.
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', url: 'env:KLO_SQLITE_TEST_URL', readonly: true },
}),
).toBe(dbPath);
// `url` given directly as a `file://` URL.
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', url: `file://${dbPath}`, readonly: true },
}),
).toBe(dbPath);
// `path` given as a `file:` reference: the path is read from the pointer file's contents.
expect(
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: `file:${pointerPath}`, readonly: true },
}),
).toBe(dbPath);
// A connection that is not explicitly readonly is rejected.
expect(() =>
sqliteDatabasePathFromConfig({
connectionId: 'warehouse',
projectDir: tempDir,
connection: { driver: 'sqlite', path: 'warehouse.db', readonly: false },
}),
).toThrow('Native SQLite connector requires connections.warehouse.readonly: true');
} finally {
// Put the environment variable back exactly as it was found.
if (originalDatabaseUrl === undefined) {
delete process.env.KLO_SQLITE_TEST_URL;
} else {
process.env.KLO_SQLITE_TEST_URL = originalDatabaseUrl;
}
}
});
it('introspects schema, primary keys, row counts, views, and foreign keys', async () => {
// `now` is injected so `extractedAt` is deterministic.
const connector = new KloSqliteScanConnector({
connectionId: 'warehouse',
connection: { driver: 'sqlite', path: dbPath, readonly: true },
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlite' },
{ runId: 'scan-run-1' },
);
// 3 objects = 2 tables + 1 view; 11 columns = 3 (customers) + 5 (orders) + 3 (recent_orders).
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'sqlite',
extractedAt: '2026-04-29T10:00:00.000Z',
metadata: {
file_path: dbPath,
table_count: 3,
total_columns: 11,
},
});
// Objects come back ordered by name; the view is expected to carry no row count.
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows])).toEqual([
['customers', 'table', 2],
['orders', 'table', 2],
['recent_orders', 'view', null],
]);
// The primary-key column is expected to be reported as non-nullable.
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'INTEGER',
normalizedType: 'INTEGER',
dimensionType: 'number',
nullable: false,
primaryKey: true,
});
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
{
fromColumn: 'customer_id',
toCatalog: null,
toDb: null,
toTable: 'customers',
toColumn: 'id',
constraintName: null,
},
]);
});
it('runs samples, distinct values, statistics, and read-only SQL', async () => {
const connector = new KloSqliteScanConnector({
connectionId: 'warehouse',
connection: { driver: 'sqlite', path: dbPath, readonly: true },
});
// Table sample restricted to one column and one row.
await expect(
connector.sampleTable(
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, columns: ['id'], limit: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id'], rows: [[10]], totalRows: 1 });
// Column sample: values only; null/distinct counts are expected to be null.
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
// Distinct values are expected in sorted order together with their cardinality.
await expect(
connector.getColumnDistinctValues(
{ catalog: null, db: null, name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
// With `maxRows: 1` a single row is expected back from a two-row table.
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from orders order by id', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
// Write statements must be rejected.
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
// Column statistics are not provided by this connector.
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
});
it('adapts native SQLite snapshots to live-database introspection for local ingest', async () => {
// The adapter is given the project directory and a relative `path`.
const introspection = createSqliteLiveDatabaseIntrospection({
projectDir: tempDir,
connections: {
warehouse: { driver: 'sqlite', path: 'warehouse.db', readonly: true },
},
now: () => new Date('2026-04-29T10:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T10:00:00.000Z',
});
// Full column listing for `customers`, which has no foreign keys.
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: null,
db: null,
columns: [
{
name: 'id',
nativeType: 'INTEGER',
normalizedType: 'INTEGER',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: null,
},
{
name: 'name',
nativeType: 'TEXT',
normalizedType: 'TEXT',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
{
name: 'tier',
nativeType: 'TEXT',
normalizedType: 'TEXT',
dimensionType: 'string',
nullable: true,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
// `orders` must expose its foreign key to `customers`.
expect(snapshot.tables.find((table) => table.name === 'orders')).toMatchObject({
name: 'orders',
catalog: null,
db: null,
foreignKeys: [{ fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }],
});
});
});

View file

@ -0,0 +1,371 @@
import Database from 'better-sqlite3';
import { existsSync, readFileSync, statSync } from 'node:fs';
import { homedir } from 'node:os';
import { isAbsolute, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { assertReadOnlySql, limitSqlForExecution, normalizeQueryRows } from '@klo/context/connections';
import {
createKloConnectorCapabilities,
type KloColumnSampleInput,
type KloColumnSampleResult,
type KloColumnStatsInput,
type KloColumnStatsResult,
type KloQueryResult,
type KloReadOnlyQueryInput,
type KloScanConnector,
type KloScanContext,
type KloScanInput,
type KloSchemaForeignKey,
type KloSchemaSnapshot,
type KloSchemaTable,
type KloTableRef,
type KloTableSampleInput,
type KloTableSampleResult,
} from '@klo/context/scan';
import { KloSqliteDialect } from './dialect.js';
/**
 * Connection settings understood by the native SQLite connector.
 *
 * The database file is located from `path`, then `file_path`, then `url`
 * (see `sqliteDatabasePathFromConfig`). Any of the three may be written as
 * `env:NAME` to read the value from an environment variable; `path` and
 * `file_path` may also be written as `file:<pointer>` to read the value from
 * the contents of another file.
 */
export interface KloSqliteConnectionConfig {
// Driver name; `sqlite` and `sqlite3` are accepted, compared case-insensitively.
driver?: string;
// Location of the database file; relative values are resolved against the project directory.
path?: string;
// `file:` or `sqlite:` URL (or a plain path) pointing at the database file.
url?: string;
// Alternative spelling of `path`, consulted when `path` is absent.
file_path?: string;
// Must be exactly `true`; the connector refuses to run otherwise.
readonly?: boolean;
// Additional keys are tolerated and not interpreted by this module.
[key: string]: unknown;
}
/** Input for `sqliteDatabasePathFromConfig`. */
export interface SqliteDatabasePathInput {
// Connection name; used in error messages (`connections.<id>...`).
connectionId: string;
// Base directory for relative database paths; `process.cwd()` is used when omitted.
projectDir?: string;
// The connection's configuration; `undefined` is rejected as an unknown driver.
connection: KloSqliteConnectionConfig | undefined;
}
/** Constructor options for `KloSqliteScanConnector`. */
export interface KloSqliteScanConnectorOptions extends SqliteDatabasePathInput {
// Clock used for the snapshot's `extractedAt`; defaults to `() => new Date()`.
now?: () => Date;
}
/** Read-only query input extended with optional bind parameters. */
export interface KloSqliteReadOnlyQueryInput extends KloReadOnlyQueryInput {
// Passed unchanged to the prepared statement's `all()` call when present.
params?: Record<string, unknown> | unknown[];
}
/** Tuning knobs for `KloSqliteScanConnector.getColumnDistinctValues`. */
export interface KloSqliteColumnDistinctValuesOptions {
// When the measured cardinality exceeds this, no values are fetched.
maxCardinality: number;
// Maximum number of distinct values to return.
limit: number;
// Number of non-null rows used for the cardinality measurement; defaults to 10000.
sampleSize?: number;
}
/** Result of `KloSqliteScanConnector.getColumnDistinctValues`. */
export interface KloSqliteColumnDistinctValuesResult {
// Distinct values as strings; `null` when the cardinality exceeded `maxCardinality`.
values: string[] | null;
// Distinct non-null count measured over the sampled rows.
cardinality: number;
}
// Row shape selected from `sqlite_master` during introspection (tables and views only).
interface SqliteMasterRow {
name: string;
type: 'table' | 'view';
}
// Row shape read from `PRAGMA table_info(...)`.
interface SqliteTableInfoRow {
cid: number;
name: string;
// Declared column type, passed to the dialect's type-mapping helpers.
type: string;
// 0 means no NOT NULL constraint was declared.
notnull: number;
dflt_value: unknown;
// Greater than 0 when the column is part of the primary key.
pk: number;
}
// Row shape read from `PRAGMA foreign_key_list(...)`.
interface SqliteForeignKeyRow {
// `id` then `seq` determine the order in which foreign keys are reported.
id: number;
seq: number;
// Referenced table.
table: string;
// Referencing column in the inspected table.
from: string;
// Referenced column in `table`.
to: string;
}
/**
 * Reads one setting from the connection config as a resolved string.
 *
 * @param connection Connection config, possibly undefined.
 * @param key Setting to read.
 * @returns The trimmed value after `env:` / `file:` reference resolution, or
 *   `undefined` when the setting is missing, not a string, or blank.
 */
function stringConfigValue(
  connection: KloSqliteConnectionConfig | undefined,
  key: keyof KloSqliteConnectionConfig,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(key, trimmed);
}
/**
 * Resolves indirection prefixes on a configuration value.
 *
 * - `env:NAME` yields the environment variable `NAME`, or `''` when it is unset.
 * - `file:<pointer>` (for every key except `url`) yields the trimmed contents of
 *   the pointer file. A leading `~` in the pointer path refers to the user's
 *   home directory.
 * - Anything else is returned unchanged. For `url`, a `file:` value is a real
 *   URL and is left for `sqlitePathFromUrl` to interpret.
 *
 * @param key Config key the value belongs to.
 * @param value Trimmed, non-empty config value.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when the pointer file cannot be read.
 */
function resolveStringReference(key: keyof KloSqliteConnectionConfig, value: string): string {
  if (value.startsWith('env:')) {
    return process.env[value.slice('env:'.length)] ?? '';
  }
  if (key !== 'url' && value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator after `~` before resolving: `resolve(home, '/x')` would
    // treat `/x` as absolute and silently discard the home directory.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
/**
 * Converts a database URL into a filesystem path.
 *
 * @param url A `file:` URL, a `sqlite:` URL, or a plain path.
 * @returns For `file:` the result of `fileURLToPath`; for `sqlite:` the
 *   percent-decoded URL pathname; otherwise the input unchanged.
 */
function sqlitePathFromUrl(url: string): string {
  const scheme = ['file:', 'sqlite:'].find((prefix) => url.startsWith(prefix));
  switch (scheme) {
    case 'file:':
      return fileURLToPath(url);
    case 'sqlite:': {
      const { pathname } = new URL(url);
      return decodeURIComponent(pathname);
    }
    default:
      return url;
  }
}
/**
 * Removes whitespace and SQL comments that precede the first statement token.
 *
 * Both `-- line` and block comments are skipped, in any order and number. An
 * unterminated block comment is returned as-is starting at its opener, and a
 * line comment with no terminating newline consumes the rest of the input.
 *
 * @param sql Raw SQL text.
 * @returns The SQL starting at its first non-comment, non-whitespace character.
 */
function stripLeadingSqlComments(sql: string): string {
  const isWhitespace = /\s/;
  let cursor = 0;
  for (;;) {
    while (cursor < sql.length && isWhitespace.test(sql.charAt(cursor))) {
      cursor += 1;
    }
    if (sql.startsWith('--', cursor)) {
      const lineEnd = sql.indexOf('\n', cursor + 2);
      if (lineEnd === -1) {
        // The comment runs to the end of the input: nothing is left.
        return '';
      }
      cursor = lineEnd + 1;
    } else if (sql.startsWith('/*', cursor)) {
      const close = sql.indexOf('*/', cursor + 2);
      if (close === -1) {
        return sql.slice(cursor);
      }
      cursor = close + 2;
    } else {
      return sql.slice(cursor);
    }
  }
}
/**
 * Tells whether a connection is configured for the SQLite driver.
 *
 * @param connection Connection config, possibly undefined.
 * @returns `true` when `driver` is `sqlite` or `sqlite3`, ignoring letter case.
 */
export function isKloSqliteConnectionConfig(connection: KloSqliteConnectionConfig | undefined): boolean {
  const normalizedDriver = String(connection?.driver ?? '').toLowerCase();
  return ['sqlite', 'sqlite3'].includes(normalizedDriver);
}
/**
 * Validates a SQLite connection config and returns the absolute path of its database file.
 *
 * The location is taken from `path`, then `file_path`, then `url`; later keys
 * are only consulted when the earlier ones are absent.
 *
 * @param input Connection id, optional project directory and the connection config.
 * @returns An absolute path; relative locations are resolved against
 *   `projectDir`, or `process.cwd()` when no project directory is given.
 * @throws Error when the driver is not SQLite, when `readonly` is not `true`,
 *   or when no location is configured.
 */
export function sqliteDatabasePathFromConfig(input: SqliteDatabasePathInput): string {
  const connection = input.connection;
  if (!isKloSqliteConnectionConfig(connection)) {
    throw new Error(`Native SQLite connector cannot run driver "${connection?.driver ?? 'unknown'}"`);
  }
  if (connection?.readonly !== true) {
    throw new Error(`Native SQLite connector requires connections.${input.connectionId}.readonly: true`);
  }

  // Resolve lazily: a later key is read only when every earlier one is undefined.
  let configuredPath = stringConfigValue(connection, 'path');
  if (configuredPath === undefined) {
    configuredPath = stringConfigValue(connection, 'file_path');
  }
  if (configuredPath === undefined) {
    configuredPath = sqlitePathFromUrl(stringConfigValue(connection, 'url') ?? '');
  }
  if (!configuredPath) {
    throw new Error(`Native SQLite connector requires connections.${input.connectionId}.path, file_path, or url`);
  }

  if (isAbsolute(configuredPath)) {
    return configuredPath;
  }
  return resolve(input.projectDir ?? process.cwd(), configuredPath);
}
/**
 * Scan connector for a single SQLite database file.
 *
 * The database is opened lazily, in read-only mode, on first use and stays open
 * until `cleanup()` is called. Every SQL string routed through `query()` is
 * first checked with `assertReadOnlySql`.
 */
export class KloSqliteScanConnector implements KloScanConnector {
  readonly id: string;
  readonly driver = 'sqlite' as const;
  readonly capabilities = createKloConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: false,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });
  private readonly connectionId: string;
  private readonly dbPath: string;
  private readonly now: () => Date;
  private readonly dialect = new KloSqliteDialect();
  private db: Database.Database | null = null;

  /**
   * @param options Connection id, config and optional clock.
   * @throws Error when the config is not a read-only SQLite connection or has
   *   no database location (see `sqliteDatabasePathFromConfig`).
   */
  constructor(options: KloSqliteScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.dbPath = sqliteDatabasePathFromConfig(options);
    this.now = options.now ?? (() => new Date());
    this.id = `sqlite:${options.connectionId}`;
  }

  /**
   * Checks that the database file exists and answers a trivial query.
   *
   * @returns `{ success: true }`, or `{ success: false, error }`; never rejects.
   */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      if (!existsSync(this.dbPath) || !statSync(this.dbPath).isFile()) {
        return { success: false, error: `File not found: ${this.dbPath}` };
      }
      this.database().prepare('SELECT 1').get();
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Reads every user table and view, with columns, foreign keys and row counts.
   *
   * @throws Error when `input.connectionId` is not this connector's connection.
   */
  async introspect(input: KloScanInput, _ctx: KloScanContext): Promise<KloSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const database = this.database();
    // `_` is a single-character wildcard in LIKE, so it must be escaped to
    // exclude only names with the literal `sqlite_` prefix; otherwise user
    // tables such as `sqlite3cache` or `sqlites` would be hidden as well.
    const rawTables = database
      .prepare(
        `SELECT name, type FROM sqlite_master WHERE type IN ('table', 'view') AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\' ORDER BY name`,
      )
      .all() as SqliteMasterRow[];
    const tables = rawTables.map((table) => this.readTable(database, table));
    const fileStats = existsSync(this.dbPath) ? statSync(this.dbPath) : null;
    return {
      connectionId: this.connectionId,
      driver: 'sqlite',
      extractedAt: this.now().toISOString(),
      scope: {},
      metadata: {
        file_path: this.dbPath,
        file_size: fileStats ? fileStats.size : 0,
        table_count: tables.length,
        total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables,
    };
  }

  /** Returns up to `input.limit` rows of a table, optionally restricted to `input.columns`. */
  async sampleTable(input: KloTableSampleInput, _ctx: KloScanContext): Promise<KloTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
    return { headers: result.headers, rows: result.rows, totalRows: result.totalRows };
  }

  /** Returns sampled values of one column; null and distinct counts are not computed. */
  async sampleColumn(input: KloColumnSampleInput, _ctx: KloScanContext): Promise<KloColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /** Column statistics are not supported (`capabilities.columnStats` is false); always resolves to `null`. */
  async columnStats(_input: KloColumnStatsInput, _ctx: KloScanContext): Promise<KloColumnStatsResult | null> {
    return null;
  }

  /**
   * Runs a caller-supplied query after stripping leading comments and applying
   * the row limit via `limitSqlForExecution`.
   *
   * @throws Error when the connection id does not match or `query()` rejects the SQL.
   */
  async executeReadOnly(input: KloSqliteReadOnlyQueryInput, _ctx: KloScanContext): Promise<KloQueryResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Measures a column's distinct-value count on a sample and, when it is small
   * enough, fetches the values.
   *
   * @returns `null` when the cardinality query yields nothing usable; otherwise
   *   the cardinality, with `values: null` when it exceeds `options.maxCardinality`.
   */
  async getColumnDistinctValues(
    table: KloTableRef,
    columnName: string,
    options: KloSqliteColumnDistinctValuesOptions,
  ): Promise<KloSqliteColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityResult = this.query(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    if (cardinalityResult.rows.length === 0) {
      return null;
    }
    const cardinality = Number(cardinalityResult.rows[0]?.[0]);
    if (Number.isNaN(cardinality)) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesResult = this.query(this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit));
    return {
      values: valuesResult.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => String(row[0])),
      cardinality,
    };
  }

  /** Counts the rows of a table with `COUNT(*)`. */
  async getTableRowCount(tableName: string): Promise<number> {
    const result = this.query(`SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(tableName)}`);
    return Number(result.rows[0]?.[0] ?? 0);
  }

  /** Returns the quoted table name to embed in SQL. */
  qTableName(table: Pick<KloTableRef, 'name'>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes an identifier using the dialect's rules. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Closes the database handle if one is open; the connector can be used again afterwards. */
  async cleanup(): Promise<void> {
    if (this.db) {
      this.db.close();
      this.db = null;
    }
  }

  /** Opens the database read-only on first use and returns the cached handle afterwards. */
  private database(): Database.Database {
    if (!this.db) {
      this.db = new Database(this.dbPath, { readonly: true, fileMustExist: true });
    }
    return this.db;
  }

  /**
   * Prepares and runs a statement, returning headers and normalized rows.
   *
   * @throws Whatever `assertReadOnlySql` throws for SQL it does not accept.
   */
  private query(sql: string, params?: Record<string, unknown> | unknown[]): Omit<KloQueryResult, 'rowCount'> {
    const statement = this.database().prepare(assertReadOnlySql(sql));
    const rows = (params ? statement.all(params) : statement.all()) as unknown[];
    return {
      headers: statement.columns().map((column) => column.name),
      rows: normalizeQueryRows(rows),
      totalRows: rows.length,
    };
  }

  /** Builds the schema entry for one table or view. */
  private readTable(database: Database.Database, table: SqliteMasterRow): KloSchemaTable {
    const quotedName = this.dialect.quoteIdentifier(table.name);
    const columns = database.prepare(`PRAGMA table_info(${quotedName})`).all() as SqliteTableInfoRow[];
    const foreignKeys = database.prepare(`PRAGMA foreign_key_list(${quotedName})`).all() as SqliteForeignKeyRow[];
    // Views are not counted; their row count is reported as null.
    const estimatedRows =
      table.type === 'table'
        ? Number((database.prepare(`SELECT COUNT(*) AS count FROM ${quotedName}`).get() as { count: unknown }).count)
        : null;
    return {
      catalog: null,
      db: null,
      name: table.name,
      kind: table.type,
      comment: null,
      estimatedRows,
      columns: columns.map((column) => ({
        name: column.name,
        nativeType: column.type,
        normalizedType: this.dialect.mapDataType(column.type),
        dimensionType: this.dialect.mapToDimensionType(column.type),
        // Primary-key columns are reported as non-nullable even without NOT NULL.
        nullable: column.notnull === 0 && column.pk === 0,
        primaryKey: column.pk > 0,
        comment: null,
      })),
      foreignKeys: this.mapForeignKeys(foreignKeys),
    };
  }

  /** Converts pragma rows to schema foreign keys, ordered by constraint id then column sequence. */
  private mapForeignKeys(rows: SqliteForeignKeyRow[]): KloSchemaForeignKey[] {
    // Sort a copy so the caller's array is not reordered as a side effect.
    return [...rows]
      .sort((a, b) => a.id - b.id || a.seq - b.seq)
      .map((row) => ({
        fromColumn: row.from,
        toCatalog: null,
        toDb: null,
        toTable: row.table,
        toColumn: row.to,
        constraintName: null,
      }));
  }

  /** Rejects requests addressed to a different connection than the one this connector was built for. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`KLO SQLite connector ${this.id} cannot serve connection ${connectionId}`);
    }
  }
}

View file

@ -0,0 +1,33 @@
import { describe, expect, it } from 'vitest';
import { KloSqliteDialect } from './dialect.js';
// Unit tests for the SQL-generation and type-mapping helpers of the SQLite dialect.
// No database is opened; every assertion inspects a returned string.
describe('KloSqliteDialect', () => {
const dialect = new KloSqliteDialect();
it('quotes identifiers and formats single-file SQLite table names', () => {
expect(dialect.quoteIdentifier('orders')).toBe('"orders"');
// An embedded double quote is escaped by doubling it.
expect(dialect.quoteIdentifier('weird"name')).toBe('"weird""name"');
// `catalog` and `db` do not appear in the formatted name.
expect(dialect.formatTableName({ catalog: 'ignored', db: 'ignored', name: 'orders' })).toBe('"orders"');
});
it('maps native SQLite types to KLO dimension types', () => {
expect(dialect.mapToDimensionType('INTEGER')).toBe('number');
// Lower-case names and parenthesised arguments are accepted.
expect(dialect.mapToDimensionType('numeric(10,2)')).toBe('number');
expect(dialect.mapToDimensionType('timestamp')).toBe('time');
expect(dialect.mapToDimensionType('VARCHAR(255)')).toBe('string');
expect(dialect.mapToDimensionType('bool')).toBe('boolean');
// An empty declared type maps to 'string'.
expect(dialect.mapToDimensionType('')).toBe('string');
});
it('builds sampling and distinct-value SQL without host-specific state', () => {
// Table names are passed already quoted; column names are quoted by the dialect here.
expect(dialect.generateSampleQuery('"orders"', 25, ['id', 'status'])).toBe(
'SELECT "id", "status" FROM "orders" LIMIT 25',
);
expect(dialect.generateColumnSampleQuery('"orders"', 'status', 10)).toBe(
'SELECT "status" FROM "orders" WHERE "status" IS NOT NULL AND TRIM(CAST("status" AS TEXT)) != \'\' LIMIT 10',
);
// For distinct values the column name is passed already quoted.
expect(dialect.generateDistinctValuesQuery('"orders"', '"status"', 5)).toContain(
'SELECT DISTINCT CAST("status" AS TEXT) AS val',
);
});
});

View file

@ -0,0 +1,177 @@
import type { KloSchemaDimensionType, KloTableRef } from '@klo/context/scan';
// Table reference accepted by `formatTableName`: `name` is required, `catalog` and `db` are optional.
type SqliteTableNameRef = Pick<KloTableRef, 'name'> & Partial<Pick<KloTableRef, 'catalog' | 'db'>>;
/**
 * SQL text generation and type mapping for SQLite.
 *
 * Every method returns a string (or a small plain object); nothing here opens
 * or talks to a database. Unless a method says otherwise, table and column
 * arguments are embedded verbatim and must already be quoted by the caller.
 */
export class KloSqliteDialect {
  readonly type = 'sqlite';

  /** Exact (upper-cased, argument-free) type names and their dimension type. */
  private readonly typeMappings: Record<string, KloSchemaDimensionType> = {
    DATETIME: 'time',
    DATE: 'time',
    TIMESTAMP: 'time',
    TIME: 'time',
    INTEGER: 'number',
    INT: 'number',
    REAL: 'number',
    NUMERIC: 'number',
    FLOAT: 'number',
    DOUBLE: 'number',
    TEXT: 'string',
    VARCHAR: 'string',
    CHAR: 'string',
    BLOB: 'string',
    BOOLEAN: 'boolean',
    BOOL: 'boolean',
  };

  /** Wraps an identifier in double quotes, doubling any embedded double quote. */
  quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /** Returns the quoted table name; `catalog` and `db` are ignored. */
  formatTableName(table: SqliteTableNameRef): string {
    return this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Classifies a declared column type as `time`, `number`, `boolean` or `string`.
   *
   * The name is upper-cased and stripped of any `(…)` arguments, looked up
   * exactly, and then matched by substring. Unknown or empty types are `string`.
   */
  mapToDimensionType(nativeType: string): KloSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }
    let normalized = nativeType.toUpperCase().trim();
    if (normalized.includes('(')) {
      // Trim again so `NUMERIC (10,2)` matches the exact-name table too.
      normalized = normalized.split('(')[0]?.trim() ?? normalized;
    }
    const exact = this.typeMappings[normalized];
    if (exact) {
      return exact;
    }
    if (normalized.includes('TIME') || normalized.includes('DATE')) {
      return 'time';
    }
    if (
      normalized.includes('INT') ||
      normalized.includes('NUM') ||
      normalized.includes('REAL') ||
      normalized.includes('FLOAT') ||
      normalized.includes('DOUBLE')
    ) {
      return 'number';
    }
    if (normalized.includes('BOOL')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds `SELECT … FROM table LIMIT n`.
   *
   * @param tableName Already-quoted table name.
   * @param columns Unquoted column names; `*` is selected when omitted or empty.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /**
   * Builds a query returning non-null, non-blank values of one column.
   *
   * @param tableName Already-quoted table name.
   * @param columnName Unquoted column name; it is quoted here.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quoted = this.quoteIdentifier(columnName);
    return `SELECT ${quoted} FROM ${tableName} WHERE ${quoted} IS NOT NULL AND TRIM(CAST(${quoted} AS TEXT)) != '' LIMIT ${limit}`;
  }

  /** Pairs SQL with its parameters; `params` is omitted from the result when not given. */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown } {
    return params ? { sql, params } : { sql };
  }

  /**
   * Builds a WHERE-clause fragment that keeps roughly `samplePct` of the rows.
   *
   * @param samplePct Fraction strictly between 0 and 1.
   * @returns The predicate, or `''` (no filtering) when `samplePct` is outside (0, 1).
   */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    // RANDOM() is a signed 64-bit integer and `%` keeps the sign of the left
    // operand, so without ABS every negative draw (about half of all rows)
    // would pass the `<` test. ABS is applied after `%` to stay clear of the
    // integer overflow ABS raises for the most negative 64-bit value.
    return `ABS(RANDOM() % 100) < ${Math.round(samplePct * 100)}`;
  }

  /** This dialect emits no table-sample clause; always returns `''`. */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** Builds `LIMIT n`, adding `OFFSET m` only for a positive offset. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting the NULLs of `column` (embedded verbatim). */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** Aggregate expression counting the distinct values of `column` (embedded verbatim). */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Builds a query counting distinct non-null values among the first
   * `sampleSize` non-null rows.
   *
   * @param columnName Already-quoted column name.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
WITH sampled AS (
SELECT ${columnName} AS val
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
LIMIT ${sampleSize}
)
SELECT COUNT(DISTINCT val) AS cardinality
FROM sampled
`;
  }

  /**
   * Builds a query returning up to `limit` distinct non-null values, cast to
   * text and sorted.
   *
   * @param columnName Already-quoted column name.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
SELECT DISTINCT CAST(${columnName} AS TEXT) AS val
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
ORDER BY val
LIMIT ${limit}
`;
  }

  /** No column-statistics query is generated by this dialect; always returns `null`. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /** Same as `generateCardinalitySampleQuery`, but the sampled rows are picked in random order. */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
WITH sampled AS (
SELECT ${columnName} AS val
FROM ${tableName}
WHERE ${columnName} IS NOT NULL
ORDER BY RANDOM()
LIMIT ${sampleSize}
)
SELECT COUNT(DISTINCT val) AS cardinality
FROM sampled
`;
  }

  /**
   * Builds an expression truncating a date/time column to the given granularity.
   *
   * @param column Column expression, embedded verbatim.
   * @param _timezone Accepted for interface compatibility; not used.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    _timezone?: string,
  ): string {
    switch (granularity) {
      case 'day':
        return `DATE(${column})`;
      case 'week':
        // Move forward to Sunday (or stay on it), then back six days to that week's Monday.
        return `DATE(${column}, 'weekday 0', '-6 days')`;
      case 'month':
        return `DATE(${column}, 'start of month')`;
      case 'quarter':
        // Step back 0, 1 or 2 months from the start of the current month.
        return `DATE(${column}, 'start of month', '-' || ((CAST(STRFTIME('%m', ${column}) AS INTEGER) - 1) % 3) || ' months')`;
      case 'year':
        return `DATE(${column}, 'start of year')`;
    }
  }

  /**
   * Builds an expression that assigns a date/time column to fixed-width buckets.
   *
   * Bucket boundaries fall at `origin + k * interval`; the expression yields
   * the start date of the bucket containing the column value.
   *
   * @param column Column expression, embedded verbatim.
   * @param interval `"<amount> <unit>"`. `day` and `week` (singular or plural,
   *   any case) are 1 and 7 days; any other unit is approximated as 30 days.
   * @param origin Anchor date for the buckets; defaults to `1970-01-01`.
   * @param _timezone Accepted for interface compatibility; not used.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, _timezone?: string): string {
    const [amount, rawUnit = ''] = interval.trim().split(/\s+/);
    // Accept `days` / `WEEKS` as well as `day` / `week`.
    const unit = rawUnit.toLowerCase().replace(/s$/, '');
    // Double single quotes so the origin cannot terminate the SQL string literal.
    const originExpr = origin ? `julianday('${origin.replace(/'/g, "''")}')` : `julianday('1970-01-01')`;
    const unitDays = unit === 'day' ? 1 : unit === 'week' ? 7 : 30;
    const intervalDays = Number(amount) * unitDays;
    // The whole-bucket offset must be added back to the same origin it was
    // measured from; adding it to the epoch shifts every result when a custom
    // origin is supplied.
    return `DATE(${originExpr} + (CAST((julianday(${column}) - ${originExpr}) / ${intervalDays} AS INTEGER) * ${intervalDays}))`;
  }

  /** Renders an interval as a SQL string literal, doubling embedded single quotes. */
  parseIntervalToSql(interval: string): string {
    return `'${interval.replace(/'/g, "''")}'`;
  }
}

View file

@ -0,0 +1,16 @@
export { KloSqliteDialect } from './dialect.js';
export {
isKloSqliteConnectionConfig,
KloSqliteScanConnector,
sqliteDatabasePathFromConfig,
type KloSqliteColumnDistinctValuesOptions,
type KloSqliteColumnDistinctValuesResult,
type KloSqliteConnectionConfig,
type KloSqliteReadOnlyQueryInput,
type KloSqliteScanConnectorOptions,
type SqliteDatabasePathInput,
} from './connector.js';
export {
createSqliteLiveDatabaseIntrospection,
type CreateSqliteLiveDatabaseIntrospectionOptions,
} from './live-database-introspection.js';

View file

@ -0,0 +1,30 @@
import type { LiveDatabaseIntrospectionPort } from '@klo/context/ingest';
import type { KloProjectConnectionConfig } from '@klo/context/project';
import { KloSqliteScanConnector, type KloSqliteConnectionConfig } from './connector.js';
/** Options for `createSqliteLiveDatabaseIntrospection`. */
export interface CreateSqliteLiveDatabaseIntrospectionOptions {
// Forwarded to the connector as the base directory for its database path.
projectDir?: string;
// Connection configs keyed by connection id; looked up on each `extractSchema` call.
connections: Record<string, KloProjectConnectionConfig>;
// Optional clock forwarded to the connector.
now?: () => Date;
}
/**
 * Creates a live-database introspection port backed by the native SQLite connector.
 *
 * Each `extractSchema` call builds a fresh connector for the requested
 * connection, introspects it, and always cleans the connector up afterwards,
 * whether introspection succeeded or failed.
 *
 * @param options Project directory, connection configs and optional clock.
 * @returns A port whose `extractSchema(connectionId)` resolves to the schema snapshot.
 */
export function createSqliteLiveDatabaseIntrospection(
  options: CreateSqliteLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  const extractSchema = async (connectionId: string) => {
    // Options are read at call time, not captured when the port is created.
    const connector = new KloSqliteScanConnector({
      connectionId,
      connection: options.connections[connectionId] as KloSqliteConnectionConfig | undefined,
      projectDir: options.projectDir,
      now: options.now,
    });
    try {
      const snapshot = await connector.introspect(
        { connectionId, driver: 'sqlite' },
        { runId: `sqlite-${connectionId}` },
      );
      return snapshot;
    } finally {
      await connector.cleanup();
    }
  };
  return { extractSchema };
}

View file

@ -0,0 +1,13 @@
import { describe, expect, it } from 'vitest';
// Smoke test for the package entry point: each public runtime export must be present.
describe('@klo/connector-sqlite package exports', () => {
it('exports the native SQLite scan connector surface', async () => {
// Dynamic import so the check runs against the module as consumers load it.
const connector = await import('./index.js');
// Classes and functions alike are reported as 'function'.
expect(connector.KloSqliteDialect).toBeTypeOf('function');
expect(connector.KloSqliteScanConnector).toBeTypeOf('function');
expect(connector.createSqliteLiveDatabaseIntrospection).toBeTypeOf('function');
expect(connector.isKloSqliteConnectionConfig).toBeTypeOf('function');
expect(connector.sqliteDatabasePathFromConfig).toBeTypeOf('function');
});
});