mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
385 lines
13 KiB
TypeScript
385 lines
13 KiB
TypeScript
import Database from 'better-sqlite3';
|
|
import { existsSync, readFileSync, statSync } from 'node:fs';
|
|
import { homedir } from 'node:os';
|
|
import { isAbsolute, resolve } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { getDialectForDriver } from '../../context/connections/dialects.js';
|
|
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
|
import { normalizeQueryRows } from '../../context/connections/query-executor.js';
|
|
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
|
import { scopedTableNames } from '../../context/scan/table-ref.js';
|
|
|
|
/**
 * Connection settings understood by the native SQLite connector.
 *
 * The database location is taken from `path`, falling back to `url` when
 * `path` is not a non-empty string. Additional keys are tolerated.
 */
export interface KtxSqliteConnectionConfig {
  /** Driver name; the connector only accepts `sqlite` (compared case-insensitively). */
  driver?: string;
  /** Database file location, absolute or relative to the project directory. */
  path?: string;
  /** Alternative location: a `file:` URL, a `sqlite:` URL, or a plain path. */
  url?: string;
  /** Any other connection keys; ignored by the SQLite path resolution. */
  [key: string]: unknown;
}
|
|
|
|
/**
 * Everything needed to resolve the on-disk location of a SQLite database.
 *
 * @internal
 */
export interface SqliteDatabasePathInput {
  /** Connection identifier; used in error messages and to match scan requests. */
  connectionId: string;
  /** Base directory for relative database paths; the process working directory when omitted. */
  projectDir?: string;
  /** Raw connection configuration; `undefined` is rejected during path resolution. */
  connection: KtxSqliteConnectionConfig | undefined;
}
|
|
|
|
/** Constructor options for {@link KtxSqliteScanConnector}. */
export interface KtxSqliteScanConnectorOptions extends SqliteDatabasePathInput {
  /** Clock used to stamp schema snapshots; defaults to `() => new Date()`. */
  now?: () => Date;
}
|
|
|
|
/** Read-only query request extended with optional bound parameters. */
export interface KtxSqliteReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  /** Values bound to the statement: a positional array or an object of named parameters. */
  params?: unknown[] | Record<string, unknown>;
}
|
|
|
|
/** Tuning knobs for `KtxSqliteScanConnector.getColumnDistinctValues`. */
export interface KtxSqliteColumnDistinctValuesOptions {
  /** Distinct values are only fetched when the sampled cardinality does not exceed this. */
  maxCardinality: number;
  /** Row limit passed to the distinct-values query. */
  limit: number;
  /** Sample size passed to the cardinality query; 10000 when omitted. */
  sampleSize?: number;
}
|
|
|
|
/** Outcome of a distinct-values probe on a single column. */
export interface KtxSqliteColumnDistinctValuesResult {
  /**
   * Stringified non-null distinct values, an empty array when the cardinality is 0,
   * or `null` when the cardinality exceeded the caller's `maxCardinality`.
   */
  values: string[] | null;
  /** Distinct-value count reported by the cardinality sample query. */
  cardinality: number;
}
|
|
|
|
/** Shape of the `name, type` rows selected from `sqlite_master` (tables and views only). */
interface SqliteMasterRow {
  /** Object name as stored in `sqlite_master`. */
  name: string;
  /** Object kind; the queries restrict this to tables and views. */
  type: 'table' | 'view';
}
|
|
|
|
/** Shape of one row returned by `PRAGMA table_info(...)`. */
interface SqliteTableInfoRow {
  /** Column index as reported by the pragma. */
  cid: number;
  /** Column name. */
  name: string;
  /** Declared column type; fed to the dialect's type mappers. */
  type: string;
  /** `0` when the column has no NOT NULL constraint. */
  notnull: number;
  /** Declared default value as reported by the pragma. */
  dflt_value: unknown;
  /** Greater than `0` when the column is part of the primary key. */
  pk: number;
}
|
|
|
|
/** Shape of one row returned by `PRAGMA foreign_key_list(...)`. */
interface SqliteForeignKeyRow {
  /** Foreign-key identifier; primary sort key when mapping rows. */
  id: number;
  /** Position of the column within the foreign key; secondary sort key. */
  seq: number;
  /** Referenced (parent) table name. */
  table: string;
  /** Referencing column in the child table. */
  from: string;
  /**
   * Referenced column in the parent table.
   * NOTE(review): SQLite appears to report NULL here when the constraint omits the
   * parent column list — confirm consumers tolerate that.
   */
  to: string;
}
|
|
|
|
/**
 * Reads a string setting from the connection config and resolves any reference it holds.
 *
 * @param connection - Connection config, possibly undefined.
 * @param key - Config key to read.
 * @returns The trimmed value after reference resolution, or `undefined` when the
 *   setting is missing, not a string, or blank.
 */
function stringConfigValue(
  connection: KtxSqliteConnectionConfig | undefined,
  key: keyof KtxSqliteConnectionConfig,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(key, trimmed);
}
|
|
|
|
/**
 * Resolves indirection prefixes on a config string.
 *
 * - `env:NAME` yields the value of environment variable `NAME`, or `''` when unset.
 * - `file:PATH` (on any key except `url`) yields the trimmed UTF-8 contents of `PATH`.
 *   A leading `~` in `PATH` is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param key - Config key the value came from; `url` values keep their `file:` prefix.
 * @param value - Trimmed, non-empty config value.
 * @returns The resolved string.
 * @throws If a referenced file cannot be read.
 */
function resolveStringReference(key: keyof KtxSqliteConnectionConfig, value: string): string {
  if (value.startsWith('env:')) {
    return process.env[value.slice('env:'.length)] ?? '';
  }
  // `file:` on the `url` key is SQLite's native URI form (e.g. `file:///db.sqlite`), not a
  // file-contents reference — skip the read so the URI passes through verbatim.
  if (key !== 'url' && value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separators that follow `~` before resolving: `resolve(home, '/x')` would
    // treat `/x` as absolute and discard the home directory, so `~/x` must become `x`.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
|
|
|
/**
 * Converts a configured database URL into a filesystem path.
 *
 * @param url - A `file:` URL, a `sqlite:` URL, or a plain path.
 * @returns For `file:` the result of `fileURLToPath`; for `sqlite:` the URL's
 *   percent-decoded pathname; otherwise the input unchanged.
 */
function sqlitePathFromUrl(url: string): string {
  const scheme = url.startsWith('file:') ? 'file' : url.startsWith('sqlite:') ? 'sqlite' : 'none';
  switch (scheme) {
    case 'file':
      return fileURLToPath(url);
    case 'sqlite': {
      const { pathname } = new URL(url);
      return decodeURIComponent(pathname);
    }
    default:
      return url;
  }
}
|
|
|
|
/**
 * Removes leading whitespace, `--` line comments and block comments from a SQL string.
 *
 * Scanning stops at the first character that is neither whitespace nor the start of a
 * comment. An unterminated block comment is returned as-is (from its opening marker).
 *
 * @param sql - SQL text that may begin with comments.
 * @returns The remainder of `sql` starting at the first significant character.
 */
function stripLeadingSqlComments(sql: string): string {
  const whitespace = /\s/;
  let cursor = 0;
  for (;;) {
    while (cursor < sql.length && whitespace.test(sql.charAt(cursor))) {
      cursor += 1;
    }
    if (sql.startsWith('--', cursor)) {
      const lineEnd = sql.indexOf('\n', cursor + 2);
      if (lineEnd === -1) {
        // The comment runs to the end of the input; nothing is left.
        return '';
      }
      cursor = lineEnd + 1;
    } else if (sql.startsWith('/*', cursor)) {
      const close = sql.indexOf('*/', cursor + 2);
      if (close === -1) {
        return sql.slice(cursor);
      }
      cursor = close + 2;
    } else {
      return sql.slice(cursor);
    }
  }
}
|
|
|
|
/**
 * Type guard: tells whether a connection config targets the SQLite driver.
 *
 * @param connection - Connection config, possibly undefined.
 * @returns `true` when `driver`, lower-cased, equals `sqlite`.
 */
export function isKtxSqliteConnectionConfig(
  connection: KtxSqliteConnectionConfig | undefined,
): connection is KtxSqliteConnectionConfig {
  const declared = connection?.driver;
  const normalized = declared == null ? '' : String(declared).toLowerCase();
  return normalized === 'sqlite';
}
|
|
|
|
/**
 * Resolves the absolute path of the SQLite database described by a connection.
 *
 * `path` wins over `url`; relative results are resolved against `projectDir`
 * (or the process working directory when it is not given).
 *
 * @param input - Connection id, optional project directory and the connection config.
 * @returns The database path; absolute configured paths are returned unchanged.
 * @throws If the connection's driver is not `sqlite`, or neither `path` nor `url`
 *   yields a non-empty location.
 * @internal
 */
export function sqliteDatabasePathFromConfig(input: SqliteDatabasePathInput): string {
  const { connection, connectionId, projectDir } = input;
  const inputDriver = connection?.driver ?? 'unknown';
  if (!isKtxSqliteConnectionConfig(connection)) {
    throw new Error(`Native SQLite connector cannot run driver "${inputDriver}"`);
  }

  const fromPath = stringConfigValue(connection, 'path');
  const configuredPath =
    fromPath !== undefined ? fromPath : sqlitePathFromUrl(stringConfigValue(connection, 'url') ?? '');
  if (!configuredPath) {
    throw new Error(`Native SQLite connector requires connections.${connectionId}.path or url`);
  }

  if (isAbsolute(configuredPath)) {
    return configuredPath;
  }
  return resolve(projectDir ?? process.cwd(), configuredPath);
}
|
|
|
|
/**
 * Scan connector for a local SQLite database file.
 *
 * The database handle is opened on first use in read-only mode and reused until
 * {@link KtxSqliteScanConnector.cleanup} closes it. Methods that take a
 * `connectionId` reject requests addressed to a different connection.
 */
export class KtxSqliteScanConnector implements KtxScanConnector {
  /**
   * `sqlite_master` predicate selecting user tables and views. The underscore is
   * escaped because `_` is a single-character wildcard in LIKE; unescaped, the
   * pattern would also hide user objects such as `sqlitedata`.
   */
  private static readonly userObjectFilter =
    "type IN ('table', 'view') AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\'";

  /** Connector identifier, `sqlite:<connectionId>`. */
  readonly id: string;
  readonly driver = 'sqlite' as const;
  /** Feature flags advertised to the scan pipeline. */
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: false,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly dbPath: string;
  private readonly now: () => Date;
  private readonly dialect = getDialectForDriver('sqlite');
  /** Lazily opened handle; `null` until first use and again after `cleanup()`. */
  private db: Database.Database | null = null;

  /**
   * @param options - Connection id, config and optional project directory / clock.
   * @throws If the database path cannot be resolved from the connection config.
   */
  constructor(options: KtxSqliteScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.dbPath = sqliteDatabasePathFromConfig(options);
    this.now = options.now ?? (() => new Date());
    this.id = `sqlite:${options.connectionId}`;
  }

  /**
   * Checks that the database file exists and answers a trivial query.
   *
   * @returns `{ success: true }`, or `{ success: false, error }` — failures are
   *   reported in the result rather than thrown.
   */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      if (!existsSync(this.dbPath) || !statSync(this.dbPath).isFile()) {
        return { success: false, error: `File not found: ${this.dbPath}` };
      }
      this.database().prepare('SELECT 1').get();
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Reads tables, views, columns and foreign keys into a schema snapshot.
   *
   * @param input - Scan request; `tableScope`, when present, restricts the objects read.
   * @returns Snapshot with file metadata and one entry per table/view.
   * @throws If `input.connectionId` does not match this connector.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const database = this.database();
    const scopedNames = input.tableScope ? scopedTableNames(input.tableScope, { catalog: null, db: null }) : null;
    const scopeClause = scopedNames ? `AND name IN (${scopedNames.map(() => '?').join(', ')})` : '';
    // An empty scope would render `IN ()`, which is invalid SQL, so skip the query entirely.
    const rawTables =
      scopedNames && scopedNames.length === 0
        ? []
        : (database
            .prepare(
              `SELECT name, type FROM sqlite_master WHERE ${KtxSqliteScanConnector.userObjectFilter} ${scopeClause} ORDER BY name`,
            )
            .all(...(scopedNames ?? [])) as SqliteMasterRow[]);
    const tables = rawTables.map((table) => this.readTable(database, table));
    const fileStats = existsSync(this.dbPath) ? statSync(this.dbPath) : null;
    return {
      connectionId: this.connectionId,
      driver: 'sqlite',
      extractedAt: this.now().toISOString(),
      scope: {},
      metadata: {
        file_path: this.dbPath,
        file_size: fileStats ? fileStats.size : 0,
        table_count: tables.length,
        total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables,
    };
  }

  /** Always returns an empty list; this connector reports no schemas. */
  async listSchemas(): Promise<string[]> {
    return [];
  }

  /**
   * Lists user tables and views, ordered by name.
   *
   * @param _schemas - Ignored.
   * @returns Entries with `catalog: null` and an empty `schema`.
   */
  async listTables(_schemas?: string[]): Promise<KtxTableListEntry[]> {
    const rows = this.database()
      .prepare(
        `
        SELECT name, type
        FROM sqlite_master
        WHERE ${KtxSqliteScanConnector.userObjectFilter}
        ORDER BY name
        `,
      )
      .all() as SqliteMasterRow[];

    return rows.map((row) => ({
      catalog: null,
      schema: '',
      name: row.name,
      kind: row.type === 'view' ? ('view' as const) : ('table' as const),
    }));
  }

  /**
   * Samples rows from a table using the dialect's sample query.
   *
   * @throws If `input.connectionId` does not match this connector.
   */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
    return { headers: result.headers, rows: result.rows, totalRows: result.totalRows };
  }

  /**
   * Samples non-null values of one column.
   *
   * @returns The sampled values; `nullCount` and `distinctCount` are always `null`.
   * @throws If `input.connectionId` does not match this connector.
   */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /** Column statistics are not supported (`capabilities.columnStats` is false); always `null`. */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /**
   * Runs a caller-supplied query after stripping leading comments and applying the row limit.
   *
   * @param input - SQL text, optional bound parameters and `maxRows`.
   * @returns Headers, rows, and `rowCount` equal to the number of rows returned.
   * @throws If `input.connectionId` does not match this connector, or the statement
   *   is rejected or fails to execute.
   */
  async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Probes a column's cardinality and, when small enough, fetches its distinct values.
   *
   * @param table - Table reference.
   * @param columnName - Unquoted column name.
   * @param options - Cardinality ceiling, value limit and optional sample size (default 10000).
   * @returns `null` when the cardinality query yields no usable number; `values: []` for
   *   cardinality 0; `values: null` when the cardinality exceeds `maxCardinality`;
   *   otherwise the stringified non-null values.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxSqliteColumnDistinctValuesOptions,
  ): Promise<KtxSqliteColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityResult = this.query(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    const firstRow = cardinalityResult.rows[0];
    if (firstRow === undefined) {
      return null;
    }
    const cardinality = Number(firstRow[0]);
    if (Number.isNaN(cardinality)) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesResult = this.query(this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit));
    return {
      values: valuesResult.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => String(row[0])),
      cardinality,
    };
  }

  /**
   * Counts the rows of a table.
   *
   * @param tableName - Unquoted table name.
   * @returns The `COUNT(*)` result, or 0 when the query returns no row.
   */
  async getTableRowCount(tableName: string): Promise<number> {
    const result = this.query(`SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(tableName)}`);
    return Number(result.rows[0]?.[0] ?? 0);
  }

  /** Formats a table reference for use in SQL via the dialect. */
  qTableName(table: Pick<KtxTableRef, 'name'>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes an identifier via the dialect. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Closes the database handle if one is open; safe to call repeatedly. */
  async cleanup(): Promise<void> {
    if (this.db) {
      this.db.close();
      this.db = null;
    }
  }

  /** Returns the shared handle, opening the file read-only on first use. */
  private database(): Database.Database {
    if (!this.db) {
      this.db = new Database(this.dbPath, { readonly: true, fileMustExist: true });
    }
    return this.db;
  }

  /**
   * Prepares and runs a statement, returning headers and normalized rows.
   * The SQL is passed through `assertReadOnlySql` before it is prepared.
   *
   * @param sql - Statement text.
   * @param params - Optional bound parameters (positional array or named object).
   */
  private query(sql: string, params?: Record<string, unknown> | unknown[]): Omit<KtxQueryResult, 'rowCount'> {
    const statement = this.database().prepare(assertReadOnlySql(sql));
    const rows = (params ? statement.all(params) : statement.all()) as unknown[];
    return {
      headers: statement.columns().map((column) => column.name),
      rows: normalizeQueryRows(rows),
      totalRows: rows.length,
    };
  }

  /**
   * Builds the schema entry for one table or view from `PRAGMA table_info`
   * and `PRAGMA foreign_key_list`.
   */
  private readTable(database: Database.Database, table: SqliteMasterRow): KtxSchemaTable {
    const quotedName = this.dialect.quoteIdentifier(table.name);
    const columns = database.prepare(`PRAGMA table_info(${quotedName})`).all() as SqliteTableInfoRow[];
    const foreignKeys = database.prepare(`PRAGMA foreign_key_list(${quotedName})`).all() as SqliteForeignKeyRow[];
    // Rows are counted with COUNT(*) for tables only; views report `null`.
    const estimatedRows =
      table.type === 'table'
        ? Number((database.prepare(`SELECT COUNT(*) AS count FROM ${quotedName}`).get() as { count: unknown }).count)
        : null;
    return {
      catalog: null,
      db: null,
      name: table.name,
      kind: table.type,
      comment: null,
      estimatedRows,
      columns: columns.map((column) => ({
        name: column.name,
        nativeType: column.type,
        normalizedType: this.dialect.mapDataType(column.type),
        dimensionType: this.dialect.mapToDimensionType(column.type),
        // Primary-key columns are reported as non-nullable even without NOT NULL.
        nullable: column.notnull === 0 && column.pk === 0,
        primaryKey: column.pk > 0,
        comment: null,
      })),
      foreignKeys: this.mapForeignKeys(foreignKeys),
    };
  }

  /**
   * Converts pragma rows into foreign-key entries ordered by constraint id, then
   * column sequence. Sorts a copy so the caller's array is left untouched.
   */
  private mapForeignKeys(rows: SqliteForeignKeyRow[]): KtxSchemaForeignKey[] {
    return [...rows]
      .sort((a, b) => a.id - b.id || a.seq - b.seq)
      .map((row) => ({
        fromColumn: row.from,
        toCatalog: null,
        toDb: null,
        toTable: row.table,
        toColumn: row.to,
        constraintName: null,
      }));
  }

  /** Throws when a request is addressed to a connection other than this connector's. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`KTX SQLite connector ${this.id} cannot serve connection ${connectionId}`);
    }
  }
}
|