mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-22 08:38:08 +02:00
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. 
- Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. 
* refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. 
Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
This commit is contained in:
parent
a1cfb03d73
commit
2366b00301
1002 changed files with 2286 additions and 12051 deletions
256
packages/cli/src/connectors/sqlite/connector.test.ts
Normal file
256
packages/cli/src/connectors/sqlite/connector.test.ts
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
import Database from 'better-sqlite3';
|
||||
import { writeFileSync } from 'node:fs';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { createSqliteLiveDatabaseIntrospection } from '../../connectors/sqlite/live-database-introspection.js';
|
||||
import { isKtxSqliteConnectionConfig, KtxSqliteScanConnector, sqliteDatabasePathFromConfig } from '../../connectors/sqlite/connector.js';
|
||||
|
||||
describe('KtxSqliteScanConnector', () => {
  let tempDir: string;
  let dbPath: string;
  // Connectors created by the current test; closed in afterEach so no read-only
  // SQLite handle is still open when the temp directory is removed.
  let openConnectors: KtxSqliteScanConnector[] = [];

  /** Builds a connector for the fixture database and registers it for teardown. */
  function openConnector(options: { now?: () => Date } = {}): KtxSqliteScanConnector {
    const connector = new KtxSqliteScanConnector({
      connectionId: 'warehouse',
      connection: { driver: 'sqlite', path: dbPath },
      ...options,
    });
    openConnectors.push(connector);
    return connector;
  }

  beforeEach(async () => {
    openConnectors = [];
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-connector-sqlite-'));
    dbPath = join(tempDir, 'warehouse.db');
    // Fixture: two tables linked by a foreign key, plus one view over `orders`.
    const db = new Database(dbPath);
    db.exec(`
      PRAGMA foreign_keys = ON;
      CREATE TABLE customers (
        id INTEGER PRIMARY KEY,
        name TEXT NOT NULL,
        tier TEXT
      );
      CREATE TABLE orders (
        id INTEGER PRIMARY KEY,
        customer_id INTEGER NOT NULL,
        status TEXT,
        total NUMERIC,
        created_at TEXT,
        FOREIGN KEY(customer_id) REFERENCES customers(id)
      );
      CREATE VIEW recent_orders AS SELECT id, customer_id, status FROM orders;
      INSERT INTO customers (id, name, tier) VALUES (1, 'Ada', 'enterprise'), (2, 'Grace', 'growth');
      INSERT INTO orders (id, customer_id, status, total, created_at)
      VALUES (10, 1, 'paid', 42.5, '2026-04-28'), (11, 2, 'open', 9.5, '2026-04-29');
    `);
    db.close();
  });

  afterEach(async () => {
    try {
      // Release database handles before deleting the files they point at.
      await Promise.all(openConnectors.map((connector) => connector.cleanup()));
    } finally {
      openConnectors = [];
      await rm(tempDir, { recursive: true, force: true });
    }
  });

  it('resolves SQLite path configuration safely', () => {
    const originalDatabaseUrl = process.env.KTX_SQLITE_TEST_URL;
    const pointerPath = join(tempDir, 'sqlite-path.txt');
    process.env.KTX_SQLITE_TEST_URL = `sqlite:${dbPath}`;
    writeFileSync(pointerPath, dbPath, 'utf-8');

    /** Resolves a `warehouse` connection against the temp project directory. */
    const resolvePath = (connection: Record<string, unknown>): string =>
      sqliteDatabasePathFromConfig({ connectionId: 'warehouse', projectDir: tempDir, connection });

    try {
      expect(isKtxSqliteConnectionConfig({ driver: 'sqlite', path: 'warehouse.db' })).toBe(true);
      expect(isKtxSqliteConnectionConfig({ driver: 'sqlite3', path: 'warehouse.db' })).toBe(true);
      expect(isKtxSqliteConnectionConfig({ driver: 'postgres', url: 'env:DATABASE_URL' })).toBe(false);

      // Relative path is resolved against projectDir.
      expect(resolvePath({ driver: 'sqlite', path: 'warehouse.db' })).toBe(dbPath);
      // `env:` reference pointing at a `sqlite:` URL.
      expect(resolvePath({ driver: 'sqlite', url: 'env:KTX_SQLITE_TEST_URL' })).toBe(dbPath);
      // Native `file://` URL on the `url` key.
      expect(resolvePath({ driver: 'sqlite', url: `file://${dbPath}` })).toBe(dbPath);
      // `file:` on the `path` key reads the path from the referenced file.
      expect(resolvePath({ driver: 'sqlite', path: `file:${pointerPath}` })).toBe(dbPath);
      // Absolute path is returned untouched (this slot used to repeat the relative-path case).
      expect(resolvePath({ driver: 'sqlite', path: dbPath })).toBe(dbPath);

      expect(() => resolvePath({ driver: 'sqlite', file_path: 'warehouse.db' })).toThrow(
        'Native SQLite connector requires connections.warehouse.path or url',
      );
    } finally {
      if (originalDatabaseUrl === undefined) {
        delete process.env.KTX_SQLITE_TEST_URL;
      } else {
        process.env.KTX_SQLITE_TEST_URL = originalDatabaseUrl;
      }
    }
  });

  it('introspects schema, primary keys, row counts, views, and foreign keys', async () => {
    const connector = openConnector({ now: () => new Date('2026-04-29T10:00:00.000Z') });

    const snapshot = await connector.introspect(
      { connectionId: 'warehouse', driver: 'sqlite' },
      { runId: 'scan-run-1' },
    );

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      driver: 'sqlite',
      extractedAt: '2026-04-29T10:00:00.000Z',
      metadata: {
        file_path: dbPath,
        table_count: 3,
        total_columns: 11,
      },
    });
    expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows])).toEqual([
      ['customers', 'table', 2],
      ['orders', 'table', 2],
      ['recent_orders', 'view', null],
    ]);
    expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
      name: 'id',
      nativeType: 'INTEGER',
      normalizedType: 'INTEGER',
      dimensionType: 'number',
      nullable: false,
      primaryKey: true,
    });
    expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
      {
        fromColumn: 'customer_id',
        toCatalog: null,
        toDb: null,
        toTable: 'customers',
        toColumn: 'id',
        constraintName: null,
      },
    ]);
  });

  it('runs samples, distinct values, statistics, and read-only SQL', async () => {
    const connector = openConnector();

    await expect(
      connector.sampleTable(
        { connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, columns: ['id'], limit: 1 },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toEqual({ headers: ['id'], rows: [[10]], totalRows: 1 });

    await expect(
      connector.sampleColumn(
        { connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status', limit: 5 },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });

    await expect(
      connector.getColumnDistinctValues(
        { catalog: null, db: null, name: 'orders' },
        'status',
        { maxCardinality: 5, limit: 10, sampleSize: 100 },
      ),
    ).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });

    await expect(
      connector.executeReadOnly(
        { connectionId: 'warehouse', sql: 'select id, status from orders order by id', maxRows: 1 },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toEqual({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });

    await expect(
      connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
    ).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');

    await expect(
      connector.columnStats(
        { connectionId: 'warehouse', table: { catalog: null, db: null, name: 'orders' }, column: 'status' },
        { runId: 'scan-run-1' },
      ),
    ).resolves.toBeNull();
  });

  it('adapts native SQLite snapshots to live-database introspection for local ingest', async () => {
    const introspection = createSqliteLiveDatabaseIntrospection({
      projectDir: tempDir,
      connections: {
        warehouse: { driver: 'sqlite', path: 'warehouse.db' },
      },
      now: () => new Date('2026-04-29T10:00:00.000Z'),
    });

    const snapshot = await introspection.extractSchema('warehouse');

    expect(snapshot).toMatchObject({
      connectionId: 'warehouse',
      extractedAt: '2026-04-29T10:00:00.000Z',
    });
    expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
      name: 'customers',
      catalog: null,
      db: null,
      columns: [
        {
          name: 'id',
          nativeType: 'INTEGER',
          normalizedType: 'INTEGER',
          dimensionType: 'number',
          nullable: false,
          primaryKey: true,
          comment: null,
        },
        {
          name: 'name',
          nativeType: 'TEXT',
          normalizedType: 'TEXT',
          dimensionType: 'string',
          nullable: false,
          primaryKey: false,
          comment: null,
        },
        {
          name: 'tier',
          nativeType: 'TEXT',
          normalizedType: 'TEXT',
          dimensionType: 'string',
          nullable: true,
          primaryKey: false,
          comment: null,
        },
      ],
      foreignKeys: [],
    });
    expect(snapshot.tables.find((table) => table.name === 'orders')).toMatchObject({
      name: 'orders',
      catalog: null,
      db: null,
      foreignKeys: [{ fromColumn: 'customer_id', toTable: 'customers', toColumn: 'id' }],
    });
  });
});
|
||||
354
packages/cli/src/connectors/sqlite/connector.ts
Normal file
354
packages/cli/src/connectors/sqlite/connector.ts
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
import Database from 'better-sqlite3';
|
||||
import { existsSync, readFileSync, statSync } from 'node:fs';
|
||||
import { homedir } from 'node:os';
|
||||
import { isAbsolute, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
|
||||
import { normalizeQueryRows } from '../../context/connections/query-executor.js';
|
||||
import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js';
|
||||
import { KtxSqliteDialect } from './dialect.js';
|
||||
|
||||
/**
 * Shape of one `connections.<id>` entry handled by the native SQLite connector.
 * Unknown keys are tolerated through the index signature.
 */
export interface KtxSqliteConnectionConfig {
  /** Driver name; `sqlite` and `sqlite3` (any letter case) select this connector. */
  driver?: string;
  /** Database file path; may be an `env:NAME` or `file:<pointer-file>` reference. */
  path?: string;
  /** `file:` / `sqlite:` URL or plain path; may be an `env:NAME` reference. Used when `path` is absent. */
  url?: string;
  [key: string]: unknown;
}

/**
 * Input needed to resolve the on-disk database path for a connection.
 *
 * @internal
 */
export interface SqliteDatabasePathInput {
  /** Connection id, used in error messages. */
  connectionId: string;
  /** Base directory for relative paths; the current working directory is used when omitted. */
  projectDir?: string;
  connection: KtxSqliteConnectionConfig | undefined;
}

/** Constructor options for `KtxSqliteScanConnector`. */
export interface KtxSqliteScanConnectorOptions extends SqliteDatabasePathInput {
  /** Clock override used for the snapshot's `extractedAt`; defaults to `new Date()`. */
  now?: () => Date;
}

/** Read-only query input extended with optional bound parameters. */
export interface KtxSqliteReadOnlyQueryInput extends KtxReadOnlyQueryInput {
  /** Named or positional parameters forwarded to the prepared statement. */
  params?: Record<string, unknown> | unknown[];
}

/** Tuning knobs for `KtxSqliteScanConnector.getColumnDistinctValues`. */
export interface KtxSqliteColumnDistinctValuesOptions {
  /** Above this cardinality the values list is not fetched. */
  maxCardinality: number;
  /** Limit passed to the distinct-values query. */
  limit: number;
  /** Sample size for the cardinality query; the connector defaults it to 10000. */
  sampleSize?: number;
}

/** Result of `KtxSqliteScanConnector.getColumnDistinctValues`. */
export interface KtxSqliteColumnDistinctValuesResult {
  /** Distinct values as strings, or `null` when the cardinality exceeded `maxCardinality`. */
  values: string[] | null;
  cardinality: number;
}
|
||||
/** Row selected from `sqlite_master` when listing tables and views. */
interface SqliteMasterRow {
  name: string;
  type: 'table' | 'view';
}

/** Row returned by `PRAGMA table_info(<table>)`. */
interface SqliteTableInfoRow {
  cid: number;
  name: string;
  /** Declared column type as written in the schema. */
  type: string;
  /** Non-zero when the column is declared NOT NULL. */
  notnull: number;
  dflt_value: unknown;
  /** Greater than zero when the column is part of the primary key. */
  pk: number;
}

/** Row returned by `PRAGMA foreign_key_list(<table>)`. */
interface SqliteForeignKeyRow {
  id: number;
  seq: number;
  /** Referenced (parent) table. */
  table: string;
  /** Referencing column in the child table. */
  from: string;
  /** Referenced column in the parent table. */
  to: string;
}
|
||||
|
||||
/**
 * Reads `key` from the connection config as a trimmed, reference-resolved string.
 *
 * @returns `undefined` when the value is missing, not a string, or blank;
 *   otherwise the result of `resolveStringReference` on the trimmed text.
 */
function stringConfigValue(
  connection: KtxSqliteConnectionConfig | undefined,
  key: keyof KtxSqliteConnectionConfig,
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  return resolveStringReference(key, trimmed);
}
|
||||
|
||||
/**
 * Expands indirection prefixes in a config value.
 *
 * - `env:NAME` returns the environment variable `NAME`, or `''` when it is unset.
 * - `file:PATH` (any key except `url`) returns the trimmed contents of the file at `PATH`.
 *   A leading `~` is expanded to the current user's home directory.
 * - Anything else is returned unchanged.
 *
 * @param key   Config key the value came from; `url` disables `file:` expansion.
 * @param value Already-trimmed config value.
 * @throws Whatever `readFileSync` throws when a `file:` reference cannot be read.
 */
function resolveStringReference(key: keyof KtxSqliteConnectionConfig, value: string): string {
  if (value.startsWith('env:')) {
    return process.env[value.slice('env:'.length)] ?? '';
  }
  // `file:` on the `url` key is SQLite's native URI form (e.g. `file:///db.sqlite`), not a
  // file-contents reference — skip the read so the URI passes through verbatim.
  if (key !== 'url' && value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Drop the separator(s) after `~` before resolving: `resolve(home, '/x')` would treat
    // `/x` as absolute and discard the home directory, so `~/x` used to read `/x`.
    const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, '')) : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}
|
||||
|
||||
/**
 * Converts a configured SQLite URL into a filesystem path.
 *
 * `file:` URLs go through `fileURLToPath`; `sqlite:` URLs yield their
 * percent-decoded pathname; any other string is returned as-is.
 */
function sqlitePathFromUrl(url: string): string {
  const scheme = ['file:', 'sqlite:'].find((prefix) => url.startsWith(prefix));
  switch (scheme) {
    case 'file:':
      return fileURLToPath(url);
    case 'sqlite:':
      return decodeURIComponent(new URL(url).pathname);
    default:
      return url;
  }
}
|
||||
|
||||
/**
 * Removes leading whitespace, `--` line comments and block comments from a SQL string.
 *
 * Only the prefix is touched; comments after the first real token are kept.
 * An unterminated block comment is returned as-is (minus the whitespace before it).
 */
function stripLeadingSqlComments(sql: string): string {
  let cursor = 0;
  for (;;) {
    // Skip any run of whitespace in front of the next token.
    while (cursor < sql.length && /\s/.test(sql.charAt(cursor))) {
      cursor += 1;
    }
    if (sql.startsWith('--', cursor)) {
      const newline = sql.indexOf('\n', cursor + 2);
      if (newline === -1) {
        // The line comment runs to the end of the input: nothing is left.
        return '';
      }
      cursor = newline + 1;
    } else if (sql.startsWith('/*', cursor)) {
      const close = sql.indexOf('*/', cursor + 2);
      if (close === -1) {
        // Unterminated block comment: stop here and hand back the remainder.
        break;
      }
      cursor = close + 2;
    } else {
      break;
    }
  }
  return sql.slice(cursor);
}
|
||||
|
||||
/**
 * Type guard: true when the connection's `driver` is `sqlite` or `sqlite3`,
 * compared case-insensitively. A missing connection or driver yields false.
 */
export function isKtxSqliteConnectionConfig(
  connection: KtxSqliteConnectionConfig | undefined,
): connection is KtxSqliteConnectionConfig {
  const normalizedDriver = String(connection?.driver ?? '').toLowerCase();
  return ['sqlite', 'sqlite3'].includes(normalizedDriver);
}
|
||||
|
||||
/**
 * Resolves the absolute database file path for a SQLite connection.
 *
 * `path` wins over `url`; a relative result is resolved against `projectDir`
 * (or the current working directory when `projectDir` is not given).
 *
 * @throws Error when the driver is not SQLite, or when neither `path` nor `url`
 *   produces a non-empty value.
 * @internal
 */
export function sqliteDatabasePathFromConfig(input: SqliteDatabasePathInput): string {
  const { connection, connectionId, projectDir } = input;
  const inputDriver = connection?.driver ?? 'unknown';
  if (!isKtxSqliteConnectionConfig(connection)) {
    throw new Error(`Native SQLite connector cannot run driver "${inputDriver}"`);
  }

  let configuredPath = stringConfigValue(connection, 'path');
  if (configuredPath === undefined) {
    configuredPath = sqlitePathFromUrl(stringConfigValue(connection, 'url') ?? '');
  }
  if (!configuredPath) {
    throw new Error(`Native SQLite connector requires connections.${connectionId}.path or url`);
  }

  if (isAbsolute(configuredPath)) {
    return configuredPath;
  }
  return resolve(projectDir ?? process.cwd(), configuredPath);
}
|
||||
|
||||
/**
 * Scan connector backed by a local SQLite file opened read-only through better-sqlite3.
 *
 * The database handle is opened lazily on first use and kept until `cleanup()` is called.
 * Methods that take a `connectionId` reject ids other than the one given at construction.
 */
export class KtxSqliteScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'sqlite' as const;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: false,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly dbPath: string;
  private readonly now: () => Date;
  private readonly dialect = new KtxSqliteDialect();
  private db: Database.Database | null = null;

  /**
   * @throws Error when the connection config is not SQLite or has no usable `path`/`url`
   *   (raised by `sqliteDatabasePathFromConfig`).
   */
  constructor(options: KtxSqliteScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.dbPath = sqliteDatabasePathFromConfig(options);
    this.now = options.now ?? (() => new Date());
    this.id = `sqlite:${options.connectionId}`;
  }

  /** Checks that the database file exists and answers `SELECT 1`; never throws. */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      if (!existsSync(this.dbPath) || !statSync(this.dbPath).isFile()) {
        return { success: false, error: `File not found: ${this.dbPath}` };
      }
      this.database().prepare('SELECT 1').get();
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Reads every user table and view (columns, foreign keys, row counts) into a schema snapshot.
   *
   * @throws Error when `input.connectionId` is not this connector's connection.
   */
  async introspect(input: KtxScanInput, _ctx: KtxScanContext): Promise<KtxSchemaSnapshot> {
    this.assertConnection(input.connectionId);
    const database = this.database();
    // `_` is a single-character wildcard in LIKE, so an unescaped 'sqlite_%' would also hide
    // user tables such as `sqlitedata`. Escape it so only the reserved `sqlite_` prefix is skipped.
    const rawTables = database
      .prepare(
        `SELECT name, type FROM sqlite_master WHERE type IN ('table', 'view') AND name NOT LIKE 'sqlite\\_%' ESCAPE '\\' ORDER BY name`,
      )
      .all() as SqliteMasterRow[];
    const tables = rawTables.map((table) => this.readTable(database, table));
    const fileStats = existsSync(this.dbPath) ? statSync(this.dbPath) : null;
    return {
      connectionId: this.connectionId,
      driver: 'sqlite',
      extractedAt: this.now().toISOString(),
      scope: {},
      metadata: {
        file_path: this.dbPath,
        file_size: fileStats ? fileStats.size : 0,
        table_count: tables.length,
        total_columns: tables.reduce((sum, table) => sum + table.columns.length, 0),
      },
      tables,
    };
  }

  /** Returns up to `input.limit` rows of the table using the dialect's sample query. */
  async sampleTable(input: KtxTableSampleInput, _ctx: KtxScanContext): Promise<KtxTableSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(this.dialect.generateSampleQuery(this.qTableName(input.table), input.limit, input.columns));
    return { headers: result.headers, rows: result.rows, totalRows: result.totalRows };
  }

  /** Samples non-null values of one column; null and distinct counts are not computed. */
  async sampleColumn(input: KtxColumnSampleInput, _ctx: KtxScanContext): Promise<KtxColumnSampleResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(
      this.dialect.generateColumnSampleQuery(this.qTableName(input.table), input.column, input.limit),
    );
    const values = result.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => row[0]);
    return { values, nullCount: null, distinctCount: null };
  }

  /** Column statistics are not supported (`capabilities.columnStats` is false); always resolves to null. */
  async columnStats(_input: KtxColumnStatsInput, _ctx: KtxScanContext): Promise<KtxColumnStatsResult | null> {
    return null;
  }

  /**
   * Runs caller-supplied SQL after stripping leading comments and applying the row limit.
   *
   * @throws Error when the connection id does not match, or when `assertReadOnlySql`
   *   rejects the statement.
   */
  async executeReadOnly(input: KtxSqliteReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    this.assertConnection(input.connectionId);
    const result = this.query(limitSqlForExecution(stripLeadingSqlComments(input.sql), input.maxRows), input.params);
    return { ...result, rowCount: result.rows.length };
  }

  /**
   * Measures a column's cardinality on a sample and, when small enough, lists its distinct values.
   *
   * @returns `null` when the cardinality query yields no usable number; `values: null`
   *   when the cardinality exceeds `options.maxCardinality`.
   */
  async getColumnDistinctValues(
    table: KtxTableRef,
    columnName: string,
    options: KtxSqliteColumnDistinctValuesOptions,
  ): Promise<KtxSqliteColumnDistinctValuesResult | null> {
    const sampleSize = options.sampleSize ?? 10000;
    const tableName = this.qTableName(table);
    const quotedColumn = this.dialect.quoteIdentifier(columnName);
    const cardinalityResult = this.query(
      this.dialect.generateCardinalitySampleQuery(tableName, quotedColumn, sampleSize),
    );
    if (cardinalityResult.rows.length === 0) {
      return null;
    }
    // Same safe first-cell access as getTableRowCount; a missing cell becomes NaN and returns null.
    const cardinality = Number(cardinalityResult.rows[0]?.[0]);
    if (Number.isNaN(cardinality)) {
      return null;
    }
    if (cardinality === 0) {
      return { values: [], cardinality: 0 };
    }
    if (cardinality > options.maxCardinality) {
      return { values: null, cardinality };
    }
    const valuesResult = this.query(this.dialect.generateDistinctValuesQuery(tableName, quotedColumn, options.limit));
    return {
      values: valuesResult.rows.filter((row) => row.length > 0 && row[0] !== null).map((row) => String(row[0])),
      cardinality,
    };
  }

  /** Exact `COUNT(*)` of the named table; 0 when the query returns no row. */
  async getTableRowCount(tableName: string): Promise<number> {
    const result = this.query(`SELECT COUNT(*) AS count FROM ${this.dialect.quoteIdentifier(tableName)}`);
    return Number(result.rows[0]?.[0] ?? 0);
  }

  /** Quoted table name as produced by the dialect. */
  qTableName(table: Pick<KtxTableRef, 'name'>): string {
    return this.dialect.formatTableName(table);
  }

  /** Quotes an identifier using the dialect's rules. */
  quoteIdentifier(identifier: string): string {
    return this.dialect.quoteIdentifier(identifier);
  }

  /** Closes the database handle if one is open; safe to call repeatedly. */
  async cleanup(): Promise<void> {
    if (this.db) {
      this.db.close();
      this.db = null;
    }
  }

  /** Lazily opens the database read-only; the file must already exist. */
  private database(): Database.Database {
    if (!this.db) {
      this.db = new Database(this.dbPath, { readonly: true, fileMustExist: true });
    }
    return this.db;
  }

  /** Prepares and runs a statement that passed `assertReadOnlySql`, returning headers and normalized rows. */
  private query(sql: string, params?: Record<string, unknown> | unknown[]): Omit<KtxQueryResult, 'rowCount'> {
    const statement = this.database().prepare(assertReadOnlySql(sql));
    const rows = (params ? statement.all(params) : statement.all()) as unknown[];
    return {
      headers: statement.columns().map((column) => column.name),
      rows: normalizeQueryRows(rows),
      totalRows: rows.length,
    };
  }

  /** Builds the schema entry for one table or view from `table_info` and `foreign_key_list`. */
  private readTable(database: Database.Database, table: SqliteMasterRow): KtxSchemaTable {
    const quotedName = this.dialect.quoteIdentifier(table.name);
    const columns = database.prepare(`PRAGMA table_info(${quotedName})`).all() as SqliteTableInfoRow[];
    const foreignKeys = database.prepare(`PRAGMA foreign_key_list(${quotedName})`).all() as SqliteForeignKeyRow[];
    // Only tables get a row count; views report null.
    const estimatedRows =
      table.type === 'table'
        ? Number((database.prepare(`SELECT COUNT(*) AS count FROM ${quotedName}`).get() as { count: unknown }).count)
        : null;
    return {
      catalog: null,
      db: null,
      name: table.name,
      kind: table.type,
      comment: null,
      estimatedRows,
      columns: columns.map((column) => ({
        name: column.name,
        nativeType: column.type,
        normalizedType: this.dialect.mapDataType(column.type),
        dimensionType: this.dialect.mapToDimensionType(column.type),
        // Primary-key columns are reported as non-nullable even without an explicit NOT NULL.
        nullable: column.notnull === 0 && column.pk === 0,
        primaryKey: column.pk > 0,
        comment: null,
      })),
      foreignKeys: this.mapForeignKeys(foreignKeys),
    };
  }

  /** Orders foreign-key rows by constraint id then sequence and maps them to schema foreign keys. */
  private mapForeignKeys(rows: SqliteForeignKeyRow[]): KtxSchemaForeignKey[] {
    // Sort a copy so the caller's array is left untouched.
    return [...rows]
      .sort((a, b) => a.id - b.id || a.seq - b.seq)
      .map((row) => ({
        fromColumn: row.from,
        toCatalog: null,
        toDb: null,
        toTable: row.table,
        toColumn: row.to,
        constraintName: null,
      }));
  }

  /** Throws when a request targets a connection other than the one this connector was built for. */
  private assertConnection(connectionId: string): void {
    if (connectionId !== this.connectionId) {
      throw new Error(`KTX SQLite connector ${this.id} cannot serve connection ${connectionId}`);
    }
  }
}
|
||||
33
packages/cli/src/connectors/sqlite/dialect.test.ts
Normal file
33
packages/cli/src/connectors/sqlite/dialect.test.ts
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { KtxSqliteDialect } from './dialect.js';
|
||||
|
||||
describe('KtxSqliteDialect', () => {
  const dialect = new KtxSqliteDialect();

  it('quotes identifiers and formats single-file SQLite table names', () => {
    // Embedded double quotes are doubled; catalog/db are ignored when formatting.
    const quoted: Array<[string, string]> = [
      ['orders', '"orders"'],
      ['weird"name', '"weird""name"'],
    ];
    for (const [identifier, expected] of quoted) {
      expect(dialect.quoteIdentifier(identifier)).toBe(expected);
    }
    expect(dialect.formatTableName({ catalog: 'ignored', db: 'ignored', name: 'orders' })).toBe('"orders"');
  });

  it('maps native SQLite types to KTX dimension types', () => {
    const expectations: Array<[string, string]> = [
      ['INTEGER', 'number'],
      ['numeric(10,2)', 'number'],
      ['timestamp', 'time'],
      ['VARCHAR(255)', 'string'],
      ['bool', 'boolean'],
      ['', 'string'],
    ];
    for (const [nativeType, dimensionType] of expectations) {
      expect(dialect.mapToDimensionType(nativeType)).toBe(dimensionType);
    }
  });

  it('builds sampling and distinct-value SQL without host-specific state', () => {
    const sampleSql = dialect.generateSampleQuery('"orders"', 25, ['id', 'status']);
    expect(sampleSql).toBe('SELECT "id", "status" FROM "orders" LIMIT 25');

    const columnSampleSql = dialect.generateColumnSampleQuery('"orders"', 'status', 10);
    expect(columnSampleSql).toBe(
      'SELECT "status" FROM "orders" WHERE "status" IS NOT NULL AND TRIM(CAST("status" AS TEXT)) != \'\' LIMIT 10',
    );

    const distinctSql = dialect.generateDistinctValuesQuery('"orders"', '"status"', 5);
    expect(distinctSql).toContain('SELECT DISTINCT CAST("status" AS TEXT) AS val');
  });
});
|
||||
177
packages/cli/src/connectors/sqlite/dialect.ts
Normal file
177
packages/cli/src/connectors/sqlite/dialect.ts
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
import type { KtxSchemaDimensionType, KtxTableRef } from '../../context/scan/types.js';
|
||||
|
||||
// Table reference accepted by `formatTableName`: `name` is required, `catalog` and `db` are optional.
type SqliteTableNameRef = Pick<KtxTableRef, 'name'> & Partial<Pick<KtxTableRef, 'catalog' | 'db'>>;
|
||||
|
||||
/**
 * SQL dialect helper for SQLite.
 *
 * Provides identifier quoting, native-type to dimension-type mapping, and
 * builders for the sampling, profiling and time-bucketing SQL fragments.
 * The class only stores its type tag and a static mapping table; every method
 * returns a string (or `null`) computed from its arguments.
 */
export class KtxSqliteDialect {
  readonly type = 'sqlite';

  /** Exact native type names (upper-cased, without length/precision) and their dimension type. */
  private readonly typeMappings: Record<string, KtxSchemaDimensionType> = {
    DATETIME: 'time',
    DATE: 'time',
    TIMESTAMP: 'time',
    TIME: 'time',
    INTEGER: 'number',
    INT: 'number',
    REAL: 'number',
    NUMERIC: 'number',
    FLOAT: 'number',
    DOUBLE: 'number',
    TEXT: 'string',
    VARCHAR: 'string',
    CHAR: 'string',
    BLOB: 'string',
    BOOLEAN: 'boolean',
    BOOL: 'boolean',
  };

  /** Wraps an identifier in double quotes, doubling any embedded double quote. */
  quoteIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /** Formats a table reference as a quoted name; `catalog` and `db` are ignored. */
  formatTableName(table: SqliteTableNameRef): string {
    return this.quoteIdentifier(table.name);
  }

  /** Returns the native type unchanged. */
  mapDataType(nativeType: string): string {
    return nativeType;
  }

  /**
   * Maps a native column type to a KTX dimension type.
   *
   * The type is upper-cased, trimmed and stripped of any `(...)` suffix, then
   * looked up exactly; unknown names fall back to substring heuristics and
   * finally to `'string'`.
   *
   * @param nativeType Declared column type, e.g. `numeric(10,2)`; may be empty.
   * @returns The matching dimension type (`'string'` for empty or unknown input).
   */
  mapToDimensionType(nativeType: string): KtxSchemaDimensionType {
    if (!nativeType) {
      return 'string';
    }

    const upper = nativeType.toUpperCase().trim();
    const parenAt = upper.indexOf('(');
    // Drop length/precision, e.g. "VARCHAR(255)" or "NUMERIC (10,2)".
    const normalized = parenAt === -1 ? upper : upper.slice(0, parenAt).trim();

    const exact = this.typeMappings[normalized];
    if (exact) {
      return exact;
    }

    // Heuristics for names not in the table. The time check runs first, so a
    // name containing both TIME/DATE and a numeric marker is treated as time.
    if (normalized.includes('TIME') || normalized.includes('DATE')) {
      return 'time';
    }
    const numericMarkers = ['INT', 'NUM', 'REAL', 'FLOAT', 'DOUBLE'];
    if (numericMarkers.some((marker) => normalized.includes(marker))) {
      return 'number';
    }
    if (normalized.includes('BOOL')) {
      return 'boolean';
    }
    return 'string';
  }

  /**
   * Builds a `SELECT ... LIMIT` sample query.
   *
   * @param tableName Table expression inserted verbatim (callers quote it).
   * @param limit Maximum number of rows.
   * @param columns Unquoted column names; quoted here. Omitted or empty selects `*`.
   */
  generateSampleQuery(tableName: string, limit: number, columns?: string[]): string {
    const columnList =
      columns && columns.length > 0 ? columns.map((column) => this.quoteIdentifier(column)).join(', ') : '*';
    return `SELECT ${columnList} FROM ${tableName} LIMIT ${limit}`;
  }

  /**
   * Builds a query sampling non-null, non-blank values of one column.
   *
   * @param tableName Table expression inserted verbatim.
   * @param columnName Unquoted column name; quoted here.
   * @param limit Maximum number of rows.
   */
  generateColumnSampleQuery(tableName: string, columnName: string, limit: number): string {
    const quoted = this.quoteIdentifier(columnName);
    return `SELECT ${quoted} FROM ${tableName} WHERE ${quoted} IS NOT NULL AND TRIM(CAST(${quoted} AS TEXT)) != '' LIMIT ${limit}`;
  }

  /** Passes the SQL through untouched, attaching `params` only when provided. */
  prepareQuery(sql: string, params?: Record<string, unknown>): { sql: string; params?: unknown } {
    return params ? { sql, params } : { sql };
  }

  /**
   * Builds a row-level random sampling predicate.
   *
   * @param samplePct Fraction of rows to keep, strictly between 0 and 1.
   * @returns A boolean SQL expression, or `''` when `samplePct` is outside (0, 1).
   */
  getRandomSampleFilter(samplePct: number): string {
    if (samplePct <= 0 || samplePct >= 1) {
      return '';
    }
    // RANDOM() is a signed 64-bit integer and `%` keeps the dividend's sign, so
    // the bare remainder is negative for about half the rows and would always
    // pass the `<` test. ABS() folds it into 0..99. It is applied after the
    // modulo because ABS(RANDOM()) itself can overflow on the minimum integer.
    return `ABS(RANDOM() % 100) < ${Math.round(samplePct * 100)}`;
  }

  /** Always returns `''`; no table-level sample clause is emitted for this dialect. */
  getTableSampleClause(_samplePct: number): string {
    return '';
  }

  /** Builds `LIMIT n`, adding `OFFSET m` only when `offset` is greater than zero. */
  getLimitOffsetClause(limit: number, offset?: number): string {
    return offset !== undefined && offset > 0 ? `LIMIT ${limit} OFFSET ${offset}` : `LIMIT ${limit}`;
  }

  /** Aggregate expression counting NULLs in `column` (inserted verbatim). */
  getNullCountExpression(column: string): string {
    return `SUM(CASE WHEN ${column} IS NULL THEN 1 ELSE 0 END)`;
  }

  /** Aggregate expression counting distinct values of `column` (inserted verbatim). */
  getDistinctCountExpression(column: string): string {
    return `COUNT(DISTINCT ${column})`;
  }

  /**
   * Builds a query estimating cardinality from the first `sampleSize` non-null rows.
   * `tableName` and `columnName` are inserted verbatim. The returned SQL is
   * multi-line with surrounding whitespace.
   */
  generateCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Builds a query returning up to `limit` distinct non-null values, cast to
   * TEXT and ordered. `tableName` and `columnName` are inserted verbatim. The
   * returned SQL is multi-line with surrounding whitespace.
   */
  generateDistinctValuesQuery(tableName: string, columnName: string, limit: number): string {
    return `
      SELECT DISTINCT CAST(${columnName} AS TEXT) AS val
      FROM ${tableName}
      WHERE ${columnName} IS NOT NULL
      ORDER BY val
      LIMIT ${limit}
    `;
  }

  /** Always returns `null`; this dialect builds no column-statistics query. */
  generateColumnStatisticsQuery(_schemaName: string, _tableName: string): string | null {
    return null;
  }

  /**
   * Same as `generateCardinalitySampleQuery`, but the sampled rows are picked
   * with `ORDER BY RANDOM()` instead of taking the first rows.
   */
  generateRandomizedCardinalitySampleQuery(tableName: string, columnName: string, sampleSize: number): string {
    return `
      WITH sampled AS (
        SELECT ${columnName} AS val
        FROM ${tableName}
        WHERE ${columnName} IS NOT NULL
        ORDER BY RANDOM()
        LIMIT ${sampleSize}
      )
      SELECT COUNT(DISTINCT val) AS cardinality
      FROM sampled
    `;
  }

  /**
   * Builds an expression truncating `column` to the start of the given period.
   *
   * @param column Column expression inserted verbatim.
   * @param granularity Period to truncate to.
   * @param _timezone Unused.
   */
  getTimeTruncExpression(
    column: string,
    granularity: 'day' | 'week' | 'month' | 'quarter' | 'year',
    _timezone?: string,
  ): string {
    switch (granularity) {
      case 'day':
        return `DATE(${column})`;
      case 'week':
        // 'weekday 0' moves forward to Sunday (no-op on a Sunday); six days
        // earlier is the Monday that starts that week.
        return `DATE(${column}, 'weekday 0', '-6 days')`;
      case 'month':
        return `DATE(${column}, 'start of month')`;
      case 'quarter':
        // Step back 0, 1 or 2 months from the start of the current month.
        return `DATE(${column}, 'start of month', '-' || ((CAST(STRFTIME('%m', ${column}) AS INTEGER) - 1) % 3) || ' months')`;
      case 'year':
        return `DATE(${column}, 'start of year')`;
    }
  }

  /**
   * Builds an expression bucketing `column` into fixed-width day intervals
   * counted from an origin date.
   *
   * @param column Column expression inserted verbatim.
   * @param interval `"<amount> <unit>"`; `day` is 1 day, `week` is 7 days and
   *   any other unit is approximated as 30 days.
   * @param origin Date the buckets are aligned to; defaults to `1970-01-01`.
   * @param _timezone Unused.
   * @returns A `DATE(...)` expression evaluating to the bucket's start date.
   */
  getCustomTimeTruncExpression(column: string, interval: string, origin?: string, _timezone?: string): string {
    const [amount, unit] = interval.split(' ');
    // Double embedded single quotes so the origin stays one SQL string literal.
    const originLiteral = (origin ? origin : '1970-01-01').replace(/'/g, "''");
    const originExpr = `julianday('${originLiteral}')`;
    const unitDays = unit === 'day' ? 1 : unit === 'week' ? 7 : 30;
    const intervalDays = Number(amount) * unitDays;
    // The bucket start is origin + whole intervals elapsed since origin. The
    // offset must be added back onto the same origin it was measured from.
    // NOTE(review): CAST(... AS INTEGER) truncates toward zero, so timestamps
    // earlier than the origin land in the bucket after the one they belong to.
    return `DATE(${originExpr} + (CAST((julianday(${column}) - ${originExpr}) / ${intervalDays} AS INTEGER) * ${intervalDays}))`;
  }

  /** Wraps the interval text in single quotes. */
  parseIntervalToSql(interval: string): string {
    return `'${interval}'`;
  }
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
import type { LiveDatabaseIntrospectionPort } from '../../context/ingest/adapters/live-database/types.js';
|
||||
import type { KtxProjectConnectionConfig } from '../../context/project/config.js';
|
||||
import { KtxSqliteScanConnector, type KtxSqliteConnectionConfig } from './connector.js';
|
||||
|
||||
/** Options accepted by `createSqliteLiveDatabaseIntrospection`. */
export interface CreateSqliteLiveDatabaseIntrospectionOptions {
  /** Forwarded unchanged to the SQLite scan connector as `projectDir`. */
  projectDir?: string;

  /** Connection configs keyed by connection id; the entry for the requested id is handed to the connector. */
  connections: Record<string, KtxProjectConnectionConfig>;

  /** Forwarded unchanged to the SQLite scan connector as `now`. */
  now?: () => Date;
}
|
||||
|
||||
/**
 * Builds a live-database introspection port backed by the SQLite scan connector.
 *
 * Every `extractSchema` call creates its own connector for the requested
 * connection id and cleans it up once introspection has settled, whether it
 * succeeded or threw.
 *
 * @param options Connection configs plus the optional `projectDir` and `now` passed on to the connector.
 * @returns A port whose `extractSchema` resolves with the connector's introspection result.
 */
export function createSqliteLiveDatabaseIntrospection(
  options: CreateSqliteLiveDatabaseIntrospectionOptions,
): LiveDatabaseIntrospectionPort {
  return {
    async extractSchema(connectionId: string) {
      // Options are read on each call, not captured when the port is created.
      const { connections, projectDir, now } = options;
      // May be undefined for an unknown id; the value is passed to the connector as-is.
      const connection = connections[connectionId] as KtxSqliteConnectionConfig | undefined;
      const connector = new KtxSqliteScanConnector({ connectionId, connection, projectDir, now });
      const runId = `sqlite-${connectionId}`;

      try {
        const schema = await connector.introspect({ connectionId, driver: 'sqlite' }, { runId });
        return schema;
      } finally {
        await connector.cleanup();
      }
    },
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue