feat: wire duckdb through cli runtime

This commit is contained in:
Andrey Avtomonov 2026-05-18 15:25:33 +02:00
parent 22ad9dca99
commit 460c6fae63
17 changed files with 308 additions and 8 deletions

View file

@ -39,6 +39,7 @@
"@commander-js/extra-typings": "14.0.0",
"@ktx/connector-bigquery": "workspace:*",
"@ktx/connector-clickhouse": "workspace:*",
"@ktx/connector-duckdb": "workspace:*",
"@ktx/connector-mysql": "workspace:*",
"@ktx/connector-postgres": "workspace:*",
"@ktx/connector-snowflake": "workspace:*",

View file

@ -38,6 +38,7 @@ function llmBackend(value: string): KtxSetupLlmBackend {
function databaseDriver(value: string): KtxSetupDatabaseDriver {
if (
value === 'sqlite' ||
value === 'duckdb' ||
value === 'postgres' ||
value === 'mysql' ||
value === 'clickhouse' ||

View file

@ -491,7 +491,7 @@ describe('runKtxConnection', () => {
await initKtxProject({ projectDir });
await writeFile(
join(projectDir, 'ktx.yaml'),
'connections:\n mystery:\n driver: duckdb\n',
'connections:\n mystery:\n driver: nope\n',
'utf-8',
);
const io = makeIo();

View file

@ -41,6 +41,7 @@ export interface KtxConnectionDeps {
const SUPPORTED_TEST_DRIVERS = [
'sqlite',
'duckdb',
'postgres',
'mysql',
'clickhouse',
@ -276,6 +277,7 @@ async function testConnectionByDriver(
if (
driver === 'sqlite' ||
driver === 'sqlite3' ||
driver === 'duckdb' ||
driver === 'postgres' ||
driver === 'postgresql' ||
driver === 'mysql' ||

View file

@ -4,6 +4,7 @@ export type KtxDatabaseContextDepth = 'fast' | 'deep';
const KTX_DATABASE_DRIVER_IDS = new Set([
'sqlite',
'duckdb',
'postgres',
'postgresql',
'mysql',

View file

@ -2,9 +2,21 @@ import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { loadKtxProject } from '@ktx/context/project';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { createKtxCliLocalIngestAdapters } from './local-adapters.js';
// Shared spy for the mocked connector's `extractSchema`. It is created with
// `vi.hoisted` so that the `vi.mock` factory below (which vitest hoists to the
// top of the module) can safely reference it.
const duckDbMock = vi.hoisted(() => ({
extractSchema: vi.fn(),
}));
// Replace the DuckDB connector package with a lightweight stand-in for this test file.
vi.mock('@ktx/connector-duckdb', () => ({
// Treats a connection as DuckDB when its `driver`, stringified and lowercased, equals 'duckdb'.
isKtxDuckDbConnectionConfig: (connection: { driver?: unknown } | undefined) =>
String(connection?.driver ?? '').toLowerCase() === 'duckdb',
// Returns an introspection object whose `extractSchema` is the shared spy above.
createDuckDbLiveDatabaseIntrospection: () => ({
extractSchema: duckDbMock.extractSchema,
}),
}));
function sqlAnalysisStub() {
return {
async analyzeForFingerprint(sql: string) {
@ -33,6 +45,7 @@ describe('CLI local ingest adapters', () => {
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-local-adapters-'));
duckDbMock.extractSchema.mockReset();
});
afterEach(async () => {
@ -69,6 +82,40 @@ describe('CLI local ingest adapters', () => {
]);
});
// A `duckdb` connection's live-database fetch must end up calling the (mocked)
// DuckDB connector's `extractSchema` with the connection id.
it('routes DuckDB live database introspection through the native connector', async () => {
  // Canned, empty schema snapshot handed back by the mocked connector.
  const emptySnapshot = {
    connectionId: 'warehouse',
    driver: 'duckdb',
    extractedAt: '2026-05-18T12:00:00.000Z',
    scope: {},
    metadata: {},
    tables: [],
  };
  duckDbMock.extractSchema.mockResolvedValue(emptySnapshot);

  // Project config with one DuckDB connection and the live-database adapter enabled.
  const ktxYaml = [
    'connections:',
    ' warehouse:',
    ' driver: duckdb',
    ' path: warehouse.duckdb',
    'ingest:',
    ' adapters:',
    ' - live-database',
    '',
  ].join('\n');
  await writeProject(tempDir, ktxYaml);

  const loadedProject = await loadKtxProject({ projectDir: tempDir });
  const liveDbAdapter = createKtxCliLocalIngestAdapters(loadedProject).find(
    (candidate) => candidate.source === 'live-database',
  );
  if (!liveDbAdapter?.fetch) {
    throw new Error('Expected live-database adapter');
  }

  const stagedDir = join(tempDir, 'staged');
  await liveDbAdapter.fetch({}, stagedDir, { connectionId: 'warehouse', sourceKey: 'live-database' });

  expect(duckDbMock.extractSchema).toHaveBeenCalledWith('warehouse');
});
it('registers Postgres historic SQL from connection context query history', async () => {
await writeProject(
tempDir,

View file

@ -5,6 +5,7 @@ import {
type KtxBigQueryConnectionConfig,
} from '@ktx/connector-bigquery';
import { createClickHouseLiveDatabaseIntrospection, isKtxClickHouseConnectionConfig } from '@ktx/connector-clickhouse';
import { createDuckDbLiveDatabaseIntrospection, isKtxDuckDbConnectionConfig } from '@ktx/connector-duckdb';
import { createMysqlLiveDatabaseIntrospection, isKtxMysqlConnectionConfig } from '@ktx/connector-mysql';
import {
createPostgresLiveDatabaseIntrospection,
@ -104,6 +105,10 @@ function createKtxCliLiveDatabaseIntrospection(
projectDir: project.projectDir,
connections: project.config.connections,
});
const duckdb = createDuckDbLiveDatabaseIntrospection({
projectDir: project.projectDir,
connections: project.config.connections,
});
const mysql = createMysqlLiveDatabaseIntrospection({
connections: project.config.connections,
});
@ -128,6 +133,9 @@ function createKtxCliLiveDatabaseIntrospection(
if (isKtxSqliteConnectionConfig(connection)) {
return sqlite.extractSchema(connectionId);
}
if (isKtxDuckDbConnectionConfig(connection)) {
return duckdb.extractSchema(connectionId);
}
if (isKtxMysqlConnectionConfig(connection)) {
return mysql.extractSchema(connectionId);
}

View file

@ -92,7 +92,7 @@ describe('createKtxCliScanConnector', () => {
expect(bigQueryMock.constructorInputs[0]).not.toHaveProperty('maxBytesBilled');
});
it('rejects daemon-only fallback driver configs at config parse time', async () => {
it('creates a native duckdb connector from standalone config', async () => {
await initKtxProject({ projectDir: tempDir });
await writeFile(
join(tempDir, 'ktx.yaml'),
@ -105,10 +105,12 @@ describe('createKtxCliScanConnector', () => {
].join('\n'),
'utf-8',
);
const project = await loadKtxProject({ projectDir: tempDir });
await expect(loadKtxProject({ projectDir: tempDir })).rejects.toThrow(
/connections\.warehouse\.driver:.*Invalid discriminator value/,
);
const connector = await createKtxCliScanConnector(project, 'warehouse');
expect(connector.id).toBe('duckdb:warehouse');
expect(connector.driver).toBe('duckdb');
});
it('rejects connection blocks with no driver field at config parse time', async () => {

View file

@ -1,7 +1,7 @@
import type { KtxLocalProject } from '@ktx/context/project';
import type { KtxScanConnector } from '@ktx/context/scan';
const SUPPORTED_DRIVERS = 'sqlite, postgres, mysql, clickhouse, sqlserver, bigquery, snowflake';
const SUPPORTED_DRIVERS = 'sqlite, duckdb, postgres, mysql, clickhouse, sqlserver, bigquery, snowflake';
export async function createKtxCliScanConnector(
project: KtxLocalProject,
@ -23,6 +23,12 @@ export async function createKtxCliScanConnector(
return new KtxSqliteScanConnector({ connectionId, connection, projectDir: project.projectDir });
}
}
if (driver === 'duckdb') {
const { KtxDuckDbScanConnector, isKtxDuckDbConnectionConfig } = await import('@ktx/connector-duckdb');
if (isKtxDuckDbConnectionConfig(connection)) {
return new KtxDuckDbScanConnector({ connectionId, connection, projectDir: project.projectDir });
}
}
if (driver === 'postgres' || driver === 'postgresql') {
const { KtxPostgresScanConnector, isKtxPostgresConnectionConfig } = await import('@ktx/connector-postgres');
if (isKtxPostgresConnectionConfig(connection)) {

View file

@ -142,6 +142,7 @@ describe('setup databases step', () => {
'Use Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.',
options: [
{ value: 'sqlite', label: 'SQLite' },
{ value: 'duckdb', label: 'DuckDB' },
{ value: 'postgres', label: 'PostgreSQL' },
{ value: 'mysql', label: 'MySQL' },
{ value: 'clickhouse', label: 'ClickHouse' },
@ -370,6 +371,20 @@ describe('setup databases step', () => {
},
],
},
{
driver: 'duckdb',
textValues: ['', './warehouse.duckdb'],
expectedTextPrompts: [
{
message: connectionNamePrompt('DuckDB'),
placeholder: 'duckdb-local',
initialValue: 'duckdb-local',
},
{
message: 'DuckDB database file\nEnter a relative or absolute path, for example ./warehouse.duckdb.',
},
],
},
{
driver: 'postgres',
selectValues: ['url'],
@ -1632,6 +1647,42 @@ describe('setup databases step', () => {
expect((await readKtxSetupState(tempDir)).completed_steps).toContain('databases');
});
// With input disabled and a database URL supplied, the DuckDB connection is
// written, tested and scanned without any text prompt being shown.
it('adds one non-interactive DuckDB connection from --database-url without prompting', async () => {
  const { io: setupIo } = makeIo();
  const promptAdapter = makePromptAdapter({});
  const testConnectionSpy = vi.fn(async () => 0);
  const scanConnectionSpy = vi.fn(async () => 0);

  const { status } = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['duckdb'],
      databaseConnectionId: 'warehouse',
      databaseUrl: './warehouse.duckdb',
      databaseSchemas: [],
      skipDatabases: false,
    },
    setupIo,
    { prompts: promptAdapter, testConnection: testConnectionSpy, scanConnection: scanConnectionSpy },
  );

  expect(status).toBe('ready');
  expect(promptAdapter.text).not.toHaveBeenCalled();
  expect(testConnectionSpy).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
  expect(scanConnectionSpy).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());

  // The persisted ktx.yaml should carry the DuckDB path taken from the URL argument.
  const ktxYaml = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const writtenConfig = parseKtxProjectConfig(ktxYaml);
  expect(writtenConfig.connections.warehouse).toEqual({
    driver: 'duckdb',
    path: './warehouse.duckdb',
    context: { depth: 'fast' },
  });
  expect(writtenConfig.setup).toEqual({
    database_connection_ids: ['warehouse'],
  });

  const setupState = await readKtxSetupState(tempDir);
  expect(setupState.completed_steps).toContain('databases');
});
it('selects multiple existing connections and validates each before recording setup ids', async () => {
await writeFile(
join(tempDir, 'ktx.yaml'),
@ -2057,6 +2108,29 @@ describe('setup databases step', () => {
expect(config.ingest.adapters).toEqual([]);
});
// Requesting query history together with the DuckDB driver must reject with a
// message naming the supported drivers.
it('rejects query history for DuckDB setup', async () => {
  const { io: setupIo } = makeIo();

  const setupRun = runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['duckdb'],
      databaseConnectionId: 'warehouse',
      databaseUrl: './warehouse.duckdb',
      databaseSchemas: [],
      enableQueryHistory: true,
      skipDatabases: false,
    },
    setupIo,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
    },
  );

  await expect(setupRun).rejects.toThrow(
    'Query history setup is only supported for Snowflake, BigQuery, and Postgres, not DuckDB.',
  );
});
it('enables query history on an existing Postgres connection', async () => {
await writeFile(
join(tempDir, 'ktx.yaml'),

View file

@ -33,6 +33,7 @@ const execFileAsync = promisify(execFileCallback);
export type KtxSetupDatabaseDriver =
| 'sqlite'
| 'duckdb'
| 'postgres'
| 'mysql'
| 'clickhouse'
@ -103,6 +104,7 @@ export interface KtxSetupDatabasesDeps {
const DRIVER_OPTIONS: Array<{ value: KtxSetupDatabaseDriver; label: string }> = [
{ value: 'sqlite', label: 'SQLite' },
{ value: 'duckdb', label: 'DuckDB' },
{ value: 'postgres', label: 'PostgreSQL' },
{ value: 'mysql', label: 'MySQL' },
{ value: 'clickhouse', label: 'ClickHouse' },
@ -124,6 +126,7 @@ const HISTORIC_SQL_DIALECT_BY_DRIVER: Partial<Record<KtxSetupDatabaseDriver, His
const DEFAULT_CONNECTION_IDS: Record<KtxSetupDatabaseDriver, string> = {
sqlite: 'sqlite-local',
duckdb: 'duckdb-local',
postgres: 'postgres-warehouse',
mysql: 'mysql-warehouse',
clickhouse: 'clickhouse-warehouse',
@ -811,6 +814,18 @@ async function buildConnectionConfig(input: {
if (path === undefined) return 'back';
return path ? { driver: 'sqlite', path } : null;
}
if (driver === 'duckdb') {
if (args.inputMode === 'disabled' && !args.databaseUrl) return null;
const path =
args.databaseUrl ??
(await promptText(
prompts,
'DuckDB database file\nEnter a relative or absolute path, for example ./warehouse.duckdb.',
stringConfigField(input.existingConnection, 'path'),
));
if (path === undefined) return 'back';
return path ? { driver: 'duckdb', path } : null;
}
if (driver === 'postgres' || driver === 'mysql' || driver === 'clickhouse' || driver === 'sqlserver') {
return await buildUrlConnectionConfig({
driver,

View file

@ -475,6 +475,80 @@ joins: []
expect(stderr.write).not.toHaveBeenCalled();
});
// Runs `sl query` with `execute: true` against a DuckDB connection and checks that the
// SQL produced by the compute stub reaches the injected query executor together with
// the DuckDB connection config.
// NOTE(review): the executor is supplied through `createQueryExecutor` in deps, so this
// test does not appear to exercise the CLI's default executor factory — confirm that
// path is covered elsewhere.
it('injects a duckdb-capable executor for sl query --execute', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir });
// Register the DuckDB connection directly on the in-memory project config.
project.config.connections.warehouse = { driver: 'duckdb', path: 'warehouse.duckdb' };
// Write an `orders` semantic-layer source file under the `warehouse` connection.
await project.fileStore.writeFile(
'semantic-layer/warehouse/orders.yaml',
`name: orders
table: main.orders
grain: [id]
columns:
- name: id
type: number
- name: amount
type: number
measures:
- name: amount_sum
expr: sum(amount)
joins: []
`,
'ktx',
'ktx@example.com',
'Add orders source',
);
// Stub executor: records its calls and resolves with a single-row result.
const queryExecutor = {
execute: vi.fn(async () => ({
headers: ['total'],
rows: [[42]],
totalRows: 1,
command: 'SELECT',
rowCount: 1,
})),
};
// Stub semantic-layer compute: `query` resolves with the SQL string the executor is
// later expected to receive.
const compute = {
query: vi.fn(async () => ({
dialect: 'duckdb',
sql: 'select 42 as total',
columns: [{ name: 'total' }],
rows: [],
totalRows: 0,
plan: {},
})),
validateSources: vi.fn(),
generateSources: vi.fn(),
};
// The command is expected to resolve with exit code 0.
await expect(
runKtxSl(
{
command: 'query',
projectDir,
connectionId: 'warehouse',
query: { measures: ['sum(orders.amount)'], dimensions: [] },
format: 'json',
execute: true,
cliVersion: '0.0.0-test',
runtimeInstallPolicy: 'never',
},
makeIo().io,
{
loadProject: async () => project,
createSemanticLayerCompute: () => compute,
createQueryExecutor: () => queryExecutor,
},
),
).resolves.toBe(0);
// The executor must have been called with the connection id, a DuckDB connection
// config, and the SQL returned by the compute stub.
expect(queryExecutor.execute).toHaveBeenCalledWith(
expect.objectContaining({
connectionId: 'warehouse',
connection: expect.objectContaining({ driver: 'duckdb' }),
sql: 'select 42 as total',
}),
);
});
it('executes sl query against a local SQLite connection through the default executor', async () => {
const projectDir = join(tempDir, 'project');
const project = await initKtxProject({ projectDir });

View file

@ -1,4 +1,5 @@
import { readFile } from 'node:fs/promises';
import { createDuckDbQueryExecutor } from '@ktx/connector-duckdb';
import { createDefaultLocalQueryExecutor, type KtxSqlQueryExecutorPort } from '@ktx/context/connections';
import {
createLocalKtxEmbeddingProviderFromConfig,
@ -81,6 +82,10 @@ function slSearchEmbeddingService(project: KtxLocalProject, deps: KtxSlDeps): Kt
return provider ? new KtxIngestEmbeddingPortAdapter(provider) : null;
}
/**
 * Builds the default local query executor with a DuckDB executor passed in
 * under the `duckdb` option.
 *
 * @returns The SQL query executor port produced by `createDefaultLocalQueryExecutor`.
 */
function createKtxCliSlQueryExecutor(): KtxSqlQueryExecutorPort {
  const duckdb = createDuckDbQueryExecutor();
  return createDefaultLocalQueryExecutor({ duckdb });
}
async function printSlSources(input: {
rows: ReadonlyArray<LocalSlSourceSummary>;
command: 'sl list';
@ -239,7 +244,7 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx
installPolicy: args.runtimeInstallPolicy,
io,
});
const queryExecutor = args.execute ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() : undefined;
const queryExecutor = args.execute ? (deps.createQueryExecutor ?? createKtxCliSlQueryExecutor)() : undefined;
const result = await compileLocalSlQuery(project as KtxLocalProject, {
connectionId: args.connectionId,
query,

View file

@ -129,6 +129,46 @@ describe('runKtxSql', () => {
expect(io.stderr()).toBe('');
});
// Executing SQL on a `duckdb` connection must run read-only validation with the
// `duckdb` analysis dialect.
it('validates duckdb SQL with the duckdb analysis dialect', async () => {
  const projectDir = join(tempDir, 'project');
  await initKtxProject({ projectDir });
  await writeConnections(projectDir, { warehouse: { driver: 'duckdb', path: 'warehouse.duckdb' } });

  const analysisStub = makeSqlAnalysis({ ok: true, error: null });
  // Fake scan connector that reports itself as DuckDB and returns one row.
  const duckDbConnector = makeConnector({
    id: 'duckdb:warehouse',
    driver: 'duckdb',
    executeReadOnly: vi.fn(async () => ({
      headers: ['id'],
      rows: [[1]],
      totalRows: 1,
      rowCount: 1,
    })),
  });
  const createScanConnector = vi.fn(async () => duckDbConnector);
  const { io: sqlIo } = makeIo();

  const exitCode = runKtxSql(
    {
      command: 'execute',
      projectDir,
      connectionId: 'warehouse',
      sql: 'select id from orders',
      maxRows: 1000,
      output: 'json',
      json: false,
      cliVersion: '0.0.0-test',
    },
    sqlIo,
    {
      createSqlAnalysis: () => analysisStub,
      createScanConnector,
    },
  );
  await expect(exitCode).resolves.toBe(0);

  expect(analysisStub.validateReadOnly).toHaveBeenCalledWith('select id from orders', 'duckdb');
});
it('prints JSON output', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });

View file

@ -47,6 +47,7 @@ function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDia
mssql: 'tsql',
sqlite: 'sqlite',
sqlite3: 'sqlite',
duckdb: 'duckdb',
clickhouse: 'clickhouse',
redshift: 'redshift',
};

View file

@ -321,6 +321,11 @@ function buildConnectionStatus(
if (typeof path === 'string' && path.length > 0) return ok(`path: ${path}`);
return warn('path not set', 'Rerun `ktx setup`');
}
case 'duckdb': {
const path = (conn as Record<string, unknown>).path ?? (conn as Record<string, unknown>).url;
if (typeof path === 'string' && path.length > 0) return ok(`path: ${path}`);
return warn('path not set', 'Rerun `ktx setup`');
}
case 'notion': {
const tokenRef =
(conn as Record<string, unknown>).auth_token_ref ??

18
pnpm-lock.yaml generated
View file

@ -109,6 +109,9 @@ importers:
'@ktx/connector-clickhouse':
specifier: workspace:*
version: file:packages/connector-clickhouse(js-yaml@4.1.1)(ws@8.20.0)
'@ktx/connector-duckdb':
specifier: workspace:*
version: file:packages/connector-duckdb(js-yaml@4.1.1)(ws@8.20.0)
'@ktx/connector-mysql':
specifier: workspace:*
version: file:packages/connector-mysql(@types/node@24.12.2)(js-yaml@4.1.1)(ws@8.20.0)
@ -1476,6 +1479,10 @@ packages:
resolution: {directory: packages/connector-clickhouse, type: directory}
engines: {node: '>=22.0.0'}
'@ktx/connector-duckdb@file:packages/connector-duckdb':
resolution: {directory: packages/connector-duckdb, type: directory}
engines: {node: '>=22.0.0'}
'@ktx/connector-mysql@file:packages/connector-mysql':
resolution: {directory: packages/connector-mysql, type: directory}
engines: {node: '>=22.0.0'}
@ -7689,6 +7696,17 @@ snapshots:
- supports-color
- ws
'@ktx/connector-duckdb@file:packages/connector-duckdb(js-yaml@4.1.1)(ws@8.20.0)':
dependencies:
'@duckdb/node-api': 1.5.2-r.1
'@ktx/context': file:packages/context(js-yaml@4.1.1)(ws@8.20.0)
transitivePeerDependencies:
- '@cfworker/json-schema'
- js-yaml
- pg-native
- supports-color
- ws
'@ktx/connector-mysql@file:packages/connector-mysql(@types/node@24.12.2)(js-yaml@4.1.1)(ws@8.20.0)':
dependencies:
'@ktx/context': file:packages/context(js-yaml@4.1.1)(ws@8.20.0)