feat: add duckdb connector package foundation

This commit is contained in:
Andrey Avtomonov 2026-05-18 15:10:34 +02:00
parent 15ef2f949a
commit 1b92789c01
9 changed files with 514 additions and 0 deletions

View file

@ -0,0 +1,48 @@
{
"name": "@ktx/connector-duckdb",
"version": "0.0.0-private",
"description": "DuckDB connector package for KTX scan interfaces",
"private": true,
"type": "module",
"engines": {
"node": ">=22.0.0"
},
"main": "dist/index.js",
"types": "dist/index.d.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js",
"default": "./dist/index.js"
},
"./package.json": "./package.json"
},
"files": [
"dist"
],
"scripts": {
"build": "tsc -p tsconfig.json",
"test": "vitest run",
"type-check": "tsc -p tsconfig.json --noEmit"
},
"dependencies": {
"@duckdb/node-api": "^1.5.2-r.1",
"@ktx/context": "workspace:*"
},
"devDependencies": {
"@types/node": "^25.7.0",
"@vitest/coverage-v8": "^4.1.6",
"typescript": "^6.0.3",
"vitest": "^4.1.6"
},
"license": "Apache-2.0",
"repository": {
"type": "git",
"url": "git+https://github.com/kaelio/ktx.git",
"directory": "packages/connector-duckdb"
},
"bugs": {
"url": "https://github.com/kaelio/ktx/issues"
},
"homepage": "https://github.com/kaelio/ktx#readme"
}

View file

@ -0,0 +1,101 @@
import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, resolve } from 'node:path';
import { pathToFileURL } from 'node:url';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import {
duckDbDatabasePathFromConfig,
isKtxDuckDbConnectionConfig,
KtxDuckDbScanConnector,
} from './connector.js';
describe('DuckDB connection config and path resolution', () => {
  type ConnectionConfig = ConstructorParameters<typeof KtxDuckDbScanConnector>[0]['connection'];

  let tempDir: string;

  // Every case shares the same connection id and project directory; only the
  // connection config differs between assertions.
  const optionsFor = (connection: ConnectionConfig) => ({
    connectionId: 'warehouse',
    projectDir: tempDir,
    connection,
  });

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-duckdb-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
    delete process.env.KTX_DUCKDB_FIXTURE;
  });

  it('recognizes duckdb configs', () => {
    const duckDbConfig = { driver: 'duckdb', path: 'warehouse.duckdb' };
    const sqliteConfig = { driver: 'sqlite', path: 'warehouse.duckdb' };

    expect(isKtxDuckDbConnectionConfig(duckDbConfig)).toBe(true);
    expect(isKtxDuckDbConnectionConfig(sqliteConfig)).toBe(false);
  });

  it('resolves project-relative path, env refs, file refs, and file URLs', async () => {
    const dbPath = join(tempDir, 'warehouse.duckdb');
    const pathRefFile = join(tempDir, 'warehouse-path.txt');
    await writeFile(dbPath, '', 'utf-8');
    await writeFile(pathRefFile, dbPath, 'utf-8');
    process.env.KTX_DUCKDB_FIXTURE = dbPath;

    // [connection config, expected resolved database path]
    const cases: Array<[ConnectionConfig, string]> = [
      [{ driver: 'duckdb', path: 'warehouse.duckdb' }, resolve(tempDir, 'warehouse.duckdb')],
      [{ driver: 'duckdb', path: 'env:KTX_DUCKDB_FIXTURE' }, dbPath],
      [{ driver: 'duckdb', path: `file:${pathRefFile}` }, dbPath],
      [{ driver: 'duckdb', url: pathToFileURL(dbPath).href }, dbPath],
    ];

    for (const [connection, expectedPath] of cases) {
      expect(duckDbDatabasePathFromConfig(optionsFor(connection))).toBe(expectedPath);
    }
  });

  it('rejects in-memory, missing, and directory targets before opening DuckDB', async () => {
    const missing = join(tempDir, 'missing.duckdb');
    const directory = join(tempDir, 'directory.duckdb');
    await mkdir(directory);

    // In-memory targets are refused at construction time.
    const constructInMemory = () =>
      new KtxDuckDbScanConnector(optionsFor({ driver: 'duckdb', path: ':memory:' }));
    expect(constructInMemory).toThrow('DuckDB in-memory connections are not supported');

    // A missing file is reported as a failed connection test and must not be created.
    const missingConnector = new KtxDuckDbScanConnector(optionsFor({ driver: 'duckdb', path: missing }));
    await expect(missingConnector.testConnection()).resolves.toEqual({
      success: false,
      error: `File not found: ${missing}`,
    });
    await expect(stat(missing)).rejects.toThrow();

    // A directory is reported as a failed connection test and must stay a directory.
    const directoryConnector = new KtxDuckDbScanConnector(optionsFor({ driver: 'duckdb', path: directory }));
    await expect(directoryConnector.testConnection()).resolves.toEqual({
      success: false,
      error: `Expected a DuckDB database file, got directory: ${directory}`,
    });
    await expect(readFile(directory)).rejects.toThrow();
  });
});

View file

@ -0,0 +1,147 @@
import { existsSync, readFileSync, statSync } from 'node:fs';
import { homedir } from 'node:os';
import { isAbsolute, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import {
createKtxConnectorCapabilities,
type KtxConnectionDriver,
type KtxScanConnector,
} from '@ktx/context/scan';
import { loadDuckDbNodeApi, type DuckDbNativeLoader } from './native.js';
/**
 * Raw connection settings for a DuckDB connection entry.
 *
 * Additional, unrecognized keys are allowed through the index signature.
 */
export interface KtxDuckDbConnectionConfig {
  /** Driver name; the config is treated as DuckDB when this equals `duckdb`, ignoring case. */
  driver?: string;
  /**
   * Database file location. May be a literal path, an `env:NAME` reference to an
   * environment variable, or a `file:PATH` reference to a text file holding the path.
   */
  path?: string;
  /**
   * Alternative to `path`, consulted only when `path` yields no value. May be an
   * `env:NAME` reference or a `file:` URL; any other value is used as a path as-is.
   */
  url?: string;
  [key: string]: unknown;
}
/** Input needed to resolve the database file path of one DuckDB connection. */
export interface DuckDbDatabasePathInput {
  /** Connection identifier; used in the "path or url is required" error message. */
  connectionId: string;
  /** Base directory for relative database paths; `process.cwd()` is used when omitted. */
  projectDir?: string;
  /** The connection settings to resolve; must identify the `duckdb` driver. */
  connection: KtxDuckDbConnectionConfig | undefined;
}
/** Constructor options for {@link KtxDuckDbScanConnector}. */
export interface KtxDuckDbScanConnectorOptions extends DuckDbDatabasePathInput {
  /** Clock override. NOTE(review): not read by any code visible in this file — confirm intended use. */
  now?: () => Date;
  /** Loader for the DuckDB native API; defaults to one backed by `loadDuckDbNodeApi`. */
  nativeLoader?: DuckDbNativeLoader;
}
/**
 * Expands a leading `~` to the current user's home directory.
 *
 * Only a bare `~` or a `~` followed by a path separator is expanded. Any other
 * value — including `~name` forms — is returned unchanged.
 *
 * @param path - Path that may start with `~`.
 * @returns The expanded absolute path, or `path` itself when no expansion applies.
 */
function resolveTilde(path: string): string {
  if (!path.startsWith('~')) {
    return path;
  }
  const afterTilde = path.slice(1);
  if (afterTilde === '') {
    return resolve(homedir());
  }
  // Backslash is an ordinary file-name character on POSIX, so it only counts as
  // a separator on Windows.
  const leadingSeparators = process.platform === 'win32' ? /^[\\/]+/ : /^\/+/;
  if (!leadingSeparators.test(afterTilde)) {
    return path;
  }
  // The separators must be stripped: resolve() discards the home directory as
  // soon as a later segment is absolute, which is what "/foo" would be.
  return resolve(homedir(), afterTilde.replace(leadingSeparators, ''));
}
/**
 * Resolves an indirect config value to the string it refers to.
 *
 * - `:memory:` is rejected.
 * - `env:NAME` yields the environment variable `NAME`, or `''` when it is unset.
 * - `file:PATH` (for the `path` key only) yields the trimmed contents of that file.
 * - Anything else is returned unchanged.
 *
 * @param key - Which config key the value came from.
 * @param value - The configured value.
 * @returns The resolved string.
 * @throws Error when the value is `:memory:`; file-read errors propagate.
 */
function resolveStringReference(key: 'path' | 'url', value: string): string {
  const envPrefix = 'env:';
  const filePrefix = 'file:';

  if (value === ':memory:') {
    throw new Error('DuckDB in-memory connections are not supported');
  }

  if (value.startsWith(envPrefix)) {
    const variableName = value.slice(envPrefix.length);
    return process.env[variableName] ?? '';
  }

  // For the `url` key a `file:` prefix is left untouched here.
  const isFileReference = key === 'path' && value.startsWith(filePrefix);
  if (!isFileReference) {
    return value;
  }

  const referencePath = resolveTilde(value.slice(filePrefix.length));
  return readFileSync(referencePath, 'utf-8').trim();
}
/**
 * Converts a configured `url` value into a file-system path.
 *
 * @param url - A `file:` URL, or any other string, which is passed through unchanged.
 * @returns The file-system path for `file:` URLs, otherwise `url` itself.
 * @throws Error when `url` is `:memory:`.
 */
function duckDbPathFromUrl(url: string): string {
  if (url === ':memory:') {
    throw new Error('DuckDB in-memory connections are not supported');
  }
  const isFileUrl = url.startsWith('file:');
  return isFileUrl ? fileURLToPath(url) : url;
}
/**
 * Reads a string setting from the connection config and resolves any
 * `env:` / `file:` reference it contains.
 *
 * @param connection - Connection settings, possibly undefined.
 * @param key - Which setting to read.
 * @returns The resolved value, or `undefined` when the setting is absent, not a
 *   string, blank, or resolves to a blank string.
 */
function stringConfigValue(
  connection: KtxDuckDbConnectionConfig | undefined,
  key: 'path' | 'url',
): string | undefined {
  const raw = connection?.[key];
  if (typeof raw !== 'string') {
    return undefined;
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  const resolved = resolveStringReference(key, trimmed);
  // A reference can resolve to an empty string (e.g. an unset environment
  // variable). Report that as "not configured" so a caller's `??` fallback to
  // the other key still runs instead of being short-circuited by ''.
  return resolved.trim().length > 0 ? resolved : undefined;
}
/**
 * Type guard reporting whether a connection config targets the DuckDB driver.
 *
 * @param connection - Connection settings, possibly undefined.
 * @returns `true` when `connection.driver`, converted to a string, equals
 *   `duckdb` ignoring case.
 */
export function isKtxDuckDbConnectionConfig(
  connection: KtxDuckDbConnectionConfig | undefined,
): connection is KtxDuckDbConnectionConfig {
  const driverName = String(connection?.driver ?? '');
  return driverName.toLowerCase() === 'duckdb';
}
/**
 * Resolves the absolute database file path for a DuckDB connection.
 *
 * The `path` setting wins; `url` is consulted only when `path` yields no value.
 * Relative results are resolved against `projectDir`, or `process.cwd()` when
 * no project directory is given.
 *
 * @param input - Connection id, optional project directory, and connection settings.
 * @returns The absolute database path.
 * @throws Error when the driver is not `duckdb`, when neither `path` nor `url`
 *   produces a value, or when the target is `:memory:`.
 */
export function duckDbDatabasePathFromConfig(input: DuckDbDatabasePathInput): string {
  const { connection, connectionId, projectDir } = input;

  // Captured before the type guard narrows `connection`.
  const inputDriver = connection?.driver ?? 'unknown';
  if (!isKtxDuckDbConnectionConfig(connection)) {
    throw new Error(`Native DuckDB connector cannot run driver "${inputDriver}"`);
  }

  let configuredPath = stringConfigValue(connection, 'path');
  if (configuredPath === undefined) {
    configuredPath = duckDbPathFromUrl(stringConfigValue(connection, 'url') ?? '');
  }

  if (!configuredPath) {
    throw new Error(`connections.${connectionId}.path or url is required`);
  }
  // A reference may itself resolve to the in-memory marker.
  if (configuredPath === ':memory:') {
    throw new Error('DuckDB in-memory connections are not supported');
  }

  if (isAbsolute(configuredPath)) {
    return configuredPath;
  }
  return resolve(projectDir ?? process.cwd(), configuredPath);
}
/**
 * Verifies that `dbPath` points at an existing regular file.
 *
 * @param dbPath - Path of the DuckDB database file to check.
 * @throws Error when the path does not exist, is a directory, or is any other
 *   non-file entry.
 */
export function assertDuckDbDatabaseFile(dbPath: string): void {
  if (!existsSync(dbPath)) {
    throw new Error(`File not found: ${dbPath}`);
  }

  const entry = statSync(dbPath);
  if (entry.isFile()) {
    return;
  }
  if (entry.isDirectory()) {
    throw new Error(`Expected a DuckDB database file, got directory: ${dbPath}`);
  }
  throw new Error(`Expected a DuckDB database file, got non-file path: ${dbPath}`);
}
/**
 * Scan connector for a file-backed DuckDB database.
 *
 * The database path is resolved once, in the constructor, so an invalid config
 * (wrong driver, missing path, `:memory:` target) fails at construction time.
 */
export class KtxDuckDbScanConnector implements KtxScanConnector {
  /** Connector identifier of the form `duckdb:<connectionId>`. */
  readonly id: string;
  readonly driver = 'duckdb' as KtxConnectionDriver;
  /** Capability flags declared for this connector. */
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: false,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly dbPath: string;
  private readonly nativeLoader: DuckDbNativeLoader;

  /**
   * @param options - Connection id, optional project directory, connection
   *   settings, and an optional native-API loader override.
   * @throws Error when the database path cannot be resolved from the config.
   */
  constructor(options: KtxDuckDbScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.dbPath = duckDbDatabasePathFromConfig(options);
    this.nativeLoader = options.nativeLoader ?? { load: loadDuckDbNodeApi };
    this.id = `duckdb:${options.connectionId}`;
  }

  /**
   * Checks that the database file exists, opens it read-only, and runs `SELECT 1`.
   *
   * Never rejects: every failure is reported as `{ success: false, error }`.
   *
   * @returns `{ success: true }` when the probe query ran, otherwise the failure message.
   */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      // Validate the file first so a bad path is reported before DuckDB is loaded or opened.
      assertDuckDbDatabaseFile(this.dbPath);
      const { DuckDBInstance } = await this.nativeLoader.load();
      const instance = await DuckDBInstance.create(this.dbPath, { access_mode: 'READ_ONLY' });
      // The instance gets its own try/finally so it is closed even when
      // connect() fails or disconnectSync() throws.
      try {
        const connection = await instance.connect();
        try {
          await connection.runAndReadAll('SELECT 1');
        } finally {
          connection.disconnectSync();
        }
      } finally {
        instance.closeSync();
      }
      return { success: true };
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Schema introspection placeholder.
   *
   * @throws Error always; introspection is not implemented in this version.
   */
  async introspect(): Promise<never> {
    throw new Error('DuckDB schema introspection is implemented in Task 2.');
  }

  /** No-op: `testConnection` releases everything it opens, so nothing is held between calls. */
  async cleanup(): Promise<void> {}
}

View file

@ -0,0 +1,17 @@
// Public entry point of the DuckDB connector package.

// Connection-config helpers, database-file validation, and the scan connector.
export {
  assertDuckDbDatabaseFile,
  duckDbDatabasePathFromConfig,
  isKtxDuckDbConnectionConfig,
  KtxDuckDbScanConnector,
  type DuckDbDatabasePathInput,
  type KtxDuckDbConnectionConfig,
  type KtxDuckDbScanConnectorOptions,
} from './connector.js';
// Platform detection, the supported-platform guard, and native-load error formatting.
export {
  assertSupportedDuckDbPlatform,
  currentDuckDbPlatform,
  detectDuckDbLibc,
  formatDuckDbNativeLoadError,
  type DuckDbLibc,
  type DuckDbPlatformInfo,
} from './platform.js';

View file

@ -0,0 +1,16 @@
import { assertSupportedDuckDbPlatform, formatDuckDbNativeLoadError } from './platform.js';
/** Module namespace type of `@duckdb/node-api`, derived without importing it at runtime. */
export type DuckDbNodeApi = typeof import('@duckdb/node-api');
/** Indirection for obtaining the DuckDB native API, so callers can supply their own loader. */
export interface DuckDbNativeLoader {
  /** Resolves to the `@duckdb/node-api` module namespace. */
  load(): Promise<DuckDbNodeApi>;
}
/**
 * Loads `@duckdb/node-api` on demand.
 *
 * The platform guard runs first, so an unsupported platform is rejected before
 * the import is attempted.
 *
 * @returns The `@duckdb/node-api` module namespace.
 * @throws Error from the platform guard, or the formatted load error when the
 *   import fails.
 */
export async function loadDuckDbNodeApi(): Promise<DuckDbNodeApi> {
  assertSupportedDuckDbPlatform();

  let api: DuckDbNodeApi;
  try {
    api = await import('@duckdb/node-api');
  } catch (loadFailure) {
    throw formatDuckDbNativeLoadError(loadFailure);
  }
  return api;
}

View file

@ -0,0 +1,25 @@
import { describe, expect, it } from 'vitest';
import { assertSupportedDuckDbPlatform, formatDuckDbNativeLoadError } from './platform.js';
describe('DuckDB native platform guard', () => {
  // Platform descriptors shared by the cases below.
  const linuxMuslX64 = { platform: 'linux', arch: 'x64', libc: 'musl' } as const;
  const macArm64 = { platform: 'darwin', arch: 'arm64', libc: 'unknown' } as const;

  it('rejects Linux musl before native loading', () => {
    const guard = () => assertSupportedDuckDbPlatform(linuxMuslX64);

    expect(guard).toThrow('DuckDB native bindings are not supported on linux x64 musl');
  });

  it('accepts macOS arm64', () => {
    const guard = () => assertSupportedDuckDbPlatform(macArm64);

    expect(guard).not.toThrow();
  });

  it('formats missing optional binary errors with platform details', () => {
    const missingBinding = new Error("Cannot find module '@duckdb/node-bindings-darwin-arm64'");

    const { message } = formatDuckDbNativeLoadError(missingBinding, macArm64);

    expect(message).toContain('@duckdb/node-api native bindings could not be loaded');
    expect(message).toContain('darwin arm64');
  });
});

View file

@ -0,0 +1,62 @@
import { existsSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
/** C library flavour detected for the running process; `unknown` when neither glibc nor musl was identified. */
export type DuckDbLibc = 'glibc' | 'musl' | 'unknown';
/** Platform triple used to decide whether the DuckDB native bindings are supported. */
export interface DuckDbPlatformInfo {
  /** Operating-system identifier, as reported by `process.platform`. */
  platform: NodeJS.Platform;
  /** CPU architecture, as reported by `process.arch`. */
  arch: NodeJS.Architecture;
  /** Detected C library flavour. */
  libc: DuckDbLibc;
}
/**
 * Detects which C library the current process runs against.
 *
 * Reports `glibc` when the diagnostic report carries a glibc runtime version.
 * Otherwise, on Linux only, reports `musl` when a known musl loader exists or
 * `/usr/bin/ldd` mentions musl. Everything else is `unknown`.
 *
 * @returns The detected libc flavour.
 */
export function detectDuckDbLibc(): DuckDbLibc {
  const report = process.report?.getReport?.() as
    | { header?: { glibcVersionRuntime?: string } }
    | undefined;
  if (report?.header?.glibcVersionRuntime) {
    return 'glibc';
  }

  if (process.platform !== 'linux') {
    return 'unknown';
  }

  // The mere presence of a musl dynamic loader is treated as conclusive.
  const muslLoaders = ['/lib/ld-musl-x86_64.so.1', '/lib/ld-musl-aarch64.so.1'];
  for (const loaderPath of muslLoaders) {
    if (existsSync(loaderPath)) {
      return 'musl';
    }
  }

  // Otherwise look for a musl mention inside ldd; an unreadable file counts as no match.
  const lddPath = join('/usr', 'bin', 'ldd');
  if (existsSync(lddPath)) {
    try {
      if (readFileSync(lddPath, 'utf-8').includes('musl')) {
        return 'musl';
      }
    } catch {
      // Fall through to 'unknown'.
    }
  }

  return 'unknown';
}
/**
 * Describes the platform of the running process.
 *
 * @returns `process.platform`, `process.arch`, and the detected libc flavour.
 */
export function currentDuckDbPlatform(): DuckDbPlatformInfo {
  const { platform, arch } = process;
  const libc = detectDuckDbLibc();
  return { platform, arch, libc };
}
/**
 * Throws unless the platform is one the DuckDB native bindings are supported on:
 * macOS, Windows, or non-musl Linux, each on arm64 or x64.
 *
 * @param info - Platform to check; defaults to the current process's platform.
 * @throws Error naming the unsupported platform and listing the supported ones.
 */
export function assertSupportedDuckDbPlatform(info: DuckDbPlatformInfo = currentDuckDbPlatform()): void {
  const archSupported = info.arch === 'arm64' || info.arch === 'x64';

  let supported: boolean;
  switch (info.platform) {
    case 'darwin':
    case 'win32':
      supported = archSupported;
      break;
    case 'linux':
      // An undetermined libc is accepted; only musl is refused.
      supported = archSupported && info.libc !== 'musl';
      break;
    default:
      supported = false;
  }

  if (supported) {
    return;
  }
  throw new Error(
    `DuckDB native bindings are not supported on ${info.platform} ${info.arch} ${info.libc}. ` +
      'KTX DuckDB v1 supports macOS arm64/x64, Windows arm64/x64, and Linux glibc arm64/x64.',
  );
}
/**
 * Wraps a native-binding load failure in an error that names the platform and
 * suggests a remedy.
 *
 * @param error - The value thrown while loading `@duckdb/node-api`.
 * @param info - Platform to mention; defaults to the current process's platform.
 * @returns A new `Error` whose message ends with the original failure's message.
 */
export function formatDuckDbNativeLoadError(error: unknown, info = currentDuckDbPlatform()): Error {
  let detail: string;
  if (error instanceof Error) {
    detail = error.message;
  } else {
    detail = String(error);
  }

  const summary = `@duckdb/node-api native bindings could not be loaded for ${info.platform} ${info.arch} ${info.libc}. `;
  const remedy = `Install optional dependencies for @duckdb/node-api or use a supported platform. ${detail}`;
  return new Error(summary + remedy);
}

View file

@ -0,0 +1,9 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src"
},
"include": ["src/**/*.ts"],
"exclude": ["dist", "node_modules"]
}

89
pnpm-lock.yaml generated
View file

@ -218,6 +218,28 @@ importers:
specifier: ^4.1.6
version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(@vitest/coverage-v8@4.1.6)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.28.0)(jiti@2.7.0)(yaml@2.9.0))
packages/connector-duckdb:
dependencies:
'@duckdb/node-api':
specifier: ^1.5.2-r.1
version: 1.5.2-r.1
'@ktx/context':
specifier: workspace:*
version: file:packages/context(js-yaml@4.1.1)
devDependencies:
'@types/node':
specifier: ^24.3.0
version: 24.12.2
'@vitest/coverage-v8':
specifier: ^4.1.6
version: 4.1.6(vitest@4.1.6)
typescript:
specifier: ^6.0.3
version: 6.0.3
vitest:
specifier: ^4.1.6
version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(@vitest/coverage-v8@4.1.6)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.28.0)(jiti@2.7.0)(yaml@2.9.0))
packages/connector-mysql:
dependencies:
'@ktx/context':
@ -1004,6 +1026,42 @@ packages:
'@dabh/diagnostics@2.0.8':
resolution: {integrity: sha512-R4MSXTVnuMzGD7bzHdW2ZhhdPC/igELENcq5IjEverBvq5hn1SXCWcsi6eSsdWP0/Ur+SItRRjAktmdoX/8R/Q==}
'@duckdb/node-api@1.5.2-r.1':
resolution: {integrity: sha512-OzBBnS0JGXMoS5mzKNY/Ylr7SshcRQiLFIoxQ4AlePwJ2fNeDL/fbHu/knjxUrXwW1fJBTUgwWftmxDdnZZb3A==}
'@duckdb/node-bindings-darwin-arm64@1.5.2-r.1':
resolution: {integrity: sha512-v35FyKOb8EJCvaiPF7k0gvKiJTXR7PPQDNoWR0Gu+YSX5O9b+DIguzt1348Of3HebHy6ATSMzlUekaVA9YXu+g==}
cpu: [arm64]
os: [darwin]
'@duckdb/node-bindings-darwin-x64@1.5.2-r.1':
resolution: {integrity: sha512-SU9dIJ1BluKkkGxi4UsP4keqkkstB2YDySF9KcYu3EZKIVM3FTv2zc7XO38dXnHOq6+F3WqhWWZvD+XU945p7A==}
cpu: [x64]
os: [darwin]
'@duckdb/node-bindings-linux-arm64@1.5.2-r.1':
resolution: {integrity: sha512-3Tra9xM3aM3denaER4KhJ6//6PpmPbik9ECBQ+sh9PyKaEgHw/0kAcKnLm5EzWUnXF0qYmZlewvkCrse8KmOYw==}
cpu: [arm64]
os: [linux]
'@duckdb/node-bindings-linux-x64@1.5.2-r.1':
resolution: {integrity: sha512-pcQvZRHiIfJ9cq8parkSQczQHEml/IeGfnDCMAbEgD6+jaV9Y9Y5Ph1kP9aR+bm6him1S5ZIEr3kZbihjKnWbA==}
cpu: [x64]
os: [linux]
'@duckdb/node-bindings-win32-arm64@1.5.2-r.1':
resolution: {integrity: sha512-Ji8tym+N3LkrhVt0Up3bsacD/kpg4/JXFJQqxswiYvBaNCQOk+D+aiVS0GN5pcqvmnG7V7TpsDRzkLEFaWp1vw==}
cpu: [arm64]
os: [win32]
'@duckdb/node-bindings-win32-x64@1.5.2-r.1':
resolution: {integrity: sha512-5XqcqC+4R8ghBEEbnc2a0sqfz1zyPBRb9YcmIWfiuDoCYSYFbKhmHcEyNftZDHcwCoLOHXnUin45jraex4STqQ==}
cpu: [x64]
os: [win32]
'@duckdb/node-bindings@1.5.2-r.1':
resolution: {integrity: sha512-bUg3bLVj70YVku6fKyQJS8ASORl7kM7YFVFznsEB9pWbtazPj+ME2x2FUk0WiTzjJdutjzSSGXF066mB4bGGZA==}
'@electric-sql/pglite-socket@0.1.5':
resolution: {integrity: sha512-/RAye+3EPKfO9nY4tljzxXmkT7yIpFDm0L3F+c28b+Z6uxPOjy/Zz/QEHYHXcrfuUC88/a9S72EO0+3E0j97wQ==}
hasBin: true
@ -7293,6 +7351,37 @@ snapshots:
enabled: 2.0.0
kuler: 2.0.0
'@duckdb/node-api@1.5.2-r.1':
dependencies:
'@duckdb/node-bindings': 1.5.2-r.1
'@duckdb/node-bindings-darwin-arm64@1.5.2-r.1':
optional: true
'@duckdb/node-bindings-darwin-x64@1.5.2-r.1':
optional: true
'@duckdb/node-bindings-linux-arm64@1.5.2-r.1':
optional: true
'@duckdb/node-bindings-linux-x64@1.5.2-r.1':
optional: true
'@duckdb/node-bindings-win32-arm64@1.5.2-r.1':
optional: true
'@duckdb/node-bindings-win32-x64@1.5.2-r.1':
optional: true
'@duckdb/node-bindings@1.5.2-r.1':
optionalDependencies:
'@duckdb/node-bindings-darwin-arm64': 1.5.2-r.1
'@duckdb/node-bindings-darwin-x64': 1.5.2-r.1
'@duckdb/node-bindings-linux-arm64': 1.5.2-r.1
'@duckdb/node-bindings-linux-x64': 1.5.2-r.1
'@duckdb/node-bindings-win32-arm64': 1.5.2-r.1
'@duckdb/node-bindings-win32-x64': 1.5.2-r.1
'@electric-sql/pglite-socket@0.1.5(@electric-sql/pglite@0.4.5)':
dependencies:
'@electric-sql/pglite': 0.4.5