diff --git a/packages/connector-duckdb/package.json b/packages/connector-duckdb/package.json new file mode 100644 index 00000000..0fbad91f --- /dev/null +++ b/packages/connector-duckdb/package.json @@ -0,0 +1,48 @@ +{ + "name": "@ktx/connector-duckdb", + "version": "0.0.0-private", + "description": "DuckDB connector package for KTX scan interfaces", + "private": true, + "type": "module", + "engines": { + "node": ">=22.0.0" + }, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist" + ], + "scripts": { + "build": "tsc -p tsconfig.json", + "test": "vitest run", + "type-check": "tsc -p tsconfig.json --noEmit" + }, + "dependencies": { + "@duckdb/node-api": "^1.5.2-r.1", + "@ktx/context": "workspace:*" + }, + "devDependencies": { + "@types/node": "^25.7.0", + "@vitest/coverage-v8": "^4.1.6", + "typescript": "^6.0.3", + "vitest": "^4.1.6" + }, + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/kaelio/ktx.git", + "directory": "packages/connector-duckdb" + }, + "bugs": { + "url": "https://github.com/kaelio/ktx/issues" + }, + "homepage": "https://github.com/kaelio/ktx#readme" +} diff --git a/packages/connector-duckdb/src/connector.test.ts b/packages/connector-duckdb/src/connector.test.ts new file mode 100644 index 00000000..25ba7739 --- /dev/null +++ b/packages/connector-duckdb/src/connector.test.ts @@ -0,0 +1,101 @@ +import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join, resolve } from 'node:path'; +import { pathToFileURL } from 'node:url'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + duckDbDatabasePathFromConfig, + isKtxDuckDbConnectionConfig, + KtxDuckDbScanConnector, +} from './connector.js'; + +describe('DuckDB connection 
config and path resolution', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-duckdb-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + delete process.env.KTX_DUCKDB_FIXTURE; + }); + + it('recognizes duckdb configs', () => { + expect(isKtxDuckDbConnectionConfig({ driver: 'duckdb', path: 'warehouse.duckdb' })).toBe(true); + expect(isKtxDuckDbConnectionConfig({ driver: 'sqlite', path: 'warehouse.duckdb' })).toBe(false); + }); + + it('resolves project-relative path, env refs, file refs, and file URLs', async () => { + const dbPath = join(tempDir, 'warehouse.duckdb'); + const pathRefFile = join(tempDir, 'warehouse-path.txt'); + await writeFile(dbPath, '', 'utf-8'); + await writeFile(pathRefFile, dbPath, 'utf-8'); + process.env.KTX_DUCKDB_FIXTURE = dbPath; + + expect( + duckDbDatabasePathFromConfig({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'duckdb', path: 'warehouse.duckdb' }, + }), + ).toBe(resolve(tempDir, 'warehouse.duckdb')); + expect( + duckDbDatabasePathFromConfig({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'duckdb', path: 'env:KTX_DUCKDB_FIXTURE' }, + }), + ).toBe(dbPath); + expect( + duckDbDatabasePathFromConfig({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'duckdb', path: `file:${pathRefFile}` }, + }), + ).toBe(dbPath); + expect( + duckDbDatabasePathFromConfig({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'duckdb', url: pathToFileURL(dbPath).href }, + }), + ).toBe(dbPath); + }); + + it('rejects in-memory, missing, and directory targets before opening DuckDB', async () => { + await mkdir(join(tempDir, 'directory.duckdb')); + expect(() => + new KtxDuckDbScanConnector({ + connectionId: 'warehouse', + projectDir: tempDir, + connection: { driver: 'duckdb', path: ':memory:' }, + }), + ).toThrow('DuckDB in-memory connections are not 
/**
 * Raw connection settings for a DuckDB connection entry.
 *
 * Only `driver`, `path` and `url` are read by this module; any other keys are
 * carried along untouched via the index signature.
 */
export interface KtxDuckDbConnectionConfig {
  driver?: string;
  path?: string;
  url?: string;
  [key: string]: unknown;
}

/** Everything needed to turn a connection entry into an absolute database path. */
export interface DuckDbDatabasePathInput {
  /** Connection name; used in error messages and in the connector id. */
  connectionId: string;
  /** Base directory for relative paths. Falls back to `process.cwd()` when omitted. */
  projectDir?: string;
  connection: KtxDuckDbConnectionConfig | undefined;
}

/** Constructor options for {@link KtxDuckDbScanConnector}. */
export interface KtxDuckDbScanConnectorOptions extends DuckDbDatabasePathInput {
  now?: () => Date;
  /** Overrides how `@duckdb/node-api` is loaded (e.g. to inject a stub). */
  nativeLoader?: DuckDbNativeLoader;
}

/**
 * Expands a leading `~` to the current user's home directory.
 *
 * Only a bare `~` or a `~` followed by a path separator is expanded; any other
 * value (including `~name` forms) is returned unchanged.
 *
 * The separators after `~` are stripped before resolving: `path.resolve`
 * discards every earlier segment once it meets an absolute one, so passing
 * `/data/x.duckdb` as the second argument would silently drop the home
 * directory and yield `/data/x.duckdb`.
 */
function resolveTilde(path: string): string {
  if (path === '~') {
    return homedir();
  }
  if (path.startsWith('~/') || path.startsWith('~\\')) {
    const relativeToHome = path.slice(1).replace(/^[\\/]+/, '');
    return resolve(homedir(), relativeToHome);
  }
  return path;
}

/**
 * Resolves the indirections allowed in a `path` / `url` config value.
 *
 * - `:memory:` is rejected.
 * - `env:NAME` yields the value of `process.env.NAME`, or `''` when it is unset.
 * - `file:LOCATION` (for the `path` key only) yields the trimmed UTF-8 contents
 *   of the file at LOCATION, after `~` expansion.
 * - Anything else is returned as-is.
 *
 * @throws Error when the value is `:memory:`; file-system errors from
 *   `readFileSync` propagate unchanged.
 */
function resolveStringReference(key: 'path' | 'url', value: string): string {
  if (value === ':memory:') {
    throw new Error('DuckDB in-memory connections are not supported');
  }
  if (value.startsWith('env:')) {
    return process.env[value.slice('env:'.length)] ?? '';
  }
  if (key === 'path' && value.startsWith('file:')) {
    return readFileSync(resolveTilde(value.slice('file:'.length)), 'utf-8').trim();
  }
  return value;
}

/**
 * Converts a `url` setting into a file-system path.
 *
 * `file:` URLs go through `fileURLToPath`; every other string is treated as a
 * path already.
 *
 * @throws Error when the value is `:memory:`.
 */
function duckDbPathFromUrl(url: string): string {
  if (url === ':memory:') {
    throw new Error('DuckDB in-memory connections are not supported');
  }
  if (url.startsWith('file:')) {
    return fileURLToPath(url);
  }
  return url;
}

/**
 * Reads `connection[key]` and resolves it when it is a non-blank string.
 *
 * @returns the resolved value, or `undefined` when the key is absent, not a
 *   string, or only whitespace.
 */
function stringConfigValue(
  connection: KtxDuckDbConnectionConfig | undefined,
  key: 'path' | 'url',
): string | undefined {
  const value = connection?.[key];
  return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(key, value.trim()) : undefined;
}

/** Type guard: true when the connection's `driver` equals `duckdb`, compared case-insensitively. */
export function isKtxDuckDbConnectionConfig(
  connection: KtxDuckDbConnectionConfig | undefined,
): connection is KtxDuckDbConnectionConfig {
  return String(connection?.driver ?? '').toLowerCase() === 'duckdb';
}

/**
 * Computes the absolute path of the DuckDB database file for a connection.
 *
 * `path` takes precedence over `url`. Relative results are resolved against
 * `projectDir`, or `process.cwd()` when no project directory is given.
 *
 * @throws Error when the driver is not `duckdb`, when neither `path` nor `url`
 *   resolves to a non-empty string, or when the target is `:memory:`.
 */
export function duckDbDatabasePathFromConfig(input: DuckDbDatabasePathInput): string {
  const inputDriver = input.connection?.driver ?? 'unknown';
  if (!isKtxDuckDbConnectionConfig(input.connection)) {
    throw new Error(`Native DuckDB connector cannot run driver "${inputDriver}"`);
  }
  const configuredPath =
    stringConfigValue(input.connection, 'path') ?? duckDbPathFromUrl(stringConfigValue(input.connection, 'url') ?? '');
  if (!configuredPath) {
    throw new Error(`connections.${input.connectionId}.path or url is required`);
  }
  // An env:/file: reference can still resolve to ':memory:' after the literal checks above.
  if (configuredPath === ':memory:') {
    throw new Error('DuckDB in-memory connections are not supported');
  }
  return isAbsolute(configuredPath) ? configuredPath : resolve(input.projectDir ?? process.cwd(), configuredPath);
}

/**
 * Verifies that `dbPath` exists and is a regular file.
 *
 * @throws Error with a distinct message for a missing path, a directory, or
 *   any other non-file entry.
 */
export function assertDuckDbDatabaseFile(dbPath: string): void {
  if (!existsSync(dbPath)) {
    throw new Error(`File not found: ${dbPath}`);
  }
  const stats = statSync(dbPath);
  if (stats.isDirectory()) {
    throw new Error(`Expected a DuckDB database file, got directory: ${dbPath}`);
  }
  if (!stats.isFile()) {
    throw new Error(`Expected a DuckDB database file, got non-file path: ${dbPath}`);
  }
}

/**
 * Scan connector for a file-backed DuckDB database.
 *
 * The database path is resolved eagerly in the constructor, so configuration
 * errors (wrong driver, missing path, `:memory:`) surface at construction time.
 * File existence is only checked by {@link KtxDuckDbScanConnector.testConnection}.
 */
export class KtxDuckDbScanConnector implements KtxScanConnector {
  readonly id: string;
  readonly driver = 'duckdb' as KtxConnectionDriver;
  readonly capabilities = createKtxConnectorCapabilities({
    tableSampling: true,
    columnSampling: true,
    columnStats: false,
    readOnlySql: true,
    nestedAnalysis: false,
    formalForeignKeys: true,
    estimatedRowCounts: true,
  });

  private readonly connectionId: string;
  private readonly dbPath: string;
  private readonly nativeLoader: DuckDbNativeLoader;

  /**
   * @throws Error when the connection config cannot be resolved to a database
   *   path (see {@link duckDbDatabasePathFromConfig}).
   */
  constructor(options: KtxDuckDbScanConnectorOptions) {
    this.connectionId = options.connectionId;
    this.dbPath = duckDbDatabasePathFromConfig(options);
    this.nativeLoader = options.nativeLoader ?? { load: loadDuckDbNodeApi };
    this.id = `duckdb:${options.connectionId}`;
  }

  /**
   * Checks the database file, opens it with `access_mode: 'READ_ONLY'` and runs `SELECT 1`.
   *
   * Never rejects: every failure is reported as `{ success: false, error }`.
   */
  async testConnection(): Promise<{ success: boolean; error?: string }> {
    try {
      assertDuckDbDatabaseFile(this.dbPath);
      const { DuckDBInstance } = await this.nativeLoader.load();
      const instance = await DuckDBInstance.create(this.dbPath, { access_mode: 'READ_ONLY' });
      // The instance gets its own finally so it is closed even when connect() fails.
      try {
        const connection = await instance.connect();
        try {
          await connection.runAndReadAll('SELECT 1');
          return { success: true };
        } finally {
          connection.disconnectSync();
        }
      } finally {
        instance.closeSync();
      }
    } catch (error) {
      return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /**
   * Placeholder; always rejects.
   *
   * NOTE(review): the return type's generic argument was missing in the
   * reviewed text. `never` is used because the method cannot resolve —
   * confirm against the `KtxScanConnector` contract.
   */
  async introspect(): Promise<never> {
    throw new Error('DuckDB schema introspection is implemented in Task 2.');
  }

  /** No-op: `testConnection` releases everything it opens. */
  async cleanup(): Promise<void> {}
}
/** C library flavour of the running process, as far as it can be detected. */
export type DuckDbLibc = 'glibc' | 'musl' | 'unknown';

/** Platform triple used to decide whether native DuckDB bindings can be loaded. */
export interface DuckDbPlatformInfo {
  platform: NodeJS.Platform;
  arch: NodeJS.Architecture;
  libc: DuckDbLibc;
}

/**
 * Detects which C library the current process runs against.
 *
 * Returns `'glibc'` when the Node diagnostic report exposes a
 * `glibcVersionRuntime` header field. Otherwise, on Linux only, it returns
 * `'musl'` when a musl loader exists at one of two known locations or when the
 * contents of `/usr/bin/ldd` mention `musl`. Everything else is `'unknown'`.
 */
export function detectDuckDbLibc(): DuckDbLibc {
  const diagnostics = process.report?.getReport?.() as
    | { header?: { glibcVersionRuntime?: string } }
    | undefined;
  if (diagnostics?.header?.glibcVersionRuntime) {
    return 'glibc';
  }
  if (process.platform !== 'linux') {
    return 'unknown';
  }

  const candidates = ['/lib/ld-musl-x86_64.so.1', '/lib/ld-musl-aarch64.so.1', join('/usr', 'bin', 'ldd')];
  for (const candidate of candidates) {
    if (!existsSync(candidate)) {
      continue;
    }
    // A loader whose own name contains "musl" is conclusive without reading it.
    if (candidate.includes('musl')) {
      return 'musl';
    }
    try {
      if (readFileSync(candidate, 'utf-8').includes('musl')) {
        return 'musl';
      }
    } catch {
      // An unreadable candidate is simply not a hint; try the next one.
    }
  }
  return 'unknown';
}

/** Snapshot of the current process's platform, architecture and detected libc. */
export function currentDuckDbPlatform(): DuckDbPlatformInfo {
  const libc = detectDuckDbLibc();
  return { platform: process.platform, arch: process.arch, libc };
}

/**
 * Throws unless `info` describes a supported platform: macOS, Windows or Linux
 * on arm64/x64, with Linux additionally required not to be musl.
 *
 * @param info platform to check; defaults to the current process.
 * @throws Error naming the rejected platform and listing the supported ones.
 */
export function assertSupportedDuckDbPlatform(info: DuckDbPlatformInfo = currentDuckDbPlatform()): void {
  const archSupported = info.arch === 'arm64' || info.arch === 'x64';

  let supported: boolean;
  switch (info.platform) {
    case 'darwin':
    case 'win32':
      supported = archSupported;
      break;
    case 'linux':
      supported = archSupported && info.libc !== 'musl';
      break;
    default:
      supported = false;
  }
  if (supported) {
    return;
  }

  throw new Error(
    `DuckDB native bindings are not supported on ${info.platform} ${info.arch} ${info.libc}. ` +
      'KTX DuckDB v1 supports macOS arm64/x64, Windows arm64/x64, and Linux glibc arm64/x64.',
  );
}

/**
 * Wraps a failure to load `@duckdb/node-api` in an Error that names the platform.
 *
 * @param error the original failure; its message (or string form) is appended.
 * @param info platform to report; defaults to the current process.
 * @returns a new Error — the caller decides whether to throw it.
 */
export function formatDuckDbNativeLoadError(error: unknown, info = currentDuckDbPlatform()): Error {
  let detail: string;
  if (error instanceof Error) {
    detail = error.message;
  } else {
    detail = String(error);
  }
  const target = `${info.platform} ${info.arch} ${info.libc}`;
  return new Error(
    `@duckdb/node-api native bindings could not be loaded for ${target}. ` +
      `Install optional dependencies for @duckdb/node-api or use a supported platform. ${detail}`,
  );
}
'@duckdb/node-bindings-darwin-x64@1.5.2-r.1': + resolution: {integrity: sha512-SU9dIJ1BluKkkGxi4UsP4keqkkstB2YDySF9KcYu3EZKIVM3FTv2zc7XO38dXnHOq6+F3WqhWWZvD+XU945p7A==} + cpu: [x64] + os: [darwin] + + '@duckdb/node-bindings-linux-arm64@1.5.2-r.1': + resolution: {integrity: sha512-3Tra9xM3aM3denaER4KhJ6//6PpmPbik9ECBQ+sh9PyKaEgHw/0kAcKnLm5EzWUnXF0qYmZlewvkCrse8KmOYw==} + cpu: [arm64] + os: [linux] + + '@duckdb/node-bindings-linux-x64@1.5.2-r.1': + resolution: {integrity: sha512-pcQvZRHiIfJ9cq8parkSQczQHEml/IeGfnDCMAbEgD6+jaV9Y9Y5Ph1kP9aR+bm6him1S5ZIEr3kZbihjKnWbA==} + cpu: [x64] + os: [linux] + + '@duckdb/node-bindings-win32-arm64@1.5.2-r.1': + resolution: {integrity: sha512-Ji8tym+N3LkrhVt0Up3bsacD/kpg4/JXFJQqxswiYvBaNCQOk+D+aiVS0GN5pcqvmnG7V7TpsDRzkLEFaWp1vw==} + cpu: [arm64] + os: [win32] + + '@duckdb/node-bindings-win32-x64@1.5.2-r.1': + resolution: {integrity: sha512-5XqcqC+4R8ghBEEbnc2a0sqfz1zyPBRb9YcmIWfiuDoCYSYFbKhmHcEyNftZDHcwCoLOHXnUin45jraex4STqQ==} + cpu: [x64] + os: [win32] + + '@duckdb/node-bindings@1.5.2-r.1': + resolution: {integrity: sha512-bUg3bLVj70YVku6fKyQJS8ASORl7kM7YFVFznsEB9pWbtazPj+ME2x2FUk0WiTzjJdutjzSSGXF066mB4bGGZA==} + '@electric-sql/pglite-socket@0.1.5': resolution: {integrity: sha512-/RAye+3EPKfO9nY4tljzxXmkT7yIpFDm0L3F+c28b+Z6uxPOjy/Zz/QEHYHXcrfuUC88/a9S72EO0+3E0j97wQ==} hasBin: true @@ -7293,6 +7351,37 @@ snapshots: enabled: 2.0.0 kuler: 2.0.0 + '@duckdb/node-api@1.5.2-r.1': + dependencies: + '@duckdb/node-bindings': 1.5.2-r.1 + + '@duckdb/node-bindings-darwin-arm64@1.5.2-r.1': + optional: true + + '@duckdb/node-bindings-darwin-x64@1.5.2-r.1': + optional: true + + '@duckdb/node-bindings-linux-arm64@1.5.2-r.1': + optional: true + + '@duckdb/node-bindings-linux-x64@1.5.2-r.1': + optional: true + + '@duckdb/node-bindings-win32-arm64@1.5.2-r.1': + optional: true + + '@duckdb/node-bindings-win32-x64@1.5.2-r.1': + optional: true + + '@duckdb/node-bindings@1.5.2-r.1': + optionalDependencies: + '@duckdb/node-bindings-darwin-arm64': 1.5.2-r.1 
+ '@duckdb/node-bindings-darwin-x64': 1.5.2-r.1 + '@duckdb/node-bindings-linux-arm64': 1.5.2-r.1 + '@duckdb/node-bindings-linux-x64': 1.5.2-r.1 + '@duckdb/node-bindings-win32-arm64': 1.5.2-r.1 + '@duckdb/node-bindings-win32-x64': 1.5.2-r.1 + '@electric-sql/pglite-socket@0.1.5(@electric-sql/pglite@0.4.5)': dependencies: '@electric-sql/pglite': 0.4.5