From 099c605dc5fa07f806ca1e707c8661277e33e7d0 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Mon, 18 May 2026 15:27:06 +0200 Subject: [PATCH] docs: document duckdb support --- README.md | 5 +-- .../docs/cli-reference/ktx-connection.mdx | 3 ++ .../content/docs/cli-reference/ktx-setup.mdx | 8 +++-- .../content/docs/cli-reference/ktx-sql.mdx | 3 ++ .../content/docs/community/contributing.mdx | 1 + .../docs/integrations/primary-sources.mdx | 36 +++++++++++++++++-- python/ktx-daemon/tests/test_sql_analysis.py | 24 +++++++++++++ python/ktx-sl/tests/test_generator.py | 3 ++ scripts/build-public-npm-package.mjs | 2 ++ scripts/examples-docs.test.mjs | 7 ++++ scripts/package-artifacts.mjs | 1 + 11 files changed, 86 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e058b828..0796802b 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,8 @@ Use KTX when you want agents to: - Explain metric provenance with warehouse evidence - Work alongside dbt, MetricFlow, LookML, Looker, Metabase, and Notion -Supports PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, and -SQLite. +Supports PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, +DuckDB, and SQLite. ## Quick Start @@ -143,6 +143,7 @@ artifacts. 
| `packages/llm` | LLM and embedding providers | | `packages/connector-bigquery` | BigQuery scan connector | | `packages/connector-clickhouse` | ClickHouse scan connector | +| `packages/connector-duckdb` | DuckDB scan connector | | `packages/connector-mysql` | MySQL scan connector | | `packages/connector-postgres` | Postgres scan connector | | `packages/connector-snowflake` | Snowflake scan connector | diff --git a/docs-site/content/docs/cli-reference/ktx-connection.mdx b/docs-site/content/docs/cli-reference/ktx-connection.mdx index 2d61451f..6cf26311 100644 --- a/docs-site/content/docs/cli-reference/ktx-connection.mdx +++ b/docs-site/content/docs/cli-reference/ktx-connection.mdx @@ -70,6 +70,9 @@ Native database connections report `Status: ok` when the connector probe passes. Source connectors report connector-specific details such as Metabase database count, Looker user, Notion bot, or Git repo URL. +DuckDB connection tests open the configured file read-only. Missing files are +reported as `File not found: <path>` and are not created. + ```text Connection test passed: my-warehouse Driver: postgres diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 562b5f28..ab382192 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -100,10 +100,10 @@ runtime features are missing. | Flag | Description | |------|-------------| -| `--database <driver>` | Database driver to configure; repeatable. Choices: `sqlite`, `postgres`, `mysql`, `clickhouse`, `sqlserver`, `bigquery`, `snowflake` | +| `--database <driver>` | Database driver to configure; repeatable. 
Choices: `sqlite`, `duckdb`, `postgres`, `mysql`, `clickhouse`, `sqlserver`, `bigquery`, `snowflake` | | `--database-connection-id <id>` | Existing selected connection id; repeatable | | `--new-database-connection-id <id>` | Connection id for one new database connection | -| `--database-url <url>` | URL, `env:NAME`, or `file:/path` for one new URL-style database connection; also used as the SQLite path | +| `--database-url <url>` | URL, `env:NAME`, or `file:/path` for one new URL-style database connection; also used as the SQLite or DuckDB path | | `--database-schema <schema>` | Database schema or dataset to include; repeatable | | `--skip-databases` | Leave database setup incomplete | @@ -111,6 +111,10 @@ KTX needs at least one database connection before it can build database context. Use `--skip-databases` only when intentionally leaving the project incomplete. +```bash +ktx setup --new --database duckdb --new-database-connection-id warehouse --database-url ./data/warehouse.duckdb +``` + ### Query History | Flag | Description | |------|-------------| diff --git a/docs-site/content/docs/cli-reference/ktx-sql.mdx b/docs-site/content/docs/cli-reference/ktx-sql.mdx index ae4c9990..c959028a 100644 --- a/docs-site/content/docs/cli-reference/ktx-sql.mdx +++ b/docs-site/content/docs/cli-reference/ktx-sql.mdx @@ -35,6 +35,9 @@ Quote SQL in shell scripts and when the query contains spaces or punctuation. 
# Count rows in a table ktx sql --connection warehouse "select count(*) from public.orders" +# Count rows in a DuckDB table +ktx sql --connection warehouse "select count(*) as rows from orders" + # Return a small result set ktx sql \ --connection warehouse \ diff --git a/docs-site/content/docs/community/contributing.mdx b/docs-site/content/docs/community/contributing.mdx index 791d865c..6a693a9d 100644 --- a/docs-site/content/docs/community/contributing.mdx +++ b/docs-site/content/docs/community/contributing.mdx @@ -92,6 +92,7 @@ packages/ connector-snowflake/ # Snowflake connector connector-bigquery/ # BigQuery connector connector-clickhouse/ # ClickHouse connector + connector-duckdb/ # DuckDB connector connector-mysql/ # MySQL connector connector-sqlserver/ # SQL Server connector connector-sqlite/ # SQLite connector diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index 00cc39aa..0390c549 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -1,6 +1,6 @@ --- title: Primary Sources -description: Connect KTX to PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite. +description: Connect KTX to PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, DuckDB, or SQLite. --- KTX connects to your data warehouse or database to build schema context, @@ -21,12 +21,12 @@ Agents should prefer environment or file references over literal secrets. 
| Field | Required | Applies to | Description | |-------|----------|------------|-------------| -| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `clickhouse`, `mysql`, `sqlserver`, or `sqlite` | +| `driver` | Yes | all connections | Connector driver such as `postgres`, `snowflake`, `bigquery`, `clickhouse`, `mysql`, `sqlserver`, `duckdb`, or `sqlite` | | `url` | One of the connection methods | URL-style connectors | Database URL, `env:NAME`, or `file:/path/to/secret` | | `host`, `port`, `database`, `username`, `password` | One of the connection methods | PostgreSQL, MySQL, ClickHouse, SQL Server | Field-by-field connection values | | `schema` or `schemas` | No | schema-aware warehouses | Single schema or list of schemas to scan | | `context.queryHistory` | No | PostgreSQL, Snowflake, BigQuery | Enables query-history ingestion when the warehouse supports it | -| `path` | Yes for path-style SQLite | SQLite | Local SQLite database path or `env:NAME` reference | +| `path` | Yes for path-style SQLite and DuckDB | SQLite, DuckDB | Local database path or `env:NAME` reference | | `max_bytes_billed` | No | BigQuery | Maximum bytes billed per query job | | `job_timeout_ms` | No | BigQuery | BigQuery query job timeout in milliseconds | | `project_id` | No | BigQuery | Optional local descriptor and mapping metadata; not used for BigQuery authentication | @@ -502,6 +502,36 @@ No authentication required - SQLite is file-based. The file must be readable by - Foreign key enforcement requires explicit `PRAGMA foreign_keys = ON` - Database file must exist before `ktx connection test` or ingest runs +## DuckDB + +File-backed local connector using `@duckdb/node-api`. DuckDB support is +local-file only in v1. 
+ +### Connection config + +```yaml title="ktx.yaml" +connections: + warehouse: + driver: duckdb + path: data/warehouse.duckdb +``` + +KTX opens the configured file read-only and fails if the file is missing, +points to a directory, or uses `:memory:`. + +### Features + +| Feature | Supported | Notes | +|---------|-----------|-------| +| Tables & views | Yes | Via `information_schema.tables` | +| Primary keys | Yes | Via `duckdb_constraints()` | +| Foreign keys | Yes | Via `duckdb_constraints()` | +| Row count estimates | Yes | Exact count via `SELECT COUNT(*)` | +| Column statistics | No | - | +| Query history | No | - | +| Table sampling | Yes | - | +| Nested analysis | No | - | + ## Common errors | Error or symptom | Likely cause | Recovery | diff --git a/python/ktx-daemon/tests/test_sql_analysis.py b/python/ktx-daemon/tests/test_sql_analysis.py index 855d16fd..37c85a60 100644 --- a/python/ktx-daemon/tests/test_sql_analysis.py +++ b/python/ktx-daemon/tests/test_sql_analysis.py @@ -129,3 +129,27 @@ def test_validate_read_only_sql_reports_parse_errors() -> None: assert response.ok is False assert response.error is not None assert "Invalid expression" in response.error + + +def test_validate_read_only_sql_accepts_duckdb_select() -> None: + response = validate_read_only_sql_response( + ValidateReadOnlySqlRequest( + dialect="duckdb", + sql="select * from read_csv_auto('orders.csv') limit 10", + ) + ) + + assert response.ok is True + assert response.error is None + + +def test_validate_read_only_sql_rejects_duckdb_mutation() -> None: + response = validate_read_only_sql_response( + ValidateReadOnlySqlRequest( + dialect="duckdb", + sql="create table copied as select 1", + ) + ) + + assert response.ok is False + assert response.error diff --git a/python/ktx-sl/tests/test_generator.py b/python/ktx-sl/tests/test_generator.py index 9ef147ea..4f748180 100644 --- a/python/ktx-sl/tests/test_generator.py +++ b/python/ktx-sl/tests/test_generator.py @@ -715,6 +715,8 @@ class 
TestGeneratorEdgeCases: assert_valid_sql(result.sql) def test_dialect_duckdb(self): + import sqlglot + engine = SemanticEngine(SOURCES_DIR, dialect="duckdb") result = engine.query( { @@ -724,6 +726,7 @@ class TestGeneratorEdgeCases: ) assert result.dialect == "duckdb" assert result.sql + sqlglot.parse_one(result.sql, read="duckdb") def test_dialect_mysql(self): engine = SemanticEngine(SOURCES_DIR, dialect="mysql") diff --git a/scripts/build-public-npm-package.mjs b/scripts/build-public-npm-package.mjs index 0e34ae6d..f173c306 100644 --- a/scripts/build-public-npm-package.mjs +++ b/scripts/build-public-npm-package.mjs @@ -25,6 +25,7 @@ export const PUBLIC_BUNDLED_WORKSPACE_PACKAGES = [ '@ktx/context', '@ktx/connector-bigquery', '@ktx/connector-clickhouse', + '@ktx/connector-duckdb', '@ktx/connector-mysql', '@ktx/connector-postgres', '@ktx/connector-snowflake', @@ -37,6 +38,7 @@ export const PUBLIC_BUNDLED_WORKSPACE_PACKAGE_ROOTS = { '@ktx/context': 'packages/context', '@ktx/connector-bigquery': 'packages/connector-bigquery', '@ktx/connector-clickhouse': 'packages/connector-clickhouse', + '@ktx/connector-duckdb': 'packages/connector-duckdb', '@ktx/connector-mysql': 'packages/connector-mysql', '@ktx/connector-postgres': 'packages/connector-postgres', '@ktx/connector-snowflake': 'packages/connector-snowflake', diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 2f6c9ef6..0e003d86 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -152,6 +152,7 @@ describe('standalone example docs', () => { assert.match(contributing, /llm\/\s+# LLM client abstraction/); assert.match(contributing, /connector-bigquery\/\s+# BigQuery connector/); assert.match(contributing, /connector-clickhouse\/\s+# ClickHouse connector/); + assert.match(contributing, /connector-duckdb\/\s+# DuckDB connector/); assert.match(contributing, /connector-mysql\/\s+# MySQL connector/); assert.match(contributing, /connector-postgres\/\s+# PostgreSQL 
connector/); assert.match(contributing, /connector-snowflake\/\s+# Snowflake connector/); @@ -161,6 +162,12 @@ describe('standalone example docs', () => { assert.match(contributing, /ktx-daemon\/\s+# Daemon/); }); + it('lists the DuckDB connector in the root package table', async () => { + const readme = await readText('README.md'); + + assert.match(readme, /\| `packages\/connector-duckdb` \| DuckDB scan connector \|/); + }); + it('documents agent-facing CLI commands', async () => { const servingAgents = await readText('docs-site/content/docs/guides/serving-agents.mdx'); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 8e1f174d..b857914d 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -29,6 +29,7 @@ export const INTERNAL_NPM_WORKSPACE_PACKAGES = [ { name: '@ktx/llm', packageRoot: 'packages/llm' }, { name: '@ktx/connector-bigquery', packageRoot: 'packages/connector-bigquery' }, { name: '@ktx/connector-clickhouse', packageRoot: 'packages/connector-clickhouse' }, + { name: '@ktx/connector-duckdb', packageRoot: 'packages/connector-duckdb' }, { name: '@ktx/connector-mysql', packageRoot: 'packages/connector-mysql' }, { name: '@ktx/connector-postgres', packageRoot: 'packages/connector-postgres' }, { name: '@ktx/connector-snowflake', packageRoot: 'packages/connector-snowflake' },