From 9ac37166f588725d1227eafe920ebd55a8bf402d Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 9 Jun 2026 14:28:05 +0200 Subject: [PATCH] fix(connections): enforce scan_enabled:false on explicit scan/ingest commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scan_enabled:false promised the connection is 'never used as a scan/ingest target,' but the predicate only gated automatic selection — explicit ktx scan / ktx ingest still resolved the connection id and reached the live-database introspection path, so an execute-only connection could still be scanned or ingested. Guard runKtxScan and runKtxIngest at entry: if the target connection is execute-only, refuse with an actionable error (remove the flag to scan, or use ktx sql to query) before doing any work. This makes the flag a single declaration honored on every scan/ingest entry point, not just auto-selection. --- .../content/docs/configuration/ktx-yaml.mdx | 10 +++--- packages/cli/src/ingest.ts | 8 +++++ packages/cli/src/scan.ts | 8 +++++ packages/cli/test/ingest.test.ts | 34 +++++++++++++++++++ packages/cli/test/scan.test.ts | 29 ++++++++++++++++ 5 files changed, 85 insertions(+), 4 deletions(-) diff --git a/docs-site/content/docs/configuration/ktx-yaml.mdx b/docs-site/content/docs/configuration/ktx-yaml.mdx index a98773dd..182a4bea 100644 --- a/docs-site/content/docs/configuration/ktx-yaml.mdx +++ b/docs-site/content/docs/configuration/ktx-yaml.mdx @@ -162,10 +162,12 @@ connections: Set `scan_enabled: false` to register a warehouse for SQL execution only. The connection is usable by `ktx sql` and the agent `sql_execution` tool, but **ktx** -never introspects, scans, or ingests it — and `ktx setup` validates the -credential without discovering or scanning its schemas. This is the supported way -to run read-only queries against shared or public data (for example a BigQuery -billing project full of unrelated datasets) without making it a context source. 
+never introspects, scans, or ingests it: automatic ingest skips it, `ktx setup` +validates the credential without discovering or scanning its schemas, and even an +explicit `ktx scan <connection-id>` or `ktx ingest <connection-id>` is refused with guidance. This is +the supported way to run read-only queries against shared or public data (for +example a BigQuery billing project full of unrelated datasets) without making it +a context source. ```yaml connections: diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index 233b1b6e..46b04692 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -8,6 +8,7 @@ import type { MemoryFlowEvent, MemoryFlowReplayInput } from './context/ingest/me import { renderMemoryFlowReplay } from './context/ingest/memory-flow/render.js'; import type { KtxSqlQueryExecutorPort } from './context/connections/query-executor.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; +import { isExecuteOnlyConnection } from './context/connections/local-warehouse-descriptor.js'; import { getKtxCliPackageInfo } from './cli-runtime.js'; import { resolveProjectEmbeddingProvider } from './embedding-resolution.js'; import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js'; @@ -695,6 +696,13 @@ export async function runKtxIngest( const project = await loadKtxProject({ projectDir: args.projectDir }); const env = deps.env ?? process.env; if (args.command === 'run') { + if (isExecuteOnlyConnection(project.config.connections[args.connectionId])) { + io.stderr.write( + `Connection '${args.connectionId}' is registered for SQL execution only (scan_enabled: false) and ` + + 'cannot be ingested. Remove scan_enabled: false to make it a scan/ingest target, or use `ktx sql` to query it.\n', + ); + return 1; + } const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? 
resolveProjectEmbeddingProvider; const resolution = await resolveEmbeddingProvider(project, { mode: 'ensure', diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts index 5961e3f1..8169cf08 100644 --- a/packages/cli/src/scan.ts +++ b/packages/cli/src/scan.ts @@ -1,6 +1,7 @@ import type { KtxProgressPort, KtxScanMode, KtxScanReport, KtxScanWarning } from './context/scan/types.js'; import { runLocalScan } from './context/scan/local-scan.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; +import { isExecuteOnlyConnection } from './context/connections/local-warehouse-descriptor.js'; import { getKtxCliPackageInfo } from './cli-runtime.js'; import { resolveProjectEmbeddingProvider } from './embedding-resolution.js'; import type { KtxCliIo } from './index.js'; @@ -326,6 +327,13 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps let project: KtxLocalProject | undefined; try { project = await loadKtxProject({ projectDir: args.projectDir }); + if (isExecuteOnlyConnection(project.config.connections[args.connectionId])) { + io.stderr.write( + `Connection '${args.connectionId}' is registered for SQL execution only (scan_enabled: false) and ` + + 'cannot be scanned. Remove scan_enabled: false to make it a scan target, or use `ktx sql` to query it.\n', + ); + return 1; + } const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? 
resolveProjectEmbeddingProvider; const resolution = await resolveEmbeddingProvider(project, { mode: 'ensure', diff --git a/packages/cli/test/ingest.test.ts b/packages/cli/test/ingest.test.ts index c1abfe8b..e2e965d3 100644 --- a/packages/cli/test/ingest.test.ts +++ b/packages/cli/test/ingest.test.ts @@ -54,6 +54,40 @@ describe('runKtxIngest', () => { await rm(tempDir, { recursive: true, force: true }); }); + it('refuses to ingest a connection marked execute-only (scan_enabled: false)', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'connections:', + ' public_bq:', + ' driver: bigquery', + ' scan_enabled: false', + 'ingest:', + ' adapters:', + ' - fake', + ' embeddings:', + ' backend: none', + '', + ].join('\n'), + 'utf-8', + ); + const runLocal = vi.fn(); + const io = makeIo(); + + await expect( + runKtxIngest( + { command: 'run', projectDir, connectionId: 'public_bq', adapter: 'fake', outputMode: 'plain' }, + io.io, + { runLocalIngest: runLocal }, + ), + ).resolves.toBe(1); + + expect(runLocal).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('scan_enabled: false'); + }); + it('runs local ingest and reads status', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/test/scan.test.ts b/packages/cli/test/scan.test.ts index 51c55498..2396dc7b 100644 --- a/packages/cli/test/scan.test.ts +++ b/packages/cli/test/scan.test.ts @@ -332,6 +332,35 @@ describe('runKtxScan', () => { await rm(tempDir, { recursive: true, force: true }); }); + it('refuses to scan a connection marked execute-only (scan_enabled: false)', async () => { + await initKtxProject({ projectDir: tempDir }); + await writeFile( + join(tempDir, 'ktx.yaml'), + ['connections:', ' public_bq:', ' driver: bigquery', ' scan_enabled: false', ''].join('\n'), + 'utf-8', + ); + const runLocalScan = vi.fn(); + const io = makeIo(); + 
+ await expect( + runKtxScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'public_bq', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, + ), + ).resolves.toBe(1); + + expect(runLocalScan).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('scan_enabled: false'); + }); + it('runs structural scans and prints a dev-friendly plain summary', async () => { await initKtxProject({ projectDir: tempDir }); const runLocalScan = vi.fn(