diff --git a/docs-site/content/docs/configuration/ktx-yaml.mdx b/docs-site/content/docs/configuration/ktx-yaml.mdx index a98773dd..182a4bea 100644 --- a/docs-site/content/docs/configuration/ktx-yaml.mdx +++ b/docs-site/content/docs/configuration/ktx-yaml.mdx @@ -162,10 +162,12 @@ connections: Set `scan_enabled: false` to register a warehouse for SQL execution only. The connection is usable by `ktx sql` and the agent `sql_execution` tool, but **ktx** -never introspects, scans, or ingests it — and `ktx setup` validates the -credential without discovering or scanning its schemas. This is the supported way -to run read-only queries against shared or public data (for example a BigQuery -billing project full of unrelated datasets) without making it a context source. +never introspects, scans, or ingests it: automatic ingest skips it, `ktx setup` +validates the credential without discovering or scanning its schemas, and even an +explicit `ktx scan <connection-id>` or `ktx ingest <connection-id>` is refused with guidance. This is +the supported way to run read-only queries against shared or public data (for +example a BigQuery billing project full of unrelated datasets) without making it +a context source. 
```yaml connections: diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index 233b1b6e..46b04692 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -8,6 +8,7 @@ import type { MemoryFlowEvent, MemoryFlowReplayInput } from './context/ingest/me import { renderMemoryFlowReplay } from './context/ingest/memory-flow/render.js'; import type { KtxSqlQueryExecutorPort } from './context/connections/query-executor.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; +import { isExecuteOnlyConnection } from './context/connections/local-warehouse-descriptor.js'; import { getKtxCliPackageInfo } from './cli-runtime.js'; import { resolveProjectEmbeddingProvider } from './embedding-resolution.js'; import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js'; @@ -695,6 +696,13 @@ export async function runKtxIngest( const project = await loadKtxProject({ projectDir: args.projectDir }); const env = deps.env ?? process.env; if (args.command === 'run') { + if (isExecuteOnlyConnection(project.config.connections[args.connectionId])) { + io.stderr.write( + `Connection '${args.connectionId}' is registered for SQL execution only (scan_enabled: false) and ` + + 'cannot be ingested. Remove scan_enabled: false to make it a scan/ingest target, or use `ktx sql` to query it.\n', + ); + return 1; + } const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? 
resolveProjectEmbeddingProvider; const resolution = await resolveEmbeddingProvider(project, { mode: 'ensure', diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts index 5961e3f1..8169cf08 100644 --- a/packages/cli/src/scan.ts +++ b/packages/cli/src/scan.ts @@ -1,6 +1,7 @@ import type { KtxProgressPort, KtxScanMode, KtxScanReport, KtxScanWarning } from './context/scan/types.js'; import { runLocalScan } from './context/scan/local-scan.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; +import { isExecuteOnlyConnection } from './context/connections/local-warehouse-descriptor.js'; import { getKtxCliPackageInfo } from './cli-runtime.js'; import { resolveProjectEmbeddingProvider } from './embedding-resolution.js'; import type { KtxCliIo } from './index.js'; @@ -326,6 +327,13 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps let project: KtxLocalProject | undefined; try { project = await loadKtxProject({ projectDir: args.projectDir }); + if (isExecuteOnlyConnection(project.config.connections[args.connectionId])) { + io.stderr.write( + `Connection '${args.connectionId}' is registered for SQL execution only (scan_enabled: false) and ` + + 'cannot be scanned. Remove scan_enabled: false to make it a scan target, or use `ktx sql` to query it.\n', + ); + return 1; + } const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? 
resolveProjectEmbeddingProvider; const resolution = await resolveEmbeddingProvider(project, { mode: 'ensure', diff --git a/packages/cli/test/ingest.test.ts b/packages/cli/test/ingest.test.ts index c1abfe8b..e2e965d3 100644 --- a/packages/cli/test/ingest.test.ts +++ b/packages/cli/test/ingest.test.ts @@ -54,6 +54,40 @@ describe('runKtxIngest', () => { await rm(tempDir, { recursive: true, force: true }); }); + it('refuses to ingest a connection marked execute-only (scan_enabled: false)', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'connections:', + ' public_bq:', + ' driver: bigquery', + ' scan_enabled: false', + 'ingest:', + ' adapters:', + ' - fake', + ' embeddings:', + ' backend: none', + '', + ].join('\n'), + 'utf-8', + ); + const runLocal = vi.fn(); + const io = makeIo(); + + await expect( + runKtxIngest( + { command: 'run', projectDir, connectionId: 'public_bq', adapter: 'fake', outputMode: 'plain' }, + io.io, + { runLocalIngest: runLocal }, + ), + ).resolves.toBe(1); + + expect(runLocal).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('scan_enabled: false'); + }); + it('runs local ingest and reads status', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/test/scan.test.ts b/packages/cli/test/scan.test.ts index 51c55498..2396dc7b 100644 --- a/packages/cli/test/scan.test.ts +++ b/packages/cli/test/scan.test.ts @@ -332,6 +332,35 @@ describe('runKtxScan', () => { await rm(tempDir, { recursive: true, force: true }); }); + it('refuses to scan a connection marked execute-only (scan_enabled: false)', async () => { + await initKtxProject({ projectDir: tempDir }); + await writeFile( + join(tempDir, 'ktx.yaml'), + ['connections:', ' public_bq:', ' driver: bigquery', ' scan_enabled: false', ''].join('\n'), + 'utf-8', + ); + const runLocalScan = vi.fn(); + const io = makeIo(); + 
+ await expect( + runKtxScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'public_bq', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, + ), + ).resolves.toBe(1); + + expect(runLocalScan).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('scan_enabled: false'); + }); + it('runs structural scans and prints a dev-friendly plain summary', async () => { await initKtxProject({ projectDir: tempDir }); const runLocalScan = vi.fn(