fix(connections): enforce scan_enabled:false on explicit scan/ingest commands

`scan_enabled: false` promised that the connection is "never used as a scan/ingest
target," but the predicate only gated automatic selection. An explicit
`ktx scan <id>` or `ktx ingest <id>` still resolved the connection id and reached
the live-database introspection path, so an execute-only connection could still
be scanned or ingested.

Guard runKtxScan and runKtxIngest at entry: if the target connection is
execute-only, refuse with an actionable error (remove the flag to scan, or use
ktx sql to query) before doing any work. This makes the flag a single declaration
honored on every scan/ingest entry point, not just auto-selection.
This commit is contained in:
Andrey Avtomonov 2026-06-09 14:28:05 +02:00
parent f446d207ba
commit 9ac37166f5
5 changed files with 85 additions and 4 deletions

View file

@ -162,10 +162,12 @@ connections:
Set `scan_enabled: false` to register a warehouse for SQL execution only. The
connection is usable by `ktx sql` and the agent `sql_execution` tool, but **ktx**
never introspects, scans, or ingests it — and `ktx setup` validates the
credential without discovering or scanning its schemas. This is the supported way
to run read-only queries against shared or public data (for example a BigQuery
billing project full of unrelated datasets) without making it a context source.
never introspects, scans, or ingests it: automatic ingest skips it, `ktx setup`
validates the credential without discovering or scanning its schemas, and even an
explicit `ktx scan <id>` or `ktx ingest <id>` is refused with guidance. This is
the supported way to run read-only queries against shared or public data (for
example a BigQuery billing project full of unrelated datasets) without making it
a context source.
```yaml
connections:

View file

@ -8,6 +8,7 @@ import type { MemoryFlowEvent, MemoryFlowReplayInput } from './context/ingest/me
import { renderMemoryFlowReplay } from './context/ingest/memory-flow/render.js';
import type { KtxSqlQueryExecutorPort } from './context/connections/query-executor.js';
import { loadKtxProject, type KtxLocalProject } from './context/project/project.js';
import { isExecuteOnlyConnection } from './context/connections/local-warehouse-descriptor.js';
import { getKtxCliPackageInfo } from './cli-runtime.js';
import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
@ -695,6 +696,13 @@ export async function runKtxIngest(
const project = await loadKtxProject({ projectDir: args.projectDir });
const env = deps.env ?? process.env;
if (args.command === 'run') {
if (isExecuteOnlyConnection(project.config.connections[args.connectionId])) {
io.stderr.write(
`Connection '${args.connectionId}' is registered for SQL execution only (scan_enabled: false) and ` +
'cannot be ingested. Remove scan_enabled: false to make it a scan/ingest target, or use `ktx sql` to query it.\n',
);
return 1;
}
const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? resolveProjectEmbeddingProvider;
const resolution = await resolveEmbeddingProvider(project, {
mode: 'ensure',

View file

@ -1,6 +1,7 @@
import type { KtxProgressPort, KtxScanMode, KtxScanReport, KtxScanWarning } from './context/scan/types.js';
import { runLocalScan } from './context/scan/local-scan.js';
import { loadKtxProject, type KtxLocalProject } from './context/project/project.js';
import { isExecuteOnlyConnection } from './context/connections/local-warehouse-descriptor.js';
import { getKtxCliPackageInfo } from './cli-runtime.js';
import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
import type { KtxCliIo } from './index.js';
@ -326,6 +327,13 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps
let project: KtxLocalProject | undefined;
try {
project = await loadKtxProject({ projectDir: args.projectDir });
if (isExecuteOnlyConnection(project.config.connections[args.connectionId])) {
io.stderr.write(
`Connection '${args.connectionId}' is registered for SQL execution only (scan_enabled: false) and ` +
'cannot be scanned. Remove scan_enabled: false to make it a scan target, or use `ktx sql` to query it.\n',
);
return 1;
}
const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? resolveProjectEmbeddingProvider;
const resolution = await resolveEmbeddingProvider(project, {
mode: 'ensure',

View file

@ -54,6 +54,40 @@ describe('runKtxIngest', () => {
await rm(tempDir, { recursive: true, force: true });
});
// Regression test: an explicit `ingest run` that targets a connection declared
// with `scan_enabled: false` must exit with code 1, explain why on stderr, and
// never reach the injected local-ingest runner.
it('refuses to ingest a connection marked execute-only (scan_enabled: false)', async () => {
  const projectDir = join(tempDir, 'project');
  await initKtxProject({ projectDir });

  // YAML nesting is indentation-sensitive: `driver` and `scan_enabled` must sit
  // one level below `public_bq`, otherwise they become sibling connection ids
  // and `connections.public_bq` is null, so the guard is never exercised.
  const configYaml = [
    'connections:',
    '  public_bq:',
    '    driver: bigquery',
    '    scan_enabled: false',
    'ingest:',
    '  adapters:',
    '    - fake',
    '  embeddings:',
    '    backend: none',
    '',
  ].join('\n');
  await writeFile(join(projectDir, 'ktx.yaml'), configYaml, 'utf-8');

  const runLocal = vi.fn();
  const io = makeIo();

  await expect(
    runKtxIngest(
      { command: 'run', projectDir, connectionId: 'public_bq', adapter: 'fake', outputMode: 'plain' },
      io.io,
      { runLocalIngest: runLocal },
    ),
  ).resolves.toBe(1);

  // The refusal must happen before any ingest work is dispatched.
  expect(runLocal).not.toHaveBeenCalled();
  // Check both the flag name and the refusal wording, so an unrelated error
  // that merely mentions the flag cannot satisfy the test.
  expect(io.stderr()).toContain('scan_enabled: false');
  expect(io.stderr()).toContain('cannot be ingested');
});
it('runs local ingest and reads status', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);

View file

@ -332,6 +332,35 @@ describe('runKtxScan', () => {
await rm(tempDir, { recursive: true, force: true });
});
// Regression test: an explicit `scan run` that targets a connection declared
// with `scan_enabled: false` must exit with code 1, explain why on stderr, and
// never reach the injected local-scan runner.
it('refuses to scan a connection marked execute-only (scan_enabled: false)', async () => {
  await initKtxProject({ projectDir: tempDir });

  // YAML nesting is indentation-sensitive: `driver` and `scan_enabled` must sit
  // one level below `public_bq`, otherwise they become sibling connection ids
  // and `connections.public_bq` is null, so the guard is never exercised.
  const configYaml = [
    'connections:',
    '  public_bq:',
    '    driver: bigquery',
    '    scan_enabled: false',
    '',
  ].join('\n');
  await writeFile(join(tempDir, 'ktx.yaml'), configYaml, 'utf-8');

  const runLocalScan = vi.fn();
  const io = makeIo();

  await expect(
    runKtxScan(
      {
        command: 'run',
        projectDir: tempDir,
        connectionId: 'public_bq',
        mode: 'structural',
        detectRelationships: false,
        dryRun: false,
      },
      io.io,
      { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
    ),
  ).resolves.toBe(1);

  // The refusal must happen before any scan work is dispatched.
  expect(runLocalScan).not.toHaveBeenCalled();
  // Check both the flag name and the refusal wording, so an unrelated error
  // that merely mentions the flag cannot satisfy the test.
  expect(io.stderr()).toContain('scan_enabled: false');
  expect(io.stderr()).toContain('cannot be scanned');
});
it('runs structural scans and prints a dev-friendly plain summary', async () => {
await initKtxProject({ projectDir: tempDir });
const runLocalScan = vi.fn(