mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
fix(connections): enforce scan_enabled:false on explicit scan/ingest commands
`scan_enabled: false` promised that the connection is 'never used as a scan/ingest target', but the predicate only gated automatic selection: an explicit `ktx scan <id>` or `ktx ingest <id>` still resolved the connection id and reached the live-database introspection path, so an execute-only connection could still be scanned or ingested. Guard `runKtxScan` and `runKtxIngest` at entry: if the target connection is execute-only, refuse with an actionable error (remove the flag to scan, or use `ktx sql` to query) before doing any work. This makes the flag a single declaration honored on every scan/ingest entry point, not just auto-selection.
This commit is contained in:
parent
f446d207ba
commit
9ac37166f5
5 changed files with 85 additions and 4 deletions
|
|
@ -162,10 +162,12 @@ connections:
|
|||
|
||||
Set `scan_enabled: false` to register a warehouse for SQL execution only. The
|
||||
connection is usable by `ktx sql` and the agent `sql_execution` tool, but **ktx**
|
||||
never introspects, scans, or ingests it — and `ktx setup` validates the
|
||||
credential without discovering or scanning its schemas. This is the supported way
|
||||
to run read-only queries against shared or public data (for example a BigQuery
|
||||
billing project full of unrelated datasets) without making it a context source.
|
||||
never introspects, scans, or ingests it: automatic ingest skips it, `ktx setup`
|
||||
validates the credential without discovering or scanning its schemas, and even an
|
||||
explicit `ktx scan <id>` or `ktx ingest <id>` is refused with guidance. This is
|
||||
the supported way to run read-only queries against shared or public data (for
|
||||
example a BigQuery billing project full of unrelated datasets) without making it
|
||||
a context source.
|
||||
|
||||
```yaml
|
||||
connections:
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import type { MemoryFlowEvent, MemoryFlowReplayInput } from './context/ingest/me
|
|||
import { renderMemoryFlowReplay } from './context/ingest/memory-flow/render.js';
|
||||
import type { KtxSqlQueryExecutorPort } from './context/connections/query-executor.js';
|
||||
import { loadKtxProject, type KtxLocalProject } from './context/project/project.js';
|
||||
import { isExecuteOnlyConnection } from './context/connections/local-warehouse-descriptor.js';
|
||||
import { getKtxCliPackageInfo } from './cli-runtime.js';
|
||||
import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
|
||||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||
|
|
@ -695,6 +696,13 @@ export async function runKtxIngest(
|
|||
const project = await loadKtxProject({ projectDir: args.projectDir });
|
||||
const env = deps.env ?? process.env;
|
||||
if (args.command === 'run') {
|
||||
if (isExecuteOnlyConnection(project.config.connections[args.connectionId])) {
|
||||
io.stderr.write(
|
||||
`Connection '${args.connectionId}' is registered for SQL execution only (scan_enabled: false) and ` +
|
||||
'cannot be ingested. Remove scan_enabled: false to make it a scan/ingest target, or use `ktx sql` to query it.\n',
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? resolveProjectEmbeddingProvider;
|
||||
const resolution = await resolveEmbeddingProvider(project, {
|
||||
mode: 'ensure',
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { KtxProgressPort, KtxScanMode, KtxScanReport, KtxScanWarning } from './context/scan/types.js';
|
||||
import { runLocalScan } from './context/scan/local-scan.js';
|
||||
import { loadKtxProject, type KtxLocalProject } from './context/project/project.js';
|
||||
import { isExecuteOnlyConnection } from './context/connections/local-warehouse-descriptor.js';
|
||||
import { getKtxCliPackageInfo } from './cli-runtime.js';
|
||||
import { resolveProjectEmbeddingProvider } from './embedding-resolution.js';
|
||||
import type { KtxCliIo } from './index.js';
|
||||
|
|
@ -326,6 +327,13 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps
|
|||
let project: KtxLocalProject | undefined;
|
||||
try {
|
||||
project = await loadKtxProject({ projectDir: args.projectDir });
|
||||
if (isExecuteOnlyConnection(project.config.connections[args.connectionId])) {
|
||||
io.stderr.write(
|
||||
`Connection '${args.connectionId}' is registered for SQL execution only (scan_enabled: false) and ` +
|
||||
'cannot be scanned. Remove scan_enabled: false to make it a scan target, or use `ktx sql` to query it.\n',
|
||||
);
|
||||
return 1;
|
||||
}
|
||||
const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? resolveProjectEmbeddingProvider;
|
||||
const resolution = await resolveEmbeddingProvider(project, {
|
||||
mode: 'ensure',
|
||||
|
|
|
|||
|
|
@ -54,6 +54,40 @@ describe('runKtxIngest', () => {
|
|||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('refuses to ingest a connection marked execute-only (scan_enabled: false)', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await initKtxProject({ projectDir });
|
||||
await writeFile(
|
||||
join(projectDir, 'ktx.yaml'),
|
||||
[
|
||||
'connections:',
|
||||
' public_bq:',
|
||||
' driver: bigquery',
|
||||
' scan_enabled: false',
|
||||
'ingest:',
|
||||
' adapters:',
|
||||
' - fake',
|
||||
' embeddings:',
|
||||
' backend: none',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const runLocal = vi.fn();
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxIngest(
|
||||
{ command: 'run', projectDir, connectionId: 'public_bq', adapter: 'fake', outputMode: 'plain' },
|
||||
io.io,
|
||||
{ runLocalIngest: runLocal },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(runLocal).not.toHaveBeenCalled();
|
||||
expect(io.stderr()).toContain('scan_enabled: false');
|
||||
});
|
||||
|
||||
it('runs local ingest and reads status', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
|
|
|
|||
|
|
@ -332,6 +332,35 @@ describe('runKtxScan', () => {
|
|||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('refuses to scan a connection marked execute-only (scan_enabled: false)', async () => {
|
||||
await initKtxProject({ projectDir: tempDir });
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
['connections:', ' public_bq:', ' driver: bigquery', ' scan_enabled: false', ''].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const runLocalScan = vi.fn();
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxScan(
|
||||
{
|
||||
command: 'run',
|
||||
projectDir: tempDir,
|
||||
connectionId: 'public_bq',
|
||||
mode: 'structural',
|
||||
detectRelationships: false,
|
||||
dryRun: false,
|
||||
},
|
||||
io.io,
|
||||
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(runLocalScan).not.toHaveBeenCalled();
|
||||
expect(io.stderr()).toContain('scan_enabled: false');
|
||||
});
|
||||
|
||||
it('runs structural scans and prints a dev-friendly plain summary', async () => {
|
||||
await initKtxProject({ projectDir: tempDir });
|
||||
const runLocalScan = vi.fn(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue