mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
feat(context): add warehouse verification tools (#46)
* feat(context): add warehouse dialect dispatch
* feat(context): read warehouse scan catalog
* feat(context): add entity details verification tool
* feat(context): add ingest SQL verification tool
* feat(context): add raw warehouse discovery tool
* feat(context): expose warehouse verification tools to ingest
* docs(context): add ingest identifier verification protocol
* test(context): guard ingest identifier verification prompts
* chore(context): verify warehouse verification tools
* docs: add warehouse verification tools plan and spec
* fix(context): expose target warehouses to Notion ingest
* fix(context): update ingest prompts for warehouse verification tools
* fix(context): scope raw schema discovery to allowed connections
* fix(context): verify warehouse column display targets
* docs: add notion warehouse verification gap closure plan
* fix(context): include raw discovery connection names
* fix(context): expose warehouse targets for LookML and MetricFlow
* fix(context): pass connection config to ingest query executors
* fix(cli): enable read-only SQL probes for local ingest
* docs: add warehouse verification final v1 closure plan
* fix(context): align warehouse sql probe prompt shape
* docs: add warehouse verification prompt shape closure plan
* test(context): catch connectionless sql execution prompt examples
* fix(context): include connection name in sl capture sql example
* docs: add warehouse verification sql example closure plan
* fix(context): report structured entity detail misses
* docs: add warehouse verification structured target miss closure plan
* fix: report untracked squash merge conflicts
* feat: require ingest verification ledger
* fix: stabilize ingest wiki references
This commit is contained in:
parent
bcb0d2f8f7
commit
c22248dabf
89 changed files with 7818 additions and 191 deletions
86
packages/cli/src/ingest-query-executor.test.ts
Normal file
86
packages/cli/src/ingest-query-executor.test.ts
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import type { KtxLocalProject } from '@ktx/context/project';
|
||||
import { createKtxConnectorCapabilities, type KtxScanConnector } from '@ktx/context/scan';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||
|
||||
/**
 * Builds the minimal project fixture used by these tests: a project directory
 * plus a config declaring a single Postgres connection named `warehouse`.
 *
 * Only `projectDir` and `config` are populated, so the literal is cast through
 * `unknown` to stand in for a full `KtxLocalProject`.
 */
function project(): KtxLocalProject {
  const connections = {
    warehouse: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },
  };
  const fixture = {
    projectDir: '/tmp/ktx-query-project',
    config: { project: 'warehouse', connections },
  };
  return fixture as unknown as KtxLocalProject;
}
|
||||
|
||||
/**
 * Creates a stub scan connector for the `warehouse` Postgres connection.
 *
 * By default the stub advertises read-only SQL support, resolves
 * `executeReadOnly` with a single `answer` row, and exposes a spy-able
 * `cleanup`. `introspect` always throws because these tests never call it.
 *
 * @param overrides Properties that replace the defaults; they are applied last,
 *   so an explicit `undefined` removes a default member.
 */
function connector(overrides: Partial<KtxScanConnector> = {}): KtxScanConnector {
  const defaults: KtxScanConnector = {
    id: 'warehouse',
    driver: 'postgres',
    capabilities: createKtxConnectorCapabilities({ readOnlySql: true }),
    async introspect() {
      throw new Error('introspect is not used by this test');
    },
    executeReadOnly: vi.fn(async () => {
      return {
        headers: ['answer'],
        rows: [[1]],
        totalRows: 1,
        rowCount: 1,
      };
    }),
    cleanup: vi.fn(async () => {}),
  };

  return { ...defaults, ...overrides };
}
|
||||
|
||||
// Behavioural tests for the CLI ingest query executor: the happy path through a
// read-only capable connector, and the rejection path for connectors that
// cannot run read-only SQL. Both paths must release the connector exactly once.
describe('createKtxCliIngestQueryExecutor', () => {
  it('executes read-only SQL through the scan connector and cleans it up', async () => {
    const stubConnector = connector();
    const createConnector = vi.fn(async () => stubConnector);
    const queryExecutor = createKtxCliIngestQueryExecutor(project(), { createConnector });

    // Start the execution first, then assert on the pending promise.
    const pendingResult = queryExecutor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },
      projectDir: '/tmp/ktx-query-project',
      sql: 'select 1',
      maxRows: 5,
    });

    await expect(pendingResult).resolves.toMatchObject({
      headers: ['answer'],
      rows: [[1]],
      totalRows: 1,
      command: 'SELECT',
      rowCount: 1,
    });

    // The connector is resolved from the project and the connection id.
    expect(createConnector).toHaveBeenCalledWith(project(), 'warehouse');
    // Only the id, SQL text, and row cap are forwarded to the connector.
    expect(stubConnector.executeReadOnly).toHaveBeenCalledWith(
      { connectionId: 'warehouse', sql: 'select 1', maxRows: 5 },
      { runId: 'ingest-sql-execution' },
    );
    expect(stubConnector.cleanup).toHaveBeenCalledTimes(1);
  });

  it('rejects connectors without read-only SQL support', async () => {
    const unsupportedConnector = connector({
      capabilities: createKtxConnectorCapabilities({ readOnlySql: false }),
      executeReadOnly: undefined,
    });
    const queryExecutor = createKtxCliIngestQueryExecutor(project(), {
      createConnector: vi.fn(async () => unsupportedConnector),
    });

    const pendingResult = queryExecutor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres' },
      projectDir: '/tmp/ktx-query-project',
      sql: 'select 1',
    });

    await expect(pendingResult).rejects.toThrow(
      'Connection "warehouse" driver "postgres" does not support read-only SQL execution.',
    );
    // Cleanup still runs even though no SQL was executed.
    expect(unsupportedConnector.cleanup).toHaveBeenCalledTimes(1);
  });
});
|
||||
49
packages/cli/src/ingest-query-executor.ts
Normal file
49
packages/cli/src/ingest-query-executor.ts
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutorPort } from '@ktx/context/connections';
|
||||
import type { KtxLocalProject } from '@ktx/context/project';
|
||||
import type { KtxScanConnector, KtxScanContext } from '@ktx/context/scan';
|
||||
import { createKtxCliScanConnector } from './local-scan-connectors.js';
|
||||
|
||||
/** Signature of the factory that builds a scan connector for a project connection. */
type CreateConnector = typeof createKtxCliScanConnector;

/** Optional collaborators that callers (typically tests) may substitute. */
export interface KtxCliIngestQueryExecutorDeps {
  /** Connector factory; `createKtxCliScanConnector` is used when omitted. */
  createConnector?: CreateConnector;
}
|
||||
|
||||
/**
 * Releases a connector's resources, if there is anything to release.
 *
 * Does nothing when no connector was created or when the connector does not
 * define a `cleanup` hook.
 *
 * @param connector The connector to release, or `null` if none was created.
 */
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
  if (connector == null) {
    return;
  }
  if (connector.cleanup == null) {
    return;
  }
  // Invoke as a method so the hook keeps its `this` binding.
  await connector.cleanup();
}
|
||||
|
||||
/**
 * Creates the SQL query executor used by CLI ingest runs.
 *
 * Each `execute` call builds a fresh scan connector for `input.connectionId`,
 * runs the statement through the connector's read-only SQL entry point, and
 * releases the connector afterwards. The connector is always resolved from the
 * `project` captured here; `input.connection` and `input.projectDir` are not
 * consulted.
 *
 * @param project Loaded local project whose connections back the executor.
 * @param deps Optional overrides; `createConnector` defaults to
 *   `createKtxCliScanConnector`.
 * @returns An executor whose results always report `command: 'SELECT'`.
 * @throws Error from `execute` when the connector does not advertise
 *   read-only SQL support or has no `executeReadOnly` implementation.
 */
export function createKtxCliIngestQueryExecutor(
  project: KtxLocalProject,
  deps: KtxCliIngestQueryExecutorDeps = {},
): KtxSqlQueryExecutorPort {
  const createConnector = deps.createConnector ?? createKtxCliScanConnector;
  return {
    async execute(input: KtxSqlQueryExecutionInput) {
      let connector: KtxScanConnector | null = null;
      // Tracks whether the primary path threw, so cleanup knows whether its own
      // failure may surface or must yield to the original error.
      let executionFailed = false;
      try {
        connector = await createConnector(project, input.connectionId);
        if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) {
          throw new Error(
            `Connection "${input.connectionId}" driver "${connector.driver}" does not support read-only SQL execution.`,
          );
        }

        const ctx: KtxScanContext = { runId: 'ingest-sql-execution' };
        const result = await connector.executeReadOnly(
          { connectionId: input.connectionId, sql: input.sql, maxRows: input.maxRows },
          ctx,
        );
        return {
          headers: result.headers,
          rows: result.rows,
          totalRows: result.totalRows,
          // Only read-only execution is performed here, so the command is fixed.
          command: 'SELECT',
          rowCount: result.rowCount,
        };
      } catch (error: unknown) {
        executionFailed = true;
        throw error;
      } finally {
        const cleanup = cleanupConnector(connector);
        // A rejection awaited inside `finally` replaces any error already in
        // flight. When execution has failed, ignore a secondary cleanup failure
        // so the caller sees the root cause; on success, let it propagate.
        await (executionFailed ? cleanup.catch(() => undefined) : cleanup);
      }
    },
  };
}
|
||||
|
|
@ -261,6 +261,18 @@ export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
|
|||
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
|
||||
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
|
||||
) {
|
||||
const ledger = params.toolSet.record_verification_ledger;
|
||||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
|
||||
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'cli-looker-verification-ledger', messages: [] },
|
||||
);
|
||||
const slWrite = params.toolSet.sl_write_source;
|
||||
if (!slWrite?.execute) {
|
||||
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
|
||||
|
|
|
|||
|
|
@ -810,6 +810,44 @@ describe('runKtxIngest', () => {
|
|||
expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ llmDebugRequestFile: debugFile }));
|
||||
});
|
||||
|
||||
// Verifies that the executor produced by the `createQueryExecutor` dependency
// is the one handed to `runLocalIngest`.
it('supplies a scan-connector query executor to local ingest runs', async () => {
  const io = makeIo();
  const projectDir = join(tempDir, 'query-executor-project');
  await writeWarehouseConfig(projectDir);

  // Stub executor that resolves with an empty SELECT result.
  const queryExecutor = {
    execute: vi.fn(async () => {
      return {
        headers: [],
        rows: [],
        totalRows: 0,
        command: 'SELECT',
        rowCount: 0,
      };
    }),
  };
  const runLocalIngest = vi.fn(
    async (input: RunLocalIngestOptions): Promise<LocalIngestResult> =>
      completedLocalBundleRun(input, 'query-executor-run'),
  );

  const pendingExitCode = runKtxIngest(
    {
      command: 'run',
      projectDir,
      connectionId: 'warehouse',
      adapter: 'fake',
      outputMode: 'json',
    },
    io.io,
    {
      runLocalIngest,
      createAdapters: () => [],
      createQueryExecutor: () => queryExecutor,
    },
  );
  await expect(pendingExitCode).resolves.toBe(0);

  // The exact stub instance must reach the local ingest runner.
  expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ queryExecutor }));
});
|
||||
|
||||
it('passes daemon database introspection URL to default local ingest adapters', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
|
|
|
|||
|
|
@ -16,7 +16,9 @@ import {
|
|||
runLocalMetabaseIngest,
|
||||
savedMemoryCountsForReport,
|
||||
} from '@ktx/context/ingest';
|
||||
import { loadKtxProject } from '@ktx/context/project';
|
||||
import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections';
|
||||
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
|
||||
import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js';
|
||||
import { readIngestReportSnapshotFile } from './ingest-report-file.js';
|
||||
import { createCliOperationalLogger } from './io/logger.js';
|
||||
import { createKtxCliLocalIngestAdapters } from './local-adapters.js';
|
||||
|
|
@ -69,6 +71,7 @@ interface KtxIngestDeps {
|
|||
jobIdFactory?: () => string;
|
||||
now?: () => Date;
|
||||
createAdapters?: typeof createKtxCliLocalIngestAdapters;
|
||||
createQueryExecutor?: (project: KtxLocalProject) => KtxSqlQueryExecutorPort;
|
||||
runLocalIngest?: typeof runLocalIngest;
|
||||
runLocalMetabaseIngest?: typeof runLocalMetabaseIngest;
|
||||
readReportFile?: typeof readIngestReportSnapshotFile;
|
||||
|
|
@ -532,6 +535,9 @@ export async function runKtxIngest(
|
|||
...(args.adapter === 'historic-sql' ? { historicSqlConnectionId: args.connectionId } : {}),
|
||||
logger: operationalLogger,
|
||||
};
|
||||
const queryExecutor =
|
||||
localIngestOptions.queryExecutor ??
|
||||
(deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(project);
|
||||
if (args.adapter === 'metabase' && args.sourceDir) {
|
||||
throw new Error('source-dir uploads are not supported for the Metabase fan-out adapter');
|
||||
}
|
||||
|
|
@ -544,6 +550,7 @@ export async function runKtxIngest(
|
|||
adapters: createAdapters(project, adapterOptions),
|
||||
metabaseConnectionId: args.connectionId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
trigger: 'manual_resync',
|
||||
jobIdFactory: deps.jobIdFactory,
|
||||
...(progress ? { progress } : {}),
|
||||
|
|
@ -604,6 +611,7 @@ export async function runKtxIngest(
|
|||
trigger: 'manual_resync',
|
||||
jobId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
pullConfigOptions: adapterOptions,
|
||||
...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}),
|
||||
...(memoryFlow ? { memoryFlow } : {}),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue