mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-22 08:38:08 +02:00
feat(query-history): scope mining to modeled schemas by default (#258)
* feat(query-history): structure SQL analysis table refs
* feat(query-history): qualify SQL analysis table refs
* feat(query-history): wire modeled scope floor through ingest
* chore(query-history): verify scope floor
* test(query-history): align daemon SQL batch endpoint contract
* feat(query-history): build scope from same-run scan catalog
* feat(query-history): fail open on scope-floor catalog failures
* chore(query-history): verify scope-floor v1 closure
* refactor(query-history): share scope membership
* feat(setup): apply derived query history filters
* docs: document derived query history filters
* fix(query-history): redact filter picker LLM prompt SQL
* fix(setup): run filter picker SQL analysis through managed daemon
* chore(query-history): verify filter picker v1 closure
* fix(query-history): fail open on partial service-account attribution
* fix(query-history): aggregate BigQuery users by execution count
* fix(query-history): aggregate Snowflake users by execution count
* fix(query-history): use BigQuery query info hash
This commit is contained in:
parent
ce1516b357
commit
e70ae1e63b
42 changed files with 3090 additions and 274 deletions
|
|
@ -91,7 +91,10 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => {
|
|||
40,
|
||||
0.05,
|
||||
null,
|
||||
JSON.stringify([{ user: 'analyst@example.test', executions: 1 }]),
|
||||
JSON.stringify([
|
||||
{ user: 'svc-loader@example.test', executions: 40 },
|
||||
{ user: 'analyst@example.test', executions: 2 },
|
||||
]),
|
||||
],
|
||||
],
|
||||
totalRows: 1,
|
||||
|
|
@ -103,15 +106,25 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => {
|
|||
for await (const row of reader.fetchAggregated(
|
||||
client,
|
||||
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
|
||||
{ dialect: 'bigquery', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
||||
{ dialect: 'bigquery', minExecutions: 5, windowDays: 90, enabledTables: [], enabledSchemas: [], modeledTableCatalog: [], scopeFloorWarnings: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
||||
)) {
|
||||
rows.push(row);
|
||||
}
|
||||
|
||||
const sql = firstQuery(client);
|
||||
expect(sql).toContain('WITH filtered_jobs AS');
|
||||
expect(sql).toContain('query_info.query_hashes.normalized_literals');
|
||||
expect(sql).toContain('TO_HEX(SHA256(query))');
|
||||
expect(sql).toContain('AS template_id');
|
||||
expect(sql).toContain('template_stats AS');
|
||||
expect(sql).toContain('template_users AS');
|
||||
expect(sql).toContain('COUNT(*) AS executions');
|
||||
expect(sql).toContain('COUNT(DISTINCT user_email) AS distinct_users');
|
||||
expect(sql).toContain('GROUP BY query_hash');
|
||||
expect(sql).toContain('GROUP BY template_id');
|
||||
expect(sql).toContain('GROUP BY template_id, user_email');
|
||||
expect(sql).toContain('ORDER BY users.executions DESC');
|
||||
expect(sql).not.toMatch(/\bquery_hash\b/);
|
||||
expect(sql).not.toContain('LIMIT 5');
|
||||
expect(sql).toContain('HAVING COUNT(*) >= 5');
|
||||
expect(rows).toMatchObject([
|
||||
{
|
||||
|
|
@ -120,7 +133,10 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => {
|
|||
executions: 42,
|
||||
errorRate: 0.05,
|
||||
},
|
||||
topUsers: [{ user: 'analyst@example.test', executions: 1 }],
|
||||
topUsers: [
|
||||
{ user: 'svc-loader@example.test', executions: 40 },
|
||||
{ user: 'analyst@example.test', executions: 2 },
|
||||
],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
|
@ -137,6 +153,9 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => {
|
|||
minExecutions: 5,
|
||||
windowDays: 90,
|
||||
enabledTables: [],
|
||||
enabledSchemas: [],
|
||||
modeledTableCatalog: [],
|
||||
scopeFloorWarnings: [],
|
||||
filters: { dropTrivialProbes: true },
|
||||
redactionPatterns: [],
|
||||
staleArchiveAfterDays: 90,
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ async function writeUnifiedStagedDir(root: string): Promise<void> {
|
|||
});
|
||||
await writeJson(root, 'tables/public.orders.json', {
|
||||
table: 'public.orders',
|
||||
tableRef: { catalog: null, db: 'public', name: 'orders' },
|
||||
stats: {
|
||||
executionsBucket: '10-100',
|
||||
distinctUsersBucket: '2-5',
|
||||
|
|
@ -46,7 +47,10 @@ async function writeUnifiedStagedDir(root: string): Promise<void> {
|
|||
{
|
||||
id: 'orders',
|
||||
canonicalSql: 'select * from public.orders join public.customers on true',
|
||||
tablesTouched: ['public.orders', 'public.customers'],
|
||||
tablesTouched: [
|
||||
{ catalog: null, db: 'public', name: 'orders' },
|
||||
{ catalog: null, db: 'public', name: 'customers' },
|
||||
],
|
||||
executionsBucket: '10-100',
|
||||
distinctUsersBucket: '2-5',
|
||||
dialect: 'postgres',
|
||||
|
|
@ -58,7 +62,10 @@ async function writeUnifiedStagedDir(root: string): Promise<void> {
|
|||
{
|
||||
id: 'orders',
|
||||
canonicalSql: 'select * from public.orders join public.customers on true',
|
||||
tablesTouched: ['public.orders', 'public.customers'],
|
||||
tablesTouched: [
|
||||
{ catalog: null, db: 'public', name: 'orders' },
|
||||
{ catalog: null, db: 'public', name: 'customers' },
|
||||
],
|
||||
executionsBucket: '10-100',
|
||||
distinctUsersBucket: '2-5',
|
||||
dialect: 'postgres',
|
||||
|
|
@ -155,7 +162,10 @@ describe('chunkHistoricSqlUnifiedStagedDir', () => {
|
|||
{
|
||||
id: 'line-items',
|
||||
canonicalSql: 'select * from public.orders join public.line_items on true',
|
||||
tablesTouched: ['public.orders', 'public.line_items'],
|
||||
tablesTouched: [
|
||||
{ catalog: null, db: 'public', name: 'orders' },
|
||||
{ catalog: null, db: 'public', name: 'line_items' },
|
||||
],
|
||||
executionsBucket: '10-100',
|
||||
distinctUsersBucket: '2-5',
|
||||
dialect: 'postgres',
|
||||
|
|
|
|||
|
|
@ -76,7 +76,10 @@ describe('HistoricSqlSourceAdapter', () => {
|
|||
[
|
||||
'pg:1',
|
||||
{
|
||||
tablesTouched: ['public.orders', 'public.customers'],
|
||||
tablesTouched: [
|
||||
{ catalog: null, db: 'public', name: 'orders' },
|
||||
{ catalog: null, db: 'public', name: 'customers' },
|
||||
],
|
||||
columnsByClause: { select: ['status'], join: ['customer_id', 'id'], groupBy: ['status'] },
|
||||
},
|
||||
],
|
||||
|
|
|
|||
|
|
@ -126,7 +126,10 @@ function acceptanceSqlAnalysis(): SqlAnalysisPort {
|
|||
items.map((item) => [
|
||||
item.id,
|
||||
{
|
||||
tablesTouched: ['public.orders', 'public.customers'],
|
||||
tablesTouched: [
|
||||
{ catalog: null, db: 'public', name: 'orders' },
|
||||
{ catalog: null, db: 'public', name: 'customers' },
|
||||
],
|
||||
columnsByClause: {
|
||||
select: ['status', 'segment'],
|
||||
where: ['status'],
|
||||
|
|
|
|||
|
|
@ -9,11 +9,18 @@ import type { StagedPatternsInput } from '../../../../../src/context/ingest/adap
|
|||
|
||||
type PatternTemplate = StagedPatternsInput['templates'][number];
|
||||
|
||||
/**
 * Parses a dotted table identifier into a structured table reference.
 *
 * - `catalog.db.name` populates all three components.
 * - `db.name` leaves `catalog` as `null`.
 * - Any other shape (no dot, or more than three segments) keeps the whole
 *   input as `name` with `catalog` and `db` set to `null`.
 *
 * @param value Dotted identifier such as `public.orders`.
 * @returns The structured `{ catalog, db, name }` reference.
 */
function tableRef(value: string): { catalog: string | null; db: string | null; name: string } {
  const [first, second, third, ...extra] = value.split('.');

  // Exactly three segments: fully qualified reference.
  if (first !== undefined && second !== undefined && third !== undefined && extra.length === 0) {
    return { catalog: first, db: second, name: third };
  }

  // Exactly two segments: schema-qualified reference.
  if (first !== undefined && second !== undefined && third === undefined) {
    return { catalog: null, db: first, name: second };
  }

  // One segment, or four and more: the input is kept verbatim as the name.
  return { catalog: null, db: null, name: value };
}
|
||||
|
||||
function template(id: string, tablesTouched: string[], canonicalSql = 'select 1'): PatternTemplate {
|
||||
return {
|
||||
id,
|
||||
canonicalSql,
|
||||
tablesTouched,
|
||||
tablesTouched: tablesTouched.map(tableRef),
|
||||
executionsBucket: '10-100',
|
||||
distinctUsersBucket: '2-5',
|
||||
dialect: 'postgres',
|
||||
|
|
@ -32,7 +39,7 @@ describe('historic-SQL pattern input sharding', () => {
|
|||
],
|
||||
};
|
||||
|
||||
const result = splitHistoricSqlPatternInputs(input, { maxBytes: 760 });
|
||||
const result = splitHistoricSqlPatternInputs(input, { maxBytes: 1200 });
|
||||
|
||||
expect(result.auditInput.templates.map((entry) => entry.id)).toEqual([
|
||||
'orders-customers-1',
|
||||
|
|
@ -51,7 +58,7 @@ describe('historic-SQL pattern input sharding', () => {
|
|||
'orders-customers-1',
|
||||
'orders-customers-2',
|
||||
]);
|
||||
expect(result.shards.every((shard) => shard.byteLength <= 760)).toBe(true);
|
||||
expect(result.shards.every((shard) => shard.byteLength <= 1200)).toBe(true);
|
||||
expect(result.shards.flatMap((shard) => shard.input.templates).some((entry) => entry.id === 'single-table-orders')).toBe(false);
|
||||
expect(result.warnings).toEqual([]);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -215,7 +215,7 @@ describe('PostgresPgssReader aggregate path', () => {
|
|||
for await (const row of reader.fetchAggregated(
|
||||
{ executeQuery },
|
||||
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
|
||||
{ dialect: 'postgres', minExecutions: 5, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
||||
{ dialect: 'postgres', minExecutions: 5, enabledTables: [], enabledSchemas: [], modeledTableCatalog: [], scopeFloorWarnings: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
||||
)) {
|
||||
rows.push(row);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,274 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { KtxLlmRuntimePort } from '../../../../../src/context/llm/runtime-port.js';
|
||||
import type {
|
||||
SqlAnalysisBatchItem,
|
||||
SqlAnalysisBatchResult,
|
||||
SqlAnalysisPort,
|
||||
} from '../../../../../src/context/sql-analysis/ports.js';
|
||||
import {
|
||||
proposeQueryHistoryServiceAccountFilters,
|
||||
regexEscapeForExactRolePattern,
|
||||
} from '../../../../../src/context/ingest/adapters/historic-sql/query-history-filter-picker.js';
|
||||
import type {
|
||||
AggregatedTemplate,
|
||||
HistoricSqlReader,
|
||||
} from '../../../../../src/context/ingest/adapters/historic-sql/types.js';
|
||||
|
||||
/**
 * Builds an `AggregatedTemplate` fixture for the filter-picker tests.
 *
 * `templateId` and `canonicalSql` are required; every other field falls back
 * to a default when the override is `null` or `undefined`:
 * - `dialect` defaults to `'postgres'`
 * - `stats` defaults to a 25-execution, single-user sample
 * - `topUsers` defaults to one `analyst` user with 25 executions
 *
 * @param overrides Required identity fields plus optional field overrides.
 * @returns A fully populated template fixture.
 */
function aggregate(overrides: Partial<AggregatedTemplate> & { templateId: string; canonicalSql: string }): AggregatedTemplate {
  const defaultStats = {
    executions: 25,
    distinctUsers: 1,
    firstSeen: '2026-05-01T00:00:00.000Z',
    lastSeen: '2026-06-01T00:00:00.000Z',
    p50RuntimeMs: 50,
    p95RuntimeMs: 100,
    errorRate: 0,
    rowsProduced: 10,
  };
  const defaultTopUsers = [{ user: 'analyst', executions: 25 }];

  return {
    templateId: overrides.templateId,
    canonicalSql: overrides.canonicalSql,
    dialect: overrides.dialect ?? 'postgres',
    stats: overrides.stats ?? defaultStats,
    topUsers: overrides.topUsers ?? defaultTopUsers,
  };
}
|
||||
|
||||
/**
 * Builds an in-memory `HistoricSqlReader` stub.
 *
 * `probe` always resolves with empty `warnings` and `info` lists, and
 * `fetchAggregated` yields the supplied templates in the order given,
 * ignoring any arguments it is called with.
 *
 * @param templates Templates the stub should stream back.
 * @returns A reader backed only by the supplied templates.
 */
function reader(...templates: AggregatedTemplate[]): HistoricSqlReader {
  return {
    async probe() {
      return { warnings: [], info: [] };
    },
    async *fetchAggregated() {
      // Delegate to the array's iterator instead of looping by hand.
      yield* templates;
    },
  };
}
|
||||
|
||||
/**
 * Builds a `SqlAnalysisPort` test double backed by vitest mocks.
 *
 * `analyzeBatch` answers every requested item with the table refs registered
 * under its id in `tablesById` (an empty list when the id is unknown) and an
 * empty `columnsByClause`. `validateReadOnly` always resolves `{ ok: true }`;
 * `analyzeForFingerprint` is a bare mock with no implementation.
 *
 * @param tablesById Table refs to report, keyed by batch item id.
 * @returns The mocked analysis port.
 */
function sqlAnalysis(tablesById: Record<string, Array<{ catalog: string | null; db: string | null; name: string }>>): SqlAnalysisPort {
  const analyzeBatch = vi.fn(async (items: SqlAnalysisBatchItem[]): Promise<Map<string, SqlAnalysisBatchResult>> => {
    const results = new Map<string, SqlAnalysisBatchResult>();
    for (const item of items) {
      results.set(item.id, {
        tablesTouched: tablesById[item.id] ?? [],
        columnsByClause: {},
      });
    }
    return results;
  });

  return {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch,
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };
}
|
||||
|
||||
/**
 * Builds a `KtxLlmRuntimePort` test double backed by vitest mocks.
 *
 * `generateObject` always resolves `{ roles: decisions }` regardless of the
 * request it receives; `generateText` and `runAgentLoop` are bare mocks.
 *
 * @param decisions Per-role decisions the fake model should return.
 * @returns The mocked LLM runtime.
 */
function llm(decisions: Array<{ role: string; exclude: boolean; reason: string }>): KtxLlmRuntimePort {
  // The cast narrows the untyped mock to the port's generateObject signature.
  const generateObject = vi.fn(async () => ({ roles: decisions })) as KtxLlmRuntimePort['generateObject'];

  return {
    generateText: vi.fn(),
    generateObject,
    runAgentLoop: vi.fn(),
  };
}
|
||||
|
||||
// Covers proposeQueryHistoryServiceAccountFilters end to end against in-memory
// reader / SQL-analysis / LLM doubles, plus the exact-match regex helper.
describe('query-history filter picker', () => {
  it('emits anchored escaped patterns for excluded roles from one batched LLM call', async () => {
    const runtime = llm([
      { role: 'svc.loader+prod', exclude: true, reason: 'Runs recurring loader traffic only.' },
      { role: 'analyst', exclude: false, reason: 'Interactive analytic usage.' },
    ]);
    const analysis = sqlAnalysis({
      loader: [{ catalog: null, db: 'analytics', name: 'orders' }],
      analyst: [{ catalog: null, db: 'analytics', name: 'orders' }],
    });

    const proposal = await proposeQueryHistoryServiceAccountFilters({
      connectionId: 'warehouse',
      dialect: 'postgres',
      queryClient: {},
      reader: reader(
        aggregate({
          templateId: 'loader',
          canonicalSql: 'merge into analytics.orders using staging.orders_delta on orders.id = orders_delta.id',
          topUsers: [{ user: 'svc.loader+prod', executions: 40 }],
        }),
        aggregate({
          templateId: 'analyst',
          canonicalSql: 'select status, count(*) from analytics.orders group by status',
          topUsers: [{ user: 'analyst', executions: 25 }],
        }),
      ),
      sqlAnalysis: analysis,
      llmRuntime: runtime,
      pullConfig: {
        dialect: 'postgres',
        enabledSchemas: ['analytics'],
        enabledTables: [],
        modeledTableCatalog: [{ catalog: null, db: 'analytics', name: 'orders' }],
        filters: { dropTrivialProbes: true },
      },
      now: new Date('2026-06-03T00:00:00.000Z'),
    });

    // Both roles must be decided in a single model call.
    expect(runtime.generateObject).toHaveBeenCalledTimes(1);
    expect(proposal).toMatchObject({
      excludedRoles: [
        {
          role: 'svc.loader+prod',
          pattern: '^svc\\.loader\\+prod$',
          reason: 'Runs recurring loader traffic only.',
        },
      ],
      consideredRoleCount: 2,
      skipped: null,
      warnings: [],
    });
  });

  it('redacts representative SQL before sending role records to the LLM', async () => {
    const originalSql =
      "select * from public.api_events where api_key = 'sk_live_abc123' and note = 'Secret_Token_9f'"; // pragma: allowlist secret
    // Shared between the reader fixture and the analyzeBatch expectation below.
    const analystSql = 'select status, count(*) from public.orders group by status';
    const runtime = llm([
      { role: 'svc_loader', exclude: false, reason: 'Keep by default.' },
      { role: 'analyst', exclude: false, reason: 'Interactive analytic usage.' },
    ]);
    const analysis = sqlAnalysis({
      secret: [{ catalog: null, db: 'public', name: 'api_events' }],
      analyst: [{ catalog: null, db: 'public', name: 'orders' }],
    });

    await proposeQueryHistoryServiceAccountFilters({
      connectionId: 'warehouse',
      dialect: 'postgres',
      queryClient: {},
      reader: reader(
        aggregate({
          templateId: 'secret',
          canonicalSql: originalSql,
          topUsers: [{ user: 'svc_loader', executions: 30 }],
        }),
        aggregate({
          templateId: 'analyst',
          canonicalSql: analystSql,
          topUsers: [{ user: 'analyst', executions: 25 }],
        }),
      ),
      sqlAnalysis: analysis,
      llmRuntime: runtime,
      pullConfig: {
        dialect: 'postgres',
        enabledSchemas: ['public'],
        enabledTables: [],
        modeledTableCatalog: [],
        redactionPatterns: ['sk_live_[A-Za-z0-9]+', '(?i)secret_token_[a-z0-9]+'],
        filters: { dropTrivialProbes: true },
      },
      now: new Date('2026-06-03T00:00:00.000Z'),
    });

    // SQL analysis still receives the unredacted statement...
    expect(analysis.analyzeBatch).toHaveBeenCalledWith(
      [
        { id: 'secret', sql: originalSql },
        { id: 'analyst', sql: analystSql },
      ],
      'postgres',
      undefined,
    );
    // ...while the prompt handed to the model carries neither secret literal.
    const call = vi.mocked(runtime.generateObject).mock.calls[0]?.[0];
    expect(call?.prompt).toContain('[REDACTED]');
    expect(call?.prompt).not.toContain('sk_live_abc123');
    expect(call?.prompt).not.toContain('Secret_Token_9f');
  });

  it('fails open with no LLM runtime', async () => {
    const proposal = await proposeQueryHistoryServiceAccountFilters({
      connectionId: 'warehouse',
      dialect: 'postgres',
      queryClient: {},
      reader: reader(),
      sqlAnalysis: sqlAnalysis({}),
      llmRuntime: null,
      pullConfig: { dialect: 'postgres', filters: { dropTrivialProbes: true } },
    });

    expect(proposal).toEqual({
      excludedRoles: [],
      consideredRoleCount: 0,
      skipped: { reason: 'no-llm' },
      warnings: [],
    });
  });

  it('proposes nothing for a single-role stack', async () => {
    // The fake model would exclude the only role; the picker must never ask it.
    const runtime = llm([{ role: 'warehouse_user', exclude: true, reason: 'Only observed role.' }]);

    const proposal = await proposeQueryHistoryServiceAccountFilters({
      connectionId: 'warehouse',
      dialect: 'postgres',
      queryClient: {},
      reader: reader(
        aggregate({
          templateId: 'single-role',
          canonicalSql: 'select * from analytics.orders',
          topUsers: [{ user: 'warehouse_user', executions: 40 }],
        }),
      ),
      sqlAnalysis: sqlAnalysis({
        'single-role': [{ catalog: null, db: 'analytics', name: 'orders' }],
      }),
      llmRuntime: runtime,
      pullConfig: { dialect: 'postgres', enabledSchemas: ['analytics'], filters: { dropTrivialProbes: true } },
    });

    expect(runtime.generateObject).not.toHaveBeenCalled();
    expect(proposal.excludedRoles).toEqual([]);
    expect(proposal.skipped).toEqual({ reason: 'no-in-scope-history' });
  });

  it('keeps clean in-scope history when the model excludes nothing', async () => {
    const proposal = await proposeQueryHistoryServiceAccountFilters({
      connectionId: 'warehouse',
      dialect: 'bigquery',
      queryClient: {},
      reader: reader(
        aggregate({
          templateId: 'dashboard',
          canonicalSql: 'select status, count(*) from `demo.analytics.orders` group by status',
          dialect: 'bigquery',
          topUsers: [{ user: 'bi_runner', executions: 1 }],
        }),
        aggregate({
          templateId: 'analyst',
          canonicalSql: 'select * from `demo.analytics.orders` where id = @id',
          dialect: 'bigquery',
          topUsers: [{ user: 'analyst', executions: 1 }],
        }),
      ),
      sqlAnalysis: sqlAnalysis({
        dashboard: [{ catalog: 'demo', db: 'analytics', name: 'orders' }],
        analyst: [{ catalog: 'demo', db: 'analytics', name: 'orders' }],
      }),
      llmRuntime: llm([
        { role: 'bi_runner', exclude: false, reason: 'Dashboard usage is analytic.' },
        { role: 'analyst', exclude: false, reason: 'Interactive analyst usage.' },
      ]),
      pullConfig: {
        dialect: 'bigquery',
        windowDays: 90,
        enabledSchemas: ['analytics'],
        filters: { dropTrivialProbes: true },
      },
    });

    expect(proposal.excludedRoles).toEqual([]);
    expect(proposal.consideredRoleCount).toBe(2);
    expect(proposal.skipped).toBeNull();
  });

  it('escapes regex metacharacters for exact role matches', () => {
    expect(regexEscapeForExactRolePattern('svc.loader+prod')).toBe('^svc\\.loader\\+prod$');
    expect(regexEscapeForExactRolePattern('team[etl](west)')).toBe('^team\\[etl\\]\\(west\\)$');
  });
});
|
||||
|
|
@ -0,0 +1,194 @@
|
|||
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { resolveQueryHistoryScopeFloor } from '../../../../../src/context/ingest/adapters/historic-sql/scope-floor.js';
|
||||
|
||||
/**
 * Creates a fresh, uniquely named project directory under the OS temp dir.
 *
 * @returns Absolute path of the new directory (prefixed `ktx-qh-scope-`).
 */
async function tempProject(): Promise<string> {
  const prefix = join(tmpdir(), 'ktx-qh-scope-');
  return mkdtemp(prefix);
}
|
||||
|
||||
/**
 * Seeds a minimal live-database scan snapshot for one table under
 * `<projectDir>/raw-sources/<connectionId>/live-database/<syncId>`.
 *
 * Three pretty-printed JSON files (2-space indent, trailing newline) are written:
 * - `connection.json`: the connection id and a `postgres` driver
 * - `tables/<db|default>-<name>.json`: the table ref plus empty table metadata
 * - `scan-report.json`: an enriched, non-dry-run CLI scan report for the sync
 *
 * @param projectDir Project root that receives the `raw-sources` tree.
 * @param connectionId Connection the scan belongs to.
 * @param syncId Sync identifier; also used to derive the report's `runId`.
 * @param table Table ref to record; a `null` db is filed under `default`.
 */
async function seedLiveScanTable(
  projectDir: string,
  connectionId: string,
  syncId: string,
  table: { catalog: string | null; db: string | null; name: string },
): Promise<void> {
  const relativeRoot = `raw-sources/${connectionId}/live-database/${syncId}`;
  const root = join(projectDir, 'raw-sources', connectionId, 'live-database', syncId);
  const serialize = (value: unknown): string => `${JSON.stringify(value, null, 2)}\n`;

  // Creating `tables/` recursively also creates the snapshot root itself.
  await mkdir(join(root, 'tables'), { recursive: true });

  const tableDocument = {
    ...table,
    kind: 'table',
    comment: null,
    estimatedRows: null,
    columns: [],
    foreignKeys: [],
  };
  const scanReport = {
    connectionId,
    driver: 'postgres',
    syncId,
    runId: `scan-${syncId}`,
    trigger: 'cli',
    mode: 'enriched',
    dryRun: false,
    artifactPaths: {
      rawSourcesDir: relativeRoot,
      reportPath: `${relativeRoot}/scan-report.json`,
      manifestShards: [],
      enrichmentArtifacts: [],
    },
    counts: {},
    warnings: [],
    enrichment: {},
    enrichmentState: {},
  };

  // The three files are independent of each other, so write them concurrently.
  await Promise.all([
    writeFile(join(root, 'connection.json'), serialize({ connectionId, driver: 'postgres' }), 'utf-8'),
    writeFile(join(root, 'tables', `${table.db ?? 'default'}-${table.name}.json`), serialize(tableDocument), 'utf-8'),
    writeFile(join(root, 'scan-report.json'), serialize(scanReport), 'utf-8'),
  ]);
}
|
||||
|
||||
// Covers how resolveQueryHistoryScopeFloor derives enabled schemas/tables and
// the modeled table catalog from connection config, stored query-history
// settings, semantic-layer sources and the live-database scan.
// NOTE(review): temp project directories created here are not removed by this
// suite — confirm whether cleanup happens elsewhere.
describe('resolveQueryHistoryScopeFloor', () => {
  /** Writes the `revenue` semantic source (table `orbit_analytics.mart_revenue`) for the `warehouse` connection. */
  async function seedRevenueSemanticSource(projectDir: string): Promise<void> {
    await mkdir(join(projectDir, 'semantic-layer/warehouse'), { recursive: true });
    await writeFile(
      join(projectDir, 'semantic-layer/warehouse/revenue.yaml'),
      [
        'name: revenue',
        'table: orbit_analytics.mart_revenue',
        'grain: [id]',
        'columns:',
        '  - name: id',
        '    type: string',
        '',
      ].join('\n'),
      'utf-8',
    );
  }

  it('computes modeled schemas from connection schemas plus semantic source tables', async () => {
    const projectDir = await tempProject();
    await seedRevenueSemanticSource(projectDir);
    await seedLiveScanTable(projectDir, 'warehouse', 'sync-1', {
      catalog: null,
      db: 'orbit_raw',
      name: 'accounts',
    });

    const scope = await resolveQueryHistoryScopeFloor({
      projectDir,
      connectionId: 'warehouse',
      driver: 'postgres',
      connection: { driver: 'postgres', schemas: ['orbit_raw'] },
      storedQueryHistory: {},
    });

    expect(scope.enabledSchemas).toEqual(['orbit_analytics', 'orbit_raw']);
    expect(scope.modeledTableCatalog).toEqual([
      { catalog: null, db: 'orbit_analytics', name: 'mart_revenue' },
      { catalog: null, db: 'orbit_raw', name: 'accounts' },
    ]);
    expect(scope.enabledTables).toEqual([]);
    expect(scope.floorDisabled).toBe(false);
  });

  it('uses explicit enabledTables before explicit enabledSchemas and computed scope', async () => {
    const scope = await resolveQueryHistoryScopeFloor({
      projectDir: await tempProject(),
      connectionId: 'warehouse',
      driver: 'postgres',
      connection: { driver: 'postgres', schemas: ['orbit_raw'] },
      storedQueryHistory: {
        enabledTables: ['orbit_analytics.mart_revenue'],
        enabledSchemas: ['orbit_raw'],
      },
    });

    // The explicit table list wins, so the schema list comes back empty.
    expect(scope.enabledTables).toEqual([{ catalog: null, db: 'orbit_analytics', name: 'mart_revenue' }]);
    expect(scope.enabledSchemas).toEqual([]);
    expect(scope.floorDisabled).toBe(false);
  });

  it('disables the floor for enabledSchemas star', async () => {
    const scope = await resolveQueryHistoryScopeFloor({
      projectDir: await tempProject(),
      connectionId: 'warehouse',
      driver: 'postgres',
      connection: { driver: 'postgres', schemas: ['orbit_raw'] },
      storedQueryHistory: { enabledSchemas: ['*'] },
    });

    expect(scope.enabledTables).toEqual([]);
    expect(scope.enabledSchemas).toEqual(['*']);
    expect(scope.floorDisabled).toBe(true);
  });

  it('adds latest live-database scan tables to the modeled table catalog', async () => {
    const projectDir = await tempProject();
    await seedRevenueSemanticSource(projectDir);
    await seedLiveScanTable(projectDir, 'warehouse', 'sync-1', {
      catalog: null,
      db: 'orbit_raw',
      name: 'accounts',
    });

    const scope = await resolveQueryHistoryScopeFloor({
      projectDir,
      connectionId: 'warehouse',
      driver: 'postgres',
      connection: { driver: 'postgres', schemas: ['orbit_raw'] },
      storedQueryHistory: {},
    });

    expect(scope.enabledSchemas).toEqual(['orbit_analytics', 'orbit_raw']);
    expect(scope.modeledTableCatalog).toEqual([
      { catalog: null, db: 'orbit_analytics', name: 'mart_revenue' },
      { catalog: null, db: 'orbit_raw', name: 'accounts' },
    ]);
    expect(scope.warnings).toEqual([]);
    expect(scope.floorDisabled).toBe(false);
  });

  it('fails open when schema scope exists but the scan catalog is unavailable', async () => {
    // An empty project: connection schemas are configured but no scan was seeded.
    const scope = await resolveQueryHistoryScopeFloor({
      projectDir: await tempProject(),
      connectionId: 'warehouse',
      driver: 'postgres',
      connection: { driver: 'postgres', schemas: ['orbit_raw'] },
      storedQueryHistory: {},
    });

    expect(scope.enabledTables).toEqual([]);
    expect(scope.enabledSchemas).toEqual(['*']);
    expect(scope.modeledTableCatalog).toEqual([]);
    expect(scope.floorDisabled).toBe(true);
    expect(scope.warnings).toContain('query_history_scope_floor_disabled:catalog_unavailable');
  });
});
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
includedQueryHistoryTableRefs,
|
||||
isQueryHistoryScopeFloorDisabled,
|
||||
shouldFailOpenQueryHistoryScope,
|
||||
} from '../../../../../src/context/ingest/adapters/historic-sql/scope-membership.js';
|
||||
import type { KtxTableRef } from '../../../../../src/context/scan/types.js';
|
||||
|
||||
/**
 * Shorthand for building a `KtxTableRef` in tests.
 *
 * @param db Schema/dataset component, or `null` when unqualified.
 * @param name Table name.
 * @param catalog Optional catalog/project component; defaults to `null`.
 * @returns The assembled table reference.
 */
function ref(db: string | null, name: string, catalog: string | null = null): KtxTableRef {
  const tableRef: KtxTableRef = { catalog, db, name };
  return tableRef;
}
|
||||
|
||||
// Covers the shared scope-membership helpers: which touched table refs survive
// a given enabledTables / enabledSchemas configuration.
describe('query-history scope membership', () => {
  it('prefers explicit enabled tables over schema scope', () => {
    const orders = ref('analytics', 'orders');
    const noise = ref('metabase', 'application_table');

    // `noise` lives in an enabled schema, yet only the explicitly enabled table is kept.
    const included = includedQueryHistoryTableRefs([orders, noise], {
      enabledTables: [orders],
      enabledSchemas: ['metabase'],
    });

    expect(included).toEqual([orders]);
  });

  it('matches schema scope by the db component across catalogs', () => {
    const modeled = ref('orbit_analytics', 'orders', 'demo-project');
    const noise = ref('metabase', 'application_table', 'demo-project');

    const included = includedQueryHistoryTableRefs([modeled, noise], {
      enabledTables: [],
      enabledSchemas: ['orbit_analytics'],
    });

    expect(included).toEqual([modeled]);
  });

  it('keeps every touched ref when wildcard scope disables the floor', () => {
    const tables = [ref('analytics', 'orders'), ref('metabase', 'application_table')];

    expect(isQueryHistoryScopeFloorDisabled({ enabledTables: [], enabledSchemas: ['*'] })).toBe(true);
    expect(includedQueryHistoryTableRefs(tables, { enabledTables: [], enabledSchemas: ['*'] })).toEqual(tables);
  });

  it('fails open when no tables, schemas, or wildcard are configured', () => {
    const tables = [ref('metabase', 'application_table')];

    expect(shouldFailOpenQueryHistoryScope({ enabledTables: [], enabledSchemas: [] })).toBe(true);
    expect(includedQueryHistoryTableRefs(tables, { enabledTables: [], enabledSchemas: [] })).toEqual(tables);
  });
});
|
||||
|
|
@ -90,7 +90,10 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
|
|||
40,
|
||||
0.05,
|
||||
100,
|
||||
JSON.stringify([{ user: 'ANALYST', executions: 1 }]),
|
||||
JSON.stringify([
|
||||
{ user: 'SVC_LOADER', executions: 40 },
|
||||
{ user: 'ANALYST', executions: 2 },
|
||||
]),
|
||||
],
|
||||
],
|
||||
totalRows: 1,
|
||||
|
|
@ -102,15 +105,20 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
|
|||
for await (const row of reader.fetchAggregated(
|
||||
client,
|
||||
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
|
||||
{ dialect: 'snowflake', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
||||
{ dialect: 'snowflake', minExecutions: 5, windowDays: 90, enabledTables: [], enabledSchemas: [], modeledTableCatalog: [], scopeFloorWarnings: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
||||
)) {
|
||||
rows.push(row);
|
||||
}
|
||||
|
||||
const sql = firstQuery(client);
|
||||
expect(sql).toContain('WITH filtered_queries AS');
|
||||
expect(sql).toContain('template_stats AS');
|
||||
expect(sql).toContain('template_users AS');
|
||||
expect(sql).toContain('SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY');
|
||||
expect(sql).toContain('COUNT(*) AS executions');
|
||||
expect(sql).toContain('GROUP BY query_hash');
|
||||
expect(sql).toContain('COUNT(DISTINCT user_name) AS distinct_users');
|
||||
expect(sql).toContain('GROUP BY query_hash, user_name');
|
||||
expect(sql).toContain('ORDER BY users.executions DESC');
|
||||
expect(sql).toContain('HAVING COUNT(*) >= 5');
|
||||
expect(rows).toMatchObject([
|
||||
{
|
||||
|
|
@ -119,7 +127,10 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
|
|||
executions: 42,
|
||||
errorRate: 0.05,
|
||||
},
|
||||
topUsers: [{ user: 'ANALYST', executions: 1 }],
|
||||
topUsers: [
|
||||
{ user: 'SVC_LOADER', executions: 40 },
|
||||
{ user: 'ANALYST', executions: 2 },
|
||||
],
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
|
@ -136,6 +147,9 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
|
|||
minExecutions: 5,
|
||||
windowDays: 90,
|
||||
enabledTables: [],
|
||||
enabledSchemas: [],
|
||||
modeledTableCatalog: [],
|
||||
scopeFloorWarnings: [],
|
||||
filters: { dropTrivialProbes: true },
|
||||
redactionPatterns: [],
|
||||
staleArchiveAfterDays: 90,
|
||||
|
|
|
|||
|
|
@ -14,6 +14,13 @@ async function readJson<T>(root: string, relPath: string): Promise<T> {
|
|||
return JSON.parse(await readFile(join(root, relPath), 'utf-8')) as T;
|
||||
}
|
||||
|
||||
/**
 * Converts a dotted table name into the structured ref shape used by the
 * staged snapshot fixtures.
 *
 * Three segments map to `catalog.db.name`, two segments to `db.name`; every
 * other input is returned unqualified with the full string as `name`.
 *
 * @param value Dotted identifier such as `public.orders`.
 * @returns The structured `{ catalog, db, name }` reference.
 */
function tableRef(value: string): { catalog: string | null; db: string | null; name: string } {
  const [first, second, third, ...extra] = value.split('.');

  // Exactly three segments: fully qualified reference.
  if (first !== undefined && second !== undefined && third !== undefined && extra.length === 0) {
    return { catalog: first, db: second, name: third };
  }

  // Exactly two segments: schema-qualified reference.
  if (first !== undefined && second !== undefined && third === undefined) {
    return { catalog: null, db: first, name: second };
  }

  // One segment, or four and more: the input is kept verbatim as the name.
  return { catalog: null, db: null, name: value };
}
|
||||
|
||||
function aggregate(overrides: Partial<AggregatedTemplate> & { templateId: string; canonicalSql: string }): AggregatedTemplate {
|
||||
return {
|
||||
templateId: overrides.templateId,
|
||||
|
|
@ -72,7 +79,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
[
|
||||
'orders-by-status',
|
||||
{
|
||||
tablesTouched: ['public.orders', 'public.customers'],
|
||||
tablesTouched: [tableRef('public.orders'), tableRef('public.customers')],
|
||||
columnsByClause: {
|
||||
select: ['status'],
|
||||
where: ['created_at'],
|
||||
|
|
@ -94,6 +101,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
sqlAnalysis,
|
||||
pullConfig: {
|
||||
dialect: 'postgres',
|
||||
enabledSchemas: ['public'],
|
||||
filters: {
|
||||
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
|
||||
},
|
||||
|
|
@ -111,6 +119,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
{ id: 'bad-parse', sql: 'select broken from' },
|
||||
],
|
||||
'postgres',
|
||||
undefined,
|
||||
);
|
||||
|
||||
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['public.customers.json', 'public.orders.json']);
|
||||
|
|
@ -131,6 +140,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
const orders = await readJson<Record<string, any>>(stagedDir, 'tables/public.orders.json');
|
||||
expect(orders).toMatchObject({
|
||||
table: 'public.orders',
|
||||
tableRef: tableRef('public.orders'),
|
||||
stats: {
|
||||
executionsBucket: '10-100',
|
||||
distinctUsersBucket: '2-5',
|
||||
|
|
@ -159,7 +169,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
{
|
||||
id: 'orders-by-status',
|
||||
canonicalSql: expect.stringContaining('public.orders'),
|
||||
tablesTouched: ['public.customers', 'public.orders'],
|
||||
tablesTouched: [tableRef('public.customers'), tableRef('public.orders')],
|
||||
executionsBucket: '10-100',
|
||||
distinctUsersBucket: '2-5',
|
||||
dialect: 'postgres',
|
||||
|
|
@ -167,6 +177,129 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
]);
|
||||
});
|
||||
|
||||
// The only reported top user matches the service-account exclude pattern, but it accounts
// for just 5 of the template's 42 executions, so the template must stay in the staged output.
it('keeps templates when service-account topUsers are only a partial execution sample', async () => {
  const templateId = 'shared-bigquery-template';
  const templateSql = 'select status, count(*) from `demo.analytics.orders` group by status';
  const stagedDir = await tempDir();

  const reader: HistoricSqlReader = {
    probe: async () => ({ warnings: [], info: [] }),
    async *fetchAggregated() {
      yield aggregate({
        templateId,
        canonicalSql: templateSql,
        dialect: 'bigquery',
        stats: {
          executions: 42,
          distinctUsers: 2,
          firstSeen: '2026-05-01T00:00:00.000Z',
          lastSeen: '2026-05-11T00:00:00.000Z',
          p50RuntimeMs: 20,
          p95RuntimeMs: 80,
          errorRate: 0,
          rowsProduced: null,
        },
        topUsers: [{ user: 'svc_loader', executions: 5 }],
      });
    },
  };

  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(async () =>
      new Map([
        [
          'shared-bigquery-template',
          {
            tablesTouched: [tableRef('demo.analytics.orders')],
            columnsByClause: { select: ['status'], groupBy: ['status'] },
          },
        ],
      ]),
    ),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: {
      dialect: 'bigquery',
      windowDays: 90,
      enabledSchemas: ['analytics'],
      filters: {
        serviceAccounts: { patterns: ['^svc_loader$'], mode: 'exclude' },
      },
    },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  // The template is still listed in the patterns input...
  const patterns = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
  const stagedTemplateIds = patterns.templates.map(({ id }: { id: string }) => id);
  expect(stagedTemplateIds).toEqual([templateId]);

  // ...and on the per-table artifact.
  const orders = await readJson<Record<string, any>>(stagedDir, 'tables/demo.analytics.orders.json');
  expect(orders.topTemplates).toEqual([
    {
      id: templateId,
      canonicalSql: templateSql,
      topUsers: [{ user: 'svc_loader' }],
    },
  ]);
});
|
||||
|
||||
// Here the matched service account is responsible for all 12 executions, so the template
// is expected to be filtered out before SQL analysis (analyzeBatch receives no items).
it('drops service-account-only templates when matched users cover all executions', async () => {
  const stagedDir = await tempDir();
  const loaderExecutions = 12;

  const reader: HistoricSqlReader = {
    probe: async () => ({ warnings: [], info: [] }),
    async *fetchAggregated() {
      yield aggregate({
        templateId: 'service-only-template',
        canonicalSql: 'merge into analytics.orders using staging.orders_delta on orders.id = orders_delta.id',
        stats: {
          executions: loaderExecutions,
          distinctUsers: 1,
          firstSeen: '2026-05-01T00:00:00.000Z',
          lastSeen: '2026-05-11T00:00:00.000Z',
          p50RuntimeMs: 20,
          p95RuntimeMs: 80,
          errorRate: 0,
          rowsProduced: 0,
        },
        topUsers: [{ user: 'svc_loader', executions: loaderExecutions }],
      });
    },
  };

  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(async () => new Map()),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: {
      dialect: 'postgres',
      enabledSchemas: ['analytics'],
      filters: {
        serviceAccounts: { patterns: ['^svc_loader$'], mode: 'exclude' },
      },
    },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith([], 'postgres', undefined);

  const patterns = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
  expect(patterns.templates).toEqual([]);
});
|
||||
|
||||
it('redacts configured SQL substrings in staged artifacts while analyzing original SQL', async () => {
|
||||
const stagedDir = await tempDir();
|
||||
const originalSql =
|
||||
|
|
@ -198,7 +331,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
[
|
||||
'api-events-with-secret',
|
||||
{
|
||||
tablesTouched: ['public.api_events'],
|
||||
tablesTouched: [tableRef('public.api_events')],
|
||||
columnsByClause: {
|
||||
select: [],
|
||||
where: ['api_key', 'note'],
|
||||
|
|
@ -219,6 +352,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
sqlAnalysis,
|
||||
pullConfig: {
|
||||
dialect: 'postgres',
|
||||
enabledSchemas: ['public'],
|
||||
redactionPatterns: ['sk_live_[A-Za-z0-9]+', '(?i)secret_token_[a-z0-9]+'],
|
||||
},
|
||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||
|
|
@ -227,6 +361,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
|
||||
[{ id: 'api-events-with-secret', sql: originalSql }],
|
||||
'postgres',
|
||||
undefined,
|
||||
);
|
||||
|
||||
const tableJson = await readFile(join(stagedDir, 'tables/public.api_events.json'), 'utf-8');
|
||||
|
|
@ -266,21 +401,21 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
[
|
||||
'selected-qualified',
|
||||
{
|
||||
tablesTouched: ['orbit_analytics.int_active_contract_arr'],
|
||||
tablesTouched: [tableRef('orbit_analytics.int_active_contract_arr')],
|
||||
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
|
||||
},
|
||||
],
|
||||
[
|
||||
'selected-unqualified',
|
||||
{
|
||||
tablesTouched: ['int_customer_health_signals'],
|
||||
tablesTouched: [tableRef('orbit_analytics.int_customer_health_signals')],
|
||||
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
|
||||
},
|
||||
],
|
||||
[
|
||||
'unselected',
|
||||
{
|
||||
tablesTouched: ['orbit_raw.accounts'],
|
||||
tablesTouched: [tableRef('orbit_raw.accounts')],
|
||||
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
|
||||
},
|
||||
],
|
||||
|
|
@ -297,16 +432,16 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
pullConfig: {
|
||||
dialect: 'postgres',
|
||||
enabledTables: [
|
||||
'orbit_analytics.int_active_contract_arr',
|
||||
'orbit_analytics.int_customer_health_signals',
|
||||
tableRef('orbit_analytics.int_active_contract_arr'),
|
||||
tableRef('orbit_analytics.int_customer_health_signals'),
|
||||
],
|
||||
},
|
||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
expect(await readdir(join(stagedDir, 'tables'))).toEqual([
|
||||
'int_customer_health_signals.json',
|
||||
'orbit_analytics.int_active_contract_arr.json',
|
||||
'orbit_analytics.int_customer_health_signals.json',
|
||||
]);
|
||||
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
|
||||
expect(manifest.touchedTableCount).toBe(2);
|
||||
|
|
@ -372,7 +507,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
[
|
||||
'orders-customers-a',
|
||||
{
|
||||
tablesTouched: ['public.orders', 'public.customers'],
|
||||
tablesTouched: [tableRef('public.orders'), tableRef('public.customers')],
|
||||
columnsByClause: {
|
||||
select: [],
|
||||
where: ['payload'],
|
||||
|
|
@ -384,7 +519,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
[
|
||||
'orders-customers-b',
|
||||
{
|
||||
tablesTouched: ['public.orders', 'public.customers'],
|
||||
tablesTouched: [tableRef('public.orders'), tableRef('public.customers')],
|
||||
columnsByClause: {
|
||||
select: [],
|
||||
where: ['payload_b'],
|
||||
|
|
@ -396,7 +531,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
[
|
||||
'orders-single-table',
|
||||
{
|
||||
tablesTouched: ['public.orders'],
|
||||
tablesTouched: [tableRef('public.orders')],
|
||||
columnsByClause: {
|
||||
select: [],
|
||||
where: [],
|
||||
|
|
@ -415,7 +550,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
queryClient: {},
|
||||
reader,
|
||||
sqlAnalysis,
|
||||
pullConfig: { dialect: 'postgres' },
|
||||
pullConfig: { dialect: 'postgres', enabledSchemas: ['public'] },
|
||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
|
|
@ -456,7 +591,13 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
const sqlAnalysis: SqlAnalysisPort = {
|
||||
analyzeForFingerprint: vi.fn(),
|
||||
analyzeBatch: vi.fn(async () => new Map([
|
||||
['analytic', { tablesTouched: ['public.orders'], columnsByClause: { select: ['status'], where: [], join: [], groupBy: ['status'] } }],
|
||||
[
|
||||
'analytic',
|
||||
{
|
||||
tablesTouched: [tableRef('public.orders')],
|
||||
columnsByClause: { select: ['status'], where: [], join: [], groupBy: ['status'] },
|
||||
},
|
||||
],
|
||||
])),
|
||||
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||
};
|
||||
|
|
@ -467,7 +608,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
queryClient: {},
|
||||
reader,
|
||||
sqlAnalysis,
|
||||
pullConfig: { dialect: 'postgres' },
|
||||
pullConfig: { dialect: 'postgres', enabledSchemas: ['public'] },
|
||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
|
|
@ -475,26 +616,27 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
|
||||
[{ id: 'analytic', sql: 'select status, count(*) from public.orders group by status' }],
|
||||
'postgres',
|
||||
undefined,
|
||||
);
|
||||
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['public.orders.json']);
|
||||
});
|
||||
|
||||
it('merges bare and schema-qualified references to the same table into one work unit', async () => {
|
||||
it('keeps modeled-schema refs and drops unmodeled-schema refs by default', async () => {
|
||||
const stagedDir = await tempDir();
|
||||
const reader: HistoricSqlReader = {
|
||||
async probe() {
|
||||
return { warnings: [], info: [] };
|
||||
},
|
||||
async *fetchAggregated() {
|
||||
yield aggregate({ templateId: 'qualified', canonicalSql: 'select count(*) from orbit_raw.accounts' });
|
||||
yield aggregate({ templateId: 'bare', canonicalSql: 'select id from accounts where active' });
|
||||
yield aggregate({ templateId: 'modeled', canonicalSql: 'select count(*) from orbit_raw.accounts' });
|
||||
yield aggregate({ templateId: 'noise', canonicalSql: 'select count(*) from metabase.application_table' });
|
||||
},
|
||||
};
|
||||
const sqlAnalysis: SqlAnalysisPort = {
|
||||
analyzeForFingerprint: vi.fn(),
|
||||
analyzeBatch: vi.fn(async () => new Map([
|
||||
['qualified', { tablesTouched: ['orbit_raw.accounts'], columnsByClause: { select: [], where: [], join: [], groupBy: [] } }],
|
||||
['bare', { tablesTouched: ['accounts'], columnsByClause: { select: ['id'], where: ['active'], join: [], groupBy: [] } }],
|
||||
['modeled', { tablesTouched: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }], columnsByClause: {} }],
|
||||
['noise', { tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }], columnsByClause: {} }],
|
||||
])),
|
||||
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||
};
|
||||
|
|
@ -505,16 +647,213 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
|||
queryClient: {},
|
||||
reader,
|
||||
sqlAnalysis,
|
||||
pullConfig: { dialect: 'postgres' },
|
||||
pullConfig: {
|
||||
dialect: 'postgres',
|
||||
enabledSchemas: ['orbit_raw'],
|
||||
modeledTableCatalog: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }],
|
||||
},
|
||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
// The bare `accounts` reference resolves to the unique qualified `orbit_raw.accounts`,
|
||||
// so the two templates collapse into a single work unit instead of two.
|
||||
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['orbit_raw.accounts.json']);
|
||||
const merged = await readJson<Record<string, any>>(stagedDir, 'tables/orbit_raw.accounts.json');
|
||||
expect(merged.topTemplates.map((t: any) => t.id).sort()).toEqual(['bare', 'qualified']);
|
||||
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
|
||||
expect(manifest.touchedTableCount).toBe(1);
|
||||
});
|
||||
|
||||
// With no enabled schemas and an empty modeled catalog, the scope floor must not drop
// anything: the table is staged and the manifest records why the floor was disabled.
it('fails open when the implicit modeled scope is empty', async () => {
  const stagedDir = await tempDir();

  const reader: HistoricSqlReader = {
    probe: async () => ({ warnings: [], info: [] }),
    async *fetchAggregated() {
      yield aggregate({
        templateId: 'any-table',
        canonicalSql: 'select count(*) from metabase.application_table',
      });
    },
  };

  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(async () =>
      new Map([
        [
          'any-table',
          {
            tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }],
            columnsByClause: {},
          },
        ],
      ]),
    ),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: { dialect: 'postgres', enabledSchemas: [], modeledTableCatalog: [] },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  const stagedTableFiles = await readdir(join(stagedDir, 'tables'));
  expect(stagedTableFiles).toEqual(['metabase.application_table.json']);

  const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
  expect(manifest.warnings).toContain('query_history_scope_floor_disabled:empty_modeled_scope');
});
|
||||
|
||||
// An explicit '*' in enabledSchemas opts out of the scope floor, so a table outside the
// modeled catalog is still staged.
it('lets enabledSchemas star disable the floor', async () => {
  const stagedDir = await tempDir();

  const reader: HistoricSqlReader = {
    probe: async () => ({ warnings: [], info: [] }),
    async *fetchAggregated() {
      yield aggregate({
        templateId: 'noise',
        canonicalSql: 'select count(*) from metabase.application_table',
      });
    },
  };

  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(async () =>
      new Map([
        [
          'noise',
          {
            tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }],
            columnsByClause: {},
          },
        ],
      ]),
    ),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: {
      dialect: 'postgres',
      enabledSchemas: ['*'],
      modeledTableCatalog: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }],
    },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  const stagedTableFiles = await readdir(join(stagedDir, 'tables'));
  expect(stagedTableFiles).toEqual(['metabase.application_table.json']);
});
|
||||
|
||||
// enabledSchemas names only the dataset ('orbit_analytics') while the analyzed refs carry a
// project catalog; the modeled ref must be kept and the out-of-scope dataset dropped.
it('matches BigQuery dataset scope even when refs include a catalog', async () => {
  const stagedDir = await tempDir();

  const reader: HistoricSqlReader = {
    probe: async () => ({ warnings: [], info: [] }),
    async *fetchAggregated() {
      yield aggregate({
        templateId: 'modeled',
        canonicalSql: 'select count(*) from `demo-project.orbit_analytics.orders`',
      });
      yield aggregate({
        templateId: 'noise',
        canonicalSql: 'select count(*) from `demo-project.metabase.application_table`',
      });
    },
  };

  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(async () =>
      new Map([
        [
          'modeled',
          {
            tablesTouched: [{ catalog: 'demo-project', db: 'orbit_analytics', name: 'orders' }],
            columnsByClause: {},
          },
        ],
        [
          'noise',
          {
            tablesTouched: [{ catalog: 'demo-project', db: 'metabase', name: 'application_table' }],
            columnsByClause: {},
          },
        ],
      ]),
    ),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: {
      dialect: 'bigquery',
      enabledSchemas: ['orbit_analytics'],
      modeledTableCatalog: [{ catalog: 'demo-project', db: 'orbit_analytics', name: 'orders' }],
    },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  const stagedTableFiles = await readdir(join(stagedDir, 'tables'));
  expect(stagedTableFiles).toEqual(['demo-project.orbit_analytics.orders.json']);
});
|
||||
|
||||
// Warnings supplied through pullConfig.scopeFloorWarnings must show up in the staged
// manifest, and the table is still staged because enabledSchemas is '*'.
it('writes propagated scope-floor warnings to the staged manifest', async () => {
  const propagatedWarning = 'query_history_scope_floor_disabled:catalog_unavailable';
  const stagedDir = await tempDir();

  const reader: HistoricSqlReader = {
    probe: async () => ({ warnings: [], info: [] }),
    async *fetchAggregated() {
      yield aggregate({
        templateId: 'any-table',
        canonicalSql: 'select count(*) from metabase.application_table',
      });
    },
  };

  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(async () =>
      new Map([
        [
          'any-table',
          {
            tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }],
            columnsByClause: {},
          },
        ],
      ]),
    ),
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: {
      dialect: 'postgres',
      enabledSchemas: ['*'],
      scopeFloorWarnings: [propagatedWarning],
    },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
  expect(manifest.warnings).toContain(propagatedWarning);

  const stagedTableFiles = await readdir(join(stagedDir, 'tables'));
  expect(stagedTableFiles).toEqual(['metabase.application_table.json']);
});
|
||||
|
||||
// The first analyzeBatch call (with the modeled catalog) rejects; the second call is expected
// to omit the catalog. The out-of-scope table is then staged and the manifest carries the
// catalog_qualification_failed warning.
it('retries without the catalog and disables the floor when catalog qualification fails wholesale', async () => {
  const stagedDir = await tempDir();
  const noiseSql = 'select count(*) from metabase.application_table';

  const reader: HistoricSqlReader = {
    probe: async () => ({ warnings: [], info: [] }),
    async *fetchAggregated() {
      yield aggregate({ templateId: 'noise', canonicalSql: noiseSql });
    },
  };

  const unqualifiedAnalysis = new Map([
    [
      'noise',
      {
        tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }],
        columnsByClause: {},
      },
    ],
  ]);
  const analyzeBatch = vi
    .fn()
    .mockRejectedValueOnce(new Error('catalog qualification failed'))
    .mockResolvedValueOnce(unqualifiedAnalysis);
  const sqlAnalysis: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch,
    validateReadOnly: vi.fn(async () => ({ ok: true })),
  };

  await stageHistoricSqlAggregatedSnapshot({
    stagedDir,
    connectionId: 'warehouse',
    queryClient: {},
    reader,
    sqlAnalysis,
    pullConfig: {
      dialect: 'postgres',
      enabledSchemas: ['orbit_raw'],
      modeledTableCatalog: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }],
    },
    now: new Date('2026-05-11T12:00:00.000Z'),
  });

  expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledTimes(2);
  // Attempt 1: catalog-qualified analysis.
  expect(sqlAnalysis.analyzeBatch).toHaveBeenNthCalledWith(
    1,
    [{ id: 'noise', sql: noiseSql }],
    'postgres',
    { catalog: { tables: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }] } },
  );
  // Attempt 2: same batch, no catalog.
  expect(sqlAnalysis.analyzeBatch).toHaveBeenNthCalledWith(
    2,
    [{ id: 'noise', sql: noiseSql }],
    'postgres',
    undefined,
  );

  const stagedTableFiles = await readdir(join(stagedDir, 'tables'));
  expect(stagedTableFiles).toEqual(['metabase.application_table.json']);

  const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
  expect(manifest.warnings).toContain('query_history_scope_floor_disabled:catalog_qualification_failed');
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ describe('historic-sql unified contracts', () => {
|
|||
expect(
|
||||
stagedTableInputSchema.parse({
|
||||
table: 'public.orders',
|
||||
tableRef: { catalog: null, db: 'public', name: 'orders' },
|
||||
stats: {
|
||||
executionsBucket: '10-100',
|
||||
distinctUsersBucket: '2-5',
|
||||
|
|
@ -81,7 +82,7 @@ describe('historic-sql unified contracts', () => {
|
|||
{
|
||||
id: 'pg:123',
|
||||
canonicalSql: 'select * from public.orders',
|
||||
tablesTouched: ['public.orders'],
|
||||
tablesTouched: [{ catalog: null, db: 'public', name: 'orders' }],
|
||||
executionsBucket: '10-100',
|
||||
distinctUsersBucket: '2-5',
|
||||
dialect: 'postgres',
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
|
@ -34,6 +34,36 @@ describe('local ingest adapters', () => {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Seeds a minimal live-database scan on disk for one connection.
 *
 * Creates `raw-sources/<connectionId>/live-database/sync-1` under `projectDir` and writes:
 *   - `connection.json` with the connection id and the `postgres` driver
 *   - `tables/<db or "default">-<name>.json` describing `table` with no columns or foreign keys
 *
 * Both files are pretty-printed JSON (2-space indent) with a trailing newline.
 */
async function seedLiveScanTable(
  projectDir: string,
  connectionId: string,
  table: { catalog: string | null; db: string | null; name: string },
): Promise<void> {
  const serialize = (payload: unknown): string => `${JSON.stringify(payload, null, 2)}\n`;

  const rawRoot = join(projectDir, 'raw-sources', connectionId, 'live-database', 'sync-1');
  const tablesDir = join(rawRoot, 'tables');
  const tableFileName = `${table.db ?? 'default'}-${table.name}.json`;
  const tableRecord = {
    ...table,
    kind: 'table',
    comment: null,
    estimatedRows: null,
    columns: [],
    foreignKeys: [],
  };

  // Creating the nested tables directory recursively also creates rawRoot.
  await mkdir(tablesDir, { recursive: true });
  await writeFile(join(rawRoot, 'connection.json'), serialize({ connectionId, driver: 'postgres' }), 'utf-8');
  await writeFile(join(tablesDir, tableFileName), serialize(tableRecord), 'utf-8');
}
|
||||
|
||||
it('registers Metabase locally as a staged-bundle adapter', () => {
|
||||
const adapters = createDefaultLocalIngestAdapters(project);
|
||||
|
||||
|
|
@ -205,11 +235,14 @@ describe('local ingest adapters', () => {
|
|||
dialect: 'postgres',
|
||||
minExecutions: 7,
|
||||
enabledTables: [],
|
||||
enabledSchemas: [],
|
||||
modeledTableCatalog: [],
|
||||
filters: {
|
||||
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
|
||||
dropTrivialProbes: true,
|
||||
},
|
||||
redactionPatterns: [],
|
||||
scopeFloorWarnings: [],
|
||||
staleArchiveAfterDays: 90,
|
||||
});
|
||||
});
|
||||
|
|
@ -237,6 +270,71 @@ describe('local ingest adapters', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// Seeds one semantic-layer model (orbit_analytics.mart_revenue) and one live-scan table
// (orbit_raw.accounts), then checks that both appear in the derived pull config scope.
it('passes computed modeled scope to direct historic-sql adapter pull config', async () => {
  const semanticLayerDir = join(project.projectDir, 'semantic-layer/warehouse');
  // NOTE(review): leading whitespace inside these YAML string literals is reproduced exactly
  // as it appears in this view, which may have collapsed indentation; confirm against the file.
  const revenueModelYaml = [
    'name: revenue',
    'table: orbit_analytics.mart_revenue',
    'grain: [id]',
    'columns:',
    ' - name: id',
    ' type: string',
    '',
  ].join('\n');

  await mkdir(semanticLayerDir, { recursive: true });
  await writeFile(join(project.projectDir, 'semantic-layer/warehouse/revenue.yaml'), revenueModelYaml, 'utf-8');
  await seedLiveScanTable(project.projectDir, 'warehouse', {
    catalog: null,
    db: 'orbit_raw',
    name: 'accounts',
  });

  const projectWithQueryHistory = projectWithConnections({
    warehouse: {
      driver: 'postgres',
      schemas: ['orbit_raw'],
      context: {
        queryHistory: {
          enabled: true,
          minExecutions: 7,
          filters: { dropTrivialProbes: true },
        },
      },
    },
  });
  const adapter = { source: 'historic-sql' } as never;

  const pullConfig = localPullConfigForAdapter(projectWithQueryHistory, adapter, 'warehouse');
  await expect(pullConfig).resolves.toMatchObject({
    dialect: 'postgres',
    minExecutions: 7,
    enabledSchemas: ['orbit_analytics', 'orbit_raw'],
    modeledTableCatalog: [
      { catalog: null, db: 'orbit_analytics', name: 'mart_revenue' },
      { catalog: null, db: 'orbit_raw', name: 'accounts' },
    ],
  });
});
|
||||
|
||||
// Uses a freshly initialised project in its own temp directory; with no modeled catalog
// seeded, the derived pull config is expected to fall back to '*' and carry the
// catalog_unavailable warning.
it('passes query-history scope fail-open warnings to direct historic-sql pull config', async () => {
  const projectDir = await mkdtemp(join(tmpdir(), 'ktx-local-qh-scope-warning-'));
  try {
    // Named distinctly so it does not shadow the suite-level `project` fixture.
    const isolatedProject = await initKtxProject({ projectDir });
    isolatedProject.config.connections.warehouse = {
      driver: 'postgres',
      schemas: ['orbit_raw'],
      context: { queryHistory: { enabled: true } },
    } as never;
    const adapter = { source: 'historic-sql' } as never;

    await expect(localPullConfigForAdapter(isolatedProject, adapter, 'warehouse')).resolves.toMatchObject({
      dialect: 'postgres',
      enabledSchemas: ['*'],
      scopeFloorWarnings: ['query_history_scope_floor_disabled:catalog_unavailable'],
    });
  } finally {
    // Clean up even when initialisation or the assertion fails, so failed runs do not leak temp dirs.
    await rm(projectDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
it('rejects local historic-sql pulls when the connection has not enabled historic SQL', async () => {
|
||||
const historicSql = createDefaultLocalIngestAdapters(project, {
|
||||
historicSql: {
|
||||
|
|
|
|||
|
|
@ -49,7 +49,10 @@ describe('createHttpSqlAnalysisPort', () => {
|
|||
const requestJson = vi.fn(async () => ({
|
||||
results: {
|
||||
orders: {
|
||||
tables_touched: ['public.orders', 'public.customers'],
|
||||
tables_touched: [
|
||||
{ catalog: null, db: 'public', name: 'orders' },
|
||||
{ catalog: null, db: 'public', name: 'customers' },
|
||||
],
|
||||
columns_by_clause: {
|
||||
select: ['status'],
|
||||
where: ['created_at'],
|
||||
|
|
@ -79,7 +82,10 @@ describe('createHttpSqlAnalysisPort', () => {
|
|||
[
|
||||
'orders',
|
||||
{
|
||||
tablesTouched: ['public.orders', 'public.customers'],
|
||||
tablesTouched: [
|
||||
{ catalog: null, db: 'public', name: 'orders' },
|
||||
{ catalog: null, db: 'public', name: 'customers' },
|
||||
],
|
||||
columnsByClause: {
|
||||
select: ['status'],
|
||||
where: ['created_at'],
|
||||
|
|
@ -108,6 +114,62 @@ describe('createHttpSqlAnalysisPort', () => {
|
|||
});
|
||||
});
|
||||
|
||||
// Verifies both directions of the HTTP port: the optional catalog is forwarded verbatim in
// the request payload, and snake_case structured refs in the response are mapped to camelCase.
it('passes an optional catalog and maps structured table refs for SQL batch analysis', async () => {
  // Factories return fresh objects each time so that inputs and expectations never share references.
  const structuredRefs = () => [
    { catalog: null, db: 'orbit_raw', name: 'accounts' },
    { catalog: 'demo_project', db: 'orbit_analytics', name: 'orders' },
  ];
  const catalogPayload = () => ({
    tables: [
      { catalog: null, db: 'orbit_raw', name: 'accounts', columns: ['id'] },
      { catalog: 'demo_project', db: 'orbit_analytics', name: 'orders', columns: ['id'] },
    ],
  });
  const batchItems = () => [{ id: 'orders', sql: 'select id from accounts' }];

  const requestJson = vi.fn(async () => ({
    results: {
      orders: {
        tables_touched: structuredRefs(),
        columns_by_clause: { select: ['id'] },
        error: null,
      },
    },
  }));
  const port = createHttpSqlAnalysisPort({ baseUrl: 'http://python.test', requestJson });

  const expectedAnalysis = new Map([
    [
      'orders',
      {
        tablesTouched: structuredRefs(),
        columnsByClause: { select: ['id'] },
        error: null,
      },
    ],
  ]);
  await expect(
    port.analyzeBatch(batchItems(), 'postgres', { catalog: catalogPayload() }),
  ).resolves.toEqual(expectedAnalysis);

  expect(requestJson).toHaveBeenCalledWith('/sql/analyze-batch', {
    dialect: 'postgres',
    items: batchItems(),
    catalog: catalogPayload(),
  });
});
|
||||
|
||||
it('maps read-only SQL validation responses', async () => {
|
||||
const requests: Array<{ path: string; payload: Record<string, unknown> }> = [];
|
||||
const port = createHttpSqlAnalysisPort({
|
||||
|
|
@ -150,7 +212,7 @@ describe('createHttpSqlAnalysisPort', () => {
|
|||
const requestJson = vi.fn(async () => ({
|
||||
results: {
|
||||
orders: {
|
||||
tables_touched: ['public.orders'],
|
||||
tables_touched: [{ catalog: null, db: 'public', name: 'orders' }],
|
||||
columns_by_clause: { select: ['status'], where: [42] },
|
||||
error: null,
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue