feat(query-history): scope mining to modeled schemas by default (#258)

* feat(query-history): structure SQL analysis table refs

* feat(query-history): qualify SQL analysis table refs

* feat(query-history): wire modeled scope floor through ingest

* chore(query-history): verify scope floor

* test(query-history): align daemon SQL batch endpoint contract

* feat(query-history): build scope from same-run scan catalog

* feat(query-history): fail open on scope-floor catalog failures

* chore(query-history): verify scope-floor v1 closure

* refactor(query-history): share scope membership

* feat(setup): apply derived query history filters

* docs: document derived query history filters

* fix(query-history): redact filter picker LLM prompt SQL

* fix(setup): run filter picker SQL analysis through managed daemon

* chore(query-history): verify filter picker v1 closure

* fix(query-history): fail open on partial service-account attribution

* fix(query-history): aggregate BigQuery users by execution count

* fix(query-history): aggregate Snowflake users by execution count

* fix(query-history): use BigQuery query info hash
This commit is contained in:
Andrey Avtomonov 2026-06-03 17:19:42 +02:00 committed by GitHub
parent ce1516b357
commit e70ae1e63b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
42 changed files with 3090 additions and 274 deletions

View file

@ -200,27 +200,78 @@ export class BigQueryHistoricSqlQueryHistoryReader {
config: HistoricSqlUnifiedPullConfig,
): AsyncIterable<AggregatedTemplate> {
const sql = `
WITH filtered_jobs AS (
SELECT
COALESCE(query_info.query_hashes.normalized_literals, TO_HEX(SHA256(query))) AS template_id,
query,
user_email,
creation_time,
end_time,
error_result
FROM ${this.viewPath}
WHERE job_type = 'QUERY'
AND statement_type IN ('SELECT', 'MERGE')
AND creation_time >= ${timestampExpression(window.start)}
AND creation_time < ${timestampExpression(window.end)}
AND query IS NOT NULL
),
template_stats AS (
SELECT
template_id,
MIN(query) AS canonical_sql,
COUNT(*) AS executions,
COUNT(DISTINCT user_email) AS distinct_users,
MIN(creation_time) AS first_seen,
MAX(creation_time) AS last_seen,
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(50)] AS p50_ms,
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(95)] AS p95_ms,
SAFE_DIVIDE(COUNTIF(error_result IS NOT NULL), COUNT(*)) AS error_rate,
CAST(NULL AS INT64) AS rows_produced
FROM filtered_jobs
GROUP BY template_id
HAVING COUNT(*) >= ${config.minExecutions}
),
template_users AS (
SELECT
template_id,
user_email AS user,
COUNT(*) AS executions,
MAX(creation_time) AS last_seen
FROM filtered_jobs
GROUP BY template_id, user_email
)
SELECT
query_hash AS template_id,
MIN(query) AS canonical_sql,
COUNT(*) AS executions,
COUNT(DISTINCT user_email) AS distinct_users,
MIN(creation_time) AS first_seen,
MAX(creation_time) AS last_seen,
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(50)] AS p50_ms,
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(95)] AS p95_ms,
SAFE_DIVIDE(COUNTIF(error_result IS NOT NULL), COUNT(*)) AS error_rate,
CAST(NULL AS INT64) AS rows_produced,
TO_JSON_STRING(ARRAY_AGG(STRUCT(user_email AS user, 1 AS executions) ORDER BY creation_time DESC LIMIT 5)) AS top_users
FROM ${this.viewPath}
WHERE job_type = 'QUERY'
AND statement_type IN ('SELECT', 'MERGE')
AND creation_time >= ${timestampExpression(window.start)}
AND creation_time < ${timestampExpression(window.end)}
AND query IS NOT NULL
GROUP BY query_hash
HAVING COUNT(*) >= ${config.minExecutions}
ORDER BY executions DESC`.trim();
stats.template_id,
stats.canonical_sql,
stats.executions,
stats.distinct_users,
stats.first_seen,
stats.last_seen,
stats.p50_ms,
stats.p95_ms,
stats.error_rate,
stats.rows_produced,
TO_JSON_STRING(
ARRAY_AGG(
STRUCT(users.user AS user, users.executions AS executions)
ORDER BY users.executions DESC, users.last_seen DESC
)
) AS top_users
FROM template_stats AS stats
JOIN template_users AS users
ON users.template_id = stats.template_id
GROUP BY
stats.template_id,
stats.canonical_sql,
stats.executions,
stats.distinct_users,
stats.first_seen,
stats.last_seen,
stats.p50_ms,
stats.p95_ms,
stats.error_rate,
stats.rows_produced
ORDER BY stats.executions DESC`.trim();
const result = await queryClient(client).executeQuery(sql);
if (result.error) {
throw grantsError(result.error);

View file

@ -1,6 +1,7 @@
import { createHash } from 'node:crypto';
import { readFile, readdir } from 'node:fs/promises';
import { join, relative } from 'node:path';
import { tableRefKey } from '../../../scan/table-ref.js';
import type { ChunkResult, DiffSet, ScopeDescriptor, WorkUnit } from '../../types.js';
import { isHistoricSqlPatternInputShardPath } from './pattern-inputs.js';
import { stagedManifestSchema, stagedPatternsInputSchema, stagedTableInputSchema } from './types.js';
@ -37,7 +38,7 @@ export async function chunkHistoricSqlUnifiedStagedDir(stagedDir: string, diffSe
}
const table = stagedTableInputSchema.parse(await readJson(stagedDir, path));
workUnits.push({
unitKey: `historic-sql-table-${safeUnitKey(table.table)}`,
unitKey: `historic-sql-table-${safeUnitKey(tableRefKey(table.tableRef))}`,
displayLabel: `Historic SQL usage: ${table.table}`,
rawFiles: [path],
dependencyPaths: ['manifest.json'],

View file

@ -1,4 +1,5 @@
import { Buffer } from 'node:buffer';
import { tableRefKey } from '../../../scan/table-ref.js';
import type { StagedPatternsInput } from './types.js';
const HISTORIC_SQL_PATTERN_WORKUNIT_DIR = 'patterns-input';
@ -44,11 +45,16 @@ function sortedAuditTemplates(templates: readonly PatternTemplate[]): PatternTem
function sortedPatternCandidates(templates: readonly PatternTemplate[]): PatternTemplate[] {
return [...templates]
.filter((template) => template.tablesTouched.length >= 2)
.map((template) => ({ ...template, tablesTouched: [...template.tablesTouched].sort() }))
.map((template) => ({
...template,
tablesTouched: [...template.tablesTouched].sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right))),
}))
.sort((left, right) => {
const cardinality = right.tablesTouched.length - left.tablesTouched.length;
if (cardinality !== 0) return cardinality;
const tableSignature = left.tablesTouched.join('\0').localeCompare(right.tablesTouched.join('\0'));
const leftSignature = left.tablesTouched.map(tableRefKey).join('\0');
const rightSignature = right.tablesTouched.map(tableRefKey).join('\0');
const tableSignature = leftSignature.localeCompare(rightSignature);
if (tableSignature !== 0) return tableSignature;
return left.id.localeCompare(right.id);
});

View file

@ -0,0 +1,278 @@
import { z } from 'zod';
import type { KtxLlmRuntimePort } from '../../../../context/llm/runtime-port.js';
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
import { tableRefKey } from '../../../scan/table-ref.js';
import type { KtxTableRef } from '../../../scan/types.js';
import { bucketDistinctUsers, bucketExecutions, bucketRecency } from './buckets.js';
import {
compileHistoricSqlRedactionPatterns,
redactHistoricSqlText,
type HistoricSqlRedactionPattern,
} from './redaction.js';
import { includedQueryHistoryTableRefs } from './scope-membership.js';
import {
aggregatedTemplateSchema,
historicSqlUnifiedPullConfigSchema,
type AggregatedTemplate,
type HistoricSqlDialect,
type HistoricSqlReader,
} from './types.js';
export interface QueryHistoryFilterProposal {
excludedRoles: Array<{ role: string; reason: string; pattern: string }>;
consideredRoleCount: number;
skipped: { reason: 'no-llm' | 'no-daemon' | 'no-in-scope-history' | 'user-block-present' } | null;
warnings: string[];
}
export interface ProposeQueryHistoryServiceAccountFiltersInput {
connectionId: string;
dialect: HistoricSqlDialect;
queryClient: unknown;
reader: HistoricSqlReader;
sqlAnalysis: SqlAnalysisPort;
llmRuntime: KtxLlmRuntimePort | null;
pullConfig: unknown;
now?: Date;
userServiceAccountsPresent?: boolean;
}
interface ParsedTemplateForPicker {
template: AggregatedTemplate;
tablesTouched: KtxTableRef[];
includedTables: KtxTableRef[];
}
interface RoleAccumulator {
role: string;
executions: number;
distinctUsers: number;
lastSeen: string;
tables: Map<string, KtxTableRef>;
templates: AggregatedTemplate[];
}
interface QueryHistoryRoleRecord {
role: string;
inScopeTables: string[];
executionsBucket: string;
distinctUsersBucket: string;
recencyBucket: string;
representativeTemplates: Array<{ id: string; canonicalSql: string; dialect: HistoricSqlDialect }>;
}
const queryHistoryFilterAdjudicationSchema = z.object({
roles: z.array(
z.object({
role: z.string().min(1),
exclude: z.boolean(),
reason: z.string().min(1),
}).strict(),
),
}).strict();
type QueryHistoryFilterAdjudication = z.infer<typeof queryHistoryFilterAdjudicationSchema>;
function emptyProposal(skipped: QueryHistoryFilterProposal['skipped'], warnings: string[] = []): QueryHistoryFilterProposal {
return { excludedRoles: [], consideredRoleCount: 0, skipped, warnings };
}
function displayTableRef(ref: KtxTableRef): string {
return [ref.catalog, ref.db, ref.name].filter((part): part is string => !!part && part.length > 0).join('.');
}
function redactTemplateSqlForPicker(
template: AggregatedTemplate,
redactors: readonly HistoricSqlRedactionPattern[],
): AggregatedTemplate {
if (redactors.length === 0) {
return template;
}
return {
...template,
canonicalSql: redactHistoricSqlText(template.canonicalSql, redactors),
};
}
/** @internal */
export function regexEscapeForExactRolePattern(role: string): string {
return `^${role.replace(/[\\^$.*+?()[\]{}|]/g, '\\$&')}$`;
}
function recordRole(
acc: RoleAccumulator,
template: AggregatedTemplate,
tables: readonly KtxTableRef[],
executions: number,
): void {
acc.executions += executions;
acc.distinctUsers = Math.max(acc.distinctUsers, template.stats.distinctUsers);
acc.lastSeen = template.stats.lastSeen > acc.lastSeen ? template.stats.lastSeen : acc.lastSeen;
for (const table of tables) {
acc.tables.set(tableRefKey(table), table);
}
acc.templates.push(template);
}
function roleRecords(parsedTemplates: readonly ParsedTemplateForPicker[], now: Date): QueryHistoryRoleRecord[] {
const byRole = new Map<string, RoleAccumulator>();
for (const parsed of parsedTemplates) {
for (const entry of parsed.template.topUsers) {
if (!entry.user || entry.user.trim().length === 0 || entry.executions <= 0) {
continue;
}
const role = entry.user.trim();
const acc =
byRole.get(role) ??
{
role,
executions: 0,
distinctUsers: 0,
lastSeen: '1970-01-01T00:00:00.000Z',
tables: new Map<string, KtxTableRef>(),
templates: [],
};
recordRole(acc, parsed.template, parsed.includedTables, entry.executions);
byRole.set(role, acc);
}
}
return [...byRole.values()]
.sort((left, right) => right.executions - left.executions || left.role.localeCompare(right.role))
.map((acc) => ({
role: acc.role,
inScopeTables: [...acc.tables.entries()]
.sort(([left], [right]) => left.localeCompare(right))
.slice(0, 25)
.map(([, ref]) => displayTableRef(ref)),
executionsBucket: bucketExecutions(acc.executions),
distinctUsersBucket: bucketDistinctUsers(acc.distinctUsers),
recencyBucket: bucketRecency(acc.lastSeen, now),
representativeTemplates: [...acc.templates]
.sort((left, right) => right.stats.executions - left.stats.executions || left.templateId.localeCompare(right.templateId))
.slice(0, 3)
.map((template) => ({
id: template.templateId,
canonicalSql: template.canonicalSql,
dialect: template.dialect,
})),
}));
}
function adjudicationSystemPrompt(): string {
return [
'You are helping ktx decide whether observed query-history roles are operational service accounts.',
'Default every role to keep. Mark exclude true only when the aggregate evidence clearly shows loader, ELT, reverse-ETL, export, refresh, or maintenance traffic rather than analyst or BI-dashboard usage.',
'Use only the observed role records. Do not rely on a hardcoded denylist. Return structured output only.',
].join('\n');
}
export async function proposeQueryHistoryServiceAccountFilters(
input: ProposeQueryHistoryServiceAccountFiltersInput,
): Promise<QueryHistoryFilterProposal> {
if (!input.llmRuntime) {
return emptyProposal({ reason: 'no-llm' });
}
const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
const now = input.now ?? new Date();
const windowDays = 'windowDays' in config ? config.windowDays : 90;
const windowStart = new Date(now.getTime() - windowDays * 24 * 60 * 60 * 1000);
const warnings: string[] = [];
const snapshot: AggregatedTemplate[] = [];
try {
for await (const row of input.reader.fetchAggregated(input.queryClient, { start: windowStart, end: now }, config)) {
snapshot.push(aggregatedTemplateSchema.parse(row));
}
} catch (error) {
return emptyProposal(null, [
`query_history_filter_picker_read_failed:${error instanceof Error ? error.message : String(error)}`,
]);
}
if (snapshot.length === 0) {
return emptyProposal({ reason: 'no-in-scope-history' });
}
const analysisItems = snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql }));
const analysisOptions =
config.modeledTableCatalog.length > 0 ? { catalog: { tables: config.modeledTableCatalog } } : undefined;
let analysis: Awaited<ReturnType<SqlAnalysisPort['analyzeBatch']>>;
try {
analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, input.dialect, analysisOptions);
} catch (error) {
return emptyProposal({ reason: 'no-daemon' }, [
`query_history_filter_picker_analysis_failed:${error instanceof Error ? error.message : String(error)}`,
]);
}
const parsedTemplates: ParsedTemplateForPicker[] = [];
for (const template of snapshot) {
const parsed = analysis.get(template.templateId);
if (!parsed || parsed.error) {
warnings.push(`query_history_filter_picker_parse_failed:${template.templateId}`);
continue;
}
const tablesTouched = [...new Map(parsed.tablesTouched.map((ref) => [tableRefKey(ref), ref])).values()]
.filter((ref) => ref.name.length > 0)
.sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right)));
const includedTables = includedQueryHistoryTableRefs(tablesTouched, config);
if (includedTables.length === 0) {
continue;
}
parsedTemplates.push({
template: redactTemplateSqlForPicker(template, redactors),
tablesTouched,
includedTables,
});
}
const records = roleRecords(parsedTemplates, now);
if (records.length <= 1) {
return {
excludedRoles: [],
consideredRoleCount: records.length,
skipped: { reason: 'no-in-scope-history' },
warnings,
};
}
let generated: QueryHistoryFilterAdjudication;
try {
generated = await input.llmRuntime.generateObject<QueryHistoryFilterAdjudication, typeof queryHistoryFilterAdjudicationSchema>({
role: 'candidateExtraction',
system: adjudicationSystemPrompt(),
prompt: JSON.stringify({ connectionId: input.connectionId, dialect: input.dialect, roles: records }),
schema: queryHistoryFilterAdjudicationSchema,
});
} catch (error) {
return {
excludedRoles: [],
consideredRoleCount: records.length,
skipped: { reason: 'no-llm' },
warnings: [
...warnings,
`query_history_filter_picker_llm_failed:${error instanceof Error ? error.message : String(error)}`,
],
};
}
const knownRoles = new Set(records.map((record) => record.role));
const excludedRoles = generated.roles
.filter((role) => role.exclude && knownRoles.has(role.role))
.sort((left, right) => left.role.localeCompare(right.role))
.map((role) => ({
role: role.role,
reason: role.reason,
pattern: regexEscapeForExactRolePattern(role.role),
}));
return {
excludedRoles,
consideredRoleCount: records.length,
skipped: input.userServiceAccountsPresent ? { reason: 'user-block-present' } : null,
warnings,
};
}

View file

@ -0,0 +1,260 @@
import type { Dirent } from 'node:fs';
import { access, readdir, readFile } from 'node:fs/promises';
import { join, relative } from 'node:path';
import YAML from 'yaml';
import { getDriverRegistration } from '../../../connections/drivers.js';
import { parseDottedTableEntry } from '../../../scan/enabled-tables.js';
import { tableRefKey, tableRefSet, type KtxTableRefKey } from '../../../scan/table-ref.js';
import type { KtxTableRef } from '../../../scan/types.js';
import { readLiveDatabaseTableFiles } from '../live-database/stage.js';
export interface QueryHistoryScopeFloorInput {
projectDir: string;
connectionId: string;
driver: string;
connection: Record<string, unknown>;
storedQueryHistory: Record<string, unknown>;
}
export interface QueryHistoryScopeFloor {
enabledTables: KtxTableRef[];
enabledTableKeys: ReadonlySet<KtxTableRefKey> | null;
enabledSchemas: string[];
modeledTableCatalog: KtxTableRef[];
floorDisabled: boolean;
warnings: string[];
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === 'object' && value !== null && !Array.isArray(value);
}
function stringArray(value: unknown): string[] {
return Array.isArray(value)
? value
.filter((item): item is string => typeof item === 'string' && item.trim().length > 0)
.map((item) => item.trim())
: [];
}
function tableRefsFromValues(values: unknown): KtxTableRef[] {
if (!Array.isArray(values)) return [];
return values.flatMap((value) => {
if (typeof value === 'string') {
const ref = parseDottedTableEntry(value);
return ref ? [ref] : [];
}
if (isRecord(value) && typeof value.name === 'string' && value.name.length > 0) {
return [
{
catalog: typeof value.catalog === 'string' ? value.catalog : null,
db: typeof value.db === 'string' ? value.db : null,
name: value.name,
},
];
}
return [];
});
}
function declaredSchemas(driver: string, connection: Record<string, unknown>): string[] {
const key = getDriverRegistration(driver)?.scopeConfigKey;
if (!key) return [];
return [...new Set(stringArray(connection[key]))].sort();
}
function uniqueSortedTableRefs(refs: readonly KtxTableRef[]): KtxTableRef[] {
const byKey = new Map<KtxTableRefKey, KtxTableRef>();
for (const ref of refs) {
byKey.set(tableRefKey(ref), ref);
}
return [...byKey.entries()]
.sort(([left], [right]) => left.localeCompare(right))
.map(([, ref]) => ref);
}
async function latestLiveDatabaseScanDir(projectDir: string, connectionId: string): Promise<string | null> {
const root = join(projectDir, 'raw-sources', connectionId, 'live-database');
let entries: Dirent[];
try {
entries = await readdir(root, { withFileTypes: true });
} catch (error) {
if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') return null;
throw error;
}
const syncDirs = entries
.filter((entry) => entry.isDirectory())
.map((entry) => entry.name)
.sort()
.reverse();
for (const syncDir of syncDirs) {
const absolute = join(root, syncDir);
try {
await access(join(absolute, 'connection.json'));
return absolute;
} catch {
continue;
}
}
return null;
}
async function scannedTableRefs(
projectDir: string,
connectionId: string,
): Promise<{ refs: KtxTableRef[]; catalogAvailable: boolean; warnings: string[] }> {
const scanDir = await latestLiveDatabaseScanDir(projectDir, connectionId);
if (!scanDir) {
return { refs: [], catalogAvailable: false, warnings: [] };
}
try {
const tableFiles = await readLiveDatabaseTableFiles(scanDir);
return {
refs: uniqueSortedTableRefs(
tableFiles.map(({ table }) => ({ catalog: table.catalog, db: table.db, name: table.name })),
),
catalogAvailable: true,
warnings: [],
};
} catch (error) {
return {
refs: [],
catalogAvailable: false,
warnings: [
`query_history_scope_floor_catalog_read_failed:live_database_scan:${error instanceof Error ? error.message : String(error)}`,
],
};
}
}
async function listYamlFiles(root: string): Promise<string[]> {
try {
const entries = await readdir(root, { withFileTypes: true, recursive: true });
return entries
.filter((entry) => entry.isFile() && /\.ya?ml$/i.test(entry.name))
.map((entry) => relative(root, join(entry.parentPath, entry.name)).replace(/\\/g, '/'))
.sort();
} catch (error) {
if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') return [];
throw error;
}
}
function refsFromManifest(content: string): KtxTableRef[] {
const parsed = YAML.parse(content) as unknown;
if (!isRecord(parsed) || !isRecord(parsed.tables)) return [];
return Object.values(parsed.tables).flatMap((entry) => {
if (!isRecord(entry) || typeof entry.table !== 'string') return [];
const ref = parseDottedTableEntry(entry.table);
return ref ? [ref] : [];
});
}
function refsFromStandaloneSource(content: string): KtxTableRef[] {
const parsed = YAML.parse(content) as unknown;
if (!isRecord(parsed) || typeof parsed.table !== 'string') return [];
const ref = parseDottedTableEntry(parsed.table);
return ref ? [ref] : [];
}
async function semanticTableRefs(
projectDir: string,
connectionId: string,
): Promise<{ refs: KtxTableRef[]; warnings: string[] }> {
const root = join(projectDir, 'semantic-layer', connectionId);
const files = await listYamlFiles(root);
const refs: KtxTableRef[] = [];
const warnings: string[] = [];
for (const file of files) {
try {
const content = await readFile(join(root, file), 'utf-8');
refs.push(...(file.startsWith('_schema/') ? refsFromManifest(content) : refsFromStandaloneSource(content)));
} catch (error) {
warnings.push(
`query_history_scope_floor_catalog_read_failed:${file}:${error instanceof Error ? error.message : String(error)}`,
);
}
}
return { refs: uniqueSortedTableRefs(refs), warnings };
}
export async function resolveQueryHistoryScopeFloor(input: QueryHistoryScopeFloorInput): Promise<QueryHistoryScopeFloor> {
const explicitEnabledTables = [
...tableRefsFromValues(input.storedQueryHistory.enabledTables),
...tableRefsFromValues(input.connection.enabled_tables),
];
const semanticTables = await semanticTableRefs(input.projectDir, input.connectionId);
const scannedTables = await scannedTableRefs(input.projectDir, input.connectionId);
const modeledTables = uniqueSortedTableRefs([
...semanticTables.refs,
...scannedTables.refs,
...explicitEnabledTables,
]);
const warnings = [...semanticTables.warnings, ...scannedTables.warnings];
if (explicitEnabledTables.length > 0) {
return {
enabledTables: explicitEnabledTables,
enabledTableKeys: tableRefSet(explicitEnabledTables),
enabledSchemas: [],
modeledTableCatalog: modeledTables,
floorDisabled: false,
warnings,
};
}
const explicitSchemas = stringArray(input.storedQueryHistory.enabledSchemas);
if (explicitSchemas.includes('*')) {
return {
enabledTables: [],
enabledTableKeys: null,
enabledSchemas: ['*'],
modeledTableCatalog: modeledTables,
floorDisabled: true,
warnings,
};
}
if (explicitSchemas.length > 0) {
if (!scannedTables.catalogAvailable || modeledTables.length === 0) {
return {
enabledTables: [],
enabledTableKeys: null,
enabledSchemas: ['*'],
modeledTableCatalog: modeledTables,
floorDisabled: true,
warnings: [...warnings, 'query_history_scope_floor_disabled:catalog_unavailable'],
};
}
return {
enabledTables: [],
enabledTableKeys: null,
enabledSchemas: [...new Set(explicitSchemas)].sort(),
modeledTableCatalog: modeledTables,
floorDisabled: false,
warnings,
};
}
const schemas = new Set(declaredSchemas(input.driver, input.connection));
for (const ref of semanticTables.refs) {
if (ref.db) schemas.add(ref.db);
}
if (schemas.size > 0 && (!scannedTables.catalogAvailable || modeledTables.length === 0)) {
return {
enabledTables: [],
enabledTableKeys: null,
enabledSchemas: ['*'],
modeledTableCatalog: modeledTables,
floorDisabled: true,
warnings: [...warnings, 'query_history_scope_floor_disabled:catalog_unavailable'],
};
}
return {
enabledTables: [],
enabledTableKeys: null,
enabledSchemas: [...schemas].sort(),
modeledTableCatalog: modeledTables,
floorDisabled: false,
warnings,
};
}

View file

@ -0,0 +1,45 @@
import { tableRefKey, tableRefSet } from '../../../scan/table-ref.js';
import type { KtxTableRef } from '../../../scan/types.js';
export interface QueryHistoryScopeMembershipConfig {
enabledTables: readonly KtxTableRef[];
enabledSchemas: readonly string[];
}
function schemaNameForRef(ref: KtxTableRef): string | null {
return ref.db && ref.db.length > 0 ? ref.db : null;
}
function schemaNamesFromConfig(enabledSchemas: readonly string[]): Set<string> {
return new Set(enabledSchemas.filter((schema) => schema !== '*'));
}
export function isQueryHistoryScopeFloorDisabled(config: QueryHistoryScopeMembershipConfig): boolean {
return config.enabledSchemas.includes('*');
}
export function shouldFailOpenQueryHistoryScope(config: QueryHistoryScopeMembershipConfig): boolean {
return (
config.enabledTables.length === 0 &&
!isQueryHistoryScopeFloorDisabled(config) &&
config.enabledSchemas.length === 0
);
}
export function includedQueryHistoryTableRefs(
tablesTouched: readonly KtxTableRef[],
config: QueryHistoryScopeMembershipConfig,
): KtxTableRef[] {
if (config.enabledTables.length > 0) {
const enabled = tableRefSet(config.enabledTables);
return tablesTouched.filter((ref) => enabled.has(tableRefKey(ref)));
}
if (isQueryHistoryScopeFloorDisabled(config) || shouldFailOpenQueryHistoryScope(config)) {
return [...tablesTouched];
}
const schemas = schemaNamesFromConfig(config.enabledSchemas);
return tablesTouched.filter((ref) => {
const schema = schemaNameForRef(ref);
return schema !== null && schemas.has(schema);
});
}

View file

@ -188,26 +188,75 @@ export class SnowflakeHistoricSqlQueryHistoryReader {
config: HistoricSqlUnifiedPullConfig,
): AsyncIterable<AggregatedTemplate> {
const sql = `
WITH filtered_queries AS (
SELECT
query_hash,
query_text,
user_name,
start_time,
total_elapsed_time,
execution_status,
rows_produced
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
WHERE query_text IS NOT NULL
AND query_type IN ('SELECT', 'MERGE')
AND start_time >= ${timestampLiteral(window.start)}
AND start_time < ${timestampLiteral(window.end)}
),
template_stats AS (
SELECT
query_hash AS template_id,
MIN(query_text) AS canonical_sql,
COUNT(*) AS executions,
COUNT(DISTINCT user_name) AS distinct_users,
MIN(start_time) AS first_seen,
MAX(start_time) AS last_seen,
APPROX_PERCENTILE(total_elapsed_time, 0.50) AS p50_ms,
APPROX_PERCENTILE(total_elapsed_time, 0.95) AS p95_ms,
DIV0(COUNT_IF(execution_status != 'SUCCESS'), COUNT(*)) AS error_rate,
SUM(rows_produced) AS rows_produced
FROM filtered_queries
GROUP BY query_hash
HAVING COUNT(*) >= ${config.minExecutions}
),
template_users AS (
SELECT
query_hash AS template_id,
user_name AS user,
COUNT(*) AS executions,
MAX(start_time) AS last_seen
FROM filtered_queries
GROUP BY query_hash, user_name
)
SELECT
query_hash AS template_id,
MIN(query_text) AS canonical_sql,
COUNT(*) AS executions,
COUNT(DISTINCT user_name) AS distinct_users,
MIN(start_time) AS first_seen,
MAX(start_time) AS last_seen,
APPROX_PERCENTILE(total_elapsed_time, 0.50) AS p50_ms,
APPROX_PERCENTILE(total_elapsed_time, 0.95) AS p95_ms,
DIV0(COUNT_IF(execution_status != 'SUCCESS'), COUNT(*)) AS error_rate,
SUM(rows_produced) AS rows_produced,
ARRAY_AGG(OBJECT_CONSTRUCT('user', user_name, 'executions', 1)) WITHIN GROUP (ORDER BY start_time DESC)::string AS top_users
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
WHERE query_text IS NOT NULL
AND query_type IN ('SELECT', 'MERGE')
AND start_time >= ${timestampLiteral(window.start)}
AND start_time < ${timestampLiteral(window.end)}
GROUP BY query_hash
HAVING COUNT(*) >= ${config.minExecutions}
ORDER BY executions DESC`.trim();
stats.template_id,
stats.canonical_sql,
stats.executions,
stats.distinct_users,
stats.first_seen,
stats.last_seen,
stats.p50_ms,
stats.p95_ms,
stats.error_rate,
stats.rows_produced,
ARRAY_AGG(
OBJECT_CONSTRUCT('user', users.user, 'executions', users.executions)
) WITHIN GROUP (ORDER BY users.executions DESC, users.last_seen DESC)::string AS top_users
FROM template_stats AS stats
JOIN template_users AS users
ON users.template_id = stats.template_id
GROUP BY
stats.template_id,
stats.canonical_sql,
stats.executions,
stats.distinct_users,
stats.first_seen,
stats.last_seen,
stats.p50_ms,
stats.p95_ms,
stats.error_rate,
stats.rows_produced
ORDER BY stats.executions DESC`.trim();
const result = await queryClient(client).executeQuery(sql);
if (result.error) {
throw grantsError(result.error);

View file

@ -1,6 +1,8 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
import { tableRefKey, type KtxTableRefKey } from '../../../scan/table-ref.js';
import type { KtxTableRef } from '../../../scan/types.js';
import {
bucketDistinctUsers,
bucketErrorRate,
@ -15,6 +17,11 @@ import {
redactHistoricSqlText,
type HistoricSqlRedactionPattern,
} from './redaction.js';
import {
includedQueryHistoryTableRefs,
isQueryHistoryScopeFloorDisabled,
shouldFailOpenQueryHistoryScope,
} from './scope-membership.js';
import {
HISTORIC_SQL_SOURCE_KEY,
aggregatedTemplateSchema,
@ -38,17 +45,13 @@ interface StageHistoricSqlAggregatedSnapshotInput {
interface ParsedTemplate {
template: AggregatedTemplate;
tablesTouched: string[];
includedTables: string[];
tablesTouched: KtxTableRef[];
includedTables: KtxTableRef[];
columnsByClause: Record<string, string[]>;
}
interface EnabledTableFilter {
exact: Set<string>;
uniqueUnqualified: Set<string>;
}
interface TableAccumulator {
tableRef: KtxTableRef;
table: string;
executions: number;
distinctUsers: number;
@ -105,8 +108,7 @@ function shouldDropByUsers(template: AggregatedTemplate, config: HistoricSqlUnif
const matchingExecutions = template.topUsers
.filter((entry) => matchesAny(entry.user, patterns))
.reduce((sum, entry) => sum + entry.executions, 0);
const allExecutions = template.topUsers.reduce((sum, entry) => sum + entry.executions, 0);
const serviceOnly = allExecutions > 0 && matchingExecutions >= allExecutions;
const serviceOnly = template.stats.executions > 0 && matchingExecutions >= template.stats.executions;
return service.mode === 'exclude' ? serviceOnly : !serviceOnly;
}
@ -122,90 +124,8 @@ function shouldDropTemplate(template: AggregatedTemplate, config: HistoricSqlUni
return false;
}
function normalizeTableIdentifier(value: string): string {
return value.trim().toLowerCase();
}
function unqualifiedTableIdentifier(value: string): string {
const parts = normalizeTableIdentifier(value).split('.').filter(Boolean);
return parts.at(-1) ?? '';
}
function buildEnabledTableFilter(enabledTables: string[]): EnabledTableFilter | null {
if (enabledTables.length === 0) {
return null;
}
const exact = new Set(enabledTables.map(normalizeTableIdentifier).filter((value) => value.length > 0));
const unqualifiedCounts = new Map<string, number>();
for (const table of exact) {
const unqualified = unqualifiedTableIdentifier(table);
if (unqualified.length > 0) {
unqualifiedCounts.set(unqualified, (unqualifiedCounts.get(unqualified) ?? 0) + 1);
}
}
return {
exact,
uniqueUnqualified: new Set(
[...unqualifiedCounts.entries()]
.filter(([, count]) => count === 1)
.map(([table]) => table),
),
};
}
function isEnabledTable(table: string, filter: EnabledTableFilter | null): boolean {
if (!filter) {
return true;
}
const normalized = normalizeTableIdentifier(table);
return filter.exact.has(normalized) || filter.uniqueUnqualified.has(unqualifiedTableIdentifier(normalized));
}
/**
* pg_stat_statements records queries as written, so the same physical table can appear
* both bare (`accounts`, resolved via search_path) and schema-qualified
* (`orbit_raw.accounts`). Collapse a bare identifier into its schema-qualified form when
* exactly one qualified form shares its unqualified name, so the two never become separate
* work units. Ambiguous bare names (two qualified forms) are left untouched.
*/
function canonicalizeTableIdentifiers(parsedTemplates: ParsedTemplate[]): void {
const all = new Set<string>();
for (const parsed of parsedTemplates) {
for (const table of parsed.includedTables) {
all.add(table);
}
}
const qualifiedByUnqualified = new Map<string, Set<string>>();
for (const table of all) {
if (!table.includes('.')) {
continue;
}
const unqualified = unqualifiedTableIdentifier(table);
if (unqualified.length === 0) {
continue;
}
const forms = qualifiedByUnqualified.get(unqualified) ?? new Set<string>();
forms.add(table);
qualifiedByUnqualified.set(unqualified, forms);
}
const canonical = new Map<string, string>();
for (const table of all) {
if (table.includes('.')) {
continue;
}
const forms = qualifiedByUnqualified.get(unqualifiedTableIdentifier(table));
if (forms && forms.size === 1) {
canonical.set(table, [...forms][0]);
}
}
if (canonical.size === 0) {
return;
}
const remap = (table: string): string => canonical.get(table) ?? table;
for (const parsed of parsedTemplates) {
parsed.includedTables = [...new Set(parsed.includedTables.map(remap))].sort();
parsed.tablesTouched = [...new Set(parsed.tablesTouched.map(remap))].sort();
}
function displayTableRef(ref: KtxTableRef): string {
return [ref.catalog, ref.db, ref.name].filter((part): part is string => !!part && part.length > 0).join('.');
}
function historicSqlWindowDays(config: HistoricSqlUnifiedPullConfig): number {
@ -240,9 +160,10 @@ function recordJoin(acc: TableAccumulator, otherTable: string, columns: string[]
}
}
function accumulatorFor(table: string): TableAccumulator {
function accumulatorFor(tableRef: KtxTableRef): TableAccumulator {
return {
table,
tableRef,
table: displayTableRef(tableRef),
executions: 0,
distinctUsers: 0,
errorRateNumerator: 0,
@ -272,8 +193,8 @@ function addTemplate(acc: TableAccumulator, parsed: ParsedTemplate): void {
}
}
const joinColumns = parsed.columnsByClause.join ?? [];
for (const otherTable of parsed.tablesTouched.filter((table) => table !== acc.table)) {
recordJoin(acc, otherTable, joinColumns, executions);
for (const otherTable of parsed.tablesTouched.filter((table) => tableRefKey(table) !== tableRefKey(acc.tableRef))) {
recordJoin(acc, displayTableRef(otherTable), joinColumns, executions);
}
acc.topTemplates.push(parsed.template);
}
@ -310,6 +231,7 @@ function toStagedTable(acc: TableAccumulator, now: Date): StagedTableInput {
return {
table: acc.table,
tableRef: acc.tableRef,
stats: {
executionsBucket: bucketExecutions(acc.executions),
distinctUsersBucket: bucketDistinctUsers(acc.distinctUsers),
@ -329,7 +251,7 @@ function toPatternsInput(parsedTemplates: ParsedTemplate[]): StagedPatternsInput
.map(({ template, tablesTouched }) => ({
id: template.templateId,
canonicalSql: template.canonicalSql,
tablesTouched: [...tablesTouched].sort(),
tablesTouched: [...tablesTouched].sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right))),
executionsBucket: bucketExecutions(template.stats.executions),
distinctUsersBucket: bucketDistinctUsers(template.stats.distinctUsers),
dialect: template.dialect,
@ -340,7 +262,6 @@ function toPatternsInput(parsedTemplates: ParsedTemplate[]): StagedPatternsInput
export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSqlAggregatedSnapshotInput): Promise<void> {
const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
const enabledTableFilter = buildEnabledTableFilter(config.enabledTables);
const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
const now = input.now ?? new Date();
const windowStart = new Date(now.getTime() - historicSqlWindowDays(config) * 24 * 60 * 60 * 1000);
@ -356,11 +277,25 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
}
}
const analysis = await input.sqlAnalysis.analyzeBatch(
snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql })),
config.dialect,
);
const warnings: string[] = [];
const analysisItems = snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql }));
const analysisOptions =
config.modeledTableCatalog.length > 0 ? { catalog: { tables: config.modeledTableCatalog } } : undefined;
const warnings: string[] = [
...config.scopeFloorWarnings,
...(shouldFailOpenQueryHistoryScope(config) ? ['query_history_scope_floor_disabled:empty_modeled_scope'] : []),
];
let scopeDisabledByQualificationFailure = false;
let analysis: Awaited<ReturnType<SqlAnalysisPort['analyzeBatch']>>;
try {
analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, config.dialect, analysisOptions);
} catch (error) {
if (!analysisOptions || config.enabledTables.length > 0 || isQueryHistoryScopeFloorDisabled(config)) {
throw error;
}
warnings.push('query_history_scope_floor_disabled:catalog_qualification_failed');
scopeDisabledByQualificationFailure = true;
analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, config.dialect, undefined);
}
const parsedTemplates: ParsedTemplate[] = [];
for (const template of snapshot) {
const parsed = analysis.get(template.templateId);
@ -368,8 +303,12 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
warnings.push(`parse_failed:${template.templateId}`);
continue;
}
const tablesTouched = [...new Set(parsed.tablesTouched)].filter((table) => table.length > 0).sort();
const includedTables = tablesTouched.filter((table) => isEnabledTable(table, enabledTableFilter));
const tablesTouched = [...new Map(parsed.tablesTouched.map((ref) => [tableRefKey(ref), ref])).values()]
.filter((ref) => ref.name.length > 0)
.sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right)));
const includedTables = scopeDisabledByQualificationFailure
? [...tablesTouched]
: includedQueryHistoryTableRefs(tablesTouched, config);
if (includedTables.length === 0) {
continue;
}
@ -383,24 +322,23 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
});
}
canonicalizeTableIdentifiers(parsedTemplates);
const byTable = new Map<string, TableAccumulator>();
const byTable = new Map<KtxTableRefKey, TableAccumulator>();
for (const parsed of parsedTemplates) {
for (const table of parsed.includedTables) {
const acc = byTable.get(table) ?? accumulatorFor(table);
for (const tableRef of parsed.includedTables) {
const key = tableRefKey(tableRef);
const acc = byTable.get(key) ?? accumulatorFor(tableRef);
addTemplate(acc, parsed);
byTable.set(table, acc);
byTable.set(key, acc);
}
}
await mkdir(input.stagedDir, { recursive: true });
for (const [table, acc] of [...byTable.entries()].sort(([left], [right]) => left.localeCompare(right))) {
await writeJson(input.stagedDir, `tables/${table}.json`, toStagedTable(acc, now));
for (const [, acc] of [...byTable.entries()].sort((left, right) => left[0].localeCompare(right[0]))) {
await writeJson(input.stagedDir, `tables/${acc.table}.json`, toStagedTable(acc, now));
}
const patternsInput = toPatternsInput(parsedTemplates);
const patternInputSplit = splitHistoricSqlPatternInputs(patternsInput);
const allWarnings = [...warnings, ...patternInputSplit.warnings];
const allWarnings = [...new Set([...warnings, ...patternInputSplit.warnings])];
await writeJson(input.stagedDir, 'patterns-input.json', patternInputSplit.auditInput);
for (const shard of patternInputSplit.shards) {
await writeJson(input.stagedDir, shard.path, shard.input);

View file

@ -8,9 +8,22 @@ export type HistoricSqlDialect = z.infer<typeof historicSqlDialectSchema>;
const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']);
const ktxTableRefSchema = z.object({
catalog: z.string().nullable(),
db: z.string().nullable(),
name: z.string().min(1),
}).strict();
const ktxTableRefWithColumnsSchema = ktxTableRefSchema.extend({
columns: z.array(z.string().min(1)).optional(),
}).strict();
const historicSqlCommonPullConfigSchema = z.object({
minExecutions: z.number().int().nonnegative().default(5),
enabledTables: z.array(z.string().min(1)).default([]),
enabledTables: z.array(ktxTableRefSchema).default([]),
enabledSchemas: z.array(z.string().min(1)).default([]),
modeledTableCatalog: z.array(ktxTableRefWithColumnsSchema).default([]),
scopeFloorWarnings: z.array(z.string()).default([]),
filters: z.object({
serviceAccounts: z.object({
patterns: z.array(z.string()).default([]),
@ -68,6 +81,7 @@ export type AggregatedTemplate = z.infer<typeof aggregatedTemplateSchema>;
export const stagedTableInputSchema = z.object({
table: z.string().min(1),
tableRef: ktxTableRefSchema,
stats: z.object({
executionsBucket: z.string(),
distinctUsersBucket: z.string(),
@ -93,7 +107,7 @@ export const stagedPatternsInputSchema = z.object({
templates: z.array(z.object({
id: z.string(),
canonicalSql: z.string(),
tablesTouched: z.array(z.string()),
tablesTouched: z.array(ktxTableRefSchema),
executionsBucket: z.string(),
distinctUsersBucket: z.string(),
dialect: historicSqlDialectSchema,

View file

@ -9,6 +9,7 @@ import { DbtSourceAdapter } from './adapters/dbt/dbt.adapter.js';
import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js';
import { HistoricSqlSourceAdapter } from './adapters/historic-sql/historic-sql.adapter.js';
import { PostgresPgssReader } from './adapters/historic-sql/postgres-pgss-reader.js';
import { resolveQueryHistoryScopeFloor } from './adapters/historic-sql/scope-floor.js';
import {
HISTORIC_SQL_SOURCE_KEY,
historicSqlUnifiedPullConfigSchema,
@ -179,12 +180,39 @@ function queryHistoryRecord(connection: unknown): Record<string, unknown> | null
return queryHistory;
}
function queryHistoryPullConfig(connection: unknown): Record<string, unknown> | null {
async function queryHistoryPullConfig(
project: KtxLocalProject,
connectionId: string,
connection: unknown,
): Promise<Record<string, unknown> | null> {
const queryHistory = queryHistoryRecord(connection);
if (queryHistory?.enabled !== true || !isRecord(connection)) return null;
const dialect = historicSqlDialectByDriver.get(String(connection.driver ?? '').toLowerCase());
const driver = String(connection.driver ?? '').toLowerCase();
const dialect = historicSqlDialectByDriver.get(driver);
if (!dialect) return null;
return { ...queryHistory, dialect };
const scopeFloor = await resolveQueryHistoryScopeFloor({
projectDir: project.projectDir,
connectionId,
driver,
connection,
storedQueryHistory: queryHistory,
});
const {
enabled: _enabled,
dialect: _dialect,
enabledTables: _enabledTables,
enabledSchemas: _enabledSchemas,
scopeFloorWarnings: _scopeFloorWarnings,
...stored
} = queryHistory;
return {
...stored,
dialect,
...(scopeFloor.enabledTables.length > 0 ? { enabledTables: scopeFloor.enabledTables } : {}),
...(scopeFloor.enabledSchemas.length > 0 ? { enabledSchemas: scopeFloor.enabledSchemas } : {}),
...(scopeFloor.modeledTableCatalog.length > 0 ? { modeledTableCatalog: scopeFloor.modeledTableCatalog } : {}),
...(scopeFloor.warnings.length > 0 ? { scopeFloorWarnings: scopeFloor.warnings } : {}),
};
}
function stringField(value: unknown): string | null {
@ -245,7 +273,7 @@ export async function localPullConfigForAdapter(
if (options.historicSqlPullConfigOverride) {
return historicSqlUnifiedPullConfigSchema.parse(options.historicSqlPullConfigOverride);
}
const queryHistory = queryHistoryPullConfig(connection);
const queryHistory = await queryHistoryPullConfig(project, connectionId, connection);
if (!queryHistory) {
throw new Error(`Connection "${connectionId}" does not have context.queryHistory.enabled: true`);
}

View file

@ -1,8 +1,10 @@
import { request as httpRequest } from 'node:http';
import { request as httpsRequest } from 'node:https';
import { URL } from 'node:url';
import type { KtxTableRef } from '../scan/types.js';
import type {
SqlAnalysisBatchItem,
SqlAnalysisBatchOptions,
SqlAnalysisBatchResult,
SqlAnalysisDialect,
SqlAnalysisFingerprintResult,
@ -89,6 +91,14 @@ function optionalString(raw: Record<string, unknown>, field: string): string | n
throw new Error(`sql analysis response has invalid optional string field ${field}`);
}
function optionalNullableStringField(raw: Record<string, unknown>, field: string): string | null {
const value = raw[field];
if (value === null || value === undefined || typeof value === 'string') {
return value ?? null;
}
throw new Error(`sql analysis response has invalid optional nullable string field ${field}`);
}
function requiredStringArray(raw: Record<string, unknown>, field: string): string[] {
const value = raw[field];
if (!Array.isArray(value) || value.some((item) => typeof item !== 'string')) {
@ -175,10 +185,34 @@ function mapColumnsByClause(raw: Record<string, unknown>): SqlAnalysisBatchResul
return result;
}
function requiredTableRef(raw: unknown, field: string): KtxTableRef {
if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
throw new Error(`sql analysis response contains invalid table ref in ${field}`);
}
const record = raw as Record<string, unknown>;
const name = record.name;
if (typeof name !== 'string' || name.length === 0) {
throw new Error(`sql analysis response table ref in ${field} is missing name`);
}
return {
catalog: optionalNullableStringField(record, 'catalog'),
db: optionalNullableStringField(record, 'db'),
name,
};
}
function requiredTableRefArray(raw: Record<string, unknown>, field: string): KtxTableRef[] {
const value = raw[field];
if (!Array.isArray(value)) {
throw new Error(`sql analysis response is missing table-ref[] field ${field}`);
}
return value.map((item, index) => requiredTableRef(item, `${field}.${index}`));
}
function mapBatchResult(raw: Record<string, unknown>): SqlAnalysisBatchResult {
const error = optionalString(raw, 'error');
return {
tablesTouched: requiredStringArray(raw, 'tables_touched'),
tablesTouched: requiredTableRefArray(raw, 'tables_touched'),
columnsByClause: mapColumnsByClause(raw),
...(error !== undefined ? { error } : {}),
};
@ -215,10 +249,11 @@ export function createHttpSqlAnalysisPort(options: HttpSqlAnalysisPortOptions):
});
return mapResult(raw);
},
async analyzeBatch(items: SqlAnalysisBatchItem[], dialect: SqlAnalysisDialect) {
async analyzeBatch(items: SqlAnalysisBatchItem[], dialect: SqlAnalysisDialect, options?: SqlAnalysisBatchOptions) {
const raw = await requestJson('/sql/analyze-batch', {
dialect,
items,
...(options?.catalog ? { catalog: options.catalog } : {}),
});
return mapBatchResponse(raw);
},

View file

@ -1,3 +1,5 @@
import type { KtxTableRef } from '../scan/types.js';
export type SqlAnalysisDialect =
| 'bigquery'
| 'snowflake'
@ -32,8 +34,20 @@ export interface SqlAnalysisBatchItem {
sql: string;
}
interface SqlAnalysisCatalogTable extends KtxTableRef {
columns?: string[];
}
interface SqlAnalysisCatalog {
tables: SqlAnalysisCatalogTable[];
}
export interface SqlAnalysisBatchOptions {
catalog?: SqlAnalysisCatalog;
}
export interface SqlAnalysisBatchResult {
tablesTouched: string[];
tablesTouched: KtxTableRef[];
columnsByClause: Partial<Record<SqlAnalysisClause, string[]>>;
error?: string | null;
}
@ -48,6 +62,7 @@ export interface SqlAnalysisPort {
analyzeBatch(
items: SqlAnalysisBatchItem[],
dialect: SqlAnalysisDialect,
options?: SqlAnalysisBatchOptions,
): Promise<Map<string, SqlAnalysisBatchResult>>;
validateReadOnly(sql: string, dialect: SqlAnalysisDialect): Promise<SqlReadOnlyValidationResult>;
}