mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-13 08:15:14 +02:00
feat(query-history): scope mining to modeled schemas by default (#258)
* feat(query-history): structure SQL analysis table refs
* feat(query-history): qualify SQL analysis table refs
* feat(query-history): wire modeled scope floor through ingest
* chore(query-history): verify scope floor
* test(query-history): align daemon SQL batch endpoint contract
* feat(query-history): build scope from same-run scan catalog
* feat(query-history): fail open on scope-floor catalog failures
* chore(query-history): verify scope-floor v1 closure
* refactor(query-history): share scope membership
* feat(setup): apply derived query history filters
* docs: document derived query history filters
* fix(query-history): redact filter picker LLM prompt SQL
* fix(setup): run filter picker SQL analysis through managed daemon
* chore(query-history): verify filter picker v1 closure
* fix(query-history): fail open on partial service-account attribution
* fix(query-history): aggregate BigQuery users by execution count
* fix(query-history): aggregate Snowflake users by execution count
* fix(query-history): use BigQuery query info hash
This commit is contained in:
parent
ce1516b357
commit
e70ae1e63b
42 changed files with 3090 additions and 274 deletions
|
|
@ -200,27 +200,78 @@ export class BigQueryHistoricSqlQueryHistoryReader {
|
|||
config: HistoricSqlUnifiedPullConfig,
|
||||
): AsyncIterable<AggregatedTemplate> {
|
||||
const sql = `
|
||||
WITH filtered_jobs AS (
|
||||
SELECT
|
||||
COALESCE(query_info.query_hashes.normalized_literals, TO_HEX(SHA256(query))) AS template_id,
|
||||
query,
|
||||
user_email,
|
||||
creation_time,
|
||||
end_time,
|
||||
error_result
|
||||
FROM ${this.viewPath}
|
||||
WHERE job_type = 'QUERY'
|
||||
AND statement_type IN ('SELECT', 'MERGE')
|
||||
AND creation_time >= ${timestampExpression(window.start)}
|
||||
AND creation_time < ${timestampExpression(window.end)}
|
||||
AND query IS NOT NULL
|
||||
),
|
||||
template_stats AS (
|
||||
SELECT
|
||||
template_id,
|
||||
MIN(query) AS canonical_sql,
|
||||
COUNT(*) AS executions,
|
||||
COUNT(DISTINCT user_email) AS distinct_users,
|
||||
MIN(creation_time) AS first_seen,
|
||||
MAX(creation_time) AS last_seen,
|
||||
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(50)] AS p50_ms,
|
||||
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(95)] AS p95_ms,
|
||||
SAFE_DIVIDE(COUNTIF(error_result IS NOT NULL), COUNT(*)) AS error_rate,
|
||||
CAST(NULL AS INT64) AS rows_produced
|
||||
FROM filtered_jobs
|
||||
GROUP BY template_id
|
||||
HAVING COUNT(*) >= ${config.minExecutions}
|
||||
),
|
||||
template_users AS (
|
||||
SELECT
|
||||
template_id,
|
||||
user_email AS user,
|
||||
COUNT(*) AS executions,
|
||||
MAX(creation_time) AS last_seen
|
||||
FROM filtered_jobs
|
||||
GROUP BY template_id, user_email
|
||||
)
|
||||
SELECT
|
||||
query_hash AS template_id,
|
||||
MIN(query) AS canonical_sql,
|
||||
COUNT(*) AS executions,
|
||||
COUNT(DISTINCT user_email) AS distinct_users,
|
||||
MIN(creation_time) AS first_seen,
|
||||
MAX(creation_time) AS last_seen,
|
||||
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(50)] AS p50_ms,
|
||||
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(95)] AS p95_ms,
|
||||
SAFE_DIVIDE(COUNTIF(error_result IS NOT NULL), COUNT(*)) AS error_rate,
|
||||
CAST(NULL AS INT64) AS rows_produced,
|
||||
TO_JSON_STRING(ARRAY_AGG(STRUCT(user_email AS user, 1 AS executions) ORDER BY creation_time DESC LIMIT 5)) AS top_users
|
||||
FROM ${this.viewPath}
|
||||
WHERE job_type = 'QUERY'
|
||||
AND statement_type IN ('SELECT', 'MERGE')
|
||||
AND creation_time >= ${timestampExpression(window.start)}
|
||||
AND creation_time < ${timestampExpression(window.end)}
|
||||
AND query IS NOT NULL
|
||||
GROUP BY query_hash
|
||||
HAVING COUNT(*) >= ${config.minExecutions}
|
||||
ORDER BY executions DESC`.trim();
|
||||
stats.template_id,
|
||||
stats.canonical_sql,
|
||||
stats.executions,
|
||||
stats.distinct_users,
|
||||
stats.first_seen,
|
||||
stats.last_seen,
|
||||
stats.p50_ms,
|
||||
stats.p95_ms,
|
||||
stats.error_rate,
|
||||
stats.rows_produced,
|
||||
TO_JSON_STRING(
|
||||
ARRAY_AGG(
|
||||
STRUCT(users.user AS user, users.executions AS executions)
|
||||
ORDER BY users.executions DESC, users.last_seen DESC
|
||||
)
|
||||
) AS top_users
|
||||
FROM template_stats AS stats
|
||||
JOIN template_users AS users
|
||||
ON users.template_id = stats.template_id
|
||||
GROUP BY
|
||||
stats.template_id,
|
||||
stats.canonical_sql,
|
||||
stats.executions,
|
||||
stats.distinct_users,
|
||||
stats.first_seen,
|
||||
stats.last_seen,
|
||||
stats.p50_ms,
|
||||
stats.p95_ms,
|
||||
stats.error_rate,
|
||||
stats.rows_produced
|
||||
ORDER BY stats.executions DESC`.trim();
|
||||
const result = await queryClient(client).executeQuery(sql);
|
||||
if (result.error) {
|
||||
throw grantsError(result.error);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { readFile, readdir } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import { tableRefKey } from '../../../scan/table-ref.js';
|
||||
import type { ChunkResult, DiffSet, ScopeDescriptor, WorkUnit } from '../../types.js';
|
||||
import { isHistoricSqlPatternInputShardPath } from './pattern-inputs.js';
|
||||
import { stagedManifestSchema, stagedPatternsInputSchema, stagedTableInputSchema } from './types.js';
|
||||
|
|
@ -37,7 +38,7 @@ export async function chunkHistoricSqlUnifiedStagedDir(stagedDir: string, diffSe
|
|||
}
|
||||
const table = stagedTableInputSchema.parse(await readJson(stagedDir, path));
|
||||
workUnits.push({
|
||||
unitKey: `historic-sql-table-${safeUnitKey(table.table)}`,
|
||||
unitKey: `historic-sql-table-${safeUnitKey(tableRefKey(table.tableRef))}`,
|
||||
displayLabel: `Historic SQL usage: ${table.table}`,
|
||||
rawFiles: [path],
|
||||
dependencyPaths: ['manifest.json'],
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import { Buffer } from 'node:buffer';
|
||||
import { tableRefKey } from '../../../scan/table-ref.js';
|
||||
import type { StagedPatternsInput } from './types.js';
|
||||
|
||||
const HISTORIC_SQL_PATTERN_WORKUNIT_DIR = 'patterns-input';
|
||||
|
|
@ -44,11 +45,16 @@ function sortedAuditTemplates(templates: readonly PatternTemplate[]): PatternTem
|
|||
function sortedPatternCandidates(templates: readonly PatternTemplate[]): PatternTemplate[] {
|
||||
return [...templates]
|
||||
.filter((template) => template.tablesTouched.length >= 2)
|
||||
.map((template) => ({ ...template, tablesTouched: [...template.tablesTouched].sort() }))
|
||||
.map((template) => ({
|
||||
...template,
|
||||
tablesTouched: [...template.tablesTouched].sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right))),
|
||||
}))
|
||||
.sort((left, right) => {
|
||||
const cardinality = right.tablesTouched.length - left.tablesTouched.length;
|
||||
if (cardinality !== 0) return cardinality;
|
||||
const tableSignature = left.tablesTouched.join('\0').localeCompare(right.tablesTouched.join('\0'));
|
||||
const leftSignature = left.tablesTouched.map(tableRefKey).join('\0');
|
||||
const rightSignature = right.tablesTouched.map(tableRefKey).join('\0');
|
||||
const tableSignature = leftSignature.localeCompare(rightSignature);
|
||||
if (tableSignature !== 0) return tableSignature;
|
||||
return left.id.localeCompare(right.id);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,278 @@
|
|||
import { z } from 'zod';
|
||||
import type { KtxLlmRuntimePort } from '../../../../context/llm/runtime-port.js';
|
||||
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
|
||||
import { tableRefKey } from '../../../scan/table-ref.js';
|
||||
import type { KtxTableRef } from '../../../scan/types.js';
|
||||
import { bucketDistinctUsers, bucketExecutions, bucketRecency } from './buckets.js';
|
||||
import {
|
||||
compileHistoricSqlRedactionPatterns,
|
||||
redactHistoricSqlText,
|
||||
type HistoricSqlRedactionPattern,
|
||||
} from './redaction.js';
|
||||
import { includedQueryHistoryTableRefs } from './scope-membership.js';
|
||||
import {
|
||||
aggregatedTemplateSchema,
|
||||
historicSqlUnifiedPullConfigSchema,
|
||||
type AggregatedTemplate,
|
||||
type HistoricSqlDialect,
|
||||
type HistoricSqlReader,
|
||||
} from './types.js';
|
||||
|
||||
/**
 * Outcome of the query-history service-account filter picker
 * (`proposeQueryHistoryServiceAccountFilters`).
 */
export interface QueryHistoryFilterProposal {
  /**
   * Roles proposed for exclusion. `pattern` is an anchored regular-expression
   * source that matches exactly `role` (see `regexEscapeForExactRolePattern`).
   */
  excludedRoles: Array<{ role: string; reason: string; pattern: string }>;
  /** Number of role records that were built and considered. */
  consideredRoleCount: number;
  /** Reason the picker was skipped, or `null` when no skip reason applies. */
  skipped: { reason: 'no-llm' | 'no-daemon' | 'no-in-scope-history' | 'user-block-present' } | null;
  /** Warning codes collected while building the proposal (`code:detail` strings). */
  warnings: string[];
}
|
||||
|
||||
/** Inputs to `proposeQueryHistoryServiceAccountFilters`. */
export interface ProposeQueryHistoryServiceAccountFiltersInput {
  /** Connection identifier; included in the adjudication prompt payload. */
  connectionId: string;
  /** SQL dialect passed to SQL analysis and included in the prompt payload. */
  dialect: HistoricSqlDialect;
  /** Opaque client handed to `reader.fetchAggregated`. */
  queryClient: unknown;
  /** Reader that yields aggregated query-history templates for the window. */
  reader: HistoricSqlReader;
  /** Batch SQL analyzer used to resolve the tables each template touches. */
  sqlAnalysis: SqlAnalysisPort;
  /** LLM runtime used for adjudication; `null` skips the picker with reason `no-llm`. */
  llmRuntime: KtxLlmRuntimePort | null;
  /** Raw pull config; validated with `historicSqlUnifiedPullConfigSchema`. */
  pullConfig: unknown;
  /** End of the history window; defaults to the current time. */
  now?: Date;
  /** When truthy, the returned proposal is marked skipped with `user-block-present`. */
  userServiceAccountsPresent?: boolean;
}
|
||||
|
||||
/** An aggregated template paired with the table refs resolved from its SQL. */
interface ParsedTemplateForPicker {
  /** The aggregated template (its SQL is redacted before it is stored here). */
  template: AggregatedTemplate;
  /** De-duplicated, sorted table refs reported by SQL analysis. */
  tablesTouched: KtxTableRef[];
  /** Subset of `tablesTouched` that passes the query-history scope membership check. */
  includedTables: KtxTableRef[];
}
|
||||
|
||||
/** Mutable per-role running totals built up by `recordRole`. */
interface RoleAccumulator {
  /** Trimmed role / user name. */
  role: string;
  /** Sum of the per-user execution counts attributed to this role. */
  executions: number;
  /** Largest `stats.distinctUsers` among the templates recorded for this role. */
  distinctUsers: number;
  /** Greatest `stats.lastSeen` seen so far (compared as strings). */
  lastSeen: string;
  /** In-scope tables keyed by `tableRefKey`. */
  tables: Map<string, KtxTableRef>;
  /** Every template recorded for this role, in encounter order. */
  templates: AggregatedTemplate[];
}
|
||||
|
||||
/** Bucketed, prompt-ready summary of one role; serialized into the LLM prompt. */
interface QueryHistoryRoleRecord {
  /** Role / user name. */
  role: string;
  /** Display names (dot-joined) of the in-scope tables the role touched. */
  inScopeTables: string[];
  /** Bucket label produced by `bucketExecutions`. */
  executionsBucket: string;
  /** Bucket label produced by `bucketDistinctUsers`. */
  distinctUsersBucket: string;
  /** Bucket label produced by `bucketRecency`. */
  recencyBucket: string;
  /** A small sample of the role's templates. */
  representativeTemplates: Array<{ id: string; canonicalSql: string; dialect: HistoricSqlDialect }>;
}
|
||||
|
||||
/**
 * Structured output expected from the adjudication model: one verdict per role.
 * Both the wrapper object and each role entry are strict.
 */
const queryHistoryFilterAdjudicationSchema = z
  .object({
    roles: z.array(
      z
        .object({
          role: z.string().min(1),
          exclude: z.boolean(),
          reason: z.string().min(1),
        })
        .strict(),
    ),
  })
  .strict();

/** Parsed shape of `queryHistoryFilterAdjudicationSchema`. */
type QueryHistoryFilterAdjudication = z.infer<typeof queryHistoryFilterAdjudicationSchema>;
|
||||
|
||||
/**
 * Builds a proposal with no excluded roles and a considered-role count of zero.
 *
 * @param skipped - Skip reason to report, or `null`.
 * @param warnings - Warnings to attach; stored by reference. Defaults to a fresh empty array.
 */
function emptyProposal(skipped: QueryHistoryFilterProposal['skipped'], warnings: string[] = []): QueryHistoryFilterProposal {
  return {
    excludedRoles: [],
    consideredRoleCount: 0,
    skipped,
    warnings,
  };
}
|
||||
|
||||
/**
 * Renders a table ref as `catalog.db.name`, omitting any part that is
 * null, undefined or the empty string.
 */
function displayTableRef(ref: KtxTableRef): string {
  // A truthiness check already rejects '', so no separate length test is needed.
  return [ref.catalog, ref.db, ref.name].filter((part): part is string => Boolean(part)).join('.');
}
|
||||
|
||||
/**
 * Returns a copy of `template` whose `canonicalSql` has been passed through
 * `redactHistoricSqlText`. With no redactors the input object itself is returned.
 */
function redactTemplateSqlForPicker(
  template: AggregatedTemplate,
  redactors: readonly HistoricSqlRedactionPattern[],
): AggregatedTemplate {
  return redactors.length === 0
    ? template
    : { ...template, canonicalSql: redactHistoricSqlText(template.canonicalSql, redactors) };
}
|
||||
|
||||
/**
 * Builds an anchored regular-expression source that matches exactly `role`:
 * regex metacharacters are backslash-escaped and the result is wrapped in `^…$`.
 *
 * @internal
 */
export function regexEscapeForExactRolePattern(role: string): string {
  const escaped = role.replace(/[\\^$.*+?()[\]{}|]/g, '\\$&');
  return `^${escaped}$`;
}
|
||||
|
||||
/**
 * Folds one template observation into a role accumulator (mutates `acc`).
 *
 * @param acc - Accumulator for the role.
 * @param template - Template the role was seen on.
 * @param tables - In-scope tables of that template; stored keyed by `tableRefKey`.
 * @param executions - Executions attributed to this role for this template.
 */
function recordRole(
  acc: RoleAccumulator,
  template: AggregatedTemplate,
  tables: readonly KtxTableRef[],
  executions: number,
): void {
  acc.executions += executions;
  acc.distinctUsers = Math.max(acc.distinctUsers, template.stats.distinctUsers);
  // String comparison: presumably both values are ISO-8601 timestamps, which
  // order chronologically as text — confirm against aggregatedTemplateSchema.
  if (template.stats.lastSeen > acc.lastSeen) {
    acc.lastSeen = template.stats.lastSeen;
  }
  for (const table of tables) {
    acc.tables.set(tableRefKey(table), table);
  }
  acc.templates.push(template);
}
|
||||
|
||||
/**
 * Groups parsed templates by the roles listed in each template's `topUsers`
 * and produces one bucketed record per role.
 *
 * Entries with a blank user or a non-positive execution count are ignored.
 * Records are ordered by total executions (descending), then role name.
 * Each record lists at most 25 in-scope tables (ordered by table key) and at
 * most 3 representative templates (ordered by template executions descending,
 * then template id).
 *
 * @param parsedTemplates - Templates with their in-scope tables.
 * @param now - Reference time passed to `bucketRecency`.
 */
function roleRecords(parsedTemplates: readonly ParsedTemplateForPicker[], now: Date): QueryHistoryRoleRecord[] {
  const byRole = new Map<string, RoleAccumulator>();

  for (const parsed of parsedTemplates) {
    for (const entry of parsed.template.topUsers) {
      const role = entry.user ? entry.user.trim() : '';
      if (role.length === 0 || entry.executions <= 0) {
        continue;
      }
      let acc = byRole.get(role);
      if (!acc) {
        acc = {
          role,
          executions: 0,
          distinctUsers: 0,
          lastSeen: '1970-01-01T00:00:00.000Z',
          tables: new Map<string, KtxTableRef>(),
          templates: [],
        };
        byRole.set(role, acc);
      }
      recordRole(acc, parsed.template, parsed.includedTables, entry.executions);
    }
  }

  return [...byRole.values()]
    .sort((left, right) => right.executions - left.executions || left.role.localeCompare(right.role))
    .map((acc) => {
      // A role can appear more than once in a single template's topUsers
      // (e.g. names that differ only by surrounding whitespace), which records
      // the same template repeatedly. De-duplicate by id so the three sample
      // slots are not filled with copies of one template.
      const uniqueTemplates = [...new Map(acc.templates.map((template) => [template.templateId, template])).values()];
      return {
        role: acc.role,
        inScopeTables: [...acc.tables.entries()]
          .sort(([left], [right]) => left.localeCompare(right))
          .slice(0, 25)
          .map(([, ref]) => displayTableRef(ref)),
        executionsBucket: bucketExecutions(acc.executions),
        distinctUsersBucket: bucketDistinctUsers(acc.distinctUsers),
        recencyBucket: bucketRecency(acc.lastSeen, now),
        representativeTemplates: uniqueTemplates
          .sort(
            (left, right) =>
              right.stats.executions - left.stats.executions || left.templateId.localeCompare(right.templateId),
          )
          .slice(0, 3)
          .map((template) => ({
            id: template.templateId,
            canonicalSql: template.canonicalSql,
            dialect: template.dialect,
          })),
      };
    });
}
|
||||
|
||||
/** Returns the three-line system prompt used for role adjudication. */
function adjudicationSystemPrompt(): string {
  const lines = [
    'You are helping ktx decide whether observed query-history roles are operational service accounts.',
    'Default every role to keep. Mark exclude true only when the aggregate evidence clearly shows loader, ELT, reverse-ETL, export, refresh, or maintenance traffic rather than analyst or BI-dashboard usage.',
    'Use only the observed role records. Do not rely on a hardcoded denylist. Return structured output only.',
  ];
  return lines.join('\n');
}
|
||||
|
||||
/**
 * Proposes service-account roles to exclude from query-history mining.
 *
 * Steps:
 * 1. Read aggregated templates for the window ending at `now` (window length
 *    is `config.windowDays` when present, otherwise 90 days).
 * 2. Analyze each template's SQL to find the tables it touches and keep only
 *    templates that touch at least one in-scope table.
 * 3. Summarize the remaining history per role and ask the LLM which roles
 *    should be excluded.
 *
 * Failures do not throw, with one exception: an invalid `pullConfig` makes
 * `historicSqlUnifiedPullConfigSchema.parse` throw. A history read failure
 * returns an empty proposal with a warning; a SQL analysis failure is reported
 * as `no-daemon`; an LLM failure as `no-llm`. Fewer than two roles is reported
 * as `no-in-scope-history`.
 *
 * @param input - See `ProposeQueryHistoryServiceAccountFiltersInput`.
 * @returns The proposal; `excludedRoles` is sorted by role and contains each
 *   role at most once.
 */
export async function proposeQueryHistoryServiceAccountFilters(
  input: ProposeQueryHistoryServiceAccountFiltersInput,
): Promise<QueryHistoryFilterProposal> {
  if (!input.llmRuntime) {
    return emptyProposal({ reason: 'no-llm' });
  }

  const describeError = (error: unknown): string => (error instanceof Error ? error.message : String(error));

  const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
  const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
  const now = input.now ?? new Date();
  const windowDays = 'windowDays' in config ? config.windowDays : 90;
  const windowStart = new Date(now.getTime() - windowDays * 24 * 60 * 60 * 1000);
  const warnings: string[] = [];
  const snapshot: AggregatedTemplate[] = [];

  try {
    for await (const row of input.reader.fetchAggregated(input.queryClient, { start: windowStart, end: now }, config)) {
      snapshot.push(aggregatedTemplateSchema.parse(row));
    }
  } catch (error) {
    return emptyProposal(null, [`query_history_filter_picker_read_failed:${describeError(error)}`]);
  }

  if (snapshot.length === 0) {
    return emptyProposal({ reason: 'no-in-scope-history' });
  }

  const analysisItems = snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql }));
  const analysisOptions =
    config.modeledTableCatalog.length > 0 ? { catalog: { tables: config.modeledTableCatalog } } : undefined;
  let analysis: Awaited<ReturnType<SqlAnalysisPort['analyzeBatch']>>;
  try {
    analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, input.dialect, analysisOptions);
  } catch (error) {
    return emptyProposal({ reason: 'no-daemon' }, [
      `query_history_filter_picker_analysis_failed:${describeError(error)}`,
    ]);
  }

  const parsedTemplates: ParsedTemplateForPicker[] = [];
  for (const template of snapshot) {
    const parsed = analysis.get(template.templateId);
    if (!parsed || parsed.error) {
      warnings.push(`query_history_filter_picker_parse_failed:${template.templateId}`);
      continue;
    }
    const tablesTouched = [...new Map(parsed.tablesTouched.map((ref) => [tableRefKey(ref), ref])).values()]
      .filter((ref) => ref.name.length > 0)
      .sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right)));
    const includedTables = includedQueryHistoryTableRefs(tablesTouched, config);
    if (includedTables.length === 0) {
      continue;
    }
    // Analysis runs on the raw SQL above; only the redacted SQL is kept from
    // here on, so that is what ends up in the LLM prompt.
    parsedTemplates.push({
      template: redactTemplateSqlForPicker(template, redactors),
      tablesTouched,
      includedTables,
    });
  }

  const records = roleRecords(parsedTemplates, now);
  if (records.length <= 1) {
    return {
      excludedRoles: [],
      consideredRoleCount: records.length,
      skipped: { reason: 'no-in-scope-history' },
      warnings,
    };
  }

  let generated: QueryHistoryFilterAdjudication;
  try {
    generated = await input.llmRuntime.generateObject<QueryHistoryFilterAdjudication, typeof queryHistoryFilterAdjudicationSchema>({
      role: 'candidateExtraction',
      system: adjudicationSystemPrompt(),
      prompt: JSON.stringify({ connectionId: input.connectionId, dialect: input.dialect, roles: records }),
      schema: queryHistoryFilterAdjudicationSchema,
    });
  } catch (error) {
    return {
      excludedRoles: [],
      consideredRoleCount: records.length,
      skipped: { reason: 'no-llm' },
      warnings: [...warnings, `query_history_filter_picker_llm_failed:${describeError(error)}`],
    };
  }

  // Keep only verdicts for roles that were actually sent to the model, and
  // keep the first exclusion verdict per role so a repeated role in the model
  // output does not yield duplicate filter patterns.
  const knownRoles = new Set(records.map((record) => record.role));
  const excludedByRole = new Map<string, { role: string; reason: string; pattern: string }>();
  for (const verdict of generated.roles) {
    if (!verdict.exclude || !knownRoles.has(verdict.role) || excludedByRole.has(verdict.role)) {
      continue;
    }
    excludedByRole.set(verdict.role, {
      role: verdict.role,
      reason: verdict.reason,
      pattern: regexEscapeForExactRolePattern(verdict.role),
    });
  }
  const excludedRoles = [...excludedByRole.values()].sort((left, right) => left.role.localeCompare(right.role));

  return {
    excludedRoles,
    consideredRoleCount: records.length,
    skipped: input.userServiceAccountsPresent ? { reason: 'user-block-present' } : null,
    warnings,
  };
}
|
||||
|
|
@ -0,0 +1,260 @@
|
|||
import type { Dirent } from 'node:fs';
|
||||
import { access, readdir, readFile } from 'node:fs/promises';
|
||||
import { join, relative } from 'node:path';
|
||||
import YAML from 'yaml';
|
||||
import { getDriverRegistration } from '../../../connections/drivers.js';
|
||||
import { parseDottedTableEntry } from '../../../scan/enabled-tables.js';
|
||||
import { tableRefKey, tableRefSet, type KtxTableRefKey } from '../../../scan/table-ref.js';
|
||||
import type { KtxTableRef } from '../../../scan/types.js';
|
||||
import { readLiveDatabaseTableFiles } from '../live-database/stage.js';
|
||||
|
||||
/** Inputs to `resolveQueryHistoryScopeFloor`. */
export interface QueryHistoryScopeFloorInput {
  /** Project root; `semantic-layer/` and `raw-sources/` are resolved under it. */
  projectDir: string;
  /** Connection identifier, used as a directory name under the project roots. */
  connectionId: string;
  /** Driver name, used to look up the driver's scope config key. */
  driver: string;
  /** Connection config; `enabled_tables` and the driver's scope key are read from it. */
  connection: Record<string, unknown>;
  /** Stored query-history config; `enabledTables` and `enabledSchemas` are read from it. */
  storedQueryHistory: Record<string, unknown>;
}
|
||||
|
||||
/** Resolved query-history scope, as produced by `resolveQueryHistoryScopeFloor`. */
export interface QueryHistoryScopeFloor {
  /** Explicitly enabled tables; empty when the scope is schema-based. */
  enabledTables: KtxTableRef[];
  /** Key set of `enabledTables`, or `null` when no explicit tables are configured. */
  enabledTableKeys: ReadonlySet<KtxTableRefKey> | null;
  /** Schemas in scope; `['*']` when the floor is disabled. */
  enabledSchemas: string[];
  /** Unique, key-sorted union of semantic-layer, scanned and explicitly enabled tables. */
  modeledTableCatalog: KtxTableRef[];
  /** True when scoping is turned off (explicit `'*'` or catalog unavailable). */
  floorDisabled: boolean;
  /** Warning codes collected while resolving the scope. */
  warnings: string[];
}
|
||||
|
||||
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return !Array.isArray(value);
}
|
||||
|
||||
/**
 * Extracts the non-blank strings from `value`, trimmed.
 * Returns an empty array when `value` is not an array.
 */
function stringArray(value: unknown): string[] {
  if (!Array.isArray(value)) {
    return [];
  }
  return value
    .filter((item): item is string => typeof item === 'string' && item.trim().length > 0)
    .map((item) => item.trim());
}
|
||||
|
||||
/**
 * Converts a loosely-typed list of table entries into table refs.
 *
 * Accepted entry shapes:
 * - a string, parsed with `parseDottedTableEntry` (dropped when that returns a falsy value);
 * - an object with a non-empty string `name`; `catalog` and `db` become `null`
 *   unless they are strings.
 *
 * Anything else is ignored. A non-array `values` yields an empty list.
 */
function tableRefsFromValues(values: unknown): KtxTableRef[] {
  if (!Array.isArray(values)) return [];

  const refs: KtxTableRef[] = [];
  for (const value of values) {
    if (typeof value === 'string') {
      const ref = parseDottedTableEntry(value);
      if (ref) refs.push(ref);
      continue;
    }
    if (isRecord(value) && typeof value.name === 'string' && value.name.length > 0) {
      refs.push({
        catalog: typeof value.catalog === 'string' ? value.catalog : null,
        db: typeof value.db === 'string' ? value.db : null,
        name: value.name,
      });
    }
  }
  return refs;
}
|
||||
|
||||
/**
 * Returns the unique, sorted schema names found on the connection under the
 * driver's `scopeConfigKey`. Returns an empty list when the driver has no
 * registration or no scope key.
 */
function declaredSchemas(driver: string, connection: Record<string, unknown>): string[] {
  const scopeKey = getDriverRegistration(driver)?.scopeConfigKey;
  if (!scopeKey) {
    return [];
  }
  const unique = new Set(stringArray(connection[scopeKey]));
  return [...unique].sort();
}
|
||||
|
||||
/**
 * De-duplicates table refs by `tableRefKey` (the last ref for a key wins) and
 * returns them ordered by key using `localeCompare`.
 */
function uniqueSortedTableRefs(refs: readonly KtxTableRef[]): KtxTableRef[] {
  const byKey = new Map<KtxTableRefKey, KtxTableRef>();
  for (const ref of refs) {
    byKey.set(tableRefKey(ref), ref);
  }
  const ordered = [...byKey.entries()].sort(([left], [right]) => left.localeCompare(right));
  return ordered.map(([, ref]) => ref);
}
|
||||
|
||||
/**
 * Finds the most recent live-database scan directory for a connection.
 *
 * Looks under `<projectDir>/raw-sources/<connectionId>/live-database`, walks
 * the sub-directories in descending name order and returns the first one that
 * contains an accessible `connection.json`.
 *
 * @returns Absolute path of that directory, or `null` when the root does not
 *   exist or no sub-directory qualifies.
 * @throws Any `readdir` error other than `ENOENT`.
 */
async function latestLiveDatabaseScanDir(projectDir: string, connectionId: string): Promise<string | null> {
  const root = join(projectDir, 'raw-sources', connectionId, 'live-database');

  let entries: Dirent[];
  try {
    entries = await readdir(root, { withFileTypes: true });
  } catch (error) {
    if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') return null;
    throw error;
  }

  // Presumably sync directory names sort chronologically (e.g. timestamps),
  // so descending name order means newest first — confirm against the writer.
  const newestFirst = entries
    .filter((entry) => entry.isDirectory())
    .map((entry) => entry.name)
    .sort()
    .reverse();

  for (const syncDir of newestFirst) {
    const candidate = join(root, syncDir);
    try {
      await access(join(candidate, 'connection.json'));
      return candidate;
    } catch {
      // No usable connection.json in this directory; try the next older one.
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Loads the table refs recorded by the latest live-database scan.
 *
 * @returns
 * - no scan directory: `{ refs: [], catalogAvailable: false, warnings: [] }`;
 * - table files read: unique key-sorted refs with `catalogAvailable: true`;
 * - reading table files failed: empty refs, `catalogAvailable: false` and one
 *   `query_history_scope_floor_catalog_read_failed:live_database_scan:…` warning.
 */
async function scannedTableRefs(
  projectDir: string,
  connectionId: string,
): Promise<{ refs: KtxTableRef[]; catalogAvailable: boolean; warnings: string[] }> {
  const scanDir = await latestLiveDatabaseScanDir(projectDir, connectionId);
  if (!scanDir) {
    return { refs: [], catalogAvailable: false, warnings: [] };
  }

  try {
    const tableFiles = await readLiveDatabaseTableFiles(scanDir);
    const refs = uniqueSortedTableRefs(
      tableFiles.map(({ table }) => ({ catalog: table.catalog, db: table.db, name: table.name })),
    );
    return { refs, catalogAvailable: true, warnings: [] };
  } catch (error) {
    // Fail open: report the problem as a warning instead of throwing.
    const detail = error instanceof Error ? error.message : String(error);
    return {
      refs: [],
      catalogAvailable: false,
      warnings: [`query_history_scope_floor_catalog_read_failed:live_database_scan:${detail}`],
    };
  }
}
|
||||
|
||||
/**
 * Recursively lists `.yml` / `.yaml` files under `root`.
 *
 * @returns Paths relative to `root`, with `/` separators, sorted. Empty when
 *   `root` does not exist.
 * @throws Any `readdir` error other than `ENOENT`.
 */
async function listYamlFiles(root: string): Promise<string[]> {
  try {
    const entries = await readdir(root, { withFileTypes: true, recursive: true });
    const yamlPaths: string[] = [];
    for (const entry of entries) {
      if (entry.isFile() && /\.ya?ml$/i.test(entry.name)) {
        // Normalize Windows separators so callers can match on '/' prefixes.
        yamlPaths.push(relative(root, join(entry.parentPath, entry.name)).replace(/\\/g, '/'));
      }
    }
    return yamlPaths.sort();
  } catch (error) {
    if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') return [];
    throw error;
  }
}
|
||||
|
||||
/**
 * Extracts table refs from a manifest-style YAML document: a mapping with a
 * `tables` mapping whose values carry a string `table` field.
 *
 * Entries that are not objects, lack a string `table`, or that
 * `parseDottedTableEntry` rejects are skipped. Any other document shape
 * yields an empty list. YAML parse errors propagate to the caller.
 */
function refsFromManifest(content: string): KtxTableRef[] {
  const parsed = YAML.parse(content) as unknown;
  if (!isRecord(parsed) || !isRecord(parsed.tables)) return [];

  const refs: KtxTableRef[] = [];
  for (const entry of Object.values(parsed.tables)) {
    if (!isRecord(entry) || typeof entry.table !== 'string') continue;
    const ref = parseDottedTableEntry(entry.table);
    if (ref) refs.push(ref);
  }
  return refs;
}
|
||||
|
||||
/**
 * Extracts the single table ref from a standalone source YAML document, i.e.
 * a mapping with a top-level string `table`. Returns an empty list for any
 * other shape or when `parseDottedTableEntry` rejects the value.
 * YAML parse errors propagate to the caller.
 */
function refsFromStandaloneSource(content: string): KtxTableRef[] {
  const parsed = YAML.parse(content) as unknown;
  if (!isRecord(parsed) || typeof parsed.table !== 'string') {
    return [];
  }
  const ref = parseDottedTableEntry(parsed.table);
  return ref ? [ref] : [];
}
|
||||
|
||||
/**
 * Collects the table refs declared by the connection's semantic layer
 * (`<projectDir>/semantic-layer/<connectionId>`).
 *
 * Files under `_schema/` are read as manifests; every other YAML file is read
 * as a standalone source. A file that cannot be read or parsed contributes a
 * `query_history_scope_floor_catalog_read_failed:<file>:…` warning and is
 * otherwise skipped.
 *
 * @returns Unique, key-sorted refs plus the collected warnings.
 */
async function semanticTableRefs(
  projectDir: string,
  connectionId: string,
): Promise<{ refs: KtxTableRef[]; warnings: string[] }> {
  const root = join(projectDir, 'semantic-layer', connectionId);
  const files = await listYamlFiles(root);
  const refs: KtxTableRef[] = [];
  const warnings: string[] = [];

  for (const file of files) {
    try {
      const content = await readFile(join(root, file), 'utf-8');
      const fileRefs = file.startsWith('_schema/') ? refsFromManifest(content) : refsFromStandaloneSource(content);
      refs.push(...fileRefs);
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      warnings.push(`query_history_scope_floor_catalog_read_failed:${file}:${detail}`);
    }
  }

  return { refs: uniqueSortedTableRefs(refs), warnings };
}
|
||||
|
||||
/**
 * Resolves which tables/schemas query-history mining is scoped to.
 *
 * Precedence:
 * 1. Explicit tables (`storedQueryHistory.enabledTables` plus
 *    `connection.enabled_tables`): scope is exactly those tables.
 * 2. Explicit `storedQueryHistory.enabledSchemas` containing `'*'`: floor disabled.
 * 3. Other explicit schemas: scope is those schemas (unique, sorted).
 * 4. Otherwise: schemas declared on the connection (driver scope key) plus the
 *    `db` of every semantic-layer table.
 *
 * For cases 3 and 4 (case 4 only when at least one schema was derived), the
 * floor fails open — `enabledSchemas: ['*']`, `floorDisabled: true` and a
 * `query_history_scope_floor_disabled:catalog_unavailable` warning — when the
 * scan catalog is unavailable or no modeled table is known.
 *
 * @param input - See `QueryHistoryScopeFloorInput`.
 * @returns The resolved scope; `modeledTableCatalog` is always the unique,
 *   key-sorted union of semantic-layer, scanned and explicit tables.
 */
export async function resolveQueryHistoryScopeFloor(input: QueryHistoryScopeFloorInput): Promise<QueryHistoryScopeFloor> {
  const explicitEnabledTables = [
    ...tableRefsFromValues(input.storedQueryHistory.enabledTables),
    ...tableRefsFromValues(input.connection.enabled_tables),
  ];

  // The two catalog sources are independent reads, so fetch them concurrently.
  const [semanticTables, scannedTables] = await Promise.all([
    semanticTableRefs(input.projectDir, input.connectionId),
    scannedTableRefs(input.projectDir, input.connectionId),
  ]);
  const modeledTables = uniqueSortedTableRefs([
    ...semanticTables.refs,
    ...scannedTables.refs,
    ...explicitEnabledTables,
  ]);
  const warnings = [...semanticTables.warnings, ...scannedTables.warnings];

  if (explicitEnabledTables.length > 0) {
    return {
      enabledTables: explicitEnabledTables,
      enabledTableKeys: tableRefSet(explicitEnabledTables),
      enabledSchemas: [],
      modeledTableCatalog: modeledTables,
      floorDisabled: false,
      warnings,
    };
  }

  // Every remaining outcome is schema-based and differs only in these fields.
  const schemaScoped = (enabledSchemas: string[]): QueryHistoryScopeFloor => ({
    enabledTables: [],
    enabledTableKeys: null,
    enabledSchemas,
    modeledTableCatalog: modeledTables,
    floorDisabled: false,
    warnings,
  });
  const floorOff = (resultWarnings: string[]): QueryHistoryScopeFloor => ({
    enabledTables: [],
    enabledTableKeys: null,
    enabledSchemas: ['*'],
    modeledTableCatalog: modeledTables,
    floorDisabled: true,
    warnings: resultWarnings,
  });
  const catalogUnavailable = (): QueryHistoryScopeFloor =>
    floorOff([...warnings, 'query_history_scope_floor_disabled:catalog_unavailable']);
  const catalogUnusable = !scannedTables.catalogAvailable || modeledTables.length === 0;

  const explicitSchemas = stringArray(input.storedQueryHistory.enabledSchemas);
  if (explicitSchemas.includes('*')) {
    return floorOff(warnings);
  }
  if (explicitSchemas.length > 0) {
    return catalogUnusable ? catalogUnavailable() : schemaScoped([...new Set(explicitSchemas)].sort());
  }

  const schemas = new Set(declaredSchemas(input.driver, input.connection));
  for (const ref of semanticTables.refs) {
    if (ref.db) schemas.add(ref.db);
  }
  if (schemas.size > 0 && catalogUnusable) {
    return catalogUnavailable();
  }
  return schemaScoped([...schemas].sort());
}
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
import { tableRefKey, tableRefSet } from '../../../scan/table-ref.js';
|
||||
import type { KtxTableRef } from '../../../scan/types.js';
|
||||
|
||||
/** The part of the pull config that decides which table refs are in scope. */
export interface QueryHistoryScopeMembershipConfig {
  /** Explicit table allow-list; when non-empty it takes precedence over schemas. */
  enabledTables: readonly KtxTableRef[];
  /** Schema allow-list; the entry `'*'` disables the scope floor. */
  enabledSchemas: readonly string[];
}
|
||||
|
||||
/** Returns the ref's `db` as its schema name, or `null` when `db` is missing or empty. */
function schemaNameForRef(ref: KtxTableRef): string | null {
  // A truthiness check covers null, undefined and '' in one step.
  return ref.db ? ref.db : null;
}
|
||||
|
||||
/** Returns the configured schema names as a set, without the `'*'` wildcard entry. */
function schemaNamesFromConfig(enabledSchemas: readonly string[]): Set<string> {
  const concrete = enabledSchemas.filter((schema) => schema !== '*');
  return new Set(concrete);
}
|
||||
|
||||
/** True when `enabledSchemas` contains the `'*'` wildcard entry. */
export function isQueryHistoryScopeFloorDisabled(config: QueryHistoryScopeMembershipConfig): boolean {
  const { enabledSchemas } = config;
  return enabledSchemas.includes('*');
}
|
||||
|
||||
/**
 * True when the config names neither tables nor schemas, i.e. there is nothing
 * to scope by.
 *
 * An empty `enabledSchemas` cannot contain `'*'`, so no separate
 * "floor disabled" check is needed here.
 */
export function shouldFailOpenQueryHistoryScope(config: QueryHistoryScopeMembershipConfig): boolean {
  return config.enabledTables.length === 0 && config.enabledSchemas.length === 0;
}
|
||||
|
||||
/**
 * Filters `tablesTouched` down to the refs that are in scope.
 *
 * - With explicit `enabledTables`: only refs whose key is in that list.
 * - With the floor disabled (`'*'`) or nothing configured: every ref (copied).
 * - Otherwise: refs whose `db` is one of the enabled schemas; refs without a
 *   `db` are excluded.
 *
 * @returns A new array; the input is not modified.
 */
export function includedQueryHistoryTableRefs(
  tablesTouched: readonly KtxTableRef[],
  config: QueryHistoryScopeMembershipConfig,
): KtxTableRef[] {
  if (config.enabledTables.length > 0) {
    const enabledKeys = tableRefSet(config.enabledTables);
    return tablesTouched.filter((ref) => enabledKeys.has(tableRefKey(ref)));
  }

  if (isQueryHistoryScopeFloorDisabled(config) || shouldFailOpenQueryHistoryScope(config)) {
    return [...tablesTouched];
  }

  const allowedSchemas = schemaNamesFromConfig(config.enabledSchemas);
  return tablesTouched.filter((ref) => {
    const schema = schemaNameForRef(ref);
    return schema !== null && allowedSchemas.has(schema);
  });
}
|
||||
|
|
@ -188,26 +188,75 @@ export class SnowflakeHistoricSqlQueryHistoryReader {
|
|||
config: HistoricSqlUnifiedPullConfig,
|
||||
): AsyncIterable<AggregatedTemplate> {
|
||||
const sql = `
|
||||
WITH filtered_queries AS (
|
||||
SELECT
|
||||
query_hash,
|
||||
query_text,
|
||||
user_name,
|
||||
start_time,
|
||||
total_elapsed_time,
|
||||
execution_status,
|
||||
rows_produced
|
||||
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
|
||||
WHERE query_text IS NOT NULL
|
||||
AND query_type IN ('SELECT', 'MERGE')
|
||||
AND start_time >= ${timestampLiteral(window.start)}
|
||||
AND start_time < ${timestampLiteral(window.end)}
|
||||
),
|
||||
template_stats AS (
|
||||
SELECT
|
||||
query_hash AS template_id,
|
||||
MIN(query_text) AS canonical_sql,
|
||||
COUNT(*) AS executions,
|
||||
COUNT(DISTINCT user_name) AS distinct_users,
|
||||
MIN(start_time) AS first_seen,
|
||||
MAX(start_time) AS last_seen,
|
||||
APPROX_PERCENTILE(total_elapsed_time, 0.50) AS p50_ms,
|
||||
APPROX_PERCENTILE(total_elapsed_time, 0.95) AS p95_ms,
|
||||
DIV0(COUNT_IF(execution_status != 'SUCCESS'), COUNT(*)) AS error_rate,
|
||||
SUM(rows_produced) AS rows_produced
|
||||
FROM filtered_queries
|
||||
GROUP BY query_hash
|
||||
HAVING COUNT(*) >= ${config.minExecutions}
|
||||
),
|
||||
template_users AS (
|
||||
SELECT
|
||||
query_hash AS template_id,
|
||||
user_name AS user,
|
||||
COUNT(*) AS executions,
|
||||
MAX(start_time) AS last_seen
|
||||
FROM filtered_queries
|
||||
GROUP BY query_hash, user_name
|
||||
)
|
||||
SELECT
|
||||
query_hash AS template_id,
|
||||
MIN(query_text) AS canonical_sql,
|
||||
COUNT(*) AS executions,
|
||||
COUNT(DISTINCT user_name) AS distinct_users,
|
||||
MIN(start_time) AS first_seen,
|
||||
MAX(start_time) AS last_seen,
|
||||
APPROX_PERCENTILE(total_elapsed_time, 0.50) AS p50_ms,
|
||||
APPROX_PERCENTILE(total_elapsed_time, 0.95) AS p95_ms,
|
||||
DIV0(COUNT_IF(execution_status != 'SUCCESS'), COUNT(*)) AS error_rate,
|
||||
SUM(rows_produced) AS rows_produced,
|
||||
ARRAY_AGG(OBJECT_CONSTRUCT('user', user_name, 'executions', 1)) WITHIN GROUP (ORDER BY start_time DESC)::string AS top_users
|
||||
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
|
||||
WHERE query_text IS NOT NULL
|
||||
AND query_type IN ('SELECT', 'MERGE')
|
||||
AND start_time >= ${timestampLiteral(window.start)}
|
||||
AND start_time < ${timestampLiteral(window.end)}
|
||||
GROUP BY query_hash
|
||||
HAVING COUNT(*) >= ${config.minExecutions}
|
||||
ORDER BY executions DESC`.trim();
|
||||
stats.template_id,
|
||||
stats.canonical_sql,
|
||||
stats.executions,
|
||||
stats.distinct_users,
|
||||
stats.first_seen,
|
||||
stats.last_seen,
|
||||
stats.p50_ms,
|
||||
stats.p95_ms,
|
||||
stats.error_rate,
|
||||
stats.rows_produced,
|
||||
ARRAY_AGG(
|
||||
OBJECT_CONSTRUCT('user', users.user, 'executions', users.executions)
|
||||
) WITHIN GROUP (ORDER BY users.executions DESC, users.last_seen DESC)::string AS top_users
|
||||
FROM template_stats AS stats
|
||||
JOIN template_users AS users
|
||||
ON users.template_id = stats.template_id
|
||||
GROUP BY
|
||||
stats.template_id,
|
||||
stats.canonical_sql,
|
||||
stats.executions,
|
||||
stats.distinct_users,
|
||||
stats.first_seen,
|
||||
stats.last_seen,
|
||||
stats.p50_ms,
|
||||
stats.p95_ms,
|
||||
stats.error_rate,
|
||||
stats.rows_produced
|
||||
ORDER BY stats.executions DESC`.trim();
|
||||
const result = await queryClient(client).executeQuery(sql);
|
||||
if (result.error) {
|
||||
throw grantsError(result.error);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
|
||||
import { tableRefKey, type KtxTableRefKey } from '../../../scan/table-ref.js';
|
||||
import type { KtxTableRef } from '../../../scan/types.js';
|
||||
import {
|
||||
bucketDistinctUsers,
|
||||
bucketErrorRate,
|
||||
|
|
@ -15,6 +17,11 @@ import {
|
|||
redactHistoricSqlText,
|
||||
type HistoricSqlRedactionPattern,
|
||||
} from './redaction.js';
|
||||
import {
|
||||
includedQueryHistoryTableRefs,
|
||||
isQueryHistoryScopeFloorDisabled,
|
||||
shouldFailOpenQueryHistoryScope,
|
||||
} from './scope-membership.js';
|
||||
import {
|
||||
HISTORIC_SQL_SOURCE_KEY,
|
||||
aggregatedTemplateSchema,
|
||||
|
|
@ -38,17 +45,13 @@ interface StageHistoricSqlAggregatedSnapshotInput {
|
|||
|
||||
interface ParsedTemplate {
|
||||
template: AggregatedTemplate;
|
||||
tablesTouched: string[];
|
||||
includedTables: string[];
|
||||
tablesTouched: KtxTableRef[];
|
||||
includedTables: KtxTableRef[];
|
||||
columnsByClause: Record<string, string[]>;
|
||||
}
|
||||
|
||||
interface EnabledTableFilter {
|
||||
exact: Set<string>;
|
||||
uniqueUnqualified: Set<string>;
|
||||
}
|
||||
|
||||
interface TableAccumulator {
|
||||
tableRef: KtxTableRef;
|
||||
table: string;
|
||||
executions: number;
|
||||
distinctUsers: number;
|
||||
|
|
@ -105,8 +108,7 @@ function shouldDropByUsers(template: AggregatedTemplate, config: HistoricSqlUnif
|
|||
const matchingExecutions = template.topUsers
|
||||
.filter((entry) => matchesAny(entry.user, patterns))
|
||||
.reduce((sum, entry) => sum + entry.executions, 0);
|
||||
const allExecutions = template.topUsers.reduce((sum, entry) => sum + entry.executions, 0);
|
||||
const serviceOnly = allExecutions > 0 && matchingExecutions >= allExecutions;
|
||||
const serviceOnly = template.stats.executions > 0 && matchingExecutions >= template.stats.executions;
|
||||
return service.mode === 'exclude' ? serviceOnly : !serviceOnly;
|
||||
}
|
||||
|
||||
|
|
@ -122,90 +124,8 @@ function shouldDropTemplate(template: AggregatedTemplate, config: HistoricSqlUni
|
|||
return false;
|
||||
}
|
||||
|
||||
/** Canonical comparison form of a table identifier: surrounding whitespace removed, then lower-cased. */
function normalizeTableIdentifier(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
|
||||
|
||||
/**
 * Returns the last non-empty dot-separated segment of the normalized identifier,
 * or an empty string when no such segment exists.
 */
function unqualifiedTableIdentifier(value: string): string {
  const segments = normalizeTableIdentifier(value)
    .split('.')
    .filter((segment) => segment.length > 0);
  const last = segments.pop();
  return last ?? '';
}
|
||||
|
||||
/**
 * Builds the lookup structure used to decide whether a table is enabled.
 *
 * @param enabledTables - Configured table identifiers.
 * @returns `null` when the list is empty; otherwise the set of normalized identifiers
 *   (`exact`) plus the set of unqualified names that map to exactly one entry of
 *   `exact` (`uniqueUnqualified`).
 */
function buildEnabledTableFilter(enabledTables: string[]): EnabledTableFilter | null {
  if (enabledTables.length === 0) {
    return null;
  }

  // Normalize first; entries that normalize to an empty string are discarded.
  const exact = new Set<string>();
  for (const raw of enabledTables) {
    const normalized = normalizeTableIdentifier(raw);
    if (normalized.length > 0) {
      exact.add(normalized);
    }
  }

  // Count how many distinct normalized identifiers share each unqualified name.
  const occurrences = new Map<string, number>();
  for (const identifier of exact) {
    const bare = unqualifiedTableIdentifier(identifier);
    if (bare.length === 0) {
      continue;
    }
    occurrences.set(bare, (occurrences.get(bare) ?? 0) + 1);
  }

  // Only unqualified names with a single owner are kept.
  const uniqueUnqualified = new Set<string>();
  for (const [bare, count] of occurrences) {
    if (count === 1) {
      uniqueUnqualified.add(bare);
    }
  }

  return { exact, uniqueUnqualified };
}
|
||||
|
||||
/**
 * Decides whether `table` passes the enabled-table filter.
 *
 * A missing filter accepts everything. Otherwise the normalized identifier must be an
 * exact entry, or its unqualified name must be one of the filter's unique unqualified names.
 */
function isEnabledTable(table: string, filter: EnabledTableFilter | null): boolean {
  if (!filter) {
    return true;
  }
  const key = normalizeTableIdentifier(table);
  if (filter.exact.has(key)) {
    return true;
  }
  return filter.uniqueUnqualified.has(unqualifiedTableIdentifier(key));
}
|
||||
|
||||
/**
|
||||
* pg_stat_statements records queries as written, so the same physical table can appear
|
||||
* both bare (`accounts`, resolved via search_path) and schema-qualified
|
||||
* (`orbit_raw.accounts`). Collapse a bare identifier into its schema-qualified form when
|
||||
* exactly one qualified form shares its unqualified name, so the two never become separate
|
||||
* work units. Ambiguous bare names (two qualified forms) are left untouched.
|
||||
*/
|
||||
function canonicalizeTableIdentifiers(parsedTemplates: ParsedTemplate[]): void {
|
||||
const all = new Set<string>();
|
||||
for (const parsed of parsedTemplates) {
|
||||
for (const table of parsed.includedTables) {
|
||||
all.add(table);
|
||||
}
|
||||
}
|
||||
const qualifiedByUnqualified = new Map<string, Set<string>>();
|
||||
for (const table of all) {
|
||||
if (!table.includes('.')) {
|
||||
continue;
|
||||
}
|
||||
const unqualified = unqualifiedTableIdentifier(table);
|
||||
if (unqualified.length === 0) {
|
||||
continue;
|
||||
}
|
||||
const forms = qualifiedByUnqualified.get(unqualified) ?? new Set<string>();
|
||||
forms.add(table);
|
||||
qualifiedByUnqualified.set(unqualified, forms);
|
||||
}
|
||||
const canonical = new Map<string, string>();
|
||||
for (const table of all) {
|
||||
if (table.includes('.')) {
|
||||
continue;
|
||||
}
|
||||
const forms = qualifiedByUnqualified.get(unqualifiedTableIdentifier(table));
|
||||
if (forms && forms.size === 1) {
|
||||
canonical.set(table, [...forms][0]);
|
||||
}
|
||||
}
|
||||
if (canonical.size === 0) {
|
||||
return;
|
||||
}
|
||||
const remap = (table: string): string => canonical.get(table) ?? table;
|
||||
for (const parsed of parsedTemplates) {
|
||||
parsed.includedTables = [...new Set(parsed.includedTables.map(remap))].sort();
|
||||
parsed.tablesTouched = [...new Set(parsed.tablesTouched.map(remap))].sort();
|
||||
}
|
||||
/**
 * Renders a table ref as a dot-joined string built from its catalog, db and name
 * (in that order), leaving out any part that is null, undefined or empty.
 */
function displayTableRef(ref: KtxTableRef): string {
  const segments: string[] = [];
  for (const part of [ref.catalog, ref.db, ref.name]) {
    // A truthy string is by definition non-empty.
    if (part) {
      segments.push(part);
    }
  }
  return segments.join('.');
}
|
||||
|
||||
function historicSqlWindowDays(config: HistoricSqlUnifiedPullConfig): number {
|
||||
|
|
@ -240,9 +160,10 @@ function recordJoin(acc: TableAccumulator, otherTable: string, columns: string[]
|
|||
}
|
||||
}
|
||||
|
||||
function accumulatorFor(table: string): TableAccumulator {
|
||||
function accumulatorFor(tableRef: KtxTableRef): TableAccumulator {
|
||||
return {
|
||||
table,
|
||||
tableRef,
|
||||
table: displayTableRef(tableRef),
|
||||
executions: 0,
|
||||
distinctUsers: 0,
|
||||
errorRateNumerator: 0,
|
||||
|
|
@ -272,8 +193,8 @@ function addTemplate(acc: TableAccumulator, parsed: ParsedTemplate): void {
|
|||
}
|
||||
}
|
||||
const joinColumns = parsed.columnsByClause.join ?? [];
|
||||
for (const otherTable of parsed.tablesTouched.filter((table) => table !== acc.table)) {
|
||||
recordJoin(acc, otherTable, joinColumns, executions);
|
||||
for (const otherTable of parsed.tablesTouched.filter((table) => tableRefKey(table) !== tableRefKey(acc.tableRef))) {
|
||||
recordJoin(acc, displayTableRef(otherTable), joinColumns, executions);
|
||||
}
|
||||
acc.topTemplates.push(parsed.template);
|
||||
}
|
||||
|
|
@ -310,6 +231,7 @@ function toStagedTable(acc: TableAccumulator, now: Date): StagedTableInput {
|
|||
|
||||
return {
|
||||
table: acc.table,
|
||||
tableRef: acc.tableRef,
|
||||
stats: {
|
||||
executionsBucket: bucketExecutions(acc.executions),
|
||||
distinctUsersBucket: bucketDistinctUsers(acc.distinctUsers),
|
||||
|
|
@ -329,7 +251,7 @@ function toPatternsInput(parsedTemplates: ParsedTemplate[]): StagedPatternsInput
|
|||
.map(({ template, tablesTouched }) => ({
|
||||
id: template.templateId,
|
||||
canonicalSql: template.canonicalSql,
|
||||
tablesTouched: [...tablesTouched].sort(),
|
||||
tablesTouched: [...tablesTouched].sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right))),
|
||||
executionsBucket: bucketExecutions(template.stats.executions),
|
||||
distinctUsersBucket: bucketDistinctUsers(template.stats.distinctUsers),
|
||||
dialect: template.dialect,
|
||||
|
|
@ -340,7 +262,6 @@ function toPatternsInput(parsedTemplates: ParsedTemplate[]): StagedPatternsInput
|
|||
|
||||
export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSqlAggregatedSnapshotInput): Promise<void> {
|
||||
const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
|
||||
const enabledTableFilter = buildEnabledTableFilter(config.enabledTables);
|
||||
const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
|
||||
const now = input.now ?? new Date();
|
||||
const windowStart = new Date(now.getTime() - historicSqlWindowDays(config) * 24 * 60 * 60 * 1000);
|
||||
|
|
@ -356,11 +277,25 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
|
|||
}
|
||||
}
|
||||
|
||||
const analysis = await input.sqlAnalysis.analyzeBatch(
|
||||
snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql })),
|
||||
config.dialect,
|
||||
);
|
||||
const warnings: string[] = [];
|
||||
const analysisItems = snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql }));
|
||||
const analysisOptions =
|
||||
config.modeledTableCatalog.length > 0 ? { catalog: { tables: config.modeledTableCatalog } } : undefined;
|
||||
const warnings: string[] = [
|
||||
...config.scopeFloorWarnings,
|
||||
...(shouldFailOpenQueryHistoryScope(config) ? ['query_history_scope_floor_disabled:empty_modeled_scope'] : []),
|
||||
];
|
||||
let scopeDisabledByQualificationFailure = false;
|
||||
let analysis: Awaited<ReturnType<SqlAnalysisPort['analyzeBatch']>>;
|
||||
try {
|
||||
analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, config.dialect, analysisOptions);
|
||||
} catch (error) {
|
||||
if (!analysisOptions || config.enabledTables.length > 0 || isQueryHistoryScopeFloorDisabled(config)) {
|
||||
throw error;
|
||||
}
|
||||
warnings.push('query_history_scope_floor_disabled:catalog_qualification_failed');
|
||||
scopeDisabledByQualificationFailure = true;
|
||||
analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, config.dialect, undefined);
|
||||
}
|
||||
const parsedTemplates: ParsedTemplate[] = [];
|
||||
for (const template of snapshot) {
|
||||
const parsed = analysis.get(template.templateId);
|
||||
|
|
@ -368,8 +303,12 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
|
|||
warnings.push(`parse_failed:${template.templateId}`);
|
||||
continue;
|
||||
}
|
||||
const tablesTouched = [...new Set(parsed.tablesTouched)].filter((table) => table.length > 0).sort();
|
||||
const includedTables = tablesTouched.filter((table) => isEnabledTable(table, enabledTableFilter));
|
||||
const tablesTouched = [...new Map(parsed.tablesTouched.map((ref) => [tableRefKey(ref), ref])).values()]
|
||||
.filter((ref) => ref.name.length > 0)
|
||||
.sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right)));
|
||||
const includedTables = scopeDisabledByQualificationFailure
|
||||
? [...tablesTouched]
|
||||
: includedQueryHistoryTableRefs(tablesTouched, config);
|
||||
if (includedTables.length === 0) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -383,24 +322,23 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
|
|||
});
|
||||
}
|
||||
|
||||
canonicalizeTableIdentifiers(parsedTemplates);
|
||||
|
||||
const byTable = new Map<string, TableAccumulator>();
|
||||
const byTable = new Map<KtxTableRefKey, TableAccumulator>();
|
||||
for (const parsed of parsedTemplates) {
|
||||
for (const table of parsed.includedTables) {
|
||||
const acc = byTable.get(table) ?? accumulatorFor(table);
|
||||
for (const tableRef of parsed.includedTables) {
|
||||
const key = tableRefKey(tableRef);
|
||||
const acc = byTable.get(key) ?? accumulatorFor(tableRef);
|
||||
addTemplate(acc, parsed);
|
||||
byTable.set(table, acc);
|
||||
byTable.set(key, acc);
|
||||
}
|
||||
}
|
||||
|
||||
await mkdir(input.stagedDir, { recursive: true });
|
||||
for (const [table, acc] of [...byTable.entries()].sort(([left], [right]) => left.localeCompare(right))) {
|
||||
await writeJson(input.stagedDir, `tables/${table}.json`, toStagedTable(acc, now));
|
||||
for (const [, acc] of [...byTable.entries()].sort((left, right) => left[0].localeCompare(right[0]))) {
|
||||
await writeJson(input.stagedDir, `tables/${acc.table}.json`, toStagedTable(acc, now));
|
||||
}
|
||||
const patternsInput = toPatternsInput(parsedTemplates);
|
||||
const patternInputSplit = splitHistoricSqlPatternInputs(patternsInput);
|
||||
const allWarnings = [...warnings, ...patternInputSplit.warnings];
|
||||
const allWarnings = [...new Set([...warnings, ...patternInputSplit.warnings])];
|
||||
await writeJson(input.stagedDir, 'patterns-input.json', patternInputSplit.auditInput);
|
||||
for (const shard of patternInputSplit.shards) {
|
||||
await writeJson(input.stagedDir, shard.path, shard.input);
|
||||
|
|
|
|||
|
|
@ -8,9 +8,22 @@ export type HistoricSqlDialect = z.infer<typeof historicSqlDialectSchema>;
|
|||
|
||||
const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']);
|
||||
|
||||
const ktxTableRefSchema = z.object({
|
||||
catalog: z.string().nullable(),
|
||||
db: z.string().nullable(),
|
||||
name: z.string().min(1),
|
||||
}).strict();
|
||||
|
||||
const ktxTableRefWithColumnsSchema = ktxTableRefSchema.extend({
|
||||
columns: z.array(z.string().min(1)).optional(),
|
||||
}).strict();
|
||||
|
||||
const historicSqlCommonPullConfigSchema = z.object({
|
||||
minExecutions: z.number().int().nonnegative().default(5),
|
||||
enabledTables: z.array(z.string().min(1)).default([]),
|
||||
enabledTables: z.array(ktxTableRefSchema).default([]),
|
||||
enabledSchemas: z.array(z.string().min(1)).default([]),
|
||||
modeledTableCatalog: z.array(ktxTableRefWithColumnsSchema).default([]),
|
||||
scopeFloorWarnings: z.array(z.string()).default([]),
|
||||
filters: z.object({
|
||||
serviceAccounts: z.object({
|
||||
patterns: z.array(z.string()).default([]),
|
||||
|
|
@ -68,6 +81,7 @@ export type AggregatedTemplate = z.infer<typeof aggregatedTemplateSchema>;
|
|||
|
||||
export const stagedTableInputSchema = z.object({
|
||||
table: z.string().min(1),
|
||||
tableRef: ktxTableRefSchema,
|
||||
stats: z.object({
|
||||
executionsBucket: z.string(),
|
||||
distinctUsersBucket: z.string(),
|
||||
|
|
@ -93,7 +107,7 @@ export const stagedPatternsInputSchema = z.object({
|
|||
templates: z.array(z.object({
|
||||
id: z.string(),
|
||||
canonicalSql: z.string(),
|
||||
tablesTouched: z.array(z.string()),
|
||||
tablesTouched: z.array(ktxTableRefSchema),
|
||||
executionsBucket: z.string(),
|
||||
distinctUsersBucket: z.string(),
|
||||
dialect: historicSqlDialectSchema,
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import { DbtSourceAdapter } from './adapters/dbt/dbt.adapter.js';
|
|||
import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js';
|
||||
import { HistoricSqlSourceAdapter } from './adapters/historic-sql/historic-sql.adapter.js';
|
||||
import { PostgresPgssReader } from './adapters/historic-sql/postgres-pgss-reader.js';
|
||||
import { resolveQueryHistoryScopeFloor } from './adapters/historic-sql/scope-floor.js';
|
||||
import {
|
||||
HISTORIC_SQL_SOURCE_KEY,
|
||||
historicSqlUnifiedPullConfigSchema,
|
||||
|
|
@ -179,12 +180,39 @@ function queryHistoryRecord(connection: unknown): Record<string, unknown> | null
|
|||
return queryHistory;
|
||||
}
|
||||
|
||||
function queryHistoryPullConfig(connection: unknown): Record<string, unknown> | null {
|
||||
async function queryHistoryPullConfig(
|
||||
project: KtxLocalProject,
|
||||
connectionId: string,
|
||||
connection: unknown,
|
||||
): Promise<Record<string, unknown> | null> {
|
||||
const queryHistory = queryHistoryRecord(connection);
|
||||
if (queryHistory?.enabled !== true || !isRecord(connection)) return null;
|
||||
const dialect = historicSqlDialectByDriver.get(String(connection.driver ?? '').toLowerCase());
|
||||
const driver = String(connection.driver ?? '').toLowerCase();
|
||||
const dialect = historicSqlDialectByDriver.get(driver);
|
||||
if (!dialect) return null;
|
||||
return { ...queryHistory, dialect };
|
||||
const scopeFloor = await resolveQueryHistoryScopeFloor({
|
||||
projectDir: project.projectDir,
|
||||
connectionId,
|
||||
driver,
|
||||
connection,
|
||||
storedQueryHistory: queryHistory,
|
||||
});
|
||||
const {
|
||||
enabled: _enabled,
|
||||
dialect: _dialect,
|
||||
enabledTables: _enabledTables,
|
||||
enabledSchemas: _enabledSchemas,
|
||||
scopeFloorWarnings: _scopeFloorWarnings,
|
||||
...stored
|
||||
} = queryHistory;
|
||||
return {
|
||||
...stored,
|
||||
dialect,
|
||||
...(scopeFloor.enabledTables.length > 0 ? { enabledTables: scopeFloor.enabledTables } : {}),
|
||||
...(scopeFloor.enabledSchemas.length > 0 ? { enabledSchemas: scopeFloor.enabledSchemas } : {}),
|
||||
...(scopeFloor.modeledTableCatalog.length > 0 ? { modeledTableCatalog: scopeFloor.modeledTableCatalog } : {}),
|
||||
...(scopeFloor.warnings.length > 0 ? { scopeFloorWarnings: scopeFloor.warnings } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
function stringField(value: unknown): string | null {
|
||||
|
|
@ -245,7 +273,7 @@ export async function localPullConfigForAdapter(
|
|||
if (options.historicSqlPullConfigOverride) {
|
||||
return historicSqlUnifiedPullConfigSchema.parse(options.historicSqlPullConfigOverride);
|
||||
}
|
||||
const queryHistory = queryHistoryPullConfig(connection);
|
||||
const queryHistory = await queryHistoryPullConfig(project, connectionId, connection);
|
||||
if (!queryHistory) {
|
||||
throw new Error(`Connection "${connectionId}" does not have context.queryHistory.enabled: true`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
import { request as httpRequest } from 'node:http';
|
||||
import { request as httpsRequest } from 'node:https';
|
||||
import { URL } from 'node:url';
|
||||
import type { KtxTableRef } from '../scan/types.js';
|
||||
import type {
|
||||
SqlAnalysisBatchItem,
|
||||
SqlAnalysisBatchOptions,
|
||||
SqlAnalysisBatchResult,
|
||||
SqlAnalysisDialect,
|
||||
SqlAnalysisFingerprintResult,
|
||||
|
|
@ -89,6 +91,14 @@ function optionalString(raw: Record<string, unknown>, field: string): string | n
|
|||
throw new Error(`sql analysis response has invalid optional string field ${field}`);
|
||||
}
|
||||
|
||||
/**
 * Reads an optional, nullable string field from a raw response record.
 *
 * @param raw - Decoded response object.
 * @param field - Name of the field to read.
 * @returns The string value, or `null` when the field is `null` or absent.
 * @throws Error when the field holds anything other than a string, `null` or `undefined`.
 */
function optionalNullableStringField(raw: Record<string, unknown>, field: string): string | null {
  const candidate = raw[field];
  if (typeof candidate === 'string') {
    return candidate;
  }
  if (candidate === null || candidate === undefined) {
    return null;
  }
  throw new Error(`sql analysis response has invalid optional nullable string field ${field}`);
}
|
||||
|
||||
function requiredStringArray(raw: Record<string, unknown>, field: string): string[] {
|
||||
const value = raw[field];
|
||||
if (!Array.isArray(value) || value.some((item) => typeof item !== 'string')) {
|
||||
|
|
@ -175,10 +185,34 @@ function mapColumnsByClause(raw: Record<string, unknown>): SqlAnalysisBatchResul
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
 * Validates one raw table-ref entry from a sql analysis response.
 *
 * @param raw - The undecoded entry.
 * @param field - Path of the entry inside the response, used in error messages.
 * @returns A ref with a non-empty `name` and nullable `catalog` / `db`.
 * @throws Error when the entry is not a plain object, when `name` is missing or empty,
 *   or when `catalog` / `db` hold a non-string, non-null value.
 */
function requiredTableRef(raw: unknown, field: string): KtxTableRef {
  const isObjectLike = typeof raw === 'object' && raw !== null;
  if (!isObjectLike || Array.isArray(raw)) {
    throw new Error(`sql analysis response contains invalid table ref in ${field}`);
  }

  const record = raw as Record<string, unknown>;
  const { name } = record;
  // The name is checked before the optional parts so a missing name is reported first.
  if (typeof name !== 'string' || name.length === 0) {
    throw new Error(`sql analysis response table ref in ${field} is missing name`);
  }

  const catalog = optionalNullableStringField(record, 'catalog');
  const db = optionalNullableStringField(record, 'db');
  return { catalog, db, name };
}
|
||||
|
||||
/**
 * Reads a required array of table refs from a raw response record.
 *
 * @param raw - Decoded response object.
 * @param field - Name of the array field.
 * @returns The validated refs, one per array entry.
 * @throws Error when the field is not an array, or when any entry fails
 *   `requiredTableRef` validation (reported with the path `<field>.<index>`).
 */
function requiredTableRefArray(raw: Record<string, unknown>, field: string): KtxTableRef[] {
  const entries = raw[field];
  if (Array.isArray(entries)) {
    return entries.map((entry, position) => requiredTableRef(entry, `${field}.${position}`));
  }
  throw new Error(`sql analysis response is missing table-ref[] field ${field}`);
}
|
||||
|
||||
function mapBatchResult(raw: Record<string, unknown>): SqlAnalysisBatchResult {
|
||||
const error = optionalString(raw, 'error');
|
||||
return {
|
||||
tablesTouched: requiredStringArray(raw, 'tables_touched'),
|
||||
tablesTouched: requiredTableRefArray(raw, 'tables_touched'),
|
||||
columnsByClause: mapColumnsByClause(raw),
|
||||
...(error !== undefined ? { error } : {}),
|
||||
};
|
||||
|
|
@ -215,10 +249,11 @@ export function createHttpSqlAnalysisPort(options: HttpSqlAnalysisPortOptions):
|
|||
});
|
||||
return mapResult(raw);
|
||||
},
|
||||
async analyzeBatch(items: SqlAnalysisBatchItem[], dialect: SqlAnalysisDialect) {
|
||||
async analyzeBatch(items: SqlAnalysisBatchItem[], dialect: SqlAnalysisDialect, options?: SqlAnalysisBatchOptions) {
|
||||
const raw = await requestJson('/sql/analyze-batch', {
|
||||
dialect,
|
||||
items,
|
||||
...(options?.catalog ? { catalog: options.catalog } : {}),
|
||||
});
|
||||
return mapBatchResponse(raw);
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import type { KtxTableRef } from '../scan/types.js';
|
||||
|
||||
export type SqlAnalysisDialect =
|
||||
| 'bigquery'
|
||||
| 'snowflake'
|
||||
|
|
@ -32,8 +34,20 @@ export interface SqlAnalysisBatchItem {
|
|||
sql: string;
|
||||
}
|
||||
|
||||
interface SqlAnalysisCatalogTable extends KtxTableRef {
|
||||
columns?: string[];
|
||||
}
|
||||
|
||||
interface SqlAnalysisCatalog {
|
||||
tables: SqlAnalysisCatalogTable[];
|
||||
}
|
||||
|
||||
export interface SqlAnalysisBatchOptions {
|
||||
catalog?: SqlAnalysisCatalog;
|
||||
}
|
||||
|
||||
export interface SqlAnalysisBatchResult {
|
||||
tablesTouched: string[];
|
||||
tablesTouched: KtxTableRef[];
|
||||
columnsByClause: Partial<Record<SqlAnalysisClause, string[]>>;
|
||||
error?: string | null;
|
||||
}
|
||||
|
|
@ -48,6 +62,7 @@ export interface SqlAnalysisPort {
|
|||
analyzeBatch(
|
||||
items: SqlAnalysisBatchItem[],
|
||||
dialect: SqlAnalysisDialect,
|
||||
options?: SqlAnalysisBatchOptions,
|
||||
): Promise<Map<string, SqlAnalysisBatchResult>>;
|
||||
validateReadOnly(sql: string, dialect: SqlAnalysisDialect): Promise<SqlReadOnlyValidationResult>;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ import { BigQueryHistoricSqlQueryHistoryReader } from './context/ingest/adapters
|
|||
import { historicSqlDialectForConnectionDriver } from './context/ingest/adapters/historic-sql/connection-dialect.js';
|
||||
import { createDaemonLiveDatabaseIntrospection } from './context/ingest/adapters/live-database/daemon-introspection.js';
|
||||
import { createDefaultLocalIngestAdapters, type DefaultLocalIngestAdaptersOptions } from './context/ingest/local-adapters.js';
|
||||
import type { HistoricSqlReader } from './context/ingest/adapters/historic-sql/types.js';
|
||||
import type { HistoricSqlDialect, HistoricSqlReader } from './context/ingest/adapters/historic-sql/types.js';
|
||||
import type {
|
||||
LiveDatabaseIntrospectionOptions,
|
||||
LiveDatabaseIntrospectionPort,
|
||||
|
|
@ -31,7 +31,7 @@ import {
|
|||
createManagedDaemonLookerTableIdentifierParser,
|
||||
createManagedDaemonSqlAnalysisPort,
|
||||
managedDaemonDatabaseIntrospectionOptions,
|
||||
type ManagedPythonCoreDaemonOptions,
|
||||
type ManagedPythonDaemonHttpOptions,
|
||||
} from './managed-python-http.js';
|
||||
import type { KtxOperationalLogger } from './io/logger.js';
|
||||
import { resolveKtxConfigReference } from './context/core/config-reference.js';
|
||||
|
|
@ -161,10 +161,17 @@ export interface KtxCliLocalIngestAdaptersOptions extends DefaultLocalIngestAdap
|
|||
historicSqlConnectionId?: string;
|
||||
sqlAnalysis?: SqlAnalysisPort;
|
||||
sqlAnalysisUrl?: string;
|
||||
managedDaemon?: ManagedPythonCoreDaemonOptions;
|
||||
managedDaemon?: ManagedPythonDaemonHttpOptions;
|
||||
logger?: KtxOperationalLogger;
|
||||
}
|
||||
|
||||
export interface KtxCliHistoricSqlRuntime {
|
||||
dialect: HistoricSqlDialect;
|
||||
sqlAnalysis: SqlAnalysisPort;
|
||||
reader: HistoricSqlReader;
|
||||
queryClient: unknown;
|
||||
}
|
||||
|
||||
function createEphemeralPostgresHistoricSqlClient(project: KtxLocalProject, connectionId: string) {
|
||||
const connection = project.config.connections[connectionId] as KtxPostgresConnectionConfig | undefined;
|
||||
const inputDriver = connection?.driver ?? 'unknown';
|
||||
|
|
@ -262,7 +269,10 @@ function bigQueryRegion(connection: KtxBigQueryConnectionConfig): string {
|
|||
: 'us';
|
||||
}
|
||||
|
||||
function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCliLocalIngestAdaptersOptions) {
|
||||
function historicSqlOptionsForLocalRun(
|
||||
project: KtxLocalProject,
|
||||
options: KtxCliLocalIngestAdaptersOptions,
|
||||
): KtxCliHistoricSqlRuntime | undefined {
|
||||
const connectionId = options.historicSqlConnectionId;
|
||||
if (!connectionId) {
|
||||
return undefined;
|
||||
|
|
@ -285,6 +295,7 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli
|
|||
if (dialect === 'postgres') {
|
||||
return {
|
||||
...base,
|
||||
dialect,
|
||||
reader: new PostgresPgssReader() satisfies HistoricSqlReader,
|
||||
queryClient: createEphemeralPostgresHistoricSqlClient(project, connectionId),
|
||||
};
|
||||
|
|
@ -297,6 +308,7 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli
|
|||
}
|
||||
return {
|
||||
...base,
|
||||
dialect,
|
||||
reader: new BigQueryHistoricSqlQueryHistoryReader({
|
||||
projectId: bigQueryProjectId(connection, process.env),
|
||||
region: bigQueryRegion(connection),
|
||||
|
|
@ -307,6 +319,7 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli
|
|||
|
||||
return {
|
||||
...base,
|
||||
dialect,
|
||||
reader: new SnowflakeHistoricSqlQueryHistoryReader() satisfies HistoricSqlReader,
|
||||
queryClient: {
|
||||
async executeQuery(query: string) {
|
||||
|
|
@ -318,11 +331,24 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Builds the historic-SQL runtime for one connection.
 *
 * Delegates to `historicSqlOptionsForLocalRun`, forcing `historicSqlConnectionId` to
 * `connectionId` so that any value already present in `options` is overridden.
 *
 * @returns Whatever `historicSqlOptionsForLocalRun` yields for that connection,
 *   which may be `undefined`.
 */
export function createKtxCliHistoricSqlRuntime(
  project: KtxLocalProject,
  connectionId: string,
  options: KtxCliLocalIngestAdaptersOptions = {},
): KtxCliHistoricSqlRuntime | undefined {
  const scopedOptions: KtxCliLocalIngestAdaptersOptions = {
    ...options,
    historicSqlConnectionId: connectionId,
  };
  return historicSqlOptionsForLocalRun(project, scopedOptions);
}
|
||||
|
||||
export function createKtxCliLocalIngestAdapters(
|
||||
project: KtxLocalProject,
|
||||
options: KtxCliLocalIngestAdaptersOptions = {},
|
||||
): SourceAdapter[] {
|
||||
const historicSql = historicSqlOptionsForLocalRun(project, options);
|
||||
const historicSql = options.historicSqlConnectionId
|
||||
? createKtxCliHistoricSqlRuntime(project, options.historicSqlConnectionId, options)
|
||||
: undefined;
|
||||
const base = createDefaultLocalIngestAdapters(project, {
|
||||
...options,
|
||||
databaseIntrospection: ktxCliDaemonDatabaseIntrospectionOptions(options),
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import type { KtxProgressPort } from './context/scan/types.js';
|
|||
import type { KtxCliIo } from './index.js';
|
||||
import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js';
|
||||
import { isDatabaseDriver, normalizeConnectionDriver } from './connection-drivers.js';
|
||||
import { resolveQueryHistoryScopeFloor } from './context/ingest/adapters/historic-sql/scope-floor.js';
|
||||
import {
|
||||
ensureManagedPythonCommandRuntime,
|
||||
type KtxManagedPythonInstallPolicy,
|
||||
|
|
@ -19,6 +20,7 @@ import {
|
|||
import { createAggregateProgressPort } from './progress-port-adapter.js';
|
||||
import { resolvePublicIngestRuntimeRequirements } from './runtime-requirements.js';
|
||||
import type { KtxScanArgs, KtxScanDeps } from './scan.js';
|
||||
import type { KtxTableRef } from './context/scan/types.js';
|
||||
import { profileMark } from './startup-profile.js';
|
||||
import { isDemoConnection } from './telemetry/demo-detect.js';
|
||||
import { emitProjectStackSnapshot, emitTelemetryEvent } from './telemetry/index.js';
|
||||
|
|
@ -281,26 +283,35 @@ function positiveInteger(value: unknown): number | undefined {
|
|||
return typeof value === 'number' && Number.isInteger(value) && value > 0 ? value : undefined;
|
||||
}
|
||||
|
||||
/**
 * Extracts the usable entries of a connection's `enabled_tables` setting.
 *
 * Only string entries containing at least one non-whitespace character are kept;
 * they are returned as written (not trimmed).
 *
 * @returns The kept entries, or `undefined` when `enabled_tables` is not an array
 *   or no entry qualifies.
 */
function enabledTablesForConnection(connection: KtxProjectConnectionConfig): string[] | undefined {
  const raw = connection.enabled_tables;
  if (!Array.isArray(raw)) {
    return undefined;
  }

  const tables: string[] = [];
  for (const value of raw) {
    if (typeof value === 'string' && value.trim().length > 0) {
      tables.push(value);
    }
  }
  return tables.length === 0 ? undefined : tables;
}
|
||||
|
||||
function queryHistoryPullConfig(input: {
|
||||
/** @internal */
|
||||
export function queryHistoryPullConfig(input: {
|
||||
stored: Record<string, unknown>;
|
||||
dialect: HistoricSqlDialect;
|
||||
windowDays?: number;
|
||||
enabledTables?: string[];
|
||||
enabledTables?: KtxTableRef[];
|
||||
enabledSchemas?: string[];
|
||||
modeledTableCatalog?: KtxTableRef[];
|
||||
scopeFloorWarnings?: string[];
|
||||
}): Record<string, unknown> {
|
||||
const { enabled: _enabled, dialect: _dialect, ...storedConfig } = input.stored;
|
||||
const {
|
||||
enabled: _enabled,
|
||||
dialect: _dialect,
|
||||
enabledTables: _enabledTables,
|
||||
enabledSchemas: _enabledSchemas,
|
||||
scopeFloorWarnings: _scopeFloorWarnings,
|
||||
...storedConfig
|
||||
} = input.stored;
|
||||
return {
|
||||
...storedConfig,
|
||||
dialect: input.dialect,
|
||||
...(input.enabledTables ? { enabledTables: input.enabledTables } : {}),
|
||||
...(input.enabledTables && input.enabledTables.length > 0 ? { enabledTables: input.enabledTables } : {}),
|
||||
...(input.enabledSchemas && input.enabledSchemas.length > 0 ? { enabledSchemas: input.enabledSchemas } : {}),
|
||||
...(input.modeledTableCatalog && input.modeledTableCatalog.length > 0
|
||||
? { modeledTableCatalog: input.modeledTableCatalog }
|
||||
: {}),
|
||||
...(input.scopeFloorWarnings && input.scopeFloorWarnings.length > 0
|
||||
? { scopeFloorWarnings: input.scopeFloorWarnings }
|
||||
: {}),
|
||||
...(input.windowDays !== undefined ? { windowDays: input.windowDays } : {}),
|
||||
};
|
||||
}
|
||||
|
|
@ -361,7 +372,6 @@ function resolveDatabaseTargetOptions(input: {
|
|||
stored: storedQh,
|
||||
dialect,
|
||||
windowDays: queryHistory.windowDays,
|
||||
enabledTables: enabledTablesForConnection(input.connection),
|
||||
}),
|
||||
},
|
||||
steps: ['database-schema', 'query-history'],
|
||||
|
|
@ -374,6 +384,43 @@ function resolveDatabaseTargetOptions(input: {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Resolves the query-history pull config for one ingest target.
 *
 * @param target Planned ingest target.
 * @param project Loaded project; supplies the connection config and project directory.
 * @returns `null` when the target is not a `database-ingest` target with query history enabled
 *   and a dialect set. When the project has no connection entry for the target, the target's
 *   own `pullConfig` if present, otherwise a config built from just the dialect and window.
 *   Otherwise a config built from the stored query-history settings plus the scope floor
 *   resolved for this connection.
 */
async function resolvedQueryHistoryPullConfigForTarget(
  target: KtxPublicIngestPlanTarget,
  project: KtxPublicIngestProject,
): Promise<Record<string, unknown> | null> {
  if (target.operation !== 'database-ingest' || target.queryHistory?.enabled !== true || !target.queryHistory.dialect) {
    return null;
  }

  const dialect = target.queryHistory.dialect;
  const windowDays = target.queryHistory.windowDays;
  const connection = project.config.connections[target.connectionId];

  if (connection) {
    const stored = storedQueryHistory(connection);
    const floor = await resolveQueryHistoryScopeFloor({
      projectDir: project.projectDir,
      connectionId: target.connectionId,
      driver: target.driver,
      connection: connection as Record<string, unknown>,
      storedQueryHistory: stored,
    });
    return queryHistoryPullConfig({
      stored,
      dialect,
      windowDays,
      enabledTables: floor.enabledTables,
      enabledSchemas: floor.enabledSchemas,
      modeledTableCatalog: floor.modeledTableCatalog,
      scopeFloorWarnings: floor.warnings,
    });
  }

  // No connection entry in the project config: fall back to whatever the plan carried.
  return target.queryHistory.pullConfig ?? queryHistoryPullConfig({ stored: {}, dialect, windowDays });
}
|
||||
|
||||
function enrichmentReadinessGaps(config: KtxProjectConfig): string[] {
|
||||
const gaps: string[] = [];
|
||||
if (config.llm.provider.backend === 'none' || !config.llm.models.default) {
|
||||
|
|
@ -877,7 +924,7 @@ export async function executePublicIngestTarget(
|
|||
project: KtxPublicIngestProject,
|
||||
): Promise<KtxPublicIngestTargetResult> {
|
||||
const startedAt = performance.now();
|
||||
const result = await runIngestTargetSteps(target, args, io, deps);
|
||||
const result = await runIngestTargetSteps(target, args, io, deps, project);
|
||||
// `io` may be a capture buffer for the scan/ingest step output; the telemetry
|
||||
// debug echo belongs on the real user-facing stream, which callers expose as
|
||||
// `deps.runtimeIo` (falling back to `io` when the step io is already real).
|
||||
|
|
@ -890,6 +937,7 @@ async function runIngestTargetSteps(
|
|||
args: Extract<KtxPublicIngestArgs, { command: 'run' }>,
|
||||
io: KtxCliIo,
|
||||
deps: KtxPublicIngestDeps,
|
||||
project: KtxPublicIngestProject,
|
||||
): Promise<KtxPublicIngestTargetResult> {
|
||||
if (target.preflightFailure) {
|
||||
if (target.operation === 'database-ingest') {
|
||||
|
|
@ -959,6 +1007,11 @@ async function runIngestTargetSteps(
|
|||
if (target.queryHistory?.enabled === true) {
|
||||
const { runKtxIngest } = await import('./ingest.js');
|
||||
const runIngest = deps.runIngest ?? runKtxIngest;
|
||||
const historicSqlPullConfigOverride =
|
||||
(await resolvedQueryHistoryPullConfigForTarget(target, project)) ?? {
|
||||
dialect: target.queryHistory.dialect,
|
||||
...(target.queryHistory.windowDays !== undefined ? { windowDays: target.queryHistory.windowDays } : {}),
|
||||
};
|
||||
const ingestArgs: KtxIngestArgs = {
|
||||
command: 'run',
|
||||
projectDir: args.projectDir,
|
||||
|
|
@ -969,11 +1022,7 @@ async function runIngestTargetSteps(
|
|||
...(args.cliVersion ? { cliVersion: args.cliVersion } : {}),
|
||||
...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}),
|
||||
allowImplicitAdapter: true,
|
||||
historicSqlPullConfigOverride:
|
||||
target.queryHistory.pullConfig ?? {
|
||||
dialect: target.queryHistory.dialect,
|
||||
...(target.queryHistory.windowDays !== undefined ? { windowDays: target.queryHistory.windowDays } : {}),
|
||||
},
|
||||
historicSqlPullConfigOverride,
|
||||
};
|
||||
// Query history runs after the schema scan has already written its report
|
||||
// into the shared target io, so it needs a phase-local capture. Reusing
|
||||
|
|
|
|||
|
|
@ -4,7 +4,15 @@ import { delimiter, dirname, join } from 'node:path';
|
|||
import { fileURLToPath } from 'node:url';
|
||||
import { promisify } from 'node:util';
|
||||
import { getDriverRegistration } from './context/connections/drivers.js';
|
||||
import { createLocalKtxLlmRuntimeFromConfig } from './context/llm/local-config.js';
|
||||
import type { KtxLlmRuntimePort } from './context/llm/runtime-port.js';
|
||||
import { queryHistoryDialectForConnection } from './context/ingest/adapters/historic-sql/connection-dialect.js';
|
||||
import {
|
||||
proposeQueryHistoryServiceAccountFilters,
|
||||
type ProposeQueryHistoryServiceAccountFiltersInput,
|
||||
type QueryHistoryFilterProposal,
|
||||
} from './context/ingest/adapters/historic-sql/query-history-filter-picker.js';
|
||||
import { resolveQueryHistoryScopeFloor } from './context/ingest/adapters/historic-sql/scope-floor.js';
|
||||
import type { HistoricSqlDialect } from './context/ingest/adapters/historic-sql/types.js';
|
||||
import {
|
||||
runHistoricSqlReadinessProbe,
|
||||
|
|
@ -15,7 +23,7 @@ import { type KtxProjectConnectionConfig, serializeKtxProjectConfig } from './co
|
|||
import { loadKtxProject } from './context/project/project.js';
|
||||
import { markKtxSetupStateStepComplete, setKtxSetupDatabaseConnectionIds } from './context/project/setup-config.js';
|
||||
import type { KtxTableListEntry } from './context/scan/types.js';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import { getKtxCliPackageInfo, type KtxCliIo } from './cli-runtime.js';
|
||||
import {
|
||||
errorMessage,
|
||||
flushPrefixedBufferedCommandOutput,
|
||||
|
|
@ -35,6 +43,10 @@ import {
|
|||
type PickDatabaseScopeArgs,
|
||||
} from './database-tree-picker.js';
|
||||
import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js';
|
||||
import { createKtxCliHistoricSqlRuntime } from './local-adapters.js';
|
||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||
import type { ManagedPythonCoreDaemonOptions } from './managed-python-http.js';
|
||||
import { queryHistoryPullConfig } from './public-ingest.js';
|
||||
import { runKtxScan } from './scan.js';
|
||||
import { writeProjectLocalSecretReference } from './setup-secrets.js';
|
||||
import { isDemoConnection } from './telemetry/demo-detect.js';
|
||||
|
|
@ -61,6 +73,9 @@ export type KtxSetupDatabaseDriver =
|
|||
export interface KtxSetupDatabasesArgs {
|
||||
projectDir: string;
|
||||
inputMode: 'auto' | 'disabled';
|
||||
yes?: boolean;
|
||||
cliVersion?: string;
|
||||
runtimeInstallPolicy?: KtxManagedPythonInstallPolicy;
|
||||
databaseDrivers?: KtxSetupDatabaseDriver[];
|
||||
databaseConnectionIds?: string[];
|
||||
databaseConnectionId?: string;
|
||||
|
|
@ -123,6 +138,13 @@ export interface KtxSetupDatabasesDeps {
|
|||
listTables?: (projectDir: string, connectionId: string, schemas?: string[]) => Promise<KtxTableListEntry[]>;
|
||||
pickDatabaseScope?: (args: PickDatabaseScopeArgs, io: KtxCliIo) => Promise<DatabaseScopePickResult>;
|
||||
historicSqlReadinessProbe?: HistoricSqlReadinessProbe;
|
||||
queryHistoryFilterPicker?: (
|
||||
input: ProposeQueryHistoryServiceAccountFiltersInput,
|
||||
) => Promise<QueryHistoryFilterProposal>;
|
||||
createQueryHistoryLlmRuntime?: (
|
||||
projectDir: string,
|
||||
project: Awaited<ReturnType<typeof loadKtxProject>>,
|
||||
) => KtxLlmRuntimePort | null;
|
||||
}
|
||||
|
||||
const DRIVER_OPTIONS: Array<{ value: KtxSetupDatabaseDriver; label: string }> = [
|
||||
|
|
@ -947,10 +969,14 @@ async function maybeApplyHistoricSqlConfig(input: {
|
|||
return withQueryHistoryConfig(input.connection, { ...existing, enabled: false });
|
||||
}
|
||||
|
||||
const existingFilters =
|
||||
existing.filters && typeof existing.filters === 'object' && !Array.isArray(existing.filters)
|
||||
? (existing.filters as Record<string, unknown>)
|
||||
: {};
|
||||
const common: Record<string, unknown> = {
|
||||
...existing,
|
||||
enabled: true,
|
||||
filters: historicSqlFiltersForSetup(input.args.queryHistoryServiceAccountPatterns),
|
||||
filters: historicSqlFiltersForSetup(input.args.queryHistoryServiceAccountPatterns, existingFilters),
|
||||
};
|
||||
|
||||
if (dialect === 'postgres') {
|
||||
|
|
@ -967,9 +993,13 @@ async function maybeApplyHistoricSqlConfig(input: {
|
|||
});
|
||||
}
|
||||
|
||||
function historicSqlFiltersForSetup(patterns: string[] | undefined) {
|
||||
function historicSqlFiltersForSetup(
|
||||
patterns: string[] | undefined,
|
||||
existingFilters: Record<string, unknown> = {},
|
||||
) {
|
||||
const serviceAccountPatterns = patterns ?? [];
|
||||
return {
|
||||
...existingFilters,
|
||||
dropTrivialProbes: true,
|
||||
...(serviceAccountPatterns.length > 0
|
||||
? {
|
||||
|
|
@ -1587,6 +1617,189 @@ async function maybeRunHistoricSqlSetupProbe(input: {
|
|||
return result.ok;
|
||||
}
|
||||
|
||||
/**
 * Reports whether the connection's query-history `filters` object carries a
 * `serviceAccounts` key (with any value).
 *
 * @param connection Connection config, or `undefined` when the connection is unknown.
 * @returns `false` when `filters` is missing, not an object, or an array; otherwise whether
 *   the `serviceAccounts` key is present.
 */
function hasServiceAccountsBlock(connection: KtxProjectConnectionConfig | undefined): boolean {
  const filters = queryHistoryConfigRecord(connection)?.filters;
  return typeof filters === 'object' && filters !== null && !Array.isArray(filters) && 'serviceAccounts' in filters;
}
|
||||
|
||||
/**
 * Writes a summary of a query-history filter proposal to `io.stdout`.
 *
 * With no proposed exclusions, a notice is printed for the skip reasons `no-llm`, `no-daemon`
 * and `no-in-scope-history`; other reasons print no notice. With proposed exclusions, a heading
 * and one line per excluded role are printed.
 *
 * Warnings carried by the proposal are printed in both cases, after the summary, so that they
 * are visible before the caller asks whether to apply the exclusions.
 *
 * @param io CLI io whose `stdout` receives the lines.
 * @param proposal Proposal returned by the filter picker.
 */
function printQueryHistoryFilterProposal(io: KtxCliIo, proposal: QueryHistoryFilterProposal): void {
  if (proposal.excludedRoles.length === 0) {
    const skipReason = proposal.skipped?.reason;
    if (skipReason === 'no-llm') {
      io.stdout.write('│ Query-history filter picker skipped: no LLM is configured.\n');
    } else if (skipReason === 'no-daemon') {
      io.stdout.write('│ Query-history filter picker skipped: SQL analysis is unavailable.\n');
    } else if (skipReason === 'no-in-scope-history') {
      io.stdout.write('│ Query-history filter picker found no in-scope service-account exclusions.\n');
    }
  } else {
    io.stdout.write('│ Proposed query-history service-account filters:\n');
    for (const excluded of proposal.excludedRoles) {
      io.stdout.write(`│ - ${excluded.role}: ${excluded.reason}\n`);
    }
  }

  // Previously warnings were only printed when nothing was proposed, which hid them exactly
  // when the user was about to accept derived exclusions.
  for (const warning of proposal.warnings) {
    io.stdout.write(`│ ! ${warning}\n`);
  }
}
|
||||
|
||||
/**
 * Decides whether a filter proposal should be written to the connection config.
 *
 * @param input.args Setup arguments; `yes === true` or `inputMode === 'disabled'` accepts
 *   the proposal without prompting.
 * @param input.prompts Prompt adapter used to ask the user in interactive runs.
 * @param input.proposal Proposal returned by the filter picker.
 * @returns `false` when nothing is proposed or the proposal was skipped with
 *   `user-block-present`; `true` in non-interactive runs; otherwise whether the user picked
 *   the `apply` option.
 */
async function shouldApplyQueryHistoryFilterProposal(input: {
  args: KtxSetupDatabasesArgs;
  prompts: KtxSetupDatabasesPromptAdapter;
  proposal: QueryHistoryFilterProposal;
}): Promise<boolean> {
  const { args, prompts, proposal } = input;
  const exclusionCount = proposal.excludedRoles.length;

  if (exclusionCount === 0) {
    return false;
  }
  if (proposal.skipped?.reason === 'user-block-present') {
    return false;
  }

  const nonInteractive = args.yes === true || args.inputMode === 'disabled';
  if (nonInteractive) {
    return true;
  }

  const pluralSuffix = exclusionCount === 1 ? '' : 's';
  const answer = await prompts.select({
    message: `Apply ${exclusionCount} derived query-history service-account exclusion${pluralSuffix}?`,
    options: [
      { value: 'apply', label: 'Apply derived filters (recommended)' },
      { value: 'skip', label: 'Leave query history filters unchanged' },
    ],
  });
  return answer === 'apply';
}
|
||||
|
||||
/**
 * Obtains the LLM runtime used by the query-history filter picker during setup.
 *
 * The factory injected through `deps.createQueryHistoryLlmRuntime` is tried first; when it is
 * absent or yields `null`/`undefined`, the runtime is built from the project's `llm` config.
 *
 * @returns The runtime, or `null` when none is produced or when either step throws. Errors are
 *   swallowed on purpose: the caller treats a missing runtime as "skip the picker".
 */
function createSetupQueryHistoryLlmRuntime(input: {
  projectDir: string;
  project: Awaited<ReturnType<typeof loadKtxProject>>;
  deps: KtxSetupDatabasesDeps;
}): KtxLlmRuntimePort | null {
  const { projectDir, project, deps } = input;
  try {
    const injected = deps.createQueryHistoryLlmRuntime?.(projectDir, project);
    if (injected !== null && injected !== undefined) {
      return injected;
    }
    return createLocalKtxLlmRuntimeFromConfig(project.config.llm, { projectDir });
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Builds the managed-daemon options used when the setup flow runs the query-history filter
 * picker.
 *
 * `cliVersion` falls back to the version reported by `getKtxCliPackageInfo()`. The install
 * policy falls back to `'never'` when input is disabled and to `'prompt'` otherwise.
 *
 * @internal
 */
export function managedDaemonOptionsForSetupQueryHistoryPicker(input: {
  projectDir: string;
  args: Pick<KtxSetupDatabasesArgs, 'cliVersion' | 'runtimeInstallPolicy' | 'inputMode'>;
  io: KtxCliIo;
}): ManagedPythonCoreDaemonOptions {
  const { projectDir, args, io } = input;
  const cliVersion = args.cliVersion ?? getKtxCliPackageInfo().version;
  const fallbackPolicy = args.inputMode === 'disabled' ? 'never' : 'prompt';
  const installPolicy = args.runtimeInstallPolicy ?? fallbackPolicy;
  return { cliVersion, projectDir, installPolicy, io };
}
|
||||
|
||||
/**
 * Runs the query-history service-account filter picker for one connection and, when the
 * resulting proposal is accepted, writes the derived exclusions into the connection's
 * query-history `filters.serviceAccounts`.
 *
 * Returns without output when the connection is missing, query history is not enabled for it,
 * no query-history dialect is derived for it, or no historic-SQL runtime is created. When no
 * LLM runtime is available and no picker was injected through `deps`, only the "no LLM" skip
 * notice is printed. A proposal skipped with `user-block-present` leaves the config untouched.
 */
async function maybeProposeQueryHistoryFilters(input: {
  projectDir: string;
  connectionId: string;
  io: KtxCliIo;
  deps: KtxSetupDatabasesDeps;
  args: KtxSetupDatabasesArgs;
  prompts: KtxSetupDatabasesPromptAdapter;
}): Promise<void> {
  const { projectDir, connectionId, io, deps, args, prompts } = input;

  const project = await loadKtxProject({ projectDir });
  const connection = project.config.connections[connectionId];
  const queryHistory = queryHistoryConfigRecord(connection);
  if (!connection || queryHistory?.enabled !== true) {
    return;
  }

  const dialect = queryHistoryDialectForConnection(connection);
  if (!dialect) {
    return;
  }

  const injectedPicker = deps.queryHistoryFilterPicker;
  const picker = injectedPicker ?? proposeQueryHistoryServiceAccountFilters;
  const llmRuntime = createSetupQueryHistoryLlmRuntime({ projectDir, project, deps });
  if (!llmRuntime && !injectedPicker) {
    // The built-in picker needs an LLM; report the skip instead of invoking it.
    printQueryHistoryFilterProposal(io, {
      excludedRoles: [],
      consideredRoleCount: 0,
      skipped: { reason: 'no-llm' },
      warnings: [],
    });
    return;
  }

  const managedDaemon = managedDaemonOptionsForSetupQueryHistoryPicker({ projectDir, args, io });
  const runtime = createKtxCliHistoricSqlRuntime(project, connectionId, { managedDaemon });
  if (!runtime) {
    return;
  }

  const userServiceAccountsPresent = hasServiceAccountsBlock(connection);
  const floor = await resolveQueryHistoryScopeFloor({
    projectDir,
    connectionId,
    driver: String(connection.driver ?? ''),
    connection: connection as Record<string, unknown>,
    storedQueryHistory: queryHistory,
  });
  const pullConfig = queryHistoryPullConfig({
    stored: queryHistory,
    dialect,
    enabledTables: floor.enabledTables,
    enabledSchemas: floor.enabledSchemas,
    modeledTableCatalog: floor.modeledTableCatalog,
    scopeFloorWarnings: floor.warnings,
  });

  const proposal = await picker({
    connectionId,
    dialect,
    queryClient: runtime.queryClient,
    reader: runtime.reader,
    sqlAnalysis: runtime.sqlAnalysis,
    llmRuntime,
    pullConfig,
    userServiceAccountsPresent,
  });
  printQueryHistoryFilterProposal(io, proposal);

  if (proposal.skipped?.reason === 'user-block-present') {
    io.stdout.write('│ Existing query-history service-account filters left unchanged.\n');
    return;
  }

  const accepted = await shouldApplyQueryHistoryFilterProposal({ args, prompts, proposal });
  if (!accepted) {
    return;
  }

  // Keep every other stored filter and replace only the service-account block.
  const priorFilters =
    queryHistory.filters && typeof queryHistory.filters === 'object' && !Array.isArray(queryHistory.filters)
      ? queryHistory.filters
      : {};
  const patterns = proposal.excludedRoles.map((excluded) => excluded.pattern);

  await writeConnectionConfig({
    projectDir,
    connectionId,
    connection: withQueryHistoryConfig(connection, {
      ...queryHistory,
      filters: {
        ...priorFilters,
        serviceAccounts: {
          mode: 'exclude',
          patterns,
        },
      },
    }),
  });
}
|
||||
|
||||
async function applyHistoricSqlConfigToExistingConnection(input: {
|
||||
projectDir: string;
|
||||
connectionId: string;
|
||||
|
|
@ -1725,6 +1938,16 @@ async function validateAndScanConnection(input: {
|
|||
`Schema context complete for ${input.connectionId}`,
|
||||
[`Changes: ${summarizeScanChanges(scanOutput)}`],
|
||||
);
|
||||
if (queryHistoryAvailable) {
|
||||
await maybeProposeQueryHistoryFilters({
|
||||
projectDir: input.projectDir,
|
||||
connectionId: input.connectionId,
|
||||
io: input.io,
|
||||
deps: input.deps,
|
||||
args: input.args,
|
||||
prompts: input.prompts,
|
||||
});
|
||||
}
|
||||
writeSetupSection(input.io, 'Database ready', [
|
||||
`${input.connectionId} · ${driverDisplay} · schema context complete`,
|
||||
]);
|
||||
|
|
|
|||
|
|
@ -735,6 +735,9 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
|||
{
|
||||
projectDir: projectResult.projectDir,
|
||||
inputMode: args.inputMode,
|
||||
yes: args.yes,
|
||||
cliVersion: args.cliVersion,
|
||||
runtimeInstallPolicy: setupRuntimeInstallPolicy(args),
|
||||
...(args.databaseDrivers ? { databaseDrivers: args.databaseDrivers } : {}),
|
||||
...(args.databaseConnectionIds ? { databaseConnectionIds: args.databaseConnectionIds } : {}),
|
||||
...(args.databaseConnectionId ? { databaseConnectionId: args.databaseConnectionId } : {}),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue