mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat(query-history): scope mining to modeled schemas by default (#258)
* feat(query-history): structure SQL analysis table refs * feat(query-history): qualify SQL analysis table refs * feat(query-history): wire modeled scope floor through ingest * chore(query-history): verify scope floor * test(query-history): align daemon SQL batch endpoint contract * feat(query-history): build scope from same-run scan catalog * feat(query-history): fail open on scope-floor catalog failures * chore(query-history): verify scope-floor v1 closure * refactor(query-history): share scope membership * feat(setup): apply derived query history filters * docs: document derived query history filters * fix(query-history): redact filter picker LLM prompt SQL * fix(setup): run filter picker SQL analysis through managed daemon * chore(query-history): verify filter picker v1 closure * fix(query-history): fail open on partial service-account attribution * fix(query-history): aggregate BigQuery users by execution count * fix(query-history): aggregate Snowflake users by execution count * fix(query-history): use BigQuery query info hash
This commit is contained in:
parent
ce1516b357
commit
e70ae1e63b
42 changed files with 3090 additions and 274 deletions
|
|
@ -148,6 +148,13 @@ fix the prerequisite. If the later schema-context build also fails, interactive
|
||||||
setup offers **Disable query history and retry** so you can finish database
|
setup offers **Disable query history and retry** so you can finish database
|
||||||
setup with `connections.<id>.context.queryHistory.enabled: false`.
|
setup with `connections.<id>.context.queryHistory.enabled: false`.
|
||||||
|
|
||||||
|
After the schema scan completes, setup can derive query-history service-account
|
||||||
|
filters from in-scope history. If **ktx** finds clear operational roles, it
|
||||||
|
prints each proposed exclusion with a reason and writes
|
||||||
|
`connections.<id>.context.queryHistory.filters.serviceAccounts` only when you
|
||||||
|
apply the proposal. In non-interactive setup with `--yes`, the proposal is
|
||||||
|
applied automatically. Existing `serviceAccounts` blocks are never overwritten.
|
||||||
|
|
||||||
For BigQuery, the remediation tells you to grant `roles/bigquery.resourceViewer`
|
For BigQuery, the remediation tells you to grant `roles/bigquery.resourceViewer`
|
||||||
on the BigQuery project, or grant a custom role that contains
|
on the BigQuery project, or grant a custom role that contains
|
||||||
`bigquery.jobs.listAll`.
|
`bigquery.jobs.listAll`.
|
||||||
|
|
|
||||||
|
|
@ -179,9 +179,22 @@ connections:
|
||||||
context:
|
context:
|
||||||
queryHistory:
|
queryHistory:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
enabledSchemas:
|
||||||
|
- orbit_raw
|
||||||
|
- orbit_analytics
|
||||||
minExecutions: 5
|
minExecutions: 5
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- `enabledSchemas`: Optional list of schema or dataset names that query-history
|
||||||
|
ingest may mine. Omit it to let **ktx** derive the modeled schema floor from
|
||||||
|
the connection and semantic-layer sources. Use `["*"]` to disable the floor
|
||||||
|
for discovery runs.
|
||||||
|
- `filters.serviceAccounts`: Optional service-account filter block. During
|
||||||
|
setup, when query history is enabled and no service-account block already
|
||||||
|
exists, **ktx** can propose exact role patterns such as `^svc_loader$` from
|
||||||
|
observed in-scope query history. The block uses `mode: exclude` and remains
|
||||||
|
hand-editable.
|
||||||
|
|
||||||
### Metabase
|
### Metabase
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,10 @@ isolation.
|
||||||
## Query history
|
## Query history
|
||||||
|
|
||||||
PostgreSQL, BigQuery, and Snowflake can add query-history context: common joins,
|
PostgreSQL, BigQuery, and Snowflake can add query-history context: common joins,
|
||||||
filters, service-account patterns, redaction rules, and high-usage templates.
|
filters, redaction rules, high-usage templates, and service-account exclusions.
|
||||||
|
When query history is enabled during setup, **ktx** reviews observed in-scope
|
||||||
|
roles and can write exact `filters.serviceAccounts` patterns for operational
|
||||||
|
traffic such as loader or refresh roles.
|
||||||
|
|
||||||
Enable it during setup, store it under `connections.<id>.context.queryHistory`,
|
Enable it during setup, store it under `connections.<id>.context.queryHistory`,
|
||||||
or request it for one run:
|
or request it for one run:
|
||||||
|
|
|
||||||
|
|
@ -200,27 +200,78 @@ export class BigQueryHistoricSqlQueryHistoryReader {
|
||||||
config: HistoricSqlUnifiedPullConfig,
|
config: HistoricSqlUnifiedPullConfig,
|
||||||
): AsyncIterable<AggregatedTemplate> {
|
): AsyncIterable<AggregatedTemplate> {
|
||||||
const sql = `
|
const sql = `
|
||||||
|
WITH filtered_jobs AS (
|
||||||
|
SELECT
|
||||||
|
COALESCE(query_info.query_hashes.normalized_literals, TO_HEX(SHA256(query))) AS template_id,
|
||||||
|
query,
|
||||||
|
user_email,
|
||||||
|
creation_time,
|
||||||
|
end_time,
|
||||||
|
error_result
|
||||||
|
FROM ${this.viewPath}
|
||||||
|
WHERE job_type = 'QUERY'
|
||||||
|
AND statement_type IN ('SELECT', 'MERGE')
|
||||||
|
AND creation_time >= ${timestampExpression(window.start)}
|
||||||
|
AND creation_time < ${timestampExpression(window.end)}
|
||||||
|
AND query IS NOT NULL
|
||||||
|
),
|
||||||
|
template_stats AS (
|
||||||
|
SELECT
|
||||||
|
template_id,
|
||||||
|
MIN(query) AS canonical_sql,
|
||||||
|
COUNT(*) AS executions,
|
||||||
|
COUNT(DISTINCT user_email) AS distinct_users,
|
||||||
|
MIN(creation_time) AS first_seen,
|
||||||
|
MAX(creation_time) AS last_seen,
|
||||||
|
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(50)] AS p50_ms,
|
||||||
|
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(95)] AS p95_ms,
|
||||||
|
SAFE_DIVIDE(COUNTIF(error_result IS NOT NULL), COUNT(*)) AS error_rate,
|
||||||
|
CAST(NULL AS INT64) AS rows_produced
|
||||||
|
FROM filtered_jobs
|
||||||
|
GROUP BY template_id
|
||||||
|
HAVING COUNT(*) >= ${config.minExecutions}
|
||||||
|
),
|
||||||
|
template_users AS (
|
||||||
|
SELECT
|
||||||
|
template_id,
|
||||||
|
user_email AS user,
|
||||||
|
COUNT(*) AS executions,
|
||||||
|
MAX(creation_time) AS last_seen
|
||||||
|
FROM filtered_jobs
|
||||||
|
GROUP BY template_id, user_email
|
||||||
|
)
|
||||||
SELECT
|
SELECT
|
||||||
query_hash AS template_id,
|
stats.template_id,
|
||||||
MIN(query) AS canonical_sql,
|
stats.canonical_sql,
|
||||||
COUNT(*) AS executions,
|
stats.executions,
|
||||||
COUNT(DISTINCT user_email) AS distinct_users,
|
stats.distinct_users,
|
||||||
MIN(creation_time) AS first_seen,
|
stats.first_seen,
|
||||||
MAX(creation_time) AS last_seen,
|
stats.last_seen,
|
||||||
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(50)] AS p50_ms,
|
stats.p50_ms,
|
||||||
APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(95)] AS p95_ms,
|
stats.p95_ms,
|
||||||
SAFE_DIVIDE(COUNTIF(error_result IS NOT NULL), COUNT(*)) AS error_rate,
|
stats.error_rate,
|
||||||
CAST(NULL AS INT64) AS rows_produced,
|
stats.rows_produced,
|
||||||
TO_JSON_STRING(ARRAY_AGG(STRUCT(user_email AS user, 1 AS executions) ORDER BY creation_time DESC LIMIT 5)) AS top_users
|
TO_JSON_STRING(
|
||||||
FROM ${this.viewPath}
|
ARRAY_AGG(
|
||||||
WHERE job_type = 'QUERY'
|
STRUCT(users.user AS user, users.executions AS executions)
|
||||||
AND statement_type IN ('SELECT', 'MERGE')
|
ORDER BY users.executions DESC, users.last_seen DESC
|
||||||
AND creation_time >= ${timestampExpression(window.start)}
|
)
|
||||||
AND creation_time < ${timestampExpression(window.end)}
|
) AS top_users
|
||||||
AND query IS NOT NULL
|
FROM template_stats AS stats
|
||||||
GROUP BY query_hash
|
JOIN template_users AS users
|
||||||
HAVING COUNT(*) >= ${config.minExecutions}
|
ON users.template_id = stats.template_id
|
||||||
ORDER BY executions DESC`.trim();
|
GROUP BY
|
||||||
|
stats.template_id,
|
||||||
|
stats.canonical_sql,
|
||||||
|
stats.executions,
|
||||||
|
stats.distinct_users,
|
||||||
|
stats.first_seen,
|
||||||
|
stats.last_seen,
|
||||||
|
stats.p50_ms,
|
||||||
|
stats.p95_ms,
|
||||||
|
stats.error_rate,
|
||||||
|
stats.rows_produced
|
||||||
|
ORDER BY stats.executions DESC`.trim();
|
||||||
const result = await queryClient(client).executeQuery(sql);
|
const result = await queryClient(client).executeQuery(sql);
|
||||||
if (result.error) {
|
if (result.error) {
|
||||||
throw grantsError(result.error);
|
throw grantsError(result.error);
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import { createHash } from 'node:crypto';
|
import { createHash } from 'node:crypto';
|
||||||
import { readFile, readdir } from 'node:fs/promises';
|
import { readFile, readdir } from 'node:fs/promises';
|
||||||
import { join, relative } from 'node:path';
|
import { join, relative } from 'node:path';
|
||||||
|
import { tableRefKey } from '../../../scan/table-ref.js';
|
||||||
import type { ChunkResult, DiffSet, ScopeDescriptor, WorkUnit } from '../../types.js';
|
import type { ChunkResult, DiffSet, ScopeDescriptor, WorkUnit } from '../../types.js';
|
||||||
import { isHistoricSqlPatternInputShardPath } from './pattern-inputs.js';
|
import { isHistoricSqlPatternInputShardPath } from './pattern-inputs.js';
|
||||||
import { stagedManifestSchema, stagedPatternsInputSchema, stagedTableInputSchema } from './types.js';
|
import { stagedManifestSchema, stagedPatternsInputSchema, stagedTableInputSchema } from './types.js';
|
||||||
|
|
@ -37,7 +38,7 @@ export async function chunkHistoricSqlUnifiedStagedDir(stagedDir: string, diffSe
|
||||||
}
|
}
|
||||||
const table = stagedTableInputSchema.parse(await readJson(stagedDir, path));
|
const table = stagedTableInputSchema.parse(await readJson(stagedDir, path));
|
||||||
workUnits.push({
|
workUnits.push({
|
||||||
unitKey: `historic-sql-table-${safeUnitKey(table.table)}`,
|
unitKey: `historic-sql-table-${safeUnitKey(tableRefKey(table.tableRef))}`,
|
||||||
displayLabel: `Historic SQL usage: ${table.table}`,
|
displayLabel: `Historic SQL usage: ${table.table}`,
|
||||||
rawFiles: [path],
|
rawFiles: [path],
|
||||||
dependencyPaths: ['manifest.json'],
|
dependencyPaths: ['manifest.json'],
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import { Buffer } from 'node:buffer';
|
import { Buffer } from 'node:buffer';
|
||||||
|
import { tableRefKey } from '../../../scan/table-ref.js';
|
||||||
import type { StagedPatternsInput } from './types.js';
|
import type { StagedPatternsInput } from './types.js';
|
||||||
|
|
||||||
const HISTORIC_SQL_PATTERN_WORKUNIT_DIR = 'patterns-input';
|
const HISTORIC_SQL_PATTERN_WORKUNIT_DIR = 'patterns-input';
|
||||||
|
|
@ -44,11 +45,16 @@ function sortedAuditTemplates(templates: readonly PatternTemplate[]): PatternTem
|
||||||
function sortedPatternCandidates(templates: readonly PatternTemplate[]): PatternTemplate[] {
|
function sortedPatternCandidates(templates: readonly PatternTemplate[]): PatternTemplate[] {
|
||||||
return [...templates]
|
return [...templates]
|
||||||
.filter((template) => template.tablesTouched.length >= 2)
|
.filter((template) => template.tablesTouched.length >= 2)
|
||||||
.map((template) => ({ ...template, tablesTouched: [...template.tablesTouched].sort() }))
|
.map((template) => ({
|
||||||
|
...template,
|
||||||
|
tablesTouched: [...template.tablesTouched].sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right))),
|
||||||
|
}))
|
||||||
.sort((left, right) => {
|
.sort((left, right) => {
|
||||||
const cardinality = right.tablesTouched.length - left.tablesTouched.length;
|
const cardinality = right.tablesTouched.length - left.tablesTouched.length;
|
||||||
if (cardinality !== 0) return cardinality;
|
if (cardinality !== 0) return cardinality;
|
||||||
const tableSignature = left.tablesTouched.join('\0').localeCompare(right.tablesTouched.join('\0'));
|
const leftSignature = left.tablesTouched.map(tableRefKey).join('\0');
|
||||||
|
const rightSignature = right.tablesTouched.map(tableRefKey).join('\0');
|
||||||
|
const tableSignature = leftSignature.localeCompare(rightSignature);
|
||||||
if (tableSignature !== 0) return tableSignature;
|
if (tableSignature !== 0) return tableSignature;
|
||||||
return left.id.localeCompare(right.id);
|
return left.id.localeCompare(right.id);
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,278 @@
|
||||||
|
import { z } from 'zod';
|
||||||
|
import type { KtxLlmRuntimePort } from '../../../../context/llm/runtime-port.js';
|
||||||
|
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
|
||||||
|
import { tableRefKey } from '../../../scan/table-ref.js';
|
||||||
|
import type { KtxTableRef } from '../../../scan/types.js';
|
||||||
|
import { bucketDistinctUsers, bucketExecutions, bucketRecency } from './buckets.js';
|
||||||
|
import {
|
||||||
|
compileHistoricSqlRedactionPatterns,
|
||||||
|
redactHistoricSqlText,
|
||||||
|
type HistoricSqlRedactionPattern,
|
||||||
|
} from './redaction.js';
|
||||||
|
import { includedQueryHistoryTableRefs } from './scope-membership.js';
|
||||||
|
import {
|
||||||
|
aggregatedTemplateSchema,
|
||||||
|
historicSqlUnifiedPullConfigSchema,
|
||||||
|
type AggregatedTemplate,
|
||||||
|
type HistoricSqlDialect,
|
||||||
|
type HistoricSqlReader,
|
||||||
|
} from './types.js';
|
||||||
|
|
||||||
|
export interface QueryHistoryFilterProposal {
|
||||||
|
excludedRoles: Array<{ role: string; reason: string; pattern: string }>;
|
||||||
|
consideredRoleCount: number;
|
||||||
|
skipped: { reason: 'no-llm' | 'no-daemon' | 'no-in-scope-history' | 'user-block-present' } | null;
|
||||||
|
warnings: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ProposeQueryHistoryServiceAccountFiltersInput {
|
||||||
|
connectionId: string;
|
||||||
|
dialect: HistoricSqlDialect;
|
||||||
|
queryClient: unknown;
|
||||||
|
reader: HistoricSqlReader;
|
||||||
|
sqlAnalysis: SqlAnalysisPort;
|
||||||
|
llmRuntime: KtxLlmRuntimePort | null;
|
||||||
|
pullConfig: unknown;
|
||||||
|
now?: Date;
|
||||||
|
userServiceAccountsPresent?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ParsedTemplateForPicker {
|
||||||
|
template: AggregatedTemplate;
|
||||||
|
tablesTouched: KtxTableRef[];
|
||||||
|
includedTables: KtxTableRef[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface RoleAccumulator {
|
||||||
|
role: string;
|
||||||
|
executions: number;
|
||||||
|
distinctUsers: number;
|
||||||
|
lastSeen: string;
|
||||||
|
tables: Map<string, KtxTableRef>;
|
||||||
|
templates: AggregatedTemplate[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface QueryHistoryRoleRecord {
|
||||||
|
role: string;
|
||||||
|
inScopeTables: string[];
|
||||||
|
executionsBucket: string;
|
||||||
|
distinctUsersBucket: string;
|
||||||
|
recencyBucket: string;
|
||||||
|
representativeTemplates: Array<{ id: string; canonicalSql: string; dialect: HistoricSqlDialect }>;
|
||||||
|
}
|
||||||
|
|
||||||
|
const queryHistoryFilterAdjudicationSchema = z.object({
|
||||||
|
roles: z.array(
|
||||||
|
z.object({
|
||||||
|
role: z.string().min(1),
|
||||||
|
exclude: z.boolean(),
|
||||||
|
reason: z.string().min(1),
|
||||||
|
}).strict(),
|
||||||
|
),
|
||||||
|
}).strict();
|
||||||
|
|
||||||
|
type QueryHistoryFilterAdjudication = z.infer<typeof queryHistoryFilterAdjudicationSchema>;
|
||||||
|
|
||||||
|
function emptyProposal(skipped: QueryHistoryFilterProposal['skipped'], warnings: string[] = []): QueryHistoryFilterProposal {
|
||||||
|
return { excludedRoles: [], consideredRoleCount: 0, skipped, warnings };
|
||||||
|
}
|
||||||
|
|
||||||
|
function displayTableRef(ref: KtxTableRef): string {
|
||||||
|
return [ref.catalog, ref.db, ref.name].filter((part): part is string => !!part && part.length > 0).join('.');
|
||||||
|
}
|
||||||
|
|
||||||
|
function redactTemplateSqlForPicker(
|
||||||
|
template: AggregatedTemplate,
|
||||||
|
redactors: readonly HistoricSqlRedactionPattern[],
|
||||||
|
): AggregatedTemplate {
|
||||||
|
if (redactors.length === 0) {
|
||||||
|
return template;
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
...template,
|
||||||
|
canonicalSql: redactHistoricSqlText(template.canonicalSql, redactors),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @internal */
|
||||||
|
export function regexEscapeForExactRolePattern(role: string): string {
|
||||||
|
return `^${role.replace(/[\\^$.*+?()[\]{}|]/g, '\\$&')}$`;
|
||||||
|
}
|
||||||
|
|
||||||
|
function recordRole(
|
||||||
|
acc: RoleAccumulator,
|
||||||
|
template: AggregatedTemplate,
|
||||||
|
tables: readonly KtxTableRef[],
|
||||||
|
executions: number,
|
||||||
|
): void {
|
||||||
|
acc.executions += executions;
|
||||||
|
acc.distinctUsers = Math.max(acc.distinctUsers, template.stats.distinctUsers);
|
||||||
|
acc.lastSeen = template.stats.lastSeen > acc.lastSeen ? template.stats.lastSeen : acc.lastSeen;
|
||||||
|
for (const table of tables) {
|
||||||
|
acc.tables.set(tableRefKey(table), table);
|
||||||
|
}
|
||||||
|
acc.templates.push(template);
|
||||||
|
}
|
||||||
|
|
||||||
|
function roleRecords(parsedTemplates: readonly ParsedTemplateForPicker[], now: Date): QueryHistoryRoleRecord[] {
|
||||||
|
const byRole = new Map<string, RoleAccumulator>();
|
||||||
|
for (const parsed of parsedTemplates) {
|
||||||
|
for (const entry of parsed.template.topUsers) {
|
||||||
|
if (!entry.user || entry.user.trim().length === 0 || entry.executions <= 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const role = entry.user.trim();
|
||||||
|
const acc =
|
||||||
|
byRole.get(role) ??
|
||||||
|
{
|
||||||
|
role,
|
||||||
|
executions: 0,
|
||||||
|
distinctUsers: 0,
|
||||||
|
lastSeen: '1970-01-01T00:00:00.000Z',
|
||||||
|
tables: new Map<string, KtxTableRef>(),
|
||||||
|
templates: [],
|
||||||
|
};
|
||||||
|
recordRole(acc, parsed.template, parsed.includedTables, entry.executions);
|
||||||
|
byRole.set(role, acc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return [...byRole.values()]
|
||||||
|
.sort((left, right) => right.executions - left.executions || left.role.localeCompare(right.role))
|
||||||
|
.map((acc) => ({
|
||||||
|
role: acc.role,
|
||||||
|
inScopeTables: [...acc.tables.entries()]
|
||||||
|
.sort(([left], [right]) => left.localeCompare(right))
|
||||||
|
.slice(0, 25)
|
||||||
|
.map(([, ref]) => displayTableRef(ref)),
|
||||||
|
executionsBucket: bucketExecutions(acc.executions),
|
||||||
|
distinctUsersBucket: bucketDistinctUsers(acc.distinctUsers),
|
||||||
|
recencyBucket: bucketRecency(acc.lastSeen, now),
|
||||||
|
representativeTemplates: [...acc.templates]
|
||||||
|
.sort((left, right) => right.stats.executions - left.stats.executions || left.templateId.localeCompare(right.templateId))
|
||||||
|
.slice(0, 3)
|
||||||
|
.map((template) => ({
|
||||||
|
id: template.templateId,
|
||||||
|
canonicalSql: template.canonicalSql,
|
||||||
|
dialect: template.dialect,
|
||||||
|
})),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
function adjudicationSystemPrompt(): string {
|
||||||
|
return [
|
||||||
|
'You are helping ktx decide whether observed query-history roles are operational service accounts.',
|
||||||
|
'Default every role to keep. Mark exclude true only when the aggregate evidence clearly shows loader, ELT, reverse-ETL, export, refresh, or maintenance traffic rather than analyst or BI-dashboard usage.',
|
||||||
|
'Use only the observed role records. Do not rely on a hardcoded denylist. Return structured output only.',
|
||||||
|
].join('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function proposeQueryHistoryServiceAccountFilters(
|
||||||
|
input: ProposeQueryHistoryServiceAccountFiltersInput,
|
||||||
|
): Promise<QueryHistoryFilterProposal> {
|
||||||
|
if (!input.llmRuntime) {
|
||||||
|
return emptyProposal({ reason: 'no-llm' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
|
||||||
|
const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
|
||||||
|
const now = input.now ?? new Date();
|
||||||
|
const windowDays = 'windowDays' in config ? config.windowDays : 90;
|
||||||
|
const windowStart = new Date(now.getTime() - windowDays * 24 * 60 * 60 * 1000);
|
||||||
|
const warnings: string[] = [];
|
||||||
|
const snapshot: AggregatedTemplate[] = [];
|
||||||
|
|
||||||
|
try {
|
||||||
|
for await (const row of input.reader.fetchAggregated(input.queryClient, { start: windowStart, end: now }, config)) {
|
||||||
|
snapshot.push(aggregatedTemplateSchema.parse(row));
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
return emptyProposal(null, [
|
||||||
|
`query_history_filter_picker_read_failed:${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (snapshot.length === 0) {
|
||||||
|
return emptyProposal({ reason: 'no-in-scope-history' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const analysisItems = snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql }));
|
||||||
|
const analysisOptions =
|
||||||
|
config.modeledTableCatalog.length > 0 ? { catalog: { tables: config.modeledTableCatalog } } : undefined;
|
||||||
|
let analysis: Awaited<ReturnType<SqlAnalysisPort['analyzeBatch']>>;
|
||||||
|
try {
|
||||||
|
analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, input.dialect, analysisOptions);
|
||||||
|
} catch (error) {
|
||||||
|
return emptyProposal({ reason: 'no-daemon' }, [
|
||||||
|
`query_history_filter_picker_analysis_failed:${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const parsedTemplates: ParsedTemplateForPicker[] = [];
|
||||||
|
for (const template of snapshot) {
|
||||||
|
const parsed = analysis.get(template.templateId);
|
||||||
|
if (!parsed || parsed.error) {
|
||||||
|
warnings.push(`query_history_filter_picker_parse_failed:${template.templateId}`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const tablesTouched = [...new Map(parsed.tablesTouched.map((ref) => [tableRefKey(ref), ref])).values()]
|
||||||
|
.filter((ref) => ref.name.length > 0)
|
||||||
|
.sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right)));
|
||||||
|
const includedTables = includedQueryHistoryTableRefs(tablesTouched, config);
|
||||||
|
if (includedTables.length === 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
parsedTemplates.push({
|
||||||
|
template: redactTemplateSqlForPicker(template, redactors),
|
||||||
|
tablesTouched,
|
||||||
|
includedTables,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const records = roleRecords(parsedTemplates, now);
|
||||||
|
if (records.length <= 1) {
|
||||||
|
return {
|
||||||
|
excludedRoles: [],
|
||||||
|
consideredRoleCount: records.length,
|
||||||
|
skipped: { reason: 'no-in-scope-history' },
|
||||||
|
warnings,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
let generated: QueryHistoryFilterAdjudication;
|
||||||
|
try {
|
||||||
|
generated = await input.llmRuntime.generateObject<QueryHistoryFilterAdjudication, typeof queryHistoryFilterAdjudicationSchema>({
|
||||||
|
role: 'candidateExtraction',
|
||||||
|
system: adjudicationSystemPrompt(),
|
||||||
|
prompt: JSON.stringify({ connectionId: input.connectionId, dialect: input.dialect, roles: records }),
|
||||||
|
schema: queryHistoryFilterAdjudicationSchema,
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
return {
|
||||||
|
excludedRoles: [],
|
||||||
|
consideredRoleCount: records.length,
|
||||||
|
skipped: { reason: 'no-llm' },
|
||||||
|
warnings: [
|
||||||
|
...warnings,
|
||||||
|
`query_history_filter_picker_llm_failed:${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const knownRoles = new Set(records.map((record) => record.role));
|
||||||
|
const excludedRoles = generated.roles
|
||||||
|
.filter((role) => role.exclude && knownRoles.has(role.role))
|
||||||
|
.sort((left, right) => left.role.localeCompare(right.role))
|
||||||
|
.map((role) => ({
|
||||||
|
role: role.role,
|
||||||
|
reason: role.reason,
|
||||||
|
pattern: regexEscapeForExactRolePattern(role.role),
|
||||||
|
}));
|
||||||
|
|
||||||
|
return {
|
||||||
|
excludedRoles,
|
||||||
|
consideredRoleCount: records.length,
|
||||||
|
skipped: input.userServiceAccountsPresent ? { reason: 'user-block-present' } : null,
|
||||||
|
warnings,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,260 @@
|
||||||
|
import type { Dirent } from 'node:fs';
|
||||||
|
import { access, readdir, readFile } from 'node:fs/promises';
|
||||||
|
import { join, relative } from 'node:path';
|
||||||
|
import YAML from 'yaml';
|
||||||
|
import { getDriverRegistration } from '../../../connections/drivers.js';
|
||||||
|
import { parseDottedTableEntry } from '../../../scan/enabled-tables.js';
|
||||||
|
import { tableRefKey, tableRefSet, type KtxTableRefKey } from '../../../scan/table-ref.js';
|
||||||
|
import type { KtxTableRef } from '../../../scan/types.js';
|
||||||
|
import { readLiveDatabaseTableFiles } from '../live-database/stage.js';
|
||||||
|
|
||||||
|
export interface QueryHistoryScopeFloorInput {
|
||||||
|
projectDir: string;
|
||||||
|
connectionId: string;
|
||||||
|
driver: string;
|
||||||
|
connection: Record<string, unknown>;
|
||||||
|
storedQueryHistory: Record<string, unknown>;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface QueryHistoryScopeFloor {
|
||||||
|
enabledTables: KtxTableRef[];
|
||||||
|
enabledTableKeys: ReadonlySet<KtxTableRefKey> | null;
|
||||||
|
enabledSchemas: string[];
|
||||||
|
modeledTableCatalog: KtxTableRef[];
|
||||||
|
floorDisabled: boolean;
|
||||||
|
warnings: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||||
|
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
function stringArray(value: unknown): string[] {
|
||||||
|
return Array.isArray(value)
|
||||||
|
? value
|
||||||
|
.filter((item): item is string => typeof item === 'string' && item.trim().length > 0)
|
||||||
|
.map((item) => item.trim())
|
||||||
|
: [];
|
||||||
|
}
|
||||||
|
|
||||||
|
function tableRefsFromValues(values: unknown): KtxTableRef[] {
|
||||||
|
if (!Array.isArray(values)) return [];
|
||||||
|
return values.flatMap((value) => {
|
||||||
|
if (typeof value === 'string') {
|
||||||
|
const ref = parseDottedTableEntry(value);
|
||||||
|
return ref ? [ref] : [];
|
||||||
|
}
|
||||||
|
if (isRecord(value) && typeof value.name === 'string' && value.name.length > 0) {
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
catalog: typeof value.catalog === 'string' ? value.catalog : null,
|
||||||
|
db: typeof value.db === 'string' ? value.db : null,
|
||||||
|
name: value.name,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
}
|
||||||
|
return [];
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function declaredSchemas(driver: string, connection: Record<string, unknown>): string[] {
|
||||||
|
const key = getDriverRegistration(driver)?.scopeConfigKey;
|
||||||
|
if (!key) return [];
|
||||||
|
return [...new Set(stringArray(connection[key]))].sort();
|
||||||
|
}
|
||||||
|
|
||||||
|
function uniqueSortedTableRefs(refs: readonly KtxTableRef[]): KtxTableRef[] {
|
||||||
|
const byKey = new Map<KtxTableRefKey, KtxTableRef>();
|
||||||
|
for (const ref of refs) {
|
||||||
|
byKey.set(tableRefKey(ref), ref);
|
||||||
|
}
|
||||||
|
return [...byKey.entries()]
|
||||||
|
.sort(([left], [right]) => left.localeCompare(right))
|
||||||
|
.map(([, ref]) => ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function latestLiveDatabaseScanDir(projectDir: string, connectionId: string): Promise<string | null> {
|
||||||
|
const root = join(projectDir, 'raw-sources', connectionId, 'live-database');
|
||||||
|
let entries: Dirent[];
|
||||||
|
try {
|
||||||
|
entries = await readdir(root, { withFileTypes: true });
|
||||||
|
} catch (error) {
|
||||||
|
if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') return null;
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
const syncDirs = entries
|
||||||
|
.filter((entry) => entry.isDirectory())
|
||||||
|
.map((entry) => entry.name)
|
||||||
|
.sort()
|
||||||
|
.reverse();
|
||||||
|
for (const syncDir of syncDirs) {
|
||||||
|
const absolute = join(root, syncDir);
|
||||||
|
try {
|
||||||
|
await access(join(absolute, 'connection.json'));
|
||||||
|
return absolute;
|
||||||
|
} catch {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function scannedTableRefs(
|
||||||
|
projectDir: string,
|
||||||
|
connectionId: string,
|
||||||
|
): Promise<{ refs: KtxTableRef[]; catalogAvailable: boolean; warnings: string[] }> {
|
||||||
|
const scanDir = await latestLiveDatabaseScanDir(projectDir, connectionId);
|
||||||
|
if (!scanDir) {
|
||||||
|
return { refs: [], catalogAvailable: false, warnings: [] };
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const tableFiles = await readLiveDatabaseTableFiles(scanDir);
|
||||||
|
return {
|
||||||
|
refs: uniqueSortedTableRefs(
|
||||||
|
tableFiles.map(({ table }) => ({ catalog: table.catalog, db: table.db, name: table.name })),
|
||||||
|
),
|
||||||
|
catalogAvailable: true,
|
||||||
|
warnings: [],
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
return {
|
||||||
|
refs: [],
|
||||||
|
catalogAvailable: false,
|
||||||
|
warnings: [
|
||||||
|
`query_history_scope_floor_catalog_read_failed:live_database_scan:${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function listYamlFiles(root: string): Promise<string[]> {
|
||||||
|
try {
|
||||||
|
const entries = await readdir(root, { withFileTypes: true, recursive: true });
|
||||||
|
return entries
|
||||||
|
.filter((entry) => entry.isFile() && /\.ya?ml$/i.test(entry.name))
|
||||||
|
.map((entry) => relative(root, join(entry.parentPath, entry.name)).replace(/\\/g, '/'))
|
||||||
|
.sort();
|
||||||
|
} catch (error) {
|
||||||
|
if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') return [];
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function refsFromManifest(content: string): KtxTableRef[] {
|
||||||
|
const parsed = YAML.parse(content) as unknown;
|
||||||
|
if (!isRecord(parsed) || !isRecord(parsed.tables)) return [];
|
||||||
|
return Object.values(parsed.tables).flatMap((entry) => {
|
||||||
|
if (!isRecord(entry) || typeof entry.table !== 'string') return [];
|
||||||
|
const ref = parseDottedTableEntry(entry.table);
|
||||||
|
return ref ? [ref] : [];
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function refsFromStandaloneSource(content: string): KtxTableRef[] {
|
||||||
|
const parsed = YAML.parse(content) as unknown;
|
||||||
|
if (!isRecord(parsed) || typeof parsed.table !== 'string') return [];
|
||||||
|
const ref = parseDottedTableEntry(parsed.table);
|
||||||
|
return ref ? [ref] : [];
|
||||||
|
}
|
||||||
|
|
||||||
|
async function semanticTableRefs(
|
||||||
|
projectDir: string,
|
||||||
|
connectionId: string,
|
||||||
|
): Promise<{ refs: KtxTableRef[]; warnings: string[] }> {
|
||||||
|
const root = join(projectDir, 'semantic-layer', connectionId);
|
||||||
|
const files = await listYamlFiles(root);
|
||||||
|
const refs: KtxTableRef[] = [];
|
||||||
|
const warnings: string[] = [];
|
||||||
|
for (const file of files) {
|
||||||
|
try {
|
||||||
|
const content = await readFile(join(root, file), 'utf-8');
|
||||||
|
refs.push(...(file.startsWith('_schema/') ? refsFromManifest(content) : refsFromStandaloneSource(content)));
|
||||||
|
} catch (error) {
|
||||||
|
warnings.push(
|
||||||
|
`query_history_scope_floor_catalog_read_failed:${file}:${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return { refs: uniqueSortedTableRefs(refs), warnings };
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function resolveQueryHistoryScopeFloor(input: QueryHistoryScopeFloorInput): Promise<QueryHistoryScopeFloor> {
|
||||||
|
const explicitEnabledTables = [
|
||||||
|
...tableRefsFromValues(input.storedQueryHistory.enabledTables),
|
||||||
|
...tableRefsFromValues(input.connection.enabled_tables),
|
||||||
|
];
|
||||||
|
const semanticTables = await semanticTableRefs(input.projectDir, input.connectionId);
|
||||||
|
const scannedTables = await scannedTableRefs(input.projectDir, input.connectionId);
|
||||||
|
const modeledTables = uniqueSortedTableRefs([
|
||||||
|
...semanticTables.refs,
|
||||||
|
...scannedTables.refs,
|
||||||
|
...explicitEnabledTables,
|
||||||
|
]);
|
||||||
|
const warnings = [...semanticTables.warnings, ...scannedTables.warnings];
|
||||||
|
|
||||||
|
if (explicitEnabledTables.length > 0) {
|
||||||
|
return {
|
||||||
|
enabledTables: explicitEnabledTables,
|
||||||
|
enabledTableKeys: tableRefSet(explicitEnabledTables),
|
||||||
|
enabledSchemas: [],
|
||||||
|
modeledTableCatalog: modeledTables,
|
||||||
|
floorDisabled: false,
|
||||||
|
warnings,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const explicitSchemas = stringArray(input.storedQueryHistory.enabledSchemas);
|
||||||
|
if (explicitSchemas.includes('*')) {
|
||||||
|
return {
|
||||||
|
enabledTables: [],
|
||||||
|
enabledTableKeys: null,
|
||||||
|
enabledSchemas: ['*'],
|
||||||
|
modeledTableCatalog: modeledTables,
|
||||||
|
floorDisabled: true,
|
||||||
|
warnings,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (explicitSchemas.length > 0) {
|
||||||
|
if (!scannedTables.catalogAvailable || modeledTables.length === 0) {
|
||||||
|
return {
|
||||||
|
enabledTables: [],
|
||||||
|
enabledTableKeys: null,
|
||||||
|
enabledSchemas: ['*'],
|
||||||
|
modeledTableCatalog: modeledTables,
|
||||||
|
floorDisabled: true,
|
||||||
|
warnings: [...warnings, 'query_history_scope_floor_disabled:catalog_unavailable'],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
enabledTables: [],
|
||||||
|
enabledTableKeys: null,
|
||||||
|
enabledSchemas: [...new Set(explicitSchemas)].sort(),
|
||||||
|
modeledTableCatalog: modeledTables,
|
||||||
|
floorDisabled: false,
|
||||||
|
warnings,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const schemas = new Set(declaredSchemas(input.driver, input.connection));
|
||||||
|
for (const ref of semanticTables.refs) {
|
||||||
|
if (ref.db) schemas.add(ref.db);
|
||||||
|
}
|
||||||
|
if (schemas.size > 0 && (!scannedTables.catalogAvailable || modeledTables.length === 0)) {
|
||||||
|
return {
|
||||||
|
enabledTables: [],
|
||||||
|
enabledTableKeys: null,
|
||||||
|
enabledSchemas: ['*'],
|
||||||
|
modeledTableCatalog: modeledTables,
|
||||||
|
floorDisabled: true,
|
||||||
|
warnings: [...warnings, 'query_history_scope_floor_disabled:catalog_unavailable'],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
enabledTables: [],
|
||||||
|
enabledTableKeys: null,
|
||||||
|
enabledSchemas: [...schemas].sort(),
|
||||||
|
modeledTableCatalog: modeledTables,
|
||||||
|
floorDisabled: false,
|
||||||
|
warnings,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,45 @@
|
||||||
|
import { tableRefKey, tableRefSet } from '../../../scan/table-ref.js';
|
||||||
|
import type { KtxTableRef } from '../../../scan/types.js';
|
||||||
|
|
||||||
|
export interface QueryHistoryScopeMembershipConfig {
|
||||||
|
enabledTables: readonly KtxTableRef[];
|
||||||
|
enabledSchemas: readonly string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
function schemaNameForRef(ref: KtxTableRef): string | null {
|
||||||
|
return ref.db && ref.db.length > 0 ? ref.db : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function schemaNamesFromConfig(enabledSchemas: readonly string[]): Set<string> {
|
||||||
|
return new Set(enabledSchemas.filter((schema) => schema !== '*'));
|
||||||
|
}
|
||||||
|
|
||||||
|
export function isQueryHistoryScopeFloorDisabled(config: QueryHistoryScopeMembershipConfig): boolean {
|
||||||
|
return config.enabledSchemas.includes('*');
|
||||||
|
}
|
||||||
|
|
||||||
|
export function shouldFailOpenQueryHistoryScope(config: QueryHistoryScopeMembershipConfig): boolean {
|
||||||
|
return (
|
||||||
|
config.enabledTables.length === 0 &&
|
||||||
|
!isQueryHistoryScopeFloorDisabled(config) &&
|
||||||
|
config.enabledSchemas.length === 0
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function includedQueryHistoryTableRefs(
|
||||||
|
tablesTouched: readonly KtxTableRef[],
|
||||||
|
config: QueryHistoryScopeMembershipConfig,
|
||||||
|
): KtxTableRef[] {
|
||||||
|
if (config.enabledTables.length > 0) {
|
||||||
|
const enabled = tableRefSet(config.enabledTables);
|
||||||
|
return tablesTouched.filter((ref) => enabled.has(tableRefKey(ref)));
|
||||||
|
}
|
||||||
|
if (isQueryHistoryScopeFloorDisabled(config) || shouldFailOpenQueryHistoryScope(config)) {
|
||||||
|
return [...tablesTouched];
|
||||||
|
}
|
||||||
|
const schemas = schemaNamesFromConfig(config.enabledSchemas);
|
||||||
|
return tablesTouched.filter((ref) => {
|
||||||
|
const schema = schemaNameForRef(ref);
|
||||||
|
return schema !== null && schemas.has(schema);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
@ -188,26 +188,75 @@ export class SnowflakeHistoricSqlQueryHistoryReader {
|
||||||
config: HistoricSqlUnifiedPullConfig,
|
config: HistoricSqlUnifiedPullConfig,
|
||||||
): AsyncIterable<AggregatedTemplate> {
|
): AsyncIterable<AggregatedTemplate> {
|
||||||
const sql = `
|
const sql = `
|
||||||
|
WITH filtered_queries AS (
|
||||||
|
SELECT
|
||||||
|
query_hash,
|
||||||
|
query_text,
|
||||||
|
user_name,
|
||||||
|
start_time,
|
||||||
|
total_elapsed_time,
|
||||||
|
execution_status,
|
||||||
|
rows_produced
|
||||||
|
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
|
||||||
|
WHERE query_text IS NOT NULL
|
||||||
|
AND query_type IN ('SELECT', 'MERGE')
|
||||||
|
AND start_time >= ${timestampLiteral(window.start)}
|
||||||
|
AND start_time < ${timestampLiteral(window.end)}
|
||||||
|
),
|
||||||
|
template_stats AS (
|
||||||
|
SELECT
|
||||||
|
query_hash AS template_id,
|
||||||
|
MIN(query_text) AS canonical_sql,
|
||||||
|
COUNT(*) AS executions,
|
||||||
|
COUNT(DISTINCT user_name) AS distinct_users,
|
||||||
|
MIN(start_time) AS first_seen,
|
||||||
|
MAX(start_time) AS last_seen,
|
||||||
|
APPROX_PERCENTILE(total_elapsed_time, 0.50) AS p50_ms,
|
||||||
|
APPROX_PERCENTILE(total_elapsed_time, 0.95) AS p95_ms,
|
||||||
|
DIV0(COUNT_IF(execution_status != 'SUCCESS'), COUNT(*)) AS error_rate,
|
||||||
|
SUM(rows_produced) AS rows_produced
|
||||||
|
FROM filtered_queries
|
||||||
|
GROUP BY query_hash
|
||||||
|
HAVING COUNT(*) >= ${config.minExecutions}
|
||||||
|
),
|
||||||
|
template_users AS (
|
||||||
|
SELECT
|
||||||
|
query_hash AS template_id,
|
||||||
|
user_name AS user,
|
||||||
|
COUNT(*) AS executions,
|
||||||
|
MAX(start_time) AS last_seen
|
||||||
|
FROM filtered_queries
|
||||||
|
GROUP BY query_hash, user_name
|
||||||
|
)
|
||||||
SELECT
|
SELECT
|
||||||
query_hash AS template_id,
|
stats.template_id,
|
||||||
MIN(query_text) AS canonical_sql,
|
stats.canonical_sql,
|
||||||
COUNT(*) AS executions,
|
stats.executions,
|
||||||
COUNT(DISTINCT user_name) AS distinct_users,
|
stats.distinct_users,
|
||||||
MIN(start_time) AS first_seen,
|
stats.first_seen,
|
||||||
MAX(start_time) AS last_seen,
|
stats.last_seen,
|
||||||
APPROX_PERCENTILE(total_elapsed_time, 0.50) AS p50_ms,
|
stats.p50_ms,
|
||||||
APPROX_PERCENTILE(total_elapsed_time, 0.95) AS p95_ms,
|
stats.p95_ms,
|
||||||
DIV0(COUNT_IF(execution_status != 'SUCCESS'), COUNT(*)) AS error_rate,
|
stats.error_rate,
|
||||||
SUM(rows_produced) AS rows_produced,
|
stats.rows_produced,
|
||||||
ARRAY_AGG(OBJECT_CONSTRUCT('user', user_name, 'executions', 1)) WITHIN GROUP (ORDER BY start_time DESC)::string AS top_users
|
ARRAY_AGG(
|
||||||
FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
|
OBJECT_CONSTRUCT('user', users.user, 'executions', users.executions)
|
||||||
WHERE query_text IS NOT NULL
|
) WITHIN GROUP (ORDER BY users.executions DESC, users.last_seen DESC)::string AS top_users
|
||||||
AND query_type IN ('SELECT', 'MERGE')
|
FROM template_stats AS stats
|
||||||
AND start_time >= ${timestampLiteral(window.start)}
|
JOIN template_users AS users
|
||||||
AND start_time < ${timestampLiteral(window.end)}
|
ON users.template_id = stats.template_id
|
||||||
GROUP BY query_hash
|
GROUP BY
|
||||||
HAVING COUNT(*) >= ${config.minExecutions}
|
stats.template_id,
|
||||||
ORDER BY executions DESC`.trim();
|
stats.canonical_sql,
|
||||||
|
stats.executions,
|
||||||
|
stats.distinct_users,
|
||||||
|
stats.first_seen,
|
||||||
|
stats.last_seen,
|
||||||
|
stats.p50_ms,
|
||||||
|
stats.p95_ms,
|
||||||
|
stats.error_rate,
|
||||||
|
stats.rows_produced
|
||||||
|
ORDER BY stats.executions DESC`.trim();
|
||||||
const result = await queryClient(client).executeQuery(sql);
|
const result = await queryClient(client).executeQuery(sql);
|
||||||
if (result.error) {
|
if (result.error) {
|
||||||
throw grantsError(result.error);
|
throw grantsError(result.error);
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
import { mkdir, writeFile } from 'node:fs/promises';
|
import { mkdir, writeFile } from 'node:fs/promises';
|
||||||
import { dirname, join } from 'node:path';
|
import { dirname, join } from 'node:path';
|
||||||
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
|
import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
|
||||||
|
import { tableRefKey, type KtxTableRefKey } from '../../../scan/table-ref.js';
|
||||||
|
import type { KtxTableRef } from '../../../scan/types.js';
|
||||||
import {
|
import {
|
||||||
bucketDistinctUsers,
|
bucketDistinctUsers,
|
||||||
bucketErrorRate,
|
bucketErrorRate,
|
||||||
|
|
@ -15,6 +17,11 @@ import {
|
||||||
redactHistoricSqlText,
|
redactHistoricSqlText,
|
||||||
type HistoricSqlRedactionPattern,
|
type HistoricSqlRedactionPattern,
|
||||||
} from './redaction.js';
|
} from './redaction.js';
|
||||||
|
import {
|
||||||
|
includedQueryHistoryTableRefs,
|
||||||
|
isQueryHistoryScopeFloorDisabled,
|
||||||
|
shouldFailOpenQueryHistoryScope,
|
||||||
|
} from './scope-membership.js';
|
||||||
import {
|
import {
|
||||||
HISTORIC_SQL_SOURCE_KEY,
|
HISTORIC_SQL_SOURCE_KEY,
|
||||||
aggregatedTemplateSchema,
|
aggregatedTemplateSchema,
|
||||||
|
|
@ -38,17 +45,13 @@ interface StageHistoricSqlAggregatedSnapshotInput {
|
||||||
|
|
||||||
interface ParsedTemplate {
|
interface ParsedTemplate {
|
||||||
template: AggregatedTemplate;
|
template: AggregatedTemplate;
|
||||||
tablesTouched: string[];
|
tablesTouched: KtxTableRef[];
|
||||||
includedTables: string[];
|
includedTables: KtxTableRef[];
|
||||||
columnsByClause: Record<string, string[]>;
|
columnsByClause: Record<string, string[]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface EnabledTableFilter {
|
|
||||||
exact: Set<string>;
|
|
||||||
uniqueUnqualified: Set<string>;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface TableAccumulator {
|
interface TableAccumulator {
|
||||||
|
tableRef: KtxTableRef;
|
||||||
table: string;
|
table: string;
|
||||||
executions: number;
|
executions: number;
|
||||||
distinctUsers: number;
|
distinctUsers: number;
|
||||||
|
|
@ -105,8 +108,7 @@ function shouldDropByUsers(template: AggregatedTemplate, config: HistoricSqlUnif
|
||||||
const matchingExecutions = template.topUsers
|
const matchingExecutions = template.topUsers
|
||||||
.filter((entry) => matchesAny(entry.user, patterns))
|
.filter((entry) => matchesAny(entry.user, patterns))
|
||||||
.reduce((sum, entry) => sum + entry.executions, 0);
|
.reduce((sum, entry) => sum + entry.executions, 0);
|
||||||
const allExecutions = template.topUsers.reduce((sum, entry) => sum + entry.executions, 0);
|
const serviceOnly = template.stats.executions > 0 && matchingExecutions >= template.stats.executions;
|
||||||
const serviceOnly = allExecutions > 0 && matchingExecutions >= allExecutions;
|
|
||||||
return service.mode === 'exclude' ? serviceOnly : !serviceOnly;
|
return service.mode === 'exclude' ? serviceOnly : !serviceOnly;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -122,90 +124,8 @@ function shouldDropTemplate(template: AggregatedTemplate, config: HistoricSqlUni
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
function normalizeTableIdentifier(value: string): string {
|
function displayTableRef(ref: KtxTableRef): string {
|
||||||
return value.trim().toLowerCase();
|
return [ref.catalog, ref.db, ref.name].filter((part): part is string => !!part && part.length > 0).join('.');
|
||||||
}
|
|
||||||
|
|
||||||
function unqualifiedTableIdentifier(value: string): string {
|
|
||||||
const parts = normalizeTableIdentifier(value).split('.').filter(Boolean);
|
|
||||||
return parts.at(-1) ?? '';
|
|
||||||
}
|
|
||||||
|
|
||||||
function buildEnabledTableFilter(enabledTables: string[]): EnabledTableFilter | null {
|
|
||||||
if (enabledTables.length === 0) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
const exact = new Set(enabledTables.map(normalizeTableIdentifier).filter((value) => value.length > 0));
|
|
||||||
const unqualifiedCounts = new Map<string, number>();
|
|
||||||
for (const table of exact) {
|
|
||||||
const unqualified = unqualifiedTableIdentifier(table);
|
|
||||||
if (unqualified.length > 0) {
|
|
||||||
unqualifiedCounts.set(unqualified, (unqualifiedCounts.get(unqualified) ?? 0) + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
exact,
|
|
||||||
uniqueUnqualified: new Set(
|
|
||||||
[...unqualifiedCounts.entries()]
|
|
||||||
.filter(([, count]) => count === 1)
|
|
||||||
.map(([table]) => table),
|
|
||||||
),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
function isEnabledTable(table: string, filter: EnabledTableFilter | null): boolean {
|
|
||||||
if (!filter) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
const normalized = normalizeTableIdentifier(table);
|
|
||||||
return filter.exact.has(normalized) || filter.uniqueUnqualified.has(unqualifiedTableIdentifier(normalized));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* pg_stat_statements records queries as written, so the same physical table can appear
|
|
||||||
* both bare (`accounts`, resolved via search_path) and schema-qualified
|
|
||||||
* (`orbit_raw.accounts`). Collapse a bare identifier into its schema-qualified form when
|
|
||||||
* exactly one qualified form shares its unqualified name, so the two never become separate
|
|
||||||
* work units. Ambiguous bare names (two qualified forms) are left untouched.
|
|
||||||
*/
|
|
||||||
function canonicalizeTableIdentifiers(parsedTemplates: ParsedTemplate[]): void {
|
|
||||||
const all = new Set<string>();
|
|
||||||
for (const parsed of parsedTemplates) {
|
|
||||||
for (const table of parsed.includedTables) {
|
|
||||||
all.add(table);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const qualifiedByUnqualified = new Map<string, Set<string>>();
|
|
||||||
for (const table of all) {
|
|
||||||
if (!table.includes('.')) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const unqualified = unqualifiedTableIdentifier(table);
|
|
||||||
if (unqualified.length === 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const forms = qualifiedByUnqualified.get(unqualified) ?? new Set<string>();
|
|
||||||
forms.add(table);
|
|
||||||
qualifiedByUnqualified.set(unqualified, forms);
|
|
||||||
}
|
|
||||||
const canonical = new Map<string, string>();
|
|
||||||
for (const table of all) {
|
|
||||||
if (table.includes('.')) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const forms = qualifiedByUnqualified.get(unqualifiedTableIdentifier(table));
|
|
||||||
if (forms && forms.size === 1) {
|
|
||||||
canonical.set(table, [...forms][0]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (canonical.size === 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const remap = (table: string): string => canonical.get(table) ?? table;
|
|
||||||
for (const parsed of parsedTemplates) {
|
|
||||||
parsed.includedTables = [...new Set(parsed.includedTables.map(remap))].sort();
|
|
||||||
parsed.tablesTouched = [...new Set(parsed.tablesTouched.map(remap))].sort();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function historicSqlWindowDays(config: HistoricSqlUnifiedPullConfig): number {
|
function historicSqlWindowDays(config: HistoricSqlUnifiedPullConfig): number {
|
||||||
|
|
@ -240,9 +160,10 @@ function recordJoin(acc: TableAccumulator, otherTable: string, columns: string[]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function accumulatorFor(table: string): TableAccumulator {
|
function accumulatorFor(tableRef: KtxTableRef): TableAccumulator {
|
||||||
return {
|
return {
|
||||||
table,
|
tableRef,
|
||||||
|
table: displayTableRef(tableRef),
|
||||||
executions: 0,
|
executions: 0,
|
||||||
distinctUsers: 0,
|
distinctUsers: 0,
|
||||||
errorRateNumerator: 0,
|
errorRateNumerator: 0,
|
||||||
|
|
@ -272,8 +193,8 @@ function addTemplate(acc: TableAccumulator, parsed: ParsedTemplate): void {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const joinColumns = parsed.columnsByClause.join ?? [];
|
const joinColumns = parsed.columnsByClause.join ?? [];
|
||||||
for (const otherTable of parsed.tablesTouched.filter((table) => table !== acc.table)) {
|
for (const otherTable of parsed.tablesTouched.filter((table) => tableRefKey(table) !== tableRefKey(acc.tableRef))) {
|
||||||
recordJoin(acc, otherTable, joinColumns, executions);
|
recordJoin(acc, displayTableRef(otherTable), joinColumns, executions);
|
||||||
}
|
}
|
||||||
acc.topTemplates.push(parsed.template);
|
acc.topTemplates.push(parsed.template);
|
||||||
}
|
}
|
||||||
|
|
@ -310,6 +231,7 @@ function toStagedTable(acc: TableAccumulator, now: Date): StagedTableInput {
|
||||||
|
|
||||||
return {
|
return {
|
||||||
table: acc.table,
|
table: acc.table,
|
||||||
|
tableRef: acc.tableRef,
|
||||||
stats: {
|
stats: {
|
||||||
executionsBucket: bucketExecutions(acc.executions),
|
executionsBucket: bucketExecutions(acc.executions),
|
||||||
distinctUsersBucket: bucketDistinctUsers(acc.distinctUsers),
|
distinctUsersBucket: bucketDistinctUsers(acc.distinctUsers),
|
||||||
|
|
@ -329,7 +251,7 @@ function toPatternsInput(parsedTemplates: ParsedTemplate[]): StagedPatternsInput
|
||||||
.map(({ template, tablesTouched }) => ({
|
.map(({ template, tablesTouched }) => ({
|
||||||
id: template.templateId,
|
id: template.templateId,
|
||||||
canonicalSql: template.canonicalSql,
|
canonicalSql: template.canonicalSql,
|
||||||
tablesTouched: [...tablesTouched].sort(),
|
tablesTouched: [...tablesTouched].sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right))),
|
||||||
executionsBucket: bucketExecutions(template.stats.executions),
|
executionsBucket: bucketExecutions(template.stats.executions),
|
||||||
distinctUsersBucket: bucketDistinctUsers(template.stats.distinctUsers),
|
distinctUsersBucket: bucketDistinctUsers(template.stats.distinctUsers),
|
||||||
dialect: template.dialect,
|
dialect: template.dialect,
|
||||||
|
|
@ -340,7 +262,6 @@ function toPatternsInput(parsedTemplates: ParsedTemplate[]): StagedPatternsInput
|
||||||
|
|
||||||
export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSqlAggregatedSnapshotInput): Promise<void> {
|
export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSqlAggregatedSnapshotInput): Promise<void> {
|
||||||
const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
|
const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
|
||||||
const enabledTableFilter = buildEnabledTableFilter(config.enabledTables);
|
|
||||||
const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
|
const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
|
||||||
const now = input.now ?? new Date();
|
const now = input.now ?? new Date();
|
||||||
const windowStart = new Date(now.getTime() - historicSqlWindowDays(config) * 24 * 60 * 60 * 1000);
|
const windowStart = new Date(now.getTime() - historicSqlWindowDays(config) * 24 * 60 * 60 * 1000);
|
||||||
|
|
@ -356,11 +277,25 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const analysis = await input.sqlAnalysis.analyzeBatch(
|
const analysisItems = snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql }));
|
||||||
snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql })),
|
const analysisOptions =
|
||||||
config.dialect,
|
config.modeledTableCatalog.length > 0 ? { catalog: { tables: config.modeledTableCatalog } } : undefined;
|
||||||
);
|
const warnings: string[] = [
|
||||||
const warnings: string[] = [];
|
...config.scopeFloorWarnings,
|
||||||
|
...(shouldFailOpenQueryHistoryScope(config) ? ['query_history_scope_floor_disabled:empty_modeled_scope'] : []),
|
||||||
|
];
|
||||||
|
let scopeDisabledByQualificationFailure = false;
|
||||||
|
let analysis: Awaited<ReturnType<SqlAnalysisPort['analyzeBatch']>>;
|
||||||
|
try {
|
||||||
|
analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, config.dialect, analysisOptions);
|
||||||
|
} catch (error) {
|
||||||
|
if (!analysisOptions || config.enabledTables.length > 0 || isQueryHistoryScopeFloorDisabled(config)) {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
warnings.push('query_history_scope_floor_disabled:catalog_qualification_failed');
|
||||||
|
scopeDisabledByQualificationFailure = true;
|
||||||
|
analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, config.dialect, undefined);
|
||||||
|
}
|
||||||
const parsedTemplates: ParsedTemplate[] = [];
|
const parsedTemplates: ParsedTemplate[] = [];
|
||||||
for (const template of snapshot) {
|
for (const template of snapshot) {
|
||||||
const parsed = analysis.get(template.templateId);
|
const parsed = analysis.get(template.templateId);
|
||||||
|
|
@ -368,8 +303,12 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
|
||||||
warnings.push(`parse_failed:${template.templateId}`);
|
warnings.push(`parse_failed:${template.templateId}`);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const tablesTouched = [...new Set(parsed.tablesTouched)].filter((table) => table.length > 0).sort();
|
const tablesTouched = [...new Map(parsed.tablesTouched.map((ref) => [tableRefKey(ref), ref])).values()]
|
||||||
const includedTables = tablesTouched.filter((table) => isEnabledTable(table, enabledTableFilter));
|
.filter((ref) => ref.name.length > 0)
|
||||||
|
.sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right)));
|
||||||
|
const includedTables = scopeDisabledByQualificationFailure
|
||||||
|
? [...tablesTouched]
|
||||||
|
: includedQueryHistoryTableRefs(tablesTouched, config);
|
||||||
if (includedTables.length === 0) {
|
if (includedTables.length === 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -383,24 +322,23 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
canonicalizeTableIdentifiers(parsedTemplates);
|
const byTable = new Map<KtxTableRefKey, TableAccumulator>();
|
||||||
|
|
||||||
const byTable = new Map<string, TableAccumulator>();
|
|
||||||
for (const parsed of parsedTemplates) {
|
for (const parsed of parsedTemplates) {
|
||||||
for (const table of parsed.includedTables) {
|
for (const tableRef of parsed.includedTables) {
|
||||||
const acc = byTable.get(table) ?? accumulatorFor(table);
|
const key = tableRefKey(tableRef);
|
||||||
|
const acc = byTable.get(key) ?? accumulatorFor(tableRef);
|
||||||
addTemplate(acc, parsed);
|
addTemplate(acc, parsed);
|
||||||
byTable.set(table, acc);
|
byTable.set(key, acc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
await mkdir(input.stagedDir, { recursive: true });
|
await mkdir(input.stagedDir, { recursive: true });
|
||||||
for (const [table, acc] of [...byTable.entries()].sort(([left], [right]) => left.localeCompare(right))) {
|
for (const [, acc] of [...byTable.entries()].sort((left, right) => left[0].localeCompare(right[0]))) {
|
||||||
await writeJson(input.stagedDir, `tables/${table}.json`, toStagedTable(acc, now));
|
await writeJson(input.stagedDir, `tables/${acc.table}.json`, toStagedTable(acc, now));
|
||||||
}
|
}
|
||||||
const patternsInput = toPatternsInput(parsedTemplates);
|
const patternsInput = toPatternsInput(parsedTemplates);
|
||||||
const patternInputSplit = splitHistoricSqlPatternInputs(patternsInput);
|
const patternInputSplit = splitHistoricSqlPatternInputs(patternsInput);
|
||||||
const allWarnings = [...warnings, ...patternInputSplit.warnings];
|
const allWarnings = [...new Set([...warnings, ...patternInputSplit.warnings])];
|
||||||
await writeJson(input.stagedDir, 'patterns-input.json', patternInputSplit.auditInput);
|
await writeJson(input.stagedDir, 'patterns-input.json', patternInputSplit.auditInput);
|
||||||
for (const shard of patternInputSplit.shards) {
|
for (const shard of patternInputSplit.shards) {
|
||||||
await writeJson(input.stagedDir, shard.path, shard.input);
|
await writeJson(input.stagedDir, shard.path, shard.input);
|
||||||
|
|
|
||||||
|
|
@ -8,9 +8,22 @@ export type HistoricSqlDialect = z.infer<typeof historicSqlDialectSchema>;
|
||||||
|
|
||||||
const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']);
|
const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']);
|
||||||
|
|
||||||
|
const ktxTableRefSchema = z.object({
|
||||||
|
catalog: z.string().nullable(),
|
||||||
|
db: z.string().nullable(),
|
||||||
|
name: z.string().min(1),
|
||||||
|
}).strict();
|
||||||
|
|
||||||
|
const ktxTableRefWithColumnsSchema = ktxTableRefSchema.extend({
|
||||||
|
columns: z.array(z.string().min(1)).optional(),
|
||||||
|
}).strict();
|
||||||
|
|
||||||
const historicSqlCommonPullConfigSchema = z.object({
|
const historicSqlCommonPullConfigSchema = z.object({
|
||||||
minExecutions: z.number().int().nonnegative().default(5),
|
minExecutions: z.number().int().nonnegative().default(5),
|
||||||
enabledTables: z.array(z.string().min(1)).default([]),
|
enabledTables: z.array(ktxTableRefSchema).default([]),
|
||||||
|
enabledSchemas: z.array(z.string().min(1)).default([]),
|
||||||
|
modeledTableCatalog: z.array(ktxTableRefWithColumnsSchema).default([]),
|
||||||
|
scopeFloorWarnings: z.array(z.string()).default([]),
|
||||||
filters: z.object({
|
filters: z.object({
|
||||||
serviceAccounts: z.object({
|
serviceAccounts: z.object({
|
||||||
patterns: z.array(z.string()).default([]),
|
patterns: z.array(z.string()).default([]),
|
||||||
|
|
@ -68,6 +81,7 @@ export type AggregatedTemplate = z.infer<typeof aggregatedTemplateSchema>;
|
||||||
|
|
||||||
export const stagedTableInputSchema = z.object({
|
export const stagedTableInputSchema = z.object({
|
||||||
table: z.string().min(1),
|
table: z.string().min(1),
|
||||||
|
tableRef: ktxTableRefSchema,
|
||||||
stats: z.object({
|
stats: z.object({
|
||||||
executionsBucket: z.string(),
|
executionsBucket: z.string(),
|
||||||
distinctUsersBucket: z.string(),
|
distinctUsersBucket: z.string(),
|
||||||
|
|
@ -93,7 +107,7 @@ export const stagedPatternsInputSchema = z.object({
|
||||||
templates: z.array(z.object({
|
templates: z.array(z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
canonicalSql: z.string(),
|
canonicalSql: z.string(),
|
||||||
tablesTouched: z.array(z.string()),
|
tablesTouched: z.array(ktxTableRefSchema),
|
||||||
executionsBucket: z.string(),
|
executionsBucket: z.string(),
|
||||||
distinctUsersBucket: z.string(),
|
distinctUsersBucket: z.string(),
|
||||||
dialect: historicSqlDialectSchema,
|
dialect: historicSqlDialectSchema,
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import { DbtSourceAdapter } from './adapters/dbt/dbt.adapter.js';
|
||||||
import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js';
|
import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js';
|
||||||
import { HistoricSqlSourceAdapter } from './adapters/historic-sql/historic-sql.adapter.js';
|
import { HistoricSqlSourceAdapter } from './adapters/historic-sql/historic-sql.adapter.js';
|
||||||
import { PostgresPgssReader } from './adapters/historic-sql/postgres-pgss-reader.js';
|
import { PostgresPgssReader } from './adapters/historic-sql/postgres-pgss-reader.js';
|
||||||
|
import { resolveQueryHistoryScopeFloor } from './adapters/historic-sql/scope-floor.js';
|
||||||
import {
|
import {
|
||||||
HISTORIC_SQL_SOURCE_KEY,
|
HISTORIC_SQL_SOURCE_KEY,
|
||||||
historicSqlUnifiedPullConfigSchema,
|
historicSqlUnifiedPullConfigSchema,
|
||||||
|
|
@ -179,12 +180,39 @@ function queryHistoryRecord(connection: unknown): Record<string, unknown> | null
|
||||||
return queryHistory;
|
return queryHistory;
|
||||||
}
|
}
|
||||||
|
|
||||||
function queryHistoryPullConfig(connection: unknown): Record<string, unknown> | null {
|
async function queryHistoryPullConfig(
|
||||||
|
project: KtxLocalProject,
|
||||||
|
connectionId: string,
|
||||||
|
connection: unknown,
|
||||||
|
): Promise<Record<string, unknown> | null> {
|
||||||
const queryHistory = queryHistoryRecord(connection);
|
const queryHistory = queryHistoryRecord(connection);
|
||||||
if (queryHistory?.enabled !== true || !isRecord(connection)) return null;
|
if (queryHistory?.enabled !== true || !isRecord(connection)) return null;
|
||||||
const dialect = historicSqlDialectByDriver.get(String(connection.driver ?? '').toLowerCase());
|
const driver = String(connection.driver ?? '').toLowerCase();
|
||||||
|
const dialect = historicSqlDialectByDriver.get(driver);
|
||||||
if (!dialect) return null;
|
if (!dialect) return null;
|
||||||
return { ...queryHistory, dialect };
|
const scopeFloor = await resolveQueryHistoryScopeFloor({
|
||||||
|
projectDir: project.projectDir,
|
||||||
|
connectionId,
|
||||||
|
driver,
|
||||||
|
connection,
|
||||||
|
storedQueryHistory: queryHistory,
|
||||||
|
});
|
||||||
|
const {
|
||||||
|
enabled: _enabled,
|
||||||
|
dialect: _dialect,
|
||||||
|
enabledTables: _enabledTables,
|
||||||
|
enabledSchemas: _enabledSchemas,
|
||||||
|
scopeFloorWarnings: _scopeFloorWarnings,
|
||||||
|
...stored
|
||||||
|
} = queryHistory;
|
||||||
|
return {
|
||||||
|
...stored,
|
||||||
|
dialect,
|
||||||
|
...(scopeFloor.enabledTables.length > 0 ? { enabledTables: scopeFloor.enabledTables } : {}),
|
||||||
|
...(scopeFloor.enabledSchemas.length > 0 ? { enabledSchemas: scopeFloor.enabledSchemas } : {}),
|
||||||
|
...(scopeFloor.modeledTableCatalog.length > 0 ? { modeledTableCatalog: scopeFloor.modeledTableCatalog } : {}),
|
||||||
|
...(scopeFloor.warnings.length > 0 ? { scopeFloorWarnings: scopeFloor.warnings } : {}),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function stringField(value: unknown): string | null {
|
function stringField(value: unknown): string | null {
|
||||||
|
|
@ -245,7 +273,7 @@ export async function localPullConfigForAdapter(
|
||||||
if (options.historicSqlPullConfigOverride) {
|
if (options.historicSqlPullConfigOverride) {
|
||||||
return historicSqlUnifiedPullConfigSchema.parse(options.historicSqlPullConfigOverride);
|
return historicSqlUnifiedPullConfigSchema.parse(options.historicSqlPullConfigOverride);
|
||||||
}
|
}
|
||||||
const queryHistory = queryHistoryPullConfig(connection);
|
const queryHistory = await queryHistoryPullConfig(project, connectionId, connection);
|
||||||
if (!queryHistory) {
|
if (!queryHistory) {
|
||||||
throw new Error(`Connection "${connectionId}" does not have context.queryHistory.enabled: true`);
|
throw new Error(`Connection "${connectionId}" does not have context.queryHistory.enabled: true`);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
import { request as httpRequest } from 'node:http';
|
import { request as httpRequest } from 'node:http';
|
||||||
import { request as httpsRequest } from 'node:https';
|
import { request as httpsRequest } from 'node:https';
|
||||||
import { URL } from 'node:url';
|
import { URL } from 'node:url';
|
||||||
|
import type { KtxTableRef } from '../scan/types.js';
|
||||||
import type {
|
import type {
|
||||||
SqlAnalysisBatchItem,
|
SqlAnalysisBatchItem,
|
||||||
|
SqlAnalysisBatchOptions,
|
||||||
SqlAnalysisBatchResult,
|
SqlAnalysisBatchResult,
|
||||||
SqlAnalysisDialect,
|
SqlAnalysisDialect,
|
||||||
SqlAnalysisFingerprintResult,
|
SqlAnalysisFingerprintResult,
|
||||||
|
|
@ -89,6 +91,14 @@ function optionalString(raw: Record<string, unknown>, field: string): string | n
|
||||||
throw new Error(`sql analysis response has invalid optional string field ${field}`);
|
throw new Error(`sql analysis response has invalid optional string field ${field}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function optionalNullableStringField(raw: Record<string, unknown>, field: string): string | null {
|
||||||
|
const value = raw[field];
|
||||||
|
if (value === null || value === undefined || typeof value === 'string') {
|
||||||
|
return value ?? null;
|
||||||
|
}
|
||||||
|
throw new Error(`sql analysis response has invalid optional nullable string field ${field}`);
|
||||||
|
}
|
||||||
|
|
||||||
function requiredStringArray(raw: Record<string, unknown>, field: string): string[] {
|
function requiredStringArray(raw: Record<string, unknown>, field: string): string[] {
|
||||||
const value = raw[field];
|
const value = raw[field];
|
||||||
if (!Array.isArray(value) || value.some((item) => typeof item !== 'string')) {
|
if (!Array.isArray(value) || value.some((item) => typeof item !== 'string')) {
|
||||||
|
|
@ -175,10 +185,34 @@ function mapColumnsByClause(raw: Record<string, unknown>): SqlAnalysisBatchResul
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function requiredTableRef(raw: unknown, field: string): KtxTableRef {
|
||||||
|
if (!raw || typeof raw !== 'object' || Array.isArray(raw)) {
|
||||||
|
throw new Error(`sql analysis response contains invalid table ref in ${field}`);
|
||||||
|
}
|
||||||
|
const record = raw as Record<string, unknown>;
|
||||||
|
const name = record.name;
|
||||||
|
if (typeof name !== 'string' || name.length === 0) {
|
||||||
|
throw new Error(`sql analysis response table ref in ${field} is missing name`);
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
catalog: optionalNullableStringField(record, 'catalog'),
|
||||||
|
db: optionalNullableStringField(record, 'db'),
|
||||||
|
name,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function requiredTableRefArray(raw: Record<string, unknown>, field: string): KtxTableRef[] {
|
||||||
|
const value = raw[field];
|
||||||
|
if (!Array.isArray(value)) {
|
||||||
|
throw new Error(`sql analysis response is missing table-ref[] field ${field}`);
|
||||||
|
}
|
||||||
|
return value.map((item, index) => requiredTableRef(item, `${field}.${index}`));
|
||||||
|
}
|
||||||
|
|
||||||
function mapBatchResult(raw: Record<string, unknown>): SqlAnalysisBatchResult {
|
function mapBatchResult(raw: Record<string, unknown>): SqlAnalysisBatchResult {
|
||||||
const error = optionalString(raw, 'error');
|
const error = optionalString(raw, 'error');
|
||||||
return {
|
return {
|
||||||
tablesTouched: requiredStringArray(raw, 'tables_touched'),
|
tablesTouched: requiredTableRefArray(raw, 'tables_touched'),
|
||||||
columnsByClause: mapColumnsByClause(raw),
|
columnsByClause: mapColumnsByClause(raw),
|
||||||
...(error !== undefined ? { error } : {}),
|
...(error !== undefined ? { error } : {}),
|
||||||
};
|
};
|
||||||
|
|
@ -215,10 +249,11 @@ export function createHttpSqlAnalysisPort(options: HttpSqlAnalysisPortOptions):
|
||||||
});
|
});
|
||||||
return mapResult(raw);
|
return mapResult(raw);
|
||||||
},
|
},
|
||||||
async analyzeBatch(items: SqlAnalysisBatchItem[], dialect: SqlAnalysisDialect) {
|
async analyzeBatch(items: SqlAnalysisBatchItem[], dialect: SqlAnalysisDialect, options?: SqlAnalysisBatchOptions) {
|
||||||
const raw = await requestJson('/sql/analyze-batch', {
|
const raw = await requestJson('/sql/analyze-batch', {
|
||||||
dialect,
|
dialect,
|
||||||
items,
|
items,
|
||||||
|
...(options?.catalog ? { catalog: options.catalog } : {}),
|
||||||
});
|
});
|
||||||
return mapBatchResponse(raw);
|
return mapBatchResponse(raw);
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,5 @@
|
||||||
|
import type { KtxTableRef } from '../scan/types.js';
|
||||||
|
|
||||||
export type SqlAnalysisDialect =
|
export type SqlAnalysisDialect =
|
||||||
| 'bigquery'
|
| 'bigquery'
|
||||||
| 'snowflake'
|
| 'snowflake'
|
||||||
|
|
@ -32,8 +34,20 @@ export interface SqlAnalysisBatchItem {
|
||||||
sql: string;
|
sql: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface SqlAnalysisCatalogTable extends KtxTableRef {
|
||||||
|
columns?: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface SqlAnalysisCatalog {
|
||||||
|
tables: SqlAnalysisCatalogTable[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface SqlAnalysisBatchOptions {
|
||||||
|
catalog?: SqlAnalysisCatalog;
|
||||||
|
}
|
||||||
|
|
||||||
export interface SqlAnalysisBatchResult {
|
export interface SqlAnalysisBatchResult {
|
||||||
tablesTouched: string[];
|
tablesTouched: KtxTableRef[];
|
||||||
columnsByClause: Partial<Record<SqlAnalysisClause, string[]>>;
|
columnsByClause: Partial<Record<SqlAnalysisClause, string[]>>;
|
||||||
error?: string | null;
|
error?: string | null;
|
||||||
}
|
}
|
||||||
|
|
@ -48,6 +62,7 @@ export interface SqlAnalysisPort {
|
||||||
analyzeBatch(
|
analyzeBatch(
|
||||||
items: SqlAnalysisBatchItem[],
|
items: SqlAnalysisBatchItem[],
|
||||||
dialect: SqlAnalysisDialect,
|
dialect: SqlAnalysisDialect,
|
||||||
|
options?: SqlAnalysisBatchOptions,
|
||||||
): Promise<Map<string, SqlAnalysisBatchResult>>;
|
): Promise<Map<string, SqlAnalysisBatchResult>>;
|
||||||
validateReadOnly(sql: string, dialect: SqlAnalysisDialect): Promise<SqlReadOnlyValidationResult>;
|
validateReadOnly(sql: string, dialect: SqlAnalysisDialect): Promise<SqlReadOnlyValidationResult>;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ import { BigQueryHistoricSqlQueryHistoryReader } from './context/ingest/adapters
|
||||||
import { historicSqlDialectForConnectionDriver } from './context/ingest/adapters/historic-sql/connection-dialect.js';
|
import { historicSqlDialectForConnectionDriver } from './context/ingest/adapters/historic-sql/connection-dialect.js';
|
||||||
import { createDaemonLiveDatabaseIntrospection } from './context/ingest/adapters/live-database/daemon-introspection.js';
|
import { createDaemonLiveDatabaseIntrospection } from './context/ingest/adapters/live-database/daemon-introspection.js';
|
||||||
import { createDefaultLocalIngestAdapters, type DefaultLocalIngestAdaptersOptions } from './context/ingest/local-adapters.js';
|
import { createDefaultLocalIngestAdapters, type DefaultLocalIngestAdaptersOptions } from './context/ingest/local-adapters.js';
|
||||||
import type { HistoricSqlReader } from './context/ingest/adapters/historic-sql/types.js';
|
import type { HistoricSqlDialect, HistoricSqlReader } from './context/ingest/adapters/historic-sql/types.js';
|
||||||
import type {
|
import type {
|
||||||
LiveDatabaseIntrospectionOptions,
|
LiveDatabaseIntrospectionOptions,
|
||||||
LiveDatabaseIntrospectionPort,
|
LiveDatabaseIntrospectionPort,
|
||||||
|
|
@ -31,7 +31,7 @@ import {
|
||||||
createManagedDaemonLookerTableIdentifierParser,
|
createManagedDaemonLookerTableIdentifierParser,
|
||||||
createManagedDaemonSqlAnalysisPort,
|
createManagedDaemonSqlAnalysisPort,
|
||||||
managedDaemonDatabaseIntrospectionOptions,
|
managedDaemonDatabaseIntrospectionOptions,
|
||||||
type ManagedPythonCoreDaemonOptions,
|
type ManagedPythonDaemonHttpOptions,
|
||||||
} from './managed-python-http.js';
|
} from './managed-python-http.js';
|
||||||
import type { KtxOperationalLogger } from './io/logger.js';
|
import type { KtxOperationalLogger } from './io/logger.js';
|
||||||
import { resolveKtxConfigReference } from './context/core/config-reference.js';
|
import { resolveKtxConfigReference } from './context/core/config-reference.js';
|
||||||
|
|
@ -161,10 +161,17 @@ export interface KtxCliLocalIngestAdaptersOptions extends DefaultLocalIngestAdap
|
||||||
historicSqlConnectionId?: string;
|
historicSqlConnectionId?: string;
|
||||||
sqlAnalysis?: SqlAnalysisPort;
|
sqlAnalysis?: SqlAnalysisPort;
|
||||||
sqlAnalysisUrl?: string;
|
sqlAnalysisUrl?: string;
|
||||||
managedDaemon?: ManagedPythonCoreDaemonOptions;
|
managedDaemon?: ManagedPythonDaemonHttpOptions;
|
||||||
logger?: KtxOperationalLogger;
|
logger?: KtxOperationalLogger;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface KtxCliHistoricSqlRuntime {
|
||||||
|
dialect: HistoricSqlDialect;
|
||||||
|
sqlAnalysis: SqlAnalysisPort;
|
||||||
|
reader: HistoricSqlReader;
|
||||||
|
queryClient: unknown;
|
||||||
|
}
|
||||||
|
|
||||||
function createEphemeralPostgresHistoricSqlClient(project: KtxLocalProject, connectionId: string) {
|
function createEphemeralPostgresHistoricSqlClient(project: KtxLocalProject, connectionId: string) {
|
||||||
const connection = project.config.connections[connectionId] as KtxPostgresConnectionConfig | undefined;
|
const connection = project.config.connections[connectionId] as KtxPostgresConnectionConfig | undefined;
|
||||||
const inputDriver = connection?.driver ?? 'unknown';
|
const inputDriver = connection?.driver ?? 'unknown';
|
||||||
|
|
@ -262,7 +269,10 @@ function bigQueryRegion(connection: KtxBigQueryConnectionConfig): string {
|
||||||
: 'us';
|
: 'us';
|
||||||
}
|
}
|
||||||
|
|
||||||
function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCliLocalIngestAdaptersOptions) {
|
function historicSqlOptionsForLocalRun(
|
||||||
|
project: KtxLocalProject,
|
||||||
|
options: KtxCliLocalIngestAdaptersOptions,
|
||||||
|
): KtxCliHistoricSqlRuntime | undefined {
|
||||||
const connectionId = options.historicSqlConnectionId;
|
const connectionId = options.historicSqlConnectionId;
|
||||||
if (!connectionId) {
|
if (!connectionId) {
|
||||||
return undefined;
|
return undefined;
|
||||||
|
|
@ -285,6 +295,7 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli
|
||||||
if (dialect === 'postgres') {
|
if (dialect === 'postgres') {
|
||||||
return {
|
return {
|
||||||
...base,
|
...base,
|
||||||
|
dialect,
|
||||||
reader: new PostgresPgssReader() satisfies HistoricSqlReader,
|
reader: new PostgresPgssReader() satisfies HistoricSqlReader,
|
||||||
queryClient: createEphemeralPostgresHistoricSqlClient(project, connectionId),
|
queryClient: createEphemeralPostgresHistoricSqlClient(project, connectionId),
|
||||||
};
|
};
|
||||||
|
|
@ -297,6 +308,7 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
...base,
|
...base,
|
||||||
|
dialect,
|
||||||
reader: new BigQueryHistoricSqlQueryHistoryReader({
|
reader: new BigQueryHistoricSqlQueryHistoryReader({
|
||||||
projectId: bigQueryProjectId(connection, process.env),
|
projectId: bigQueryProjectId(connection, process.env),
|
||||||
region: bigQueryRegion(connection),
|
region: bigQueryRegion(connection),
|
||||||
|
|
@ -307,6 +319,7 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...base,
|
...base,
|
||||||
|
dialect,
|
||||||
reader: new SnowflakeHistoricSqlQueryHistoryReader() satisfies HistoricSqlReader,
|
reader: new SnowflakeHistoricSqlQueryHistoryReader() satisfies HistoricSqlReader,
|
||||||
queryClient: {
|
queryClient: {
|
||||||
async executeQuery(query: string) {
|
async executeQuery(query: string) {
|
||||||
|
|
@ -318,11 +331,24 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function createKtxCliHistoricSqlRuntime(
|
||||||
|
project: KtxLocalProject,
|
||||||
|
connectionId: string,
|
||||||
|
options: KtxCliLocalIngestAdaptersOptions = {},
|
||||||
|
): KtxCliHistoricSqlRuntime | undefined {
|
||||||
|
return historicSqlOptionsForLocalRun(project, {
|
||||||
|
...options,
|
||||||
|
historicSqlConnectionId: connectionId,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
export function createKtxCliLocalIngestAdapters(
|
export function createKtxCliLocalIngestAdapters(
|
||||||
project: KtxLocalProject,
|
project: KtxLocalProject,
|
||||||
options: KtxCliLocalIngestAdaptersOptions = {},
|
options: KtxCliLocalIngestAdaptersOptions = {},
|
||||||
): SourceAdapter[] {
|
): SourceAdapter[] {
|
||||||
const historicSql = historicSqlOptionsForLocalRun(project, options);
|
const historicSql = options.historicSqlConnectionId
|
||||||
|
? createKtxCliHistoricSqlRuntime(project, options.historicSqlConnectionId, options)
|
||||||
|
: undefined;
|
||||||
const base = createDefaultLocalIngestAdapters(project, {
|
const base = createDefaultLocalIngestAdapters(project, {
|
||||||
...options,
|
...options,
|
||||||
databaseIntrospection: ktxCliDaemonDatabaseIntrospectionOptions(options),
|
databaseIntrospection: ktxCliDaemonDatabaseIntrospectionOptions(options),
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import type { KtxProgressPort } from './context/scan/types.js';
|
||||||
import type { KtxCliIo } from './index.js';
|
import type { KtxCliIo } from './index.js';
|
||||||
import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js';
|
import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js';
|
||||||
import { isDatabaseDriver, normalizeConnectionDriver } from './connection-drivers.js';
|
import { isDatabaseDriver, normalizeConnectionDriver } from './connection-drivers.js';
|
||||||
|
import { resolveQueryHistoryScopeFloor } from './context/ingest/adapters/historic-sql/scope-floor.js';
|
||||||
import {
|
import {
|
||||||
ensureManagedPythonCommandRuntime,
|
ensureManagedPythonCommandRuntime,
|
||||||
type KtxManagedPythonInstallPolicy,
|
type KtxManagedPythonInstallPolicy,
|
||||||
|
|
@ -19,6 +20,7 @@ import {
|
||||||
import { createAggregateProgressPort } from './progress-port-adapter.js';
|
import { createAggregateProgressPort } from './progress-port-adapter.js';
|
||||||
import { resolvePublicIngestRuntimeRequirements } from './runtime-requirements.js';
|
import { resolvePublicIngestRuntimeRequirements } from './runtime-requirements.js';
|
||||||
import type { KtxScanArgs, KtxScanDeps } from './scan.js';
|
import type { KtxScanArgs, KtxScanDeps } from './scan.js';
|
||||||
|
import type { KtxTableRef } from './context/scan/types.js';
|
||||||
import { profileMark } from './startup-profile.js';
|
import { profileMark } from './startup-profile.js';
|
||||||
import { isDemoConnection } from './telemetry/demo-detect.js';
|
import { isDemoConnection } from './telemetry/demo-detect.js';
|
||||||
import { emitProjectStackSnapshot, emitTelemetryEvent } from './telemetry/index.js';
|
import { emitProjectStackSnapshot, emitTelemetryEvent } from './telemetry/index.js';
|
||||||
|
|
@ -281,26 +283,35 @@ function positiveInteger(value: unknown): number | undefined {
|
||||||
return typeof value === 'number' && Number.isInteger(value) && value > 0 ? value : undefined;
|
return typeof value === 'number' && Number.isInteger(value) && value > 0 ? value : undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
function enabledTablesForConnection(connection: KtxProjectConnectionConfig): string[] | undefined {
|
/** @internal */
|
||||||
const raw = connection.enabled_tables;
|
export function queryHistoryPullConfig(input: {
|
||||||
if (!Array.isArray(raw)) {
|
|
||||||
return undefined;
|
|
||||||
}
|
|
||||||
const tables = raw.filter((value): value is string => typeof value === 'string' && value.trim().length > 0);
|
|
||||||
return tables.length > 0 ? tables : undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
function queryHistoryPullConfig(input: {
|
|
||||||
stored: Record<string, unknown>;
|
stored: Record<string, unknown>;
|
||||||
dialect: HistoricSqlDialect;
|
dialect: HistoricSqlDialect;
|
||||||
windowDays?: number;
|
windowDays?: number;
|
||||||
enabledTables?: string[];
|
enabledTables?: KtxTableRef[];
|
||||||
|
enabledSchemas?: string[];
|
||||||
|
modeledTableCatalog?: KtxTableRef[];
|
||||||
|
scopeFloorWarnings?: string[];
|
||||||
}): Record<string, unknown> {
|
}): Record<string, unknown> {
|
||||||
const { enabled: _enabled, dialect: _dialect, ...storedConfig } = input.stored;
|
const {
|
||||||
|
enabled: _enabled,
|
||||||
|
dialect: _dialect,
|
||||||
|
enabledTables: _enabledTables,
|
||||||
|
enabledSchemas: _enabledSchemas,
|
||||||
|
scopeFloorWarnings: _scopeFloorWarnings,
|
||||||
|
...storedConfig
|
||||||
|
} = input.stored;
|
||||||
return {
|
return {
|
||||||
...storedConfig,
|
...storedConfig,
|
||||||
dialect: input.dialect,
|
dialect: input.dialect,
|
||||||
...(input.enabledTables ? { enabledTables: input.enabledTables } : {}),
|
...(input.enabledTables && input.enabledTables.length > 0 ? { enabledTables: input.enabledTables } : {}),
|
||||||
|
...(input.enabledSchemas && input.enabledSchemas.length > 0 ? { enabledSchemas: input.enabledSchemas } : {}),
|
||||||
|
...(input.modeledTableCatalog && input.modeledTableCatalog.length > 0
|
||||||
|
? { modeledTableCatalog: input.modeledTableCatalog }
|
||||||
|
: {}),
|
||||||
|
...(input.scopeFloorWarnings && input.scopeFloorWarnings.length > 0
|
||||||
|
? { scopeFloorWarnings: input.scopeFloorWarnings }
|
||||||
|
: {}),
|
||||||
...(input.windowDays !== undefined ? { windowDays: input.windowDays } : {}),
|
...(input.windowDays !== undefined ? { windowDays: input.windowDays } : {}),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -361,7 +372,6 @@ function resolveDatabaseTargetOptions(input: {
|
||||||
stored: storedQh,
|
stored: storedQh,
|
||||||
dialect,
|
dialect,
|
||||||
windowDays: queryHistory.windowDays,
|
windowDays: queryHistory.windowDays,
|
||||||
enabledTables: enabledTablesForConnection(input.connection),
|
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
steps: ['database-schema', 'query-history'],
|
steps: ['database-schema', 'query-history'],
|
||||||
|
|
@ -374,6 +384,43 @@ function resolveDatabaseTargetOptions(input: {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function resolvedQueryHistoryPullConfigForTarget(
|
||||||
|
target: KtxPublicIngestPlanTarget,
|
||||||
|
project: KtxPublicIngestProject,
|
||||||
|
): Promise<Record<string, unknown> | null> {
|
||||||
|
if (target.operation !== 'database-ingest' || target.queryHistory?.enabled !== true || !target.queryHistory.dialect) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
const connection = project.config.connections[target.connectionId];
|
||||||
|
if (!connection) {
|
||||||
|
return (
|
||||||
|
target.queryHistory.pullConfig ??
|
||||||
|
queryHistoryPullConfig({
|
||||||
|
stored: {},
|
||||||
|
dialect: target.queryHistory.dialect,
|
||||||
|
windowDays: target.queryHistory.windowDays,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const stored = storedQueryHistory(connection);
|
||||||
|
const scopeFloor = await resolveQueryHistoryScopeFloor({
|
||||||
|
projectDir: project.projectDir,
|
||||||
|
connectionId: target.connectionId,
|
||||||
|
driver: target.driver,
|
||||||
|
connection: connection as Record<string, unknown>,
|
||||||
|
storedQueryHistory: stored,
|
||||||
|
});
|
||||||
|
return queryHistoryPullConfig({
|
||||||
|
stored,
|
||||||
|
dialect: target.queryHistory.dialect,
|
||||||
|
windowDays: target.queryHistory.windowDays,
|
||||||
|
enabledTables: scopeFloor.enabledTables,
|
||||||
|
enabledSchemas: scopeFloor.enabledSchemas,
|
||||||
|
modeledTableCatalog: scopeFloor.modeledTableCatalog,
|
||||||
|
scopeFloorWarnings: scopeFloor.warnings,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
function enrichmentReadinessGaps(config: KtxProjectConfig): string[] {
|
function enrichmentReadinessGaps(config: KtxProjectConfig): string[] {
|
||||||
const gaps: string[] = [];
|
const gaps: string[] = [];
|
||||||
if (config.llm.provider.backend === 'none' || !config.llm.models.default) {
|
if (config.llm.provider.backend === 'none' || !config.llm.models.default) {
|
||||||
|
|
@ -877,7 +924,7 @@ export async function executePublicIngestTarget(
|
||||||
project: KtxPublicIngestProject,
|
project: KtxPublicIngestProject,
|
||||||
): Promise<KtxPublicIngestTargetResult> {
|
): Promise<KtxPublicIngestTargetResult> {
|
||||||
const startedAt = performance.now();
|
const startedAt = performance.now();
|
||||||
const result = await runIngestTargetSteps(target, args, io, deps);
|
const result = await runIngestTargetSteps(target, args, io, deps, project);
|
||||||
// `io` may be a capture buffer for the scan/ingest step output; the telemetry
|
// `io` may be a capture buffer for the scan/ingest step output; the telemetry
|
||||||
// debug echo belongs on the real user-facing stream, which callers expose as
|
// debug echo belongs on the real user-facing stream, which callers expose as
|
||||||
// `deps.runtimeIo` (falling back to `io` when the step io is already real).
|
// `deps.runtimeIo` (falling back to `io` when the step io is already real).
|
||||||
|
|
@ -890,6 +937,7 @@ async function runIngestTargetSteps(
|
||||||
args: Extract<KtxPublicIngestArgs, { command: 'run' }>,
|
args: Extract<KtxPublicIngestArgs, { command: 'run' }>,
|
||||||
io: KtxCliIo,
|
io: KtxCliIo,
|
||||||
deps: KtxPublicIngestDeps,
|
deps: KtxPublicIngestDeps,
|
||||||
|
project: KtxPublicIngestProject,
|
||||||
): Promise<KtxPublicIngestTargetResult> {
|
): Promise<KtxPublicIngestTargetResult> {
|
||||||
if (target.preflightFailure) {
|
if (target.preflightFailure) {
|
||||||
if (target.operation === 'database-ingest') {
|
if (target.operation === 'database-ingest') {
|
||||||
|
|
@ -959,6 +1007,11 @@ async function runIngestTargetSteps(
|
||||||
if (target.queryHistory?.enabled === true) {
|
if (target.queryHistory?.enabled === true) {
|
||||||
const { runKtxIngest } = await import('./ingest.js');
|
const { runKtxIngest } = await import('./ingest.js');
|
||||||
const runIngest = deps.runIngest ?? runKtxIngest;
|
const runIngest = deps.runIngest ?? runKtxIngest;
|
||||||
|
const historicSqlPullConfigOverride =
|
||||||
|
(await resolvedQueryHistoryPullConfigForTarget(target, project)) ?? {
|
||||||
|
dialect: target.queryHistory.dialect,
|
||||||
|
...(target.queryHistory.windowDays !== undefined ? { windowDays: target.queryHistory.windowDays } : {}),
|
||||||
|
};
|
||||||
const ingestArgs: KtxIngestArgs = {
|
const ingestArgs: KtxIngestArgs = {
|
||||||
command: 'run',
|
command: 'run',
|
||||||
projectDir: args.projectDir,
|
projectDir: args.projectDir,
|
||||||
|
|
@ -969,11 +1022,7 @@ async function runIngestTargetSteps(
|
||||||
...(args.cliVersion ? { cliVersion: args.cliVersion } : {}),
|
...(args.cliVersion ? { cliVersion: args.cliVersion } : {}),
|
||||||
...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}),
|
...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}),
|
||||||
allowImplicitAdapter: true,
|
allowImplicitAdapter: true,
|
||||||
historicSqlPullConfigOverride:
|
historicSqlPullConfigOverride,
|
||||||
target.queryHistory.pullConfig ?? {
|
|
||||||
dialect: target.queryHistory.dialect,
|
|
||||||
...(target.queryHistory.windowDays !== undefined ? { windowDays: target.queryHistory.windowDays } : {}),
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
// Query history runs after the schema scan has already written its report
|
// Query history runs after the schema scan has already written its report
|
||||||
// into the shared target io, so it needs a phase-local capture. Reusing
|
// into the shared target io, so it needs a phase-local capture. Reusing
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,15 @@ import { delimiter, dirname, join } from 'node:path';
|
||||||
import { fileURLToPath } from 'node:url';
|
import { fileURLToPath } from 'node:url';
|
||||||
import { promisify } from 'node:util';
|
import { promisify } from 'node:util';
|
||||||
import { getDriverRegistration } from './context/connections/drivers.js';
|
import { getDriverRegistration } from './context/connections/drivers.js';
|
||||||
|
import { createLocalKtxLlmRuntimeFromConfig } from './context/llm/local-config.js';
|
||||||
|
import type { KtxLlmRuntimePort } from './context/llm/runtime-port.js';
|
||||||
import { queryHistoryDialectForConnection } from './context/ingest/adapters/historic-sql/connection-dialect.js';
|
import { queryHistoryDialectForConnection } from './context/ingest/adapters/historic-sql/connection-dialect.js';
|
||||||
|
import {
|
||||||
|
proposeQueryHistoryServiceAccountFilters,
|
||||||
|
type ProposeQueryHistoryServiceAccountFiltersInput,
|
||||||
|
type QueryHistoryFilterProposal,
|
||||||
|
} from './context/ingest/adapters/historic-sql/query-history-filter-picker.js';
|
||||||
|
import { resolveQueryHistoryScopeFloor } from './context/ingest/adapters/historic-sql/scope-floor.js';
|
||||||
import type { HistoricSqlDialect } from './context/ingest/adapters/historic-sql/types.js';
|
import type { HistoricSqlDialect } from './context/ingest/adapters/historic-sql/types.js';
|
||||||
import {
|
import {
|
||||||
runHistoricSqlReadinessProbe,
|
runHistoricSqlReadinessProbe,
|
||||||
|
|
@ -15,7 +23,7 @@ import { type KtxProjectConnectionConfig, serializeKtxProjectConfig } from './co
|
||||||
import { loadKtxProject } from './context/project/project.js';
|
import { loadKtxProject } from './context/project/project.js';
|
||||||
import { markKtxSetupStateStepComplete, setKtxSetupDatabaseConnectionIds } from './context/project/setup-config.js';
|
import { markKtxSetupStateStepComplete, setKtxSetupDatabaseConnectionIds } from './context/project/setup-config.js';
|
||||||
import type { KtxTableListEntry } from './context/scan/types.js';
|
import type { KtxTableListEntry } from './context/scan/types.js';
|
||||||
import type { KtxCliIo } from './cli-runtime.js';
|
import { getKtxCliPackageInfo, type KtxCliIo } from './cli-runtime.js';
|
||||||
import {
|
import {
|
||||||
errorMessage,
|
errorMessage,
|
||||||
flushPrefixedBufferedCommandOutput,
|
flushPrefixedBufferedCommandOutput,
|
||||||
|
|
@ -35,6 +43,10 @@ import {
|
||||||
type PickDatabaseScopeArgs,
|
type PickDatabaseScopeArgs,
|
||||||
} from './database-tree-picker.js';
|
} from './database-tree-picker.js';
|
||||||
import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js';
|
import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js';
|
||||||
|
import { createKtxCliHistoricSqlRuntime } from './local-adapters.js';
|
||||||
|
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||||
|
import type { ManagedPythonCoreDaemonOptions } from './managed-python-http.js';
|
||||||
|
import { queryHistoryPullConfig } from './public-ingest.js';
|
||||||
import { runKtxScan } from './scan.js';
|
import { runKtxScan } from './scan.js';
|
||||||
import { writeProjectLocalSecretReference } from './setup-secrets.js';
|
import { writeProjectLocalSecretReference } from './setup-secrets.js';
|
||||||
import { isDemoConnection } from './telemetry/demo-detect.js';
|
import { isDemoConnection } from './telemetry/demo-detect.js';
|
||||||
|
|
@ -61,6 +73,9 @@ export type KtxSetupDatabaseDriver =
|
||||||
export interface KtxSetupDatabasesArgs {
|
export interface KtxSetupDatabasesArgs {
|
||||||
projectDir: string;
|
projectDir: string;
|
||||||
inputMode: 'auto' | 'disabled';
|
inputMode: 'auto' | 'disabled';
|
||||||
|
yes?: boolean;
|
||||||
|
cliVersion?: string;
|
||||||
|
runtimeInstallPolicy?: KtxManagedPythonInstallPolicy;
|
||||||
databaseDrivers?: KtxSetupDatabaseDriver[];
|
databaseDrivers?: KtxSetupDatabaseDriver[];
|
||||||
databaseConnectionIds?: string[];
|
databaseConnectionIds?: string[];
|
||||||
databaseConnectionId?: string;
|
databaseConnectionId?: string;
|
||||||
|
|
@ -123,6 +138,13 @@ export interface KtxSetupDatabasesDeps {
|
||||||
listTables?: (projectDir: string, connectionId: string, schemas?: string[]) => Promise<KtxTableListEntry[]>;
|
listTables?: (projectDir: string, connectionId: string, schemas?: string[]) => Promise<KtxTableListEntry[]>;
|
||||||
pickDatabaseScope?: (args: PickDatabaseScopeArgs, io: KtxCliIo) => Promise<DatabaseScopePickResult>;
|
pickDatabaseScope?: (args: PickDatabaseScopeArgs, io: KtxCliIo) => Promise<DatabaseScopePickResult>;
|
||||||
historicSqlReadinessProbe?: HistoricSqlReadinessProbe;
|
historicSqlReadinessProbe?: HistoricSqlReadinessProbe;
|
||||||
|
queryHistoryFilterPicker?: (
|
||||||
|
input: ProposeQueryHistoryServiceAccountFiltersInput,
|
||||||
|
) => Promise<QueryHistoryFilterProposal>;
|
||||||
|
createQueryHistoryLlmRuntime?: (
|
||||||
|
projectDir: string,
|
||||||
|
project: Awaited<ReturnType<typeof loadKtxProject>>,
|
||||||
|
) => KtxLlmRuntimePort | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
const DRIVER_OPTIONS: Array<{ value: KtxSetupDatabaseDriver; label: string }> = [
|
const DRIVER_OPTIONS: Array<{ value: KtxSetupDatabaseDriver; label: string }> = [
|
||||||
|
|
@ -947,10 +969,14 @@ async function maybeApplyHistoricSqlConfig(input: {
|
||||||
return withQueryHistoryConfig(input.connection, { ...existing, enabled: false });
|
return withQueryHistoryConfig(input.connection, { ...existing, enabled: false });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const existingFilters =
|
||||||
|
existing.filters && typeof existing.filters === 'object' && !Array.isArray(existing.filters)
|
||||||
|
? (existing.filters as Record<string, unknown>)
|
||||||
|
: {};
|
||||||
const common: Record<string, unknown> = {
|
const common: Record<string, unknown> = {
|
||||||
...existing,
|
...existing,
|
||||||
enabled: true,
|
enabled: true,
|
||||||
filters: historicSqlFiltersForSetup(input.args.queryHistoryServiceAccountPatterns),
|
filters: historicSqlFiltersForSetup(input.args.queryHistoryServiceAccountPatterns, existingFilters),
|
||||||
};
|
};
|
||||||
|
|
||||||
if (dialect === 'postgres') {
|
if (dialect === 'postgres') {
|
||||||
|
|
@ -967,9 +993,13 @@ async function maybeApplyHistoricSqlConfig(input: {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function historicSqlFiltersForSetup(patterns: string[] | undefined) {
|
function historicSqlFiltersForSetup(
|
||||||
|
patterns: string[] | undefined,
|
||||||
|
existingFilters: Record<string, unknown> = {},
|
||||||
|
) {
|
||||||
const serviceAccountPatterns = patterns ?? [];
|
const serviceAccountPatterns = patterns ?? [];
|
||||||
return {
|
return {
|
||||||
|
...existingFilters,
|
||||||
dropTrivialProbes: true,
|
dropTrivialProbes: true,
|
||||||
...(serviceAccountPatterns.length > 0
|
...(serviceAccountPatterns.length > 0
|
||||||
? {
|
? {
|
||||||
|
|
@ -1587,6 +1617,189 @@ async function maybeRunHistoricSqlSetupProbe(input: {
|
||||||
return result.ok;
|
return result.ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function hasServiceAccountsBlock(connection: KtxProjectConnectionConfig | undefined): boolean {
|
||||||
|
const queryHistory = queryHistoryConfigRecord(connection);
|
||||||
|
const filters = queryHistory?.filters;
|
||||||
|
if (!filters || typeof filters !== 'object' || Array.isArray(filters)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return 'serviceAccounts' in filters;
|
||||||
|
}
|
||||||
|
|
||||||
|
function printQueryHistoryFilterProposal(io: KtxCliIo, proposal: QueryHistoryFilterProposal): void {
|
||||||
|
if (proposal.excludedRoles.length === 0) {
|
||||||
|
if (proposal.skipped?.reason === 'no-llm') {
|
||||||
|
io.stdout.write('│ Query-history filter picker skipped: no LLM is configured.\n');
|
||||||
|
} else if (proposal.skipped?.reason === 'no-daemon') {
|
||||||
|
io.stdout.write('│ Query-history filter picker skipped: SQL analysis is unavailable.\n');
|
||||||
|
} else if (proposal.skipped?.reason === 'no-in-scope-history') {
|
||||||
|
io.stdout.write('│ Query-history filter picker found no in-scope service-account exclusions.\n');
|
||||||
|
}
|
||||||
|
for (const warning of proposal.warnings) {
|
||||||
|
io.stdout.write(`│ ! ${warning}\n`);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
io.stdout.write('│ Proposed query-history service-account filters:\n');
|
||||||
|
for (const excluded of proposal.excludedRoles) {
|
||||||
|
io.stdout.write(`│ - ${excluded.role}: ${excluded.reason}\n`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function shouldApplyQueryHistoryFilterProposal(input: {
|
||||||
|
args: KtxSetupDatabasesArgs;
|
||||||
|
prompts: KtxSetupDatabasesPromptAdapter;
|
||||||
|
proposal: QueryHistoryFilterProposal;
|
||||||
|
}): Promise<boolean> {
|
||||||
|
if (input.proposal.excludedRoles.length === 0 || input.proposal.skipped?.reason === 'user-block-present') {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (input.args.yes === true || input.args.inputMode === 'disabled') {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
const choice = await input.prompts.select({
|
||||||
|
message: `Apply ${input.proposal.excludedRoles.length} derived query-history service-account exclusion${
|
||||||
|
input.proposal.excludedRoles.length === 1 ? '' : 's'
|
||||||
|
}?`,
|
||||||
|
options: [
|
||||||
|
{ value: 'apply', label: 'Apply derived filters (recommended)' },
|
||||||
|
{ value: 'skip', label: 'Leave query history filters unchanged' },
|
||||||
|
],
|
||||||
|
});
|
||||||
|
return choice === 'apply';
|
||||||
|
}
|
||||||
|
|
||||||
|
function createSetupQueryHistoryLlmRuntime(input: {
|
||||||
|
projectDir: string;
|
||||||
|
project: Awaited<ReturnType<typeof loadKtxProject>>;
|
||||||
|
deps: KtxSetupDatabasesDeps;
|
||||||
|
}): KtxLlmRuntimePort | null {
|
||||||
|
try {
|
||||||
|
return (
|
||||||
|
input.deps.createQueryHistoryLlmRuntime?.(input.projectDir, input.project) ??
|
||||||
|
createLocalKtxLlmRuntimeFromConfig(input.project.config.llm, {
|
||||||
|
projectDir: input.projectDir,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @internal */
|
||||||
|
export function managedDaemonOptionsForSetupQueryHistoryPicker(input: {
|
||||||
|
projectDir: string;
|
||||||
|
args: Pick<KtxSetupDatabasesArgs, 'cliVersion' | 'runtimeInstallPolicy' | 'inputMode'>;
|
||||||
|
io: KtxCliIo;
|
||||||
|
}): ManagedPythonCoreDaemonOptions {
|
||||||
|
return {
|
||||||
|
cliVersion: input.args.cliVersion ?? getKtxCliPackageInfo().version,
|
||||||
|
projectDir: input.projectDir,
|
||||||
|
installPolicy: input.args.runtimeInstallPolicy ?? (input.args.inputMode === 'disabled' ? 'never' : 'prompt'),
|
||||||
|
io: input.io,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function maybeProposeQueryHistoryFilters(input: {
|
||||||
|
projectDir: string;
|
||||||
|
connectionId: string;
|
||||||
|
io: KtxCliIo;
|
||||||
|
deps: KtxSetupDatabasesDeps;
|
||||||
|
args: KtxSetupDatabasesArgs;
|
||||||
|
prompts: KtxSetupDatabasesPromptAdapter;
|
||||||
|
}): Promise<void> {
|
||||||
|
const project = await loadKtxProject({ projectDir: input.projectDir });
|
||||||
|
const connection = project.config.connections[input.connectionId];
|
||||||
|
const queryHistory = queryHistoryConfigRecord(connection);
|
||||||
|
if (!connection || queryHistory?.enabled !== true) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const dialect = queryHistoryDialectForConnection(connection);
|
||||||
|
if (!dialect) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const picker = input.deps.queryHistoryFilterPicker ?? proposeQueryHistoryServiceAccountFilters;
|
||||||
|
const llmRuntime = createSetupQueryHistoryLlmRuntime({
|
||||||
|
projectDir: input.projectDir,
|
||||||
|
project,
|
||||||
|
deps: input.deps,
|
||||||
|
});
|
||||||
|
if (!llmRuntime && !input.deps.queryHistoryFilterPicker) {
|
||||||
|
printQueryHistoryFilterProposal(input.io, {
|
||||||
|
excludedRoles: [],
|
||||||
|
consideredRoleCount: 0,
|
||||||
|
skipped: { reason: 'no-llm' },
|
||||||
|
warnings: [],
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const runtime = createKtxCliHistoricSqlRuntime(project, input.connectionId, {
|
||||||
|
managedDaemon: managedDaemonOptionsForSetupQueryHistoryPicker({
|
||||||
|
projectDir: input.projectDir,
|
||||||
|
args: input.args,
|
||||||
|
io: input.io,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
if (!runtime) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const userServiceAccountsPresent = hasServiceAccountsBlock(connection);
|
||||||
|
const scopeFloor = await resolveQueryHistoryScopeFloor({
|
||||||
|
projectDir: input.projectDir,
|
||||||
|
connectionId: input.connectionId,
|
||||||
|
driver: String(connection.driver ?? ''),
|
||||||
|
connection: connection as Record<string, unknown>,
|
||||||
|
storedQueryHistory: queryHistory,
|
||||||
|
});
|
||||||
|
const pullConfig = queryHistoryPullConfig({
|
||||||
|
stored: queryHistory,
|
||||||
|
dialect,
|
||||||
|
enabledTables: scopeFloor.enabledTables,
|
||||||
|
enabledSchemas: scopeFloor.enabledSchemas,
|
||||||
|
modeledTableCatalog: scopeFloor.modeledTableCatalog,
|
||||||
|
scopeFloorWarnings: scopeFloor.warnings,
|
||||||
|
});
|
||||||
|
const proposal = await picker({
|
||||||
|
connectionId: input.connectionId,
|
||||||
|
dialect,
|
||||||
|
queryClient: runtime.queryClient,
|
||||||
|
reader: runtime.reader,
|
||||||
|
sqlAnalysis: runtime.sqlAnalysis,
|
||||||
|
llmRuntime,
|
||||||
|
pullConfig,
|
||||||
|
userServiceAccountsPresent,
|
||||||
|
});
|
||||||
|
|
||||||
|
printQueryHistoryFilterProposal(input.io, proposal);
|
||||||
|
if (proposal.skipped?.reason === 'user-block-present') {
|
||||||
|
input.io.stdout.write('│ Existing query-history service-account filters left unchanged.\n');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!(await shouldApplyQueryHistoryFilterProposal({ args: input.args, prompts: input.prompts, proposal }))) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await writeConnectionConfig({
|
||||||
|
projectDir: input.projectDir,
|
||||||
|
connectionId: input.connectionId,
|
||||||
|
connection: withQueryHistoryConfig(connection, {
|
||||||
|
...queryHistory,
|
||||||
|
filters: {
|
||||||
|
...(queryHistory.filters && typeof queryHistory.filters === 'object' && !Array.isArray(queryHistory.filters)
|
||||||
|
? queryHistory.filters
|
||||||
|
: {}),
|
||||||
|
serviceAccounts: {
|
||||||
|
mode: 'exclude',
|
||||||
|
patterns: proposal.excludedRoles.map((role) => role.pattern),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
async function applyHistoricSqlConfigToExistingConnection(input: {
|
async function applyHistoricSqlConfigToExistingConnection(input: {
|
||||||
projectDir: string;
|
projectDir: string;
|
||||||
connectionId: string;
|
connectionId: string;
|
||||||
|
|
@ -1725,6 +1938,16 @@ async function validateAndScanConnection(input: {
|
||||||
`Schema context complete for ${input.connectionId}`,
|
`Schema context complete for ${input.connectionId}`,
|
||||||
[`Changes: ${summarizeScanChanges(scanOutput)}`],
|
[`Changes: ${summarizeScanChanges(scanOutput)}`],
|
||||||
);
|
);
|
||||||
|
if (queryHistoryAvailable) {
|
||||||
|
await maybeProposeQueryHistoryFilters({
|
||||||
|
projectDir: input.projectDir,
|
||||||
|
connectionId: input.connectionId,
|
||||||
|
io: input.io,
|
||||||
|
deps: input.deps,
|
||||||
|
args: input.args,
|
||||||
|
prompts: input.prompts,
|
||||||
|
});
|
||||||
|
}
|
||||||
writeSetupSection(input.io, 'Database ready', [
|
writeSetupSection(input.io, 'Database ready', [
|
||||||
`${input.connectionId} · ${driverDisplay} · schema context complete`,
|
`${input.connectionId} · ${driverDisplay} · schema context complete`,
|
||||||
]);
|
]);
|
||||||
|
|
|
||||||
|
|
@ -735,6 +735,9 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
||||||
{
|
{
|
||||||
projectDir: projectResult.projectDir,
|
projectDir: projectResult.projectDir,
|
||||||
inputMode: args.inputMode,
|
inputMode: args.inputMode,
|
||||||
|
yes: args.yes,
|
||||||
|
cliVersion: args.cliVersion,
|
||||||
|
runtimeInstallPolicy: setupRuntimeInstallPolicy(args),
|
||||||
...(args.databaseDrivers ? { databaseDrivers: args.databaseDrivers } : {}),
|
...(args.databaseDrivers ? { databaseDrivers: args.databaseDrivers } : {}),
|
||||||
...(args.databaseConnectionIds ? { databaseConnectionIds: args.databaseConnectionIds } : {}),
|
...(args.databaseConnectionIds ? { databaseConnectionIds: args.databaseConnectionIds } : {}),
|
||||||
...(args.databaseConnectionId ? { databaseConnectionId: args.databaseConnectionId } : {}),
|
...(args.databaseConnectionId ? { databaseConnectionId: args.databaseConnectionId } : {}),
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,10 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => {
|
||||||
40,
|
40,
|
||||||
0.05,
|
0.05,
|
||||||
null,
|
null,
|
||||||
JSON.stringify([{ user: 'analyst@example.test', executions: 1 }]),
|
JSON.stringify([
|
||||||
|
{ user: 'svc-loader@example.test', executions: 40 },
|
||||||
|
{ user: 'analyst@example.test', executions: 2 },
|
||||||
|
]),
|
||||||
],
|
],
|
||||||
],
|
],
|
||||||
totalRows: 1,
|
totalRows: 1,
|
||||||
|
|
@ -103,15 +106,25 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => {
|
||||||
for await (const row of reader.fetchAggregated(
|
for await (const row of reader.fetchAggregated(
|
||||||
client,
|
client,
|
||||||
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
|
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
|
||||||
{ dialect: 'bigquery', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
{ dialect: 'bigquery', minExecutions: 5, windowDays: 90, enabledTables: [], enabledSchemas: [], modeledTableCatalog: [], scopeFloorWarnings: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
||||||
)) {
|
)) {
|
||||||
rows.push(row);
|
rows.push(row);
|
||||||
}
|
}
|
||||||
|
|
||||||
const sql = firstQuery(client);
|
const sql = firstQuery(client);
|
||||||
|
expect(sql).toContain('WITH filtered_jobs AS');
|
||||||
|
expect(sql).toContain('query_info.query_hashes.normalized_literals');
|
||||||
|
expect(sql).toContain('TO_HEX(SHA256(query))');
|
||||||
|
expect(sql).toContain('AS template_id');
|
||||||
|
expect(sql).toContain('template_stats AS');
|
||||||
|
expect(sql).toContain('template_users AS');
|
||||||
expect(sql).toContain('COUNT(*) AS executions');
|
expect(sql).toContain('COUNT(*) AS executions');
|
||||||
expect(sql).toContain('COUNT(DISTINCT user_email) AS distinct_users');
|
expect(sql).toContain('COUNT(DISTINCT user_email) AS distinct_users');
|
||||||
expect(sql).toContain('GROUP BY query_hash');
|
expect(sql).toContain('GROUP BY template_id');
|
||||||
|
expect(sql).toContain('GROUP BY template_id, user_email');
|
||||||
|
expect(sql).toContain('ORDER BY users.executions DESC');
|
||||||
|
expect(sql).not.toMatch(/\bquery_hash\b/);
|
||||||
|
expect(sql).not.toContain('LIMIT 5');
|
||||||
expect(sql).toContain('HAVING COUNT(*) >= 5');
|
expect(sql).toContain('HAVING COUNT(*) >= 5');
|
||||||
expect(rows).toMatchObject([
|
expect(rows).toMatchObject([
|
||||||
{
|
{
|
||||||
|
|
@ -120,7 +133,10 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => {
|
||||||
executions: 42,
|
executions: 42,
|
||||||
errorRate: 0.05,
|
errorRate: 0.05,
|
||||||
},
|
},
|
||||||
topUsers: [{ user: 'analyst@example.test', executions: 1 }],
|
topUsers: [
|
||||||
|
{ user: 'svc-loader@example.test', executions: 40 },
|
||||||
|
{ user: 'analyst@example.test', executions: 2 },
|
||||||
|
],
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
@ -137,6 +153,9 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => {
|
||||||
minExecutions: 5,
|
minExecutions: 5,
|
||||||
windowDays: 90,
|
windowDays: 90,
|
||||||
enabledTables: [],
|
enabledTables: [],
|
||||||
|
enabledSchemas: [],
|
||||||
|
modeledTableCatalog: [],
|
||||||
|
scopeFloorWarnings: [],
|
||||||
filters: { dropTrivialProbes: true },
|
filters: { dropTrivialProbes: true },
|
||||||
redactionPatterns: [],
|
redactionPatterns: [],
|
||||||
staleArchiveAfterDays: 90,
|
staleArchiveAfterDays: 90,
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ async function writeUnifiedStagedDir(root: string): Promise<void> {
|
||||||
});
|
});
|
||||||
await writeJson(root, 'tables/public.orders.json', {
|
await writeJson(root, 'tables/public.orders.json', {
|
||||||
table: 'public.orders',
|
table: 'public.orders',
|
||||||
|
tableRef: { catalog: null, db: 'public', name: 'orders' },
|
||||||
stats: {
|
stats: {
|
||||||
executionsBucket: '10-100',
|
executionsBucket: '10-100',
|
||||||
distinctUsersBucket: '2-5',
|
distinctUsersBucket: '2-5',
|
||||||
|
|
@ -46,7 +47,10 @@ async function writeUnifiedStagedDir(root: string): Promise<void> {
|
||||||
{
|
{
|
||||||
id: 'orders',
|
id: 'orders',
|
||||||
canonicalSql: 'select * from public.orders join public.customers on true',
|
canonicalSql: 'select * from public.orders join public.customers on true',
|
||||||
tablesTouched: ['public.orders', 'public.customers'],
|
tablesTouched: [
|
||||||
|
{ catalog: null, db: 'public', name: 'orders' },
|
||||||
|
{ catalog: null, db: 'public', name: 'customers' },
|
||||||
|
],
|
||||||
executionsBucket: '10-100',
|
executionsBucket: '10-100',
|
||||||
distinctUsersBucket: '2-5',
|
distinctUsersBucket: '2-5',
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
|
|
@ -58,7 +62,10 @@ async function writeUnifiedStagedDir(root: string): Promise<void> {
|
||||||
{
|
{
|
||||||
id: 'orders',
|
id: 'orders',
|
||||||
canonicalSql: 'select * from public.orders join public.customers on true',
|
canonicalSql: 'select * from public.orders join public.customers on true',
|
||||||
tablesTouched: ['public.orders', 'public.customers'],
|
tablesTouched: [
|
||||||
|
{ catalog: null, db: 'public', name: 'orders' },
|
||||||
|
{ catalog: null, db: 'public', name: 'customers' },
|
||||||
|
],
|
||||||
executionsBucket: '10-100',
|
executionsBucket: '10-100',
|
||||||
distinctUsersBucket: '2-5',
|
distinctUsersBucket: '2-5',
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
|
|
@ -155,7 +162,10 @@ describe('chunkHistoricSqlUnifiedStagedDir', () => {
|
||||||
{
|
{
|
||||||
id: 'line-items',
|
id: 'line-items',
|
||||||
canonicalSql: 'select * from public.orders join public.line_items on true',
|
canonicalSql: 'select * from public.orders join public.line_items on true',
|
||||||
tablesTouched: ['public.orders', 'public.line_items'],
|
tablesTouched: [
|
||||||
|
{ catalog: null, db: 'public', name: 'orders' },
|
||||||
|
{ catalog: null, db: 'public', name: 'line_items' },
|
||||||
|
],
|
||||||
executionsBucket: '10-100',
|
executionsBucket: '10-100',
|
||||||
distinctUsersBucket: '2-5',
|
distinctUsersBucket: '2-5',
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,10 @@ describe('HistoricSqlSourceAdapter', () => {
|
||||||
[
|
[
|
||||||
'pg:1',
|
'pg:1',
|
||||||
{
|
{
|
||||||
tablesTouched: ['public.orders', 'public.customers'],
|
tablesTouched: [
|
||||||
|
{ catalog: null, db: 'public', name: 'orders' },
|
||||||
|
{ catalog: null, db: 'public', name: 'customers' },
|
||||||
|
],
|
||||||
columnsByClause: { select: ['status'], join: ['customer_id', 'id'], groupBy: ['status'] },
|
columnsByClause: { select: ['status'], join: ['customer_id', 'id'], groupBy: ['status'] },
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,10 @@ function acceptanceSqlAnalysis(): SqlAnalysisPort {
|
||||||
items.map((item) => [
|
items.map((item) => [
|
||||||
item.id,
|
item.id,
|
||||||
{
|
{
|
||||||
tablesTouched: ['public.orders', 'public.customers'],
|
tablesTouched: [
|
||||||
|
{ catalog: null, db: 'public', name: 'orders' },
|
||||||
|
{ catalog: null, db: 'public', name: 'customers' },
|
||||||
|
],
|
||||||
columnsByClause: {
|
columnsByClause: {
|
||||||
select: ['status', 'segment'],
|
select: ['status', 'segment'],
|
||||||
where: ['status'],
|
where: ['status'],
|
||||||
|
|
|
||||||
|
|
@ -9,11 +9,18 @@ import type { StagedPatternsInput } from '../../../../../src/context/ingest/adap
|
||||||
|
|
||||||
type PatternTemplate = StagedPatternsInput['templates'][number];
|
type PatternTemplate = StagedPatternsInput['templates'][number];
|
||||||
|
|
||||||
|
function tableRef(value: string): { catalog: string | null; db: string | null; name: string } {
|
||||||
|
const parts = value.split('.');
|
||||||
|
if (parts.length === 3) return { catalog: parts[0]!, db: parts[1]!, name: parts[2]! };
|
||||||
|
if (parts.length === 2) return { catalog: null, db: parts[0]!, name: parts[1]! };
|
||||||
|
return { catalog: null, db: null, name: value };
|
||||||
|
}
|
||||||
|
|
||||||
function template(id: string, tablesTouched: string[], canonicalSql = 'select 1'): PatternTemplate {
|
function template(id: string, tablesTouched: string[], canonicalSql = 'select 1'): PatternTemplate {
|
||||||
return {
|
return {
|
||||||
id,
|
id,
|
||||||
canonicalSql,
|
canonicalSql,
|
||||||
tablesTouched,
|
tablesTouched: tablesTouched.map(tableRef),
|
||||||
executionsBucket: '10-100',
|
executionsBucket: '10-100',
|
||||||
distinctUsersBucket: '2-5',
|
distinctUsersBucket: '2-5',
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
|
|
@ -32,7 +39,7 @@ describe('historic-SQL pattern input sharding', () => {
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
|
|
||||||
const result = splitHistoricSqlPatternInputs(input, { maxBytes: 760 });
|
const result = splitHistoricSqlPatternInputs(input, { maxBytes: 1200 });
|
||||||
|
|
||||||
expect(result.auditInput.templates.map((entry) => entry.id)).toEqual([
|
expect(result.auditInput.templates.map((entry) => entry.id)).toEqual([
|
||||||
'orders-customers-1',
|
'orders-customers-1',
|
||||||
|
|
@ -51,7 +58,7 @@ describe('historic-SQL pattern input sharding', () => {
|
||||||
'orders-customers-1',
|
'orders-customers-1',
|
||||||
'orders-customers-2',
|
'orders-customers-2',
|
||||||
]);
|
]);
|
||||||
expect(result.shards.every((shard) => shard.byteLength <= 760)).toBe(true);
|
expect(result.shards.every((shard) => shard.byteLength <= 1200)).toBe(true);
|
||||||
expect(result.shards.flatMap((shard) => shard.input.templates).some((entry) => entry.id === 'single-table-orders')).toBe(false);
|
expect(result.shards.flatMap((shard) => shard.input.templates).some((entry) => entry.id === 'single-table-orders')).toBe(false);
|
||||||
expect(result.warnings).toEqual([]);
|
expect(result.warnings).toEqual([]);
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -215,7 +215,7 @@ describe('PostgresPgssReader aggregate path', () => {
|
||||||
for await (const row of reader.fetchAggregated(
|
for await (const row of reader.fetchAggregated(
|
||||||
{ executeQuery },
|
{ executeQuery },
|
||||||
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
|
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
|
||||||
{ dialect: 'postgres', minExecutions: 5, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
{ dialect: 'postgres', minExecutions: 5, enabledTables: [], enabledSchemas: [], modeledTableCatalog: [], scopeFloorWarnings: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
||||||
)) {
|
)) {
|
||||||
rows.push(row);
|
rows.push(row);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,274 @@
|
||||||
|
import { describe, expect, it, vi } from 'vitest';
|
||||||
|
import type { KtxLlmRuntimePort } from '../../../../../src/context/llm/runtime-port.js';
|
||||||
|
import type {
|
||||||
|
SqlAnalysisBatchItem,
|
||||||
|
SqlAnalysisBatchResult,
|
||||||
|
SqlAnalysisPort,
|
||||||
|
} from '../../../../../src/context/sql-analysis/ports.js';
|
||||||
|
import {
|
||||||
|
proposeQueryHistoryServiceAccountFilters,
|
||||||
|
regexEscapeForExactRolePattern,
|
||||||
|
} from '../../../../../src/context/ingest/adapters/historic-sql/query-history-filter-picker.js';
|
||||||
|
import type {
|
||||||
|
AggregatedTemplate,
|
||||||
|
HistoricSqlReader,
|
||||||
|
} from '../../../../../src/context/ingest/adapters/historic-sql/types.js';
|
||||||
|
|
||||||
|
function aggregate(overrides: Partial<AggregatedTemplate> & { templateId: string; canonicalSql: string }): AggregatedTemplate {
|
||||||
|
return {
|
||||||
|
templateId: overrides.templateId,
|
||||||
|
canonicalSql: overrides.canonicalSql,
|
||||||
|
dialect: overrides.dialect ?? 'postgres',
|
||||||
|
stats: overrides.stats ?? {
|
||||||
|
executions: 25,
|
||||||
|
distinctUsers: 1,
|
||||||
|
firstSeen: '2026-05-01T00:00:00.000Z',
|
||||||
|
lastSeen: '2026-06-01T00:00:00.000Z',
|
||||||
|
p50RuntimeMs: 50,
|
||||||
|
p95RuntimeMs: 100,
|
||||||
|
errorRate: 0,
|
||||||
|
rowsProduced: 10,
|
||||||
|
},
|
||||||
|
topUsers: overrides.topUsers ?? [{ user: 'analyst', executions: 25 }],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function reader(...templates: AggregatedTemplate[]): HistoricSqlReader {
|
||||||
|
return {
|
||||||
|
async probe() {
|
||||||
|
return { warnings: [], info: [] };
|
||||||
|
},
|
||||||
|
async *fetchAggregated() {
|
||||||
|
for (const template of templates) {
|
||||||
|
yield template;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function sqlAnalysis(tablesById: Record<string, Array<{ catalog: string | null; db: string | null; name: string }>>): SqlAnalysisPort {
|
||||||
|
return {
|
||||||
|
analyzeForFingerprint: vi.fn(),
|
||||||
|
analyzeBatch: vi.fn(async (items: SqlAnalysisBatchItem[]): Promise<Map<string, SqlAnalysisBatchResult>> =>
|
||||||
|
new Map<string, SqlAnalysisBatchResult>(
|
||||||
|
items.map((item) => [
|
||||||
|
item.id,
|
||||||
|
{
|
||||||
|
tablesTouched: tablesById[item.id] ?? [],
|
||||||
|
columnsByClause: {},
|
||||||
|
},
|
||||||
|
]),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function llm(decisions: Array<{ role: string; exclude: boolean; reason: string }>): KtxLlmRuntimePort {
|
||||||
|
const generateObject = vi.fn(async () => ({ roles: decisions })) as KtxLlmRuntimePort['generateObject'];
|
||||||
|
return {
|
||||||
|
generateText: vi.fn(),
|
||||||
|
generateObject,
|
||||||
|
runAgentLoop: vi.fn(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('query-history filter picker', () => {
|
||||||
|
it('emits anchored escaped patterns for excluded roles from one batched LLM call', async () => {
|
||||||
|
const runtime = llm([
|
||||||
|
{ role: 'svc.loader+prod', exclude: true, reason: 'Runs recurring loader traffic only.' },
|
||||||
|
{ role: 'analyst', exclude: false, reason: 'Interactive analytic usage.' },
|
||||||
|
]);
|
||||||
|
const analysis = sqlAnalysis({
|
||||||
|
loader: [{ catalog: null, db: 'analytics', name: 'orders' }],
|
||||||
|
analyst: [{ catalog: null, db: 'analytics', name: 'orders' }],
|
||||||
|
});
|
||||||
|
|
||||||
|
const proposal = await proposeQueryHistoryServiceAccountFilters({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
dialect: 'postgres',
|
||||||
|
queryClient: {},
|
||||||
|
reader: reader(
|
||||||
|
aggregate({
|
||||||
|
templateId: 'loader',
|
||||||
|
canonicalSql: 'merge into analytics.orders using staging.orders_delta on orders.id = orders_delta.id',
|
||||||
|
topUsers: [{ user: 'svc.loader+prod', executions: 40 }],
|
||||||
|
}),
|
||||||
|
aggregate({
|
||||||
|
templateId: 'analyst',
|
||||||
|
canonicalSql: 'select status, count(*) from analytics.orders group by status',
|
||||||
|
topUsers: [{ user: 'analyst', executions: 25 }],
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
sqlAnalysis: analysis,
|
||||||
|
llmRuntime: runtime,
|
||||||
|
pullConfig: {
|
||||||
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['analytics'],
|
||||||
|
enabledTables: [],
|
||||||
|
modeledTableCatalog: [{ catalog: null, db: 'analytics', name: 'orders' }],
|
||||||
|
filters: { dropTrivialProbes: true },
|
||||||
|
},
|
||||||
|
now: new Date('2026-06-03T00:00:00.000Z'),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(runtime.generateObject).toHaveBeenCalledTimes(1);
|
||||||
|
expect(proposal).toMatchObject({
|
||||||
|
excludedRoles: [
|
||||||
|
{
|
||||||
|
role: 'svc.loader+prod',
|
||||||
|
pattern: '^svc\\.loader\\+prod$',
|
||||||
|
reason: 'Runs recurring loader traffic only.',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
consideredRoleCount: 2,
|
||||||
|
skipped: null,
|
||||||
|
warnings: [],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('redacts representative SQL before sending role records to the LLM', async () => {
|
||||||
|
const originalSql =
|
||||||
|
"select * from public.api_events where api_key = 'sk_live_abc123' and note = 'Secret_Token_9f'"; // pragma: allowlist secret
|
||||||
|
const runtime = llm([
|
||||||
|
{ role: 'svc_loader', exclude: false, reason: 'Keep by default.' },
|
||||||
|
{ role: 'analyst', exclude: false, reason: 'Interactive analytic usage.' },
|
||||||
|
]);
|
||||||
|
const analysis = sqlAnalysis({
|
||||||
|
secret: [{ catalog: null, db: 'public', name: 'api_events' }],
|
||||||
|
analyst: [{ catalog: null, db: 'public', name: 'orders' }],
|
||||||
|
});
|
||||||
|
|
||||||
|
await proposeQueryHistoryServiceAccountFilters({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
dialect: 'postgres',
|
||||||
|
queryClient: {},
|
||||||
|
reader: reader(
|
||||||
|
aggregate({
|
||||||
|
templateId: 'secret',
|
||||||
|
canonicalSql: originalSql,
|
||||||
|
topUsers: [{ user: 'svc_loader', executions: 30 }],
|
||||||
|
}),
|
||||||
|
aggregate({
|
||||||
|
templateId: 'analyst',
|
||||||
|
canonicalSql: 'select status, count(*) from public.orders group by status',
|
||||||
|
topUsers: [{ user: 'analyst', executions: 25 }],
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
sqlAnalysis: analysis,
|
||||||
|
llmRuntime: runtime,
|
||||||
|
pullConfig: {
|
||||||
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['public'],
|
||||||
|
enabledTables: [],
|
||||||
|
modeledTableCatalog: [],
|
||||||
|
redactionPatterns: ['sk_live_[A-Za-z0-9]+', '(?i)secret_token_[a-z0-9]+'],
|
||||||
|
filters: { dropTrivialProbes: true },
|
||||||
|
},
|
||||||
|
now: new Date('2026-06-03T00:00:00.000Z'),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(analysis.analyzeBatch).toHaveBeenCalledWith(
|
||||||
|
[
|
||||||
|
{ id: 'secret', sql: originalSql },
|
||||||
|
{ id: 'analyst', sql: 'select status, count(*) from public.orders group by status' },
|
||||||
|
],
|
||||||
|
'postgres',
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
const call = vi.mocked(runtime.generateObject).mock.calls[0]?.[0];
|
||||||
|
expect(call?.prompt).toContain('[REDACTED]');
|
||||||
|
expect(call?.prompt).not.toContain('sk_live_abc123');
|
||||||
|
expect(call?.prompt).not.toContain('Secret_Token_9f');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('fails open with no LLM runtime', async () => {
|
||||||
|
const proposal = await proposeQueryHistoryServiceAccountFilters({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
dialect: 'postgres',
|
||||||
|
queryClient: {},
|
||||||
|
reader: reader(),
|
||||||
|
sqlAnalysis: sqlAnalysis({}),
|
||||||
|
llmRuntime: null,
|
||||||
|
pullConfig: { dialect: 'postgres', filters: { dropTrivialProbes: true } },
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(proposal).toEqual({
|
||||||
|
excludedRoles: [],
|
||||||
|
consideredRoleCount: 0,
|
||||||
|
skipped: { reason: 'no-llm' },
|
||||||
|
warnings: [],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('proposes nothing for a single-role stack', async () => {
|
||||||
|
const runtime = llm([{ role: 'warehouse_user', exclude: true, reason: 'Only observed role.' }]);
|
||||||
|
|
||||||
|
const proposal = await proposeQueryHistoryServiceAccountFilters({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
dialect: 'postgres',
|
||||||
|
queryClient: {},
|
||||||
|
reader: reader(
|
||||||
|
aggregate({
|
||||||
|
templateId: 'single-role',
|
||||||
|
canonicalSql: 'select * from analytics.orders',
|
||||||
|
topUsers: [{ user: 'warehouse_user', executions: 40 }],
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
sqlAnalysis: sqlAnalysis({
|
||||||
|
'single-role': [{ catalog: null, db: 'analytics', name: 'orders' }],
|
||||||
|
}),
|
||||||
|
llmRuntime: runtime,
|
||||||
|
pullConfig: { dialect: 'postgres', enabledSchemas: ['analytics'], filters: { dropTrivialProbes: true } },
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(runtime.generateObject).not.toHaveBeenCalled();
|
||||||
|
expect(proposal.excludedRoles).toEqual([]);
|
||||||
|
expect(proposal.skipped).toEqual({ reason: 'no-in-scope-history' });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps clean in-scope history when the model excludes nothing', async () => {
|
||||||
|
const proposal = await proposeQueryHistoryServiceAccountFilters({
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
dialect: 'bigquery',
|
||||||
|
queryClient: {},
|
||||||
|
reader: reader(
|
||||||
|
aggregate({
|
||||||
|
templateId: 'dashboard',
|
||||||
|
canonicalSql: 'select status, count(*) from `demo.analytics.orders` group by status',
|
||||||
|
dialect: 'bigquery',
|
||||||
|
topUsers: [{ user: 'bi_runner', executions: 1 }],
|
||||||
|
}),
|
||||||
|
aggregate({
|
||||||
|
templateId: 'analyst',
|
||||||
|
canonicalSql: 'select * from `demo.analytics.orders` where id = @id',
|
||||||
|
dialect: 'bigquery',
|
||||||
|
topUsers: [{ user: 'analyst', executions: 1 }],
|
||||||
|
}),
|
||||||
|
),
|
||||||
|
sqlAnalysis: sqlAnalysis({
|
||||||
|
dashboard: [{ catalog: 'demo', db: 'analytics', name: 'orders' }],
|
||||||
|
analyst: [{ catalog: 'demo', db: 'analytics', name: 'orders' }],
|
||||||
|
}),
|
||||||
|
llmRuntime: llm([
|
||||||
|
{ role: 'bi_runner', exclude: false, reason: 'Dashboard usage is analytic.' },
|
||||||
|
{ role: 'analyst', exclude: false, reason: 'Interactive analyst usage.' },
|
||||||
|
]),
|
||||||
|
pullConfig: {
|
||||||
|
dialect: 'bigquery',
|
||||||
|
windowDays: 90,
|
||||||
|
enabledSchemas: ['analytics'],
|
||||||
|
filters: { dropTrivialProbes: true },
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(proposal.excludedRoles).toEqual([]);
|
||||||
|
expect(proposal.consideredRoleCount).toBe(2);
|
||||||
|
expect(proposal.skipped).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('escapes regex metacharacters for exact role matches', () => {
|
||||||
|
expect(regexEscapeForExactRolePattern('svc.loader+prod')).toBe('^svc\\.loader\\+prod$');
|
||||||
|
expect(regexEscapeForExactRolePattern('team[etl](west)')).toBe('^team\\[etl\\]\\(west\\)$');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
@ -0,0 +1,194 @@
|
||||||
|
import { mkdir, mkdtemp, writeFile } from 'node:fs/promises';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { resolveQueryHistoryScopeFloor } from '../../../../../src/context/ingest/adapters/historic-sql/scope-floor.js';
|
||||||
|
|
||||||
|
async function tempProject(): Promise<string> {
|
||||||
|
return mkdtemp(join(tmpdir(), 'ktx-qh-scope-'));
|
||||||
|
}
|
||||||
|
|
||||||
|
async function seedLiveScanTable(
|
||||||
|
projectDir: string,
|
||||||
|
connectionId: string,
|
||||||
|
syncId: string,
|
||||||
|
table: { catalog: string | null; db: string | null; name: string },
|
||||||
|
): Promise<void> {
|
||||||
|
const root = join(projectDir, 'raw-sources', connectionId, 'live-database', syncId);
|
||||||
|
await mkdir(join(root, 'tables'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
join(root, 'connection.json'),
|
||||||
|
`${JSON.stringify({ connectionId, driver: 'postgres' }, null, 2)}\n`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
await writeFile(
|
||||||
|
join(root, 'tables', `${table.db ?? 'default'}-${table.name}.json`),
|
||||||
|
`${JSON.stringify(
|
||||||
|
{
|
||||||
|
...table,
|
||||||
|
kind: 'table',
|
||||||
|
comment: null,
|
||||||
|
estimatedRows: null,
|
||||||
|
columns: [],
|
||||||
|
foreignKeys: [],
|
||||||
|
},
|
||||||
|
null,
|
||||||
|
2,
|
||||||
|
)}\n`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
await writeFile(
|
||||||
|
join(root, 'scan-report.json'),
|
||||||
|
`${JSON.stringify(
|
||||||
|
{
|
||||||
|
connectionId,
|
||||||
|
driver: 'postgres',
|
||||||
|
syncId,
|
||||||
|
runId: `scan-${syncId}`,
|
||||||
|
trigger: 'cli',
|
||||||
|
mode: 'enriched',
|
||||||
|
dryRun: false,
|
||||||
|
artifactPaths: {
|
||||||
|
rawSourcesDir: `raw-sources/${connectionId}/live-database/${syncId}`,
|
||||||
|
reportPath: `raw-sources/${connectionId}/live-database/${syncId}/scan-report.json`,
|
||||||
|
manifestShards: [],
|
||||||
|
enrichmentArtifacts: [],
|
||||||
|
},
|
||||||
|
counts: {},
|
||||||
|
warnings: [],
|
||||||
|
enrichment: {},
|
||||||
|
enrichmentState: {},
|
||||||
|
},
|
||||||
|
null,
|
||||||
|
2,
|
||||||
|
)}\n`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('resolveQueryHistoryScopeFloor', () => {
|
||||||
|
it('computes modeled schemas from connection schemas plus semantic source tables', async () => {
|
||||||
|
const projectDir = await tempProject();
|
||||||
|
await mkdir(join(projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
join(projectDir, 'semantic-layer/warehouse/revenue.yaml'),
|
||||||
|
[
|
||||||
|
'name: revenue',
|
||||||
|
'table: orbit_analytics.mart_revenue',
|
||||||
|
'grain: [id]',
|
||||||
|
'columns:',
|
||||||
|
' - name: id',
|
||||||
|
' type: string',
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
await seedLiveScanTable(projectDir, 'warehouse', 'sync-1', {
|
||||||
|
catalog: null,
|
||||||
|
db: 'orbit_raw',
|
||||||
|
name: 'accounts',
|
||||||
|
});
|
||||||
|
|
||||||
|
const scope = await resolveQueryHistoryScopeFloor({
|
||||||
|
projectDir,
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
driver: 'postgres',
|
||||||
|
connection: { driver: 'postgres', schemas: ['orbit_raw'] },
|
||||||
|
storedQueryHistory: {},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(scope.enabledSchemas).toEqual(['orbit_analytics', 'orbit_raw']);
|
||||||
|
expect(scope.modeledTableCatalog).toEqual([
|
||||||
|
{ catalog: null, db: 'orbit_analytics', name: 'mart_revenue' },
|
||||||
|
{ catalog: null, db: 'orbit_raw', name: 'accounts' },
|
||||||
|
]);
|
||||||
|
expect(scope.enabledTables).toEqual([]);
|
||||||
|
expect(scope.floorDisabled).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('uses explicit enabledTables before explicit enabledSchemas and computed scope', async () => {
|
||||||
|
const scope = await resolveQueryHistoryScopeFloor({
|
||||||
|
projectDir: await tempProject(),
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
driver: 'postgres',
|
||||||
|
connection: { driver: 'postgres', schemas: ['orbit_raw'] },
|
||||||
|
storedQueryHistory: {
|
||||||
|
enabledTables: ['orbit_analytics.mart_revenue'],
|
||||||
|
enabledSchemas: ['orbit_raw'],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(scope.enabledTables).toEqual([{ catalog: null, db: 'orbit_analytics', name: 'mart_revenue' }]);
|
||||||
|
expect(scope.enabledSchemas).toEqual([]);
|
||||||
|
expect(scope.floorDisabled).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('disables the floor for enabledSchemas star', async () => {
|
||||||
|
const scope = await resolveQueryHistoryScopeFloor({
|
||||||
|
projectDir: await tempProject(),
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
driver: 'postgres',
|
||||||
|
connection: { driver: 'postgres', schemas: ['orbit_raw'] },
|
||||||
|
storedQueryHistory: { enabledSchemas: ['*'] },
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(scope.enabledTables).toEqual([]);
|
||||||
|
expect(scope.enabledSchemas).toEqual(['*']);
|
||||||
|
expect(scope.floorDisabled).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('adds latest live-database scan tables to the modeled table catalog', async () => {
|
||||||
|
const projectDir = await tempProject();
|
||||||
|
await mkdir(join(projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
join(projectDir, 'semantic-layer/warehouse/revenue.yaml'),
|
||||||
|
[
|
||||||
|
'name: revenue',
|
||||||
|
'table: orbit_analytics.mart_revenue',
|
||||||
|
'grain: [id]',
|
||||||
|
'columns:',
|
||||||
|
' - name: id',
|
||||||
|
' type: string',
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
await seedLiveScanTable(projectDir, 'warehouse', 'sync-1', {
|
||||||
|
catalog: null,
|
||||||
|
db: 'orbit_raw',
|
||||||
|
name: 'accounts',
|
||||||
|
});
|
||||||
|
|
||||||
|
const scope = await resolveQueryHistoryScopeFloor({
|
||||||
|
projectDir,
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
driver: 'postgres',
|
||||||
|
connection: { driver: 'postgres', schemas: ['orbit_raw'] },
|
||||||
|
storedQueryHistory: {},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(scope.enabledSchemas).toEqual(['orbit_analytics', 'orbit_raw']);
|
||||||
|
expect(scope.modeledTableCatalog).toEqual([
|
||||||
|
{ catalog: null, db: 'orbit_analytics', name: 'mart_revenue' },
|
||||||
|
{ catalog: null, db: 'orbit_raw', name: 'accounts' },
|
||||||
|
]);
|
||||||
|
expect(scope.warnings).toEqual([]);
|
||||||
|
expect(scope.floorDisabled).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('fails open when schema scope exists but the scan catalog is unavailable', async () => {
|
||||||
|
const scope = await resolveQueryHistoryScopeFloor({
|
||||||
|
projectDir: await tempProject(),
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
driver: 'postgres',
|
||||||
|
connection: { driver: 'postgres', schemas: ['orbit_raw'] },
|
||||||
|
storedQueryHistory: {},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(scope.enabledTables).toEqual([]);
|
||||||
|
expect(scope.enabledSchemas).toEqual(['*']);
|
||||||
|
expect(scope.modeledTableCatalog).toEqual([]);
|
||||||
|
expect(scope.floorDisabled).toBe(true);
|
||||||
|
expect(scope.warnings).toContain('query_history_scope_floor_disabled:catalog_unavailable');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import {
|
||||||
|
includedQueryHistoryTableRefs,
|
||||||
|
isQueryHistoryScopeFloorDisabled,
|
||||||
|
shouldFailOpenQueryHistoryScope,
|
||||||
|
} from '../../../../../src/context/ingest/adapters/historic-sql/scope-membership.js';
|
||||||
|
import type { KtxTableRef } from '../../../../../src/context/scan/types.js';
|
||||||
|
|
||||||
|
function ref(db: string | null, name: string, catalog: string | null = null): KtxTableRef {
|
||||||
|
return { catalog, db, name };
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('query-history scope membership', () => {
|
||||||
|
it('prefers explicit enabled tables over schema scope', () => {
|
||||||
|
const orders = ref('analytics', 'orders');
|
||||||
|
const noise = ref('metabase', 'application_table');
|
||||||
|
|
||||||
|
expect(
|
||||||
|
includedQueryHistoryTableRefs([orders, noise], {
|
||||||
|
enabledTables: [orders],
|
||||||
|
enabledSchemas: ['metabase'],
|
||||||
|
}),
|
||||||
|
).toEqual([orders]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('matches schema scope by the db component across catalogs', () => {
|
||||||
|
const modeled = ref('orbit_analytics', 'orders', 'demo-project');
|
||||||
|
const noise = ref('metabase', 'application_table', 'demo-project');
|
||||||
|
|
||||||
|
expect(
|
||||||
|
includedQueryHistoryTableRefs([modeled, noise], {
|
||||||
|
enabledTables: [],
|
||||||
|
enabledSchemas: ['orbit_analytics'],
|
||||||
|
}),
|
||||||
|
).toEqual([modeled]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps every touched ref when wildcard scope disables the floor', () => {
|
||||||
|
const tables = [ref('analytics', 'orders'), ref('metabase', 'application_table')];
|
||||||
|
|
||||||
|
expect(isQueryHistoryScopeFloorDisabled({ enabledTables: [], enabledSchemas: ['*'] })).toBe(true);
|
||||||
|
expect(includedQueryHistoryTableRefs(tables, { enabledTables: [], enabledSchemas: ['*'] })).toEqual(tables);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('fails open when no tables, schemas, or wildcard are configured', () => {
|
||||||
|
const tables = [ref('metabase', 'application_table')];
|
||||||
|
|
||||||
|
expect(shouldFailOpenQueryHistoryScope({ enabledTables: [], enabledSchemas: [] })).toBe(true);
|
||||||
|
expect(includedQueryHistoryTableRefs(tables, { enabledTables: [], enabledSchemas: [] })).toEqual(tables);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
@ -90,7 +90,10 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
|
||||||
40,
|
40,
|
||||||
0.05,
|
0.05,
|
||||||
100,
|
100,
|
||||||
JSON.stringify([{ user: 'ANALYST', executions: 1 }]),
|
JSON.stringify([
|
||||||
|
{ user: 'SVC_LOADER', executions: 40 },
|
||||||
|
{ user: 'ANALYST', executions: 2 },
|
||||||
|
]),
|
||||||
],
|
],
|
||||||
],
|
],
|
||||||
totalRows: 1,
|
totalRows: 1,
|
||||||
|
|
@ -102,15 +105,20 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
|
||||||
for await (const row of reader.fetchAggregated(
|
for await (const row of reader.fetchAggregated(
|
||||||
client,
|
client,
|
||||||
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
|
{ start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') },
|
||||||
{ dialect: 'snowflake', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
{ dialect: 'snowflake', minExecutions: 5, windowDays: 90, enabledTables: [], enabledSchemas: [], modeledTableCatalog: [], scopeFloorWarnings: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 },
|
||||||
)) {
|
)) {
|
||||||
rows.push(row);
|
rows.push(row);
|
||||||
}
|
}
|
||||||
|
|
||||||
const sql = firstQuery(client);
|
const sql = firstQuery(client);
|
||||||
|
expect(sql).toContain('WITH filtered_queries AS');
|
||||||
|
expect(sql).toContain('template_stats AS');
|
||||||
|
expect(sql).toContain('template_users AS');
|
||||||
expect(sql).toContain('SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY');
|
expect(sql).toContain('SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY');
|
||||||
expect(sql).toContain('COUNT(*) AS executions');
|
expect(sql).toContain('COUNT(*) AS executions');
|
||||||
expect(sql).toContain('GROUP BY query_hash');
|
expect(sql).toContain('COUNT(DISTINCT user_name) AS distinct_users');
|
||||||
|
expect(sql).toContain('GROUP BY query_hash, user_name');
|
||||||
|
expect(sql).toContain('ORDER BY users.executions DESC');
|
||||||
expect(sql).toContain('HAVING COUNT(*) >= 5');
|
expect(sql).toContain('HAVING COUNT(*) >= 5');
|
||||||
expect(rows).toMatchObject([
|
expect(rows).toMatchObject([
|
||||||
{
|
{
|
||||||
|
|
@ -119,7 +127,10 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
|
||||||
executions: 42,
|
executions: 42,
|
||||||
errorRate: 0.05,
|
errorRate: 0.05,
|
||||||
},
|
},
|
||||||
topUsers: [{ user: 'ANALYST', executions: 1 }],
|
topUsers: [
|
||||||
|
{ user: 'SVC_LOADER', executions: 40 },
|
||||||
|
{ user: 'ANALYST', executions: 2 },
|
||||||
|
],
|
||||||
},
|
},
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
@ -136,6 +147,9 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => {
|
||||||
minExecutions: 5,
|
minExecutions: 5,
|
||||||
windowDays: 90,
|
windowDays: 90,
|
||||||
enabledTables: [],
|
enabledTables: [],
|
||||||
|
enabledSchemas: [],
|
||||||
|
modeledTableCatalog: [],
|
||||||
|
scopeFloorWarnings: [],
|
||||||
filters: { dropTrivialProbes: true },
|
filters: { dropTrivialProbes: true },
|
||||||
redactionPatterns: [],
|
redactionPatterns: [],
|
||||||
staleArchiveAfterDays: 90,
|
staleArchiveAfterDays: 90,
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,13 @@ async function readJson<T>(root: string, relPath: string): Promise<T> {
|
||||||
return JSON.parse(await readFile(join(root, relPath), 'utf-8')) as T;
|
return JSON.parse(await readFile(join(root, relPath), 'utf-8')) as T;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function tableRef(value: string): { catalog: string | null; db: string | null; name: string } {
|
||||||
|
const parts = value.split('.');
|
||||||
|
if (parts.length === 3) return { catalog: parts[0]!, db: parts[1]!, name: parts[2]! };
|
||||||
|
if (parts.length === 2) return { catalog: null, db: parts[0]!, name: parts[1]! };
|
||||||
|
return { catalog: null, db: null, name: value };
|
||||||
|
}
|
||||||
|
|
||||||
function aggregate(overrides: Partial<AggregatedTemplate> & { templateId: string; canonicalSql: string }): AggregatedTemplate {
|
function aggregate(overrides: Partial<AggregatedTemplate> & { templateId: string; canonicalSql: string }): AggregatedTemplate {
|
||||||
return {
|
return {
|
||||||
templateId: overrides.templateId,
|
templateId: overrides.templateId,
|
||||||
|
|
@ -72,7 +79,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
[
|
[
|
||||||
'orders-by-status',
|
'orders-by-status',
|
||||||
{
|
{
|
||||||
tablesTouched: ['public.orders', 'public.customers'],
|
tablesTouched: [tableRef('public.orders'), tableRef('public.customers')],
|
||||||
columnsByClause: {
|
columnsByClause: {
|
||||||
select: ['status'],
|
select: ['status'],
|
||||||
where: ['created_at'],
|
where: ['created_at'],
|
||||||
|
|
@ -94,6 +101,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
sqlAnalysis,
|
sqlAnalysis,
|
||||||
pullConfig: {
|
pullConfig: {
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['public'],
|
||||||
filters: {
|
filters: {
|
||||||
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
|
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
|
||||||
},
|
},
|
||||||
|
|
@ -111,6 +119,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
{ id: 'bad-parse', sql: 'select broken from' },
|
{ id: 'bad-parse', sql: 'select broken from' },
|
||||||
],
|
],
|
||||||
'postgres',
|
'postgres',
|
||||||
|
undefined,
|
||||||
);
|
);
|
||||||
|
|
||||||
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['public.customers.json', 'public.orders.json']);
|
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['public.customers.json', 'public.orders.json']);
|
||||||
|
|
@ -131,6 +140,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
const orders = await readJson<Record<string, any>>(stagedDir, 'tables/public.orders.json');
|
const orders = await readJson<Record<string, any>>(stagedDir, 'tables/public.orders.json');
|
||||||
expect(orders).toMatchObject({
|
expect(orders).toMatchObject({
|
||||||
table: 'public.orders',
|
table: 'public.orders',
|
||||||
|
tableRef: tableRef('public.orders'),
|
||||||
stats: {
|
stats: {
|
||||||
executionsBucket: '10-100',
|
executionsBucket: '10-100',
|
||||||
distinctUsersBucket: '2-5',
|
distinctUsersBucket: '2-5',
|
||||||
|
|
@ -159,7 +169,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
{
|
{
|
||||||
id: 'orders-by-status',
|
id: 'orders-by-status',
|
||||||
canonicalSql: expect.stringContaining('public.orders'),
|
canonicalSql: expect.stringContaining('public.orders'),
|
||||||
tablesTouched: ['public.customers', 'public.orders'],
|
tablesTouched: [tableRef('public.customers'), tableRef('public.orders')],
|
||||||
executionsBucket: '10-100',
|
executionsBucket: '10-100',
|
||||||
distinctUsersBucket: '2-5',
|
distinctUsersBucket: '2-5',
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
|
|
@ -167,6 +177,129 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('keeps templates when service-account topUsers are only a partial execution sample', async () => {
|
||||||
|
const stagedDir = await tempDir();
|
||||||
|
const reader: HistoricSqlReader = {
|
||||||
|
async probe() {
|
||||||
|
return { warnings: [], info: [] };
|
||||||
|
},
|
||||||
|
async *fetchAggregated() {
|
||||||
|
yield aggregate({
|
||||||
|
templateId: 'shared-bigquery-template',
|
||||||
|
canonicalSql: 'select status, count(*) from `demo.analytics.orders` group by status',
|
||||||
|
dialect: 'bigquery',
|
||||||
|
stats: {
|
||||||
|
executions: 42,
|
||||||
|
distinctUsers: 2,
|
||||||
|
firstSeen: '2026-05-01T00:00:00.000Z',
|
||||||
|
lastSeen: '2026-05-11T00:00:00.000Z',
|
||||||
|
p50RuntimeMs: 20,
|
||||||
|
p95RuntimeMs: 80,
|
||||||
|
errorRate: 0,
|
||||||
|
rowsProduced: null,
|
||||||
|
},
|
||||||
|
topUsers: [{ user: 'svc_loader', executions: 5 }],
|
||||||
|
});
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const sqlAnalysis: SqlAnalysisPort = {
|
||||||
|
analyzeForFingerprint: vi.fn(),
|
||||||
|
analyzeBatch: vi.fn(async () =>
|
||||||
|
new Map([
|
||||||
|
[
|
||||||
|
'shared-bigquery-template',
|
||||||
|
{
|
||||||
|
tablesTouched: [tableRef('demo.analytics.orders')],
|
||||||
|
columnsByClause: { select: ['status'], groupBy: ['status'] },
|
||||||
|
},
|
||||||
|
],
|
||||||
|
]),
|
||||||
|
),
|
||||||
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
|
};
|
||||||
|
|
||||||
|
await stageHistoricSqlAggregatedSnapshot({
|
||||||
|
stagedDir,
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
queryClient: {},
|
||||||
|
reader,
|
||||||
|
sqlAnalysis,
|
||||||
|
pullConfig: {
|
||||||
|
dialect: 'bigquery',
|
||||||
|
windowDays: 90,
|
||||||
|
enabledSchemas: ['analytics'],
|
||||||
|
filters: {
|
||||||
|
serviceAccounts: { patterns: ['^svc_loader$'], mode: 'exclude' },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
|
});
|
||||||
|
|
||||||
|
const patterns = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
|
||||||
|
expect(patterns.templates.map((template: { id: string }) => template.id)).toEqual([
|
||||||
|
'shared-bigquery-template',
|
||||||
|
]);
|
||||||
|
const orders = await readJson<Record<string, any>>(stagedDir, 'tables/demo.analytics.orders.json');
|
||||||
|
expect(orders.topTemplates).toEqual([
|
||||||
|
{
|
||||||
|
id: 'shared-bigquery-template',
|
||||||
|
canonicalSql: 'select status, count(*) from `demo.analytics.orders` group by status',
|
||||||
|
topUsers: [{ user: 'svc_loader' }],
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('drops service-account-only templates when matched users cover all executions', async () => {
|
||||||
|
const stagedDir = await tempDir();
|
||||||
|
const reader: HistoricSqlReader = {
|
||||||
|
async probe() {
|
||||||
|
return { warnings: [], info: [] };
|
||||||
|
},
|
||||||
|
async *fetchAggregated() {
|
||||||
|
yield aggregate({
|
||||||
|
templateId: 'service-only-template',
|
||||||
|
canonicalSql: 'merge into analytics.orders using staging.orders_delta on orders.id = orders_delta.id',
|
||||||
|
stats: {
|
||||||
|
executions: 12,
|
||||||
|
distinctUsers: 1,
|
||||||
|
firstSeen: '2026-05-01T00:00:00.000Z',
|
||||||
|
lastSeen: '2026-05-11T00:00:00.000Z',
|
||||||
|
p50RuntimeMs: 20,
|
||||||
|
p95RuntimeMs: 80,
|
||||||
|
errorRate: 0,
|
||||||
|
rowsProduced: 0,
|
||||||
|
},
|
||||||
|
topUsers: [{ user: 'svc_loader', executions: 12 }],
|
||||||
|
});
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const sqlAnalysis: SqlAnalysisPort = {
|
||||||
|
analyzeForFingerprint: vi.fn(),
|
||||||
|
analyzeBatch: vi.fn(async () => new Map()),
|
||||||
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
|
};
|
||||||
|
|
||||||
|
await stageHistoricSqlAggregatedSnapshot({
|
||||||
|
stagedDir,
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
queryClient: {},
|
||||||
|
reader,
|
||||||
|
sqlAnalysis,
|
||||||
|
pullConfig: {
|
||||||
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['analytics'],
|
||||||
|
filters: {
|
||||||
|
serviceAccounts: { patterns: ['^svc_loader$'], mode: 'exclude' },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith([], 'postgres', undefined);
|
||||||
|
const patterns = await readJson<Record<string, any>>(stagedDir, 'patterns-input.json');
|
||||||
|
expect(patterns.templates).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
it('redacts configured SQL substrings in staged artifacts while analyzing original SQL', async () => {
|
it('redacts configured SQL substrings in staged artifacts while analyzing original SQL', async () => {
|
||||||
const stagedDir = await tempDir();
|
const stagedDir = await tempDir();
|
||||||
const originalSql =
|
const originalSql =
|
||||||
|
|
@ -198,7 +331,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
[
|
[
|
||||||
'api-events-with-secret',
|
'api-events-with-secret',
|
||||||
{
|
{
|
||||||
tablesTouched: ['public.api_events'],
|
tablesTouched: [tableRef('public.api_events')],
|
||||||
columnsByClause: {
|
columnsByClause: {
|
||||||
select: [],
|
select: [],
|
||||||
where: ['api_key', 'note'],
|
where: ['api_key', 'note'],
|
||||||
|
|
@ -219,6 +352,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
sqlAnalysis,
|
sqlAnalysis,
|
||||||
pullConfig: {
|
pullConfig: {
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['public'],
|
||||||
redactionPatterns: ['sk_live_[A-Za-z0-9]+', '(?i)secret_token_[a-z0-9]+'],
|
redactionPatterns: ['sk_live_[A-Za-z0-9]+', '(?i)secret_token_[a-z0-9]+'],
|
||||||
},
|
},
|
||||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
|
|
@ -227,6 +361,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
|
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
|
||||||
[{ id: 'api-events-with-secret', sql: originalSql }],
|
[{ id: 'api-events-with-secret', sql: originalSql }],
|
||||||
'postgres',
|
'postgres',
|
||||||
|
undefined,
|
||||||
);
|
);
|
||||||
|
|
||||||
const tableJson = await readFile(join(stagedDir, 'tables/public.api_events.json'), 'utf-8');
|
const tableJson = await readFile(join(stagedDir, 'tables/public.api_events.json'), 'utf-8');
|
||||||
|
|
@ -266,21 +401,21 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
[
|
[
|
||||||
'selected-qualified',
|
'selected-qualified',
|
||||||
{
|
{
|
||||||
tablesTouched: ['orbit_analytics.int_active_contract_arr'],
|
tablesTouched: [tableRef('orbit_analytics.int_active_contract_arr')],
|
||||||
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
|
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
'selected-unqualified',
|
'selected-unqualified',
|
||||||
{
|
{
|
||||||
tablesTouched: ['int_customer_health_signals'],
|
tablesTouched: [tableRef('orbit_analytics.int_customer_health_signals')],
|
||||||
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
|
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
'unselected',
|
'unselected',
|
||||||
{
|
{
|
||||||
tablesTouched: ['orbit_raw.accounts'],
|
tablesTouched: [tableRef('orbit_raw.accounts')],
|
||||||
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
|
columnsByClause: { select: [], where: [], join: [], groupBy: [] },
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
|
@ -297,16 +432,16 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
pullConfig: {
|
pullConfig: {
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
enabledTables: [
|
enabledTables: [
|
||||||
'orbit_analytics.int_active_contract_arr',
|
tableRef('orbit_analytics.int_active_contract_arr'),
|
||||||
'orbit_analytics.int_customer_health_signals',
|
tableRef('orbit_analytics.int_customer_health_signals'),
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(await readdir(join(stagedDir, 'tables'))).toEqual([
|
expect(await readdir(join(stagedDir, 'tables'))).toEqual([
|
||||||
'int_customer_health_signals.json',
|
|
||||||
'orbit_analytics.int_active_contract_arr.json',
|
'orbit_analytics.int_active_contract_arr.json',
|
||||||
|
'orbit_analytics.int_customer_health_signals.json',
|
||||||
]);
|
]);
|
||||||
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
|
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
|
||||||
expect(manifest.touchedTableCount).toBe(2);
|
expect(manifest.touchedTableCount).toBe(2);
|
||||||
|
|
@ -372,7 +507,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
[
|
[
|
||||||
'orders-customers-a',
|
'orders-customers-a',
|
||||||
{
|
{
|
||||||
tablesTouched: ['public.orders', 'public.customers'],
|
tablesTouched: [tableRef('public.orders'), tableRef('public.customers')],
|
||||||
columnsByClause: {
|
columnsByClause: {
|
||||||
select: [],
|
select: [],
|
||||||
where: ['payload'],
|
where: ['payload'],
|
||||||
|
|
@ -384,7 +519,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
[
|
[
|
||||||
'orders-customers-b',
|
'orders-customers-b',
|
||||||
{
|
{
|
||||||
tablesTouched: ['public.orders', 'public.customers'],
|
tablesTouched: [tableRef('public.orders'), tableRef('public.customers')],
|
||||||
columnsByClause: {
|
columnsByClause: {
|
||||||
select: [],
|
select: [],
|
||||||
where: ['payload_b'],
|
where: ['payload_b'],
|
||||||
|
|
@ -396,7 +531,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
[
|
[
|
||||||
'orders-single-table',
|
'orders-single-table',
|
||||||
{
|
{
|
||||||
tablesTouched: ['public.orders'],
|
tablesTouched: [tableRef('public.orders')],
|
||||||
columnsByClause: {
|
columnsByClause: {
|
||||||
select: [],
|
select: [],
|
||||||
where: [],
|
where: [],
|
||||||
|
|
@ -415,7 +550,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
queryClient: {},
|
queryClient: {},
|
||||||
reader,
|
reader,
|
||||||
sqlAnalysis,
|
sqlAnalysis,
|
||||||
pullConfig: { dialect: 'postgres' },
|
pullConfig: { dialect: 'postgres', enabledSchemas: ['public'] },
|
||||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -456,7 +591,13 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
const sqlAnalysis: SqlAnalysisPort = {
|
const sqlAnalysis: SqlAnalysisPort = {
|
||||||
analyzeForFingerprint: vi.fn(),
|
analyzeForFingerprint: vi.fn(),
|
||||||
analyzeBatch: vi.fn(async () => new Map([
|
analyzeBatch: vi.fn(async () => new Map([
|
||||||
['analytic', { tablesTouched: ['public.orders'], columnsByClause: { select: ['status'], where: [], join: [], groupBy: ['status'] } }],
|
[
|
||||||
|
'analytic',
|
||||||
|
{
|
||||||
|
tablesTouched: [tableRef('public.orders')],
|
||||||
|
columnsByClause: { select: ['status'], where: [], join: [], groupBy: ['status'] },
|
||||||
|
},
|
||||||
|
],
|
||||||
])),
|
])),
|
||||||
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
};
|
};
|
||||||
|
|
@ -467,7 +608,7 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
queryClient: {},
|
queryClient: {},
|
||||||
reader,
|
reader,
|
||||||
sqlAnalysis,
|
sqlAnalysis,
|
||||||
pullConfig: { dialect: 'postgres' },
|
pullConfig: { dialect: 'postgres', enabledSchemas: ['public'] },
|
||||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -475,26 +616,27 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
|
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledWith(
|
||||||
[{ id: 'analytic', sql: 'select status, count(*) from public.orders group by status' }],
|
[{ id: 'analytic', sql: 'select status, count(*) from public.orders group by status' }],
|
||||||
'postgres',
|
'postgres',
|
||||||
|
undefined,
|
||||||
);
|
);
|
||||||
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['public.orders.json']);
|
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['public.orders.json']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('merges bare and schema-qualified references to the same table into one work unit', async () => {
|
it('keeps modeled-schema refs and drops unmodeled-schema refs by default', async () => {
|
||||||
const stagedDir = await tempDir();
|
const stagedDir = await tempDir();
|
||||||
const reader: HistoricSqlReader = {
|
const reader: HistoricSqlReader = {
|
||||||
async probe() {
|
async probe() {
|
||||||
return { warnings: [], info: [] };
|
return { warnings: [], info: [] };
|
||||||
},
|
},
|
||||||
async *fetchAggregated() {
|
async *fetchAggregated() {
|
||||||
yield aggregate({ templateId: 'qualified', canonicalSql: 'select count(*) from orbit_raw.accounts' });
|
yield aggregate({ templateId: 'modeled', canonicalSql: 'select count(*) from orbit_raw.accounts' });
|
||||||
yield aggregate({ templateId: 'bare', canonicalSql: 'select id from accounts where active' });
|
yield aggregate({ templateId: 'noise', canonicalSql: 'select count(*) from metabase.application_table' });
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
const sqlAnalysis: SqlAnalysisPort = {
|
const sqlAnalysis: SqlAnalysisPort = {
|
||||||
analyzeForFingerprint: vi.fn(),
|
analyzeForFingerprint: vi.fn(),
|
||||||
analyzeBatch: vi.fn(async () => new Map([
|
analyzeBatch: vi.fn(async () => new Map([
|
||||||
['qualified', { tablesTouched: ['orbit_raw.accounts'], columnsByClause: { select: [], where: [], join: [], groupBy: [] } }],
|
['modeled', { tablesTouched: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }], columnsByClause: {} }],
|
||||||
['bare', { tablesTouched: ['accounts'], columnsByClause: { select: ['id'], where: ['active'], join: [], groupBy: [] } }],
|
['noise', { tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }], columnsByClause: {} }],
|
||||||
])),
|
])),
|
||||||
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
};
|
};
|
||||||
|
|
@ -505,16 +647,213 @@ describe('stageHistoricSqlAggregatedSnapshot', () => {
|
||||||
queryClient: {},
|
queryClient: {},
|
||||||
reader,
|
reader,
|
||||||
sqlAnalysis,
|
sqlAnalysis,
|
||||||
pullConfig: { dialect: 'postgres' },
|
pullConfig: {
|
||||||
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['orbit_raw'],
|
||||||
|
modeledTableCatalog: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }],
|
||||||
|
},
|
||||||
now: new Date('2026-05-11T12:00:00.000Z'),
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
});
|
});
|
||||||
|
|
||||||
// The bare `accounts` reference resolves to the unique qualified `orbit_raw.accounts`,
|
|
||||||
// so the two templates collapse into a single work unit instead of two.
|
|
||||||
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['orbit_raw.accounts.json']);
|
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['orbit_raw.accounts.json']);
|
||||||
const merged = await readJson<Record<string, any>>(stagedDir, 'tables/orbit_raw.accounts.json');
|
|
||||||
expect(merged.topTemplates.map((t: any) => t.id).sort()).toEqual(['bare', 'qualified']);
|
|
||||||
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
|
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
|
||||||
expect(manifest.touchedTableCount).toBe(1);
|
expect(manifest.touchedTableCount).toBe(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('fails open when the implicit modeled scope is empty', async () => {
|
||||||
|
const stagedDir = await tempDir();
|
||||||
|
const reader: HistoricSqlReader = {
|
||||||
|
async probe() {
|
||||||
|
return { warnings: [], info: [] };
|
||||||
|
},
|
||||||
|
async *fetchAggregated() {
|
||||||
|
yield aggregate({ templateId: 'any-table', canonicalSql: 'select count(*) from metabase.application_table' });
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const sqlAnalysis: SqlAnalysisPort = {
|
||||||
|
analyzeForFingerprint: vi.fn(),
|
||||||
|
analyzeBatch: vi.fn(async () => new Map([
|
||||||
|
['any-table', { tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }], columnsByClause: {} }],
|
||||||
|
])),
|
||||||
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
|
};
|
||||||
|
|
||||||
|
await stageHistoricSqlAggregatedSnapshot({
|
||||||
|
stagedDir,
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
queryClient: {},
|
||||||
|
reader,
|
||||||
|
sqlAnalysis,
|
||||||
|
pullConfig: { dialect: 'postgres', enabledSchemas: [], modeledTableCatalog: [] },
|
||||||
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['metabase.application_table.json']);
|
||||||
|
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
|
||||||
|
expect(manifest.warnings).toContain('query_history_scope_floor_disabled:empty_modeled_scope');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('lets enabledSchemas star disable the floor', async () => {
|
||||||
|
const stagedDir = await tempDir();
|
||||||
|
const reader: HistoricSqlReader = {
|
||||||
|
async probe() {
|
||||||
|
return { warnings: [], info: [] };
|
||||||
|
},
|
||||||
|
async *fetchAggregated() {
|
||||||
|
yield aggregate({ templateId: 'noise', canonicalSql: 'select count(*) from metabase.application_table' });
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const sqlAnalysis: SqlAnalysisPort = {
|
||||||
|
analyzeForFingerprint: vi.fn(),
|
||||||
|
analyzeBatch: vi.fn(async () => new Map([
|
||||||
|
['noise', { tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }], columnsByClause: {} }],
|
||||||
|
])),
|
||||||
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
|
};
|
||||||
|
|
||||||
|
await stageHistoricSqlAggregatedSnapshot({
|
||||||
|
stagedDir,
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
queryClient: {},
|
||||||
|
reader,
|
||||||
|
sqlAnalysis,
|
||||||
|
pullConfig: {
|
||||||
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['*'],
|
||||||
|
modeledTableCatalog: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }],
|
||||||
|
},
|
||||||
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['metabase.application_table.json']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('matches BigQuery dataset scope even when refs include a catalog', async () => {
|
||||||
|
const stagedDir = await tempDir();
|
||||||
|
const reader: HistoricSqlReader = {
|
||||||
|
async probe() {
|
||||||
|
return { warnings: [], info: [] };
|
||||||
|
},
|
||||||
|
async *fetchAggregated() {
|
||||||
|
yield aggregate({ templateId: 'modeled', canonicalSql: 'select count(*) from `demo-project.orbit_analytics.orders`' });
|
||||||
|
yield aggregate({ templateId: 'noise', canonicalSql: 'select count(*) from `demo-project.metabase.application_table`' });
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const sqlAnalysis: SqlAnalysisPort = {
|
||||||
|
analyzeForFingerprint: vi.fn(),
|
||||||
|
analyzeBatch: vi.fn(async () => new Map([
|
||||||
|
['modeled', { tablesTouched: [{ catalog: 'demo-project', db: 'orbit_analytics', name: 'orders' }], columnsByClause: {} }],
|
||||||
|
['noise', { tablesTouched: [{ catalog: 'demo-project', db: 'metabase', name: 'application_table' }], columnsByClause: {} }],
|
||||||
|
])),
|
||||||
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
|
};
|
||||||
|
|
||||||
|
await stageHistoricSqlAggregatedSnapshot({
|
||||||
|
stagedDir,
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
queryClient: {},
|
||||||
|
reader,
|
||||||
|
sqlAnalysis,
|
||||||
|
pullConfig: {
|
||||||
|
dialect: 'bigquery',
|
||||||
|
enabledSchemas: ['orbit_analytics'],
|
||||||
|
modeledTableCatalog: [{ catalog: 'demo-project', db: 'orbit_analytics', name: 'orders' }],
|
||||||
|
},
|
||||||
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['demo-project.orbit_analytics.orders.json']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('writes propagated scope-floor warnings to the staged manifest', async () => {
|
||||||
|
const stagedDir = await tempDir();
|
||||||
|
const reader: HistoricSqlReader = {
|
||||||
|
async probe() {
|
||||||
|
return { warnings: [], info: [] };
|
||||||
|
},
|
||||||
|
async *fetchAggregated() {
|
||||||
|
yield aggregate({ templateId: 'any-table', canonicalSql: 'select count(*) from metabase.application_table' });
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const sqlAnalysis: SqlAnalysisPort = {
|
||||||
|
analyzeForFingerprint: vi.fn(),
|
||||||
|
analyzeBatch: vi.fn(async () => new Map([
|
||||||
|
['any-table', { tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }], columnsByClause: {} }],
|
||||||
|
])),
|
||||||
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
|
};
|
||||||
|
|
||||||
|
await stageHistoricSqlAggregatedSnapshot({
|
||||||
|
stagedDir,
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
queryClient: {},
|
||||||
|
reader,
|
||||||
|
sqlAnalysis,
|
||||||
|
pullConfig: {
|
||||||
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['*'],
|
||||||
|
scopeFloorWarnings: ['query_history_scope_floor_disabled:catalog_unavailable'],
|
||||||
|
},
|
||||||
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
|
});
|
||||||
|
|
||||||
|
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
|
||||||
|
expect(manifest.warnings).toContain('query_history_scope_floor_disabled:catalog_unavailable');
|
||||||
|
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['metabase.application_table.json']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('retries without the catalog and disables the floor when catalog qualification fails wholesale', async () => {
|
||||||
|
const stagedDir = await tempDir();
|
||||||
|
const reader: HistoricSqlReader = {
|
||||||
|
async probe() {
|
||||||
|
return { warnings: [], info: [] };
|
||||||
|
},
|
||||||
|
async *fetchAggregated() {
|
||||||
|
yield aggregate({ templateId: 'noise', canonicalSql: 'select count(*) from metabase.application_table' });
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const sqlAnalysis: SqlAnalysisPort = {
|
||||||
|
analyzeForFingerprint: vi.fn(),
|
||||||
|
analyzeBatch: vi
|
||||||
|
.fn()
|
||||||
|
.mockRejectedValueOnce(new Error('catalog qualification failed'))
|
||||||
|
.mockResolvedValueOnce(
|
||||||
|
new Map([
|
||||||
|
['noise', { tablesTouched: [{ catalog: null, db: 'metabase', name: 'application_table' }], columnsByClause: {} }],
|
||||||
|
]),
|
||||||
|
),
|
||||||
|
validateReadOnly: vi.fn(async () => ({ ok: true })),
|
||||||
|
};
|
||||||
|
|
||||||
|
await stageHistoricSqlAggregatedSnapshot({
|
||||||
|
stagedDir,
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
queryClient: {},
|
||||||
|
reader,
|
||||||
|
sqlAnalysis,
|
||||||
|
pullConfig: {
|
||||||
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['orbit_raw'],
|
||||||
|
modeledTableCatalog: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }],
|
||||||
|
},
|
||||||
|
now: new Date('2026-05-11T12:00:00.000Z'),
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(sqlAnalysis.analyzeBatch).toHaveBeenCalledTimes(2);
|
||||||
|
expect(sqlAnalysis.analyzeBatch).toHaveBeenNthCalledWith(
|
||||||
|
1,
|
||||||
|
[{ id: 'noise', sql: 'select count(*) from metabase.application_table' }],
|
||||||
|
'postgres',
|
||||||
|
{ catalog: { tables: [{ catalog: null, db: 'orbit_raw', name: 'accounts' }] } },
|
||||||
|
);
|
||||||
|
expect(sqlAnalysis.analyzeBatch).toHaveBeenNthCalledWith(
|
||||||
|
2,
|
||||||
|
[{ id: 'noise', sql: 'select count(*) from metabase.application_table' }],
|
||||||
|
'postgres',
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
expect(await readdir(join(stagedDir, 'tables'))).toEqual(['metabase.application_table.json']);
|
||||||
|
const manifest = await readJson<Record<string, any>>(stagedDir, 'manifest.json');
|
||||||
|
expect(manifest.warnings).toContain('query_history_scope_floor_disabled:catalog_qualification_failed');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,7 @@ describe('historic-sql unified contracts', () => {
|
||||||
expect(
|
expect(
|
||||||
stagedTableInputSchema.parse({
|
stagedTableInputSchema.parse({
|
||||||
table: 'public.orders',
|
table: 'public.orders',
|
||||||
|
tableRef: { catalog: null, db: 'public', name: 'orders' },
|
||||||
stats: {
|
stats: {
|
||||||
executionsBucket: '10-100',
|
executionsBucket: '10-100',
|
||||||
distinctUsersBucket: '2-5',
|
distinctUsersBucket: '2-5',
|
||||||
|
|
@ -81,7 +82,7 @@ describe('historic-sql unified contracts', () => {
|
||||||
{
|
{
|
||||||
id: 'pg:123',
|
id: 'pg:123',
|
||||||
canonicalSql: 'select * from public.orders',
|
canonicalSql: 'select * from public.orders',
|
||||||
tablesTouched: ['public.orders'],
|
tablesTouched: [{ catalog: null, db: 'public', name: 'orders' }],
|
||||||
executionsBucket: '10-100',
|
executionsBucket: '10-100',
|
||||||
distinctUsersBucket: '2-5',
|
distinctUsersBucket: '2-5',
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||||
import { tmpdir } from 'node:os';
|
import { tmpdir } from 'node:os';
|
||||||
import { join } from 'node:path';
|
import { join } from 'node:path';
|
||||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
|
|
@ -34,6 +34,36 @@ describe('local ingest adapters', () => {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function seedLiveScanTable(
|
||||||
|
projectDir: string,
|
||||||
|
connectionId: string,
|
||||||
|
table: { catalog: string | null; db: string | null; name: string },
|
||||||
|
): Promise<void> {
|
||||||
|
const rawRoot = join(projectDir, 'raw-sources', connectionId, 'live-database', 'sync-1');
|
||||||
|
await mkdir(join(rawRoot, 'tables'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
join(rawRoot, 'connection.json'),
|
||||||
|
`${JSON.stringify({ connectionId, driver: 'postgres' }, null, 2)}\n`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
await writeFile(
|
||||||
|
join(rawRoot, 'tables', `${table.db ?? 'default'}-${table.name}.json`),
|
||||||
|
`${JSON.stringify(
|
||||||
|
{
|
||||||
|
...table,
|
||||||
|
kind: 'table',
|
||||||
|
comment: null,
|
||||||
|
estimatedRows: null,
|
||||||
|
columns: [],
|
||||||
|
foreignKeys: [],
|
||||||
|
},
|
||||||
|
null,
|
||||||
|
2,
|
||||||
|
)}\n`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
it('registers Metabase locally as a staged-bundle adapter', () => {
|
it('registers Metabase locally as a staged-bundle adapter', () => {
|
||||||
const adapters = createDefaultLocalIngestAdapters(project);
|
const adapters = createDefaultLocalIngestAdapters(project);
|
||||||
|
|
||||||
|
|
@ -205,11 +235,14 @@ describe('local ingest adapters', () => {
|
||||||
dialect: 'postgres',
|
dialect: 'postgres',
|
||||||
minExecutions: 7,
|
minExecutions: 7,
|
||||||
enabledTables: [],
|
enabledTables: [],
|
||||||
|
enabledSchemas: [],
|
||||||
|
modeledTableCatalog: [],
|
||||||
filters: {
|
filters: {
|
||||||
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
|
serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
|
||||||
dropTrivialProbes: true,
|
dropTrivialProbes: true,
|
||||||
},
|
},
|
||||||
redactionPatterns: [],
|
redactionPatterns: [],
|
||||||
|
scopeFloorWarnings: [],
|
||||||
staleArchiveAfterDays: 90,
|
staleArchiveAfterDays: 90,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
@ -237,6 +270,71 @@ describe('local ingest adapters', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('passes computed modeled scope to direct historic-sql adapter pull config', async () => {
|
||||||
|
await mkdir(join(project.projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
join(project.projectDir, 'semantic-layer/warehouse/revenue.yaml'),
|
||||||
|
[
|
||||||
|
'name: revenue',
|
||||||
|
'table: orbit_analytics.mart_revenue',
|
||||||
|
'grain: [id]',
|
||||||
|
'columns:',
|
||||||
|
' - name: id',
|
||||||
|
' type: string',
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
await seedLiveScanTable(project.projectDir, 'warehouse', {
|
||||||
|
catalog: null,
|
||||||
|
db: 'orbit_raw',
|
||||||
|
name: 'accounts',
|
||||||
|
});
|
||||||
|
const projectWithQueryHistory = projectWithConnections({
|
||||||
|
warehouse: {
|
||||||
|
driver: 'postgres',
|
||||||
|
schemas: ['orbit_raw'],
|
||||||
|
context: {
|
||||||
|
queryHistory: {
|
||||||
|
enabled: true,
|
||||||
|
minExecutions: 7,
|
||||||
|
filters: { dropTrivialProbes: true },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const adapter = { source: 'historic-sql' } as never;
|
||||||
|
|
||||||
|
await expect(localPullConfigForAdapter(projectWithQueryHistory, adapter, 'warehouse')).resolves.toMatchObject({
|
||||||
|
dialect: 'postgres',
|
||||||
|
minExecutions: 7,
|
||||||
|
enabledSchemas: ['orbit_analytics', 'orbit_raw'],
|
||||||
|
modeledTableCatalog: [
|
||||||
|
{ catalog: null, db: 'orbit_analytics', name: 'mart_revenue' },
|
||||||
|
{ catalog: null, db: 'orbit_raw', name: 'accounts' },
|
||||||
|
],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('passes query-history scope fail-open warnings to direct historic-sql pull config', async () => {
|
||||||
|
const projectDir = await mkdtemp(join(tmpdir(), 'ktx-local-qh-scope-warning-'));
|
||||||
|
const project = await initKtxProject({ projectDir });
|
||||||
|
project.config.connections.warehouse = {
|
||||||
|
driver: 'postgres',
|
||||||
|
schemas: ['orbit_raw'],
|
||||||
|
context: { queryHistory: { enabled: true } },
|
||||||
|
} as never;
|
||||||
|
const adapter = { source: 'historic-sql' } as never;
|
||||||
|
|
||||||
|
await expect(localPullConfigForAdapter(project, adapter, 'warehouse')).resolves.toMatchObject({
|
||||||
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['*'],
|
||||||
|
scopeFloorWarnings: ['query_history_scope_floor_disabled:catalog_unavailable'],
|
||||||
|
});
|
||||||
|
|
||||||
|
await rm(projectDir, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
it('rejects local historic-sql pulls when the connection has not enabled historic SQL', async () => {
|
it('rejects local historic-sql pulls when the connection has not enabled historic SQL', async () => {
|
||||||
const historicSql = createDefaultLocalIngestAdapters(project, {
|
const historicSql = createDefaultLocalIngestAdapters(project, {
|
||||||
historicSql: {
|
historicSql: {
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,10 @@ describe('createHttpSqlAnalysisPort', () => {
|
||||||
const requestJson = vi.fn(async () => ({
|
const requestJson = vi.fn(async () => ({
|
||||||
results: {
|
results: {
|
||||||
orders: {
|
orders: {
|
||||||
tables_touched: ['public.orders', 'public.customers'],
|
tables_touched: [
|
||||||
|
{ catalog: null, db: 'public', name: 'orders' },
|
||||||
|
{ catalog: null, db: 'public', name: 'customers' },
|
||||||
|
],
|
||||||
columns_by_clause: {
|
columns_by_clause: {
|
||||||
select: ['status'],
|
select: ['status'],
|
||||||
where: ['created_at'],
|
where: ['created_at'],
|
||||||
|
|
@ -79,7 +82,10 @@ describe('createHttpSqlAnalysisPort', () => {
|
||||||
[
|
[
|
||||||
'orders',
|
'orders',
|
||||||
{
|
{
|
||||||
tablesTouched: ['public.orders', 'public.customers'],
|
tablesTouched: [
|
||||||
|
{ catalog: null, db: 'public', name: 'orders' },
|
||||||
|
{ catalog: null, db: 'public', name: 'customers' },
|
||||||
|
],
|
||||||
columnsByClause: {
|
columnsByClause: {
|
||||||
select: ['status'],
|
select: ['status'],
|
||||||
where: ['created_at'],
|
where: ['created_at'],
|
||||||
|
|
@ -108,6 +114,62 @@ describe('createHttpSqlAnalysisPort', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('passes an optional catalog and maps structured table refs for SQL batch analysis', async () => {
|
||||||
|
const requestJson = vi.fn(async () => ({
|
||||||
|
results: {
|
||||||
|
orders: {
|
||||||
|
tables_touched: [
|
||||||
|
{ catalog: null, db: 'orbit_raw', name: 'accounts' },
|
||||||
|
{ catalog: 'demo_project', db: 'orbit_analytics', name: 'orders' },
|
||||||
|
],
|
||||||
|
columns_by_clause: { select: ['id'] },
|
||||||
|
error: null,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
const port = createHttpSqlAnalysisPort({ baseUrl: 'http://python.test', requestJson });
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
port.analyzeBatch(
|
||||||
|
[{ id: 'orders', sql: 'select id from accounts' }],
|
||||||
|
'postgres',
|
||||||
|
{
|
||||||
|
catalog: {
|
||||||
|
tables: [
|
||||||
|
{ catalog: null, db: 'orbit_raw', name: 'accounts', columns: ['id'] },
|
||||||
|
{ catalog: 'demo_project', db: 'orbit_analytics', name: 'orders', columns: ['id'] },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
).resolves.toEqual(
|
||||||
|
new Map([
|
||||||
|
[
|
||||||
|
'orders',
|
||||||
|
{
|
||||||
|
tablesTouched: [
|
||||||
|
{ catalog: null, db: 'orbit_raw', name: 'accounts' },
|
||||||
|
{ catalog: 'demo_project', db: 'orbit_analytics', name: 'orders' },
|
||||||
|
],
|
||||||
|
columnsByClause: { select: ['id'] },
|
||||||
|
error: null,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
]),
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(requestJson).toHaveBeenCalledWith('/sql/analyze-batch', {
|
||||||
|
dialect: 'postgres',
|
||||||
|
items: [{ id: 'orders', sql: 'select id from accounts' }],
|
||||||
|
catalog: {
|
||||||
|
tables: [
|
||||||
|
{ catalog: null, db: 'orbit_raw', name: 'accounts', columns: ['id'] },
|
||||||
|
{ catalog: 'demo_project', db: 'orbit_analytics', name: 'orders', columns: ['id'] },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it('maps read-only SQL validation responses', async () => {
|
it('maps read-only SQL validation responses', async () => {
|
||||||
const requests: Array<{ path: string; payload: Record<string, unknown> }> = [];
|
const requests: Array<{ path: string; payload: Record<string, unknown> }> = [];
|
||||||
const port = createHttpSqlAnalysisPort({
|
const port = createHttpSqlAnalysisPort({
|
||||||
|
|
@ -150,7 +212,7 @@ describe('createHttpSqlAnalysisPort', () => {
|
||||||
const requestJson = vi.fn(async () => ({
|
const requestJson = vi.fn(async () => ({
|
||||||
results: {
|
results: {
|
||||||
orders: {
|
orders: {
|
||||||
tables_touched: ['public.orders'],
|
tables_touched: [{ catalog: null, db: 'public', name: 'orders' }],
|
||||||
columns_by_clause: { select: ['status'], where: [42] },
|
columns_by_clause: { select: ['status'], where: [42] },
|
||||||
error: null,
|
error: null,
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,8 @@ import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||||
import { tmpdir } from 'node:os';
|
import { tmpdir } from 'node:os';
|
||||||
import { join } from 'node:path';
|
import { join } from 'node:path';
|
||||||
import { loadKtxProject } from '../src/context/project/project.js';
|
import { loadKtxProject } from '../src/context/project/project.js';
|
||||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
import { createKtxCliLocalIngestAdapters } from '../src/local-adapters.js';
|
import { createKtxCliHistoricSqlRuntime, createKtxCliLocalIngestAdapters } from '../src/local-adapters.js';
|
||||||
|
|
||||||
function sqlAnalysisStub() {
|
function sqlAnalysisStub() {
|
||||||
return {
|
return {
|
||||||
|
|
@ -70,6 +70,116 @@ describe('CLI local ingest adapters', () => {
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('creates reusable query-history runtime dependencies for setup', async () => {
|
||||||
|
await writeProject(
|
||||||
|
tempDir,
|
||||||
|
[
|
||||||
|
'connections:',
|
||||||
|
' warehouse:',
|
||||||
|
' driver: postgres',
|
||||||
|
' url: env:WAREHOUSE_DATABASE_URL',
|
||||||
|
' readonly: true',
|
||||||
|
' context:',
|
||||||
|
' queryHistory:',
|
||||||
|
' enabled: true',
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
);
|
||||||
|
const project = await loadKtxProject({ projectDir: tempDir });
|
||||||
|
const sqlAnalysis = sqlAnalysisStub();
|
||||||
|
|
||||||
|
const runtime = createKtxCliHistoricSqlRuntime(project, 'warehouse', { sqlAnalysis });
|
||||||
|
|
||||||
|
expect(runtime).toMatchObject({
|
||||||
|
dialect: 'postgres',
|
||||||
|
sqlAnalysis,
|
||||||
|
});
|
||||||
|
expect(runtime?.reader).toBeDefined();
|
||||||
|
expect(runtime?.queryClient).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('uses managed daemon SQL analysis when query-history runtime gets managed daemon options', async () => {
|
||||||
|
await writeProject(
|
||||||
|
tempDir,
|
||||||
|
[
|
||||||
|
'connections:',
|
||||||
|
' warehouse:',
|
||||||
|
' driver: postgres',
|
||||||
|
' url: env:WAREHOUSE_DATABASE_URL',
|
||||||
|
' readonly: true',
|
||||||
|
' context:',
|
||||||
|
' queryHistory:',
|
||||||
|
' enabled: true',
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
);
|
||||||
|
const project = await loadKtxProject({ projectDir: tempDir });
|
||||||
|
const testIo = {
|
||||||
|
stdout: { write: vi.fn() },
|
||||||
|
stderr: { write: vi.fn() },
|
||||||
|
};
|
||||||
|
const ensureRuntime = vi.fn(async () => ({
|
||||||
|
layout: {} as never,
|
||||||
|
manifest: {} as never,
|
||||||
|
}));
|
||||||
|
const startDaemon = vi.fn(async () => ({
|
||||||
|
status: 'started' as const,
|
||||||
|
layout: {} as never,
|
||||||
|
state: { pid: 1234 } as never,
|
||||||
|
baseUrl: 'http://127.0.0.1:61234',
|
||||||
|
}));
|
||||||
|
const postJson = vi.fn(async () => ({
|
||||||
|
results: {
|
||||||
|
probe: {
|
||||||
|
tables_touched: [],
|
||||||
|
columns_by_clause: {},
|
||||||
|
error: null,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
|
||||||
|
const runtime = createKtxCliHistoricSqlRuntime(project, 'warehouse', {
|
||||||
|
managedDaemon: {
|
||||||
|
cliVersion: '0.2.0',
|
||||||
|
projectDir: tempDir,
|
||||||
|
installPolicy: 'auto',
|
||||||
|
io: testIo,
|
||||||
|
ensureRuntime,
|
||||||
|
startDaemon,
|
||||||
|
postJson,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await expect(runtime?.sqlAnalysis.analyzeBatch([{ id: 'probe', sql: 'select 1' }], 'postgres')).resolves.toEqual(
|
||||||
|
new Map([
|
||||||
|
[
|
||||||
|
'probe',
|
||||||
|
{
|
||||||
|
tablesTouched: [],
|
||||||
|
columnsByClause: {},
|
||||||
|
error: null,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
]),
|
||||||
|
);
|
||||||
|
expect(ensureRuntime).toHaveBeenCalledWith({
|
||||||
|
cliVersion: '0.2.0',
|
||||||
|
installPolicy: 'auto',
|
||||||
|
io: testIo,
|
||||||
|
feature: 'core',
|
||||||
|
});
|
||||||
|
expect(startDaemon).toHaveBeenCalledWith({
|
||||||
|
cliVersion: '0.2.0',
|
||||||
|
projectDir: tempDir,
|
||||||
|
features: ['core'],
|
||||||
|
force: false,
|
||||||
|
});
|
||||||
|
expect(postJson).toHaveBeenCalledWith('http://127.0.0.1:61234', '/sql/analyze-batch', {
|
||||||
|
dialect: 'postgres',
|
||||||
|
items: [{ id: 'probe', sql: 'select 1' }],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it('registers historic SQL when explicitly requested even if connection query history is disabled', async () => {
|
it('registers historic SQL when explicitly requested even if connection query history is disabled', async () => {
|
||||||
await writeProject(
|
await writeProject(
|
||||||
tempDir,
|
tempDir,
|
||||||
|
|
|
||||||
|
|
@ -161,7 +161,7 @@ describe('KTX daemon ingest ports', () => {
|
||||||
const requestJson = vi.fn(async () => ({
|
const requestJson = vi.fn(async () => ({
|
||||||
results: {
|
results: {
|
||||||
orders: {
|
orders: {
|
||||||
tables_touched: ['public.orders'],
|
tables_touched: [{ catalog: null, db: 'public', name: 'orders' }],
|
||||||
columns_by_clause: { select: ['status'] },
|
columns_by_clause: { select: ['status'] },
|
||||||
error: null,
|
error: null,
|
||||||
},
|
},
|
||||||
|
|
@ -175,7 +175,7 @@ describe('KTX daemon ingest ports', () => {
|
||||||
[
|
[
|
||||||
'orders',
|
'orders',
|
||||||
{
|
{
|
||||||
tablesTouched: ['public.orders'],
|
tablesTouched: [{ catalog: null, db: 'public', name: 'orders' }],
|
||||||
columnsByClause: { select: ['status'] },
|
columnsByClause: { select: ['status'] },
|
||||||
error: null,
|
error: null,
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
import { mkdtemp, rm } from 'node:fs/promises';
|
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||||
import { tmpdir } from 'node:os';
|
import { tmpdir } from 'node:os';
|
||||||
import { join } from 'node:path';
|
import { join } from 'node:path';
|
||||||
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '../src/context/project/config.js';
|
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '../src/context/project/config.js';
|
||||||
|
|
@ -668,12 +668,134 @@ describe('runKtxPublicIngest', () => {
|
||||||
dropFailedBelow: { errorRate: 0.5, executions: 3 },
|
dropFailedBelow: { errorRate: 0.5, executions: 3 },
|
||||||
},
|
},
|
||||||
redactionPatterns: ['(?i)secret'],
|
redactionPatterns: ['(?i)secret'],
|
||||||
enabledTables: ['orbit_analytics.int_active_contract_arr'],
|
enabledTables: [{ catalog: null, db: 'orbit_analytics', name: 'int_active_contract_arr' }],
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
expect(ingestArgs?.historicSqlPullConfigOverride).not.toHaveProperty('enabled');
|
expect(ingestArgs?.historicSqlPullConfigOverride).not.toHaveProperty('enabled');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('resolves query-history scope after the schema scan writes artifacts', async () => {
|
||||||
|
const io = makeIo();
|
||||||
|
const projectDir = await mkdtemp(join(tmpdir(), 'ktx-public-qh-scope-'));
|
||||||
|
const project = deepReadyProject({
|
||||||
|
warehouse: {
|
||||||
|
driver: 'postgres',
|
||||||
|
schemas: ['orbit_raw'],
|
||||||
|
context: { queryHistory: { enabled: true } },
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const runScan = vi.fn(async () => {
|
||||||
|
await mkdir(join(projectDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
join(projectDir, 'semantic-layer/warehouse/revenue.yaml'),
|
||||||
|
[
|
||||||
|
'name: revenue',
|
||||||
|
'table: orbit_analytics.mart_revenue',
|
||||||
|
'grain: [id]',
|
||||||
|
'columns:',
|
||||||
|
' - name: id',
|
||||||
|
' type: string',
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
const rawRoot = join(projectDir, 'raw-sources/warehouse/live-database/sync-1');
|
||||||
|
await mkdir(join(rawRoot, 'tables'), { recursive: true });
|
||||||
|
await writeFile(
|
||||||
|
join(rawRoot, 'connection.json'),
|
||||||
|
`${JSON.stringify({ connectionId: 'warehouse', driver: 'postgres' }, null, 2)}\n`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
await writeFile(
|
||||||
|
join(rawRoot, 'tables/accounts.json'),
|
||||||
|
`${JSON.stringify(
|
||||||
|
{
|
||||||
|
catalog: null,
|
||||||
|
db: 'orbit_raw',
|
||||||
|
name: 'accounts',
|
||||||
|
kind: 'table',
|
||||||
|
comment: null,
|
||||||
|
estimatedRows: null,
|
||||||
|
columns: [
|
||||||
|
{
|
||||||
|
name: 'id',
|
||||||
|
nativeType: 'integer',
|
||||||
|
normalizedType: 'integer',
|
||||||
|
dimensionType: 'number',
|
||||||
|
nullable: false,
|
||||||
|
primaryKey: true,
|
||||||
|
comment: null,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
foreignKeys: [],
|
||||||
|
},
|
||||||
|
null,
|
||||||
|
2,
|
||||||
|
)}\n`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
await writeFile(
|
||||||
|
join(rawRoot, 'scan-report.json'),
|
||||||
|
`${JSON.stringify(
|
||||||
|
{
|
||||||
|
connectionId: 'warehouse',
|
||||||
|
driver: 'postgres',
|
||||||
|
syncId: 'sync-1',
|
||||||
|
runId: 'scan-sync-1',
|
||||||
|
trigger: 'cli',
|
||||||
|
mode: 'enriched',
|
||||||
|
dryRun: false,
|
||||||
|
artifactPaths: {
|
||||||
|
rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',
|
||||||
|
reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
|
||||||
|
manifestShards: [],
|
||||||
|
enrichmentArtifacts: [],
|
||||||
|
},
|
||||||
|
counts: {},
|
||||||
|
warnings: [],
|
||||||
|
enrichment: {},
|
||||||
|
enrichmentState: {},
|
||||||
|
},
|
||||||
|
null,
|
||||||
|
2,
|
||||||
|
)}\n`,
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
return 0;
|
||||||
|
});
|
||||||
|
const runIngest = vi.fn<NonNullable<KtxPublicIngestDeps['runIngest']>>(async () => 0);
|
||||||
|
|
||||||
|
await expect(
|
||||||
|
runKtxPublicIngest(
|
||||||
|
{
|
||||||
|
command: 'run',
|
||||||
|
projectDir,
|
||||||
|
targetConnectionId: 'warehouse',
|
||||||
|
all: false,
|
||||||
|
json: false,
|
||||||
|
inputMode: 'disabled',
|
||||||
|
queryHistory: 'enabled',
|
||||||
|
},
|
||||||
|
io.io,
|
||||||
|
{ loadProject: vi.fn(async () => ({ ...project, projectDir })), runScan, runIngest },
|
||||||
|
),
|
||||||
|
).resolves.toBe(0);
|
||||||
|
|
||||||
|
const ingestArgs = runIngest.mock.calls[0]?.[0] as
|
||||||
|
| Extract<Parameters<NonNullable<KtxPublicIngestDeps['runIngest']>>[0], { command: 'run' }>
|
||||||
|
| undefined;
|
||||||
|
expect(ingestArgs?.historicSqlPullConfigOverride).toMatchObject({
|
||||||
|
dialect: 'postgres',
|
||||||
|
enabledSchemas: ['orbit_analytics', 'orbit_raw'],
|
||||||
|
modeledTableCatalog: [
|
||||||
|
{ catalog: null, db: 'orbit_analytics', name: 'mart_revenue' },
|
||||||
|
{ catalog: null, db: 'orbit_raw', name: 'accounts' },
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
await rm(projectDir, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
it('prints the schema-first notice for explicit query-history runs', async () => {
|
it('prints the schema-first notice for explicit query-history runs', async () => {
|
||||||
const io = makeIo();
|
const io = makeIo();
|
||||||
const project = deepReadyProject({
|
const project = deepReadyProject({
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import { parseKtxProjectConfig } from '../src/context/project/config.js';
|
||||||
import { readKtxSetupState, writeKtxSetupState } from '../src/context/project/setup-config.js';
|
import { readKtxSetupState, writeKtxSetupState } from '../src/context/project/setup-config.js';
|
||||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
import {
|
import {
|
||||||
|
managedDaemonOptionsForSetupQueryHistoryPicker,
|
||||||
type KtxSetupDatabaseDriver,
|
type KtxSetupDatabaseDriver,
|
||||||
type KtxSetupDatabasesDeps,
|
type KtxSetupDatabasesDeps,
|
||||||
type KtxSetupDatabasesPromptAdapter,
|
type KtxSetupDatabasesPromptAdapter,
|
||||||
|
|
@ -137,6 +138,22 @@ function textInputPrompt(message: string): string {
|
||||||
return `${title}\n│\n│ ${bodyLines.join('\n│ ')}\n│ Press Escape to go back.\n│`;
|
return `${title}\n│\n│ ${bodyLines.join('\n│ ')}\n│ Press Escape to go back.\n│`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function queryHistoryFromConfig(connection: unknown): {
|
||||||
|
filters?: { serviceAccounts?: unknown; dropTrivialProbes?: boolean };
|
||||||
|
} | undefined {
|
||||||
|
if (!connection || typeof connection !== 'object' || Array.isArray(connection)) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
const context = (connection as { context?: unknown }).context;
|
||||||
|
if (!context || typeof context !== 'object' || Array.isArray(context)) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
const queryHistory = (context as { queryHistory?: unknown }).queryHistory;
|
||||||
|
return queryHistory && typeof queryHistory === 'object' && !Array.isArray(queryHistory)
|
||||||
|
? (queryHistory as { filters?: { serviceAccounts?: unknown; dropTrivialProbes?: boolean } })
|
||||||
|
: undefined;
|
||||||
|
}
|
||||||
|
|
||||||
describe('setup databases step', () => {
|
describe('setup databases step', () => {
|
||||||
let tempDir: string;
|
let tempDir: string;
|
||||||
|
|
||||||
|
|
@ -150,6 +167,61 @@ describe('setup databases step', () => {
|
||||||
await rm(tempDir, { recursive: true, force: true });
|
await rm(tempDir, { recursive: true, force: true });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('builds managed daemon options for setup query-history SQL analysis', () => {
|
||||||
|
const io = makeIo();
|
||||||
|
|
||||||
|
expect(
|
||||||
|
managedDaemonOptionsForSetupQueryHistoryPicker({
|
||||||
|
projectDir: tempDir,
|
||||||
|
args: {
|
||||||
|
inputMode: 'disabled',
|
||||||
|
cliVersion: '0.2.0',
|
||||||
|
runtimeInstallPolicy: 'auto',
|
||||||
|
},
|
||||||
|
io: io.io,
|
||||||
|
}),
|
||||||
|
).toEqual({
|
||||||
|
cliVersion: '0.2.0',
|
||||||
|
projectDir: tempDir,
|
||||||
|
installPolicy: 'auto',
|
||||||
|
io: io.io,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('defaults managed daemon setup options when the database step is called directly', () => {
|
||||||
|
const io = makeIo();
|
||||||
|
|
||||||
|
expect(
|
||||||
|
managedDaemonOptionsForSetupQueryHistoryPicker({
|
||||||
|
projectDir: tempDir,
|
||||||
|
args: {
|
||||||
|
inputMode: 'disabled',
|
||||||
|
},
|
||||||
|
io: io.io,
|
||||||
|
}),
|
||||||
|
).toMatchObject({
|
||||||
|
cliVersion: expect.any(String),
|
||||||
|
projectDir: tempDir,
|
||||||
|
installPolicy: 'never',
|
||||||
|
io: io.io,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(
|
||||||
|
managedDaemonOptionsForSetupQueryHistoryPicker({
|
||||||
|
projectDir: tempDir,
|
||||||
|
args: {
|
||||||
|
inputMode: 'auto',
|
||||||
|
},
|
||||||
|
io: io.io,
|
||||||
|
}),
|
||||||
|
).toMatchObject({
|
||||||
|
cliVersion: expect.any(String),
|
||||||
|
projectDir: tempDir,
|
||||||
|
installPolicy: 'prompt',
|
||||||
|
io: io.io,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it('shows every supported database in the interactive checklist', async () => {
|
it('shows every supported database in the interactive checklist', async () => {
|
||||||
const prompts = makePromptAdapter({ multiselectValues: [['back']] });
|
const prompts = makePromptAdapter({ multiselectValues: [['back']] });
|
||||||
|
|
||||||
|
|
@ -2569,6 +2641,190 @@ describe('setup databases step', () => {
|
||||||
expect(io.stdout()).toContain('pg_stat_statements ready');
|
expect(io.stdout()).toContain('pg_stat_statements ready');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('auto-applies derived query-history service-account filters in non-interactive setup', async () => {
|
||||||
|
const io = makeIo();
|
||||||
|
const queryHistoryFilterPicker = vi.fn(async () => ({
|
||||||
|
excludedRoles: [
|
||||||
|
{
|
||||||
|
role: 'svc_loader',
|
||||||
|
pattern: '^svc_loader$',
|
||||||
|
reason: 'Runs recurring loader traffic against modeled tables.',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
consideredRoleCount: 2,
|
||||||
|
skipped: null,
|
||||||
|
warnings: [],
|
||||||
|
}));
|
||||||
|
|
||||||
|
const result = await runKtxSetupDatabasesStep(
|
||||||
|
{
|
||||||
|
projectDir: tempDir,
|
||||||
|
inputMode: 'disabled',
|
||||||
|
yes: true,
|
||||||
|
databaseDrivers: ['postgres'],
|
||||||
|
databaseConnectionId: 'warehouse',
|
||||||
|
databaseUrl: 'env:DATABASE_URL',
|
||||||
|
databaseSchemas: ['public'],
|
||||||
|
enableQueryHistory: true,
|
||||||
|
skipDatabases: false,
|
||||||
|
},
|
||||||
|
io.io,
|
||||||
|
{
|
||||||
|
testConnection: vi.fn(async () => 0),
|
||||||
|
scanConnection: vi.fn(async () => 0),
|
||||||
|
historicSqlReadinessProbe: vi.fn(async () => {
|
||||||
|
const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements');
|
||||||
|
return {
|
||||||
|
ok: true as const,
|
||||||
|
dialect: 'postgres' as const,
|
||||||
|
runner,
|
||||||
|
result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
|
||||||
|
};
|
||||||
|
}),
|
||||||
|
queryHistoryFilterPicker,
|
||||||
|
createQueryHistoryLlmRuntime: vi.fn(() => null),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result.status).toBe('ready');
|
||||||
|
expect(queryHistoryFilterPicker).toHaveBeenCalledTimes(1);
|
||||||
|
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
||||||
|
expect(config.connections.warehouse).toMatchObject({
|
||||||
|
context: {
|
||||||
|
queryHistory: {
|
||||||
|
filters: {
|
||||||
|
dropTrivialProbes: true,
|
||||||
|
serviceAccounts: {
|
||||||
|
mode: 'exclude',
|
||||||
|
patterns: ['^svc_loader$'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
expect(io.stdout()).toContain('Proposed query-history service-account filters');
|
||||||
|
expect(io.stdout()).toContain('svc_loader');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('lets interactive setup skip applying derived filters', async () => {
|
||||||
|
const io = makeIo();
|
||||||
|
const prompts = makePromptAdapter({
|
||||||
|
selectValues: ['skip'],
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await runKtxSetupDatabasesStep(
|
||||||
|
{
|
||||||
|
projectDir: tempDir,
|
||||||
|
inputMode: 'auto',
|
||||||
|
yes: false,
|
||||||
|
databaseDrivers: ['postgres'],
|
||||||
|
databaseConnectionId: 'warehouse',
|
||||||
|
databaseUrl: 'env:DATABASE_URL',
|
||||||
|
databaseSchemas: ['public'],
|
||||||
|
enableQueryHistory: true,
|
||||||
|
skipDatabases: false,
|
||||||
|
},
|
||||||
|
io.io,
|
||||||
|
{
|
||||||
|
prompts,
|
||||||
|
testConnection: vi.fn(async () => 0),
|
||||||
|
scanConnection: vi.fn(async () => 0),
|
||||||
|
historicSqlReadinessProbe: vi.fn(async () => {
|
||||||
|
const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements');
|
||||||
|
return {
|
||||||
|
ok: true as const,
|
||||||
|
dialect: 'postgres' as const,
|
||||||
|
runner,
|
||||||
|
result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
|
||||||
|
};
|
||||||
|
}),
|
||||||
|
queryHistoryFilterPicker: vi.fn(async () => ({
|
||||||
|
excludedRoles: [{ role: 'svc_loader', pattern: '^svc_loader$', reason: 'Loader traffic.' }],
|
||||||
|
consideredRoleCount: 2,
|
||||||
|
skipped: null,
|
||||||
|
warnings: [],
|
||||||
|
})),
|
||||||
|
createQueryHistoryLlmRuntime: vi.fn(() => null),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result.status).toBe('ready');
|
||||||
|
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
||||||
|
expect(queryHistoryFromConfig(config.connections.warehouse)?.filters).toEqual({ dropTrivialProbes: true });
|
||||||
|
expect(prompts.select).toHaveBeenCalledWith({
|
||||||
|
message: 'Apply 1 derived query-history service-account exclusion?',
|
||||||
|
options: [
|
||||||
|
{ value: 'apply', label: 'Apply derived filters (recommended)' },
|
||||||
|
{ value: 'skip', label: 'Leave query history filters unchanged' },
|
||||||
|
],
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does not overwrite an existing serviceAccounts block', async () => {
|
||||||
|
await writeFile(
|
||||||
|
join(tempDir, 'ktx.yaml'),
|
||||||
|
[
|
||||||
|
'connections:',
|
||||||
|
' warehouse:',
|
||||||
|
' driver: postgres',
|
||||||
|
' url: env:DATABASE_URL',
|
||||||
|
' context:',
|
||||||
|
' queryHistory:',
|
||||||
|
' enabled: true',
|
||||||
|
' filters:',
|
||||||
|
' dropTrivialProbes: true',
|
||||||
|
' serviceAccounts:',
|
||||||
|
' mode: exclude',
|
||||||
|
' patterns:',
|
||||||
|
" - '^existing$'",
|
||||||
|
'',
|
||||||
|
].join('\n'),
|
||||||
|
'utf-8',
|
||||||
|
);
|
||||||
|
|
||||||
|
const io = makeIo();
|
||||||
|
const result = await runKtxSetupDatabasesStep(
|
||||||
|
{
|
||||||
|
projectDir: tempDir,
|
||||||
|
inputMode: 'disabled',
|
||||||
|
yes: true,
|
||||||
|
databaseConnectionIds: ['warehouse'],
|
||||||
|
databaseSchemas: [],
|
||||||
|
enableQueryHistory: true,
|
||||||
|
skipDatabases: false,
|
||||||
|
},
|
||||||
|
io.io,
|
||||||
|
{
|
||||||
|
testConnection: vi.fn(async () => 0),
|
||||||
|
scanConnection: vi.fn(async () => 0),
|
||||||
|
historicSqlReadinessProbe: vi.fn(async () => {
|
||||||
|
const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements');
|
||||||
|
return {
|
||||||
|
ok: true as const,
|
||||||
|
dialect: 'postgres' as const,
|
||||||
|
runner,
|
||||||
|
result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
|
||||||
|
};
|
||||||
|
}),
|
||||||
|
queryHistoryFilterPicker: vi.fn(async () => ({
|
||||||
|
excludedRoles: [{ role: 'svc_loader', pattern: '^svc_loader$', reason: 'Loader traffic.' }],
|
||||||
|
consideredRoleCount: 2,
|
||||||
|
skipped: { reason: 'user-block-present' as const },
|
||||||
|
warnings: [],
|
||||||
|
})),
|
||||||
|
createQueryHistoryLlmRuntime: vi.fn(() => null),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result.status).toBe('ready');
|
||||||
|
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
||||||
|
expect(queryHistoryFromConfig(config.connections.warehouse)?.filters?.serviceAccounts).toEqual({
|
||||||
|
mode: 'exclude',
|
||||||
|
patterns: ['^existing$'],
|
||||||
|
});
|
||||||
|
expect(io.stdout()).toContain('Existing query-history service-account filters left unchanged');
|
||||||
|
});
|
||||||
|
|
||||||
it('asks interactive Postgres setup whether to enable query history', async () => {
|
it('asks interactive Postgres setup whether to enable query history', async () => {
|
||||||
await writeFile(
|
await writeFile(
|
||||||
join(tempDir, 'ktx.yaml'),
|
join(tempDir, 'ktx.yaml'),
|
||||||
|
|
|
||||||
|
|
@ -1684,6 +1684,9 @@ describe('setup status', () => {
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
projectDir: tempDir,
|
projectDir: tempDir,
|
||||||
inputMode: 'disabled',
|
inputMode: 'disabled',
|
||||||
|
yes: true,
|
||||||
|
cliVersion: '0.2.0',
|
||||||
|
runtimeInstallPolicy: 'auto',
|
||||||
databaseDrivers: ['postgres'],
|
databaseDrivers: ['postgres'],
|
||||||
databaseConnectionId: 'warehouse',
|
databaseConnectionId: 'warehouse',
|
||||||
databaseUrl: 'env:DATABASE_URL',
|
databaseUrl: 'env:DATABASE_URL',
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ function makeIo(options: { isTTY?: boolean } = {}) {
|
||||||
function makeSqlAnalysis(result: Awaited<ReturnType<SqlAnalysisPort['validateReadOnly']>>): SqlAnalysisPort {
|
function makeSqlAnalysis(result: Awaited<ReturnType<SqlAnalysisPort['validateReadOnly']>>): SqlAnalysisPort {
|
||||||
return {
|
return {
|
||||||
analyzeForFingerprint: vi.fn(),
|
analyzeForFingerprint: vi.fn(),
|
||||||
analyzeBatch: vi.fn(async () => new Map([['cli-sql', { tablesTouched: ['orders'], columnsByClause: {} }]])),
|
analyzeBatch: vi.fn(async () => new Map([['cli-sql', { tablesTouched: [{ catalog: null, db: null, name: 'orders' }], columnsByClause: {} }]])),
|
||||||
validateReadOnly: vi.fn(async () => result),
|
validateReadOnly: vi.fn(async () => result),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,15 +2,32 @@ from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from concurrent.futures import ProcessPoolExecutor
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
|
from dataclasses import dataclass
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
import sqlglot
|
import sqlglot
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from sqlglot import exp
|
from sqlglot import exp
|
||||||
|
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
|
||||||
|
from sqlglot.optimizer.qualify_tables import qualify_tables
|
||||||
|
|
||||||
SqlAnalysisClause = Literal["select", "where", "join", "groupBy", "having", "orderBy"]
|
SqlAnalysisClause = Literal["select", "where", "join", "groupBy", "having", "orderBy"]
|
||||||
|
|
||||||
|
|
||||||
|
class SqlAnalysisTableRef(BaseModel):
|
||||||
|
catalog: str | None = None
|
||||||
|
db: str | None = None
|
||||||
|
name: str
|
||||||
|
|
||||||
|
|
||||||
|
class SqlAnalysisCatalogTable(SqlAnalysisTableRef):
|
||||||
|
columns: list[str] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class AnalyzeSqlCatalog(BaseModel):
|
||||||
|
tables: list[SqlAnalysisCatalogTable] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
class AnalyzeSqlBatchItem(BaseModel):
|
class AnalyzeSqlBatchItem(BaseModel):
|
||||||
id: str
|
id: str
|
||||||
sql: str
|
sql: str
|
||||||
|
|
@ -19,11 +36,12 @@ class AnalyzeSqlBatchItem(BaseModel):
|
||||||
class AnalyzeSqlBatchRequest(BaseModel):
|
class AnalyzeSqlBatchRequest(BaseModel):
|
||||||
dialect: str
|
dialect: str
|
||||||
items: list[AnalyzeSqlBatchItem]
|
items: list[AnalyzeSqlBatchItem]
|
||||||
|
catalog: AnalyzeSqlCatalog | None = None
|
||||||
max_workers: int | None = Field(default=None, ge=1, le=32)
|
max_workers: int | None = Field(default=None, ge=1, le=32)
|
||||||
|
|
||||||
|
|
||||||
class AnalyzeSqlBatchResult(BaseModel):
|
class AnalyzeSqlBatchResult(BaseModel):
|
||||||
tables_touched: list[str] = Field(default_factory=list)
|
tables_touched: list[SqlAnalysisTableRef] = Field(default_factory=list)
|
||||||
columns_by_clause: dict[SqlAnalysisClause, list[str]] = Field(default_factory=dict)
|
columns_by_clause: dict[SqlAnalysisClause, list[str]] = Field(default_factory=dict)
|
||||||
error: str | None = None
|
error: str | None = None
|
||||||
|
|
||||||
|
|
@ -82,17 +100,76 @@ def _ordered_unique(values: list[str]) -> list[str]:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _table_ref(table: exp.Table) -> str:
|
def _normalize_identifier(value: str | None, dialect: str) -> str | None:
|
||||||
parts: list[str] = []
|
if value is None:
|
||||||
|
return None
|
||||||
|
identifier = exp.to_identifier(value)
|
||||||
|
identifier.meta["is_table"] = True
|
||||||
|
normalized = normalize_identifiers(identifier, dialect=dialect)
|
||||||
|
return str(normalized.name)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalized_ref(ref: SqlAnalysisTableRef, dialect: str) -> SqlAnalysisTableRef:
|
||||||
|
return SqlAnalysisTableRef(
|
||||||
|
catalog=_normalize_identifier(ref.catalog, dialect),
|
||||||
|
db=_normalize_identifier(ref.db, dialect),
|
||||||
|
name=_normalize_identifier(ref.name, dialect) or ref.name,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class _CatalogIndex:
|
||||||
|
by_full: dict[tuple[str | None, str | None, str], SqlAnalysisTableRef]
|
||||||
|
by_name: dict[str, list[SqlAnalysisTableRef]]
|
||||||
|
|
||||||
|
|
||||||
|
def _catalog_index(
|
||||||
|
catalog: AnalyzeSqlCatalog | None, dialect: str
|
||||||
|
) -> _CatalogIndex | None:
|
||||||
|
if catalog is None or not catalog.tables:
|
||||||
|
return None
|
||||||
|
by_full: dict[tuple[str | None, str | None, str], SqlAnalysisTableRef] = {}
|
||||||
|
by_name: dict[str, list[SqlAnalysisTableRef]] = {}
|
||||||
|
for table in catalog.tables:
|
||||||
|
ref = _normalized_ref(table, dialect)
|
||||||
|
key = (ref.catalog, ref.db, ref.name)
|
||||||
|
by_full[key] = ref
|
||||||
|
by_name.setdefault(ref.name, []).append(ref)
|
||||||
|
return _CatalogIndex(by_full=by_full, by_name=by_name)
|
||||||
|
|
||||||
|
|
||||||
|
def _raw_table_ref(table: exp.Table, dialect: str) -> SqlAnalysisTableRef | None:
|
||||||
|
if not table.name:
|
||||||
|
return None
|
||||||
catalog = table.args.get("catalog")
|
catalog = table.args.get("catalog")
|
||||||
db = table.args.get("db")
|
db = table.args.get("db")
|
||||||
if catalog is not None and getattr(catalog, "name", None):
|
return _normalized_ref(
|
||||||
parts.append(str(catalog.name))
|
SqlAnalysisTableRef(
|
||||||
if db is not None and getattr(db, "name", None):
|
catalog=str(catalog.name)
|
||||||
parts.append(str(db.name))
|
if catalog is not None and getattr(catalog, "name", None)
|
||||||
if table.name:
|
else None,
|
||||||
parts.append(str(table.name))
|
db=str(db.name) if db is not None and getattr(db, "name", None) else None,
|
||||||
return ".".join(parts)
|
name=str(table.name),
|
||||||
|
),
|
||||||
|
dialect,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_table_refs(
|
||||||
|
raw: SqlAnalysisTableRef,
|
||||||
|
catalog: _CatalogIndex | None,
|
||||||
|
) -> list[SqlAnalysisTableRef]:
|
||||||
|
if catalog is None:
|
||||||
|
return [raw]
|
||||||
|
exact = catalog.by_full.get((raw.catalog, raw.db, raw.name))
|
||||||
|
if exact is not None:
|
||||||
|
return [exact]
|
||||||
|
if raw.db is not None:
|
||||||
|
return [raw]
|
||||||
|
matches = catalog.by_name.get(raw.name, [])
|
||||||
|
if matches:
|
||||||
|
return matches
|
||||||
|
return [SqlAnalysisTableRef(catalog=None, db=None, name=raw.name)]
|
||||||
|
|
||||||
|
|
||||||
def _column_name(column: exp.Column) -> str:
|
def _column_name(column: exp.Column) -> str:
|
||||||
|
|
@ -146,33 +223,48 @@ def _columns_by_clause(tree: exp.Expression) -> dict[SqlAnalysisClause, list[str
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _table_refs(
|
||||||
|
tree: exp.Expression, dialect: str, catalog: _CatalogIndex | None
|
||||||
|
) -> list[SqlAnalysisTableRef]:
|
||||||
|
normalized_tree = normalize_identifiers(tree, dialect=dialect)
|
||||||
|
qualified_tree = qualify_tables(normalized_tree, dialect=dialect)
|
||||||
|
cte_names = {cte.alias_or_name.lower() for cte in qualified_tree.find_all(exp.CTE)}
|
||||||
|
refs: list[SqlAnalysisTableRef] = []
|
||||||
|
seen: set[tuple[str | None, str | None, str]] = set()
|
||||||
|
for table in qualified_tree.find_all(exp.Table):
|
||||||
|
if table.name.lower() in cte_names:
|
||||||
|
continue
|
||||||
|
raw = _raw_table_ref(table, dialect)
|
||||||
|
if raw is None:
|
||||||
|
continue
|
||||||
|
for ref in _resolve_table_refs(raw, catalog):
|
||||||
|
key = (ref.catalog, ref.db, ref.name)
|
||||||
|
if key not in seen:
|
||||||
|
seen.add(key)
|
||||||
|
refs.append(ref)
|
||||||
|
return refs
|
||||||
|
|
||||||
|
|
||||||
def _analyze_one(
|
def _analyze_one(
|
||||||
item_id: str, sql: str, dialect: str
|
item_id: str, sql: str, dialect: str, catalog: _CatalogIndex | None
|
||||||
) -> tuple[str, AnalyzeSqlBatchResult]:
|
) -> tuple[str, AnalyzeSqlBatchResult]:
|
||||||
try:
|
try:
|
||||||
tree = sqlglot.parse_one(sql, read=dialect)
|
tree = sqlglot.parse_one(sql, read=dialect)
|
||||||
except sqlglot.errors.SqlglotError as exc:
|
except sqlglot.errors.SqlglotError as exc:
|
||||||
return item_id, AnalyzeSqlBatchResult(error=str(exc))
|
return item_id, AnalyzeSqlBatchResult(error=str(exc))
|
||||||
|
|
||||||
cte_names = {cte.alias_or_name.lower() for cte in tree.find_all(exp.CTE)}
|
|
||||||
table_refs = [
|
|
||||||
table_ref
|
|
||||||
for table_ref in (_table_ref(table) for table in tree.find_all(exp.Table))
|
|
||||||
if table_ref and table_ref.split(".")[-1].lower() not in cte_names
|
|
||||||
]
|
|
||||||
|
|
||||||
return item_id, AnalyzeSqlBatchResult(
|
return item_id, AnalyzeSqlBatchResult(
|
||||||
tables_touched=_ordered_unique(table_refs),
|
tables_touched=_table_refs(tree, dialect, catalog),
|
||||||
columns_by_clause=_columns_by_clause(tree),
|
columns_by_clause=_columns_by_clause(tree),
|
||||||
error=None,
|
error=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _analyze_payload(
|
def _analyze_payload(
|
||||||
payload: tuple[str, str, str],
|
payload: tuple[str, str, str, _CatalogIndex | None],
|
||||||
) -> tuple[str, AnalyzeSqlBatchResult]:
|
) -> tuple[str, AnalyzeSqlBatchResult]:
|
||||||
item_id, sql, dialect = payload
|
item_id, sql, dialect, catalog = payload
|
||||||
return _analyze_one(item_id, sql, dialect)
|
return _analyze_one(item_id, sql, dialect, catalog)
|
||||||
|
|
||||||
|
|
||||||
def validate_read_only_sql_response(
|
def validate_read_only_sql_response(
|
||||||
|
|
@ -222,7 +314,8 @@ def _worker_count(request: AnalyzeSqlBatchRequest) -> int:
|
||||||
def analyze_sql_batch_response(
|
def analyze_sql_batch_response(
|
||||||
request: AnalyzeSqlBatchRequest,
|
request: AnalyzeSqlBatchRequest,
|
||||||
) -> AnalyzeSqlBatchResponse:
|
) -> AnalyzeSqlBatchResponse:
|
||||||
payloads = [(item.id, item.sql, request.dialect) for item in request.items]
|
catalog = _catalog_index(request.catalog, request.dialect)
|
||||||
|
payloads = [(item.id, item.sql, request.dialect, catalog) for item in request.items]
|
||||||
if _worker_count(request) == 1:
|
if _worker_count(request) == 1:
|
||||||
analyzed = [_analyze_payload(payload) for payload in payloads]
|
analyzed = [_analyze_payload(payload) for payload in payloads]
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -368,7 +368,9 @@ def test_sql_analyze_batch_endpoint_returns_per_item_results() -> None:
|
||||||
|
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
body = response.json()
|
body = response.json()
|
||||||
assert body["results"]["orders"]["tables_touched"] == ["public.orders"]
|
assert body["results"]["orders"]["tables_touched"] == [
|
||||||
|
{"catalog": None, "db": "public", "name": "orders"}
|
||||||
|
]
|
||||||
assert body["results"]["orders"]["columns_by_clause"] == {
|
assert body["results"]["orders"]["columns_by_clause"] == {
|
||||||
"select": ["status"],
|
"select": ["status"],
|
||||||
"where": ["created_at"],
|
"where": ["created_at"],
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,10 @@ def test_analyze_sql_batch_extracts_tables_and_clause_columns() -> None:
|
||||||
|
|
||||||
result = response.results["orders_by_customer"]
|
result = response.results["orders_by_customer"]
|
||||||
assert result.error is None
|
assert result.error is None
|
||||||
assert result.tables_touched == ["public.orders", "public.customers"]
|
assert [item.model_dump() for item in result.tables_touched] == [
|
||||||
|
{"catalog": None, "db": "public", "name": "orders"},
|
||||||
|
{"catalog": None, "db": "public", "name": "customers"},
|
||||||
|
]
|
||||||
assert result.columns_by_clause == {
|
assert result.columns_by_clause == {
|
||||||
"select": ["status"],
|
"select": ["status"],
|
||||||
"where": ["created_at"],
|
"where": ["created_at"],
|
||||||
|
|
@ -56,6 +59,114 @@ def test_analyze_sql_batch_returns_per_item_parse_errors() -> None:
|
||||||
assert result.error is not None
|
assert result.error is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_sql_batch_qualifies_bare_table_from_catalog() -> None:
|
||||||
|
response = analyze_sql_batch_response(
|
||||||
|
AnalyzeSqlBatchRequest(
|
||||||
|
dialect="postgres",
|
||||||
|
catalog={
|
||||||
|
"tables": [
|
||||||
|
{
|
||||||
|
"catalog": None,
|
||||||
|
"db": "orbit_raw",
|
||||||
|
"name": "accounts",
|
||||||
|
"columns": ["id"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"catalog": None,
|
||||||
|
"db": "orbit_analytics",
|
||||||
|
"name": "orders",
|
||||||
|
"columns": ["id"],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
items=[AnalyzeSqlBatchItem(id="bare", sql="select id from accounts")],
|
||||||
|
max_workers=1,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert [item.model_dump() for item in response.results["bare"].tables_touched] == [
|
||||||
|
{"catalog": None, "db": "orbit_raw", "name": "accounts"}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_sql_batch_returns_all_ambiguous_modeled_matches() -> None:
|
||||||
|
response = analyze_sql_batch_response(
|
||||||
|
AnalyzeSqlBatchRequest(
|
||||||
|
dialect="postgres",
|
||||||
|
catalog={
|
||||||
|
"tables": [
|
||||||
|
{
|
||||||
|
"catalog": None,
|
||||||
|
"db": "orbit_raw",
|
||||||
|
"name": "events",
|
||||||
|
"columns": ["id"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"catalog": None,
|
||||||
|
"db": "orbit_analytics",
|
||||||
|
"name": "events",
|
||||||
|
"columns": ["id"],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
|
items=[AnalyzeSqlBatchItem(id="ambiguous", sql="select id from events")],
|
||||||
|
max_workers=1,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert [
|
||||||
|
item.model_dump() for item in response.results["ambiguous"].tables_touched
|
||||||
|
] == [
|
||||||
|
{"catalog": None, "db": "orbit_raw", "name": "events"},
|
||||||
|
{"catalog": None, "db": "orbit_analytics", "name": "events"},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_sql_batch_leaves_unresolved_bare_refs_unqualified() -> None:
|
||||||
|
response = analyze_sql_batch_response(
|
||||||
|
AnalyzeSqlBatchRequest(
|
||||||
|
dialect="postgres",
|
||||||
|
catalog={
|
||||||
|
"tables": [{"catalog": None, "db": "orbit_raw", "name": "accounts"}]
|
||||||
|
},
|
||||||
|
items=[AnalyzeSqlBatchItem(id="missing", sql="select * from invoices")],
|
||||||
|
max_workers=1,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert [
|
||||||
|
item.model_dump() for item in response.results["missing"].tables_touched
|
||||||
|
] == [{"catalog": None, "db": None, "name": "invoices"}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_analyze_sql_batch_returns_bigquery_project_dataset_table_refs() -> None:
|
||||||
|
response = analyze_sql_batch_response(
|
||||||
|
AnalyzeSqlBatchRequest(
|
||||||
|
dialect="bigquery",
|
||||||
|
catalog={
|
||||||
|
"tables": [
|
||||||
|
{
|
||||||
|
"catalog": "demo-project",
|
||||||
|
"db": "orbit_analytics",
|
||||||
|
"name": "orders",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
items=[
|
||||||
|
AnalyzeSqlBatchItem(
|
||||||
|
id="bq",
|
||||||
|
sql="select * from `demo-project.orbit_analytics.orders`",
|
||||||
|
)
|
||||||
|
],
|
||||||
|
max_workers=1,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert [item.model_dump() for item in response.results["bq"].tables_touched] == [
|
||||||
|
{"catalog": "demo-project", "db": "orbit_analytics", "name": "orders"}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_columns_from_nodes_ignores_non_expression_clause_values() -> None:
|
def test_columns_from_nodes_ignores_non_expression_clause_values() -> None:
|
||||||
assert _columns_from_nodes([True, False, None]) == []
|
assert _columns_from_nodes([True, False, None]) == []
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue