diff --git a/packages/context/src/ingest/adapters/historic-sql/types.test.ts b/packages/context/src/ingest/adapters/historic-sql/types.test.ts new file mode 100644 index 00000000..9d5aeea8 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/types.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from 'vitest'; +import { + aggregatedTemplateSchema, + historicSqlUnifiedPullConfigSchema, + stagedManifestSchema, + stagedPatternsInputSchema, + stagedTableInputSchema, +} from './types.js'; + +describe('historic-sql unified contracts', () => { + it('parses minExecutions and accepts minCalls as a one-release alias', () => { + expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minExecutions: 9 })).toMatchObject({ + dialect: 'postgres', + minExecutions: 9, + windowDays: 90, + concurrency: 12, + redactionPatterns: [], + staleArchiveAfterDays: 90, + }); + + expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minCalls: 7 }).minExecutions).toBe(7); + }); + + it('validates aggregate templates from warehouse readers', () => { + const parsed = aggregatedTemplateSchema.parse({ + templateId: 'pg:123', + canonicalSql: 'select status, count(*) from public.orders group by status', + dialect: 'postgres', + stats: { + executions: 42, + distinctUsers: 3, + firstSeen: '2026-05-01T00:00:00.000Z', + lastSeen: '2026-05-11T00:00:00.000Z', + p50RuntimeMs: 12.5, + p95RuntimeMs: 40, + errorRate: 0, + rowsProduced: 100, + }, + topUsers: [{ user: 'analyst', executions: 40 }], + }); + + expect(parsed.templateId).toBe('pg:123'); + expect(parsed.topUsers).toEqual([{ user: 'analyst', executions: 40 }]); + }); + + it('validates staged table, patterns, and manifest artifacts', () => { + expect( + stagedTableInputSchema.parse({ + table: 'public.orders', + stats: { + executionsBucket: '10-100', + distinctUsersBucket: '2-5', + errorRateBucket: 'none', + p95RuntimeBucket: '<100ms', + recencyBucket: 'current', + }, + columnsByClause: { + select: [['status', 'high']], + where: [['created_at', 'mid']], + }, + observedJoins: [{ withTable: 'public.customers', on: ['customer_id'], freq: 'high' }], + topTemplates: [{ id: 'pg:123', canonicalSql: 'select * from public.orders', topUsers: [{ user: 'analyst' }] }], + }).table, + ).toBe('public.orders'); + + expect( + stagedPatternsInputSchema.parse({ + templates: [ + { + id: 'pg:123', + canonicalSql: 'select * from public.orders', + tablesTouched: ['public.orders'], + executionsBucket: '10-100', + distinctUsersBucket: '2-5', + dialect: 'postgres', + }, + ], + }).templates, + ).toHaveLength(1); + + expect( + stagedManifestSchema.parse({ + source: 'historic-sql', + connectionId: 'warehouse', + dialect: 'postgres', + fetchedAt: '2026-05-11T00:00:00.000Z', + windowStart: '2026-02-10T00:00:00.000Z', + windowEnd: '2026-05-11T00:00:00.000Z', + snapshotRowCount: 2, + touchedTableCount: 1, + parseFailures: 1, + warnings: ['parse_failed:bad'], + probeWarnings: [], + }).parseFailures, + ).toBe(1); + }); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/types.ts b/packages/context/src/ingest/adapters/historic-sql/types.ts index 0cd3d01a..2d83e15b 100644 --- a/packages/context/src/ingest/adapters/historic-sql/types.ts +++ b/packages/context/src/ingest/adapters/historic-sql/types.ts @@ -18,6 +18,129 @@ export const historicSqlPullConfigSchema = z.object({ }); export type HistoricSqlPullConfig = z.infer; +const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']); + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +export const historicSqlUnifiedPullConfigSchema = z.preprocess((value) => { + if (!isRecord(value)) { + return value; + } + if (value.minExecutions === undefined && typeof value.minCalls === 'number') { + return { ...value, minExecutions: value.minCalls }; + } + return value; +}, z.object({ + dialect: historicSqlDialectSchema, + windowDays: z.number().int().positive().default(90), + minExecutions: z.number().int().nonnegative().default(5), + concurrency: z.number().int().positive().default(12), + filters: z.object({ + serviceAccounts: z.object({ + patterns: z.array(z.string()).default([]), + mode: filterModeSchema.default('exclude'), + }).optional(), + orchestrators: z.object({ + mode: filterModeSchema.default('mark-only'), + }).optional(), + dropTrivialProbes: z.boolean().default(true), + dropFailedBelow: z.object({ + errorRate: z.number().min(0).max(1), + executions: z.number().int().nonnegative(), + }).optional(), + }).default({}), + redactionPatterns: z.array(z.string()).default([]), + staleArchiveAfterDays: z.number().int().positive().default(90), +})); + +export type HistoricSqlUnifiedPullConfig = z.infer; + +export const aggregatedTemplateSchema = z.object({ + templateId: z.string().min(1), + canonicalSql: z.string().min(1), + dialect: historicSqlDialectSchema, + stats: z.object({ + executions: z.number().int().nonnegative(), + distinctUsers: z.number().int().nonnegative(), + firstSeen: z.iso.datetime(), + lastSeen: z.iso.datetime(), + p50RuntimeMs: z.number().nonnegative().nullable(), + p95RuntimeMs: z.number().nonnegative().nullable(), + errorRate: z.number().min(0).max(1), + rowsProduced: z.number().int().nonnegative().nullable(), + }), + topUsers: z.array(z.object({ + user: z.string().nullable(), + executions: z.number().int().nonnegative(), + })).default([]), +}); +export type AggregatedTemplate = z.infer; + +export const stagedTableInputSchema = z.object({ + table: z.string().min(1), + stats: z.object({ + executionsBucket: z.string(), + distinctUsersBucket: z.string(), + errorRateBucket: z.string(), + p95RuntimeBucket: z.string(), + recencyBucket: z.string(), + }), + columnsByClause: z.record(z.string(), z.array(z.tuple([z.string(), z.string()]))), + observedJoins: z.array(z.object({ + withTable: z.string(), + on: z.array(z.string()), + freq: z.string(), + })), + topTemplates: z.array(z.object({ + id: z.string(), + canonicalSql: z.string(), + topUsers: z.array(z.object({ user: z.string().nullable() })), + })), +}); +export type StagedTableInput = z.infer; + +export const stagedPatternsInputSchema = z.object({ + templates: z.array(z.object({ + id: z.string(), + canonicalSql: z.string(), + tablesTouched: z.array(z.string()), + executionsBucket: z.string(), + distinctUsersBucket: z.string(), + dialect: historicSqlDialectSchema, + })), +}); +export type StagedPatternsInput = z.infer; + +export const stagedManifestSchema = z.object({ + source: z.literal(HISTORIC_SQL_SOURCE_KEY), + connectionId: z.string().min(1), + dialect: historicSqlDialectSchema, + fetchedAt: z.iso.datetime(), + windowStart: z.iso.datetime(), + windowEnd: z.iso.datetime(), + snapshotRowCount: z.number().int().nonnegative(), + touchedTableCount: z.number().int().nonnegative(), + parseFailures: z.number().int().nonnegative(), + warnings: z.array(z.string()), + probeWarnings: z.array(z.string()), +}); +export type StagedManifest = z.infer; + +export interface HistoricSqlProbeResult { + warnings: string[]; +} + +export interface HistoricSqlReader { + probe(client: unknown): Promise; + fetchAggregated( + client: unknown, + window: HistoricSqlTimeWindow, + config: HistoricSqlUnifiedPullConfig, + ): AsyncIterable; +} + export interface HistoricSqlTimeWindow { start: Date; end: Date; diff --git a/packages/context/src/ingest/index.ts b/packages/context/src/ingest/index.ts index 0db5c2eb..f76befc3 100644 --- a/packages/context/src/ingest/index.ts +++ b/packages/context/src/ingest/index.ts @@ -347,14 +347,18 @@ export { } from './adapters/historic-sql/stage-pgss.js'; export type { PgssBaseline, StagePgStatStatementsTemplatesResult } from './adapters/historic-sql/stage-pgss.js'; export type { + AggregatedTemplate, HistoricSqlDialect, HistoricSqlManifest, HistoricSqlMetadata, + HistoricSqlProbeResult, HistoricSqlPullConfig, HistoricSqlQueryHistoryReader, HistoricSqlRawQueryRow, + HistoricSqlReader, HistoricSqlSourceAdapterDeps, HistoricSqlTimeWindow, + HistoricSqlUnifiedPullConfig, HistoricSqlUsage, KtxPostgresQueryClient, PostgresPgssAggregateRow, @@ -362,15 +366,23 @@ export type { PostgresPgssReader, PostgresPgssRow, PostgresPgssSnapshot, + StagedManifest, + StagedPatternsInput, + StagedTableInput, } from './adapters/historic-sql/types.js'; export { HISTORIC_SQL_OBJECT_TYPE, HISTORIC_SQL_SOURCE_KEY, + aggregatedTemplateSchema, historicSqlManifestSchema, historicSqlMetadataSchema, historicSqlPullConfigSchema, historicSqlRawQueryRowSchema, + historicSqlUnifiedPullConfigSchema, historicSqlUsageSchema, + stagedManifestSchema, + stagedPatternsInputSchema, + stagedTableInputSchema, } from './adapters/historic-sql/types.js'; export type { CanonicalPin } from './canonical-pins.js'; export { buildCanonicalPinsPromptBlock, selectRelevantCanonicalPins } from './canonical-pins.js'; diff --git a/packages/context/src/package-exports.test.ts b/packages/context/src/package-exports.test.ts index e22d64fa..40ed745f 100644 --- a/packages/context/src/package-exports.test.ts +++ b/packages/context/src/package-exports.test.ts @@ -240,6 +240,9 @@ describe('@ktx/context package exports', () => { expect(ingest.HistoricSqlExtensionMissingError).toBeTypeOf('function'); expect(ingest.HistoricSqlVersionUnsupportedError).toBeTypeOf('function'); expect(ingest.HISTORIC_SQL_SOURCE_KEY).toBe('historic-sql'); + expect(ingest.historicSqlUnifiedPullConfigSchema).toBeDefined(); + expect(ingest.aggregatedTemplateSchema).toBeDefined(); + expect(ingest.stagedTableInputSchema).toBeDefined(); expect(ingest.SqliteContextEvidenceStore).toBeTypeOf('function'); expect(ingest.SqliteBundleIngestStore).toBeTypeOf('function'); expect(ingest.CuratorPaginationService).toBeTypeOf('function');