diff --git a/packages/context/src/ingest/adapters/historic-sql/evidence-tool.test.ts b/packages/context/src/ingest/adapters/historic-sql/evidence-tool.test.ts new file mode 100644 index 00000000..a8219192 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/evidence-tool.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createEmitHistoricSqlEvidenceTool } from './evidence-tool.js'; + +describe('emit_historic_sql_evidence tool', () => { + it('writes table usage evidence to the ignored run evidence directory', async () => { + const writeFile = vi.fn(async () => ({ success: true, commitHash: null })); + const tool = createEmitHistoricSqlEvidenceTool(); + + const result = await tool.execute!( + { + kind: 'table_usage', + table: 'public.orders', + rawPath: 'tables/public.orders.json', + usage: { + narrative: 'Orders are repeatedly queried by paid status.', + frequencyTier: 'high', + commonFilters: ['status'], + commonJoins: [], + staleSince: null, + }, + }, + { + toolCallId: 'call-1', + messages: [], + abortSignal: new AbortController().signal, + experimental_context: { + connectionId: 'warehouse', + session: { + ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'historic-sql' }, + configService: { writeFile }, + }, + }, + } as never, + ); + + expect(result).toBe('Recorded historic-SQL table_usage evidence for public.orders.'); + expect(writeFile).toHaveBeenCalledWith( + '.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json', + expect.stringContaining('"kind": "table_usage"'), + 'System User', + 'system@example.com', + 'Record historic-SQL evidence: historic-sql-table-public-orders', + { skipLock: true }, + ); + }); + + it('rejects non-historic ingest sessions', async () => { + const tool = createEmitHistoricSqlEvidenceTool(); + + await expect( + tool.execute!( + { + kind: 'pattern', + rawPath: 'patterns-input.json', + pattern: { + slug: 'orders', + title: 'Orders', + narrative: 'Orders pattern.', + definitionSql: 'select * from public.orders', + tablesInvolved: ['public.orders'], + slRefs: ['orders'], + constituentTemplateIds: ['pg:1'], + }, + }, + { + toolCallId: 'call-1', + messages: [], + abortSignal: new AbortController().signal, + experimental_context: { + connectionId: 'warehouse', + session: { + ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' }, + configService: { writeFile: vi.fn() }, + }, + }, + } as never, + ), + ).resolves.toContain('Error: emit_historic_sql_evidence is only available during historic-sql ingest'); + }); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/evidence-tool.ts b/packages/context/src/ingest/adapters/historic-sql/evidence-tool.ts new file mode 100644 index 00000000..634b7593 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/evidence-tool.ts @@ -0,0 +1,72 @@ +import { tool } from 'ai'; +import { z } from 'zod'; +import { historicSqlEvidencePath, serializeHistoricSqlEvidence } from './evidence.js'; +import { patternOutputSchema, tableUsageOutputSchema } from './skill-schemas.js'; + +const SYSTEM_AUTHOR = 'System User'; +const SYSTEM_EMAIL = 'system@example.com'; + +function unitKeyForEvidence(input: { kind: string; table?: string; pattern?: { slug: string } }): string { + if (input.kind === 'table_usage') { + return `historic-sql-table-${String(input.table).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`; + } + return `historic-sql-pattern-${String(input.pattern?.slug).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`; +} + +export function createEmitHistoricSqlEvidenceTool() { + return tool({ + description: + 'Record typed historic-SQL evidence for deterministic projection. Use this instead of wiki_write, sl_write_source, sl_edit_source, or context_candidate_write during historic-SQL WorkUnits.', + inputSchema: z.discriminatedUnion('kind', [ + z.object({ + kind: z.literal('table_usage'), + table: z.string().min(1), + rawPath: z.string().min(1), + usage: tableUsageOutputSchema, + }), + z.object({ + kind: z.literal('pattern'), + rawPath: z.string().min(1), + pattern: patternOutputSchema, + }), + ]), + execute: async (input, options): Promise => { + const context = options.experimental_context as + | { + connectionId?: string | null; + session?: { + ingest?: { runId: string; sourceKey: string }; + configService?: { + writeFile( + path: string, + content: string, + author: string, + authorEmail: string, + commitMessage: string, + options?: { skipLock?: boolean }, + ): Promise; + }; + }; + } + | undefined; + const ingest = context?.session?.ingest; + const configService = context?.session?.configService; + if (!ingest || ingest.sourceKey !== 'historic-sql' || !configService || !context?.connectionId) { + return 'Error: emit_historic_sql_evidence is only available during historic-sql ingest.'; + } + + const unitKey = unitKeyForEvidence(input); + const content = serializeHistoricSqlEvidence({ ...input, connectionId: context.connectionId }); + await configService.writeFile( + historicSqlEvidencePath(ingest.runId, unitKey), + content, + SYSTEM_AUTHOR, + SYSTEM_EMAIL, + `Record historic-SQL evidence: ${unitKey}`, + { skipLock: true }, + ); + const label = input.kind === 'table_usage' ? input.table : input.pattern.slug; + return `Recorded historic-SQL ${input.kind} evidence for ${label}.`; + }, + }); +} diff --git a/packages/context/src/ingest/adapters/historic-sql/evidence.test.ts b/packages/context/src/ingest/adapters/historic-sql/evidence.test.ts new file mode 100644 index 00000000..2d6b7556 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/evidence.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, it } from 'vitest'; +import { + historicSqlEvidenceEnvelopeSchema, + historicSqlEvidencePath, + historicSqlTableUsageEvidenceSchema, +} from './evidence.js'; + +describe('historic-sql evidence contracts', () => { + it('validates table usage evidence emitted by table digest WorkUnits', () => { + const parsed = historicSqlTableUsageEvidenceSchema.parse({ + kind: 'table_usage', + connectionId: 'warehouse', + table: 'public.orders', + rawPath: 'tables/public.orders.json', + usage: { + narrative: 'Orders are repeatedly queried for paid/refunded lifecycle analysis.', + frequencyTier: 'high', + commonFilters: ['status', 'created_at'], + commonGroupBys: ['status'], + commonJoins: [{ table: 'public.customers', on: ['customer_id'] }], + staleSince: null, + }, + }); + + expect(parsed.table).toBe('public.orders'); + expect(parsed.usage.frequencyTier).toBe('high'); + }); + + it('validates pattern evidence emitted by the patterns WorkUnit', () => { + const parsed = historicSqlEvidenceEnvelopeSchema.parse({ + kind: 'pattern', + connectionId: 'warehouse', + rawPath: 'patterns-input.json', + pattern: { + slug: 'order-lifecycle-analysis', + title: 'Order Lifecycle Analysis', + narrative: 'Analysts compare order status changes by customer segment.', + definitionSql: 'select status, count(*) from public.orders group by status', + tablesInvolved: ['public.orders', 'public.customers'], + slRefs: ['orders', 'customers'], + constituentTemplateIds: ['pg:1', 'pg:2'], + }, + }); + + expect(parsed.kind).toBe('pattern'); + expect(parsed.pattern.slug).toBe('order-lifecycle-analysis'); + }); + + it('builds a stable ignored evidence path from run and WorkUnit identity', () => { + expect(historicSqlEvidencePath('run-1', 'historic-sql-table-public-orders')).toBe( + '.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json', + ); + }); +}); diff --git a/packages/context/src/ingest/adapters/historic-sql/evidence.ts b/packages/context/src/ingest/adapters/historic-sql/evidence.ts new file mode 100644 index 00000000..18c01a85 --- /dev/null +++ b/packages/context/src/ingest/adapters/historic-sql/evidence.ts @@ -0,0 +1,41 @@ +import { z } from 'zod'; +import { patternOutputSchema, tableUsageOutputSchema } from './skill-schemas.js'; + +function safeEvidenceSegment(value: string): string { + const segment = value.replace(/[^a-zA-Z0-9._-]+/g, '-').replace(/^-+|-+$/g, ''); + if (!segment) { + throw new Error(`Invalid historic-SQL evidence path segment: ${value}`); + } + return segment; +} + +export const historicSqlTableUsageEvidenceSchema = z.object({ + kind: z.literal('table_usage'), + connectionId: z.string().min(1), + table: z.string().min(1), + rawPath: z.string().min(1), + usage: tableUsageOutputSchema, +}); +export type HistoricSqlTableUsageEvidence = z.infer; + +export const historicSqlPatternEvidenceSchema = z.object({ + kind: z.literal('pattern'), + connectionId: z.string().min(1), + rawPath: z.string().min(1), + pattern: patternOutputSchema, +}); +export type HistoricSqlPatternEvidence = z.infer; + +export const historicSqlEvidenceEnvelopeSchema = z.discriminatedUnion('kind', [ + historicSqlTableUsageEvidenceSchema, + historicSqlPatternEvidenceSchema, +]); +export type HistoricSqlEvidenceEnvelope = z.infer; + +export function historicSqlEvidencePath(runId: string, unitKey: string): string { + return `.ktx/ingest-evidence/historic-sql/${safeEvidenceSegment(runId)}/${safeEvidenceSegment(unitKey)}.json`; +} + +export function serializeHistoricSqlEvidence(evidence: HistoricSqlEvidenceEnvelope): string { + return `${JSON.stringify(historicSqlEvidenceEnvelopeSchema.parse(evidence), null, 2)}\n`; +} diff --git a/packages/context/src/ingest/index.ts b/packages/context/src/ingest/index.ts index 352281d3..e3dc3899 100644 --- a/packages/context/src/ingest/index.ts +++ b/packages/context/src/ingest/index.ts @@ -334,6 +334,19 @@ export { PostgresPgssQueryHistoryReader } from './adapters/historic-sql/postgres export { SnowflakeHistoricSqlQueryHistoryReader } from './adapters/historic-sql/snowflake-query-history-reader.js'; export { stageHistoricSqlAggregatedSnapshot } from './adapters/historic-sql/stage-unified.js'; export { stageHistoricSqlTemplates } from './adapters/historic-sql/stage.js'; +export { + historicSqlEvidenceEnvelopeSchema, + historicSqlEvidencePath, + historicSqlPatternEvidenceSchema, + historicSqlTableUsageEvidenceSchema, + serializeHistoricSqlEvidence, +} from './adapters/historic-sql/evidence.js'; +export type { + HistoricSqlEvidenceEnvelope, + HistoricSqlPatternEvidence, + HistoricSqlTableUsageEvidence, +} from './adapters/historic-sql/evidence.js'; +export { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evidence-tool.js'; export { patternOutputSchema, patternsArraySchema, diff --git a/packages/context/src/package-exports.test.ts b/packages/context/src/package-exports.test.ts index 40ed745f..25a3ddb1 100644 --- a/packages/context/src/package-exports.test.ts +++ b/packages/context/src/package-exports.test.ts @@ -243,6 +243,9 @@ describe('@ktx/context package exports', () => { expect(ingest.historicSqlUnifiedPullConfigSchema).toBeDefined(); expect(ingest.aggregatedTemplateSchema).toBeDefined(); expect(ingest.stagedTableInputSchema).toBeDefined(); + expect(ingest.historicSqlEvidenceEnvelopeSchema).toBeDefined(); + expect(ingest.historicSqlEvidencePath).toBeTypeOf('function'); + expect(ingest.createEmitHistoricSqlEvidenceTool).toBeTypeOf('function'); expect(ingest.SqliteContextEvidenceStore).toBeTypeOf('function'); expect(ingest.SqliteBundleIngestStore).toBeTypeOf('function'); expect(ingest.CuratorPaginationService).toBeTypeOf('function');