feat: add historic sql evidence emission

This commit is contained in:
Andrey Avtomonov 2026-05-11 18:44:24 +02:00
parent f99f85361a
commit ef9358beb7
6 changed files with 263 additions and 0 deletions

View file

@ -0,0 +1,80 @@
import { describe, expect, it, vi } from 'vitest';
import { createEmitHistoricSqlEvidenceTool } from './evidence-tool.js';
describe('emit_historic_sql_evidence tool', () => {
it('writes table usage evidence to the ignored run evidence directory', async () => {
const writeFile = vi.fn(async () => ({ success: true, commitHash: null }));
const tool = createEmitHistoricSqlEvidenceTool();
const result = await tool.execute!(
{
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried by paid status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [],
staleSince: null,
},
},
{
toolCallId: 'call-1',
messages: [],
abortSignal: new AbortController().signal,
experimental_context: {
connectionId: 'warehouse',
session: {
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'historic-sql' },
configService: { writeFile },
},
},
} as never,
);
expect(result).toBe('Recorded historic-SQL table_usage evidence for public.orders.');
expect(writeFile).toHaveBeenCalledWith(
'.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json',
expect.stringContaining('"kind": "table_usage"'),
'System User',
'system@example.com',
'Record historic-SQL evidence: historic-sql-table-public-orders',
{ skipLock: true },
);
});
it('rejects non-historic ingest sessions', async () => {
const tool = createEmitHistoricSqlEvidenceTool();
await expect(
tool.execute!(
{
kind: 'pattern',
rawPath: 'patterns-input.json',
pattern: {
slug: 'orders',
title: 'Orders',
narrative: 'Orders pattern.',
definitionSql: 'select * from public.orders',
tablesInvolved: ['public.orders'],
slRefs: ['orders'],
constituentTemplateIds: ['pg:1'],
},
},
{
toolCallId: 'call-1',
messages: [],
abortSignal: new AbortController().signal,
experimental_context: {
connectionId: 'warehouse',
session: {
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' },
configService: { writeFile: vi.fn() },
},
},
} as never,
),
).resolves.toContain('Error: emit_historic_sql_evidence is only available during historic-sql ingest');
});
});

View file

@ -0,0 +1,72 @@
import { tool } from 'ai';
import { z } from 'zod';
import { historicSqlEvidencePath, serializeHistoricSqlEvidence } from './evidence.js';
import { patternOutputSchema, tableUsageOutputSchema } from './skill-schemas.js';
const SYSTEM_AUTHOR = 'System User';
const SYSTEM_EMAIL = 'system@example.com';
function unitKeyForEvidence(input: { kind: string; table?: string; pattern?: { slug: string } }): string {
if (input.kind === 'table_usage') {
return `historic-sql-table-${String(input.table).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`;
}
return `historic-sql-pattern-${String(input.pattern?.slug).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`;
}
export function createEmitHistoricSqlEvidenceTool() {
return tool({
description:
'Record typed historic-SQL evidence for deterministic projection. Use this instead of wiki_write, sl_write_source, sl_edit_source, or context_candidate_write during historic-SQL WorkUnits.',
inputSchema: z.discriminatedUnion('kind', [
z.object({
kind: z.literal('table_usage'),
table: z.string().min(1),
rawPath: z.string().min(1),
usage: tableUsageOutputSchema,
}),
z.object({
kind: z.literal('pattern'),
rawPath: z.string().min(1),
pattern: patternOutputSchema,
}),
]),
execute: async (input, options): Promise<string> => {
const context = options.experimental_context as
| {
connectionId?: string | null;
session?: {
ingest?: { runId: string; sourceKey: string };
configService?: {
writeFile(
path: string,
content: string,
author: string,
authorEmail: string,
commitMessage: string,
options?: { skipLock?: boolean },
): Promise<unknown>;
};
};
}
| undefined;
const ingest = context?.session?.ingest;
const configService = context?.session?.configService;
if (!ingest || ingest.sourceKey !== 'historic-sql' || !configService || !context?.connectionId) {
return 'Error: emit_historic_sql_evidence is only available during historic-sql ingest.';
}
const unitKey = unitKeyForEvidence(input);
const content = serializeHistoricSqlEvidence({ ...input, connectionId: context.connectionId });
await configService.writeFile(
historicSqlEvidencePath(ingest.runId, unitKey),
content,
SYSTEM_AUTHOR,
SYSTEM_EMAIL,
`Record historic-SQL evidence: ${unitKey}`,
{ skipLock: true },
);
const label = input.kind === 'table_usage' ? input.table : input.pattern.slug;
return `Recorded historic-SQL ${input.kind} evidence for ${label}.`;
},
});
}

View file

@ -0,0 +1,54 @@
import { describe, expect, it } from 'vitest';
import {
historicSqlEvidenceEnvelopeSchema,
historicSqlEvidencePath,
historicSqlTableUsageEvidenceSchema,
} from './evidence.js';
describe('historic-sql evidence contracts', () => {
it('validates table usage evidence emitted by table digest WorkUnits', () => {
const parsed = historicSqlTableUsageEvidenceSchema.parse({
kind: 'table_usage',
connectionId: 'warehouse',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried for paid/refunded lifecycle analysis.',
frequencyTier: 'high',
commonFilters: ['status', 'created_at'],
commonGroupBys: ['status'],
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
staleSince: null,
},
});
expect(parsed.table).toBe('public.orders');
expect(parsed.usage.frequencyTier).toBe('high');
});
it('validates pattern evidence emitted by the patterns WorkUnit', () => {
const parsed = historicSqlEvidenceEnvelopeSchema.parse({
kind: 'pattern',
connectionId: 'warehouse',
rawPath: 'patterns-input.json',
pattern: {
slug: 'order-lifecycle-analysis',
title: 'Order Lifecycle Analysis',
narrative: 'Analysts compare order status changes by customer segment.',
definitionSql: 'select status, count(*) from public.orders group by status',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:1', 'pg:2'],
},
});
expect(parsed.kind).toBe('pattern');
expect(parsed.pattern.slug).toBe('order-lifecycle-analysis');
});
it('builds a stable ignored evidence path from run and WorkUnit identity', () => {
expect(historicSqlEvidencePath('run-1', 'historic-sql-table-public-orders')).toBe(
'.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json',
);
});
});

View file

@ -0,0 +1,41 @@
import { z } from 'zod';
import { patternOutputSchema, tableUsageOutputSchema } from './skill-schemas.js';
function safeEvidenceSegment(value: string): string {
const segment = value.replace(/[^a-zA-Z0-9._-]+/g, '-').replace(/^-+|-+$/g, '');
if (!segment) {
throw new Error(`Invalid historic-SQL evidence path segment: ${value}`);
}
return segment;
}
export const historicSqlTableUsageEvidenceSchema = z.object({
kind: z.literal('table_usage'),
connectionId: z.string().min(1),
table: z.string().min(1),
rawPath: z.string().min(1),
usage: tableUsageOutputSchema,
});
export type HistoricSqlTableUsageEvidence = z.infer<typeof historicSqlTableUsageEvidenceSchema>;
export const historicSqlPatternEvidenceSchema = z.object({
kind: z.literal('pattern'),
connectionId: z.string().min(1),
rawPath: z.string().min(1),
pattern: patternOutputSchema,
});
export type HistoricSqlPatternEvidence = z.infer<typeof historicSqlPatternEvidenceSchema>;
export const historicSqlEvidenceEnvelopeSchema = z.discriminatedUnion('kind', [
historicSqlTableUsageEvidenceSchema,
historicSqlPatternEvidenceSchema,
]);
export type HistoricSqlEvidenceEnvelope = z.infer<typeof historicSqlEvidenceEnvelopeSchema>;
export function historicSqlEvidencePath(runId: string, unitKey: string): string {
return `.ktx/ingest-evidence/historic-sql/${safeEvidenceSegment(runId)}/${safeEvidenceSegment(unitKey)}.json`;
}
export function serializeHistoricSqlEvidence(evidence: HistoricSqlEvidenceEnvelope): string {
return `${JSON.stringify(historicSqlEvidenceEnvelopeSchema.parse(evidence), null, 2)}\n`;
}

View file

@ -334,6 +334,19 @@ export { PostgresPgssQueryHistoryReader } from './adapters/historic-sql/postgres
export { SnowflakeHistoricSqlQueryHistoryReader } from './adapters/historic-sql/snowflake-query-history-reader.js';
export { stageHistoricSqlAggregatedSnapshot } from './adapters/historic-sql/stage-unified.js';
export { stageHistoricSqlTemplates } from './adapters/historic-sql/stage.js';
export {
historicSqlEvidenceEnvelopeSchema,
historicSqlEvidencePath,
historicSqlPatternEvidenceSchema,
historicSqlTableUsageEvidenceSchema,
serializeHistoricSqlEvidence,
} from './adapters/historic-sql/evidence.js';
export type {
HistoricSqlEvidenceEnvelope,
HistoricSqlPatternEvidence,
HistoricSqlTableUsageEvidence,
} from './adapters/historic-sql/evidence.js';
export { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evidence-tool.js';
export {
patternOutputSchema,
patternsArraySchema,

View file

@ -243,6 +243,9 @@ describe('@ktx/context package exports', () => {
expect(ingest.historicSqlUnifiedPullConfigSchema).toBeDefined();
expect(ingest.aggregatedTemplateSchema).toBeDefined();
expect(ingest.stagedTableInputSchema).toBeDefined();
expect(ingest.historicSqlEvidenceEnvelopeSchema).toBeDefined();
expect(ingest.historicSqlEvidencePath).toBeTypeOf('function');
expect(ingest.createEmitHistoricSqlEvidenceTool).toBeTypeOf('function');
expect(ingest.SqliteContextEvidenceStore).toBeTypeOf('function');
expect(ingest.SqliteBundleIngestStore).toBeTypeOf('function');
expect(ingest.CuratorPaginationService).toBeTypeOf('function');