mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat: add historic sql evidence emission
This commit is contained in:
parent
f99f85361a
commit
ef9358beb7
6 changed files with 263 additions and 0 deletions
|
|
@ -0,0 +1,80 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createEmitHistoricSqlEvidenceTool } from './evidence-tool.js';
|
||||
|
||||
describe('emit_historic_sql_evidence tool', () => {
|
||||
it('writes table usage evidence to the ignored run evidence directory', async () => {
|
||||
const writeFile = vi.fn(async () => ({ success: true, commitHash: null }));
|
||||
const tool = createEmitHistoricSqlEvidenceTool();
|
||||
|
||||
const result = await tool.execute!(
|
||||
{
|
||||
kind: 'table_usage',
|
||||
table: 'public.orders',
|
||||
rawPath: 'tables/public.orders.json',
|
||||
usage: {
|
||||
narrative: 'Orders are repeatedly queried by paid status.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status'],
|
||||
commonJoins: [],
|
||||
staleSince: null,
|
||||
},
|
||||
},
|
||||
{
|
||||
toolCallId: 'call-1',
|
||||
messages: [],
|
||||
abortSignal: new AbortController().signal,
|
||||
experimental_context: {
|
||||
connectionId: 'warehouse',
|
||||
session: {
|
||||
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'historic-sql' },
|
||||
configService: { writeFile },
|
||||
},
|
||||
},
|
||||
} as never,
|
||||
);
|
||||
|
||||
expect(result).toBe('Recorded historic-SQL table_usage evidence for public.orders.');
|
||||
expect(writeFile).toHaveBeenCalledWith(
|
||||
'.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json',
|
||||
expect.stringContaining('"kind": "table_usage"'),
|
||||
'System User',
|
||||
'system@example.com',
|
||||
'Record historic-SQL evidence: historic-sql-table-public-orders',
|
||||
{ skipLock: true },
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects non-historic ingest sessions', async () => {
|
||||
const tool = createEmitHistoricSqlEvidenceTool();
|
||||
|
||||
await expect(
|
||||
tool.execute!(
|
||||
{
|
||||
kind: 'pattern',
|
||||
rawPath: 'patterns-input.json',
|
||||
pattern: {
|
||||
slug: 'orders',
|
||||
title: 'Orders',
|
||||
narrative: 'Orders pattern.',
|
||||
definitionSql: 'select * from public.orders',
|
||||
tablesInvolved: ['public.orders'],
|
||||
slRefs: ['orders'],
|
||||
constituentTemplateIds: ['pg:1'],
|
||||
},
|
||||
},
|
||||
{
|
||||
toolCallId: 'call-1',
|
||||
messages: [],
|
||||
abortSignal: new AbortController().signal,
|
||||
experimental_context: {
|
||||
connectionId: 'warehouse',
|
||||
session: {
|
||||
ingest: { runId: 'run-1', jobId: 'job-1', syncId: 'sync-1', sourceKey: 'notion' },
|
||||
configService: { writeFile: vi.fn() },
|
||||
},
|
||||
},
|
||||
} as never,
|
||||
),
|
||||
).resolves.toContain('Error: emit_historic_sql_evidence is only available during historic-sql ingest');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
import { tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import { historicSqlEvidencePath, serializeHistoricSqlEvidence } from './evidence.js';
|
||||
import { patternOutputSchema, tableUsageOutputSchema } from './skill-schemas.js';
|
||||
|
||||
const SYSTEM_AUTHOR = 'System User';
|
||||
const SYSTEM_EMAIL = 'system@example.com';
|
||||
|
||||
function unitKeyForEvidence(input: { kind: string; table?: string; pattern?: { slug: string } }): string {
|
||||
if (input.kind === 'table_usage') {
|
||||
return `historic-sql-table-${String(input.table).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`;
|
||||
}
|
||||
return `historic-sql-pattern-${String(input.pattern?.slug).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`;
|
||||
}
|
||||
|
||||
export function createEmitHistoricSqlEvidenceTool() {
|
||||
return tool({
|
||||
description:
|
||||
'Record typed historic-SQL evidence for deterministic projection. Use this instead of wiki_write, sl_write_source, sl_edit_source, or context_candidate_write during historic-SQL WorkUnits.',
|
||||
inputSchema: z.discriminatedUnion('kind', [
|
||||
z.object({
|
||||
kind: z.literal('table_usage'),
|
||||
table: z.string().min(1),
|
||||
rawPath: z.string().min(1),
|
||||
usage: tableUsageOutputSchema,
|
||||
}),
|
||||
z.object({
|
||||
kind: z.literal('pattern'),
|
||||
rawPath: z.string().min(1),
|
||||
pattern: patternOutputSchema,
|
||||
}),
|
||||
]),
|
||||
execute: async (input, options): Promise<string> => {
|
||||
const context = options.experimental_context as
|
||||
| {
|
||||
connectionId?: string | null;
|
||||
session?: {
|
||||
ingest?: { runId: string; sourceKey: string };
|
||||
configService?: {
|
||||
writeFile(
|
||||
path: string,
|
||||
content: string,
|
||||
author: string,
|
||||
authorEmail: string,
|
||||
commitMessage: string,
|
||||
options?: { skipLock?: boolean },
|
||||
): Promise<unknown>;
|
||||
};
|
||||
};
|
||||
}
|
||||
| undefined;
|
||||
const ingest = context?.session?.ingest;
|
||||
const configService = context?.session?.configService;
|
||||
if (!ingest || ingest.sourceKey !== 'historic-sql' || !configService || !context?.connectionId) {
|
||||
return 'Error: emit_historic_sql_evidence is only available during historic-sql ingest.';
|
||||
}
|
||||
|
||||
const unitKey = unitKeyForEvidence(input);
|
||||
const content = serializeHistoricSqlEvidence({ ...input, connectionId: context.connectionId });
|
||||
await configService.writeFile(
|
||||
historicSqlEvidencePath(ingest.runId, unitKey),
|
||||
content,
|
||||
SYSTEM_AUTHOR,
|
||||
SYSTEM_EMAIL,
|
||||
`Record historic-SQL evidence: ${unitKey}`,
|
||||
{ skipLock: true },
|
||||
);
|
||||
const label = input.kind === 'table_usage' ? input.table : input.pattern.slug;
|
||||
return `Recorded historic-SQL ${input.kind} evidence for ${label}.`;
|
||||
},
|
||||
});
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
historicSqlEvidenceEnvelopeSchema,
|
||||
historicSqlEvidencePath,
|
||||
historicSqlTableUsageEvidenceSchema,
|
||||
} from './evidence.js';
|
||||
|
||||
describe('historic-sql evidence contracts', () => {
|
||||
it('validates table usage evidence emitted by table digest WorkUnits', () => {
|
||||
const parsed = historicSqlTableUsageEvidenceSchema.parse({
|
||||
kind: 'table_usage',
|
||||
connectionId: 'warehouse',
|
||||
table: 'public.orders',
|
||||
rawPath: 'tables/public.orders.json',
|
||||
usage: {
|
||||
narrative: 'Orders are repeatedly queried for paid/refunded lifecycle analysis.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status', 'created_at'],
|
||||
commonGroupBys: ['status'],
|
||||
commonJoins: [{ table: 'public.customers', on: ['customer_id'] }],
|
||||
staleSince: null,
|
||||
},
|
||||
});
|
||||
|
||||
expect(parsed.table).toBe('public.orders');
|
||||
expect(parsed.usage.frequencyTier).toBe('high');
|
||||
});
|
||||
|
||||
it('validates pattern evidence emitted by the patterns WorkUnit', () => {
|
||||
const parsed = historicSqlEvidenceEnvelopeSchema.parse({
|
||||
kind: 'pattern',
|
||||
connectionId: 'warehouse',
|
||||
rawPath: 'patterns-input.json',
|
||||
pattern: {
|
||||
slug: 'order-lifecycle-analysis',
|
||||
title: 'Order Lifecycle Analysis',
|
||||
narrative: 'Analysts compare order status changes by customer segment.',
|
||||
definitionSql: 'select status, count(*) from public.orders group by status',
|
||||
tablesInvolved: ['public.orders', 'public.customers'],
|
||||
slRefs: ['orders', 'customers'],
|
||||
constituentTemplateIds: ['pg:1', 'pg:2'],
|
||||
},
|
||||
});
|
||||
|
||||
expect(parsed.kind).toBe('pattern');
|
||||
expect(parsed.pattern.slug).toBe('order-lifecycle-analysis');
|
||||
});
|
||||
|
||||
it('builds a stable ignored evidence path from run and WorkUnit identity', () => {
|
||||
expect(historicSqlEvidencePath('run-1', 'historic-sql-table-public-orders')).toBe(
|
||||
'.ktx/ingest-evidence/historic-sql/run-1/historic-sql-table-public-orders.json',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
import { z } from 'zod';
|
||||
import { patternOutputSchema, tableUsageOutputSchema } from './skill-schemas.js';
|
||||
|
||||
function safeEvidenceSegment(value: string): string {
|
||||
const segment = value.replace(/[^a-zA-Z0-9._-]+/g, '-').replace(/^-+|-+$/g, '');
|
||||
if (!segment) {
|
||||
throw new Error(`Invalid historic-SQL evidence path segment: ${value}`);
|
||||
}
|
||||
return segment;
|
||||
}
|
||||
|
||||
export const historicSqlTableUsageEvidenceSchema = z.object({
|
||||
kind: z.literal('table_usage'),
|
||||
connectionId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
rawPath: z.string().min(1),
|
||||
usage: tableUsageOutputSchema,
|
||||
});
|
||||
export type HistoricSqlTableUsageEvidence = z.infer<typeof historicSqlTableUsageEvidenceSchema>;
|
||||
|
||||
export const historicSqlPatternEvidenceSchema = z.object({
|
||||
kind: z.literal('pattern'),
|
||||
connectionId: z.string().min(1),
|
||||
rawPath: z.string().min(1),
|
||||
pattern: patternOutputSchema,
|
||||
});
|
||||
export type HistoricSqlPatternEvidence = z.infer<typeof historicSqlPatternEvidenceSchema>;
|
||||
|
||||
export const historicSqlEvidenceEnvelopeSchema = z.discriminatedUnion('kind', [
|
||||
historicSqlTableUsageEvidenceSchema,
|
||||
historicSqlPatternEvidenceSchema,
|
||||
]);
|
||||
export type HistoricSqlEvidenceEnvelope = z.infer<typeof historicSqlEvidenceEnvelopeSchema>;
|
||||
|
||||
export function historicSqlEvidencePath(runId: string, unitKey: string): string {
|
||||
return `.ktx/ingest-evidence/historic-sql/${safeEvidenceSegment(runId)}/${safeEvidenceSegment(unitKey)}.json`;
|
||||
}
|
||||
|
||||
export function serializeHistoricSqlEvidence(evidence: HistoricSqlEvidenceEnvelope): string {
|
||||
return `${JSON.stringify(historicSqlEvidenceEnvelopeSchema.parse(evidence), null, 2)}\n`;
|
||||
}
|
||||
|
|
@ -334,6 +334,19 @@ export { PostgresPgssQueryHistoryReader } from './adapters/historic-sql/postgres
|
|||
export { SnowflakeHistoricSqlQueryHistoryReader } from './adapters/historic-sql/snowflake-query-history-reader.js';
|
||||
export { stageHistoricSqlAggregatedSnapshot } from './adapters/historic-sql/stage-unified.js';
|
||||
export { stageHistoricSqlTemplates } from './adapters/historic-sql/stage.js';
|
||||
export {
|
||||
historicSqlEvidenceEnvelopeSchema,
|
||||
historicSqlEvidencePath,
|
||||
historicSqlPatternEvidenceSchema,
|
||||
historicSqlTableUsageEvidenceSchema,
|
||||
serializeHistoricSqlEvidence,
|
||||
} from './adapters/historic-sql/evidence.js';
|
||||
export type {
|
||||
HistoricSqlEvidenceEnvelope,
|
||||
HistoricSqlPatternEvidence,
|
||||
HistoricSqlTableUsageEvidence,
|
||||
} from './adapters/historic-sql/evidence.js';
|
||||
export { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evidence-tool.js';
|
||||
export {
|
||||
patternOutputSchema,
|
||||
patternsArraySchema,
|
||||
|
|
|
|||
|
|
@ -243,6 +243,9 @@ describe('@ktx/context package exports', () => {
|
|||
expect(ingest.historicSqlUnifiedPullConfigSchema).toBeDefined();
|
||||
expect(ingest.aggregatedTemplateSchema).toBeDefined();
|
||||
expect(ingest.stagedTableInputSchema).toBeDefined();
|
||||
expect(ingest.historicSqlEvidenceEnvelopeSchema).toBeDefined();
|
||||
expect(ingest.historicSqlEvidencePath).toBeTypeOf('function');
|
||||
expect(ingest.createEmitHistoricSqlEvidenceTool).toBeTypeOf('function');
|
||||
expect(ingest.SqliteContextEvidenceStore).toBeTypeOf('function');
|
||||
expect(ingest.SqliteBundleIngestStore).toBeTypeOf('function');
|
||||
expect(ingest.CuratorPaginationService).toBeTypeOf('function');
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue