fix(ingest): attribute historic-sql evidence writes in bundle report (#220)

The emit_historic_sql_evidence tool took rawPath as LLM-supplied input,
so projection actions frequently lacked defensible raw paths and every
row in bundle_ingest_reports fell through as actionType: 'skipped' with
null artifact metadata, hiding the wiki pages and SL merges the run had
actually produced (KLO-698).

The tool now reads the work unit's rawFiles from session.allowedRawPaths
and stores them on the evidence envelope; the projection emits actions
with those paths, and stale/archive actions are anchored to manifest.json
so they also surface as non-skipped provenance rows.
This commit is contained in:
Andrey Avtomonov 2026-05-26 12:21:53 +02:00 committed by GitHub
parent 2a6fb19ba4
commit 1071f9d1c9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 99 additions and 31 deletions

View file

@ -60,7 +60,7 @@ export async function chunkHistoricSqlUnifiedStagedDir(stagedDir: string, diffSe
dependencyPaths: ['manifest.json'],
peerFileIndex: files.filter((file) => file !== path && file !== 'manifest.json').sort(),
notes:
`Use historic_sql_patterns. Read ${path} and emit pattern objects with emit_historic_sql_evidence using rawPath "${path}". Do not call wiki_write or sl_write_source.`,
`Use historic_sql_patterns. Read ${path} and emit pattern objects with emit_historic_sql_evidence. Do not call wiki_write or sl_write_source.`,
});
}

View file

@ -10,7 +10,6 @@ const emitHistoricSqlEvidenceInputSchema = z
.object({
kind: z.enum(['table_usage', 'pattern']),
table: z.string().min(1).optional(),
rawPath: z.string().min(1),
usage: tableUsageOutputSchema.optional(),
pattern: patternOutputSchema.optional(),
})
@ -46,6 +45,7 @@ interface EmitHistoricSqlEvidenceToolContext {
connectionId?: string | null;
session?: {
ingest?: { runId: string; sourceKey: string };
allowedRawPaths?: ReadonlySet<string>;
configService?: {
writeFile(
path: string,
@ -66,7 +66,7 @@ function unitKeyForEvidence(input: EmitHistoricSqlEvidenceInput): string {
return `historic-sql-pattern-${String(input.pattern?.slug).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`;
}
function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: string) {
function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: string, rawPaths: string[]) {
if (input.kind === 'table_usage') {
if (!input.table || !input.usage) {
throw new Error('Invalid historic-SQL table usage evidence input.');
@ -75,7 +75,7 @@ function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: str
kind: 'table_usage' as const,
connectionId,
table: input.table,
rawPath: input.rawPath,
rawPaths,
usage: input.usage,
};
}
@ -85,7 +85,7 @@ function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: str
return {
kind: 'pattern' as const,
connectionId,
rawPath: input.rawPath,
rawPaths,
pattern: input.pattern,
};
}
@ -102,9 +102,13 @@ export function createEmitHistoricSqlEvidenceTool(defaultContext?: EmitHistoricS
if (!ingest || ingest.sourceKey !== 'historic-sql' || !configService || !context?.connectionId) {
return 'Error: emit_historic_sql_evidence is only available during historic-sql ingest.';
}
const rawPaths = context.session?.allowedRawPaths ? [...context.session.allowedRawPaths].sort() : [];
if (rawPaths.length === 0) {
return 'Error: emit_historic_sql_evidence requires a WorkUnit context with at least one raw file.';
}
const unitKey = unitKeyForEvidence(input);
const evidence = evidenceEnvelope(input, context.connectionId);
const evidence = evidenceEnvelope(input, context.connectionId, rawPaths);
const content = serializeHistoricSqlEvidence(evidence);
await configService.writeFile(
historicSqlEvidencePath(ingest.runId, unitKey),

View file

@ -14,7 +14,7 @@ export const historicSqlTableUsageEvidenceSchema = z.object({
kind: z.literal('table_usage'),
connectionId: z.string().min(1),
table: z.string().min(1),
rawPath: z.string().min(1),
rawPaths: z.array(z.string().min(1)).min(1),
usage: tableUsageOutputSchema,
});
@ -22,7 +22,7 @@ export const historicSqlTableUsageEvidenceSchema = z.object({
export const historicSqlPatternEvidenceSchema = z.object({
kind: z.literal('pattern'),
connectionId: z.string().min(1),
rawPath: z.string().min(1),
rawPaths: z.array(z.string().min(1)).min(1),
pattern: patternOutputSchema,
});

View file

@ -278,7 +278,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
key: sourceName,
targetConnectionId: input.connectionId,
detail: `Merged historic-SQL usage for ${matchingEvidence.table}`,
rawPaths: [matchingEvidence.rawPath],
rawPaths: matchingEvidence.rawPaths,
});
}
} else if (entry.usage && !currentTables.has(tableRef)) {
@ -298,6 +298,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
key: sourceName,
targetConnectionId: input.connectionId,
detail: `Marked historic-SQL usage stale for ${tableRef}`,
rawPaths: ['manifest.json'],
});
}
}
@ -341,7 +342,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
type: reusable ? 'updated' : 'created',
key,
detail: `Projected historic-SQL pattern ${pattern.pattern.title}`,
rawPaths: [pattern.rawPath],
rawPaths: pattern.rawPaths,
});
}
@ -361,6 +362,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
type: 'updated',
key: page.key,
detail: `Archived stale historic-SQL pattern page ${page.key}`,
rawPaths: ['manifest.json'],
});
continue;
}
@ -377,6 +379,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
type: 'updated',
key: page.key,
detail: `Marked historic-SQL pattern page ${page.key} stale`,
rawPaths: ['manifest.json'],
});
}

View file

@ -15,8 +15,7 @@ Use this skill when the WorkUnit raw file is a `patterns-input/part-0001.json` s
3. Call `read_raw_file` for that exact raw file path.
4. Identify recurring analytical intents that span at least two tables and have repeated usage signal.
5. Emit one `pattern` evidence object per durable cross-table intent by calling `emit_historic_sql_evidence`.
6. Set each evidence object's `rawPath` to the exact raw file path read in step 3.
7. Stop after all pattern evidence has been emitted.
6. Stop after all pattern evidence has been emitted.
Every join column mentioned in pattern descriptions must be verified via
entity_details for both sides of the join.
@ -56,7 +55,6 @@ Each call to `emit_historic_sql_evidence` must use this shape:
```json
{
"kind": "pattern",
"rawPath": "patterns-input/part-0001.json",
"pattern": {
"slug": "order-lifecycle-analysis",
"title": "Order Lifecycle Analysis",

View file

@ -53,7 +53,6 @@ Call `emit_historic_sql_evidence` with this shape:
{
"kind": "table_usage",
"table": "public.orders",
"rawPath": "tables/public.orders.json",
"usage": {
"narrative": "Orders are repeatedly queried for paid/refunded lifecycle analysis and customer-level rollups.",
"frequencyTier": "high",