fix(ingest): attribute historic-sql evidence writes in bundle report (#220)

The emit_historic_sql_evidence tool took rawPath as LLM-supplied input,
so projection actions frequently lacked defensible raw paths and every
row in bundle_ingest_reports fell through as actionType: 'skipped' with
null artifact metadata, hiding the wiki pages and SL merges the run had
actually produced (KLO-698).

The tool no longer accepts rawPath as input. It now reads the work unit's
raw files from session.allowedRawPaths, stores them (sorted) on the
evidence envelope as a rawPaths array, and returns an error when the
session exposes no raw paths. The projection emits actions with those
paths, and stale/archive actions are anchored to manifest.json so they
also surface as non-skipped provenance rows.
This commit is contained in:
Andrey Avtomonov 2026-05-26 12:21:53 +02:00 committed by GitHub
parent 2a6fb19ba4
commit 1071f9d1c9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 99 additions and 31 deletions

View file

@ -60,7 +60,7 @@ export async function chunkHistoricSqlUnifiedStagedDir(stagedDir: string, diffSe
dependencyPaths: ['manifest.json'],
peerFileIndex: files.filter((file) => file !== path && file !== 'manifest.json').sort(),
notes:
`Use historic_sql_patterns. Read ${path} and emit pattern objects with emit_historic_sql_evidence using rawPath "${path}". Do not call wiki_write or sl_write_source.`,
`Use historic_sql_patterns. Read ${path} and emit pattern objects with emit_historic_sql_evidence. Do not call wiki_write or sl_write_source.`,
});
}

View file

@ -10,7 +10,6 @@ const emitHistoricSqlEvidenceInputSchema = z
.object({
kind: z.enum(['table_usage', 'pattern']),
table: z.string().min(1).optional(),
rawPath: z.string().min(1),
usage: tableUsageOutputSchema.optional(),
pattern: patternOutputSchema.optional(),
})
@ -46,6 +45,7 @@ interface EmitHistoricSqlEvidenceToolContext {
connectionId?: string | null;
session?: {
ingest?: { runId: string; sourceKey: string };
allowedRawPaths?: ReadonlySet<string>;
configService?: {
writeFile(
path: string,
@ -66,7 +66,7 @@ function unitKeyForEvidence(input: EmitHistoricSqlEvidenceInput): string {
return `historic-sql-pattern-${String(input.pattern?.slug).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`;
}
function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: string) {
function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: string, rawPaths: string[]) {
if (input.kind === 'table_usage') {
if (!input.table || !input.usage) {
throw new Error('Invalid historic-SQL table usage evidence input.');
@ -75,7 +75,7 @@ function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: str
kind: 'table_usage' as const,
connectionId,
table: input.table,
rawPath: input.rawPath,
rawPaths,
usage: input.usage,
};
}
@ -85,7 +85,7 @@ function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: str
return {
kind: 'pattern' as const,
connectionId,
rawPath: input.rawPath,
rawPaths,
pattern: input.pattern,
};
}
@ -102,9 +102,13 @@ export function createEmitHistoricSqlEvidenceTool(defaultContext?: EmitHistoricS
if (!ingest || ingest.sourceKey !== 'historic-sql' || !configService || !context?.connectionId) {
return 'Error: emit_historic_sql_evidence is only available during historic-sql ingest.';
}
const rawPaths = context.session?.allowedRawPaths ? [...context.session.allowedRawPaths].sort() : [];
if (rawPaths.length === 0) {
return 'Error: emit_historic_sql_evidence requires a WorkUnit context with at least one raw file.';
}
const unitKey = unitKeyForEvidence(input);
const evidence = evidenceEnvelope(input, context.connectionId);
const evidence = evidenceEnvelope(input, context.connectionId, rawPaths);
const content = serializeHistoricSqlEvidence(evidence);
await configService.writeFile(
historicSqlEvidencePath(ingest.runId, unitKey),

View file

@ -14,7 +14,7 @@ export const historicSqlTableUsageEvidenceSchema = z.object({
kind: z.literal('table_usage'),
connectionId: z.string().min(1),
table: z.string().min(1),
rawPath: z.string().min(1),
rawPaths: z.array(z.string().min(1)).min(1),
usage: tableUsageOutputSchema,
});
@ -22,7 +22,7 @@ export const historicSqlTableUsageEvidenceSchema = z.object({
export const historicSqlPatternEvidenceSchema = z.object({
kind: z.literal('pattern'),
connectionId: z.string().min(1),
rawPath: z.string().min(1),
rawPaths: z.array(z.string().min(1)).min(1),
pattern: patternOutputSchema,
});

View file

@ -278,7 +278,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
key: sourceName,
targetConnectionId: input.connectionId,
detail: `Merged historic-SQL usage for ${matchingEvidence.table}`,
rawPaths: [matchingEvidence.rawPath],
rawPaths: matchingEvidence.rawPaths,
});
}
} else if (entry.usage && !currentTables.has(tableRef)) {
@ -298,6 +298,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
key: sourceName,
targetConnectionId: input.connectionId,
detail: `Marked historic-SQL usage stale for ${tableRef}`,
rawPaths: ['manifest.json'],
});
}
}
@ -341,7 +342,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
type: reusable ? 'updated' : 'created',
key,
detail: `Projected historic-SQL pattern ${pattern.pattern.title}`,
rawPaths: [pattern.rawPath],
rawPaths: pattern.rawPaths,
});
}
@ -361,6 +362,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
type: 'updated',
key: page.key,
detail: `Archived stale historic-SQL pattern page ${page.key}`,
rawPaths: ['manifest.json'],
});
continue;
}
@ -377,6 +379,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
type: 'updated',
key: page.key,
detail: `Marked historic-SQL pattern page ${page.key} stale`,
rawPaths: ['manifest.json'],
});
}