mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
fix(ingest): attribute historic-sql evidence writes in bundle report (#220)
The emit_historic_sql_evidence tool took rawPath as LLM-supplied input, so projection actions frequently lacked defensible raw paths. As a result, every row in bundle_ingest_reports fell through as actionType: 'skipped' with null artifact metadata, hiding the wiki pages and SL merges the run had actually produced (KLO-698). The tool now reads the work unit's rawFiles from session.allowedRawPaths and stores them on the evidence envelope. The projection emits actions with those paths, and stale/archive actions are anchored to manifest.json so they also surface as non-skipped provenance rows.
This commit is contained in:
parent
2a6fb19ba4
commit
1071f9d1c9
11 changed files with 99 additions and 31 deletions
|
|
@ -60,7 +60,7 @@ export async function chunkHistoricSqlUnifiedStagedDir(stagedDir: string, diffSe
|
|||
dependencyPaths: ['manifest.json'],
|
||||
peerFileIndex: files.filter((file) => file !== path && file !== 'manifest.json').sort(),
|
||||
notes:
|
||||
`Use historic_sql_patterns. Read ${path} and emit pattern objects with emit_historic_sql_evidence using rawPath "${path}". Do not call wiki_write or sl_write_source.`,
|
||||
`Use historic_sql_patterns. Read ${path} and emit pattern objects with emit_historic_sql_evidence. Do not call wiki_write or sl_write_source.`,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ const emitHistoricSqlEvidenceInputSchema = z
|
|||
.object({
|
||||
kind: z.enum(['table_usage', 'pattern']),
|
||||
table: z.string().min(1).optional(),
|
||||
rawPath: z.string().min(1),
|
||||
usage: tableUsageOutputSchema.optional(),
|
||||
pattern: patternOutputSchema.optional(),
|
||||
})
|
||||
|
|
@ -46,6 +45,7 @@ interface EmitHistoricSqlEvidenceToolContext {
|
|||
connectionId?: string | null;
|
||||
session?: {
|
||||
ingest?: { runId: string; sourceKey: string };
|
||||
allowedRawPaths?: ReadonlySet<string>;
|
||||
configService?: {
|
||||
writeFile(
|
||||
path: string,
|
||||
|
|
@ -66,7 +66,7 @@ function unitKeyForEvidence(input: EmitHistoricSqlEvidenceInput): string {
|
|||
return `historic-sql-pattern-${String(input.pattern?.slug).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`;
|
||||
}
|
||||
|
||||
function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: string) {
|
||||
function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: string, rawPaths: string[]) {
|
||||
if (input.kind === 'table_usage') {
|
||||
if (!input.table || !input.usage) {
|
||||
throw new Error('Invalid historic-SQL table usage evidence input.');
|
||||
|
|
@ -75,7 +75,7 @@ function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: str
|
|||
kind: 'table_usage' as const,
|
||||
connectionId,
|
||||
table: input.table,
|
||||
rawPath: input.rawPath,
|
||||
rawPaths,
|
||||
usage: input.usage,
|
||||
};
|
||||
}
|
||||
|
|
@ -85,7 +85,7 @@ function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: str
|
|||
return {
|
||||
kind: 'pattern' as const,
|
||||
connectionId,
|
||||
rawPath: input.rawPath,
|
||||
rawPaths,
|
||||
pattern: input.pattern,
|
||||
};
|
||||
}
|
||||
|
|
@ -102,9 +102,13 @@ export function createEmitHistoricSqlEvidenceTool(defaultContext?: EmitHistoricS
|
|||
if (!ingest || ingest.sourceKey !== 'historic-sql' || !configService || !context?.connectionId) {
|
||||
return 'Error: emit_historic_sql_evidence is only available during historic-sql ingest.';
|
||||
}
|
||||
const rawPaths = context.session?.allowedRawPaths ? [...context.session.allowedRawPaths].sort() : [];
|
||||
if (rawPaths.length === 0) {
|
||||
return 'Error: emit_historic_sql_evidence requires a WorkUnit context with at least one raw file.';
|
||||
}
|
||||
|
||||
const unitKey = unitKeyForEvidence(input);
|
||||
const evidence = evidenceEnvelope(input, context.connectionId);
|
||||
const evidence = evidenceEnvelope(input, context.connectionId, rawPaths);
|
||||
const content = serializeHistoricSqlEvidence(evidence);
|
||||
await configService.writeFile(
|
||||
historicSqlEvidencePath(ingest.runId, unitKey),
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ export const historicSqlTableUsageEvidenceSchema = z.object({
|
|||
kind: z.literal('table_usage'),
|
||||
connectionId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
rawPath: z.string().min(1),
|
||||
rawPaths: z.array(z.string().min(1)).min(1),
|
||||
usage: tableUsageOutputSchema,
|
||||
});
|
||||
|
||||
|
|
@ -22,7 +22,7 @@ export const historicSqlTableUsageEvidenceSchema = z.object({
|
|||
export const historicSqlPatternEvidenceSchema = z.object({
|
||||
kind: z.literal('pattern'),
|
||||
connectionId: z.string().min(1),
|
||||
rawPath: z.string().min(1),
|
||||
rawPaths: z.array(z.string().min(1)).min(1),
|
||||
pattern: patternOutputSchema,
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -278,7 +278,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
key: sourceName,
|
||||
targetConnectionId: input.connectionId,
|
||||
detail: `Merged historic-SQL usage for ${matchingEvidence.table}`,
|
||||
rawPaths: [matchingEvidence.rawPath],
|
||||
rawPaths: matchingEvidence.rawPaths,
|
||||
});
|
||||
}
|
||||
} else if (entry.usage && !currentTables.has(tableRef)) {
|
||||
|
|
@ -298,6 +298,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
key: sourceName,
|
||||
targetConnectionId: input.connectionId,
|
||||
detail: `Marked historic-SQL usage stale for ${tableRef}`,
|
||||
rawPaths: ['manifest.json'],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -341,7 +342,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
type: reusable ? 'updated' : 'created',
|
||||
key,
|
||||
detail: `Projected historic-SQL pattern ${pattern.pattern.title}`,
|
||||
rawPaths: [pattern.rawPath],
|
||||
rawPaths: pattern.rawPaths,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -361,6 +362,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
type: 'updated',
|
||||
key: page.key,
|
||||
detail: `Archived stale historic-SQL pattern page ${page.key}`,
|
||||
rawPaths: ['manifest.json'],
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
|
@ -377,6 +379,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
type: 'updated',
|
||||
key: page.key,
|
||||
detail: `Marked historic-SQL pattern page ${page.key} stale`,
|
||||
rawPaths: ['manifest.json'],
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,8 +15,7 @@ Use this skill when the WorkUnit raw file is a `patterns-input/part-0001.json` s
|
|||
3. Call `read_raw_file` for that exact raw file path.
|
||||
4. Identify recurring analytical intents that span at least two tables and have repeated usage signal.
|
||||
5. Emit one `pattern` evidence object per durable cross-table intent by calling `emit_historic_sql_evidence`.
|
||||
6. Set each evidence object's `rawPath` to the exact raw file path read in step 3.
|
||||
7. Stop after all pattern evidence has been emitted.
|
||||
6. Stop after all pattern evidence has been emitted.
|
||||
|
||||
Every join column mentioned in pattern descriptions must be verified via
|
||||
entity_details for both sides of the join.
|
||||
|
|
@ -56,7 +55,6 @@ Each call to `emit_historic_sql_evidence` must use this shape:
|
|||
```json
|
||||
{
|
||||
"kind": "pattern",
|
||||
"rawPath": "patterns-input/part-0001.json",
|
||||
"pattern": {
|
||||
"slug": "order-lifecycle-analysis",
|
||||
"title": "Order Lifecycle Analysis",
|
||||
|
|
|
|||
|
|
@ -53,7 +53,6 @@ Call `emit_historic_sql_evidence` with this shape:
|
|||
{
|
||||
"kind": "table_usage",
|
||||
"table": "public.orders",
|
||||
"rawPath": "tables/public.orders.json",
|
||||
"usage": {
|
||||
"narrative": "Orders are repeatedly queried for paid/refunded lifecycle analysis and customer-level rollups.",
|
||||
"frequencyTier": "high",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue