fix: align KTX agent tools and repair handling (#73)

This commit is contained in:
Andrey Avtomonov 2026-05-14 00:57:51 +02:00 committed by GitHub
parent ed690ef60c
commit 28b5e2a83e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 113 additions and 45 deletions

View file

@ -40,6 +40,8 @@ describe('AgentRunnerService.runLoop', () => {
it('passes systemPrompt, userPrompt, tools, and step budget through to generateText', async () => {
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
const repairHandler = vi.fn();
llmProvider.repairToolCallHandler.mockReturnValueOnce(repairHandler);
const tools = { noop: { description: 'noop', inputSchema: {}, execute: vi.fn() } };
await runner.runLoop({
modelRole: 'candidateExtraction',
@ -59,7 +61,9 @@ describe('AgentRunnerService.runLoop', () => {
expect(call.tools).toEqual(tools);
expect(call.stopWhen).toBe(17);
expect(call.temperature).toBe(0);
expect(call.experimental_repairToolCall).toBe(repairHandler);
expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction');
expect(llmProvider.repairToolCallHandler).toHaveBeenCalledWith({ source: 'ktx-agent-runner' });
});
it('returns stopReason=natural when the loop completes without error', async () => {

View file

@ -73,6 +73,9 @@ export class AgentRunnerService {
temperature: 0,
stopWhen: stepCountIs(params.stepBudget),
experimental_telemetry: this.deps.telemetry?.createTelemetry(params.telemetryTags),
experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({
source: params.telemetryTags.operationName ?? 'ktx-agent-runner',
}),
messages: built.messages,
tools: built.tools as Record<string, Tool>,
onStepFinish: async () => {

View file

@ -695,7 +695,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
await params.toolSet.emit_unmapped_fallback.execute(
{
rawPath: 'a.yml',
reason: 'semantic_not_representable',
reason: 'parse_error',
clarification: 'semantic_not_representable',
fallback: 'flagged',
},
{ toolCallId: 'fallback-1', messages: [] },
@ -954,6 +955,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
{
rawPath: 'a.yml',
reason: 'conversion_metric_unsupported',
detail: expect.stringContaining('conversion metric'),
fallback: 'flagged',
},
],
@ -1006,7 +1008,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
await params.toolSet.emit_unmapped_fallback.execute(
{
rawPath: 'cards/untranslated.json',
reason: 'metabase_sql_untranslated',
reason: 'parse_error',
clarification: 'metabase_sql_untranslated',
fallback: 'flagged',
},
{ toolCallId: 'fallback-1', messages: [] },
@ -1053,7 +1056,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
unmappedFallbacks: [
{
rawPath: 'cards/untranslated.json',
reason: 'metabase_sql_untranslated',
reason: 'parse_error',
detail: expect.stringContaining('metabase_sql_untranslated'),
fallback: 'flagged',
},
],

View file

@ -37,7 +37,9 @@ export type UnmappedFallbackReason =
| 'multiple_table_references'
| 'unsupported_dialect'
| 'parse_error'
| 'missing_target_table';
| 'missing_target_table'
| 'cumulative_metric_unsupported'
| 'conversion_metric_unsupported';
export interface UnmappedFallbackRecord {
rawPath: string;

View file

@ -182,6 +182,30 @@ describe('reconciliation emit tools', () => {
]);
});
it('records MetricFlow-specific unsupported fallback reasons', async () => {
const stageIndex = makeStageIndex();
const tool = createEmitUnmappedFallbackTool({
stageIndex,
allowedPaths: new Set(['metrics/conversion.yml']),
});
const output = await executeTool(tool, {
rawPath: 'metrics/conversion.yml',
reason: 'conversion_metric_unsupported',
fallback: 'flagged',
});
expect(output).toContain('conversion metric');
expect(stageIndex.unmappedFallbacks).toEqual([
{
rawPath: 'metrics/conversion.yml',
reason: 'conversion_metric_unsupported',
detail: expect.stringContaining('conversion metric'),
fallback: 'flagged',
},
]);
});
it('rejects unmapped fallback decisions for raw paths outside the allowed set', async () => {
const stageIndex = makeStageIndex();
const tool = createEmitUnmappedFallbackTool({

View file

@ -17,6 +17,8 @@ const unmappedFallbackReasonSchema = z.enum([
'unsupported_dialect',
'parse_error',
'missing_target_table',
'cumulative_metric_unsupported',
'conversion_metric_unsupported',
]);
function sameUnmappedFallback(left: UnmappedFallbackRecord, right: UnmappedFallbackRecord): boolean {
@ -47,6 +49,10 @@ function canonicalDetail(reason: UnmappedFallbackReason, tableRef: string | unde
return `${tableClause} uses a SQL dialect that is not yet supported.`;
case 'parse_error':
return `${tableClause} could not be parsed.`;
case 'cumulative_metric_unsupported':
return `${tableClause} is a cumulative metric, which is not yet supported as a first-class semantic-layer primitive.`;
case 'conversion_metric_unsupported':
return `${tableClause} is a conversion metric, which is not yet supported as a first-class semantic-layer primitive.`;
}
}

View file

@ -51,6 +51,6 @@ describe('eviction_list tool', () => {
deletedRawPaths: [],
});
expect(tool.description).toContain('context_eviction_decision_write');
expect(tool.description).toContain('emit_eviction_decision');
});
});

View file

@ -12,7 +12,7 @@ export interface EvictionListDeps {
export function createEvictionListTool(deps: EvictionListDeps) {
return tool({
description:
'List every artifact that the most recent completed sync produced from a now-deleted raw file. Remove each listed artifact and record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.',
'List every artifact that the most recent completed sync produced from a now-deleted raw file. Remove each listed artifact and record the decision with emit_eviction_decision so the ingest report lists every deleted-source decision.',
inputSchema: z.object({}),
execute: async () => {
if (deps.deletedRawPaths.length === 0) {

View file

@ -28,7 +28,7 @@ const WRITE_TOOL_NAMES = new Set([
]);
export const VERIFICATION_LEDGER_PROMPT = `<pre_write_verification>
Before any write-capable tool call (wiki_write, wiki_remove, sl_write_source, sl_edit_source, emit_unmapped_fallback), call record_verification_ledger.
Before any durable wiki, semantic-layer, or unmapped-fallback write (wiki_write, wiki_remove, sl_write_source, sl_edit_source, emit_unmapped_fallback), call record_verification_ledger.
The ledger is a model-authored checkpoint, not a deterministic parser gate. Summarize the verification protocol from the loaded skill, list identifiers verified with discover_data/entity_details/sql_execution, and list anything intentionally left unverified. If the write contains no warehouse identifiers, say that explicitly.
If a write tool returns verification_ledger_required, complete the ledger and retry the write.
</pre_write_verification>`;

View file

@ -4,6 +4,10 @@ import { generateText, Output, type FlexibleSchema, type ToolSet } from 'ai';
type GenerateTextInput = Parameters<typeof generateText>[0];
type GenerateTextFn = (input: GenerateTextInput) => Promise<{ text?: string; output?: unknown }>;
function hasTools(tools: ToolSet): boolean {
return Object.keys(tools).length > 0;
}
interface GenerateKtxTextInput {
llmProvider: KtxLlmProvider;
role: KtxModelRole;
@ -30,6 +34,13 @@ export async function generateKtxText(input: GenerateKtxTextInput): Promise<stri
temperature: input.temperature ?? 0,
messages: built.messages,
tools: built.tools as ToolSet,
...(hasTools(built.tools as ToolSet)
? {
experimental_repairToolCall: input.llmProvider.repairToolCallHandler({
source: `ktx-${input.role}`,
}),
}
: {}),
});
if (typeof result.text !== 'string') {
throw new Error('KTX LLM text generation returned no text');
@ -52,6 +63,13 @@ export async function generateKtxObject<TOutput, TSchema>(
temperature: input.temperature ?? 0,
messages: built.messages,
tools: built.tools as ToolSet,
...(hasTools(built.tools as ToolSet)
? {
experimental_repairToolCall: input.llmProvider.repairToolCallHandler({
source: `ktx-${input.role}`,
}),
}
: {}),
output: Output.object({
schema: input.schema as FlexibleSchema<TOutput>,
}),

View file

@ -53,7 +53,7 @@ export class SlDiscoverTool extends BaseSemanticLayerTool<typeof slDiscoverInput
return `<purpose>
Discover available semantic layer sources, columns, measures, and joins.
When called without a connectionId, discovers sources across ALL data sources grouped by data source name and ID.
Use this to understand what data is available before writing a semantic_query.
Use this to understand what data is available before querying through the semantic layer.
</purpose>
<when_to_use>

View file

@ -36,7 +36,7 @@ Use this when you need to understand how a source is built — e.g., before edit
<when_not_to_use>
- To discover what sources/measures/dimensions are available for querying use sl_discover instead
- To query data use semantic_query or create_widget with slQuery
- To query data use the semantic-layer query surface (\`sl_query\` in MCP)
</when_not_to_use>`;
}