mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-22 08:38:08 +02:00
fix: align KTX agent tools and repair handling (#73)
This commit is contained in:
parent
ed690ef60c
commit
28b5e2a83e
19 changed files with 113 additions and 45 deletions
|
|
@ -40,6 +40,8 @@ describe('AgentRunnerService.runLoop', () => {
|
|||
|
||||
it('passes systemPrompt, userPrompt, tools, and step budget through to generateText', async () => {
|
||||
(generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
|
||||
const repairHandler = vi.fn();
|
||||
llmProvider.repairToolCallHandler.mockReturnValueOnce(repairHandler);
|
||||
const tools = { noop: { description: 'noop', inputSchema: {}, execute: vi.fn() } };
|
||||
await runner.runLoop({
|
||||
modelRole: 'candidateExtraction',
|
||||
|
|
@ -59,7 +61,9 @@ describe('AgentRunnerService.runLoop', () => {
|
|||
expect(call.tools).toEqual(tools);
|
||||
expect(call.stopWhen).toBe(17);
|
||||
expect(call.temperature).toBe(0);
|
||||
expect(call.experimental_repairToolCall).toBe(repairHandler);
|
||||
expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction');
|
||||
expect(llmProvider.repairToolCallHandler).toHaveBeenCalledWith({ source: 'ktx-agent-runner' });
|
||||
});
|
||||
|
||||
it('returns stopReason=natural when the loop completes without error', async () => {
|
||||
|
|
|
|||
|
|
@ -73,6 +73,9 @@ export class AgentRunnerService {
|
|||
temperature: 0,
|
||||
stopWhen: stepCountIs(params.stepBudget),
|
||||
experimental_telemetry: this.deps.telemetry?.createTelemetry(params.telemetryTags),
|
||||
experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({
|
||||
source: params.telemetryTags.operationName ?? 'ktx-agent-runner',
|
||||
}),
|
||||
messages: built.messages,
|
||||
tools: built.tools as Record<string, Tool>,
|
||||
onStepFinish: async () => {
|
||||
|
|
|
|||
|
|
@ -695,7 +695,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
await params.toolSet.emit_unmapped_fallback.execute(
|
||||
{
|
||||
rawPath: 'a.yml',
|
||||
reason: 'semantic_not_representable',
|
||||
reason: 'parse_error',
|
||||
clarification: 'semantic_not_representable',
|
||||
fallback: 'flagged',
|
||||
},
|
||||
{ toolCallId: 'fallback-1', messages: [] },
|
||||
|
|
@ -954,6 +955,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
{
|
||||
rawPath: 'a.yml',
|
||||
reason: 'conversion_metric_unsupported',
|
||||
detail: expect.stringContaining('conversion metric'),
|
||||
fallback: 'flagged',
|
||||
},
|
||||
],
|
||||
|
|
@ -1006,7 +1008,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
await params.toolSet.emit_unmapped_fallback.execute(
|
||||
{
|
||||
rawPath: 'cards/untranslated.json',
|
||||
reason: 'metabase_sql_untranslated',
|
||||
reason: 'parse_error',
|
||||
clarification: 'metabase_sql_untranslated',
|
||||
fallback: 'flagged',
|
||||
},
|
||||
{ toolCallId: 'fallback-1', messages: [] },
|
||||
|
|
@ -1053,7 +1056,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
unmappedFallbacks: [
|
||||
{
|
||||
rawPath: 'cards/untranslated.json',
|
||||
reason: 'metabase_sql_untranslated',
|
||||
reason: 'parse_error',
|
||||
detail: expect.stringContaining('metabase_sql_untranslated'),
|
||||
fallback: 'flagged',
|
||||
},
|
||||
],
|
||||
|
|
|
|||
|
|
@ -37,7 +37,9 @@ export type UnmappedFallbackReason =
|
|||
| 'multiple_table_references'
|
||||
| 'unsupported_dialect'
|
||||
| 'parse_error'
|
||||
| 'missing_target_table';
|
||||
| 'missing_target_table'
|
||||
| 'cumulative_metric_unsupported'
|
||||
| 'conversion_metric_unsupported';
|
||||
|
||||
export interface UnmappedFallbackRecord {
|
||||
rawPath: string;
|
||||
|
|
|
|||
|
|
@ -182,6 +182,30 @@ describe('reconciliation emit tools', () => {
|
|||
]);
|
||||
});
|
||||
|
||||
it('records MetricFlow-specific unsupported fallback reasons', async () => {
|
||||
const stageIndex = makeStageIndex();
|
||||
const tool = createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: new Set(['metrics/conversion.yml']),
|
||||
});
|
||||
|
||||
const output = await executeTool(tool, {
|
||||
rawPath: 'metrics/conversion.yml',
|
||||
reason: 'conversion_metric_unsupported',
|
||||
fallback: 'flagged',
|
||||
});
|
||||
|
||||
expect(output).toContain('conversion metric');
|
||||
expect(stageIndex.unmappedFallbacks).toEqual([
|
||||
{
|
||||
rawPath: 'metrics/conversion.yml',
|
||||
reason: 'conversion_metric_unsupported',
|
||||
detail: expect.stringContaining('conversion metric'),
|
||||
fallback: 'flagged',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('rejects unmapped fallback decisions for raw paths outside the allowed set', async () => {
|
||||
const stageIndex = makeStageIndex();
|
||||
const tool = createEmitUnmappedFallbackTool({
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ const unmappedFallbackReasonSchema = z.enum([
|
|||
'unsupported_dialect',
|
||||
'parse_error',
|
||||
'missing_target_table',
|
||||
'cumulative_metric_unsupported',
|
||||
'conversion_metric_unsupported',
|
||||
]);
|
||||
|
||||
function sameUnmappedFallback(left: UnmappedFallbackRecord, right: UnmappedFallbackRecord): boolean {
|
||||
|
|
@ -47,6 +49,10 @@ function canonicalDetail(reason: UnmappedFallbackReason, tableRef: string | unde
|
|||
return `${tableClause} uses a SQL dialect that is not yet supported.`;
|
||||
case 'parse_error':
|
||||
return `${tableClause} could not be parsed.`;
|
||||
case 'cumulative_metric_unsupported':
|
||||
return `${tableClause} is a cumulative metric, which is not yet supported as a first-class semantic-layer primitive.`;
|
||||
case 'conversion_metric_unsupported':
|
||||
return `${tableClause} is a conversion metric, which is not yet supported as a first-class semantic-layer primitive.`;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -51,6 +51,6 @@ describe('eviction_list tool', () => {
|
|||
deletedRawPaths: [],
|
||||
});
|
||||
|
||||
expect(tool.description).toContain('context_eviction_decision_write');
|
||||
expect(tool.description).toContain('emit_eviction_decision');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ export interface EvictionListDeps {
|
|||
export function createEvictionListTool(deps: EvictionListDeps) {
|
||||
return tool({
|
||||
description:
|
||||
'List every artifact that the most recent completed sync produced from a now-deleted raw file. Remove each listed artifact and record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.',
|
||||
'List every artifact that the most recent completed sync produced from a now-deleted raw file. Remove each listed artifact and record the decision with emit_eviction_decision so the ingest report lists every deleted-source decision.',
|
||||
inputSchema: z.object({}),
|
||||
execute: async () => {
|
||||
if (deps.deletedRawPaths.length === 0) {
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ const WRITE_TOOL_NAMES = new Set([
|
|||
]);
|
||||
|
||||
export const VERIFICATION_LEDGER_PROMPT = `<pre_write_verification>
|
||||
Before any write-capable tool call (wiki_write, wiki_remove, sl_write_source, sl_edit_source, emit_unmapped_fallback), call record_verification_ledger.
|
||||
Before any durable wiki, semantic-layer, or unmapped-fallback write (wiki_write, wiki_remove, sl_write_source, sl_edit_source, emit_unmapped_fallback), call record_verification_ledger.
|
||||
The ledger is a model-authored checkpoint, not a deterministic parser gate. Summarize the verification protocol from the loaded skill, list identifiers verified with discover_data/entity_details/sql_execution, and list anything intentionally left unverified. If the write contains no warehouse identifiers, say that explicitly.
|
||||
If a write tool returns verification_ledger_required, complete the ledger and retry the write.
|
||||
</pre_write_verification>`;
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@ import { generateText, Output, type FlexibleSchema, type ToolSet } from 'ai';
|
|||
type GenerateTextInput = Parameters<typeof generateText>[0];
|
||||
type GenerateTextFn = (input: GenerateTextInput) => Promise<{ text?: string; output?: unknown }>;
|
||||
|
||||
function hasTools(tools: ToolSet): boolean {
|
||||
return Object.keys(tools).length > 0;
|
||||
}
|
||||
|
||||
interface GenerateKtxTextInput {
|
||||
llmProvider: KtxLlmProvider;
|
||||
role: KtxModelRole;
|
||||
|
|
@ -30,6 +34,13 @@ export async function generateKtxText(input: GenerateKtxTextInput): Promise<stri
|
|||
temperature: input.temperature ?? 0,
|
||||
messages: built.messages,
|
||||
tools: built.tools as ToolSet,
|
||||
...(hasTools(built.tools as ToolSet)
|
||||
? {
|
||||
experimental_repairToolCall: input.llmProvider.repairToolCallHandler({
|
||||
source: `ktx-${input.role}`,
|
||||
}),
|
||||
}
|
||||
: {}),
|
||||
});
|
||||
if (typeof result.text !== 'string') {
|
||||
throw new Error('KTX LLM text generation returned no text');
|
||||
|
|
@ -52,6 +63,13 @@ export async function generateKtxObject<TOutput, TSchema>(
|
|||
temperature: input.temperature ?? 0,
|
||||
messages: built.messages,
|
||||
tools: built.tools as ToolSet,
|
||||
...(hasTools(built.tools as ToolSet)
|
||||
? {
|
||||
experimental_repairToolCall: input.llmProvider.repairToolCallHandler({
|
||||
source: `ktx-${input.role}`,
|
||||
}),
|
||||
}
|
||||
: {}),
|
||||
output: Output.object({
|
||||
schema: input.schema as FlexibleSchema<TOutput>,
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ export class SlDiscoverTool extends BaseSemanticLayerTool<typeof slDiscoverInput
|
|||
return `<purpose>
|
||||
Discover available semantic layer sources, columns, measures, and joins.
|
||||
When called without a connectionId, discovers sources across ALL data sources — grouped by data source name and ID.
|
||||
Use this to understand what data is available before writing a semantic_query.
|
||||
Use this to understand what data is available before querying through the semantic layer.
|
||||
</purpose>
|
||||
|
||||
<when_to_use>
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ Use this when you need to understand how a source is built — e.g., before edit
|
|||
|
||||
<when_not_to_use>
|
||||
- To discover what sources/measures/dimensions are available for querying — use sl_discover instead
|
||||
- To query data — use semantic_query or create_widget with slQuery
|
||||
- To query data — use the semantic-layer query surface (\`sl_query\` in MCP)
|
||||
</when_not_to_use>`;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue