mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-01 08:59:39 +02:00
feat(cli): profile ingest runs and split model vs tool time (#249)
* feat(cli): profile ingest runs to find where wall-clock time goes

  Add opt-in profiling for `ktx ingest`. Each timed phase, work unit, and agent loop now records durationMs / step count / token usage in the trace, and a post-run aggregator rolls them up into a "where did the time go" report printed to stderr.

  Enable per run with KTX_PROFILE_INGEST (1/true -> human table, json -> raw structured profile) or persistently via `ingest.profile` in ktx.yaml. The json form emits raw milliseconds, token counts, and a summary.headline one-line diagnosis so coding agents can parse it directly; json wins when both env and config request profiling.

  - runtime-port: RunLoopMetrics (totalMs, usage, stepCount, stepBoundariesMs) plus onMetrics callbacks on text/object generation
  - ai-sdk + claude-code runtimes: capture per-loop timing and token usage
  - work-unit-executor and stages 3/4: thread metrics into trace events
  - ingest-bundle.runner: time worktree / triage / clustering / index / reconcile / squash phases and emit the profile in a finally block (best-effort; never affects the run outcome)
  - ingest-profile: new trace+transcript aggregator with table/json formatters
  - config: ingest.profile flag; docs: profiling section in ktx-ingest.mdx

* fix(cli): flush tool-call logs before reading ingest profile

  Tool transcripts are appended fire-and-forget so the agent hot path never blocks on logging. The ingest profiler read them before the writes settled, so per-work-unit toolMs (and the model-vs-tool split derived from it) could be incomplete. Track in-flight appends and expose flushToolCallLogs() — bounded by a timeout so it can never hang — and flush before the profiler reads the transcript.
This commit is contained in:
parent
22ddf5524c
commit
21744fc520
20 changed files with 1243 additions and 56 deletions
|
|
@ -284,7 +284,7 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
stepBudget: 1,
|
||||
telemetryTags: { operationName: 'test' },
|
||||
}),
|
||||
).resolves.toEqual({ stopReason: 'budget' });
|
||||
).resolves.toMatchObject({ stopReason: 'budget' });
|
||||
|
||||
const options = query.mock.calls[0][0].options;
|
||||
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
|
||||
|
|
@ -467,7 +467,7 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
telemetryTags: { operationName: 'test' },
|
||||
onStepFinish,
|
||||
}),
|
||||
).resolves.toEqual({ stopReason: 'natural' });
|
||||
).resolves.toMatchObject({ stopReason: 'natural' });
|
||||
|
||||
expect(onStepFinish).toHaveBeenCalledTimes(1);
|
||||
expect(onStepFinish).toHaveBeenCalledWith({ stepIndex: 1, stepBudget: 40 });
|
||||
|
|
@ -513,7 +513,7 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
throw new Error('callback exploded');
|
||||
},
|
||||
}),
|
||||
).resolves.toEqual({ stopReason: 'natural' });
|
||||
).resolves.toMatchObject({ stopReason: 'natural' });
|
||||
expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('callback exploded'));
|
||||
});
|
||||
|
||||
|
|
@ -525,6 +525,45 @@ describe('ClaudeCodeKtxLlmRuntime', () => {
|
|||
expect(mapClaudeCodeStopReason(resultMessage({ subtype: 'error_during_execution' }))).toBe('error');
|
||||
});
|
||||
|
||||
it('returns loop metrics including step count and mapped token usage', async () => {
  // Scripted SDK stream: an init message, one assistant message with empty
  // content, then a successful result reporting snake_case token usage.
  const queryMock = vi.fn((_input: any) => {
    const opening = initMessage();
    const assistantTurn = {
      type: 'assistant',
      message: { role: 'assistant', content: [] },
      parent_tool_use_id: null,
      uuid: '00000000-0000-4000-8000-000000000006',
      session_id: 'session-id',
    } as unknown as SDKMessage;
    const closing = resultMessage({
      subtype: 'success',
      terminal_reason: 'completed',
      usage: { input_tokens: 50, output_tokens: 10 } as never,
    });
    return stream([opening, assistantTurn, closing]);
  });

  const llmRuntime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query: queryMock,
    env: {},
  });

  const { metrics } = await llmRuntime.runAgentLoop({
    modelRole: 'default',
    systemPrompt: 'system',
    userPrompt: 'user',
    toolSet: {},
    stepBudget: 40,
    telemetryTags: { operationName: 'test' },
  });

  // One step is expected for the single assistant message, with one recorded
  // step boundary.
  expect(metrics?.stepCount).toBe(1);
  expect(metrics?.stepBoundariesMs).toHaveLength(1);
  // Usage is expected in camelCase, with totalTokens equal to input + output.
  expect(metrics?.usage).toEqual({ inputTokens: 50, outputTokens: 10, totalTokens: 60 });
});
|
||||
|
||||
it('auth probe uses isolation options and a scrubbed env', async () => {
|
||||
const query = vi.fn((_input: any) => stream([initMessage(), resultMessage({ result: 'ok' })]));
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue