feat(cli): profile ingest runs and split model vs tool time (#249)

* feat(cli): profile ingest runs to find where wall-clock time goes

Add opt-in profiling for `ktx ingest`. Each timed phase, work unit, and
agent loop now records durationMs / step count / token usage in the
trace, and a post-run aggregator rolls them up into a "where did the
time go" report printed to stderr.

Enable per run with KTX_PROFILE_INGEST (1/true -> human table, json ->
raw structured profile) or persistently via `ingest.profile` in
ktx.yaml. The json form emits raw milliseconds, token counts, and a
summary.headline one-line diagnosis so coding agents can parse it
directly; json wins when both env and config request profiling.

- runtime-port: RunLoopMetrics (totalMs, usage, stepCount,
  stepBoundariesMs) plus onMetrics callbacks on text/object generation
- ai-sdk + claude-code runtimes: capture per-loop timing and token usage
- work-unit-executor and stages 3/4: thread metrics into trace events
- ingest-bundle.runner: time worktree / triage / clustering / index /
  reconcile / squash phases and emit the profile in a finally block
  (best-effort; never affects the run outcome)
- ingest-profile: new trace+transcript aggregator with table/json formatters
- config: ingest.profile flag; docs: profiling section in ktx-ingest.mdx

* fix(cli): flush tool-call logs before reading ingest profile

Tool transcripts are appended fire-and-forget so the agent hot path never
blocks on logging. The ingest profiler read them before the writes settled,
so per-work-unit toolMs (and the model-vs-tool split derived from it) could
be incomplete. Track in-flight appends and expose flushToolCallLogs() —
bounded by a timeout so it can never hang — and flush before the profiler
reads the transcript.
This commit is contained in:
Andrey Avtomonov 2026-06-01 15:49:17 +02:00 committed by GitHub
parent 22ddf5524c
commit 21744fc520
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1243 additions and 56 deletions

View file

@ -9,6 +9,7 @@ import type {
KtxGenerateObjectInput,
KtxGenerateTextInput,
KtxLlmRuntimePort,
LlmTokenUsage,
RunLoopParams,
RunLoopResult,
} from './runtime-port.js';
@ -17,6 +18,23 @@ interface AgentTelemetryPort {
createTelemetry(tags: Record<string, string>): TelemetrySettings;
}
/**
 * Loosely-typed token usage record accepted by `toLlmTokenUsage`.
 * Every counter is optional, so a record that reports only some (or none)
 * of the counts is still assignable.
 */
interface MaybeUsage {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
}
/**
 * Converts an optional usage record into an `LlmTokenUsage`.
 *
 * Only counters that are actually defined on the input are copied, so the
 * result never carries a key whose value is `undefined`. A missing usage
 * record yields an empty object.
 *
 * @param usage - Usage record to normalise, or `undefined` when none was reported.
 * @returns A fresh object holding the defined counters, in the order
 *   `inputTokens`, `outputTokens`, `totalTokens`.
 */
function toLlmTokenUsage(usage: MaybeUsage | undefined): LlmTokenUsage {
  const normalized: LlmTokenUsage = {};
  if (!usage) {
    return normalized;
  }
  // Fixed field order keeps the key order of the result stable.
  const fields = ['inputTokens', 'outputTokens', 'totalTokens'] as const;
  for (const field of fields) {
    const count = usage[field];
    if (count !== undefined) {
      normalized[field] = count;
    }
  }
  return normalized;
}
export interface AiSdkKtxLlmRuntimeDeps {
llmProvider: KtxLlmProvider;
telemetry?: AgentTelemetryPort;
@ -48,6 +66,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
model,
});
const split = splitKtxSystemMessages(built.messages);
const startedAt = Date.now();
const result = await generateText({
model,
temperature: input.temperature ?? 0,
@ -62,6 +81,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
}
: {}),
});
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: toLlmTokenUsage(result.totalUsage ?? result.usage) });
if (typeof result.text !== 'string') {
throw new Error('KTX LLM text generation returned no text');
}
@ -80,6 +100,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
model,
});
const split = splitKtxSystemMessages(built.messages);
const startedAt = Date.now();
const result = await generateText({
model,
temperature: input.temperature ?? 0,
@ -95,6 +116,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
: {}),
output: Output.object({ schema: input.schema as unknown as FlexibleSchema<TOutput> }),
});
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: toLlmTokenUsage(result.totalUsage ?? result.usage) });
if (result.output == null) {
throw new Error('KTX LLM object generation returned no output');
}
@ -103,6 +125,8 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
let stepIndex = 0;
const startedAt = Date.now();
const stepBoundariesMs: number[] = [];
try {
const model = this.deps.llmProvider.getModel(params.modelRole);
const tools = createAiSdkToolSet(params.toolSet);
@ -128,7 +152,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
}),
);
await generateText({
const result = await generateText({
model,
temperature: 0,
stopWhen: stepCountIs(params.stepBudget),
@ -141,6 +165,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
tools: built.tools as ToolSet,
onStepFinish: async () => {
stepIndex += 1;
stepBoundariesMs.push(Date.now() - startedAt);
if (!params.onStepFinish) {
return;
}
@ -155,11 +180,23 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
}
},
});
return { stopReason: 'natural' };
return {
stopReason: 'natural',
metrics: {
totalMs: Date.now() - startedAt,
stepCount: stepIndex,
stepBoundariesMs,
usage: toLlmTokenUsage(result.totalUsage ?? result.usage),
},
};
} catch (error) {
const err = error instanceof Error ? error : new Error(String(error));
this.logger.warn(`[agent-runner] loop failed: ${err.message}`);
return { stopReason: 'error', error: err };
return {
stopReason: 'error',
error: err,
metrics: { totalMs: Date.now() - startedAt, stepCount: stepIndex, stepBoundariesMs, usage: {} },
};
}
}
}

View file

@ -15,6 +15,7 @@ import type {
KtxGenerateTextInput,
KtxLlmRuntimePort,
KtxRuntimeToolSet,
LlmTokenUsage,
RunLoopParams,
RunLoopResult,
RunLoopStopReason,
@ -22,6 +23,20 @@ import type {
type QueryFn = (params: Parameters<typeof defaultQuery>[0]) => AsyncIterable<SDKMessage>;
/**
 * Extracts token usage from a Claude Code SDK result message.
 *
 * Reads the `input_tokens` / `output_tokens` counters from the message's
 * `usage` field and copies whichever are defined. `totalTokens` is derived as
 * their sum, and is only set when both counters are present.
 *
 * @param result - Final result message of a query.
 * @returns The counters that could be determined; an empty object when the
 *   message carries no `usage`.
 */
function claudeTokenUsage(result: SDKResultMessage): LlmTokenUsage {
  const reported = (result as { usage?: { input_tokens?: number; output_tokens?: number } }).usage;
  const tokens: LlmTokenUsage = {};
  if (!reported) {
    return tokens;
  }

  const promptTokens = reported.input_tokens;
  const completionTokens = reported.output_tokens;

  if (promptTokens !== undefined) {
    tokens.inputTokens = promptTokens;
  }
  if (completionTokens !== undefined) {
    tokens.outputTokens = completionTokens;
  }
  // A total is only meaningful when both halves were reported.
  if (promptTokens !== undefined && completionTokens !== undefined) {
    tokens.totalTokens = promptTokens + completionTokens;
  }
  return tokens;
}
export interface ClaudeCodeKtxLlmRuntimeDeps {
projectDir: string;
modelSlots: { default: string } & Partial<Record<string, string>>;
@ -236,6 +251,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
maxTurns: 1,
tools: input.tools,
});
const startedAt = Date.now();
const result = await collectResult({
query: this.runQuery,
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
@ -243,6 +259,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
expectedMcpServerNames: expectedMcpServerNames(input.tools),
});
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) });
const error = resultError(result);
if (error) {
throw error;
@ -271,6 +288,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
}),
outputFormat: { type: 'json_schema' as const, schema: jsonSchema(input.schema as z.ZodType) },
};
const startedAt = Date.now();
const result = await collectResult({
query: this.runQuery,
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
@ -278,6 +296,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
allowedToolIds: new Set([...mcpToolIds(input.tools ?? {}), STRUCTURED_OUTPUT_TOOL_NAME]),
expectedMcpServerNames: expectedMcpServerNames(input.tools),
});
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) });
const error = resultError(result);
if (error) {
throw error;
@ -290,6 +309,8 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
let stepIndex = 0;
const startedAt = Date.now();
const stepBoundariesMs: number[] = [];
try {
const options = baseOptions({
projectDir: this.deps.projectDir,
@ -306,6 +327,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
onAssistantTurn: async () => {
stepIndex += 1;
stepBoundariesMs.push(Date.now() - startedAt);
if (!params.onStepFinish) {
return;
}
@ -322,10 +344,23 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
});
const stopReason = mapClaudeCodeStopReason(result);
const error = resultError(result);
return { stopReason, ...(stopReason === 'error' && error ? { error } : {}) };
return {
stopReason,
...(stopReason === 'error' && error ? { error } : {}),
metrics: {
totalMs: Date.now() - startedAt,
stepCount: stepIndex,
stepBoundariesMs,
usage: claudeTokenUsage(result),
},
};
} catch (error) {
const err = error instanceof Error ? error : new Error(String(error));
return { stopReason: 'error', error: err };
return {
stopReason: 'error',
error: err,
metrics: { totalMs: Date.now() - startedAt, stepCount: stepIndex, stepBoundariesMs, usage: {} },
};
}
}
}

View file

@ -23,6 +23,24 @@ export interface RunLoopStepInfo {
stepBudget: number;
}
/**
 * Token counts reported for an LLM call or agent loop.
 * Every field is optional: a counter is simply absent when it was not
 * reported, so consumers must not assume any of them is set.
 */
export interface LlmTokenUsage {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
}
/** Timing and token metrics for a multi-step agent loop, used for ingest profiling. */
export interface RunLoopMetrics {
/**
 * Wall-clock duration of the agent loop, in milliseconds.
 * NOTE(review): the runtimes in this change start the clock at the top of
 * `runAgentLoop`, so the value also covers setup done before the model call
 * and is reported on the error path as well — confirm for other runtimes.
 */
totalMs: number;
/**
 * Aggregate token usage across all steps.
 * NOTE(review): the runtimes in this change report an empty object when the
 * loop fails with an exception, so absence of counters does not imply zero usage.
 */
usage: LlmTokenUsage;
/** Number of agent steps (model round-trips) that actually ran. */
stepCount: number;
/** Wall-clock offset (ms from loop start) at which each step finished, in completion order. */
stepBoundariesMs: number[];
}
export interface RunLoopParams {
modelRole: KtxModelRole;
systemPrompt: string;
@ -36,6 +54,7 @@ export interface RunLoopParams {
export interface RunLoopResult {
stopReason: RunLoopStopReason;
error?: Error;
metrics?: RunLoopMetrics;
}
export interface KtxGenerateTextInput {
@ -44,6 +63,7 @@ export interface KtxGenerateTextInput {
system?: string;
tools?: KtxRuntimeToolSet;
temperature?: number;
onMetrics?: (metrics: { totalMs: number; usage: LlmTokenUsage }) => void;
}
export interface KtxGenerateObjectInput<TOutput, TSchema extends z.ZodType<TOutput>> {
@ -53,6 +73,7 @@ export interface KtxGenerateObjectInput<TOutput, TSchema extends z.ZodType<TOutp
tools?: KtxRuntimeToolSet;
temperature?: number;
schema: TSchema;
onMetrics?: (metrics: { totalMs: number; usage: LlmTokenUsage }) => void;
}
export interface KtxLlmRuntimePort {