mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-25 08:48:08 +02:00
feat(cli): profile ingest runs and split model vs tool time (#249)
* feat(cli): profile ingest runs to find where wall-clock time goes

  Add opt-in profiling for `ktx ingest`. Each timed phase, work unit, and agent loop now records durationMs / step count / token usage in the trace, and a post-run aggregator rolls them up into a "where did the time go" report printed to stderr.

  Enable per run with KTX_PROFILE_INGEST (1/true -> human table, json -> raw structured profile) or persistently via `ingest.profile` in ktx.yaml. The json form emits raw milliseconds, token counts, and a summary.headline one-line diagnosis so coding agents can parse it directly; json wins when both env and config request profiling.

  - runtime-port: RunLoopMetrics (totalMs, usage, stepCount, stepBoundariesMs) plus onMetrics callbacks on text/object generation
  - ai-sdk + claude-code runtimes: capture per-loop timing and token usage
  - work-unit-executor and stages 3/4: thread metrics into trace events
  - ingest-bundle.runner: time worktree / triage / clustering / index / reconcile / squash phases and emit the profile in a finally block (best-effort; never affects the run outcome)
  - ingest-profile: new trace+transcript aggregator with table/json formatters
  - config: ingest.profile flag; docs: profiling section in ktx-ingest.mdx

* fix(cli): flush tool-call logs before reading ingest profile

  Tool transcripts are appended fire-and-forget so the agent hot path never blocks on logging. The ingest profiler read them before the writes settled, so per-work-unit toolMs (and the model-vs-tool split derived from it) could be incomplete. Track in-flight appends and expose flushToolCallLogs() — bounded by a timeout so it can never hang — and flush before the profiler reads the transcript.
This commit is contained in:
parent
22ddf5524c
commit
21744fc520
20 changed files with 1243 additions and 56 deletions
|
|
@ -9,6 +9,7 @@ import type {
|
|||
KtxGenerateObjectInput,
|
||||
KtxGenerateTextInput,
|
||||
KtxLlmRuntimePort,
|
||||
LlmTokenUsage,
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
} from './runtime-port.js';
|
||||
|
|
@ -17,6 +18,23 @@ interface AgentTelemetryPort {
|
|||
createTelemetry(tags: Record<string, string>): TelemetrySettings;
|
||||
}
|
||||
|
||||
/** Token usage as handed back by a `generateText` result; any counter may be missing. */
interface MaybeUsage {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
}

/**
 * Normalizes an optional usage record into an `LlmTokenUsage`.
 *
 * Counters that are `undefined` are omitted from the result rather than copied as
 * `undefined`, so the returned object only carries keys that have a value.
 *
 * When the provider does not report `totalTokens` but both `inputTokens` and
 * `outputTokens` are known, the total is derived as their sum. This matches how the
 * Claude Code runtime builds its usage, so totals are comparable across runtimes.
 * A provider-supplied `totalTokens` always wins over the derived value.
 *
 * @param usage - Usage record from the model call, or `undefined` when none was reported.
 * @returns The populated counters; `{}` when `usage` is missing or has no counters.
 */
function toLlmTokenUsage(usage: MaybeUsage | undefined): LlmTokenUsage {
  if (!usage) {
    return {};
  }
  const { inputTokens, outputTokens } = usage;
  // Fall back to input + output only when the provider gave no total of its own.
  const derivedTotal =
    inputTokens !== undefined && outputTokens !== undefined ? inputTokens + outputTokens : undefined;
  const totalTokens = usage.totalTokens !== undefined ? usage.totalTokens : derivedTotal;
  return {
    ...(inputTokens !== undefined ? { inputTokens } : {}),
    ...(outputTokens !== undefined ? { outputTokens } : {}),
    ...(totalTokens !== undefined ? { totalTokens } : {}),
  };
}
|
||||
|
||||
export interface AiSdkKtxLlmRuntimeDeps {
|
||||
llmProvider: KtxLlmProvider;
|
||||
telemetry?: AgentTelemetryPort;
|
||||
|
|
@ -48,6 +66,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
model,
|
||||
});
|
||||
const split = splitKtxSystemMessages(built.messages);
|
||||
const startedAt = Date.now();
|
||||
const result = await generateText({
|
||||
model,
|
||||
temperature: input.temperature ?? 0,
|
||||
|
|
@ -62,6 +81,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
}
|
||||
: {}),
|
||||
});
|
||||
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: toLlmTokenUsage(result.totalUsage ?? result.usage) });
|
||||
if (typeof result.text !== 'string') {
|
||||
throw new Error('KTX LLM text generation returned no text');
|
||||
}
|
||||
|
|
@ -80,6 +100,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
model,
|
||||
});
|
||||
const split = splitKtxSystemMessages(built.messages);
|
||||
const startedAt = Date.now();
|
||||
const result = await generateText({
|
||||
model,
|
||||
temperature: input.temperature ?? 0,
|
||||
|
|
@ -95,6 +116,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
: {}),
|
||||
output: Output.object({ schema: input.schema as unknown as FlexibleSchema<TOutput> }),
|
||||
});
|
||||
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: toLlmTokenUsage(result.totalUsage ?? result.usage) });
|
||||
if (result.output == null) {
|
||||
throw new Error('KTX LLM object generation returned no output');
|
||||
}
|
||||
|
|
@ -103,6 +125,8 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
|
||||
async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
|
||||
let stepIndex = 0;
|
||||
const startedAt = Date.now();
|
||||
const stepBoundariesMs: number[] = [];
|
||||
try {
|
||||
const model = this.deps.llmProvider.getModel(params.modelRole);
|
||||
const tools = createAiSdkToolSet(params.toolSet);
|
||||
|
|
@ -128,7 +152,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
}),
|
||||
);
|
||||
|
||||
await generateText({
|
||||
const result = await generateText({
|
||||
model,
|
||||
temperature: 0,
|
||||
stopWhen: stepCountIs(params.stepBudget),
|
||||
|
|
@ -141,6 +165,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
tools: built.tools as ToolSet,
|
||||
onStepFinish: async () => {
|
||||
stepIndex += 1;
|
||||
stepBoundariesMs.push(Date.now() - startedAt);
|
||||
if (!params.onStepFinish) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -155,11 +180,23 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
}
|
||||
},
|
||||
});
|
||||
return { stopReason: 'natural' };
|
||||
return {
|
||||
stopReason: 'natural',
|
||||
metrics: {
|
||||
totalMs: Date.now() - startedAt,
|
||||
stepCount: stepIndex,
|
||||
stepBoundariesMs,
|
||||
usage: toLlmTokenUsage(result.totalUsage ?? result.usage),
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
this.logger.warn(`[agent-runner] loop failed: ${err.message}`);
|
||||
return { stopReason: 'error', error: err };
|
||||
return {
|
||||
stopReason: 'error',
|
||||
error: err,
|
||||
metrics: { totalMs: Date.now() - startedAt, stepCount: stepIndex, stepBoundariesMs, usage: {} },
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import type {
|
|||
KtxGenerateTextInput,
|
||||
KtxLlmRuntimePort,
|
||||
KtxRuntimeToolSet,
|
||||
LlmTokenUsage,
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
RunLoopStopReason,
|
||||
|
|
@ -22,6 +23,20 @@ import type {
|
|||
|
||||
type QueryFn = (params: Parameters<typeof defaultQuery>[0]) => AsyncIterable<SDKMessage>;
|
||||
|
||||
/**
 * Builds an `LlmTokenUsage` from the `usage` field of a Claude Code result message.
 *
 * Only `input_tokens` and `output_tokens` are read. Counters that are `undefined` are
 * left out of the result, and `totalTokens` is reported only when both counters are
 * present (as their sum).
 *
 * NOTE(review): any prompt-cache counters on the result are not included here — confirm
 * whether the profile should count them as input tokens.
 *
 * @param result - Final result message of a Claude Code query.
 * @returns The populated counters; `{}` when the message carries no usage.
 */
function claudeTokenUsage(result: SDKResultMessage): LlmTokenUsage {
  const reported = (result as { usage?: { input_tokens?: number; output_tokens?: number } }).usage;
  if (!reported) {
    return {};
  }

  const tokens: LlmTokenUsage = {};
  if (reported.input_tokens !== undefined) {
    tokens.inputTokens = reported.input_tokens;
  }
  if (reported.output_tokens !== undefined) {
    tokens.outputTokens = reported.output_tokens;
  }
  // A total is only meaningful when both halves are known.
  if (reported.input_tokens !== undefined && reported.output_tokens !== undefined) {
    tokens.totalTokens = reported.input_tokens + reported.output_tokens;
  }
  return tokens;
}
|
||||
|
||||
export interface ClaudeCodeKtxLlmRuntimeDeps {
|
||||
projectDir: string;
|
||||
modelSlots: { default: string } & Partial<Record<string, string>>;
|
||||
|
|
@ -236,6 +251,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
maxTurns: 1,
|
||||
tools: input.tools,
|
||||
});
|
||||
const startedAt = Date.now();
|
||||
const result = await collectResult({
|
||||
query: this.runQuery,
|
||||
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
|
||||
|
|
@ -243,6 +259,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
|
||||
expectedMcpServerNames: expectedMcpServerNames(input.tools),
|
||||
});
|
||||
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) });
|
||||
const error = resultError(result);
|
||||
if (error) {
|
||||
throw error;
|
||||
|
|
@ -271,6 +288,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
}),
|
||||
outputFormat: { type: 'json_schema' as const, schema: jsonSchema(input.schema as z.ZodType) },
|
||||
};
|
||||
const startedAt = Date.now();
|
||||
const result = await collectResult({
|
||||
query: this.runQuery,
|
||||
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
|
||||
|
|
@ -278,6 +296,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
allowedToolIds: new Set([...mcpToolIds(input.tools ?? {}), STRUCTURED_OUTPUT_TOOL_NAME]),
|
||||
expectedMcpServerNames: expectedMcpServerNames(input.tools),
|
||||
});
|
||||
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) });
|
||||
const error = resultError(result);
|
||||
if (error) {
|
||||
throw error;
|
||||
|
|
@ -290,6 +309,8 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
|
||||
async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
|
||||
let stepIndex = 0;
|
||||
const startedAt = Date.now();
|
||||
const stepBoundariesMs: number[] = [];
|
||||
try {
|
||||
const options = baseOptions({
|
||||
projectDir: this.deps.projectDir,
|
||||
|
|
@ -306,6 +327,7 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
|
||||
onAssistantTurn: async () => {
|
||||
stepIndex += 1;
|
||||
stepBoundariesMs.push(Date.now() - startedAt);
|
||||
if (!params.onStepFinish) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -322,10 +344,23 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
|
|||
});
|
||||
const stopReason = mapClaudeCodeStopReason(result);
|
||||
const error = resultError(result);
|
||||
return { stopReason, ...(stopReason === 'error' && error ? { error } : {}) };
|
||||
return {
|
||||
stopReason,
|
||||
...(stopReason === 'error' && error ? { error } : {}),
|
||||
metrics: {
|
||||
totalMs: Date.now() - startedAt,
|
||||
stepCount: stepIndex,
|
||||
stepBoundariesMs,
|
||||
usage: claudeTokenUsage(result),
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
return { stopReason: 'error', error: err };
|
||||
return {
|
||||
stopReason: 'error',
|
||||
error: err,
|
||||
metrics: { totalMs: Date.now() - startedAt, stepCount: stepIndex, stepBoundariesMs, usage: {} },
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,24 @@ export interface RunLoopStepInfo {
|
|||
stepBudget: number;
|
||||
}
|
||||
|
||||
/**
 * Token counters for an LLM call or agent loop.
 *
 * Every field is optional: the runtime adapters omit a counter when the underlying
 * result did not report it, and use `{}` when no usage is available at all.
 */
export interface LlmTokenUsage {
|
||||
/** Tokens sent to the model, when reported. */
inputTokens?: number;
|
||||
/** Tokens produced by the model, when reported. */
outputTokens?: number;
|
||||
/** Combined token count, when reported or derivable by the runtime adapter. */
totalTokens?: number;
|
||||
}
|
||||
|
||||
/** Timing and token metrics for a multi-step agent loop, used for ingest profiling. */
|
||||
export interface RunLoopMetrics {
|
||||
/**
 * Wall-clock duration of the whole agent loop in milliseconds, measured from the start
 * of `runAgentLoop` until it returns (on the error path as well).
 */
|
||||
totalMs: number;
|
||||
/** Aggregate token usage across all steps; `{}` when the loop failed before a result was available. */
|
||||
usage: LlmTokenUsage;
|
||||
/** Number of agent steps (model round-trips) that actually ran. */
|
||||
stepCount: number;
|
||||
/** Wall-clock offset (ms from loop start) at which each step finished. */
|
||||
stepBoundariesMs: number[];
|
||||
}
|
||||
|
||||
export interface RunLoopParams {
|
||||
modelRole: KtxModelRole;
|
||||
systemPrompt: string;
|
||||
|
|
@ -36,6 +54,7 @@ export interface RunLoopParams {
|
|||
/** Outcome of a `runAgentLoop` call. */
export interface RunLoopResult {
|
||||
/** Why the loop stopped. */
stopReason: RunLoopStopReason;
|
||||
/** The failure, when the loop ended with `stopReason: 'error'` and an error was captured. */
error?: Error;
|
||||
/**
 * Timing and token metrics for the loop. Optional so runtimes that do not collect
 * metrics still satisfy the interface; the runtimes in this change populate it on both
 * the success and the error path.
 */
metrics?: RunLoopMetrics;
|
||||
}
|
||||
|
||||
export interface KtxGenerateTextInput {
|
||||
|
|
@ -44,6 +63,7 @@ export interface KtxGenerateTextInput {
|
|||
system?: string;
|
||||
tools?: KtxRuntimeToolSet;
|
||||
temperature?: number;
|
||||
onMetrics?: (metrics: { totalMs: number; usage: LlmTokenUsage }) => void;
|
||||
}
|
||||
|
||||
export interface KtxGenerateObjectInput<TOutput, TSchema extends z.ZodType<TOutput>> {
|
||||
|
|
@ -53,6 +73,7 @@ export interface KtxGenerateObjectInput<TOutput, TSchema extends z.ZodType<TOutp
|
|||
tools?: KtxRuntimeToolSet;
|
||||
temperature?: number;
|
||||
schema: TSchema;
|
||||
onMetrics?: (metrics: { totalMs: number; usage: LlmTokenUsage }) => void;
|
||||
}
|
||||
|
||||
export interface KtxLlmRuntimePort {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue