From 6424944426165b77e6b3bc10c9c20ff4007a64fd Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Thu, 14 May 2026 15:28:11 +0200 Subject: [PATCH] fix(llm): keep wrapSimple history breakpoint so multi-step agent loops cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts the wrapSimple `messages.length > 1` guard from the prior commit. agent-runner uses wrapSimple with a single user message, but generateText runs a multi-step tool loop inside it — the cache marker on the first user message is reused by every subsequent step, so it isn't waste. The release validator (scripts/validate-llm-debug-jsonl.mjs) also requires a `message-part` marker target in captured debug JSONL. --- packages/llm/src/message-builder.test.ts | 44 ------------------------ packages/llm/src/message-builder.ts | 7 +--- 2 files changed, 1 insertion(+), 50 deletions(-) diff --git a/packages/llm/src/message-builder.test.ts b/packages/llm/src/message-builder.test.ts index 7beedfbe..bc13a7e1 100644 --- a/packages/llm/src/message-builder.test.ts +++ b/packages/llm/src/message-builder.test.ts @@ -81,50 +81,6 @@ describe('KtxMessageBuilder.build', () => { expect((out.tools.z as { providerOptions?: unknown }).providerOptions).toBeUndefined(); }); - it('wrapSimple does not mark a single user message with a cache breakpoint', () => { - const builder = makeBuilder(); - - const out = builder.wrapSimple({ - system: 'SYS', - messages: [{ role: 'user', content: 'one-shot prompt' }], - tools: {}, - model: 'anthropic/claude-sonnet-4-6', - }); - - expect(out.messages).toHaveLength(2); - expect(out.messages[0]).toMatchObject({ - role: 'system', - providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } }, - }); - expect(out.messages[1]).toMatchObject({ role: 'user', content: 'one-shot prompt' }); - expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined(); - }); - - it('wrapSimple still marks the last history message when there are multiple messages', () => { - const builder = makeBuilder(); - - const out = builder.wrapSimple({ - system: 'SYS', - messages: [ - { role: 'user', content: 'turn 1' }, - { role: 'assistant', content: 'reply 1' }, - { role: 'user', content: 'turn 2' }, - ], - tools: {}, - model: 'anthropic/claude-sonnet-4-6', - }); - - expect(out.messages).toHaveLength(4); - expect(out.messages[1]).toMatchObject({ role: 'user' }); - expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined(); - expect(out.messages[2]).toMatchObject({ role: 'assistant' }); - expect((out.messages[2] as { providerOptions?: unknown }).providerOptions).toBeUndefined(); - const last = out.messages[3] as { content: Array<{ providerOptions?: unknown }> }; - expect(last.content[0].providerOptions).toEqual({ - anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } }, - }); - }); - it('clamps every TTL to 5m for Vertex when vertexFallbackTo5m is enabled', () => { const provider = createKtxLlmProvider({ backend: 'vertex', diff --git a/packages/llm/src/message-builder.ts b/packages/llm/src/message-builder.ts index 387d0832..a98a0375 100644 --- a/packages/llm/src/message-builder.ts +++ b/packages/llm/src/message-builder.ts @@ -86,13 +86,8 @@ export class KtxMessageBuilder { } if (input.messages) { - // Only mark a history breakpoint when prior turns exist. A single-message call - // is the current user turn — marking it writes a cache entry that can't be - // reused on the next (different-content) call, costing tokens for nothing. - const shouldMarkHistory = - cachingActive && this.cacheHistoryEnabled() && input.messages.length > 1; messages.push( - ...(shouldMarkHistory + ...(cachingActive && this.cacheHistoryEnabled() ? this.markLastHistoryMessage(input.messages, ttls.historyTtl, input.model) : input.messages), );