// Patch summary (preamble text placed before the first `diff --git` header).
//
// Files touched:
//   - packages/llm/src/message-builder.test.ts
//   - packages/llm/src/message-builder.ts
//
// message-builder.test.ts (hunk -81,50 +81,6): deletes two `wrapSimple` test cases.
//   1. "does not mark a single user message with a cache breakpoint" asserted that,
//      for one user message, the system message carries an ephemeral cacheControl
//      with ttl '1h' and the user message has no providerOptions.
//   2. "still marks the last history message when there are multiple messages"
//      asserted that, for three messages, only the last one carries an ephemeral
//      cacheControl with ttl '5m'.
//
// message-builder.ts (hunk -86,13 +86,8): removes the `shouldMarkHistory` local,
// its explanatory comment, and its `input.messages.length > 1` condition.
// `markLastHistoryMessage` is now selected whenever
// `cachingActive && this.cacheHistoryEnabled()` is true, so single-message input
// is also routed through it.
//
// NOTE(review): the deleted comment said that marking a lone current-turn message
// writes a cache entry that cannot be reused on the next call. Confirm that
// dropping the guard is intentional (e.g. a deliberate revert).
// NOTE(review): for inputs with more than one message the old and new conditions
// are equivalent, so test 2 above appears to cover behaviour that still exists.
// Consider keeping it, and replacing test 1 with one that pins the new
// single-message behaviour.
// NOTE(review): the line breaks inside both hunks appear to have been collapsed;
// the hunk headers declare multi-line ranges, so the text below presumably cannot
// be applied as-is. TODO confirm against the original patch.
diff --git a/packages/llm/src/message-builder.test.ts b/packages/llm/src/message-builder.test.ts index 7beedfbe..bc13a7e1 100644 --- a/packages/llm/src/message-builder.test.ts +++ b/packages/llm/src/message-builder.test.ts @@ -81,50 +81,6 @@ describe('KtxMessageBuilder.build', () => { expect((out.tools.z as { providerOptions?: unknown }).providerOptions).toBeUndefined(); }); - it('wrapSimple does not mark a single user message with a cache breakpoint', () => { - const builder = makeBuilder(); - - const out = builder.wrapSimple({ - system: 'SYS', - messages: [{ role: 'user', content: 'one-shot prompt' }], - tools: {}, - model: 'anthropic/claude-sonnet-4-6', - }); - - expect(out.messages).toHaveLength(2); - expect(out.messages[0]).toMatchObject({ - role: 'system', - providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } }, - }); - expect(out.messages[1]).toMatchObject({ role: 'user', content: 'one-shot prompt' }); - expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined(); - }); - - it('wrapSimple still marks the last history message when there are multiple messages', () => { - const builder = makeBuilder(); - - const out = builder.wrapSimple({ - system: 'SYS', - messages: [ - { role: 'user', content: 'turn 1' }, - { role: 'assistant', content: 'reply 1' }, - { role: 'user', content: 'turn 2' }, - ], - tools: {}, - model: 'anthropic/claude-sonnet-4-6', - }); - - expect(out.messages).toHaveLength(4); - expect(out.messages[1]).toMatchObject({ role: 'user' }); - expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined(); - expect(out.messages[2]).toMatchObject({ role: 'assistant' }); - expect((out.messages[2] as { providerOptions?: unknown }).providerOptions).toBeUndefined(); - const last = out.messages[3] as { content: Array<{ providerOptions?: unknown }> }; - expect(last.content[0].providerOptions).toEqual({ - anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } }, - }); 
- }); - it('clamps every TTL to 5m for Vertex when vertexFallbackTo5m is enabled', () => { const provider = createKtxLlmProvider({ backend: 'vertex', diff --git a/packages/llm/src/message-builder.ts b/packages/llm/src/message-builder.ts index 387d0832..a98a0375 100644 --- a/packages/llm/src/message-builder.ts +++ b/packages/llm/src/message-builder.ts @@ -86,13 +86,8 @@ export class KtxMessageBuilder { } if (input.messages) { - // Only mark a history breakpoint when prior turns exist. A single-message call - // is the current user turn — marking it writes a cache entry that can't be - // reused on the next (different-content) call, costing tokens for nothing. - const shouldMarkHistory = - cachingActive && this.cacheHistoryEnabled() && input.messages.length > 1; messages.push( - ...(shouldMarkHistory + ...(cachingActive && this.cacheHistoryEnabled() ? this.markLastHistoryMessage(input.messages, ttls.historyTtl, input.model) : input.messages), );