fix(llm): keep wrapSimple history breakpoint so multi-step agent loops cache

Reverts the wrapSimple `messages.length > 1` guard from the prior commit.
agent-runner uses wrapSimple with a single user message, but generateText
runs a multi-step tool loop inside it — the cache marker on the first user
message is reused by every subsequent step, so it isn't wasted.
The release validator (scripts/validate-llm-debug-jsonl.mjs) also requires
a `message-part` marker target in captured debug JSONL.
This commit is contained in:
Andrey Avtomonov 2026-05-14 15:28:11 +02:00
parent 07eaa6d254
commit 6424944426
2 changed files with 1 additions and 50 deletions

View file

@ -81,50 +81,6 @@ describe('KtxMessageBuilder.build', () => {
expect((out.tools.z as { providerOptions?: unknown }).providerOptions).toBeUndefined();
});
it('wrapSimple does not mark a single user message with a cache breakpoint', () => {
  // Wrap a one-message conversation and inspect the resulting message list.
  const { messages: wrapped } = makeBuilder().wrapSimple({
    system: 'SYS',
    messages: [{ role: 'user', content: 'one-shot prompt' }],
    tools: {},
    model: 'anthropic/claude-sonnet-4-6',
  });

  // Expect exactly two entries: the system prompt followed by the user turn.
  expect(wrapped).toHaveLength(2);

  // The system entry carries an ephemeral cache marker with a 1h TTL.
  const expectedSystemMarker = {
    anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } },
  };
  expect(wrapped[0]).toMatchObject({
    role: 'system',
    providerOptions: expectedSystemMarker,
  });

  // The user entry is passed through unchanged and has no providerOptions.
  const userEntry = wrapped[1];
  expect(userEntry).toMatchObject({ role: 'user', content: 'one-shot prompt' });
  expect((userEntry as { providerOptions?: unknown }).providerOptions).toBeUndefined();
});
it('wrapSimple still marks the last history message when there are multiple messages', () => {
  // Three-turn conversation: user, assistant, user.
  const history = [
    { role: 'user', content: 'turn 1' },
    { role: 'assistant', content: 'reply 1' },
    { role: 'user', content: 'turn 2' },
  ];
  const { messages: wrapped } = makeBuilder().wrapSimple({
    system: 'SYS',
    messages: history,
    tools: {},
    model: 'anthropic/claude-sonnet-4-6',
  });

  // One system entry plus the three history entries.
  expect(wrapped).toHaveLength(4);

  // The first two history entries keep their roles and carry no providerOptions.
  const unmarkedRoles = ['user', 'assistant'];
  unmarkedRoles.forEach((role, offset) => {
    const entry = wrapped[offset + 1];
    expect(entry).toMatchObject({ role });
    expect((entry as { providerOptions?: unknown }).providerOptions).toBeUndefined();
  });

  // The final history entry has its first content part tagged with a 5m ephemeral marker.
  const finalEntry = wrapped[3] as { content: Array<{ providerOptions?: unknown }> };
  const expectedHistoryMarker = {
    anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } },
  };
  expect(finalEntry.content[0].providerOptions).toEqual(expectedHistoryMarker);
});
it('clamps every TTL to 5m for Vertex when vertexFallbackTo5m is enabled', () => {
const provider = createKtxLlmProvider({
backend: 'vertex',

View file

@ -86,13 +86,8 @@ export class KtxMessageBuilder {
}
if (input.messages) {
// Only mark a history breakpoint when prior turns exist. A single-message call
// is the current user turn — marking it writes a cache entry that can't be
// reused on the next (different-content) call, costing tokens for nothing.
const shouldMarkHistory =
cachingActive && this.cacheHistoryEnabled() && input.messages.length > 1;
messages.push(
...(shouldMarkHistory
...(cachingActive && this.cacheHistoryEnabled()
? this.markLastHistoryMessage(input.messages, ttls.historyTtl, input.model)
: input.messages),
);