mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
fix(llm): keep wrapSimple history breakpoint so multi-step agent loops cache
Reverts the wrapSimple `messages.length > 1` guard from the prior commit. agent-runner calls wrapSimple with a single user message, but generateText then runs a multi-step tool loop inside that call — the cache marker on the first user message is reused by every subsequent step, so writing it is not wasted. The release validator (scripts/validate-llm-debug-jsonl.mjs) also requires a `message-part` marker target in the captured debug JSONL.
This commit is contained in:
parent
07eaa6d254
commit
6424944426
2 changed files with 1 addition and 50 deletions
|
|
@ -81,50 +81,6 @@ describe('KtxMessageBuilder.build', () => {
|
|||
expect((out.tools.z as { providerOptions?: unknown }).providerOptions).toBeUndefined();
|
||||
});
|
||||
|
||||
it('wrapSimple does not mark a single user message with a cache breakpoint', () => {
|
||||
const builder = makeBuilder();
|
||||
|
||||
const out = builder.wrapSimple({
|
||||
system: 'SYS',
|
||||
messages: [{ role: 'user', content: 'one-shot prompt' }],
|
||||
tools: {},
|
||||
model: 'anthropic/claude-sonnet-4-6',
|
||||
});
|
||||
|
||||
expect(out.messages).toHaveLength(2);
|
||||
expect(out.messages[0]).toMatchObject({
|
||||
role: 'system',
|
||||
providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } },
|
||||
});
|
||||
expect(out.messages[1]).toMatchObject({ role: 'user', content: 'one-shot prompt' });
|
||||
expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined();
|
||||
});
|
||||
|
||||
it('wrapSimple still marks the last history message when there are multiple messages', () => {
|
||||
const builder = makeBuilder();
|
||||
|
||||
const out = builder.wrapSimple({
|
||||
system: 'SYS',
|
||||
messages: [
|
||||
{ role: 'user', content: 'turn 1' },
|
||||
{ role: 'assistant', content: 'reply 1' },
|
||||
{ role: 'user', content: 'turn 2' },
|
||||
],
|
||||
tools: {},
|
||||
model: 'anthropic/claude-sonnet-4-6',
|
||||
});
|
||||
|
||||
expect(out.messages).toHaveLength(4);
|
||||
expect(out.messages[1]).toMatchObject({ role: 'user' });
|
||||
expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined();
|
||||
expect(out.messages[2]).toMatchObject({ role: 'assistant' });
|
||||
expect((out.messages[2] as { providerOptions?: unknown }).providerOptions).toBeUndefined();
|
||||
const last = out.messages[3] as { content: Array<{ providerOptions?: unknown }> };
|
||||
expect(last.content[0].providerOptions).toEqual({
|
||||
anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } },
|
||||
});
|
||||
});
|
||||
|
||||
it('clamps every TTL to 5m for Vertex when vertexFallbackTo5m is enabled', () => {
|
||||
const provider = createKtxLlmProvider({
|
||||
backend: 'vertex',
|
||||
|
|
|
|||
|
|
@ -86,13 +86,8 @@ export class KtxMessageBuilder {
|
|||
}
|
||||
|
||||
if (input.messages) {
|
||||
// Only mark a history breakpoint when prior turns exist. A single-message call
|
||||
// is the current user turn — marking it writes a cache entry that can't be
|
||||
// reused on the next (different-content) call, costing tokens for nothing.
|
||||
const shouldMarkHistory =
|
||||
cachingActive && this.cacheHistoryEnabled() && input.messages.length > 1;
|
||||
messages.push(
|
||||
...(shouldMarkHistory
|
||||
...(cachingActive && this.cacheHistoryEnabled()
|
||||
? this.markLastHistoryMessage(input.messages, ttls.historyTtl, input.model)
|
||||
: input.messages),
|
||||
);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue