fix(llm): wire prompt caching through all Anthropic call sites

- page-triage classifier + light-extraction now put the static skill
  prompt in `system:` so the per-document caches hit instead of
  re-sending boilerplate in the user message every call.
- Description generation builders return `{ system, user }` with
  instruction text + word limit moved into the cacheable system prompt.
- Relationship-LLM proposal framing moved to `system:`.
- `KtxMessageBuilder.wrapSimple` skips the history breakpoint for
  single-message calls (a cache write that could never be reused).
- Gateway backend now sets `anthropic-beta: extended-cache-ttl-2025-04-11`
  so 1h TTLs don't silently downgrade to 5m on Gateway routes.
This commit is contained in:
Andrey Avtomonov 2026-05-14 15:16:46 +02:00
parent 52dd89481c
commit 07eaa6d254
13 changed files with 220 additions and 143 deletions

View file

@ -81,6 +81,50 @@ describe('KtxMessageBuilder.build', () => {
expect((out.tools.z as { providerOptions?: unknown }).providerOptions).toBeUndefined();
});
it('wrapSimple does not mark a single user message with a cache breakpoint', () => {
  // One-shot call: a system prompt plus exactly one user turn.
  const result = makeBuilder().wrapSimple({
    system: 'SYS',
    messages: [{ role: 'user', content: 'one-shot prompt' }],
    tools: {},
    model: 'anthropic/claude-sonnet-4-6',
  });

  // Output is the system message followed by the untouched user message.
  expect(result.messages).toHaveLength(2);

  // The system message carries the 1h ephemeral cache breakpoint.
  const expectedSystemShape = {
    role: 'system',
    providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } },
  };
  expect(result.messages[0]).toMatchObject(expectedSystemShape);

  // The lone user turn passes through with no provider options attached.
  expect(result.messages[1]).toMatchObject({ role: 'user', content: 'one-shot prompt' });
  const userTurn = result.messages[1] as { providerOptions?: unknown };
  expect(userTurn.providerOptions).toBeUndefined();
});
it('wrapSimple still marks the last history message when there are multiple messages', () => {
  // Three-turn conversation: user, assistant, user.
  const result = makeBuilder().wrapSimple({
    system: 'SYS',
    messages: [
      { role: 'user', content: 'turn 1' },
      { role: 'assistant', content: 'reply 1' },
      { role: 'user', content: 'turn 2' },
    ],
    tools: {},
    model: 'anthropic/claude-sonnet-4-6',
  });

  // System message plus the three conversation turns.
  expect(result.messages).toHaveLength(4);

  // The earlier turns keep their roles and carry no message-level provider options.
  const unmarkedTurns = [
    [1, 'user'],
    [2, 'assistant'],
  ] as const;
  for (const [position, role] of unmarkedTurns) {
    expect(result.messages[position]).toMatchObject({ role });
    expect(
      (result.messages[position] as { providerOptions?: unknown }).providerOptions,
    ).toBeUndefined();
  }

  // The final turn's first content part carries the 5m ephemeral breakpoint.
  const finalTurn = result.messages[3] as { content: Array<{ providerOptions?: unknown }> };
  expect(finalTurn.content[0].providerOptions).toEqual({
    anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } },
  });
});
it('clamps every TTL to 5m for Vertex when vertexFallbackTo5m is enabled', () => {
const provider = createKtxLlmProvider({
backend: 'vertex',

View file

@ -86,8 +86,13 @@ export class KtxMessageBuilder {
}
if (input.messages) {
// Only mark a history breakpoint when prior turns exist. A single-message call
// is the current user turn — marking it writes a cache entry that can't be
// reused on the next (different-content) call, costing tokens for nothing.
const shouldMarkHistory =
cachingActive && this.cacheHistoryEnabled() && input.messages.length > 1;
messages.push(
...(cachingActive && this.cacheHistoryEnabled()
...(shouldMarkHistory
? this.markLastHistoryMessage(input.messages, ttls.historyTtl, input.model)
: input.messages),
);

View file

@ -199,6 +199,9 @@ describe('createKtxLlmProvider', () => {
expect(createGateway).toHaveBeenCalledWith({
apiKey: 'gateway-key', // pragma: allowlist secret
baseURL: 'https://gateway.test/v1',
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11',
},
});
expect(gateway).toHaveBeenCalledWith('anthropic/claude-sonnet-4-6');
});

View file

@ -38,7 +38,7 @@ const DEFAULT_PROMPT_CACHING: KtxPromptCachingConfig = {
vertexFallbackTo5m: false,
};
const DIRECT_ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11';
const ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11';
function resolvePromptCaching(config: KtxLlmConfig): KtxPromptCachingConfig {
return { ...DEFAULT_PROMPT_CACHING, ...config.promptCaching };
@ -158,7 +158,7 @@ class DefaultKtxLlmProvider implements KtxLlmProvider {
...(config.anthropic?.apiKey ? { apiKey: config.anthropic.apiKey } : {}),
...(config.anthropic?.baseURL ? { baseURL: config.anthropic.baseURL } : {}),
headers: {
'anthropic-beta': DIRECT_ANTHROPIC_BETA_HEADER,
'anthropic-beta': ANTHROPIC_BETA_HEADER,
},
});
return (modelId) => anthropic(modelId);
@ -178,6 +178,9 @@ class DefaultKtxLlmProvider implements KtxLlmProvider {
const gateway = (deps.createGateway ?? createGateway)({
...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}),
...(config.gateway?.baseURL ? { baseURL: config.gateway.baseURL } : {}),
headers: {
'anthropic-beta': ANTHROPIC_BETA_HEADER,
},
});
return (modelId) => gateway(modelId);
}