mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-25 08:48:08 +02:00
fix(llm): wire prompt caching through all Anthropic call sites
- The page-triage classifier and light-extraction now put the static skill
prompt in `system:`, so per-document calls hit the prompt cache instead of
re-sending the boilerplate in the user message on every call.
- Description-generation builders now return `{ system, user }`, with the
instruction text and word limit moved into the cacheable system prompt.
- Relationship-LLM proposal framing moved to `system:`.
- `KtxMessageBuilder.wrapSimple` skips the history breakpoint for
single-message calls (marking it would be a cache write that could never be reused).
- Gateway backend now sets `anthropic-beta: extended-cache-ttl-2025-04-11`
so 1h TTLs don't silently downgrade to 5m on Gateway routes.
This commit is contained in:
parent
52dd89481c
commit
07eaa6d254
13 changed files with 220 additions and 143 deletions
|
|
@ -81,6 +81,50 @@ describe('KtxMessageBuilder.build', () => {
|
|||
expect((out.tools.z as { providerOptions?: unknown }).providerOptions).toBeUndefined();
|
||||
});
|
||||
|
||||
it('wrapSimple does not mark a single user message with a cache breakpoint', () => {
|
||||
const builder = makeBuilder();
|
||||
|
||||
const out = builder.wrapSimple({
|
||||
system: 'SYS',
|
||||
messages: [{ role: 'user', content: 'one-shot prompt' }],
|
||||
tools: {},
|
||||
model: 'anthropic/claude-sonnet-4-6',
|
||||
});
|
||||
|
||||
expect(out.messages).toHaveLength(2);
|
||||
expect(out.messages[0]).toMatchObject({
|
||||
role: 'system',
|
||||
providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } },
|
||||
});
|
||||
expect(out.messages[1]).toMatchObject({ role: 'user', content: 'one-shot prompt' });
|
||||
expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined();
|
||||
});
|
||||
|
||||
it('wrapSimple still marks the last history message when there are multiple messages', () => {
|
||||
const builder = makeBuilder();
|
||||
|
||||
const out = builder.wrapSimple({
|
||||
system: 'SYS',
|
||||
messages: [
|
||||
{ role: 'user', content: 'turn 1' },
|
||||
{ role: 'assistant', content: 'reply 1' },
|
||||
{ role: 'user', content: 'turn 2' },
|
||||
],
|
||||
tools: {},
|
||||
model: 'anthropic/claude-sonnet-4-6',
|
||||
});
|
||||
|
||||
expect(out.messages).toHaveLength(4);
|
||||
expect(out.messages[1]).toMatchObject({ role: 'user' });
|
||||
expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined();
|
||||
expect(out.messages[2]).toMatchObject({ role: 'assistant' });
|
||||
expect((out.messages[2] as { providerOptions?: unknown }).providerOptions).toBeUndefined();
|
||||
const last = out.messages[3] as { content: Array<{ providerOptions?: unknown }> };
|
||||
expect(last.content[0].providerOptions).toEqual({
|
||||
anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } },
|
||||
});
|
||||
});
|
||||
|
||||
it('clamps every TTL to 5m for Vertex when vertexFallbackTo5m is enabled', () => {
|
||||
const provider = createKtxLlmProvider({
|
||||
backend: 'vertex',
|
||||
|
|
|
|||
|
|
@ -86,8 +86,13 @@ export class KtxMessageBuilder {
|
|||
}
|
||||
|
||||
if (input.messages) {
|
||||
// Only mark a history breakpoint when prior turns exist. A single-message call
|
||||
// is the current user turn — marking it writes a cache entry that can't be
|
||||
// reused on the next (different-content) call, costing tokens for nothing.
|
||||
const shouldMarkHistory =
|
||||
cachingActive && this.cacheHistoryEnabled() && input.messages.length > 1;
|
||||
messages.push(
|
||||
...(cachingActive && this.cacheHistoryEnabled()
|
||||
...(shouldMarkHistory
|
||||
? this.markLastHistoryMessage(input.messages, ttls.historyTtl, input.model)
|
||||
: input.messages),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -199,6 +199,9 @@ describe('createKtxLlmProvider', () => {
|
|||
expect(createGateway).toHaveBeenCalledWith({
|
||||
apiKey: 'gateway-key', // pragma: allowlist secret
|
||||
baseURL: 'https://gateway.test/v1',
|
||||
headers: {
|
||||
'anthropic-beta': 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11',
|
||||
},
|
||||
});
|
||||
expect(gateway).toHaveBeenCalledWith('anthropic/claude-sonnet-4-6');
|
||||
});
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ const DEFAULT_PROMPT_CACHING: KtxPromptCachingConfig = {
|
|||
vertexFallbackTo5m: false,
|
||||
};
|
||||
|
||||
const DIRECT_ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11';
|
||||
const ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11';
|
||||
|
||||
function resolvePromptCaching(config: KtxLlmConfig): KtxPromptCachingConfig {
|
||||
return { ...DEFAULT_PROMPT_CACHING, ...config.promptCaching };
|
||||
|
|
@ -158,7 +158,7 @@ class DefaultKtxLlmProvider implements KtxLlmProvider {
|
|||
...(config.anthropic?.apiKey ? { apiKey: config.anthropic.apiKey } : {}),
|
||||
...(config.anthropic?.baseURL ? { baseURL: config.anthropic.baseURL } : {}),
|
||||
headers: {
|
||||
'anthropic-beta': DIRECT_ANTHROPIC_BETA_HEADER,
|
||||
'anthropic-beta': ANTHROPIC_BETA_HEADER,
|
||||
},
|
||||
});
|
||||
return (modelId) => anthropic(modelId);
|
||||
|
|
@ -178,6 +178,9 @@ class DefaultKtxLlmProvider implements KtxLlmProvider {
|
|||
const gateway = (deps.createGateway ?? createGateway)({
|
||||
...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}),
|
||||
...(config.gateway?.baseURL ? { baseURL: config.gateway.baseURL } : {}),
|
||||
headers: {
|
||||
'anthropic-beta': ANTHROPIC_BETA_HEADER,
|
||||
},
|
||||
});
|
||||
return (modelId) => gateway(modelId);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue