fix(llm): wire prompt caching through all Anthropic call sites (#90)

* fix(llm): wire prompt caching through all Anthropic call sites

- page-triage classifier + light-extraction now put the static skill
  prompt in `system:` so per-document calls hit the prompt cache instead
  of re-sending the boilerplate in the user message on every call.
- Description generation builders return `{ system, user }` with
  instruction text + word limit moved into the cacheable system prompt.
- Relationship-LLM proposal framing moved to `system:`.
- `KtxMessageBuilder.wrapSimple` skips the history breakpoint for
  single-message calls (cache write that could never be reused).
- Gateway backend now sends the same `anthropic-beta` header as the direct
  Anthropic backend (including `extended-cache-ttl-2025-04-11`), so 1h TTLs
  don't silently downgrade to 5m on Gateway routes.

* fix(llm): keep wrapSimple history breakpoint so multi-step agent loops cache

Reverts the wrapSimple `messages.length > 1` guard from the prior commit.
agent-runner uses wrapSimple with a single user message, but generateText
runs a multi-step tool loop inside it — the cache marker on the first user
message is reused by every subsequent step, so the cache write isn't wasted.
The release validator (scripts/validate-llm-debug-jsonl.mjs) also requires
a `message-part` marker target in captured debug JSONL.
This commit is contained in:
Andrey Avtomonov 2026-05-14 15:36:27 +02:00 committed by GitHub
parent 77cce79237
commit 49f1e2720e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 170 additions and 142 deletions

View file

@ -117,7 +117,6 @@ export type {
KtxColumnTypeMapping,
} from './scan/index.js';
export {
appendKtxWordLimitInstruction,
buildKtxColumnDescriptionPrompt,
buildKtxColumnEmbeddingText,
buildKtxDataSourceDescriptionPrompt,

View file

@ -227,10 +227,16 @@ describe('PageTriageService', () => {
});
generateTextMock
.mockImplementationOnce((args: any) => {
const prompt = args.messages[0].content as string;
expect(prompt).toContain('Reusable templates and scripts are durable knowledge regardless of subject matter.');
expect(prompt).toContain('Date-titled standups are still skip; named templates and scripts are not.');
expect(prompt).toContain('Cold Call Script');
const systemMessage = args.messages.find((m: { role: string }) => m.role === 'system');
const userMessage = args.messages.find((m: { role: string }) => m.role === 'user');
const systemText = systemMessage.content as string;
const userText = userMessage.content as string;
expect(systemText).toContain(
'Reusable templates and scripts are durable knowledge regardless of subject matter.',
);
expect(systemText).toContain('Date-titled standups are still skip; named templates and scripts are not.');
expect(userText).toContain('Cold Call Script');
expect(userText).not.toContain('Reusable templates and scripts are durable knowledge');
return { text: JSON.stringify({ lane: 'light', reason: 'reusable sales script' }) } as any;
})
.mockResolvedValueOnce({

View file

@ -172,10 +172,12 @@ export class PageTriageService {
try {
const signals = await this.getSignals(args, document, warnings);
const classifierPrompt = await this.buildClassifierPrompt(document, signals);
const classifierSystem = await this.buildClassifierSystem();
const classifierUser = this.buildClassifierUser(document, signals);
const modelText = await this.callModel({
operationName: 'page-triage',
prompt: classifierPrompt,
system: classifierSystem,
prompt: classifierUser,
sourceKey: args.sourceKey,
jobId: args.jobId,
unitKey: document.markdownRawPath,
@ -242,10 +244,12 @@ export class PageTriageService {
throw new Error('no indexed chunks available for light extraction');
}
const prompt = await this.buildLightExtractionPrompt(document, chunks);
const system = await this.buildLightExtractionSystem();
const user = this.buildLightExtractionUser(document, chunks);
const text = await this.callModel({
operationName: 'light-extraction',
prompt,
system,
prompt: user,
sourceKey: args.sourceKey,
jobId: args.jobId,
unitKey: document.markdownRawPath,
@ -329,6 +333,7 @@ export class PageTriageService {
private async callModel(params: {
operationName: 'page-triage' | 'light-extraction';
system: string;
prompt: string;
sourceKey: string;
jobId: string;
@ -336,6 +341,7 @@ export class PageTriageService {
}): Promise<string> {
const model = this.deps.llmProvider.getModel('triage');
const built = new KtxMessageBuilder(this.deps.llmProvider).wrapSimple({
system: params.system,
messages: [{ role: 'user', content: params.prompt }],
tools: {},
model,
@ -349,13 +355,12 @@ export class PageTriageService {
return result.text;
}
private async buildClassifierPrompt(
document: StagedTriageDocument,
signals: TriageSignals | undefined,
): Promise<string> {
const base = await this.deps.promptService.loadPrompt('skills/page_triage_classifier');
private async buildClassifierSystem(): Promise<string> {
return this.deps.promptService.loadPrompt('skills/page_triage_classifier');
}
private buildClassifierUser(document: StagedTriageDocument, signals: TriageSignals | undefined): string {
return [
base,
'<page>',
`externalId: ${document.externalId}`,
`title: ${document.title}`,
@ -371,14 +376,13 @@ export class PageTriageService {
].join('\n');
}
private async buildLightExtractionPrompt(
document: StagedTriageDocument,
chunks: PageTriageEvidenceChunk[],
): Promise<string> {
private async buildLightExtractionSystem(): Promise<string> {
const base = await this.deps.promptService.loadPrompt('skills/light_extraction');
return `${base}\n\nMaximum candidates: ${this.deps.settings.lightExtractionMaxCandidates}`;
}
private buildLightExtractionUser(document: StagedTriageDocument, chunks: PageTriageEvidenceChunk[]): string {
return [
base,
`Maximum candidates: ${this.deps.settings.lightExtractionMaxCandidates}`,
'<page>',
`externalId: ${document.externalId}`,
`title: ${document.title}`,

View file

@ -58,7 +58,6 @@ describe('@ktx/context package exports', () => {
expect(scan.writeLocalScanEnrichmentArtifacts).toBeTypeOf('function');
expect(scan.readLocalScanStructuralSnapshot).toBeTypeOf('function');
expect(scan.writeLocalScanManifestShards).toBeTypeOf('function');
expect(scan.appendKtxWordLimitInstruction).toBeTypeOf('function');
expect(scan.buildKtxColumnDescriptionPrompt).toBeTypeOf('function');
expect(scan.buildKtxTableDescriptionPrompt).toBeTypeOf('function');
expect(scan.buildKtxDataSourceDescriptionPrompt).toBeTypeOf('function');

View file

@ -104,24 +104,27 @@ function createConnector(): KtxScanConnector {
describe('KTX description prompt builders', () => {
it('builds column prompts with sample values, source descriptions, and nested BigQuery guidance', () => {
const prompt = buildKtxColumnDescriptionPrompt({
const { system, user } = buildKtxColumnDescriptionPrompt({
columnName: 'payload',
columnValues: [{ nested: true }, '[1,2]'],
tableContext: 'Table: events | Columns: payload | Data source: BIGQUERY',
dataSourceType: 'BIGQUERY',
supportsNestedAnalysis: true,
rawDescriptions: { db: 'Raw event payload', ai: 'Old AI text', user: 'User text' },
maxWords: 12,
});
expect(prompt).toContain(
expect(user).toContain(
'<table_context> Table: events | Columns: payload | Data source: BIGQUERY </table_context>',
);
expect(prompt).toContain('<column_name> payload </column_name>');
expect(prompt).toContain('<sample_values> [object Object], [1,2] </sample_values>');
expect(prompt).toContain('<db_documentation> Raw event payload </db_documentation>');
expect(prompt).not.toContain('Old AI text');
expect(prompt).not.toContain('User text');
expect(prompt).toContain('nested/structured data');
expect(user).toContain('<column_name> payload </column_name>');
expect(user).toContain('<sample_values> [object Object], [1,2] </sample_values>');
expect(user).toContain('<db_documentation> Raw event payload </db_documentation>');
expect(user).not.toContain('Old AI text');
expect(user).not.toContain('User text');
expect(system).toContain('nested/structured data');
expect(system).toContain('12 words or less');
expect(user).not.toContain('12 words or less');
});
it('builds table and data-source prompts from sampled rows', () => {
@ -134,21 +137,21 @@ describe('KTX description prompt builders', () => {
totalRows: 2,
};
expect(
buildKtxTableDescriptionPrompt({
tableName: 'orders',
sampleData: sample,
dataSourceType: 'POSTGRESQL',
rawDescriptions: { dbt: 'Fact table for commerce orders' },
}),
).toContain('status: paid, refunded');
const table = buildKtxTableDescriptionPrompt({
tableName: 'orders',
sampleData: sample,
dataSourceType: 'POSTGRESQL',
rawDescriptions: { dbt: 'Fact table for commerce orders' },
});
expect(table.user).toContain('status: paid, refunded');
expect(table.system).toContain('Analyze database tables');
expect(
buildKtxDataSourceDescriptionPrompt({
tableSamples: [['orders', sample]],
dataSourceType: 'POSTGRESQL',
}),
).toContain('orders (2 columns, 2 sample rows)');
const datasource = buildKtxDataSourceDescriptionPrompt({
tableSamples: [['orders', sample]],
dataSourceType: 'POSTGRESQL',
});
expect(datasource.user).toContain('orders (2 columns, 2 sample rows)');
expect(datasource.system).toContain('Analyze databases');
});
});
@ -202,9 +205,13 @@ describe('KtxDescriptionGenerator', () => {
temperature: 0.2,
messages: expect.arrayContaining([
expect.objectContaining({
role: 'user',
role: 'system',
content: expect.stringContaining('Please provide a concise description in 12 words or less.'),
}),
expect.objectContaining({
role: 'user',
content: expect.stringContaining('<column_name> status </column_name>'),
}),
]),
}),
);

View file

@ -169,20 +169,43 @@ async function runWithConcurrency<TInput, TOutput>(
return results;
}
export function appendKtxWordLimitInstruction(prompt: string, maxWords: number): string {
return `${prompt}\n\nPlease provide a concise description in ${maxWords} words or less.`;
export interface KtxDescriptionPrompt {
system: string;
user: string;
}
export function buildKtxColumnDescriptionPrompt(input: KtxColumnDescriptionPromptInput): string {
function wordLimitLine(maxWords: number): string {
return `Please provide a concise description in ${maxWords} words or less.`;
}
export function buildKtxColumnDescriptionPrompt(
input: KtxColumnDescriptionPromptInput & { maxWords?: number },
): KtxDescriptionPrompt {
const sampleValues = input.columnValues.slice(0, 5);
const valuesStr = sampleValues
.filter((value) => value !== null && value !== undefined)
.map((value) => String(value))
.join(', ');
let prompt = `Analyze this database column and provide a concise description:
const systemParts: string[] = [
`Analyze database columns and provide a concise description.
<table_context> ${input.tableContext} </table_context>
Provide a brief description of what the column contains without repeating the column name.
Focus on the data's meaning and business purpose. Start directly with the content description.
Example:
"first names of individuals, likely employees or contacts" instead of "The column contains first names..."
"Job titles or roles of individuals..." instead of "This column contains job titles..."`,
];
if (input.dataSourceType === 'BIGQUERY' && input.supportsNestedAnalysis) {
systemParts.push(
'If the sampled values indicate nested/structured data (JSON, STRUCT, or ARRAY), describe its general business purpose and data organization.',
);
}
if (input.maxWords !== undefined) {
systemParts.push(wordLimitLine(input.maxWords));
}
let user = `<table_context> ${input.tableContext} </table_context>
<column_name> ${input.columnName} </column_name>
@ -191,37 +214,20 @@ export function buildKtxColumnDescriptionPrompt(input: KtxColumnDescriptionPromp
const sources = descriptionSources(input.rawDescriptions);
if (sources.length > 0) {
prompt += '\nExisting descriptions from other sources:\n';
user += '\nExisting descriptions from other sources:\n';
for (const [source, text] of sources) {
prompt += `<${source}_documentation> ${text} </${source}_documentation>\n`;
user += `<${source}_documentation> ${text} </${source}_documentation>\n`;
}
prompt +=
user +=
'\nSynthesize a description that captures the most important information from all sources. Prioritize the sources as authoritative context.\n';
}
prompt += `
Provide a brief description of what this column contains without repeating the column name.
Focus on the data's meaning and business purpose. Start directly with the content description.
Example:
"first names of individuals, likely employees or contacts" instead of "The column contains first names..."
"Job titles or roles of individuals..." instead of "This column contains job titles..."
`;
if (input.dataSourceType === 'BIGQUERY' && input.supportsNestedAnalysis) {
const hasNestedData = sampleValues.some((value) => {
const text = String(value);
return text.includes('nested') || text.includes('{') || text.includes('[');
});
if (hasNestedData) {
prompt +=
'\nNote: This column contains nested/structured data (JSON, STRUCT, or ARRAY) - describe its general business purpose and data organization.';
}
}
return prompt.trim();
return { system: systemParts.join('\n\n'), user: user.trim() };
}
export function buildKtxTableDescriptionPrompt(input: KtxTableDescriptionPromptInput): string {
export function buildKtxTableDescriptionPrompt(
input: KtxTableDescriptionPromptInput & { maxWords?: number },
): KtxDescriptionPrompt {
const columnInfo: string[] = [];
for (let index = 0; index < Math.min(input.sampleData.headers.length, 10); index += 1) {
const header = input.sampleData.headers[index];
@ -232,43 +238,45 @@ export function buildKtxTableDescriptionPrompt(input: KtxTableDescriptionPromptI
columnInfo.push(`${header}: ${sampleValues.map((value) => String(value)).join(', ')}`);
}
let prompt = `
Analyze this database table and provide a concise description:
const systemParts: string[] = [
`Analyze database tables and provide a concise description.
Table: ${input.tableName}
Columns and sample data: ${columnInfo.join(' | ')}
Total rows in sample: ${input.sampleData.rows.length}
Data source type: ${input.dataSourceType}
`;
Provide a brief description of what the table represents and its business purpose.
Do NOT list or describe individual columns or fields.
Start directly with the content description without mentioning the table name.
Focus on the data's meaning and business purpose.
Example: "Information about healthcare professionals used for workforce management" instead of "The blahblah table contains information about healthcare professionals including their names, titles..."`,
];
if (input.dataSourceType === 'BIGQUERY') {
systemParts.push(
"Note (don't include in the final answer): BigQuery tables may contain nested structures, arrays, or other complex data types.",
);
}
if (input.maxWords !== undefined) {
systemParts.push(wordLimitLine(input.maxWords));
}
let user = `Table: ${input.tableName}
Columns and sample data: ${columnInfo.join(' | ')}
Total rows in sample: ${input.sampleData.rows.length}
Data source type: ${input.dataSourceType}`;
const sources = descriptionSources(input.rawDescriptions);
if (sources.length > 0) {
prompt += '\n Existing descriptions from other sources:\n';
user += '\n\nExisting descriptions from other sources:\n';
for (const [source, text] of sources) {
prompt += ` ${source}: ${text}\n`;
user += `${source}: ${text}\n`;
}
prompt +=
'\n Synthesize a description that captures the most important information from all sources. Prioritize the sources as authoritative context.\n';
user +=
'\nSynthesize a description that captures the most important information from all sources. Prioritize the sources as authoritative context.';
}
if (input.dataSourceType === 'BIGQUERY') {
prompt +=
"\nNote (Don't include this note in the final answer.): This is a BigQuery table which may contain nested structures, arrays, or other complex data types.";
}
prompt += `
Provide a brief description of what this table represents and its business purpose.
Do NOT list or describe individual columns or fields.
Start directly with the content description without mentioning the table name.
Focus on the data's meaning and business purpose.
Example: "Information about healthcare professionals used for workforce management" instead of "The blahblah table contains information about healthcare professionals including their names, titles..."
`;
return prompt.trim();
return { system: systemParts.join('\n\n'), user: user.trim() };
}
export function buildKtxDataSourceDescriptionPrompt(input: KtxDataSourceDescriptionPromptInput): string {
export function buildKtxDataSourceDescriptionPrompt(
input: KtxDataSourceDescriptionPromptInput & { maxWords?: number },
): KtxDescriptionPrompt {
const tablesText = input.tableSamples
.map(
([tableName, sampleData]) =>
@ -276,29 +284,29 @@ export function buildKtxDataSourceDescriptionPrompt(input: KtxDataSourceDescript
)
.join(' | ');
let prompt = `
Analyze this database and provide a concise description:
Tables: ${tablesText}
Total tables analyzed: ${input.tableSamples.length}
Data source type: ${input.dataSourceType}
`;
const systemParts: string[] = [
`Analyze databases and provide a concise description.
Provide a direct, concise description of what the database represents and its business purpose.
Do NOT start with phrases like "This database appears to represent" or "This BigQuery dataset".
Start directly with the domain or business area description.
Focus on the overall data model and its intended use.
Example: "Healthcare-related database with a focus on patient management..." instead of "This database appears to represent a healthcare-related system..."`,
];
if (input.dataSourceType === 'BIGQUERY') {
prompt +=
"\nNote (Don't include this note in the final answer): This is a BigQuery dataset which may contain large-scale analytics data, nested structures, and complex data types.";
systemParts.push(
"Note (don't include in the final answer): BigQuery datasets may contain large-scale analytics data, nested structures, and complex data types.",
);
}
if (input.maxWords !== undefined) {
systemParts.push(wordLimitLine(input.maxWords));
}
prompt += `
const user = `Tables: ${tablesText}
Total tables analyzed: ${input.tableSamples.length}
Data source type: ${input.dataSourceType}`;
Provide a direct, concise description of what this database represents and its business purpose.
Do NOT start with phrases like "This database appears to represent" or "This BigQuery dataset".
Start directly with the domain or business area description.
Focus on the overall data model and its intended use.
Example: "Healthcare-related database with a focus on patient management..." instead of "This database appears to represent a healthcare-related system..."
`;
return prompt.trim();
return { system: systemParts.join('\n\n'), user };
}
export class KtxDescriptionGenerator {
@ -380,12 +388,9 @@ export class KtxDescriptionGenerator {
sampleData,
dataSourceType: input.dataSourceType,
rawDescriptions: input.table.rawDescriptions,
maxWords: this.settings.tableMaxWords,
});
const description = await this.generateAiDescription(
prompt,
this.settings.tableMaxWords,
'ktx-table-description',
);
const description = await this.generateAiDescription(prompt, 'ktx-table-description');
if (cacheKey && description) {
await this.cache?.set(cacheKey, description);
}
@ -445,12 +450,9 @@ export class KtxDescriptionGenerator {
const prompt = buildKtxDataSourceDescriptionPrompt({
tableSamples: accessibleSamples,
dataSourceType: input.dataSourceType,
maxWords: this.settings.dataSourceMaxWords,
});
const description = await this.generateAiDescription(
prompt,
this.settings.dataSourceMaxWords,
'ktx-data-source-description',
);
const description = await this.generateAiDescription(prompt, 'ktx-data-source-description');
if (cacheKey && description) {
await this.cache?.set(cacheKey, description);
}
@ -536,12 +538,9 @@ export class KtxDescriptionGenerator {
dataSourceType: input.dataSourceType,
supportsNestedAnalysis: input.supportsNestedAnalysis,
rawDescriptions: column.rawDescriptions,
maxWords: this.settings.columnMaxWords,
});
const description = await this.generateAiDescription(
prompt,
this.settings.columnMaxWords,
'ktx-column-description',
);
const description = await this.generateAiDescription(prompt, 'ktx-column-description');
if (cacheKey && description) {
await this.cache?.set(cacheKey, description);
@ -564,12 +563,13 @@ export class KtxDescriptionGenerator {
}
}
private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise<string | null> {
private async generateAiDescription(prompt: KtxDescriptionPrompt, _operationName: string): Promise<string | null> {
try {
const text = await generateKtxText({
llmProvider: this.llmProvider,
role: 'candidateExtraction',
prompt: appendKtxWordLimitInstruction(prompt, maxWords),
system: prompt.system,
prompt: prompt.user,
temperature: this.settings.temperature,
});
const description = text.trim();

View file

@ -34,7 +34,6 @@ export type {
KtxTableDescriptionPromptInput,
} from './description-generation.js';
export {
appendKtxWordLimitInstruction,
buildKtxColumnDescriptionPrompt,
buildKtxDataSourceDescriptionPrompt,
buildKtxTableDescriptionPrompt,

View file

@ -167,6 +167,10 @@ describe('relationship LLM proposals', () => {
expect(generateText).toHaveBeenCalledWith(
expect.objectContaining({
messages: expect.arrayContaining([
expect.objectContaining({
role: 'system',
content: expect.stringContaining('You are helping KTX review possible SQL relationships'),
}),
expect.objectContaining({
role: 'user',
content: expect.stringContaining('"tables"'),
@ -174,6 +178,9 @@ describe('relationship LLM proposals', () => {
]),
}),
);
const call = (generateText.mock.calls as unknown as Array<[{ messages: Array<{ role: string; content: string }> }]>)[0]?.[0];
const userMessage = call?.messages.find((m) => m.role === 'user');
expect(userMessage?.content).not.toContain('You are helping KTX review possible SQL relationships');
});
it('skips deterministic providers without calling generateText', async () => {

View file

@ -244,12 +244,12 @@ export async function proposeKtxRelationshipCandidatesWithLlm(
const settings = mergeSettings(input.settings);
const evidence = buildEvidencePacket(input.schema, input.profile, settings);
const prompt = [
const system = [
'You are helping KTX review possible SQL relationships before validation.',
'Use only the compact schema evidence. Propose likely primary keys and foreign keys for later SQL validation.',
'Return structured output only; never assume a join is accepted.',
JSON.stringify(evidence),
].join('\n\n');
].join('\n');
const prompt = JSON.stringify(evidence);
try {
const generated = await generateKtxObject<
@ -258,6 +258,7 @@ export async function proposeKtxRelationshipCandidatesWithLlm(
>({
llmProvider: input.llmProvider,
role: 'candidateExtraction',
system,
prompt,
schema: relationshipLlmProposalSchema,
generateText: input.generateText,

View file

@ -199,6 +199,9 @@ describe('createKtxLlmProvider', () => {
expect(createGateway).toHaveBeenCalledWith({
apiKey: 'gateway-key', // pragma: allowlist secret
baseURL: 'https://gateway.test/v1',
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11',
},
});
expect(gateway).toHaveBeenCalledWith('anthropic/claude-sonnet-4-6');
});

View file

@ -38,7 +38,7 @@ const DEFAULT_PROMPT_CACHING: KtxPromptCachingConfig = {
vertexFallbackTo5m: false,
};
const DIRECT_ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11';
const ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11';
function resolvePromptCaching(config: KtxLlmConfig): KtxPromptCachingConfig {
return { ...DEFAULT_PROMPT_CACHING, ...config.promptCaching };
@ -158,7 +158,7 @@ class DefaultKtxLlmProvider implements KtxLlmProvider {
...(config.anthropic?.apiKey ? { apiKey: config.anthropic.apiKey } : {}),
...(config.anthropic?.baseURL ? { baseURL: config.anthropic.baseURL } : {}),
headers: {
'anthropic-beta': DIRECT_ANTHROPIC_BETA_HEADER,
'anthropic-beta': ANTHROPIC_BETA_HEADER,
},
});
return (modelId) => anthropic(modelId);
@ -178,6 +178,9 @@ class DefaultKtxLlmProvider implements KtxLlmProvider {
const gateway = (deps.createGateway ?? createGateway)({
...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}),
...(config.gateway?.baseURL ? { baseURL: config.gateway.baseURL } : {}),
headers: {
'anthropic-beta': ANTHROPIC_BETA_HEADER,
},
});
return (modelId) => gateway(modelId);
}