refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm

This commit is contained in:
Andrey Avtomonov 2026-05-21 03:08:56 +02:00
parent 9fc715ac6a
commit 8d2a36eb2f
15 changed files with 0 additions and 0 deletions

View file

@ -0,0 +1,106 @@
import { describe, expect, it, vi } from 'vitest';
import { runKtxEmbeddingHealthCheck } from './embedding-health.js';
describe('KTX embedding health check', () => {
it('runs a one-shot OpenAI embedding check through the configured provider', async () => {
const createOpenAIClient = vi.fn(() => ({
embeddings: {
create: vi.fn().mockResolvedValue({
data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }],
}),
},
}));
await expect(
runKtxEmbeddingHealthCheck(
{
backend: 'openai',
model: 'text-embedding-3-small',
dimensions: 3,
openai: { apiKey: 'sk-openai-test' }, // pragma: allowlist secret
},
{ deps: { createOpenAIClient } },
),
).resolves.toEqual({ ok: true });
expect(createOpenAIClient).toHaveBeenCalledWith({ apiKey: 'sk-openai-test', baseURL: undefined }); // pragma: allowlist secret
});
it('returns failed when the provider returns the wrong dimensions', async () => {
const createOpenAIClient = vi.fn(() => ({
embeddings: {
create: vi.fn().mockResolvedValue({
data: [{ index: 0, embedding: [0.1, 0.2] }],
}),
},
}));
await expect(
runKtxEmbeddingHealthCheck(
{
backend: 'openai',
model: 'text-embedding-3-small',
dimensions: 3,
openai: { apiKey: 'sk-openai-test' }, // pragma: allowlist secret
},
{ deps: { createOpenAIClient } },
),
).resolves.toEqual({
ok: false,
message: 'Embedding provider openai returned vector with 2 dimensions; expected 3',
});
});
it('redacts credential values from health-check failures', async () => {
const createOpenAIClient = vi.fn(() => ({
embeddings: {
create: vi.fn(async () => {
throw new Error('401 invalid api key sk-openai-secret');
}),
},
}));
await expect(
runKtxEmbeddingHealthCheck(
{
backend: 'openai',
model: 'text-embedding-3-small',
dimensions: 3,
openai: { apiKey: 'sk-openai-secret' }, // pragma: allowlist secret
},
{ deps: { createOpenAIClient } },
),
).resolves.toEqual({
ok: false,
message: '401 invalid api key [redacted]',
});
});
it('returns failed when the health check times out', async () => {
const createOpenAIClient = vi.fn(() => ({
embeddings: {
create: vi.fn(
() =>
new Promise<{ data: Array<{ index?: number; embedding: number[] }>; usage?: { total_tokens?: number } }>(
() => undefined,
),
),
},
}));
await expect(
runKtxEmbeddingHealthCheck(
{
backend: 'openai',
model: 'text-embedding-3-small',
dimensions: 3,
openai: { apiKey: 'sk-openai-test' }, // pragma: allowlist secret
},
{ timeoutMs: 1, deps: { createOpenAIClient } },
),
).resolves.toEqual({
ok: false,
message: 'Embedding health check timed out after 1ms',
});
});
});

View file

@ -0,0 +1,54 @@
import { createKtxEmbeddingProvider, type KtxEmbeddingProviderDeps } from './embedding-provider.js';
import type { KtxEmbeddingConfig } from './types.js';
export type KtxEmbeddingHealthCheckResult = { ok: true } | { ok: false; message: string };
export interface KtxEmbeddingHealthCheckOptions {
text?: string;
timeoutMs?: number;
deps?: KtxEmbeddingProviderDeps;
}
function redactHealthCheckMessage(message: string, config: KtxEmbeddingConfig): string {
const secrets = [config.openai?.apiKey].filter(
(value): value is string => typeof value === 'string' && value.length > 0,
);
return secrets.reduce((current, secret) => current.split(secret).join('[redacted]'), message);
}
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
let timeout: NodeJS.Timeout | undefined;
const timeoutPromise = new Promise<never>((_resolve, reject) => {
timeout = setTimeout(() => reject(new Error(`Embedding health check timed out after ${timeoutMs}ms`)), timeoutMs);
});
try {
return await Promise.race([promise, timeoutPromise]);
} finally {
if (timeout) {
clearTimeout(timeout);
}
}
}
export async function runKtxEmbeddingHealthCheck(
config: KtxEmbeddingConfig,
options: KtxEmbeddingHealthCheckOptions = {},
): Promise<KtxEmbeddingHealthCheckResult> {
try {
const provider = createKtxEmbeddingProvider(config, options.deps);
const embedding = await withTimeout(
provider.embed(options.text ?? 'KTX embedding health check'),
options.timeoutMs ?? 15_000,
);
if (embedding.length !== config.dimensions) {
return {
ok: false,
message: `Embedding provider ${config.backend} returned vector with ${embedding.length} dimensions; expected ${config.dimensions}`,
};
}
return { ok: true };
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
return { ok: false, message: redactHealthCheckMessage(message, config) };
}
}

View file

@ -0,0 +1,137 @@
import { describe, expect, it, vi } from 'vitest';
import { createKtxEmbeddingProvider } from './embedding-provider.js';
import type { KtxEmbeddingConfig } from './types.js';
describe('createKtxEmbeddingProvider', () => {
it('rejects deterministic embeddings', () => {
const config = JSON.parse(
JSON.stringify({
backend: 'deterministic',
model: 'sha256',
dimensions: 6,
}),
) as KtxEmbeddingConfig;
expect(() => createKtxEmbeddingProvider(config)).toThrow('Unsupported KTX embedding backend: deterministic');
});
it('rejects gateway embeddings', () => {
const config = JSON.parse(
JSON.stringify({
backend: 'gateway',
model: 'provider/text-embedding',
dimensions: 2,
gateway: { apiKey: 'gateway-key' }, // pragma: allowlist secret
}),
) as KtxEmbeddingConfig;
expect(() => createKtxEmbeddingProvider(config)).toThrow('Unsupported KTX embedding backend: gateway');
});
it('uses OpenAI embeddings with configured dimensions', async () => {
const createOpenAIClient = vi.fn(() => ({
embeddings: {
create: vi.fn().mockResolvedValue({
data: [{ index: 0, embedding: [0.1, 0.2] }],
usage: { total_tokens: 7 },
}),
},
}));
const provider = createKtxEmbeddingProvider(
{
backend: 'openai',
model: 'text-embedding-3-small',
dimensions: 2,
openai: { apiKey: 'openai-key', baseURL: 'https://openai.test/v1' }, // pragma: allowlist secret
},
{ createOpenAIClient },
);
await expect(provider.embed('hello')).resolves.toEqual([0.1, 0.2]);
expect(createOpenAIClient).toHaveBeenCalledWith({
apiKey: 'openai-key', // pragma: allowlist secret
baseURL: 'https://openai.test/v1',
});
});
it('supports sentence-transformers pathPrefix defaults and explicit empty prefix', async () => {
const fetch = vi
.fn()
.mockResolvedValueOnce(new Response(JSON.stringify({ embedding: [0.1, 0.2] }), { status: 200 }))
.mockResolvedValueOnce(new Response(JSON.stringify({ embedding: [0.3, 0.4] }), { status: 200 }));
const provider = createKtxEmbeddingProvider(
{
backend: 'sentence-transformers',
model: 'all-MiniLM-L6-v2',
dimensions: 2,
sentenceTransformers: { baseURL: 'https://python.test/' },
},
{ fetch },
);
await expect(provider.embed('hello')).resolves.toEqual([0.3, 0.4]);
expect(fetch).toHaveBeenNthCalledWith(
1,
'https://python.test/api/embeddings/compute',
expect.objectContaining({ method: 'POST' }),
);
expect(fetch).toHaveBeenNthCalledWith(
2,
'https://python.test/api/embeddings/compute',
expect.objectContaining({ method: 'POST' }),
);
const daemonFetch = vi
.fn()
.mockResolvedValueOnce(new Response(JSON.stringify({ embedding: [0.1, 0.2] }), { status: 200 }))
.mockResolvedValueOnce(new Response(JSON.stringify({ embeddings: [[0.5, 0.6]] }), { status: 200 }));
const daemonProvider = createKtxEmbeddingProvider(
{
backend: 'sentence-transformers',
model: 'all-MiniLM-L6-v2',
dimensions: 2,
sentenceTransformers: { baseURL: 'https://daemon.test/base/', pathPrefix: '' },
},
{ fetch: daemonFetch },
);
await expect(daemonProvider.embedMany(['hello'])).resolves.toEqual([[0.5, 0.6]]);
expect(daemonFetch).toHaveBeenNthCalledWith(
1,
'https://daemon.test/base/embeddings/compute',
expect.objectContaining({ method: 'POST' }),
);
expect(daemonFetch).toHaveBeenNthCalledWith(
2,
'https://daemon.test/base/embeddings/compute-bulk',
expect.objectContaining({ method: 'POST' }),
);
});
it('reports local HTTP daemon failures without a ktx-daemon spawn fallback cascade', async () => {
const fetch = vi
.fn()
.mockResolvedValue(
new Response('Embedding compute failed: httpx.InvalidURL: Invalid port', { status: 500 }),
);
const provider = createKtxEmbeddingProvider(
{
backend: 'sentence-transformers',
model: 'all-MiniLM-L6-v2',
dimensions: 2,
sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' },
},
{ fetch },
);
await expect(provider.embed('hello')).rejects.toThrow(
'Embedding provider sentence-transformers request failed with HTTP 500: Embedding compute failed: httpx.InvalidURL: Invalid port',
);
await expect(provider.embed('hello')).rejects.not.toThrow('ktx-daemon fallback failed');
expect(fetch).toHaveBeenCalledTimes(1);
});
});

View file

@ -0,0 +1,212 @@
import OpenAI from 'openai';
import type { KtxEmbeddingConfig, KtxEmbeddingProvider } from './types.js';
type FetchFn = typeof fetch;
export interface KtxEmbeddingProviderDeps {
createOpenAIClient?: (options: { apiKey?: string; baseURL?: string }) => {
embeddings: {
create(input: {
model: string;
input: string | string[];
dimensions: number;
encoding_format: 'float';
}): Promise<{ data: Array<{ index?: number; embedding: number[] }>; usage?: { total_tokens?: number } }>;
};
};
fetch?: FetchFn;
}
const DEFAULT_BATCH_SIZE = 100;
const HTTP_ERROR_BODY_MAX_LENGTH = 2_000;
function assertNonEmptyText(text: string): void {
if (!text.trim()) {
throw new Error('Embedding text must be non-empty');
}
}
function assertBatchSize(texts: string[], maxBatchSize: number): void {
if (texts.length === 0) {
throw new Error('Embedding text batch must not be empty');
}
if (texts.length > maxBatchSize) {
throw new Error(`Embedding batch size ${texts.length} exceeds maximum ${maxBatchSize}`);
}
for (const text of texts) {
assertNonEmptyText(text);
}
}
function assertVectorDimensions(vector: number[], expected: number, backend: string): number[] {
if (!Array.isArray(vector) || vector.some((item) => typeof item !== 'number')) {
throw new Error(`Embedding provider ${backend} returned a malformed vector`);
}
if (vector.length !== expected) {
throw new Error(
`Embedding provider ${backend} returned vector with ${vector.length} dimensions; expected ${expected}`,
);
}
return vector;
}
function joinUrl(baseURL: string, pathPrefix: string, path: string): string {
const base = baseURL.replace(/\/+$/, '');
const prefix = pathPrefix.replace(/^\/+|\/+$/g, '');
const suffix = path.replace(/^\/+/, '');
return prefix ? `${base}/${prefix}/${suffix}` : `${base}/${suffix}`;
}
function boundedHttpBody(text: string): string {
const normalized = text.trim();
if (normalized.length <= HTTP_ERROR_BODY_MAX_LENGTH) {
return normalized;
}
return `${normalized.slice(0, HTTP_ERROR_BODY_MAX_LENGTH)}...`;
}
class OpenAIEmbeddingProvider implements KtxEmbeddingProvider {
readonly dimensions: number;
readonly maxBatchSize: number;
private readonly client: ReturnType<NonNullable<KtxEmbeddingProviderDeps['createOpenAIClient']>>;
constructor(
private readonly config: KtxEmbeddingConfig,
deps: KtxEmbeddingProviderDeps,
) {
this.dimensions = config.dimensions;
this.maxBatchSize = config.batchSize ?? DEFAULT_BATCH_SIZE;
if (!config.openai?.apiKey) {
throw new Error('openai.apiKey is required when KTX embedding backend is openai');
}
this.client = deps.createOpenAIClient
? deps.createOpenAIClient({ apiKey: config.openai.apiKey, baseURL: config.openai.baseURL })
: new OpenAI({
apiKey: config.openai.apiKey,
...(config.openai.baseURL ? { baseURL: config.openai.baseURL } : {}),
});
}
async embed(text: string): Promise<number[]> {
const [embedding] = await this.embedMany([text]);
if (!embedding) {
throw new Error('Embedding provider openai returned no embedding');
}
return embedding;
}
async embedMany(texts: string[]): Promise<number[][]> {
assertBatchSize(texts, this.maxBatchSize);
const response = await this.client.embeddings.create({
model: this.config.model,
input: texts.length === 1 ? texts[0] : texts,
dimensions: this.dimensions,
encoding_format: 'float',
});
const sorted = [...response.data].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
const embeddings = sorted.map((item) => item.embedding);
if (embeddings.length !== texts.length) {
throw new Error(`Embedding provider openai returned ${embeddings.length} embeddings for ${texts.length} texts`);
}
return embeddings.map((embedding) => assertVectorDimensions(embedding, this.dimensions, 'openai'));
}
}
class SentenceTransformersEmbeddingProvider implements KtxEmbeddingProvider {
readonly dimensions: number;
readonly maxBatchSize: number;
private readonly fetch: FetchFn;
private readonly baseURL: string;
private readonly pathPrefix: string;
private readonly startupProbe: Promise<void>;
constructor(config: KtxEmbeddingConfig, deps: KtxEmbeddingProviderDeps) {
if (!config.sentenceTransformers?.baseURL) {
throw new Error('sentenceTransformers.baseURL is required when KTX embedding backend is sentence-transformers');
}
this.dimensions = config.dimensions;
this.maxBatchSize = config.batchSize ?? DEFAULT_BATCH_SIZE;
this.fetch = deps.fetch ?? fetch;
this.baseURL = config.sentenceTransformers.baseURL;
this.pathPrefix = config.sentenceTransformers.pathPrefix ?? '/api';
this.startupProbe = this.requestSingle('__ktx_embedding_probe__').then((embedding) => {
assertVectorDimensions(embedding, this.dimensions, 'sentence-transformers');
});
}
async embed(text: string): Promise<number[]> {
assertNonEmptyText(text);
await this.startupProbe;
return assertVectorDimensions(await this.requestSingle(text), this.dimensions, 'sentence-transformers');
}
async embedMany(texts: string[]): Promise<number[][]> {
assertBatchSize(texts, this.maxBatchSize);
await this.startupProbe;
const response = await this.requestJson('/embeddings/compute-bulk', { texts });
if (
!response ||
typeof response !== 'object' ||
!('embeddings' in response) ||
!Array.isArray(response.embeddings)
) {
throw new Error('Embedding provider sentence-transformers returned malformed bulk response');
}
if (response.embeddings.length !== texts.length) {
const count = response.embeddings.length;
throw new Error(
`Embedding provider sentence-transformers returned ${count} embeddings for ${texts.length} texts`,
);
}
return response.embeddings.map((embedding: unknown) =>
assertVectorDimensions(embedding as number[], this.dimensions, 'sentence-transformers'),
);
}
private async requestSingle(text: string): Promise<number[]> {
const response = await this.requestJson('/embeddings/compute', { text });
if (!response || typeof response !== 'object' || !('embedding' in response) || !Array.isArray(response.embedding)) {
throw new Error('Embedding provider sentence-transformers returned malformed single response');
}
return response.embedding;
}
private async requestJson(path: string, body: Record<string, unknown>): Promise<Record<string, unknown>> {
return await this.postJson(path, body);
}
private async postJson(path: string, body: Record<string, unknown>): Promise<Record<string, unknown>> {
const response = await this.fetch(joinUrl(this.baseURL, this.pathPrefix, path), {
method: 'POST',
headers: { 'content-type': 'application/json' },
body: JSON.stringify(body),
});
if (!response.ok) {
const bodyText = boundedHttpBody(await response.text());
throw new Error(
`Embedding provider sentence-transformers request failed with HTTP ${response.status}${
bodyText ? `: ${bodyText}` : ''
}`,
);
}
const parsed = (await response.json()) as unknown;
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
throw new Error('Embedding provider sentence-transformers returned non-object JSON');
}
return parsed as Record<string, unknown>;
}
}
export function createKtxEmbeddingProvider(
config: KtxEmbeddingConfig,
deps: KtxEmbeddingProviderDeps = {},
): KtxEmbeddingProvider {
switch (config.backend) {
case 'openai':
return new OpenAIEmbeddingProvider(config, deps);
case 'sentence-transformers':
return new SentenceTransformersEmbeddingProvider(config, deps);
default:
throw new Error(`Unsupported KTX embedding backend: ${String((config as { backend?: string }).backend)}`);
}
}

View file

@ -0,0 +1,31 @@
export { createKtxEmbeddingProvider } from './embedding-provider.js';
export { runKtxEmbeddingHealthCheck } from './embedding-health.js';
export { KtxMessageBuilder, splitKtxSystemMessages } from './message-builder.js';
export type { KtxSplitSystemMessagesResult } from './message-builder.js';
export type { KtxEmbeddingHealthCheckOptions, KtxEmbeddingHealthCheckResult } from './embedding-health.js';
export type { KtxEmbeddingProviderDeps } from './embedding-provider.js';
export type { KtxLlmHealthCheckDeps, KtxLlmHealthCheckOptions, KtxLlmHealthCheckResult } from './model-health.js';
export { runKtxLlmHealthCheck } from './model-health.js';
export {
createKtxLlmProvider,
isAnthropicProtocolModel,
modelIdFromLanguageModel,
type KtxLlmProviderFactoryDeps,
} from './model-provider.js';
export type {
KtxEmbeddingBackend,
KtxEmbeddingConfig,
KtxEmbeddingProvider,
KtxEmbeddingTokenUsageEvent,
KtxJsonValue,
KtxLlmBackend,
KtxLlmConfig,
KtxLlmProvider,
KtxModelRole,
KtxPromptCacheTtl,
KtxPromptCachingConfig,
KtxPromptParts,
KtxProviderOptions,
KtxTokenUsageEvent,
} from './types.js';
export { KTX_MODEL_ROLES } from './types.js';

View file

@ -0,0 +1,146 @@
import type { ModelMessage } from 'ai';
import { describe, expect, it } from 'vitest';
import { KtxMessageBuilder, splitKtxSystemMessages } from './message-builder.js';
import { createKtxLlmProvider } from './model-provider.js';
function makeBuilder(overrides: Parameters<typeof createKtxLlmProvider>[0]['promptCaching'] = {}) {
const provider = createKtxLlmProvider({
backend: 'gateway',
gateway: { baseURL: 'https://gateway.test' },
modelSlots: { default: 'anthropic/claude-sonnet-4-6' },
promptCaching: { enabled: true, ...overrides },
});
return new KtxMessageBuilder(provider);
}
describe('KtxMessageBuilder.build', () => {
it('caches static system, last sorted tool, and last history message', () => {
const builder = makeBuilder();
const out = builder.build({
parts: { staticSystem: 'STATIC', dynamicSystem: 'DYNAMIC' },
history: [
{ role: 'user', content: 'first' },
{ role: 'assistant', content: [{ type: 'text', text: 'reply A' }, { type: 'text', text: 'reply B' }] } as ModelMessage,
],
currentMessage: { role: 'user', content: 'now' },
tools: {
zoo: { description: 'z' },
apple: { description: 'a' },
},
model: 'anthropic/claude-sonnet-4-6',
});
expect(out.messages[0]).toMatchObject({
role: 'system',
content: 'STATIC',
providerOptions: { anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } } },
});
expect(out.messages[1]).toMatchObject({ role: 'system', content: 'DYNAMIC' });
expect((out.messages[1] as { providerOptions?: unknown }).providerOptions).toBeUndefined();
expect((out.messages[3] as { content: Array<{ providerOptions?: unknown }> }).content[1].providerOptions).toEqual({
anthropic: { cacheControl: { type: 'ephemeral', ttl: '5m' } },
});
expect(Object.keys(out.tools)).toEqual(['apple', 'zoo']);
expect((out.tools.zoo as { providerOptions?: unknown }).providerOptions).toEqual({
anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } },
});
});
it('wraps leading user context onto currentMessage as a system reminder part', () => {
const builder = makeBuilder();
const out = builder.build({
parts: { staticSystem: 'STATIC', leadingUserContext: 'current_date: 2026-05-04' },
history: [],
currentMessage: { role: 'user', content: 'question' },
tools: {},
model: 'anthropic/claude-sonnet-4-6',
});
expect(out.messages[out.messages.length - 1]).toMatchObject({
role: 'user',
content: [
{ type: 'text', text: '<system-reminder>\ncurrent_date: 2026-05-04\n</system-reminder>' },
{ type: 'text', text: 'question' },
],
});
});
it('omits cache markers for non-Anthropic protocol models', () => {
const builder = makeBuilder();
const out = builder.wrapSimple({
system: 'SYS',
messages: [{ role: 'user', content: 'q' }],
tools: { z: {} },
model: 'gpt-5',
});
expect((out.messages[0] as { providerOptions?: unknown }).providerOptions).toBeUndefined();
expect((out.tools.z as { providerOptions?: unknown }).providerOptions).toBeUndefined();
});
it('clamps every TTL to 5m for Vertex when vertexFallbackTo5m is enabled', () => {
const provider = createKtxLlmProvider({
backend: 'vertex',
vertex: { project: 'ktx-test', location: 'us-east5' },
modelSlots: { default: 'claude-sonnet-4-6' },
promptCaching: {
enabled: true,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '1h',
vertexFallbackTo5m: true,
},
});
const builder = new KtxMessageBuilder(provider);
const out = builder.build({
parts: { staticSystem: 'STATIC' },
history: [{ role: 'user', content: 'history' }],
currentMessage: { role: 'user', content: 'now' },
tools: { z: {} },
model: 'claude-sonnet-4-6',
});
expect((out.messages[0] as { providerOptions: any }).providerOptions.anthropic.cacheControl.ttl).toBe('5m');
expect((out.messages[1] as { content: Array<{ providerOptions: any }> }).content[0].providerOptions.anthropic.cacheControl.ttl).toBe(
'5m',
);
expect((out.tools.z as { providerOptions: any }).providerOptions.anthropic.cacheControl.ttl).toBe('5m');
});
});
describe('splitKtxSystemMessages', () => {
it('returns undefined system when no system messages are present', () => {
const split = splitKtxSystemMessages([
{ role: 'user', content: 'hello' },
{ role: 'assistant', content: 'hi' },
]);
expect(split.system).toBeUndefined();
expect(split.messages).toHaveLength(2);
});
it('returns a single system message object when one system message is present, preserving providerOptions', () => {
const systemMessage = {
role: 'system' as const,
content: 'You are helpful.',
providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } },
};
const split = splitKtxSystemMessages([systemMessage, { role: 'user', content: 'hello' }]);
expect(split.system).toBe(systemMessage);
expect(split.messages).toEqual([{ role: 'user', content: 'hello' }]);
});
it('returns an array of system messages when multiple are present, in order', () => {
const split = splitKtxSystemMessages([
{ role: 'system', content: 'cached' },
{ role: 'system', content: 'fresh' },
{ role: 'user', content: 'hello' },
]);
expect(Array.isArray(split.system)).toBe(true);
expect(split.system).toHaveLength(2);
expect(split.messages).toEqual([{ role: 'user', content: 'hello' }]);
});
});

View file

@ -0,0 +1,219 @@
import type { LanguageModel, ModelMessage, SystemModelMessage, ToolSet } from 'ai';
import { isAnthropicProtocolModel } from './model-provider.js';
import type { KtxLlmProvider, KtxPromptCacheTtl, KtxPromptParts } from './types.js';
export interface KtxSplitSystemMessagesResult {
system: SystemModelMessage | SystemModelMessage[] | undefined;
messages: ModelMessage[];
}
export function splitKtxSystemMessages(messages: readonly ModelMessage[]): KtxSplitSystemMessagesResult {
const systemMessages: SystemModelMessage[] = [];
const otherMessages: ModelMessage[] = [];
for (const message of messages) {
if (message.role === 'system') {
systemMessages.push(message);
} else {
otherMessages.push(message);
}
}
return {
system:
systemMessages.length === 0 ? undefined : systemMessages.length === 1 ? systemMessages[0] : systemMessages,
messages: otherMessages,
};
}
type ToolMap = ToolSet | Record<string, Record<string, unknown>>;
interface KtxMessageBuilderOptions {
cacheSystem?: boolean;
cacheTools?: boolean;
cacheLastHistory?: boolean;
}
interface KtxBuildInput {
parts: KtxPromptParts;
history: ModelMessage[];
currentMessage: ModelMessage;
tools: ToolMap;
model: LanguageModel | string;
}
interface KtxWrapSimpleInput {
system?: string;
messages?: ModelMessage[];
tools?: ToolMap;
model: LanguageModel | string;
}
interface KtxBuildOutput {
messages: ModelMessage[];
tools: ToolMap;
}
export class KtxMessageBuilder {
constructor(
private readonly provider: KtxLlmProvider,
private readonly options: KtxMessageBuilderOptions = {},
) {}
build(input: KtxBuildInput): KtxBuildOutput {
const cfg = this.provider.promptCachingConfig();
const cachingActive = cfg.enabled && isAnthropicProtocolModel(input.model);
const ttls = this.resolveTtls(input.model);
const messages: ModelMessage[] = [];
const systemMessage: ModelMessage & { providerOptions?: unknown } = {
role: 'system',
content: input.parts.staticSystem,
};
if (cachingActive && this.cacheSystemEnabled()) {
systemMessage.providerOptions = this.provider.cacheMarker(ttls.systemTtl, input.model);
}
messages.push(systemMessage);
if (input.parts.dynamicSystem) {
messages.push({ role: 'system', content: input.parts.dynamicSystem });
}
const historyToEmit =
cachingActive && this.cacheHistoryEnabled()
? this.markLastHistoryMessage(input.history, ttls.historyTtl, input.model)
: input.history;
messages.push(...historyToEmit);
messages.push(this.wrapLeading(input.currentMessage, input.parts.leadingUserContext));
return {
messages,
tools: this.sortAndMarkTools(input.tools, cachingActive, this.cacheToolsEnabled(), ttls.toolsTtl, input.model),
};
}
wrapSimple(input: KtxWrapSimpleInput): KtxBuildOutput {
const cfg = this.provider.promptCachingConfig();
const cachingActive = cfg.enabled && isAnthropicProtocolModel(input.model);
const ttls = this.resolveTtls(input.model);
const messages: ModelMessage[] = [];
if (input.system) {
const systemMessage: ModelMessage & { providerOptions?: unknown } = {
role: 'system',
content: input.system,
};
if (cachingActive && this.cacheSystemEnabled()) {
systemMessage.providerOptions = this.provider.cacheMarker(ttls.systemTtl, input.model);
}
messages.push(systemMessage);
}
if (input.messages) {
messages.push(
...(cachingActive && this.cacheHistoryEnabled()
? this.markLastHistoryMessage(input.messages, ttls.historyTtl, input.model)
: input.messages),
);
}
return {
messages,
tools: this.sortAndMarkTools(input.tools ?? {}, cachingActive, this.cacheToolsEnabled(), ttls.toolsTtl, input.model),
};
}
private cacheSystemEnabled(): boolean {
return this.options.cacheSystem ?? this.provider.promptCachingConfig().cacheSystem;
}
private cacheToolsEnabled(): boolean {
return this.options.cacheTools ?? this.provider.promptCachingConfig().cacheTools;
}
private cacheHistoryEnabled(): boolean {
return this.options.cacheLastHistory ?? this.provider.promptCachingConfig().cacheHistory;
}
private resolveTtls(model: LanguageModel | string): {
systemTtl: KtxPromptCacheTtl;
toolsTtl: KtxPromptCacheTtl;
historyTtl: KtxPromptCacheTtl;
} {
const cfg = this.provider.promptCachingConfig();
if (cfg.vertexFallbackTo5m && this.provider.activeBackend() === 'vertex' && isAnthropicProtocolModel(model)) {
return { systemTtl: '5m', toolsTtl: '5m', historyTtl: '5m' };
}
return { systemTtl: cfg.systemTtl, toolsTtl: cfg.toolsTtl, historyTtl: cfg.historyTtl };
}
private wrapLeading(currentMessage: ModelMessage, leadingUserContext?: string): ModelMessage {
if (!leadingUserContext) {
return currentMessage;
}
const reminderPart = {
type: 'text' as const,
text: `<system-reminder>\n${leadingUserContext}\n</system-reminder>`,
};
if (typeof currentMessage.content === 'string') {
return {
...currentMessage,
content: [reminderPart, { type: 'text' as const, text: currentMessage.content }],
} as ModelMessage;
}
if (Array.isArray(currentMessage.content)) {
return { ...currentMessage, content: [reminderPart, ...currentMessage.content] } as ModelMessage;
}
return currentMessage;
}
private markLastHistoryMessage(
history: ModelMessage[],
ttl: KtxPromptCacheTtl,
model: LanguageModel | string,
): ModelMessage[] {
if (history.length === 0) {
return history;
}
const out = [...history];
const last = out[out.length - 1];
const marker = this.provider.cacheMarker(ttl, model);
if (!marker) {
return history;
}
if (typeof last.content === 'string') {
out[out.length - 1] = {
...last,
content: [{ type: 'text', text: last.content, providerOptions: marker }],
} as ModelMessage;
return out;
}
if (Array.isArray(last.content) && last.content.length > 0) {
const parts = [...last.content];
const lastPart = parts[parts.length - 1];
parts[parts.length - 1] = Object.assign({}, lastPart, { providerOptions: marker });
out[out.length - 1] = { ...last, content: parts } as ModelMessage;
}
return out;
}
private sortAndMarkTools(
tools: ToolMap,
cachingActive: boolean,
cacheTools: boolean,
ttl: KtxPromptCacheTtl,
model: LanguageModel | string,
): ToolMap {
const keys = Object.keys(tools).sort();
const sorted: Record<string, unknown> = {};
for (const key of keys) {
sorted[key] = tools[key as keyof typeof tools];
}
if (cachingActive && cacheTools && keys.length > 0) {
const lastKey = keys[keys.length - 1];
const marker = this.provider.cacheMarker(ttl, model);
if (marker) {
sorted[lastKey] = { ...(sorted[lastKey] as Record<string, unknown>), providerOptions: marker };
}
}
return sorted as ToolMap;
}
}

View file

@ -0,0 +1,77 @@
import { wrapLanguageModel as defaultWrapLanguageModel } from 'ai';
import { describe, expect, it, vi } from 'vitest';
import { runKtxLlmHealthCheck } from './model-health.js';
const anthropicModel = { modelId: 'claude-sonnet-4-6' } as never;
describe('KTX LLM health check', () => {
it('runs a minimal non-streaming model call through the configured provider', async () => {
const generateText = vi.fn(async () => ({ text: 'ok' }));
const createAnthropic = vi.fn(() => vi.fn(() => anthropicModel));
const wrapLanguageModel = vi.fn(defaultWrapLanguageModel);
await expect(
runKtxLlmHealthCheck(
{
backend: 'anthropic',
anthropic: { apiKey: 'sk-ant-test' }, // pragma: allowlist secret
modelSlots: { default: 'claude-sonnet-4-6' },
},
{ deps: { createAnthropic, generateText, devtoolsEnabled: true, wrapLanguageModel } },
),
).resolves.toEqual({ ok: true });
expect(createAnthropic).toHaveBeenCalledWith(
expect.objectContaining({
apiKey: 'sk-ant-test', // pragma: allowlist secret
}),
);
expect(generateText).toHaveBeenCalledWith(
expect.objectContaining({
model: anthropicModel,
prompt: 'Reply with exactly: ok',
temperature: 0,
maxOutputTokens: 8,
}),
);
expect(wrapLanguageModel).not.toHaveBeenCalled();
});
it('returns a failed result without exposing secret values', async () => {
const generateText = vi.fn(async () => {
throw new Error('401 invalid x-api-key sk-ant-secret');
});
await expect(
runKtxLlmHealthCheck(
{
backend: 'anthropic',
anthropic: { apiKey: 'sk-ant-secret' }, // pragma: allowlist secret
modelSlots: { default: 'claude-sonnet-4-6' },
},
{
deps: {
createAnthropic: vi.fn(() => vi.fn(() => anthropicModel)),
generateText,
},
},
),
).resolves.toEqual({
ok: false,
message: '401 invalid x-api-key [redacted]',
});
});
it('reports claude-code as unsupported by the AI SDK health check', async () => {
const result = await runKtxLlmHealthCheck({
backend: 'claude-code',
modelSlots: { default: 'sonnet' },
promptCaching: { enabled: false },
});
expect(result).toEqual({
ok: false,
message: expect.stringContaining('claude-code is not an AI SDK LanguageModel backend'),
});
});
});

View file

@ -0,0 +1,60 @@
import { generateText } from 'ai';
import { createKtxLlmProvider, type KtxLlmProviderFactoryDeps } from './model-provider.js';
import type { KtxLlmConfig } from './types.js';
export type KtxLlmHealthCheckResult = { ok: true } | { ok: false; message: string };
export interface KtxLlmHealthCheckDeps extends Omit<KtxLlmProviderFactoryDeps, 'generateText'> {
generateText?: (options: Parameters<typeof generateText>[0]) => Promise<unknown>;
}
export interface KtxLlmHealthCheckOptions {
prompt?: string;
timeoutMs?: number;
deps?: KtxLlmHealthCheckDeps;
}
function redactHealthCheckMessage(message: string, config: KtxLlmConfig): string {
const secrets = [config.anthropic?.apiKey, config.gateway?.apiKey].filter(
(value): value is string => typeof value === 'string' && value.length > 0,
);
return secrets.reduce((current, secret) => current.split(secret).join('[redacted]'), message);
}
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
let timeout: NodeJS.Timeout | undefined;
const timeoutPromise = new Promise<never>((_resolve, reject) => {
timeout = setTimeout(() => reject(new Error(`LLM health check timed out after ${timeoutMs}ms`)), timeoutMs);
});
try {
return await Promise.race([promise, timeoutPromise]);
} finally {
if (timeout) {
clearTimeout(timeout);
}
}
}
export async function runKtxLlmHealthCheck(
config: KtxLlmConfig,
options: KtxLlmHealthCheckOptions = {},
): Promise<KtxLlmHealthCheckResult> {
try {
const { generateText: runGenerateTextOverride, ...providerDeps } = options.deps ?? {};
const provider = createKtxLlmProvider(config, { ...providerDeps, devtoolsEnabled: false });
const runGenerateText = runGenerateTextOverride ?? generateText;
await withTimeout(
runGenerateText({
model: provider.getModel('default'),
prompt: options.prompt ?? 'Reply with exactly: ok',
temperature: 0,
maxOutputTokens: 8,
}),
options.timeoutMs ?? 15_000,
);
return { ok: true };
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
return { ok: false, message: redactHealthCheckMessage(message, config) };
}
}

View file

@ -0,0 +1,315 @@
import { devToolsMiddleware as defaultDevToolsMiddleware } from '@ai-sdk/devtools';
import { wrapLanguageModel as defaultWrapLanguageModel, type LanguageModel } from 'ai';
import { describe, expect, it, vi } from 'vitest';
import { createKtxLlmProvider, type KtxLlmProviderFactoryDeps } from './model-provider.js';
const languageModel = (modelId: string, provider = 'test'): LanguageModel => ({ modelId, provider }) as LanguageModel;
const devtoolsMiddleware = (): ReturnType<typeof defaultDevToolsMiddleware> => ({ specificationVersion: 'v3' });
const wrapWith = (model: LanguageModel) =>
vi.fn((_options: Parameters<typeof defaultWrapLanguageModel>[0]) => model as ReturnType<typeof defaultWrapLanguageModel>);
describe('createKtxLlmProvider', () => {
it('wraps language models with DevTools middleware when explicitly enabled', () => {
const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic');
const wrappedModel = languageModel('claude-sonnet-4-6', 'anthropic-devtools');
const middleware = devtoolsMiddleware();
const wrapLanguageModel = wrapWith(wrappedModel);
const devToolsMiddleware = vi.fn(devtoolsMiddleware);
const provider = createKtxLlmProvider(
{
backend: 'anthropic',
anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret
modelSlots: { default: 'claude-sonnet-4-6' },
promptCaching: { enabled: false },
},
{
createAnthropic: vi.fn(() => vi.fn(() => anthropicModel)),
devtoolsEnabled: true,
wrapLanguageModel,
devToolsMiddleware,
} satisfies KtxLlmProviderFactoryDeps,
);
expect(provider.getModel('default')).toBe(wrappedModel);
expect(devToolsMiddleware).toHaveBeenCalledTimes(1);
expect(wrapLanguageModel).toHaveBeenCalledWith({
model: anthropicModel,
middleware,
modelId: 'claude-sonnet-4-6',
providerId: 'anthropic',
});
});
it('does not wrap language models by default', () => {
const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic');
const wrapLanguageModel = vi.fn(defaultWrapLanguageModel);
const devToolsMiddleware = vi.fn(defaultDevToolsMiddleware);
const provider = createKtxLlmProvider(
{
backend: 'anthropic',
anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret
modelSlots: { default: 'claude-sonnet-4-6' },
promptCaching: { enabled: false },
},
{
createAnthropic: vi.fn(() => vi.fn(() => anthropicModel)),
devtoolsEnabled: false,
wrapLanguageModel,
devToolsMiddleware,
} satisfies KtxLlmProviderFactoryDeps,
);
expect(provider.getModel('default')).toBe(anthropicModel);
expect(wrapLanguageModel).not.toHaveBeenCalled();
expect(devToolsMiddleware).not.toHaveBeenCalled();
});
it('wraps language models when KTX_AI_DEVTOOLS_ENABLED is true', () => {
const originalEnv = process.env.KTX_AI_DEVTOOLS_ENABLED;
process.env.KTX_AI_DEVTOOLS_ENABLED = 'true';
try {
const gatewayModel = languageModel('anthropic/claude-sonnet-4-6', 'gateway');
const wrappedModel = languageModel('anthropic/claude-sonnet-4-6', 'gateway-devtools');
const wrapLanguageModel = wrapWith(wrappedModel);
const provider = createKtxLlmProvider(
{
backend: 'gateway',
gateway: { baseURL: 'https://gateway.test/v1' },
modelSlots: { default: 'anthropic/claude-sonnet-4-6' },
promptCaching: { enabled: false },
},
{
createGateway: vi.fn(() => vi.fn(() => gatewayModel)),
wrapLanguageModel,
devToolsMiddleware: vi.fn(devtoolsMiddleware),
} satisfies KtxLlmProviderFactoryDeps,
);
expect(provider.getModel('default')).toBe(wrappedModel);
expect(wrapLanguageModel).toHaveBeenCalledTimes(1);
} finally {
if (originalEnv === undefined) {
delete process.env.KTX_AI_DEVTOOLS_ENABLED;
} else {
process.env.KTX_AI_DEVTOOLS_ENABLED = originalEnv;
}
}
});
it('does not wrap language models in production even when enabled', () => {
const originalNodeEnv = process.env.NODE_ENV;
process.env.NODE_ENV = 'production';
try {
const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic');
const wrapLanguageModel = vi.fn(defaultWrapLanguageModel);
const devToolsMiddleware = vi.fn(defaultDevToolsMiddleware);
const provider = createKtxLlmProvider(
{
backend: 'anthropic',
anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret
modelSlots: { default: 'claude-sonnet-4-6' },
promptCaching: { enabled: false },
},
{
createAnthropic: vi.fn(() => vi.fn(() => anthropicModel)),
devtoolsEnabled: true,
wrapLanguageModel,
devToolsMiddleware,
} satisfies KtxLlmProviderFactoryDeps,
);
expect(provider.getModel('default')).toBe(anthropicModel);
expect(wrapLanguageModel).not.toHaveBeenCalled();
expect(devToolsMiddleware).not.toHaveBeenCalled();
} finally {
if (originalNodeEnv === undefined) {
delete process.env.NODE_ENV;
} else {
process.env.NODE_ENV = originalNodeEnv;
}
}
});
it('uses direct Anthropic with both beta headers', () => {
const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic');
const anthropic = vi.fn(() => anthropicModel);
const createAnthropic = vi.fn(() => anthropic);
const provider = createKtxLlmProvider(
{
backend: 'anthropic',
anthropic: { apiKey: 'test-anthropic-key', baseURL: 'https://anthropic.test' }, // pragma: allowlist secret
modelSlots: { default: 'claude-sonnet-4-6' },
promptCaching: { enabled: false },
},
{ createAnthropic, devtoolsEnabled: false },
);
expect(provider.getModel('default')).toBe(anthropicModel);
expect(createAnthropic).toHaveBeenCalledWith({
apiKey: 'test-anthropic-key', // pragma: allowlist secret
baseURL: 'https://anthropic.test',
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11',
},
});
expect(anthropic).toHaveBeenCalledWith('claude-sonnet-4-6');
});
it('uses Vertex Anthropic without the direct-Anthropic beta header', () => {
const vertexModel = languageModel('claude-sonnet-4-6', 'vertex');
const vertex = vi.fn(() => vertexModel);
const createVertexAnthropic = vi.fn(() => vertex);
const provider = createKtxLlmProvider(
{
backend: 'vertex',
vertex: { project: 'ktx-test', location: 'us-east5' },
modelSlots: { default: 'claude-sonnet-4-6' },
promptCaching: { enabled: false },
},
{ createVertexAnthropic, devtoolsEnabled: false },
);
expect(provider.getModel('default')).toBe(vertexModel);
expect(createVertexAnthropic).toHaveBeenCalledWith({ project: 'ktx-test', location: 'us-east5' });
expect(vertex).toHaveBeenCalledWith('claude-sonnet-4-6');
});
it('uses Gateway and supports role fallback to default', () => {
const gatewayModel = languageModel('anthropic/claude-sonnet-4-6', 'gateway');
const gateway = vi.fn(() => gatewayModel);
const createGateway = vi.fn(() => gateway);
const provider = createKtxLlmProvider(
{
backend: 'gateway',
gateway: { apiKey: 'gateway-key', baseURL: 'https://gateway.test/v1' }, // pragma: allowlist secret
modelSlots: { default: 'anthropic/claude-sonnet-4-6' },
promptCaching: { enabled: false },
},
{ createGateway, devtoolsEnabled: false },
);
expect(provider.getModel('curator')).toBe(gatewayModel);
expect(createGateway).toHaveBeenCalledWith({
apiKey: 'gateway-key', // pragma: allowlist secret
baseURL: 'https://gateway.test/v1',
headers: {
'anthropic-beta': 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11',
},
});
expect(gateway).toHaveBeenCalledWith('anthropic/claude-sonnet-4-6');
});
it('uses explicit role overrides before default', () => {
const anthropic = vi.fn((modelId: string) => languageModel(modelId, 'anthropic'));
const provider = createKtxLlmProvider(
{
backend: 'anthropic',
anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret
modelSlots: {
default: 'claude-sonnet-4-6',
triage: 'claude-haiku-4-5',
repair: 'claude-opus-4-7',
},
promptCaching: { enabled: false },
},
{ createAnthropic: vi.fn(() => anthropic) },
);
expect((provider.getModel('triage') as { modelId: string }).modelId).toBe('claude-haiku-4-5');
expect((provider.getModel('repair') as { modelId: string }).modelId).toBe('claude-opus-4-7');
expect((provider.getModel('reconcile') as { modelId: string }).modelId).toBe('claude-sonnet-4-6');
});
it('emits cache markers only when enabled and the model speaks Anthropic protocol', () => {
const provider = createKtxLlmProvider(
{
backend: 'gateway',
gateway: { baseURL: 'https://gateway.test/v1' },
modelSlots: { default: 'anthropic/claude-sonnet-4-6' },
promptCaching: { enabled: true },
},
{ createGateway: vi.fn(() => vi.fn((modelId: string) => languageModel(modelId, 'gateway'))) },
);
expect(provider.cacheMarker('1h', 'anthropic/claude-sonnet-4-6')).toEqual({
anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } },
});
expect(provider.cacheMarker('1h', 'gpt-5')).toBeUndefined();
});
it('returns Anthropic thinking provider options', () => {
const provider = createKtxLlmProvider(
{
backend: 'anthropic',
anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret
modelSlots: { default: 'claude-sonnet-4-6' },
promptCaching: { enabled: false },
},
{ createAnthropic: vi.fn(() => vi.fn((modelId: string) => languageModel(modelId, 'anthropic'))) },
);
expect(provider.thinkingProviderOptions('default', 12000)).toEqual({
anthropic: {
thinking: { type: 'enabled', budgetTokens: 12000 },
},
});
});
it('defaults prompt caching to enabled with canonical TTLs', () => {
const provider = createKtxLlmProvider(
{
backend: 'gateway',
gateway: { baseURL: 'https://gateway.test/v1' },
modelSlots: { default: 'anthropic/claude-sonnet-4-6' },
},
{ createGateway: vi.fn(() => vi.fn((modelId: string) => languageModel(modelId, 'gateway'))) },
);
expect(provider.promptCachingConfig()).toEqual({
enabled: true,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
});
expect(provider.cacheMarker('1h', 'anthropic/claude-sonnet-4-6')).toEqual({
anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } },
});
});
it('preserves explicit prompt caching opt-out', () => {
const provider = createKtxLlmProvider(
{
backend: 'anthropic',
anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret
modelSlots: { default: 'claude-sonnet-4-6' },
promptCaching: { enabled: false },
},
{ createAnthropic: vi.fn(() => vi.fn((modelId: string) => languageModel(modelId, 'anthropic'))) },
);
expect(provider.promptCachingConfig().enabled).toBe(false);
expect(provider.cacheMarker('1h', 'claude-sonnet-4-6')).toBeUndefined();
});
it('throws instead of falling through when an unsupported LLM backend is passed to the AI SDK provider factory', () => {
expect(() =>
createKtxLlmProvider({
backend: 'claude-code',
modelSlots: { default: 'sonnet' },
promptCaching: { enabled: false },
}),
).toThrow('claude-code is not an AI SDK LanguageModel backend');
});
});

View file

@ -0,0 +1,198 @@
import { createAnthropic } from '@ai-sdk/anthropic';
import { devToolsMiddleware } from '@ai-sdk/devtools';
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { createGateway, generateText, wrapLanguageModel, type LanguageModel } from 'ai';
import { createKtxToolCallRepairHandler } from './repair.js';
import type {
KtxLlmConfig,
KtxLlmProvider,
KtxModelRole,
KtxPromptCacheTtl,
KtxPromptCachingConfig,
KtxProviderOptions,
} from './types.js';
type AnthropicFactory = typeof createAnthropic;
type AnthropicModelFactory = (modelId: string) => LanguageModel;
type VertexAnthropicFactory = (options?: Parameters<typeof createVertexAnthropic>[0]) => AnthropicModelFactory;
type GatewayFactory = (options?: Parameters<typeof createGateway>[0]) => AnthropicModelFactory;
export interface KtxLlmProviderFactoryDeps {
createAnthropic?: (options?: Parameters<AnthropicFactory>[0]) => AnthropicModelFactory;
createVertexAnthropic?: VertexAnthropicFactory;
createGateway?: GatewayFactory;
generateText?: typeof generateText;
devtoolsEnabled?: boolean;
wrapLanguageModel?: typeof wrapLanguageModel;
devToolsMiddleware?: typeof devToolsMiddleware;
}
const DEFAULT_PROMPT_CACHING: KtxPromptCachingConfig = {
enabled: true,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
};
const ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11';
function resolvePromptCaching(config: KtxLlmConfig): KtxPromptCachingConfig {
return { ...DEFAULT_PROMPT_CACHING, ...config.promptCaching };
}
function resolveDevtoolsEnabled(override: boolean | undefined): boolean {
if (process.env.NODE_ENV === 'production') {
return false;
}
if (override !== undefined) {
return override;
}
const value = process.env.KTX_AI_DEVTOOLS_ENABLED?.trim().toLowerCase();
return value === 'true' || value === '1' || value === 'yes';
}
export function modelIdFromLanguageModel(model: LanguageModel | string): string {
return typeof model === 'string' ? model : ((model as { modelId?: string }).modelId ?? '');
}
function providerIdFromLanguageModel(model: Exclude<LanguageModel, string>): string | undefined {
return typeof (model as { provider?: unknown }).provider === 'string'
? (model as { provider: string }).provider
: undefined;
}
export function isAnthropicProtocolModel(model: LanguageModel | string): boolean {
const modelId = modelIdFromLanguageModel(model);
return modelId.startsWith('claude-') || modelId.startsWith('anthropic/') || modelId.includes('/claude-');
}
class DefaultKtxLlmProvider implements KtxLlmProvider {
private readonly promptCaching: KtxPromptCachingConfig;
private readonly getModelByResolvedName: (modelId: string) => LanguageModel;
private readonly runGenerateText: typeof generateText;
private readonly devtoolsEnabled: boolean;
private readonly runWrapLanguageModel: typeof wrapLanguageModel;
private readonly createDevToolsMiddleware: typeof devToolsMiddleware;
constructor(
private readonly config: KtxLlmConfig,
deps: KtxLlmProviderFactoryDeps,
) {
this.promptCaching = resolvePromptCaching(config);
this.runGenerateText = deps.generateText ?? generateText;
this.devtoolsEnabled = resolveDevtoolsEnabled(deps.devtoolsEnabled);
this.runWrapLanguageModel = deps.wrapLanguageModel ?? wrapLanguageModel;
this.createDevToolsMiddleware = deps.devToolsMiddleware ?? devToolsMiddleware;
this.getModelByResolvedName = this.createModelFactory(config, deps);
}
getModel(role: KtxModelRole): LanguageModel {
return this.getModelByName(this.resolveRole(role));
}
getModelByName(modelId: string): LanguageModel {
return this.withDevtools(this.getModelByResolvedName(modelId));
}
cacheMarker(ttl: KtxPromptCacheTtl, model?: LanguageModel | string) {
if (!this.promptCaching.enabled) {
return undefined;
}
if (model && !isAnthropicProtocolModel(model)) {
return undefined;
}
return { anthropic: { cacheControl: { type: 'ephemeral' as const, ttl } } };
}
repairToolCallHandler(options: { source?: string } = {}) {
return createKtxToolCallRepairHandler({
source: options.source ?? 'ktx-llm',
getRepairModel: () => this.getModel('repair'),
generateText: this.runGenerateText,
});
}
thinkingProviderOptions(_role: KtxModelRole, budgetTokens: number): KtxProviderOptions {
return {
anthropic: {
thinking: { type: 'enabled', budgetTokens },
},
};
}
telemetryConfig() {
return this.config.telemetry?.experimentalTelemetry;
}
promptCachingConfig(): KtxPromptCachingConfig {
return this.promptCaching;
}
activeBackend() {
return this.config.backend;
}
private resolveRole(role: KtxModelRole): string {
return this.config.modelSlots[role] ?? this.config.modelSlots.default;
}
private withDevtools(model: LanguageModel): LanguageModel {
if (!this.devtoolsEnabled || typeof model === 'string') {
return model;
}
return this.runWrapLanguageModel({
model: model as Parameters<typeof wrapLanguageModel>[0]['model'],
middleware: this.createDevToolsMiddleware(),
modelId: modelIdFromLanguageModel(model),
providerId: providerIdFromLanguageModel(model),
});
}
private createModelFactory(config: KtxLlmConfig, deps: KtxLlmProviderFactoryDeps): (modelId: string) => LanguageModel {
if (config.backend === 'anthropic') {
const anthropic = (deps.createAnthropic ?? createAnthropic)({
...(config.anthropic?.apiKey ? { apiKey: config.anthropic.apiKey } : {}),
...(config.anthropic?.baseURL ? { baseURL: config.anthropic.baseURL } : {}),
headers: {
'anthropic-beta': ANTHROPIC_BETA_HEADER,
},
});
return (modelId) => anthropic(modelId);
}
if (config.backend === 'vertex') {
if (!config.vertex?.location) {
throw new Error('vertex.location is required when KTX LLM backend is vertex');
}
const vertex = (deps.createVertexAnthropic ?? createVertexAnthropic)({
...(config.vertex.project ? { project: config.vertex.project } : {}),
location: config.vertex.location,
});
return (modelId) => vertex(modelId);
}
if (config.backend === 'gateway') {
const gateway = (deps.createGateway ?? createGateway)({
...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}),
...(config.gateway?.baseURL ? { baseURL: config.gateway.baseURL } : {}),
headers: {
'anthropic-beta': ANTHROPIC_BETA_HEADER,
},
});
return (modelId) => gateway(modelId);
}
throw new Error(`${config.backend} is not an AI SDK LanguageModel backend; use KtxLlmRuntimePort`);
}
}
export function createKtxLlmProvider(config: KtxLlmConfig, deps: KtxLlmProviderFactoryDeps = {}): KtxLlmProvider {
if (!config.modelSlots.default) {
throw new Error('modelSlots.default is required');
}
return new DefaultKtxLlmProvider(config, deps);
}

View file

@ -0,0 +1,19 @@
import { describe, expect, it } from 'vitest';
describe('@ktx/llm package exports', () => {
it('exports the canonical LLM and embedding surfaces', async () => {
const llm = await import('./index.js');
expect(llm.KTX_MODEL_ROLES).toEqual([
'default',
'triage',
'candidateExtraction',
'curator',
'reconcile',
'repair',
]);
expect(llm.createKtxLlmProvider).toBeTypeOf('function');
expect(llm.KtxMessageBuilder).toBeTypeOf('function');
expect(llm.createKtxEmbeddingProvider).toBeTypeOf('function');
});
});

View file

@ -0,0 +1,93 @@
import { NoSuchToolError, type LanguageModel } from 'ai';
import { describe, expect, it, vi } from 'vitest';
import { createKtxToolCallRepairHandler } from './repair.js';
const repairModel = { modelId: 'claude-repair', provider: 'anthropic' } as LanguageModel;
describe('createKtxToolCallRepairHandler', () => {
it('returns null for NoSuchToolError', async () => {
const handler = createKtxToolCallRepairHandler({
source: 'unit',
getRepairModel: () => repairModel,
generateText: vi.fn(),
});
await expect(
handler({
system: undefined,
messages: [],
toolCall: { type: 'tool-call', toolName: 'missing', toolCallId: 'tc_1', input: '{}' },
tools: {},
inputSchema: async () => ({}),
error: new NoSuchToolError({ toolName: 'missing' }),
}),
).resolves.toBeNull();
});
it('repairs string input by local JSON extraction without an LLM call', async () => {
const generateText = vi.fn();
const handler = createKtxToolCallRepairHandler({
source: 'unit',
getRepairModel: () => repairModel,
generateText,
});
await expect(
handler({
system: undefined,
messages: [],
toolCall: {
type: 'tool-call',
toolName: 'write_source',
toolCallId: 'tc_2',
input: 'prefix {"path":"orders.yaml"} suffix',
},
tools: { write_source: {} as never },
inputSchema: async () => ({ type: 'object' }),
error: new Error('Invalid tool input') as never,
}),
).resolves.toEqual({
type: 'tool-call',
toolName: 'write_source',
toolCallId: 'tc_2',
input: '{"path":"orders.yaml"}',
});
expect(generateText).not.toHaveBeenCalled();
});
it('falls back to the repair model when local extraction fails', async () => {
const generateText = vi.fn().mockResolvedValue({ text: '{"path":"customers.yaml"}' });
const handler = createKtxToolCallRepairHandler({
source: 'unit',
getRepairModel: () => repairModel,
generateText,
});
await expect(
handler({
system: undefined,
messages: [],
toolCall: {
type: 'tool-call',
toolName: 'write_source',
toolCallId: 'tc_3',
input: 'not json',
},
tools: { write_source: {} as never },
inputSchema: async () => ({ type: 'object', properties: { path: { type: 'string' } } }),
error: new Error('Invalid tool input') as never,
}),
).resolves.toEqual({
type: 'tool-call',
toolName: 'write_source',
toolCallId: 'tc_3',
input: '{"path":"customers.yaml"}',
});
expect(generateText).toHaveBeenCalledWith(
expect.objectContaining({
model: repairModel,
prompt: expect.stringContaining('The model tried to call the tool "write_source"'),
}),
);
});
});

View file

@ -0,0 +1,88 @@
import { NoSuchToolError, type LanguageModel, type ToolCallRepairFunction, type ToolSet, generateText } from 'ai';
interface KtxToolCallRepairHandlerInput {
source: string;
getRepairModel: () => LanguageModel;
generateText?: typeof generateText;
}
function extractJsonFromText(text: string): string | null {
const trimmed = text.trim();
if (!trimmed) {
return null;
}
try {
JSON.parse(trimmed);
return trimmed;
} catch {}
let start = trimmed.indexOf('{');
while (start >= 0) {
let end = trimmed.lastIndexOf('}');
while (end > start) {
const candidate = trimmed.slice(start, end + 1);
try {
JSON.parse(candidate);
return candidate;
} catch {}
end = trimmed.lastIndexOf('}', end - 1);
}
start = trimmed.indexOf('{', start + 1);
}
return null;
}
export function createKtxToolCallRepairHandler(
input: KtxToolCallRepairHandlerInput,
): ToolCallRepairFunction<ToolSet> {
const runGenerateText = input.generateText ?? generateText;
return async ({ toolCall, tools, inputSchema, error }) => {
if (NoSuchToolError.isInstance(error)) {
return null;
}
if (typeof toolCall.input === 'string') {
const extracted = extractJsonFromText(toolCall.input);
if (extracted) {
return {
type: 'tool-call',
toolName: toolCall.toolName,
toolCallId: toolCall.toolCallId,
input: extracted,
};
}
}
if (!(toolCall.toolName in tools)) {
return null;
}
try {
const schema = await inputSchema({ toolName: toolCall.toolName });
const { text } = await runGenerateText({
model: input.getRepairModel(),
prompt: `The model tried to call the tool "${toolCall.toolName}" with the following inputs:
${JSON.stringify(toolCall.input)}
However, this caused a validation error: ${error.message}
The tool accepts the following schema:
${JSON.stringify(schema)}
Please generate corrected inputs that match the schema. Return ONLY valid JSON, no explanation or markdown formatting.`,
});
const cleaned = extractJsonFromText(text) ?? text.trim();
const parsed = JSON.parse(cleaned);
return {
type: 'tool-call',
toolName: toolCall.toolName,
toolCallId: toolCall.toolCallId,
input: JSON.stringify(parsed),
};
} catch {
return null;
}
};
}

View file

@ -0,0 +1,95 @@
import type { LanguageModel, TelemetrySettings, ToolCallRepairFunction, ToolSet } from 'ai';
export const KTX_MODEL_ROLES = ['default', 'triage', 'candidateExtraction', 'curator', 'reconcile', 'repair'] as const;
export type KtxModelRole = (typeof KTX_MODEL_ROLES)[number];
export type KtxLlmBackend = 'anthropic' | 'vertex' | 'gateway' | 'claude-code';
export type KtxPromptCacheTtl = '5m' | '1h';
export type KtxJsonValue =
| null
| string
| number
| boolean
| KtxJsonValue[]
| { [key: string]: KtxJsonValue | undefined };
export type KtxProviderOptions = Record<string, { [key: string]: KtxJsonValue | undefined }>;
export interface KtxPromptCachingConfig {
enabled: boolean;
systemTtl: KtxPromptCacheTtl;
toolsTtl: KtxPromptCacheTtl;
historyTtl: KtxPromptCacheTtl;
cacheSystem: boolean;
cacheTools: boolean;
cacheHistory: boolean;
vertexFallbackTo5m: boolean;
}
export interface KtxTokenUsageEvent {
source?: string;
modelId?: string;
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
}
export interface KtxLlmConfig {
backend: KtxLlmBackend;
vertex?: { project?: string; location: string };
anthropic?: { apiKey?: string; baseURL?: string };
gateway?: { baseURL?: string; apiKey?: string };
modelSlots: { default: string } & Partial<Record<KtxModelRole, string>>;
promptCaching?: Partial<KtxPromptCachingConfig>;
telemetry?: {
experimentalTelemetry?: TelemetrySettings;
onTokenUsage?: (event: KtxTokenUsageEvent) => void;
};
}
export interface KtxLlmProvider {
getModel(role: KtxModelRole): LanguageModel;
getModelByName(modelId: string): LanguageModel;
cacheMarker(
ttl: KtxPromptCacheTtl,
model?: LanguageModel | string,
): { anthropic: { cacheControl: { type: 'ephemeral'; ttl: KtxPromptCacheTtl } } } | undefined;
repairToolCallHandler(options?: { source?: string }): ToolCallRepairFunction<ToolSet>;
thinkingProviderOptions(role: KtxModelRole, budgetTokens: number): KtxProviderOptions;
telemetryConfig(): TelemetrySettings | undefined;
promptCachingConfig(): KtxPromptCachingConfig;
activeBackend(): KtxLlmBackend;
}
export type KtxEmbeddingBackend = 'openai' | 'sentence-transformers';
export interface KtxEmbeddingTokenUsageEvent {
backend: KtxEmbeddingBackend;
model: string;
inputCount: number;
totalTokens?: number;
}
export interface KtxEmbeddingConfig {
backend: KtxEmbeddingBackend;
model: string;
dimensions: number;
openai?: { apiKey?: string; baseURL?: string };
sentenceTransformers?: { baseURL: string; pathPrefix?: string };
batchSize?: number;
telemetry?: { onTokenUsage?: (event: KtxEmbeddingTokenUsageEvent) => void };
}
export interface KtxEmbeddingProvider {
readonly dimensions: number;
readonly maxBatchSize: number;
embed(text: string): Promise<number[]>;
embedMany(texts: string[]): Promise<number[][]>;
}
export interface KtxPromptParts {
staticSystem: string;
dynamicSystem?: string;
leadingUserContext?: string;
}