// ktx/packages/llm/src/model-provider.ts
import { createAnthropic } from '@ai-sdk/anthropic';
import { devToolsMiddleware } from '@ai-sdk/devtools';
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
import { createGateway, generateText, wrapLanguageModel, type LanguageModel } from 'ai';
import { createKtxToolCallRepairHandler } from './repair.js';
import type {
KtxLlmConfig,
KtxLlmProvider,
KtxModelRole,
KtxPromptCacheTtl,
KtxPromptCachingConfig,
KtxProviderOptions,
} from './types.js';
/** Signature of the `createAnthropic` factory imported from `@ai-sdk/anthropic`. */
type AnthropicFactory = typeof createAnthropic;
/** Turns a model id into an AI SDK `LanguageModel`. */
type AnthropicModelFactory = (modelId: string) => LanguageModel;
/** Takes the options accepted by `createVertexAnthropic` and yields a model factory. */
type VertexAnthropicFactory = (options?: Parameters<typeof createVertexAnthropic>[0]) => AnthropicModelFactory;
/** Takes the options accepted by `createGateway` and yields a model factory. */
type GatewayFactory = (options?: Parameters<typeof createGateway>[0]) => AnthropicModelFactory;
/**
 * Optional collaborators accepted by `createKtxLlmProvider`.
 *
 * Every member may be omitted; the provider then uses the corresponding
 * function imported by this module. Supplying a member replaces that function,
 * which is mainly useful for substituting fakes.
 */
export interface KtxLlmProviderFactoryDeps {
  /** Replacement for `createAnthropic`, used when the backend is 'anthropic'. */
  createAnthropic?: (options?: Parameters<AnthropicFactory>[0]) => AnthropicModelFactory;
  /** Replacement for `createVertexAnthropic`, used when the backend is 'vertex'. */
  createVertexAnthropic?: VertexAnthropicFactory;
  /** Replacement for `createGateway`, used when the backend is 'gateway'. */
  createGateway?: GatewayFactory;
  /** Replacement for the AI SDK `generateText`, handed to the tool-call repair handler. */
  generateText?: typeof generateText;
  /**
   * Explicit devtools switch. When left undefined the `KTX_AI_DEVTOOLS_ENABLED`
   * environment variable decides; when `NODE_ENV` is 'production' devtools stay
   * off regardless of this value.
   */
  devtoolsEnabled?: boolean;
  /** Replacement for the AI SDK `wrapLanguageModel`, used to attach the devtools middleware. */
  wrapLanguageModel?: typeof wrapLanguageModel;
  /** Replacement for `devToolsMiddleware`, invoked once per wrapped model. */
  devToolsMiddleware?: typeof devToolsMiddleware;
}
/**
 * Baseline prompt-caching settings. `resolvePromptCaching` spreads
 * `config.promptCaching` over this object, so any key supplied by the caller
 * replaces the value listed here.
 */
const DEFAULT_PROMPT_CACHING: KtxPromptCachingConfig = {
  enabled: true,
  systemTtl: '1h',
  toolsTtl: '1h',
  historyTtl: '5m',
  cacheSystem: true,
  cacheTools: true,
  cacheHistory: true,
  vertexFallbackTo5m: false,
};

/** Value sent as the `anthropic-beta` request header by the 'anthropic' and 'gateway' backends. */
const ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11';
/**
 * Produces the effective prompt-caching settings for a provider.
 *
 * This is a shallow merge: the result starts from `DEFAULT_PROMPT_CACHING` and
 * every key present on `config.promptCaching` replaces the default. A new
 * object is returned on each call, so the defaults are never mutated.
 *
 * @param config - LLM configuration whose optional `promptCaching` holds the overrides.
 * @returns The merged prompt-caching configuration.
 */
function resolvePromptCaching(config: KtxLlmConfig): KtxPromptCachingConfig {
  return { ...DEFAULT_PROMPT_CACHING, ...config.promptCaching };
}
/**
 * Decides whether models should be wrapped with the devtools middleware.
 *
 * Precedence, highest first:
 * 1. `NODE_ENV === 'production'` always disables devtools.
 * 2. An explicit `override` (true or false) is returned as given.
 * 3. Otherwise `KTX_AI_DEVTOOLS_ENABLED` enables devtools when, after trimming
 *    and lower-casing, it equals 'true', '1' or 'yes'.
 *
 * @param override - Explicit setting supplied by the caller, or undefined to consult the environment.
 * @returns Whether devtools wrapping is active.
 */
function resolveDevtoolsEnabled(override: boolean | undefined): boolean {
  const inProduction = process.env.NODE_ENV === 'production';
  if (inProduction) {
    return false;
  }

  if (override === undefined) {
    // The environment variable is only read when no explicit choice was made.
    const flag = process.env.KTX_AI_DEVTOOLS_ENABLED?.trim().toLowerCase();
    return flag !== undefined && ['true', '1', 'yes'].includes(flag);
  }

  return override;
}
/**
 * Extracts the model id from either a plain id string or a model object.
 *
 * @param model - A model id, or an object that may expose a `modelId` property.
 * @returns The string itself, the object's `modelId`, or `''` when the object's `modelId` is null or undefined.
 */
export function modelIdFromLanguageModel(model: LanguageModel | string): string {
  if (typeof model === 'string') {
    return model;
  }
  const { modelId } = model as { modelId?: string };
  return modelId ?? '';
}
/**
 * Reads the `provider` property of a model object when it is a string.
 *
 * @param model - A non-string language model.
 * @returns The `provider` value, or undefined when it is missing or not a string.
 */
function providerIdFromLanguageModel(model: Exclude<LanguageModel, string>): string | undefined {
  const { provider } = model as { provider?: unknown };
  if (typeof provider !== 'string') {
    return undefined;
  }
  return provider;
}
/**
 * Tells whether a model id looks like an Anthropic-protocol model.
 *
 * The id (taken via `modelIdFromLanguageModel`) matches when it starts with
 * `claude-` or `anthropic/`, or contains `/claude-` anywhere.
 *
 * @param model - A model id or a model object.
 * @returns True when the id matches one of the patterns above.
 */
export function isAnthropicProtocolModel(model: LanguageModel | string): boolean {
  const id = modelIdFromLanguageModel(model);
  const hasAnthropicPrefix = ['claude-', 'anthropic/'].some((prefix) => id.startsWith(prefix));
  return hasAnthropicPrefix || id.includes('/claude-');
}
/**
 * `KtxLlmProvider` implementation that serves AI SDK `LanguageModel` instances
 * for the 'anthropic', 'vertex' and 'gateway' backends.
 *
 * The constructor resolves the prompt-caching settings, the devtools switch
 * and the backend-specific model factory once. Any other backend value makes
 * the constructor throw.
 */
class DefaultKtxLlmProvider implements KtxLlmProvider {
  /** Effective prompt-caching settings (caller overrides merged over the defaults). */
  private readonly promptCaching: KtxPromptCachingConfig;
  /** Backend-specific factory from model id to model, before any devtools wrapping. */
  private readonly getModelByResolvedName: (modelId: string) => LanguageModel;
  /** `generateText` implementation handed to the tool-call repair handler. */
  private readonly runGenerateText: typeof generateText;
  /** Whether models returned by this provider are wrapped with the devtools middleware. */
  private readonly devtoolsEnabled: boolean;
  /** `wrapLanguageModel` implementation used to attach middleware. */
  private readonly runWrapLanguageModel: typeof wrapLanguageModel;
  /** Factory for the devtools middleware; called once per wrapped model. */
  private readonly createDevToolsMiddleware: typeof devToolsMiddleware;

  /**
   * @param config - LLM configuration (backend, model slots, caching, telemetry).
   * @param deps - Optional replacements for the SDK functions this class calls.
   * @throws Error when the backend is 'vertex' without `vertex.location`, or when
   *   the backend is not one of 'anthropic', 'vertex' or 'gateway'.
   */
  constructor(
    private readonly config: KtxLlmConfig,
    deps: KtxLlmProviderFactoryDeps,
  ) {
    this.promptCaching = resolvePromptCaching(config);
    this.runGenerateText = deps.generateText ?? generateText;
    this.devtoolsEnabled = resolveDevtoolsEnabled(deps.devtoolsEnabled);
    this.runWrapLanguageModel = deps.wrapLanguageModel ?? wrapLanguageModel;
    this.createDevToolsMiddleware = deps.devToolsMiddleware ?? devToolsMiddleware;
    this.getModelByResolvedName = this.createModelFactory(config, deps);
  }

  /** Returns the model configured for `role`, falling back to the default slot. */
  getModel(role: KtxModelRole): LanguageModel {
    return this.getModelByName(this.resolveRole(role));
  }

  /** Returns the model for a concrete model id, wrapped with devtools when enabled. */
  getModelByName(modelId: string): LanguageModel {
    return this.withDevtools(this.getModelByResolvedName(modelId));
  }

  /**
   * Builds the provider-options fragment that marks content as cacheable.
   *
   * @param ttl - Cache lifetime to request.
   * @param model - Optional model (or id) the marker is intended for.
   * @returns undefined when prompt caching is disabled, or when a model is given
   *   that `isAnthropicProtocolModel` rejects; otherwise an `anthropic.cacheControl`
   *   entry of type 'ephemeral' with the given ttl. When no model is given the
   *   marker is returned without a protocol check.
   */
  cacheMarker(ttl: KtxPromptCacheTtl, model?: LanguageModel | string) {
    if (!this.promptCaching.enabled) {
      return undefined;
    }
    if (model && !isAnthropicProtocolModel(model)) {
      return undefined;
    }
    return { anthropic: { cacheControl: { type: 'ephemeral' as const, ttl } } };
  }

  /**
   * Creates a tool-call repair handler that uses this provider's 'repair' model.
   *
   * @param options - `source` label passed to the handler; defaults to 'ktx-llm'.
   */
  repairToolCallHandler(options: { source?: string } = {}) {
    return createKtxToolCallRepairHandler({
      source: options.source ?? 'ktx-llm',
      // Resolved lazily so the repair model is only looked up when the handler asks for it.
      getRepairModel: () => this.getModel('repair'),
      generateText: this.runGenerateText,
    });
  }

  /**
   * Builds provider options that enable Anthropic thinking with the given token budget.
   * The role argument is currently unused.
   */
  thinkingProviderOptions(_role: KtxModelRole, budgetTokens: number): KtxProviderOptions {
    return {
      anthropic: {
        thinking: { type: 'enabled', budgetTokens },
      },
    };
  }

  /** Returns `config.telemetry.experimentalTelemetry`, or undefined when no telemetry is configured. */
  telemetryConfig() {
    return this.config.telemetry?.experimentalTelemetry;
  }

  /** Returns the effective prompt-caching settings resolved at construction. */
  promptCachingConfig(): KtxPromptCachingConfig {
    return this.promptCaching;
  }

  /** Returns the backend named in the configuration. */
  activeBackend() {
    return this.config.backend;
  }

  /** Maps a role to its configured model id, using `modelSlots.default` when the role has none. */
  private resolveRole(role: KtxModelRole): string {
    return this.config.modelSlots[role] ?? this.config.modelSlots.default;
  }

  /**
   * Wraps `model` with the devtools middleware when devtools are enabled.
   * String models and the disabled case are returned untouched.
   */
  private withDevtools(model: LanguageModel): LanguageModel {
    if (!this.devtoolsEnabled || typeof model === 'string') {
      return model;
    }
    return this.runWrapLanguageModel({
      model: model as Parameters<typeof wrapLanguageModel>[0]['model'],
      middleware: this.createDevToolsMiddleware(),
      // Passed explicitly so the wrapper reports the underlying model's ids.
      modelId: modelIdFromLanguageModel(model),
      providerId: providerIdFromLanguageModel(model),
    });
  }

  /**
   * Builds the model factory for the configured backend.
   *
   * Optional settings (api key, base URL, vertex project) are only forwarded
   * when they are truthy, so the SDK factories see no explicit empty values.
   *
   * @throws Error when the backend is 'vertex' and `vertex.location` is missing,
   *   or when the backend is not an AI SDK `LanguageModel` backend.
   */
  private createModelFactory(config: KtxLlmConfig, deps: KtxLlmProviderFactoryDeps): (modelId: string) => LanguageModel {
    if (config.backend === 'anthropic') {
      const anthropic = (deps.createAnthropic ?? createAnthropic)({
        ...(config.anthropic?.apiKey ? { apiKey: config.anthropic.apiKey } : {}),
        ...(config.anthropic?.baseURL ? { baseURL: config.anthropic.baseURL } : {}),
        headers: {
          'anthropic-beta': ANTHROPIC_BETA_HEADER,
        },
      });
      return (modelId) => anthropic(modelId);
    }

    if (config.backend === 'vertex') {
      if (!config.vertex?.location) {
        throw new Error('vertex.location is required when KTX LLM backend is vertex');
      }
      const vertex = (deps.createVertexAnthropic ?? createVertexAnthropic)({
        ...(config.vertex.project ? { project: config.vertex.project } : {}),
        location: config.vertex.location,
      });
      return (modelId) => vertex(modelId);
    }

    if (config.backend === 'gateway') {
      const gateway = (deps.createGateway ?? createGateway)({
        ...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}),
        ...(config.gateway?.baseURL ? { baseURL: config.gateway.baseURL } : {}),
        headers: {
          'anthropic-beta': ANTHROPIC_BETA_HEADER,
        },
      });
      return (modelId) => gateway(modelId);
    }

    // Any remaining backend is not served through an AI SDK LanguageModel.
    throw new Error(`${config.backend} is not an AI SDK LanguageModel backend; use KtxLlmRuntimePort`);
  }
}
/**
 * Creates the default `KtxLlmProvider` for the given configuration.
 *
 * @param config - LLM configuration; `modelSlots.default` must be set because
 *   it is the fallback for every role without its own slot.
 * @param deps - Optional replacements for the SDK functions the provider calls.
 * @returns A provider bound to `config`.
 * @throws Error when `modelSlots.default` is missing or empty, and any error
 *   raised while the provider builds its backend model factory.
 */
export function createKtxLlmProvider(config: KtxLlmConfig, deps: KtxLlmProviderFactoryDeps = {}): KtxLlmProvider {
  if (!config.modelSlots.default) {
    throw new Error('modelSlots.default is required');
  }
  return new DefaultKtxLlmProvider(config, deps);
}