mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-28 08:49:38 +02:00
Initial open-source release
This commit is contained in:
commit
1a42152e6f
1199 changed files with 257054 additions and 0 deletions
106
packages/llm/src/embedding-health.test.ts
Normal file
106
packages/llm/src/embedding-health.test.ts
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { runKloEmbeddingHealthCheck } from './embedding-health.js';
|
||||
|
||||
describe('KLO embedding health check', () => {
  type EmbeddingsResponse = {
    data: Array<{ index?: number; embedding: number[] }>;
    usage?: { total_tokens?: number };
  };

  /** Builds a spy client factory whose `embeddings.create` is backed by `create`. */
  const stubClientFactory = (create: () => Promise<EmbeddingsResponse>) =>
    vi.fn(() => ({ embeddings: { create: vi.fn(create) } }));

  /** OpenAI config shared by every case; only the API key varies. */
  const openAIConfig = (apiKey: string): Parameters<typeof runKloEmbeddingHealthCheck>[0] => ({
    backend: 'openai',
    model: 'text-embedding-3-small',
    dimensions: 3,
    openai: { apiKey },
  });

  it('runs a one-shot OpenAI embedding check through the configured provider', async () => {
    const createOpenAIClient = stubClientFactory(async () => ({
      data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }],
    }));

    const result = runKloEmbeddingHealthCheck(openAIConfig('sk-openai-test'), { deps: { createOpenAIClient } });

    await expect(result).resolves.toEqual({ ok: true });
    expect(createOpenAIClient).toHaveBeenCalledWith({ apiKey: 'sk-openai-test', baseURL: undefined });
  });

  it('returns failed when the provider returns the wrong dimensions', async () => {
    const createOpenAIClient = stubClientFactory(async () => ({
      data: [{ index: 0, embedding: [0.1, 0.2] }],
    }));

    const result = runKloEmbeddingHealthCheck(openAIConfig('sk-openai-test'), { deps: { createOpenAIClient } });

    await expect(result).resolves.toEqual({
      ok: false,
      message: 'Embedding provider openai returned vector with 2 dimensions; expected 3',
    });
  });

  it('redacts credential values from health-check failures', async () => {
    const createOpenAIClient = stubClientFactory(async () => {
      throw new Error('401 invalid api key sk-openai-secret');
    });

    const result = runKloEmbeddingHealthCheck(openAIConfig('sk-openai-secret'), { deps: { createOpenAIClient } });

    await expect(result).resolves.toEqual({
      ok: false,
      message: '401 invalid api key [redacted]',
    });
  });

  it('returns failed when the health check times out', async () => {
    // The request never settles, so only the timeout can end the check.
    const createOpenAIClient = stubClientFactory(() => new Promise<EmbeddingsResponse>(() => undefined));

    const result = runKloEmbeddingHealthCheck(openAIConfig('sk-openai-test'), {
      timeoutMs: 1,
      deps: { createOpenAIClient },
    });

    await expect(result).resolves.toEqual({
      ok: false,
      message: 'Embedding health check timed out after 1ms',
    });
  });
});
|
||||
54
packages/llm/src/embedding-health.ts
Normal file
54
packages/llm/src/embedding-health.ts
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
import { createKloEmbeddingProvider, type KloEmbeddingProviderDeps } from './embedding-provider.js';
|
||||
import type { KloEmbeddingConfig } from './types.js';
|
||||
|
||||
/** Outcome of an embedding health check: success, or failure with a human-readable message. */
export type KloEmbeddingHealthCheckResult = { ok: true } | { ok: false; message: string };
|
||||
|
||||
/** Optional settings accepted by `runKloEmbeddingHealthCheck`. */
export interface KloEmbeddingHealthCheckOptions {
  /** Text sent to the provider; when omitted, `'KLO embedding health check'` is used. */
  text?: string;

  /** Time budget for the embedding call in milliseconds; when omitted, 15000 is used. */
  timeoutMs?: number;

  /** Dependency overrides passed through to `createKloEmbeddingProvider`. */
  deps?: KloEmbeddingProviderDeps;
}
|
||||
|
||||
/**
 * Replaces every occurrence of a configured credential in `message` with `[redacted]`.
 *
 * Only `config.openai.apiKey` is treated as a secret; a missing or empty key
 * leaves the message untouched.
 *
 * @param message - Error text that may echo a credential back.
 * @param config - Embedding configuration the credentials are read from.
 * @returns The message with all credential occurrences masked.
 */
function redactHealthCheckMessage(message: string, config: KloEmbeddingConfig): string {
  const secrets: string[] = [];
  const apiKey = config.openai?.apiKey;
  if (typeof apiKey === 'string' && apiKey.length > 0) {
    secrets.push(apiKey);
  }

  let redacted = message;
  for (const secret of secrets) {
    // split/join replaces all occurrences without treating the secret as a regex.
    redacted = redacted.split(secret).join('[redacted]');
  }
  return redacted;
}
|
||||
|
||||
/**
 * Settles with `promise`, or rejects once `timeoutMs` milliseconds have elapsed,
 * whichever happens first.
 *
 * The underlying operation is not cancelled on timeout; only the wait is abandoned.
 *
 * @param promise - Operation to wait for.
 * @param timeoutMs - Time budget in milliseconds.
 * @returns The value `promise` resolves with.
 * @throws Error `Embedding health check timed out after <timeoutMs>ms` on timeout,
 *   or whatever `promise` rejects with.
 */
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`Embedding health check timed out after ${timeoutMs}ms`));
    }, timeoutMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer so a settled call does not keep the event loop alive.
    if (timer) {
      clearTimeout(timer);
    }
  }
}
|
||||
|
||||
/**
 * Performs a single embedding request against the configured provider and
 * reports whether it produced a vector of the configured size.
 *
 * Never rejects: provider construction errors, request failures and timeouts
 * are all converted into `{ ok: false, message }`, with the OpenAI API key
 * masked out of the message.
 *
 * @param config - Embedding configuration to verify.
 * @param options - Optional probe text, timeout (default 15000 ms) and dependency overrides.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, message }`.
 */
export async function runKloEmbeddingHealthCheck(
  config: KloEmbeddingConfig,
  options: KloEmbeddingHealthCheckOptions = {},
): Promise<KloEmbeddingHealthCheckResult> {
  try {
    const provider = createKloEmbeddingProvider(config, options.deps);
    const probeText = options.text ?? 'KLO embedding health check';
    const budgetMs = options.timeoutMs ?? 15_000;
    const vector = await withTimeout(provider.embed(probeText), budgetMs);

    if (vector.length === config.dimensions) {
      return { ok: true };
    }
    return {
      ok: false,
      message: `Embedding provider ${config.backend} returned vector with ${vector.length} dimensions; expected ${config.dimensions}`,
    };
  } catch (error) {
    const raw = error instanceof Error ? error.message : String(error);
    return { ok: false, message: redactHealthCheckMessage(raw, config) };
  }
}
|
||||
146
packages/llm/src/embedding-provider.test.ts
Normal file
146
packages/llm/src/embedding-provider.test.ts
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createKloEmbeddingProvider } from './embedding-provider.js';
|
||||
import type { KloEmbeddingConfig } from './types.js';
|
||||
|
||||
describe('createKloEmbeddingProvider', () => {
  /** Wraps a JSON body in a 200 response, as the embedding HTTP service would return it. */
  const okJson = (body: unknown): Response => new Response(JSON.stringify(body), { status: 200 });

  /** Matcher for the request init of every embedding call. */
  const postRequest = () => expect.objectContaining({ method: 'POST' });

  it('creates deterministic embeddings with stable dimensions', async () => {
    const provider = createKloEmbeddingProvider({
      backend: 'deterministic',
      model: 'sha256',
      dimensions: 6,
      batchSize: 4,
    });

    const revenue = await provider.embed('Revenue policy');

    expect(revenue).toHaveLength(6);
    expect(await provider.embed('Revenue policy')).toEqual(revenue);
    expect(await provider.embed('Approval policy')).not.toEqual(revenue);
    expect(await provider.embedMany(['a', 'b'])).toHaveLength(2);
    expect(provider.maxBatchSize).toBe(4);
  });

  it('rejects gateway embeddings', () => {
    // Round-trip through JSON so the unsupported backend bypasses the static types.
    const serialized = JSON.stringify({
      backend: 'gateway',
      model: 'provider/text-embedding',
      dimensions: 2,
      gateway: { apiKey: 'gateway-key' }, // pragma: allowlist secret
    });
    const config = JSON.parse(serialized) as KloEmbeddingConfig;

    expect(() => createKloEmbeddingProvider(config)).toThrow('Unsupported KLO embedding backend: gateway');
  });

  it('uses OpenAI embeddings with configured dimensions', async () => {
    const createOpenAIClient = vi.fn(() => ({
      embeddings: {
        create: vi.fn().mockResolvedValue({
          data: [{ index: 0, embedding: [0.1, 0.2] }],
          usage: { total_tokens: 7 },
        }),
      },
    }));

    const provider = createKloEmbeddingProvider(
      {
        backend: 'openai',
        model: 'text-embedding-3-small',
        dimensions: 2,
        openai: { apiKey: 'openai-key', baseURL: 'https://openai.test/v1' }, // pragma: allowlist secret
      },
      { createOpenAIClient },
    );

    expect(await provider.embed('hello')).toEqual([0.1, 0.2]);
    expect(createOpenAIClient).toHaveBeenCalledWith({
      apiKey: 'openai-key', // pragma: allowlist secret
      baseURL: 'https://openai.test/v1',
    });
  });

  it('supports sentence-transformers pathPrefix defaults and explicit empty prefix', async () => {
    // First response answers the startup probe, second the actual request.
    const pythonFetch = vi
      .fn()
      .mockResolvedValueOnce(okJson({ embedding: [0.1, 0.2] }))
      .mockResolvedValueOnce(okJson({ embedding: [0.3, 0.4] }));

    const provider = createKloEmbeddingProvider(
      {
        backend: 'sentence-transformers',
        model: 'all-MiniLM-L6-v2',
        dimensions: 2,
        sentenceTransformers: { baseURL: 'https://python.test/' },
      },
      { fetch: pythonFetch },
    );

    expect(await provider.embed('hello')).toEqual([0.3, 0.4]);
    for (const call of [1, 2]) {
      expect(pythonFetch).toHaveBeenNthCalledWith(call, 'https://python.test/api/embeddings/compute', postRequest());
    }

    const daemonFetch = vi
      .fn()
      .mockResolvedValueOnce(okJson({ embedding: [0.1, 0.2] }))
      .mockResolvedValueOnce(okJson({ embeddings: [[0.5, 0.6]] }));

    const daemonProvider = createKloEmbeddingProvider(
      {
        backend: 'sentence-transformers',
        model: 'all-MiniLM-L6-v2',
        dimensions: 2,
        sentenceTransformers: { baseURL: 'https://daemon.test/base/', pathPrefix: '' },
      },
      { fetch: daemonFetch },
    );

    expect(await daemonProvider.embedMany(['hello'])).toEqual([[0.5, 0.6]]);
    expect(daemonFetch).toHaveBeenNthCalledWith(1, 'https://daemon.test/base/embeddings/compute', postRequest());
    expect(daemonFetch).toHaveBeenNthCalledWith(2, 'https://daemon.test/base/embeddings/compute-bulk', postRequest());
  });

  it('falls back to one-shot klo-daemon inference when the local HTTP daemon is unavailable', async () => {
    const failingFetch = vi.fn().mockRejectedValue(new TypeError('fetch failed'));
    const runSentenceTransformersJson = vi
      .fn()
      .mockResolvedValueOnce({ embedding: [0.1, 0.2] })
      .mockResolvedValueOnce({
        embeddings: [
          [0.3, 0.4],
          [0.5, 0.6],
        ],
      });

    const provider = createKloEmbeddingProvider(
      {
        backend: 'sentence-transformers',
        model: 'all-MiniLM-L6-v2',
        dimensions: 2,
        sentenceTransformers: { baseURL: 'http://127.0.0.1:8765', pathPrefix: '' },
      },
      { fetch: failingFetch, runSentenceTransformersJson },
    );

    expect(await provider.embedMany(['hello', 'world'])).toEqual([
      [0.3, 0.4],
      [0.5, 0.6],
    ]);
    // HTTP is attempted only for the probe; afterwards the process runner is used directly.
    expect(failingFetch).toHaveBeenCalledTimes(1);
    expect(runSentenceTransformersJson).toHaveBeenNthCalledWith(1, 'embedding-compute', {
      text: '__klo_embedding_probe__',
    });
    expect(runSentenceTransformersJson).toHaveBeenNthCalledWith(2, 'embedding-compute-bulk', {
      texts: ['hello', 'world'],
    });
  });
});
|
||||
379
packages/llm/src/embedding-provider.ts
Normal file
379
packages/llm/src/embedding-provider.ts
Normal file
|
|
@ -0,0 +1,379 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { join } from 'node:path';
|
||||
import OpenAI from 'openai';
|
||||
import type { KloEmbeddingConfig, KloEmbeddingProvider } from './types.js';
|
||||
|
||||
/** Signature of the global `fetch`, so an alternative implementation can be injected. */
type FetchFn = typeof fetch;

/** Sub-commands appended as the last argument when the `klo-daemon` executable is spawned. */
type SentenceTransformersCommand = 'embedding-compute' | 'embedding-compute-bulk';

/** Runs one sub-command with a JSON payload and resolves with the JSON object it produced. */
type SentenceTransformersJsonRunner = (
  subcommand: SentenceTransformersCommand,
  payload: Record<string, unknown>,
) => Promise<Record<string, unknown>>;

/** Executable plus its leading arguments; the sub-command is appended after `args`. */
type SentenceTransformersProcessCommand = {
  command: string;
  args: string[];
};
|
||||
|
||||
/**
 * Injection points for `createKloEmbeddingProvider`. Every member is optional;
 * omitted members fall back to the real implementation.
 */
export interface KloEmbeddingProviderDeps {
  /** Factory used instead of constructing the `openai` SDK client. */
  createOpenAIClient?: (options: { apiKey?: string; baseURL?: string }) => {
    embeddings: {
      create(input: {
        model: string;
        input: string | string[];
        dimensions: number;
        encoding_format: 'float';
      }): Promise<{ data: Array<{ index?: number; embedding: number[] }>; usage?: { total_tokens?: number } }>;
    };
  };

  /** HTTP client for the sentence-transformers backend; defaults to the global `fetch`. */
  fetch?: FetchFn;

  /** Replaces the spawned-process runner used when the HTTP request fails. */
  runSentenceTransformersJson?: SentenceTransformersJsonRunner;

  /** Executable to spawn instead of the default `klo-daemon` candidates. */
  sentenceTransformersCommand?: string;

  /** Arguments placed before the sub-command; only read when `sentenceTransformersCommand` is set. */
  sentenceTransformersArgs?: string[];

  /** Working directory for the spawned process. */
  sentenceTransformersCwd?: string;

  /** Environment entries layered on top of `process.env` for the spawned process. */
  sentenceTransformersEnv?: NodeJS.ProcessEnv;
}
|
||||
|
||||
/** Maximum batch size used by providers when the config does not set `batchSize`. */
const DEFAULT_BATCH_SIZE = 100;
|
||||
|
||||
/**
 * Derives a reproducible pseudo-embedding from the SHA-256 digest of `text`.
 *
 * Each component maps one digest byte from [0, 255] onto [-1, 1], rounded to
 * six decimals. The digest bytes are reused cyclically when `dimensions`
 * exceeds the digest length.
 *
 * @param text - Input whose digest seeds the vector.
 * @param dimensions - Number of components to produce.
 * @returns A vector of length `dimensions`.
 */
function deterministicVector(text: string, dimensions: number): number[] {
  const digest = createHash('sha256').update(text).digest();
  const vector: number[] = [];
  for (let position = 0; position < dimensions; position += 1) {
    const byte = digest[position % digest.length];
    const scaled = (byte / 255) * 2 - 1;
    vector.push(Number(scaled.toFixed(6)));
  }
  return vector;
}
|
||||
|
||||
/**
 * Rejects text that is empty or consists only of whitespace.
 *
 * @throws Error `Embedding text must be non-empty`.
 */
function assertNonEmptyText(text: string): void {
  const hasContent = text.trim().length > 0;
  if (hasContent) {
    return;
  }
  throw new Error('Embedding text must be non-empty');
}
|
||||
|
||||
/**
 * Validates a batch before it is sent to a provider: it must contain between
 * one and `maxBatchSize` texts, and every text must be non-empty.
 *
 * @throws Error describing the first violated rule.
 */
function assertBatchSize(texts: string[], maxBatchSize: number): void {
  const count = texts.length;
  if (count === 0) {
    throw new Error('Embedding text batch must not be empty');
  }
  if (count > maxBatchSize) {
    throw new Error(`Embedding batch size ${count} exceeds maximum ${maxBatchSize}`);
  }
  texts.forEach((text) => assertNonEmptyText(text));
}
|
||||
|
||||
/**
 * Checks that a provider returned an array of numbers with exactly `expected`
 * entries and hands the same array back.
 *
 * @param vector - Value returned by the provider.
 * @param expected - Required number of components.
 * @param backend - Backend name interpolated into error messages.
 * @returns `vector`, unchanged.
 * @throws Error when the value is not an array of numbers or has the wrong length.
 */
function assertVectorDimensions(vector: number[], expected: number, backend: string): number[] {
  const wellFormed = Array.isArray(vector) && vector.every((item) => typeof item === 'number');
  if (!wellFormed) {
    throw new Error(`Embedding provider ${backend} returned a malformed vector`);
  }
  if (vector.length === expected) {
    return vector;
  }
  throw new Error(
    `Embedding provider ${backend} returned vector with ${vector.length} dimensions; expected ${expected}`,
  );
}
|
||||
|
||||
/**
 * Joins a base URL, an optional path prefix and a path with exactly one `/`
 * between the parts. Trailing slashes of the base, surrounding slashes of the
 * prefix and leading slashes of the path are dropped; an empty prefix is skipped.
 */
function joinUrl(baseURL: string, pathPrefix: string, path: string): string {
  const segments = [baseURL.replace(/\/+$/, '')];

  const prefix = pathPrefix.replace(/^\/+|\/+$/g, '');
  if (prefix) {
    segments.push(prefix);
  }

  segments.push(path.replace(/^\/+/, ''));
  return segments.join('/');
}
|
||||
|
||||
/**
 * Renders a thrown value as `Name: message`, following `Error.cause` links
 * recursively (`...; cause: Name: message`). Non-Error values are stringified.
 */
function errorText(error: unknown): string {
  if (!(error instanceof Error)) {
    return String(error);
  }
  const summary = `${error.name}: ${error.message}`;
  if (!error.cause) {
    return summary;
  }
  return `${summary}; cause: ${errorText(error.cause)}`;
}
|
||||
|
||||
/**
 * Parses process output as JSON and requires the top-level value to be an
 * object (not an array, primitive or `null`).
 *
 * @param raw - Text to parse.
 * @param subcommand - Sub-command name interpolated into the error message.
 * @throws SyntaxError when `raw` is not valid JSON.
 * @throws Error when the parsed value is not an object.
 */
function parseJsonObject(raw: string, subcommand: string): Record<string, unknown> {
  const value: unknown = JSON.parse(raw);
  const isJsonObject = typeof value === 'object' && value !== null && !Array.isArray(value);
  if (isJsonObject) {
    return value as Record<string, unknown>;
  }
  throw new Error(`klo-daemon ${subcommand} returned non-object JSON`);
}
|
||||
|
||||
/**
 * Tells whether `error` is an Error whose `code` or `errno` property equals
 * `'ENOENT'`, which is how a failed spawn of a missing executable is recognised here.
 */
function isCommandNotFound(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  const { code, errno } = error as { code?: unknown; errno?: unknown };
  return code === 'ENOENT' || errno === 'ENOENT';
}
|
||||
|
||||
/**
 * Lists the `klo-daemon` executables to try, in order: the bare command name,
 * then the copy inside `.venv`, then the copy inside `klo/.venv`. The virtualenv
 * paths are relative and use the Windows layout (`Scripts`, `.exe`) on `win32`.
 */
function defaultSentenceTransformersProcessCommands(): SentenceTransformersProcessCommand[] {
  const onWindows = process.platform === 'win32';
  const binDir = onWindows ? 'Scripts' : 'bin';
  const executable = onWindows ? 'klo-daemon.exe' : 'klo-daemon';

  const candidates = [
    'klo-daemon',
    join('.venv', binDir, executable),
    join('klo', '.venv', binDir, executable),
  ];
  return candidates.map((command) => ({ command, args: [] }));
}
|
||||
|
||||
/**
 * Builds a runner that executes one specific executable per request.
 *
 * For each call the process is spawned as `<command> <...args> <subcommand>`,
 * the payload is written to its stdin as a single JSON line, and stdout is
 * parsed as a JSON object once the process closes.
 *
 * The returned promise rejects when:
 * - the process cannot be spawned (the spawn error is passed through unchanged,
 *   so callers can inspect `code`, e.g. `ENOENT`);
 * - the process exits with a non-zero code (message carries stderr, or the exit code);
 * - stdout is not a JSON object.
 *
 * @param options - Executable, leading arguments, and optional `cwd` / extra `env`
 *   (merged over `process.env`).
 */
function runSentenceTransformersProcessCommand(
  options: SentenceTransformersProcessCommand & {
    cwd?: string;
    env?: NodeJS.ProcessEnv;
  },
): SentenceTransformersJsonRunner {
  return async (
    subcommand: SentenceTransformersCommand,
    payload: Record<string, unknown>,
  ): Promise<Record<string, unknown>> =>
    new Promise((resolve, reject) => {
      const child = spawn(options.command, [...options.args, subcommand], {
        cwd: options.cwd,
        env: { ...process.env, ...options.env },
        stdio: ['pipe', 'pipe', 'pipe'],
      });
      const stdout: Buffer[] = [];
      const stderr: Buffer[] = [];

      child.stdout.on('data', (chunk: Buffer) => stdout.push(chunk));
      child.stderr.on('data', (chunk: Buffer) => stderr.push(chunk));
      child.on('error', reject);
      child.on('close', (code) => {
        const stdoutText = Buffer.concat(stdout).toString('utf8').trim();
        const stderrText = Buffer.concat(stderr).toString('utf8').trim();
        if (code !== 0) {
          reject(new Error(`klo-daemon ${subcommand} failed: ${stderrText || `exit code ${code}`}`));
          return;
        }
        try {
          resolve(parseJsonObject(stdoutText, subcommand));
        } catch (error) {
          reject(error);
        }
      });

      // Writing to the stdin of a process that failed to start, or that exited
      // without reading its input, emits an 'error' (typically EPIPE) on the
      // stream. Without a listener that becomes an uncaught exception. The
      // outcome is still decided by the 'error' / 'close' handlers above, which
      // keeps the original spawn error (e.g. ENOENT) visible to the caller.
      child.stdin.on('error', () => undefined);
      child.stdin.end(`${JSON.stringify(payload)}\n`);
    });
}
|
||||
|
||||
/**
 * Builds a runner that tries the given executables in order.
 *
 * The next candidate is attempted only when the previous one could not be
 * found (`ENOENT`); any other failure stops the search. If no candidate
 * succeeds, the collected per-command errors are joined into one Error.
 *
 * @param options - Candidate commands plus `cwd` / `env` applied to every spawn.
 */
function runSentenceTransformersProcessJson(options: {
  commands: SentenceTransformersProcessCommand[];
  cwd?: string;
  env?: NodeJS.ProcessEnv;
}): SentenceTransformersJsonRunner {
  return async (
    subcommand: SentenceTransformersCommand,
    payload: Record<string, unknown>,
  ): Promise<Record<string, unknown>> => {
    const failures: string[] = [];

    for (const candidate of options.commands) {
      const run = runSentenceTransformersProcessCommand({
        ...candidate,
        cwd: options.cwd,
        env: options.env,
      });
      try {
        return await run(subcommand, payload);
      } catch (error) {
        failures.push(`${candidate.command}: ${errorText(error)}`);
        if (isCommandNotFound(error)) {
          continue;
        }
        // The executable exists but failed; other candidates would not help.
        break;
      }
    }

    throw new Error(`klo-daemon ${subcommand} failed: ${failures.join('; ')}`);
  };
}
|
||||
|
||||
/**
 * Provider that derives vectors from a SHA-256 digest of the text instead of
 * calling a model; the same text always yields the same vector.
 */
class DeterministicEmbeddingProvider implements KloEmbeddingProvider {
  readonly maxBatchSize: number;

  /**
   * @param dimensions - Length of every produced vector.
   * @param batchSize - Upper bound for `embedMany`; defaults to `DEFAULT_BATCH_SIZE`.
   */
  constructor(readonly dimensions: number, batchSize = DEFAULT_BATCH_SIZE) {
    this.maxBatchSize = batchSize;
  }

  /** Embeds one non-empty text. */
  async embed(text: string): Promise<number[]> {
    assertNonEmptyText(text);
    const vector = deterministicVector(text, this.dimensions);
    return vector;
  }

  /** Embeds a validated batch, one vector per text, in input order. */
  async embedMany(texts: string[]): Promise<number[][]> {
    assertBatchSize(texts, this.maxBatchSize);
    const vectors: number[][] = [];
    for (const text of texts) {
      vectors.push(deterministicVector(text, this.dimensions));
    }
    return vectors;
  }
}
|
||||
|
||||
/**
 * Provider backed by the OpenAI embeddings endpoint (or an injected client
 * with the same `embeddings.create` shape).
 */
class OpenAIEmbeddingProvider implements KloEmbeddingProvider {
  readonly dimensions: number;
  readonly maxBatchSize: number;
  private readonly client: ReturnType<NonNullable<KloEmbeddingProviderDeps['createOpenAIClient']>>;

  /**
   * @throws Error when `config.openai.apiKey` is missing or empty.
   */
  constructor(
    private readonly config: KloEmbeddingConfig,
    deps: KloEmbeddingProviderDeps,
  ) {
    if (!config.openai?.apiKey) {
      throw new Error('openai.apiKey is required when KLO embedding backend is openai');
    }
    this.dimensions = config.dimensions;
    this.maxBatchSize = config.batchSize ?? DEFAULT_BATCH_SIZE;

    if (deps.createOpenAIClient) {
      // Injected factories always receive both keys, even when baseURL is undefined.
      this.client = deps.createOpenAIClient({
        apiKey: config.openai.apiKey,
        baseURL: config.openai.baseURL,
      });
    } else {
      // The SDK only gets a baseURL key when one is configured.
      this.client = new OpenAI({
        apiKey: config.openai.apiKey,
        ...(config.openai.baseURL ? { baseURL: config.openai.baseURL } : {}),
      });
    }
  }

  /** Embeds one text by sending it as a batch of one. */
  async embed(text: string): Promise<number[]> {
    const vectors = await this.embedMany([text]);
    const first = vectors[0];
    if (!first) {
      throw new Error('Embedding provider openai returned no embedding');
    }
    return first;
  }

  /**
   * Embeds a batch and returns the vectors in input order.
   *
   * @throws Error when the batch is invalid, the response count differs from
   *   the input count, or any vector is malformed or has the wrong size.
   */
  async embedMany(texts: string[]): Promise<number[][]> {
    assertBatchSize(texts, this.maxBatchSize);

    const request = {
      model: this.config.model,
      // A single text is sent as a plain string rather than a one-element array.
      input: texts.length === 1 ? texts[0] : texts,
      dimensions: this.dimensions,
      encoding_format: 'float' as const,
    };
    const response = await this.client.embeddings.create(request);

    // Order by the reported index (a missing index counts as 0) before extracting vectors.
    const ordered = [...response.data].sort((left, right) => (left.index ?? 0) - (right.index ?? 0));
    const vectors = ordered.map((entry) => entry.embedding);

    if (vectors.length !== texts.length) {
      throw new Error(`Embedding provider openai returned ${vectors.length} embeddings for ${texts.length} texts`);
    }
    return vectors.map((vector) => assertVectorDimensions(vector, this.dimensions, 'openai'));
  }
}
|
||||
|
||||
/**
 * Provider backed by a sentence-transformers service.
 *
 * Requests go over HTTP first (`<baseURL>/<pathPrefix>/embeddings/...`). When
 * an HTTP request fails and the process runner succeeds for the same request,
 * the provider switches to the process runner for all later requests.
 *
 * A probe embedding is requested at construction time; `embed` and `embedMany`
 * wait for it and fail with its error if it failed.
 */
class SentenceTransformersEmbeddingProvider implements KloEmbeddingProvider {
  readonly dimensions: number;
  readonly maxBatchSize: number;
  private readonly fetch: FetchFn;
  private readonly baseURL: string;
  private readonly pathPrefix: string;
  private readonly runJson: SentenceTransformersJsonRunner;
  private readonly startupProbe: Promise<void>;
  // Set once the process runner has succeeded after an HTTP failure.
  private useProcessRunner = false;

  /**
   * @throws Error when `config.sentenceTransformers.baseURL` is missing or empty.
   */
  constructor(config: KloEmbeddingConfig, deps: KloEmbeddingProviderDeps) {
    if (!config.sentenceTransformers?.baseURL) {
      throw new Error('sentenceTransformers.baseURL is required when KLO embedding backend is sentence-transformers');
    }
    this.dimensions = config.dimensions;
    this.maxBatchSize = config.batchSize ?? DEFAULT_BATCH_SIZE;
    this.fetch = deps.fetch ?? fetch;
    this.baseURL = config.sentenceTransformers.baseURL;
    // `??` keeps an explicit empty prefix; only an absent prefix becomes '/api'.
    this.pathPrefix = config.sentenceTransformers.pathPrefix ?? '/api';
    this.runJson =
      deps.runSentenceTransformersJson ??
      runSentenceTransformersProcessJson({
        commands: deps.sentenceTransformersCommand
          ? [{ command: deps.sentenceTransformersCommand, args: deps.sentenceTransformersArgs ?? [] }]
          : defaultSentenceTransformersProcessCommands(),
        cwd: deps.sentenceTransformersCwd,
        env: deps.sentenceTransformersEnv,
      });
    this.startupProbe = this.requestSingle('__klo_embedding_probe__').then((embedding) => {
      assertVectorDimensions(embedding, this.dimensions, 'sentence-transformers');
    });
    // The probe may fail before any caller awaits it. Register a no-op handler
    // so that failure is not reported as an unhandled rejection; callers that
    // await `startupProbe` still observe the original error.
    void this.startupProbe.catch(() => undefined);
  }

  /**
   * Embeds one non-empty text.
   *
   * @throws Error when the text is blank, the startup probe failed, the request
   *   failed on both transports, or the vector is malformed or has the wrong size.
   */
  async embed(text: string): Promise<number[]> {
    assertNonEmptyText(text);
    await this.startupProbe;
    return assertVectorDimensions(await this.requestSingle(text), this.dimensions, 'sentence-transformers');
  }

  /**
   * Embeds a batch with a single bulk request.
   *
   * @throws Error when the batch is invalid, the startup probe failed, the
   *   response has no `embeddings` array or a mismatching count, or any vector
   *   is malformed or has the wrong size.
   */
  async embedMany(texts: string[]): Promise<number[][]> {
    assertBatchSize(texts, this.maxBatchSize);
    await this.startupProbe;
    const response = await this.requestJson('embedding-compute-bulk', '/embeddings/compute-bulk', { texts });
    if (
      !response ||
      typeof response !== 'object' ||
      !('embeddings' in response) ||
      !Array.isArray(response.embeddings)
    ) {
      throw new Error('Embedding provider sentence-transformers returned malformed bulk response');
    }
    if (response.embeddings.length !== texts.length) {
      const count = response.embeddings.length;
      throw new Error(
        `Embedding provider sentence-transformers returned ${count} embeddings for ${texts.length} texts`,
      );
    }
    return response.embeddings.map((embedding: unknown) =>
      assertVectorDimensions(embedding as number[], this.dimensions, 'sentence-transformers'),
    );
  }

  /** Requests one embedding and checks only that the response carries an `embedding` array. */
  private async requestSingle(text: string): Promise<number[]> {
    const response = await this.requestJson('embedding-compute', '/embeddings/compute', { text });
    if (!response || typeof response !== 'object' || !('embedding' in response) || !Array.isArray(response.embedding)) {
      throw new Error('Embedding provider sentence-transformers returned malformed single response');
    }
    return response.embedding;
  }

  /**
   * Sends one request over HTTP, falling back to the process runner on failure.
   *
   * @param command - Sub-command used by the process runner.
   * @param path - HTTP path for the same operation.
   * @param body - JSON payload, identical for both transports.
   * @throws Error naming both failures when HTTP and the process runner fail.
   */
  private async requestJson(
    command: SentenceTransformersCommand,
    path: string,
    body: Record<string, unknown>,
  ): Promise<Record<string, unknown>> {
    if (this.useProcessRunner) {
      return this.runJson(command, body);
    }

    try {
      return await this.postJson(path, body);
    } catch (httpError) {
      try {
        const response = await this.runJson(command, body);
        // Only commit to the process runner after it has actually worked.
        this.useProcessRunner = true;
        return response;
      } catch (processError) {
        throw new Error(
          `Embedding provider sentence-transformers local HTTP request failed (${errorText(
            httpError,
          )}) and klo-daemon fallback failed (${errorText(processError)})`,
        );
      }
    }
  }

  /**
   * POSTs `body` as JSON and returns the parsed response object.
   *
   * @throws Error on a non-2xx status or when the response JSON is not an object.
   */
  private async postJson(path: string, body: Record<string, unknown>): Promise<Record<string, unknown>> {
    const response = await this.fetch(joinUrl(this.baseURL, this.pathPrefix, path), {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(body),
    });
    if (!response.ok) {
      throw new Error(`Embedding provider sentence-transformers request failed with HTTP ${response.status}`);
    }
    const parsed = (await response.json()) as unknown;
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error('Embedding provider sentence-transformers returned non-object JSON');
    }
    return parsed as Record<string, unknown>;
  }
}
|
||||
|
||||
/**
 * Instantiates the embedding provider selected by `config.backend`.
 *
 * @param config - Embedding configuration; `backend` must be `'deterministic'`,
 *   `'openai'` or `'sentence-transformers'`.
 * @param deps - Optional dependency overrides handed to the provider.
 * @returns The provider for the configured backend.
 * @throws Error `Unsupported KLO embedding backend: <backend>` for any other
 *   backend, plus whatever the chosen provider's constructor throws.
 */
export function createKloEmbeddingProvider(
  config: KloEmbeddingConfig,
  deps: KloEmbeddingProviderDeps = {},
): KloEmbeddingProvider {
  if (config.backend === 'deterministic') {
    return new DeterministicEmbeddingProvider(config.dimensions, config.batchSize);
  }
  if (config.backend === 'openai') {
    return new OpenAIEmbeddingProvider(config, deps);
  }
  if (config.backend === 'sentence-transformers') {
    return new SentenceTransformersEmbeddingProvider(config, deps);
  }
  // Reached only when the config bypassed the static types (e.g. parsed JSON).
  throw new Error(`Unsupported KLO embedding backend: ${String((config as { backend?: string }).backend)}`);
}
|
||||
30
packages/llm/src/index.ts
Normal file
30
packages/llm/src/index.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
export { createKloEmbeddingProvider } from './embedding-provider.js';
|
||||
export { runKloEmbeddingHealthCheck } from './embedding-health.js';
|
||||
export { KloMessageBuilder } from './message-builder.js';
|
||||
export type { KloEmbeddingHealthCheckOptions, KloEmbeddingHealthCheckResult } from './embedding-health.js';
|
||||
export type { KloEmbeddingProviderDeps } from './embedding-provider.js';
|
||||
export type { KloLlmHealthCheckDeps, KloLlmHealthCheckOptions, KloLlmHealthCheckResult } from './model-health.js';
|
||||
export { runKloLlmHealthCheck } from './model-health.js';
|
||||
export {
|
||||
createKloLlmProvider,
|
||||
isAnthropicProtocolModel,
|
||||
modelIdFromLanguageModel,
|
||||
type KloLlmProviderFactoryDeps,
|
||||
} from './model-provider.js';
|
||||
export type {
|
||||
KloEmbeddingBackend,
|
||||
KloEmbeddingConfig,
|
||||
KloEmbeddingProvider,
|
||||
KloEmbeddingTokenUsageEvent,
|
||||
KloJsonValue,
|
||||
KloLlmBackend,
|
||||
KloLlmConfig,
|
||||
KloLlmProvider,
|
||||
KloModelRole,
|
||||
KloPromptCacheTtl,
|
||||
KloPromptCachingConfig,
|
||||
KloPromptParts,
|
||||
KloProviderOptions,
|
||||
KloTokenUsageEvent,
|
||||
} from './types.js';
|
||||
export { KLO_MODEL_ROLES } from './types.js';
|
||||
113
packages/llm/src/message-builder.test.ts
Normal file
113
packages/llm/src/message-builder.test.ts
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import type { ModelMessage } from 'ai';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { KloMessageBuilder } from './message-builder.js';
|
||||
import { createKloLlmProvider } from './model-provider.js';
|
||||
|
||||
/**
 * Creates a message builder over a gateway-backed provider with prompt caching
 * switched on; `overrides` are merged over `{ enabled: true }`.
 */
function makeBuilder(overrides: Parameters<typeof createKloLlmProvider>[0]['promptCaching'] = {}) {
  const promptCaching = { enabled: true, ...overrides };
  return new KloMessageBuilder(
    createKloLlmProvider({
      backend: 'gateway',
      gateway: { baseURL: 'https://gateway.test' },
      modelSlots: { default: 'anthropic/claude-sonnet-4-6' },
      promptCaching,
    }),
  );
}
|
||||
|
||||
describe('KloMessageBuilder.build', () => {
  /** Expected Anthropic cache marker for a given TTL. */
  const cacheMarker = (ttl: string) => ({ anthropic: { cacheControl: { type: 'ephemeral', ttl } } });

  /** Reads `providerOptions` off a message, content part or tool without typing it. */
  const providerOptionsOf = (carrier: unknown): unknown =>
    (carrier as { providerOptions?: unknown }).providerOptions;

  it('caches static system, last sorted tool, and last history message', () => {
    const assistantReply = {
      role: 'assistant',
      content: [
        { type: 'text', text: 'reply A' },
        { type: 'text', text: 'reply B' },
      ],
    } as ModelMessage;

    const out = makeBuilder().build({
      parts: { staticSystem: 'STATIC', dynamicSystem: 'DYNAMIC' },
      history: [{ role: 'user', content: 'first' }, assistantReply],
      currentMessage: { role: 'user', content: 'now' },
      tools: {
        zoo: { description: 'z' },
        apple: { description: 'a' },
      },
      model: 'anthropic/claude-sonnet-4-6',
    });

    expect(out.messages[0]).toMatchObject({
      role: 'system',
      content: 'STATIC',
      providerOptions: cacheMarker('1h'),
    });
    expect(out.messages[1]).toMatchObject({ role: 'system', content: 'DYNAMIC' });
    expect(providerOptionsOf(out.messages[1])).toBeUndefined();

    const lastHistoryParts = (out.messages[3] as { content: unknown[] }).content;
    expect(providerOptionsOf(lastHistoryParts[1])).toEqual(cacheMarker('5m'));

    expect(Object.keys(out.tools)).toEqual(['apple', 'zoo']);
    expect(providerOptionsOf(out.tools.zoo)).toEqual(cacheMarker('1h'));
  });

  it('wraps leading user context onto currentMessage as a system reminder part', () => {
    const out = makeBuilder().build({
      parts: { staticSystem: 'STATIC', leadingUserContext: 'current_date: 2026-05-04' },
      history: [],
      currentMessage: { role: 'user', content: 'question' },
      tools: {},
      model: 'anthropic/claude-sonnet-4-6',
    });

    const lastMessage = out.messages[out.messages.length - 1];
    expect(lastMessage).toMatchObject({
      role: 'user',
      content: [
        { type: 'text', text: '<system-reminder>\ncurrent_date: 2026-05-04\n</system-reminder>' },
        { type: 'text', text: 'question' },
      ],
    });
  });

  it('omits cache markers for non-Anthropic protocol models', () => {
    const out = makeBuilder().wrapSimple({
      system: 'SYS',
      messages: [{ role: 'user', content: 'q' }],
      tools: { z: {} },
      model: 'gpt-5',
    });

    expect(providerOptionsOf(out.messages[0])).toBeUndefined();
    expect(providerOptionsOf(out.tools.z)).toBeUndefined();
  });

  it('clamps every TTL to 5m for Vertex when vertexFallbackTo5m is enabled', () => {
    const provider = createKloLlmProvider({
      backend: 'vertex',
      vertex: { project: 'klo-test', location: 'us-east5' },
      modelSlots: { default: 'claude-sonnet-4-6' },
      promptCaching: {
        enabled: true,
        systemTtl: '1h',
        toolsTtl: '1h',
        historyTtl: '1h',
        vertexFallbackTo5m: true,
      },
    });

    const out = new KloMessageBuilder(provider).build({
      parts: { staticSystem: 'STATIC' },
      history: [{ role: 'user', content: 'history' }],
      currentMessage: { role: 'user', content: 'now' },
      tools: { z: {} },
      model: 'claude-sonnet-4-6',
    });

    /** Reads the cache TTL off anything carrying Anthropic provider options. */
    const ttlOf = (carrier: unknown): unknown =>
      (carrier as { providerOptions: any }).providerOptions.anthropic.cacheControl.ttl;

    expect(ttlOf(out.messages[0])).toBe('5m');
    expect(ttlOf((out.messages[1] as { content: unknown[] }).content[0])).toBe('5m');
    expect(ttlOf(out.tools.z)).toBe('5m');
  });
});
|
||||
197
packages/llm/src/message-builder.ts
Normal file
197
packages/llm/src/message-builder.ts
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
import type { LanguageModel, ModelMessage, ToolSet } from 'ai';
|
||||
import { isAnthropicProtocolModel } from './model-provider.js';
|
||||
import type { KloLlmProvider, KloPromptCacheTtl, KloPromptParts } from './types.js';
|
||||
|
||||
/** Tool collection accepted by the builder: an AI SDK `ToolSet` or a plain name-keyed record. */
type ToolMap = ToolSet | Record<string, Record<string, unknown>>;

/**
 * Per-builder switches for the three cache-marker positions.
 * A flag left unset falls back to the provider's prompt-caching config.
 */
interface KloMessageBuilderOptions {
  /** Mark the static system message. */
  cacheSystem?: boolean;
  /** Mark the last tool after sorting by name. */
  cacheTools?: boolean;
  /** Mark the last history message. */
  cacheLastHistory?: boolean;
}

/** Input of `KloMessageBuilder.build`. */
interface KloBuildInput {
  /** Prompt fragments; the builder reads `staticSystem`, `dynamicSystem` and `leadingUserContext`. */
  parts: KloPromptParts;
  /** Prior conversation, emitted between the system messages and `currentMessage`. */
  history: ModelMessage[];
  /** Message for this turn; always emitted last. */
  currentMessage: ModelMessage;
  tools: ToolMap;
  /** Target model instance or model id; decides whether cache markers apply. */
  model: LanguageModel | string;
}

/** Input of `KloMessageBuilder.wrapSimple`. */
interface KloWrapSimpleInput {
  /** Optional system prompt; no system message is emitted when empty or missing. */
  system?: string;
  /** Full message list; the last entry is the one that can receive the history marker. */
  messages?: ModelMessage[];
  tools?: ToolMap;
  /** Target model instance or model id; decides whether cache markers apply. */
  model: LanguageModel | string;
}

/** Result of both builder entry points. */
interface KloBuildOutput {
  messages: ModelMessage[];
  /** Tools re-keyed in sorted name order. */
  tools: ToolMap;
}
|
||||
|
||||
/**
 * Builds the `messages` / `tools` payload for a model call and attaches
 * prompt-cache markers obtained from the provider.
 *
 * Markers are only added when the provider's prompt-caching config is enabled
 * and the target model passes `isAnthropicProtocolModel`. Each of the three
 * marker positions (system message, last tool, last history message) can be
 * switched off through the constructor options or the provider config.
 */
export class KloMessageBuilder {
  /**
   * @param provider Source of the caching config, cache markers and active backend.
   * @param options Per-builder overrides; an unset flag falls back to the provider config.
   */
  constructor(
    private readonly provider: KloLlmProvider,
    private readonly options: KloMessageBuilderOptions = {},
  ) {}

  /**
   * Emits, in order: the static system message, the optional dynamic system
   * message, the history, and the current message (with any leading user
   * context prepended). Tools are returned sorted by name.
   */
  build(input: KloBuildInput): KloBuildOutput {
    const cfg = this.provider.promptCachingConfig();
    const cachingActive = cfg.enabled && isAnthropicProtocolModel(input.model);
    const ttls = this.resolveTtls(input.model);
    const messages: ModelMessage[] = [];

    const systemMessage: ModelMessage & { providerOptions?: unknown } = {
      role: 'system',
      content: input.parts.staticSystem,
    };
    if (cachingActive && this.cacheSystemEnabled()) {
      systemMessage.providerOptions = this.provider.cacheMarker(ttls.systemTtl, input.model);
    }
    messages.push(systemMessage);

    // The dynamic system text is never marked.
    if (input.parts.dynamicSystem) {
      messages.push({ role: 'system', content: input.parts.dynamicSystem });
    }

    const historyToEmit =
      cachingActive && this.cacheHistoryEnabled()
        ? this.markLastHistoryMessage(input.history, ttls.historyTtl, input.model)
        : input.history;
    messages.push(...historyToEmit);
    messages.push(this.wrapLeading(input.currentMessage, input.parts.leadingUserContext));

    return {
      messages,
      tools: this.sortAndMarkTools(input.tools, cachingActive, this.cacheToolsEnabled(), ttls.toolsTtl, input.model),
    };
  }

  /**
   * Variant for callers that have one system string and a flat message list.
   * The last entry of `messages` is the one that can receive the history marker.
   */
  wrapSimple(input: KloWrapSimpleInput): KloBuildOutput {
    const cfg = this.provider.promptCachingConfig();
    const cachingActive = cfg.enabled && isAnthropicProtocolModel(input.model);
    const ttls = this.resolveTtls(input.model);
    const messages: ModelMessage[] = [];

    if (input.system) {
      const systemMessage: ModelMessage & { providerOptions?: unknown } = {
        role: 'system',
        content: input.system,
      };
      if (cachingActive && this.cacheSystemEnabled()) {
        systemMessage.providerOptions = this.provider.cacheMarker(ttls.systemTtl, input.model);
      }
      messages.push(systemMessage);
    }

    if (input.messages) {
      messages.push(
        ...(cachingActive && this.cacheHistoryEnabled()
          ? this.markLastHistoryMessage(input.messages, ttls.historyTtl, input.model)
          : input.messages),
      );
    }

    return {
      messages,
      tools: this.sortAndMarkTools(input.tools ?? {}, cachingActive, this.cacheToolsEnabled(), ttls.toolsTtl, input.model),
    };
  }

  /** Builder option wins; otherwise the provider config decides. */
  private cacheSystemEnabled(): boolean {
    return this.options.cacheSystem ?? this.provider.promptCachingConfig().cacheSystem;
  }

  /** Builder option wins; otherwise the provider config decides. */
  private cacheToolsEnabled(): boolean {
    return this.options.cacheTools ?? this.provider.promptCachingConfig().cacheTools;
  }

  /** Builder option wins; otherwise the provider config decides. */
  private cacheHistoryEnabled(): boolean {
    return this.options.cacheLastHistory ?? this.provider.promptCachingConfig().cacheHistory;
  }

  /**
   * Returns the TTLs to use for this model. All three collapse to `'5m'` when
   * `vertexFallbackTo5m` is set, the active backend is `'vertex'`, and the
   * model is an Anthropic-protocol model; otherwise the configured values apply.
   */
  private resolveTtls(model: LanguageModel | string): {
    systemTtl: KloPromptCacheTtl;
    toolsTtl: KloPromptCacheTtl;
    historyTtl: KloPromptCacheTtl;
  } {
    const cfg = this.provider.promptCachingConfig();
    if (cfg.vertexFallbackTo5m && this.provider.activeBackend() === 'vertex' && isAnthropicProtocolModel(model)) {
      return { systemTtl: '5m', toolsTtl: '5m', historyTtl: '5m' };
    }
    return { systemTtl: cfg.systemTtl, toolsTtl: cfg.toolsTtl, historyTtl: cfg.historyTtl };
  }

  /**
   * Prepends `leadingUserContext`, wrapped in `<system-reminder>` tags, as a
   * text part of the current message. The message is returned untouched when
   * there is no context or its content is neither a string nor an array.
   */
  private wrapLeading(currentMessage: ModelMessage, leadingUserContext?: string): ModelMessage {
    if (!leadingUserContext) {
      return currentMessage;
    }
    const reminderPart = {
      type: 'text' as const,
      text: `<system-reminder>\n${leadingUserContext}\n</system-reminder>`,
    };
    if (typeof currentMessage.content === 'string') {
      return {
        ...currentMessage,
        content: [reminderPart, { type: 'text' as const, text: currentMessage.content }],
      } as ModelMessage;
    }
    if (Array.isArray(currentMessage.content)) {
      return { ...currentMessage, content: [reminderPart, ...currentMessage.content] } as ModelMessage;
    }
    return currentMessage;
  }

  /**
   * Returns a copy of `history` whose last message carries the cache marker on
   * its last content part. String content is converted to a single text part.
   * The input array and its messages are never mutated; the original array is
   * returned as-is when it is empty or the provider yields no marker.
   */
  private markLastHistoryMessage(
    history: ModelMessage[],
    ttl: KloPromptCacheTtl,
    model: LanguageModel | string,
  ): ModelMessage[] {
    if (history.length === 0) {
      return history;
    }
    const out = [...history];
    const last = out[out.length - 1];
    const marker = this.provider.cacheMarker(ttl, model);
    if (!marker) {
      return history;
    }
    if (typeof last.content === 'string') {
      out[out.length - 1] = {
        ...last,
        content: [{ type: 'text', text: last.content, providerOptions: marker }],
      } as ModelMessage;
      return out;
    }
    if (Array.isArray(last.content) && last.content.length > 0) {
      const parts = [...last.content];
      const lastPart = parts[parts.length - 1];
      const existing = (lastPart as { providerOptions?: unknown } | undefined)?.providerOptions;
      // Merge instead of overwrite so options already set on the part survive.
      parts[parts.length - 1] = Object.assign({}, lastPart, {
        providerOptions: KloMessageBuilder.mergeProviderOptions(existing, marker),
      });
      out[out.length - 1] = { ...last, content: parts } as ModelMessage;
    }
    return out;
  }

  /**
   * Re-keys the tools in sorted name order (presumably so the serialized tool
   * list is stable between calls) and, when caching applies, puts the cache
   * marker on the last tool. Tool objects are copied, not mutated.
   */
  private sortAndMarkTools(
    tools: ToolMap,
    cachingActive: boolean,
    cacheTools: boolean,
    ttl: KloPromptCacheTtl,
    model: LanguageModel | string,
  ): ToolMap {
    const keys = Object.keys(tools).sort();
    const sorted: Record<string, unknown> = {};
    for (const key of keys) {
      sorted[key] = tools[key as keyof typeof tools];
    }
    if (cachingActive && cacheTools && keys.length > 0) {
      const lastKey = keys[keys.length - 1];
      const marker = this.provider.cacheMarker(ttl, model);
      if (marker) {
        const lastTool = sorted[lastKey] as Record<string, unknown>;
        // Merge instead of overwrite so options already set on the tool survive.
        sorted[lastKey] = {
          ...lastTool,
          providerOptions: KloMessageBuilder.mergeProviderOptions(lastTool?.providerOptions, marker),
        };
      }
    }
    return sorted as ToolMap;
  }

  /** True for non-null, non-array objects. */
  private static isRecord(value: unknown): value is Record<string, unknown> {
    return value !== null && typeof value === 'object' && !Array.isArray(value);
  }

  /**
   * Combines provider options already present on a part/tool with a cache
   * marker. Provider keys that only exist on one side are kept; for a key on
   * both sides the marker's fields win and the remaining existing fields are
   * preserved. Returns `marker` itself when there is nothing to merge with.
   */
  private static mergeProviderOptions(existing: unknown, marker: unknown): unknown {
    if (!KloMessageBuilder.isRecord(existing) || !KloMessageBuilder.isRecord(marker)) {
      return marker;
    }
    const merged: Record<string, unknown> = { ...existing };
    for (const [providerKey, markerValue] of Object.entries(marker)) {
      const current = merged[providerKey];
      merged[providerKey] =
        KloMessageBuilder.isRecord(current) && KloMessageBuilder.isRecord(markerValue)
          ? { ...current, ...markerValue }
          : markerValue;
    }
    return merged;
  }
}
|
||||
61
packages/llm/src/model-health.test.ts
Normal file
61
packages/llm/src/model-health.test.ts
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { runKloLlmHealthCheck } from './model-health.js';
|
||||
|
||||
// Opaque stand-in for the model instance returned by the mocked Anthropic factory.
const anthropicModel = { modelId: 'claude-sonnet-4-6' } as never;

describe('KLO LLM health check', () => {
  it('runs a minimal non-streaming model call through the configured provider', async () => {
    const generateText = vi.fn(async () => ({ text: 'ok' }));
    const createAnthropic = vi.fn(() => vi.fn(() => anthropicModel));

    const result = await runKloLlmHealthCheck(
      {
        backend: 'anthropic',
        anthropic: { apiKey: 'sk-ant-test' },
        modelSlots: { default: 'claude-sonnet-4-6' },
      },
      { deps: { createAnthropic, generateText } },
    );

    expect(result).toEqual({ ok: true });
    // The configured key must reach the backend factory.
    expect(createAnthropic).toHaveBeenCalledWith(
      expect.objectContaining({
        apiKey: 'sk-ant-test',
      }),
    );
    // The probe is a tiny deterministic completion against the default slot.
    expect(generateText).toHaveBeenCalledWith(
      expect.objectContaining({
        model: anthropicModel,
        prompt: 'Reply with exactly: ok',
        temperature: 0,
        maxOutputTokens: 8,
      }),
    );
  });

  it('returns a failed result without exposing secret values', async () => {
    // The simulated backend error echoes the API key back.
    const generateText = vi.fn(async () => {
      throw new Error('401 invalid x-api-key sk-ant-secret');
    });
    const createAnthropic = vi.fn(() => vi.fn(() => anthropicModel));

    const result = await runKloLlmHealthCheck(
      {
        backend: 'anthropic',
        anthropic: { apiKey: 'sk-ant-secret' },
        modelSlots: { default: 'claude-sonnet-4-6' },
      },
      { deps: { createAnthropic, generateText } },
    );

    expect(result).toEqual({
      ok: false,
      message: '401 invalid x-api-key [redacted]',
    });
  });
});
|
||||
60
packages/llm/src/model-health.ts
Normal file
60
packages/llm/src/model-health.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import { generateText } from 'ai';
|
||||
import { createKloLlmProvider, type KloLlmProviderFactoryDeps } from './model-provider.js';
|
||||
import type { KloLlmConfig } from './types.js';
|
||||
|
||||
/** Outcome of a health check: success, or failure with a redacted message. */
export type KloLlmHealthCheckResult = { ok: true } | { ok: false; message: string };

/**
 * Injectable dependencies for the health check. Same as the provider factory
 * deps, except `generateText` only has to return a promise of anything.
 */
export interface KloLlmHealthCheckDeps extends Omit<KloLlmProviderFactoryDeps, 'generateText'> {
  generateText?: (options: Parameters<typeof generateText>[0]) => Promise<unknown>;
}

/** Optional knobs for `runKloLlmHealthCheck`. */
export interface KloLlmHealthCheckOptions {
  /** Probe prompt; defaults to `'Reply with exactly: ok'`. */
  prompt?: string;
  /** Deadline for the model call in milliseconds; defaults to 15000. */
  timeoutMs?: number;
  /** Dependency overrides. */
  deps?: KloLlmHealthCheckDeps;
}
|
||||
|
||||
/**
 * Replaces every occurrence of the configured Anthropic and gateway API keys
 * in `message` with `[redacted]`.
 *
 * @param message Text that may contain secrets, e.g. an upstream error message.
 * @param config LLM config whose `anthropic.apiKey` / `gateway.apiKey` are scrubbed.
 * @returns The message with all configured, non-empty keys removed.
 */
function redactHealthCheckMessage(message: string, config: KloLlmConfig): string {
  const secrets = [config.anthropic?.apiKey, config.gateway?.apiKey]
    .filter((value): value is string => typeof value === 'string' && value.length > 0)
    // Longest first: if one key is a prefix/substring of the other, redacting
    // the shorter one first would leave the tail of the longer key in the text.
    .sort((a, b) => b.length - a.length);
  // split/join instead of replace() so keys are matched literally, not as patterns.
  return secrets.reduce((current, secret) => current.split(secret).join('[redacted]'), message);
}
|
||||
|
||||
/**
 * Settles with `promise`, or rejects with a timeout error if `timeoutMs`
 * elapses first. The timer is cleared once the race settles either way.
 */
async function withTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  // The executor runs synchronously, so `timer` is set before the race starts.
  const deadline = new Promise<never>((_resolve, reject) => {
    const onExpire = (): void => {
      reject(new Error(`LLM health check timed out after ${timeoutMs}ms`));
    };
    timer = setTimeout(onExpire, timeoutMs);
  });

  return Promise.race([promise, deadline]).finally(() => {
    if (timer) {
      clearTimeout(timer);
    }
  });
}
|
||||
|
||||
/**
 * Sends one small prompt to the model in the `default` slot and reports
 * whether the call completed before the deadline.
 *
 * Never throws: provider construction errors, model errors and timeouts are
 * all returned as `{ ok: false, message }` with configured API keys redacted.
 *
 * @param config LLM configuration to probe.
 * @param options Optional prompt, timeout (default 15000 ms) and dependency overrides.
 */
export async function runKloLlmHealthCheck(
  config: KloLlmConfig,
  options: KloLlmHealthCheckOptions = {},
): Promise<KloLlmHealthCheckResult> {
  try {
    // `generateText` is consumed here; the remaining deps go to the provider factory.
    const { generateText: injectedGenerateText, ...providerDeps } = options.deps ?? {};
    const provider = createKloLlmProvider(config, providerDeps);
    const callModel = injectedGenerateText ?? generateText;
    const deadlineMs = options.timeoutMs ?? 15_000;

    const pendingReply = callModel({
      model: provider.getModel('default'),
      prompt: options.prompt ?? 'Reply with exactly: ok',
      temperature: 0,
      maxOutputTokens: 8,
    });
    await withTimeout(pendingReply, deadlineMs);

    return { ok: true };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return { ok: false, message: redactHealthCheckMessage(reason, config) };
  }
}
|
||||
173
packages/llm/src/model-provider.test.ts
Normal file
173
packages/llm/src/model-provider.test.ts
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
import type { LanguageModel } from 'ai';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createKloLlmProvider } from './model-provider.js';
|
||||
|
||||
/** Minimal `LanguageModel` stand-in carrying only the fields the tests read. */
function languageModel(modelId: string, provider = 'test'): LanguageModel {
  return { modelId, provider } as LanguageModel;
}

describe('createKloLlmProvider', () => {
  // Backend factory mock whose model factory echoes the requested model id.
  const echoingFactory = (provider: string) =>
    vi.fn(() => vi.fn((modelId: string) => languageModel(modelId, provider)));

  const ONE_HOUR_MARKER = {
    anthropic: { cacheControl: { type: 'ephemeral', ttl: '1h' } },
  };

  it('uses direct Anthropic with both beta headers', () => {
    const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic');
    const anthropic = vi.fn(() => anthropicModel);
    const createAnthropic = vi.fn(() => anthropic);

    const provider = createKloLlmProvider(
      {
        backend: 'anthropic',
        anthropic: { apiKey: 'test-anthropic-key', baseURL: 'https://anthropic.test' }, // pragma: allowlist secret
        modelSlots: { default: 'claude-sonnet-4-6' },
        promptCaching: { enabled: false },
      },
      { createAnthropic },
    );
    const resolved = provider.getModel('default');

    expect(resolved).toBe(anthropicModel);
    expect(createAnthropic).toHaveBeenCalledWith({
      apiKey: 'test-anthropic-key', // pragma: allowlist secret
      baseURL: 'https://anthropic.test',
      headers: {
        'anthropic-beta': 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11',
      },
    });
    expect(anthropic).toHaveBeenCalledWith('claude-sonnet-4-6');
  });

  it('uses Vertex Anthropic without the direct-Anthropic beta header', () => {
    const vertexModel = languageModel('claude-sonnet-4-6', 'vertex');
    const vertex = vi.fn(() => vertexModel);
    const createVertexAnthropic = vi.fn(() => vertex);

    const provider = createKloLlmProvider(
      {
        backend: 'vertex',
        vertex: { project: 'klo-test', location: 'us-east5' },
        modelSlots: { default: 'claude-sonnet-4-6' },
        promptCaching: { enabled: false },
      },
      { createVertexAnthropic },
    );
    const resolved = provider.getModel('default');

    expect(resolved).toBe(vertexModel);
    // Exact match: no `headers` entry may be passed for Vertex.
    expect(createVertexAnthropic).toHaveBeenCalledWith({ project: 'klo-test', location: 'us-east5' });
    expect(vertex).toHaveBeenCalledWith('claude-sonnet-4-6');
  });

  it('uses Gateway and supports role fallback to default', () => {
    const gatewayModel = languageModel('anthropic/claude-sonnet-4-6', 'gateway');
    const gateway = vi.fn(() => gatewayModel);
    const createGateway = vi.fn(() => gateway);

    const provider = createKloLlmProvider(
      {
        backend: 'gateway',
        gateway: { apiKey: 'gateway-key', baseURL: 'https://gateway.test/v1' }, // pragma: allowlist secret
        modelSlots: { default: 'anthropic/claude-sonnet-4-6' },
        promptCaching: { enabled: false },
      },
      { createGateway },
    );
    // 'curator' has no slot of its own, so the default slot must be used.
    const resolved = provider.getModel('curator');

    expect(resolved).toBe(gatewayModel);
    expect(createGateway).toHaveBeenCalledWith({
      apiKey: 'gateway-key', // pragma: allowlist secret
      baseURL: 'https://gateway.test/v1',
    });
    expect(gateway).toHaveBeenCalledWith('anthropic/claude-sonnet-4-6');
  });

  it('uses explicit role overrides before default', () => {
    const provider = createKloLlmProvider(
      {
        backend: 'anthropic',
        anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret
        modelSlots: {
          default: 'claude-sonnet-4-6',
          triage: 'claude-haiku-4-5',
          repair: 'claude-opus-4-7',
        },
        promptCaching: { enabled: false },
      },
      { createAnthropic: echoingFactory('anthropic') },
    );
    const idFor = (role: 'triage' | 'repair' | 'reconcile'): string =>
      (provider.getModel(role) as { modelId: string }).modelId;

    expect(idFor('triage')).toBe('claude-haiku-4-5');
    expect(idFor('repair')).toBe('claude-opus-4-7');
    expect(idFor('reconcile')).toBe('claude-sonnet-4-6');
  });

  it('emits cache markers only when enabled and the model speaks Anthropic protocol', () => {
    const provider = createKloLlmProvider(
      {
        backend: 'gateway',
        gateway: { baseURL: 'https://gateway.test/v1' },
        modelSlots: { default: 'anthropic/claude-sonnet-4-6' },
        promptCaching: { enabled: true },
      },
      { createGateway: echoingFactory('gateway') },
    );

    expect(provider.cacheMarker('1h', 'anthropic/claude-sonnet-4-6')).toEqual(ONE_HOUR_MARKER);
    expect(provider.cacheMarker('1h', 'gpt-5')).toBeUndefined();
  });

  it('returns Anthropic thinking provider options', () => {
    const provider = createKloLlmProvider(
      {
        backend: 'anthropic',
        anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret
        modelSlots: { default: 'claude-sonnet-4-6' },
        promptCaching: { enabled: false },
      },
      { createAnthropic: echoingFactory('anthropic') },
    );

    expect(provider.thinkingProviderOptions('default', 12000)).toEqual({
      anthropic: {
        thinking: { type: 'enabled', budgetTokens: 12000 },
      },
    });
  });

  it('defaults prompt caching to enabled with canonical TTLs', () => {
    // No `promptCaching` key at all: every value must come from the defaults.
    const provider = createKloLlmProvider(
      {
        backend: 'gateway',
        gateway: { baseURL: 'https://gateway.test/v1' },
        modelSlots: { default: 'anthropic/claude-sonnet-4-6' },
      },
      { createGateway: echoingFactory('gateway') },
    );

    expect(provider.promptCachingConfig()).toEqual({
      enabled: true,
      systemTtl: '1h',
      toolsTtl: '1h',
      historyTtl: '5m',
      cacheSystem: true,
      cacheTools: true,
      cacheHistory: true,
      vertexFallbackTo5m: false,
    });
    expect(provider.cacheMarker('1h', 'anthropic/claude-sonnet-4-6')).toEqual(ONE_HOUR_MARKER);
  });

  it('preserves explicit prompt caching opt-out', () => {
    const provider = createKloLlmProvider(
      {
        backend: 'anthropic',
        anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret
        modelSlots: { default: 'claude-sonnet-4-6' },
        promptCaching: { enabled: false },
      },
      { createAnthropic: echoingFactory('anthropic') },
    );

    expect(provider.promptCachingConfig().enabled).toBe(false);
    expect(provider.cacheMarker('1h', 'claude-sonnet-4-6')).toBeUndefined();
  });
});
|
||||
152
packages/llm/src/model-provider.ts
Normal file
152
packages/llm/src/model-provider.ts
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
import { createAnthropic } from '@ai-sdk/anthropic';
|
||||
import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic';
|
||||
import { createGateway, generateText, type LanguageModel } from 'ai';
|
||||
import { createKloToolCallRepairHandler } from './repair.js';
|
||||
import type {
|
||||
KloLlmConfig,
|
||||
KloLlmProvider,
|
||||
KloModelRole,
|
||||
KloPromptCacheTtl,
|
||||
KloPromptCachingConfig,
|
||||
KloProviderOptions,
|
||||
} from './types.js';
|
||||
|
||||
/** Signature of the real `createAnthropic`, used to derive its options type. */
type AnthropicFactory = typeof createAnthropic;

/** What every backend factory must return: a function from model id to model. */
type AnthropicModelFactory = (modelId: string) => LanguageModel;

/** Vertex backend factory, taking the options of the real `createVertexAnthropic`. */
type VertexAnthropicFactory = (options?: Parameters<typeof createVertexAnthropic>[0]) => AnthropicModelFactory;

/** Gateway backend factory, taking the options of the real `createGateway`. */
type GatewayFactory = (options?: Parameters<typeof createGateway>[0]) => AnthropicModelFactory;

/**
 * Optional overrides for the SDK entry points used by the provider.
 * Any member left unset falls back to the imported implementation.
 */
export interface KloLlmProviderFactoryDeps {
  createAnthropic?: (options?: Parameters<AnthropicFactory>[0]) => AnthropicModelFactory;
  createVertexAnthropic?: VertexAnthropicFactory;
  createGateway?: GatewayFactory;
  /** Passed on to the tool-call repair handler. */
  generateText?: typeof generateText;
}
|
||||
|
||||
/** Prompt-caching settings applied when the config does not override them. */
const DEFAULT_PROMPT_CACHING: KloPromptCachingConfig = {
  enabled: true,
  systemTtl: '1h',
  toolsTtl: '1h',
  historyTtl: '5m',
  cacheSystem: true,
  cacheTools: true,
  cacheHistory: true,
  vertexFallbackTo5m: false,
};

/** Value of the `anthropic-beta` header sent on the direct Anthropic backend. */
const DIRECT_ANTHROPIC_BETA_HEADER = 'interleaved-thinking-2025-05-14,extended-cache-ttl-2025-04-11';

/**
 * Merges `config.promptCaching` over the defaults and returns a new object.
 *
 * Overrides whose value is `undefined` are ignored, so a config assembled from
 * optional sources (e.g. `{ systemTtl: maybeValue }`) cannot blank out a default.
 */
function resolvePromptCaching(config: KloLlmConfig): KloPromptCachingConfig {
  const definedOverrides = Object.fromEntries(
    Object.entries(config.promptCaching ?? {}).filter(([, value]) => value !== undefined),
  );
  return { ...DEFAULT_PROMPT_CACHING, ...definedOverrides };
}
|
||||
|
||||
/**
 * Returns the model id for either form of model reference: the string itself,
 * or the object's `modelId` property (empty string when it has none).
 */
export function modelIdFromLanguageModel(model: LanguageModel | string): string {
  if (typeof model === 'string') {
    return model;
  }
  const { modelId } = model as { modelId?: string };
  return modelId ?? '';
}
|
||||
|
||||
/**
 * Decides from the model id alone whether a model is treated as speaking the
 * Anthropic protocol: ids starting with `claude-` or `anthropic/`, or
 * containing `/claude-`.
 */
export function isAnthropicProtocolModel(model: LanguageModel | string): boolean {
  const id = modelIdFromLanguageModel(model);
  if (id.startsWith('claude-') || id.startsWith('anthropic/')) {
    return true;
  }
  return id.includes('/claude-');
}
|
||||
|
||||
/**
 * `KloLlmProvider` backed by one of three backends chosen by `config.backend`:
 * direct Anthropic, Anthropic on Vertex, or the gateway (used for any other value).
 */
class DefaultKloLlmProvider implements KloLlmProvider {
  private readonly promptCaching: KloPromptCachingConfig;
  private readonly getModelByResolvedName: (modelId: string) => LanguageModel;
  private readonly runGenerateText: typeof generateText;

  /**
   * @param config LLM configuration; also read later for slots, telemetry and backend.
   * @param deps Optional SDK overrides.
   * @throws Error when the backend is `vertex` and `vertex.location` is missing.
   */
  constructor(
    private readonly config: KloLlmConfig,
    deps: KloLlmProviderFactoryDeps,
  ) {
    this.promptCaching = resolvePromptCaching(config);
    this.runGenerateText = deps.generateText ?? generateText;
    this.getModelByResolvedName = this.createModelFactory(config, deps);
  }

  /** Resolves the role to a model id (falling back to `default`) and returns that model. */
  getModel(role: KloModelRole): LanguageModel {
    const modelId = this.resolveRole(role);
    return this.getModelByName(modelId);
  }

  /** Returns the backend's model for an explicit model id. */
  getModelByName(modelId: string): LanguageModel {
    return this.getModelByResolvedName(modelId);
  }

  /**
   * Returns the Anthropic cache-control provider options for `ttl`, or
   * `undefined` when caching is disabled or a given model is not an
   * Anthropic-protocol model. The model check is skipped when `model` is falsy.
   */
  cacheMarker(ttl: KloPromptCacheTtl, model?: LanguageModel | string) {
    const cachingOff = !this.promptCaching.enabled;
    if (cachingOff || (model && !isAnthropicProtocolModel(model))) {
      return undefined;
    }
    return { anthropic: { cacheControl: { type: 'ephemeral' as const, ttl } } };
  }

  /** Builds a tool-call repair handler that uses this provider's `repair` model. */
  repairToolCallHandler(options: { source?: string } = {}) {
    const source = options.source ?? 'klo-llm';
    return createKloToolCallRepairHandler({
      source,
      // Resolved lazily, each time the handler asks for the model.
      getRepairModel: () => this.getModel('repair'),
      generateText: this.runGenerateText,
    });
  }

  /** Returns Anthropic thinking options with the given budget; the role is not consulted. */
  thinkingProviderOptions(_role: KloModelRole, budgetTokens: number): KloProviderOptions {
    const thinking = { type: 'enabled', budgetTokens };
    return { anthropic: { thinking } };
  }

  /** Returns `telemetry.experimentalTelemetry` from the config, if any. */
  telemetryConfig() {
    return this.config.telemetry?.experimentalTelemetry;
  }

  /** Returns the prompt-caching config resolved at construction time. */
  promptCachingConfig(): KloPromptCachingConfig {
    return this.promptCaching;
  }

  /** Returns the configured backend name. */
  activeBackend() {
    return this.config.backend;
  }

  /** Model id for a role: the role's own slot, or the default slot when unset. */
  private resolveRole(role: KloModelRole): string {
    const { modelSlots } = this.config;
    return modelSlots[role] ?? modelSlots.default;
  }

  /**
   * Creates the backend client once and returns a function mapping model ids
   * to models. Optional settings are only passed to the SDK when they are set.
   */
  private createModelFactory(config: KloLlmConfig, deps: KloLlmProviderFactoryDeps): (modelId: string) => LanguageModel {
    switch (config.backend) {
      case 'anthropic': {
        const apiKey = config.anthropic?.apiKey;
        const baseURL = config.anthropic?.baseURL;
        const anthropic = (deps.createAnthropic ?? createAnthropic)({
          ...(apiKey ? { apiKey } : {}),
          ...(baseURL ? { baseURL } : {}),
          headers: {
            'anthropic-beta': DIRECT_ANTHROPIC_BETA_HEADER,
          },
        });
        return (modelId) => anthropic(modelId);
      }
      case 'vertex': {
        if (!config.vertex?.location) {
          throw new Error('vertex.location is required when KLO LLM backend is vertex');
        }
        const project = config.vertex.project;
        const vertex = (deps.createVertexAnthropic ?? createVertexAnthropic)({
          ...(project ? { project } : {}),
          location: config.vertex.location,
        });
        return (modelId) => vertex(modelId);
      }
      default: {
        const apiKey = config.gateway?.apiKey;
        const baseURL = config.gateway?.baseURL;
        const gateway = (deps.createGateway ?? createGateway)({
          ...(apiKey ? { apiKey } : {}),
          ...(baseURL ? { baseURL } : {}),
        });
        return (modelId) => gateway(modelId);
      }
    }
  }
}
|
||||
|
||||
/**
 * Creates the LLM provider for `config`.
 *
 * @param config LLM configuration; `modelSlots.default` must be set.
 * @param deps Optional SDK overrides.
 * @throws Error when `modelSlots.default` is missing or empty.
 */
export function createKloLlmProvider(config: KloLlmConfig, deps: KloLlmProviderFactoryDeps = {}): KloLlmProvider {
  const hasDefaultSlot = Boolean(config.modelSlots.default);
  if (hasDefaultSlot) {
    return new DefaultKloLlmProvider(config, deps);
  }
  throw new Error('modelSlots.default is required');
}
|
||||
19
packages/llm/src/package-exports.test.ts
Normal file
19
packages/llm/src/package-exports.test.ts
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
describe('@klo/llm package exports', () => {
  it('exports the canonical LLM and embedding surfaces', async () => {
    const llm = await import('./index.js');
    const expectedRoles = ['default', 'triage', 'candidateExtraction', 'curator', 'reconcile', 'repair'];

    // Order matters here: `toEqual` compares the role list element by element.
    expect(llm.KLO_MODEL_ROLES).toEqual(expectedRoles);

    expect(llm.createKloLlmProvider).toBeTypeOf('function');
    expect(llm.KloMessageBuilder).toBeTypeOf('function');
    expect(llm.createKloEmbeddingProvider).toBeTypeOf('function');
  });
});
|
||||
93
packages/llm/src/repair.test.ts
Normal file
93
packages/llm/src/repair.test.ts
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
import { NoSuchToolError, type LanguageModel } from 'ai';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createKloToolCallRepairHandler } from './repair.js';
|
||||
|
||||
// Stand-in for the model handed out by `getRepairModel`.
const repairModel = { modelId: 'claude-repair', provider: 'anthropic' } as LanguageModel;

describe('createKloToolCallRepairHandler', () => {
  it('returns null for NoSuchToolError', async () => {
    const handler = createKloToolCallRepairHandler({
      source: 'unit',
      getRepairModel: () => repairModel,
      generateText: vi.fn(),
    });

    const repaired = await handler({
      system: undefined,
      messages: [],
      toolCall: { type: 'tool-call', toolName: 'missing', toolCallId: 'tc_1', input: '{}' },
      tools: {},
      inputSchema: async () => ({}),
      error: new NoSuchToolError({ toolName: 'missing' }),
    });

    expect(repaired).toBeNull();
  });

  it('repairs string input by local JSON extraction without an LLM call', async () => {
    const generateText = vi.fn();
    const handler = createKloToolCallRepairHandler({
      source: 'unit',
      getRepairModel: () => repairModel,
      generateText,
    });

    // Valid JSON object surrounded by stray text.
    const repaired = await handler({
      system: undefined,
      messages: [],
      toolCall: {
        type: 'tool-call',
        toolName: 'write_source',
        toolCallId: 'tc_2',
        input: 'prefix {"path":"orders.yaml"} suffix',
      },
      tools: { write_source: {} as never },
      inputSchema: async () => ({ type: 'object' }),
      error: new Error('Invalid tool input') as never,
    });

    expect(repaired).toEqual({
      type: 'tool-call',
      toolName: 'write_source',
      toolCallId: 'tc_2',
      input: '{"path":"orders.yaml"}',
    });
    expect(generateText).not.toHaveBeenCalled();
  });

  it('falls back to the repair model when local extraction fails', async () => {
    const generateText = vi.fn().mockResolvedValue({ text: '{"path":"customers.yaml"}' });
    const handler = createKloToolCallRepairHandler({
      source: 'unit',
      getRepairModel: () => repairModel,
      generateText,
    });

    // Nothing JSON-like in the input, so only the model can produce a fix.
    const repaired = await handler({
      system: undefined,
      messages: [],
      toolCall: {
        type: 'tool-call',
        toolName: 'write_source',
        toolCallId: 'tc_3',
        input: 'not json',
      },
      tools: { write_source: {} as never },
      inputSchema: async () => ({ type: 'object', properties: { path: { type: 'string' } } }),
      error: new Error('Invalid tool input') as never,
    });

    expect(repaired).toEqual({
      type: 'tool-call',
      toolName: 'write_source',
      toolCallId: 'tc_3',
      input: '{"path":"customers.yaml"}',
    });
    expect(generateText).toHaveBeenCalledWith(
      expect.objectContaining({
        model: repairModel,
        prompt: expect.stringContaining('The model tried to call the tool "write_source"'),
      }),
    );
  });
});
|
||||
88
packages/llm/src/repair.ts
Normal file
88
packages/llm/src/repair.ts
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
import { NoSuchToolError, type LanguageModel, type ToolCallRepairFunction, type ToolSet, generateText } from 'ai';
|
||||
|
||||
/** Configuration for `createKloToolCallRepairHandler`. */
interface KloToolCallRepairHandlerInput {
  /** Caller label. NOTE(review): not read by the handler code in this file — confirm intended use. */
  source: string;
  /** Supplies the model used for the model-based repair attempt; called per attempt. */
  getRepairModel: () => LanguageModel;
  /** Replacement for the SDK's `generateText`; the imported one is used when unset. */
  generateText?: typeof generateText;
}
|
||||
|
||||
/**
 * Finds JSON inside free-form text.
 *
 * Returns the trimmed text itself when all of it parses as JSON. Otherwise it
 * scans `{ … }` spans — opening braces left to right, and for each one the
 * closing braces right to left — and returns the first span that parses.
 * Returns `null` when the text is blank or no span parses.
 */
function extractJsonFromText(text: string): string | null {
  const body = text.trim();
  if (body.length === 0) {
    return null;
  }

  const parses = (candidate: string): boolean => {
    try {
      JSON.parse(candidate);
      return true;
    } catch {
      return false;
    }
  };

  if (parses(body)) {
    return body;
  }

  // Record brace positions once instead of re-searching the string.
  const opens: number[] = [];
  const closes: number[] = [];
  for (let index = 0; index < body.length; index += 1) {
    const char = body[index];
    if (char === '{') {
      opens.push(index);
    } else if (char === '}') {
      closes.push(index);
    }
  }
  const closesFromRight = [...closes].reverse();

  for (const open of opens) {
    for (const close of closesFromRight) {
      // Remaining closing braces are all at or before this opening brace.
      if (close <= open) {
        break;
      }
      const candidate = body.slice(open, close + 1);
      if (parses(candidate)) {
        return candidate;
      }
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Creates a tool-call repair function for the AI SDK.
 *
 * The handler tries, in order:
 * 1. Nothing for `NoSuchToolError` — returns `null`.
 * 2. Local repair: pull a JSON object out of a string input that has stray
 *    text around it. This is only accepted when it actually changes the
 *    payload; input that was already valid JSON failed for another reason
 *    (e.g. schema validation) and returning it unchanged would just fail again.
 * 3. Model repair: ask the repair model for corrected input, given the failed
 *    input, the error message and the tool's schema.
 *
 * Any failure during model repair (schema lookup, model call, unparsable
 * reply) yields `null`, so the caller surfaces the original error.
 *
 * @param input Repair model accessor and optional `generateText` override.
 * @returns A repair function resolving to the corrected tool call or `null`.
 */
export function createKloToolCallRepairHandler(
  input: KloToolCallRepairHandlerInput,
): ToolCallRepairFunction<ToolSet> {
  const runGenerateText = input.generateText ?? generateText;

  return async ({ toolCall, tools, inputSchema, error }) => {
    if (NoSuchToolError.isInstance(error)) {
      return null;
    }

    if (typeof toolCall.input === 'string') {
      const extracted = extractJsonFromText(toolCall.input);
      // Skip the shortcut when extraction is a no-op, so the model repair
      // below gets a chance to fix schema-level problems.
      if (extracted && extracted !== toolCall.input.trim()) {
        return {
          type: 'tool-call',
          toolName: toolCall.toolName,
          toolCallId: toolCall.toolCallId,
          input: extracted,
        };
      }
    }

    if (!(toolCall.toolName in tools)) {
      return null;
    }

    try {
      const schema = await inputSchema({ toolName: toolCall.toolName });
      const { text } = await runGenerateText({
        model: input.getRepairModel(),
        prompt: `The model tried to call the tool "${toolCall.toolName}" with the following inputs:
${JSON.stringify(toolCall.input)}

However, this caused a validation error: ${error.message}

The tool accepts the following schema:
${JSON.stringify(schema)}

Please generate corrected inputs that match the schema. Return ONLY valid JSON, no explanation or markdown formatting.`,
      });

      // Tolerate replies that wrap the JSON in prose or code fences.
      const cleaned = extractJsonFromText(text) ?? text.trim();
      // Throws on an unparsable reply, which the catch turns into `null`.
      const parsed = JSON.parse(cleaned);
      return {
        type: 'tool-call',
        toolName: toolCall.toolName,
        toolCallId: toolCall.toolCallId,
        input: JSON.stringify(parsed),
      };
    } catch {
      // Best effort by design: a failed repair must not mask the original error.
      return null;
    }
  };
}
|
||||
95
packages/llm/src/types.ts
Normal file
95
packages/llm/src/types.ts
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
import type { LanguageModel, TelemetrySettings, ToolCallRepairFunction, ToolSet } from 'ai';
|
||||
|
||||
/**
 * Every model role name known to the LLM layer. Declared `as const` so the
 * `KloModelRole` union can be derived from the tuple's literal element types.
 */
export const KLO_MODEL_ROLES = ['default', 'triage', 'candidateExtraction', 'curator', 'reconcile', 'repair'] as const;
|
||||
|
||||
/** Union of the role names listed in `KLO_MODEL_ROLES`. */
export type KloModelRole = (typeof KLO_MODEL_ROLES)[number];
|
||||
/** Identifier of the LLM backend selected in `KloLlmConfig.backend`. */
export type KloLlmBackend = 'anthropic' | 'vertex' | 'gateway';
|
||||
/** Allowed prompt-cache time-to-live values. */
export type KloPromptCacheTtl = '5m' | '1h';
|
||||
|
||||
/**
 * Recursive JSON-like value: a primitive, an array of such values, or a
 * string-keyed object whose properties are such values. Object properties may
 * also be `undefined`.
 */
export type KloJsonValue =
|
||||
| null
|
||||
| string
|
||||
| number
|
||||
| boolean
|
||||
| KloJsonValue[]
|
||||
| { [key: string]: KloJsonValue | undefined };
|
||||
|
||||
/**
 * Map from a string key to a JSON-like options object.
 * NOTE(review): keys are presumably provider names (e.g. `anthropic`) — confirm.
 */
export type KloProviderOptions = Record<string, { [key: string]: KloJsonValue | undefined }>;
|
||||
|
||||
/**
 * Prompt-caching settings.
 *
 * - `enabled`: overall on/off flag.
 * - `systemTtl` / `toolsTtl` / `historyTtl`: cache TTL per prompt segment.
 * - `cacheSystem` / `cacheTools` / `cacheHistory`: per-segment on/off flags.
 * - `vertexFallbackTo5m`: NOTE(review): presumably downgrades the TTL to `5m`
 *   on the Vertex backend — confirm against the provider implementation.
 */
export interface KloPromptCachingConfig {
|
||||
enabled: boolean;
|
||||
systemTtl: KloPromptCacheTtl;
|
||||
toolsTtl: KloPromptCacheTtl;
|
||||
historyTtl: KloPromptCacheTtl;
|
||||
cacheSystem: boolean;
|
||||
cacheTools: boolean;
|
||||
cacheHistory: boolean;
|
||||
vertexFallbackTo5m: boolean;
|
||||
}
|
||||
|
||||
/**
 * Payload passed to `KloLlmConfig.telemetry.onTokenUsage`. Every field is
 * optional: a caller-supplied source label, the model id, and input / output /
 * total token counts.
 */
export interface KloTokenUsageEvent {
|
||||
source?: string;
|
||||
modelId?: string;
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
}
|
||||
|
||||
/**
 * Configuration for the LLM provider.
 *
 * - `backend`: which backend to use.
 * - `vertex` / `anthropic` / `gateway`: optional per-backend connection
 *   settings (project/location, API key, base URL).
 * - `modelSlots`: model id per role; `default` is required, every other
 *   `KloModelRole` is optional. NOTE(review): unset roles presumably fall back
 *   to `default` — confirm.
 * - `promptCaching`: partial override of `KloPromptCachingConfig`.
 * - `telemetry`: optional `ai` SDK telemetry settings and a token-usage
 *   callback.
 */
export interface KloLlmConfig {
|
||||
backend: KloLlmBackend;
|
||||
vertex?: { project?: string; location: string };
|
||||
anthropic?: { apiKey?: string; baseURL?: string };
|
||||
gateway?: { baseURL?: string; apiKey?: string };
|
||||
modelSlots: { default: string } & Partial<Record<KloModelRole, string>>;
|
||||
promptCaching?: Partial<KloPromptCachingConfig>;
|
||||
telemetry?: {
|
||||
experimentalTelemetry?: TelemetrySettings;
|
||||
onTokenUsage?: (event: KloTokenUsageEvent) => void;
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Contract of an LLM provider.
 *
 * - `getModel` / `getModelByName`: resolve a `LanguageModel` by role or by
 *   explicit model id.
 * - `cacheMarker`: returns an Anthropic `cacheControl` provider-options marker
 *   of type `ephemeral` with a TTL, or `undefined`. NOTE(review): `undefined`
 *   presumably means caching does not apply to the given model — confirm.
 * - `repairToolCallHandler`: returns a tool-call repair callback for the `ai`
 *   SDK; `source` is an optional label.
 * - `thinkingProviderOptions`: builds provider options for a role and a token
 *   budget.
 * - `telemetryConfig`, `promptCachingConfig`, `activeBackend`: accessors for
 *   the telemetry settings, the prompt-caching settings, and the backend.
 */
export interface KloLlmProvider {
|
||||
getModel(role: KloModelRole): LanguageModel;
|
||||
getModelByName(modelId: string): LanguageModel;
|
||||
cacheMarker(
|
||||
ttl: KloPromptCacheTtl,
|
||||
model?: LanguageModel | string,
|
||||
): { anthropic: { cacheControl: { type: 'ephemeral'; ttl: KloPromptCacheTtl } } } | undefined;
|
||||
repairToolCallHandler(options?: { source?: string }): ToolCallRepairFunction<ToolSet>;
|
||||
thinkingProviderOptions(role: KloModelRole, budgetTokens: number): KloProviderOptions;
|
||||
telemetryConfig(): TelemetrySettings | undefined;
|
||||
promptCachingConfig(): KloPromptCachingConfig;
|
||||
activeBackend(): KloLlmBackend;
|
||||
}
|
||||
|
||||
/** Identifier of the embedding backend selected in `KloEmbeddingConfig.backend`. */
export type KloEmbeddingBackend = 'openai' | 'deterministic' | 'sentence-transformers';
|
||||
|
||||
/**
 * Payload passed to `KloEmbeddingConfig.telemetry.onTokenUsage`: the backend
 * and model used, an input count, and an optional total token count.
 * NOTE(review): `inputCount` is presumably the number of texts embedded in the
 * request — confirm.
 */
export interface KloEmbeddingTokenUsageEvent {
|
||||
backend: KloEmbeddingBackend;
|
||||
model: string;
|
||||
inputCount: number;
|
||||
totalTokens?: number;
|
||||
}
|
||||
|
||||
/**
 * Configuration for the embedding provider.
 *
 * - `backend` / `model`: which backend and model to use.
 * - `dimensions`: expected length of each returned vector (the embedding
 *   health check reports a failure when the provider returns a different
 *   length).
 * - `openai` / `sentenceTransformers`: optional per-backend connection
 *   settings.
 * - `batchSize`: optional batch size. NOTE(review): presumably caps the number
 *   of texts per request — confirm.
 * - `telemetry`: optional token-usage callback.
 */
export interface KloEmbeddingConfig {
|
||||
backend: KloEmbeddingBackend;
|
||||
model: string;
|
||||
dimensions: number;
|
||||
openai?: { apiKey?: string; baseURL?: string };
|
||||
sentenceTransformers?: { baseURL: string; pathPrefix?: string };
|
||||
batchSize?: number;
|
||||
telemetry?: { onTokenUsage?: (event: KloEmbeddingTokenUsageEvent) => void };
|
||||
}
|
||||
|
||||
/**
 * Contract of an embedding provider: exposes its vector dimensionality and
 * maximum batch size, and embeds one text (`embed`) or several (`embedMany`).
 * NOTE(review): `embedMany` presumably returns vectors in input order —
 * confirm.
 */
export interface KloEmbeddingProvider {
|
||||
readonly dimensions: number;
|
||||
readonly maxBatchSize: number;
|
||||
embed(text: string): Promise<number[]>;
|
||||
embedMany(texts: string[]): Promise<number[][]>;
|
||||
}
|
||||
|
||||
/**
 * Pieces of a prompt: a required static system text plus an optional dynamic
 * system text and optional leading user context.
 * NOTE(review): the static/dynamic split presumably exists so the static part
 * can be prompt-cached — confirm.
 */
export interface KloPromptParts {
|
||||
staticSystem: string;
|
||||
dynamicSystem?: string;
|
||||
leadingUserContext?: string;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue