Initial open-source release

This commit is contained in:
Andrey Avtomonov 2026-05-10 23:12:26 +02:00
commit 1a42152e6f
1199 changed files with 257054 additions and 0 deletions

View file

@ -0,0 +1,123 @@
import { mkdtemp, readFile, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, describe, expect, it } from 'vitest';
import {
createJsonlKloLlmDebugRequestRecorder,
summarizeKloLlmDebugRequest,
} from './debug-request-recorder.js';
// Checks that a request summary keeps routing metadata and the positions of
// providerOptions, while prompt text, tool descriptions and tool schemas stay
// out of the serialized result.
describe('summarizeKloLlmDebugRequest', () => {
  it('records providerOptions positions without message text or tool schemas', () => {
    // Anthropic cache-control marker with the given TTL, as used by the fixture.
    const cacheFor = (ttl: '1h' | '5m') => ({ anthropic: { cacheControl: { type: 'ephemeral', ttl } } });

    // Fields expected to be copied verbatim from the input into the summary.
    const metadata = {
      operationName: 'ingest-bundle-wu',
      source: 'metabase',
      jobId: 'job-1',
      unitKey: 'cards/1',
      modelRole: 'candidateExtraction',
      modelId: 'claude-sonnet-4-6',
    } as const;

    // Fragments that must never show up in the serialized summary.
    const forbiddenFragments = [
      'SECRET SYSTEM PROMPT',
      'SECRET USER PROMPT',
      'SECRET TOOL DESCRIPTION',
      'inputSchema',
    ];

    const summary = summarizeKloLlmDebugRequest({
      ...metadata,
      messages: [
        { role: 'system', content: 'SECRET SYSTEM PROMPT', providerOptions: cacheFor('1h') },
        {
          role: 'user',
          content: [{ type: 'text', text: 'SECRET USER PROMPT', providerOptions: cacheFor('5m') }],
        },
      ],
      tools: {
        emit_candidate: {
          description: 'SECRET TOOL DESCRIPTION',
          inputSchema: { secret: true },
          providerOptions: cacheFor('1h'),
        },
      },
    });

    expect(summary).toMatchObject({
      ...metadata,
      messageCount: 2,
      toolNames: ['emit_candidate'],
      providerOptions: [
        { target: 'message', index: 0, role: 'system', providerOptions: cacheFor('1h') },
        { target: 'message-part', index: 1, role: 'user', partIndex: 0, providerOptions: cacheFor('5m') },
        { target: 'tool', name: 'emit_candidate', providerOptions: cacheFor('1h') },
      ],
    });

    const serialized = JSON.stringify(summary);
    for (const fragment of forbiddenFragments) {
      expect(serialized).not.toContain(fragment);
    }
  });
});
// Exercises the JSONL recorder against a real temporary directory on disk.
describe('createJsonlKloLlmDebugRequestRecorder', () => {
  let tempDir: string | undefined;

  // Remove whatever scratch directory the test created, then forget it.
  afterEach(async () => {
    if (!tempDir) {
      return;
    }
    await rm(tempDir, { recursive: true, force: true });
    tempDir = undefined;
  });

  it('appends one JSON object per recorded request', async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'klo-llm-debug-'));
    // "nested" does not exist yet, so the recorder has to create it itself.
    const filePath = join(tempDir, 'nested', 'llm-debug.jsonl');
    const recorder = createJsonlKloLlmDebugRequestRecorder(filePath);

    await recorder.record({
      timestamp: '2026-05-04T00:00:00.000Z',
      operationName: 'ingest-bundle-wu',
      modelRole: 'candidateExtraction',
      modelId: 'claude-sonnet-4-6',
      messageCount: 2,
      toolNames: ['emit_candidate'],
      providerOptions: [],
    });
    await recorder.record({
      timestamp: '2026-05-04T00:00:01.000Z',
      operationName: 'ingest-bundle-reconcile',
      modelRole: 'reconcile',
      modelId: 'claude-sonnet-4-6',
      messageCount: 2,
      toolNames: [],
      providerOptions: [],
    });

    // Every non-empty line of the file must parse as standalone JSON.
    const contents = await readFile(filePath, 'utf8');
    const lines = contents
      .trim()
      .split('\n')
      .map((line) => JSON.parse(line));

    expect(lines).toHaveLength(2);
    const [first, second] = lines;
    expect(first).toMatchObject({ operationName: 'ingest-bundle-wu', modelRole: 'candidateExtraction' });
    expect(second).toMatchObject({ operationName: 'ingest-bundle-reconcile', modelRole: 'reconcile' });
  });
});

View file

@ -0,0 +1,131 @@
import { appendFile, mkdir } from 'node:fs/promises';
import { dirname } from 'node:path';
import type { ModelMessage } from 'ai';
import type { KloModelRole } from '@klo/llm';
/** Any object that may carry an optional `providerOptions` value alongside arbitrary other keys. */
type ProviderOptionsCarrier = { providerOptions?: unknown; [key: string]: unknown };
/** Tool definitions keyed by tool name; this file reads only the keys and each entry's `providerOptions`. */
type ToolMap = Record<string, ProviderOptionsCarrier>;
/**
 * One occurrence of `providerOptions` found in a request, together with the
 * location it was found at.
 */
export interface KloLlmDebugProviderOptionsEntry {
/** Kind of carrier: a whole message, one content part of a message, or a tool. */
target: 'message' | 'message-part' | 'tool';
/** Position of the message in the request's message list (set for `message` and `message-part`). */
index?: number;
/** Role of that message (set for `message` and `message-part`). */
role?: string;
/** Position of the part inside the message's content array (set for `message-part`). */
partIndex?: number;
/** Tool name (set for `tool`). */
name?: string;
/** The provider options value, passed through as found. */
providerOptions: unknown;
}
/**
 * Summary of a single LLM request in the shape handed to a
 * `KloLlmDebugRequestRecorder`. It holds counts, names and provider options;
 * there are no fields for message text or tool schemas.
 */
export interface KloLlmDebugRequest {
/** Time of the request; `summarizeKloLlmDebugRequest` defaults this to `new Date().toISOString()`. */
timestamp: string;
/** Caller-chosen name of the operation issuing the request. */
operationName: string;
/** Optional caller-supplied source label. */
source?: string;
/** Optional caller-supplied job identifier. */
jobId?: string;
/** Optional caller-supplied unit key. */
unitKey?: string;
/** Model role the request was issued for. */
modelRole: KloModelRole;
/** Identifier of the model used. */
modelId: string;
/** Number of messages in the request. */
messageCount: number;
/** Names of the tools offered with the request. */
toolNames: string[];
/** Every `providerOptions` occurrence found on messages, message parts and tools. */
providerOptions: KloLlmDebugProviderOptionsEntry[];
}
/** Sink that receives request summaries for debugging. */
export interface KloLlmDebugRequestRecorder {
/** Records one request summary; implementations may finish synchronously or return a promise. */
record(request: KloLlmDebugRequest): Promise<void> | void;
}
/** Raw request data accepted by `summarizeKloLlmDebugRequest`. */
export interface SummarizeKloLlmDebugRequestInput {
/** Caller-chosen name of the operation issuing the request. */
operationName: string;
/** Optional source label; left out of the summary when empty. */
source?: string;
/** Optional job identifier; left out of the summary when empty. */
jobId?: string;
/** Optional unit key; left out of the summary when empty. */
unitKey?: string;
/** Model role the request is issued for. */
modelRole: KloModelRole;
/** Identifier of the model used. */
modelId: string;
/** Full message list; only its length, roles and `providerOptions` end up in the summary. */
messages: ModelMessage[];
/** Tools offered with the request; only their names and `providerOptions` end up in the summary. */
tools: ToolMap;
/** Overrides the summary timestamp; the current time is used when omitted. */
timestamp?: string;
}
/**
 * Reads the role of a message defensively.
 *
 * @returns the message's `role` when it is a string, otherwise `'unknown'`.
 */
function messageRole(message: ModelMessage): string {
  const { role } = message;
  if (typeof role !== 'string') {
    return 'unknown';
  }
  return role;
}
/**
 * Type guard for values that can be inspected for a `providerOptions` key.
 *
 * @returns `true` for any non-null, non-array object; `false` for everything else.
 */
function isProviderOptionsCarrier(value: unknown): value is ProviderOptionsCarrier {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
/**
 * Collects the `providerOptions` attached to individual content parts of one message.
 *
 * @param message message whose `content` is inspected
 * @param index position of `message` in the request, copied into every entry
 * @returns one `message-part` entry per content part that is an object with a
 *   truthy `providerOptions`; an empty list when `content` is not an array
 */
function contentPartProviderOptions(message: ModelMessage, index: number): KloLlmDebugProviderOptionsEntry[] {
  const { content } = message;
  if (!Array.isArray(content)) {
    return [];
  }

  const entries: KloLlmDebugProviderOptionsEntry[] = [];
  content.forEach((part, partIndex) => {
    // Skip primitive parts and parts that carry no options at all.
    if (isProviderOptionsCarrier(part) && part.providerOptions) {
      entries.push({
        target: 'message-part',
        index,
        role: messageRole(message),
        partIndex,
        providerOptions: part.providerOptions,
      });
    }
  });
  return entries;
}
/**
 * Collects `providerOptions` from every message and from the content parts of
 * every message, in request order.
 *
 * For each message, the message-level entry (if any) comes first and is
 * followed by that message's part-level entries.
 */
function messageProviderOptions(messages: ModelMessage[]): KloLlmDebugProviderOptionsEntry[] {
  const collected: KloLlmDebugProviderOptionsEntry[] = [];
  messages.forEach((message, index) => {
    const { providerOptions } = message as ProviderOptionsCarrier;
    if (providerOptions) {
      collected.push({ target: 'message', index, role: messageRole(message), providerOptions });
    }
    collected.push(...contentPartProviderOptions(message, index));
  });
  return collected;
}
/**
 * Collects `providerOptions` from tool definitions.
 *
 * @returns one `tool` entry per tool with a truthy `providerOptions`, in the
 *   iteration order of `Object.entries(tools)`
 */
function toolProviderOptions(tools: ToolMap): KloLlmDebugProviderOptionsEntry[] {
  const entries: KloLlmDebugProviderOptionsEntry[] = [];
  for (const [name, tool] of Object.entries(tools)) {
    if (tool.providerOptions) {
      entries.push({ target: 'tool', name, providerOptions: tool.providerOptions });
    }
  }
  return entries;
}
/**
 * Reduces an LLM request to a debug summary.
 *
 * The summary carries the request metadata, the message count, the tool names
 * sorted ascending, and every `providerOptions` occurrence (messages first,
 * then tools). Message contents and tool definitions themselves are not copied.
 *
 * @param input raw request data; `source`, `jobId` and `unitKey` are included
 *   only when non-empty, and `timestamp` defaults to the current time
 * @returns the summary object
 */
export function summarizeKloLlmDebugRequest(input: SummarizeKloLlmDebugRequestInput): KloLlmDebugRequest {
  const { messages, tools } = input;
  const toolNames = Object.keys(tools).sort();

  // Optional context keys are absent from the summary when not provided.
  const optionalContext: Pick<KloLlmDebugRequest, 'source' | 'jobId' | 'unitKey'> = {};
  if (input.source) {
    optionalContext.source = input.source;
  }
  if (input.jobId) {
    optionalContext.jobId = input.jobId;
  }
  if (input.unitKey) {
    optionalContext.unitKey = input.unitKey;
  }

  return {
    timestamp: input.timestamp ?? new Date().toISOString(),
    operationName: input.operationName,
    ...optionalContext,
    modelRole: input.modelRole,
    modelId: input.modelId,
    messageCount: messages.length,
    toolNames,
    providerOptions: messageProviderOptions(messages).concat(toolProviderOptions(tools)),
  };
}
/**
 * Creates a recorder that appends each request summary to a JSON Lines file.
 *
 * On every `record` call the parent directory of `filePath` is created
 * (recursively) if needed, then the request is appended as one JSON document
 * followed by a newline.
 *
 * @param filePath destination file; created on first write
 * @returns a recorder whose `record` resolves once the line has been appended
 */
export function createJsonlKloLlmDebugRequestRecorder(filePath: string): KloLlmDebugRequestRecorder {
  const record = async (request: KloLlmDebugRequest): Promise<void> => {
    const parentDir = dirname(filePath);
    await mkdir(parentDir, { recursive: true });
    const line = JSON.stringify(request) + '\n';
    await appendFile(filePath, line, 'utf8');
  };
  return { record };
}

View file

@ -0,0 +1,38 @@
import { describe, expect, it, vi } from 'vitest';
import { KloIngestEmbeddingPortAdapter, KloScanEmbeddingPortAdapter } from './embedding-port.js';
// Both adapters are driven through a hand-built stand-in for the @klo/llm
// embedding provider; only the members the adapters touch are supplied.
describe('KLO embedding port adapters', () => {
  // Key of the provider's batch method, assembled at runtime exactly as the
  // original fixture did.
  const bulkMethodName = ['embed', 'Many'].join('');

  it('adapts @klo/llm embeddings to ingest embedding port shape', async () => {
    const embed = vi.fn(async () => [1, 2, 3]);
    const embedBulk = vi.fn(async () => [
      [1, 2, 3],
      [4, 5, 6],
    ]);
    const provider = { dimensions: 3, maxBatchSize: 2, embed, [bulkMethodName]: embedBulk };

    const adapter = new KloIngestEmbeddingPortAdapter(provider as never);

    await expect(adapter.computeEmbedding('alpha')).resolves.toEqual([1, 2, 3]);
    await expect(adapter.computeEmbeddingsBulk(['alpha', 'beta'])).resolves.toEqual([
      [1, 2, 3],
      [4, 5, 6],
    ]);
    expect(adapter.maxBatchSize).toBe(2);
  });

  it('adapts @klo/llm embeddings to scan embedding port shape', async () => {
    const embedBulk = vi.fn(async () => [[1, 2, 3]]);
    const provider = { dimensions: 3, maxBatchSize: 2, embed: vi.fn(), [bulkMethodName]: embedBulk };

    const adapter = new KloScanEmbeddingPortAdapter(provider as never);

    await expect(adapter.embedBatch(['alpha'])).resolves.toEqual([[1, 2, 3]]);
    expect(adapter.dimensions).toBe(3);
    expect(adapter.maxBatchSize).toBe(2);
  });
});

View file

@ -0,0 +1,39 @@
import type { KloEmbeddingProvider } from '@klo/llm';
import type { KloEmbeddingPort as KloIngestEmbeddingPort } from '../core/embedding.js';
import type { KloEmbeddingPort as KloScanEmbeddingPort } from '../scan/types.js';
/**
 * Property name of the provider's batch-embedding method.
 * NOTE(review): the name is assembled at runtime instead of being written as
 * a literal; the reason is not visible in this file.
 */
const bulkEmbeddingMethod = ['embed', 'Many'].join('') as keyof KloEmbeddingProvider;

/**
 * Embeds several texts in one provider call.
 *
 * @param provider embedding provider exposing the batch method
 * @param texts inputs to embed
 * @returns whatever the provider's batch method resolves to
 */
function computeBulkEmbeddings(provider: KloEmbeddingProvider, texts: string[]): Promise<number[][]> {
  type BulkEmbed = (items: string[]) => Promise<number[][]>;
  const embedAll = provider[bulkEmbeddingMethod] as BulkEmbed;
  // Invoke with the provider as receiver so the method keeps its `this`.
  return embedAll.call(provider, texts);
}
/**
 * Presents a `KloEmbeddingProvider` through the ingest embedding port
 * (`computeEmbedding`, `computeEmbeddingsBulk`, `maxBatchSize`).
 */
export class KloIngestEmbeddingPortAdapter implements KloIngestEmbeddingPort {
  private readonly provider: KloEmbeddingProvider;
  /** Copied from the provider at construction time. */
  readonly maxBatchSize: number;

  constructor(provider: KloEmbeddingProvider) {
    this.provider = provider;
    this.maxBatchSize = provider.maxBatchSize;
  }

  /** Embeds one text via the provider's `embed`. */
  computeEmbedding(text: string): Promise<number[]> {
    const { provider } = this;
    return provider.embed(text);
  }

  /** Embeds several texts via the provider's batch method. */
  computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    const { provider } = this;
    return computeBulkEmbeddings(provider, texts);
  }
}
/**
 * Presents a `KloEmbeddingProvider` through the scan embedding port
 * (`embedBatch`, `dimensions`, `maxBatchSize`).
 */
export class KloScanEmbeddingPortAdapter implements KloScanEmbeddingPort {
  private readonly provider: KloEmbeddingProvider;
  /** Copied from the provider at construction time. */
  readonly dimensions: number;
  /** Copied from the provider at construction time. */
  readonly maxBatchSize: number;

  constructor(provider: KloEmbeddingProvider) {
    this.provider = provider;
    this.dimensions = provider.dimensions;
    this.maxBatchSize = provider.maxBatchSize;
  }

  /** Embeds several texts via the provider's batch method. */
  embedBatch(texts: string[]): Promise<number[][]> {
    const { provider } = this;
    return computeBulkEmbeddings(provider, texts);
  }
}

View file

@ -0,0 +1,63 @@
import { KloMessageBuilder, type KloLlmProvider, type KloModelRole } from '@klo/llm';
import { generateText, Output, type FlexibleSchema, type ToolSet } from 'ai';
/** Argument object accepted by the `ai` package's `generateText`. */
type GenerateTextInput = Parameters<typeof generateText>[0];
/** Shape of an injectable stand-in for `generateText`; this file reads only `text` and `output` from its result. */
type GenerateTextFn = (input: GenerateTextInput) => Promise<{ text?: string; output?: unknown }>;
/** Options shared by `generateKloText` and `generateKloObject`. */
interface GenerateKloTextInput {
/** Provider used to look up the model and to construct the message builder. */
llmProvider: KloLlmProvider;
/** Model role passed to `llmProvider.getModel`. */
role: KloModelRole;
/** Text sent as the single user message. */
prompt: string;
/** Optional system prompt handed to the message builder. */
system?: string;
/** Tools offered to the model; an empty set is used when omitted. */
tools?: ToolSet;
/** Sampling temperature; 0 is used when omitted. */
temperature?: number;
/** Replacement for the `ai` package's `generateText`, used instead of it when provided. */
generateText?: GenerateTextFn;
}
/**
 * Generates plain text for a single user prompt.
 *
 * When the resolved model reports `provider === 'deterministic'`, no model
 * call is made: a fixed description built from the first 64 characters of the
 * prompt (or `'data source'` when that is blank) is returned instead.
 *
 * @param input provider, role, prompt and optional system/tools/temperature/generateText override
 * @returns the generated text
 * @throws Error when the generation result has no string `text`
 */
export async function generateKloText(input: GenerateKloTextInput): Promise<string> {
  const { llmProvider, prompt } = input;
  const model = llmProvider.getModel(input.role);

  const providerName = (model as { provider?: string }).provider;
  if (providerName === 'deterministic') {
    const subject = prompt.slice(0, 64).trim() || 'data source';
    return `Deterministic description for ${subject}`;
  }

  const builder = new KloMessageBuilder(llmProvider);
  const built = builder.wrapSimple({
    system: input.system,
    messages: [{ role: 'user', content: prompt }],
    tools: input.tools ?? {},
    model,
  });

  const run = input.generateText ?? generateText;
  const { text } = await run({
    model,
    temperature: input.temperature ?? 0,
    messages: built.messages,
    tools: built.tools as ToolSet,
  });

  if (typeof text !== 'string') {
    throw new Error('KLO LLM text generation returned no text');
  }
  return text;
}
/**
 * Generates a structured object for a single user prompt.
 *
 * The request is built like a text generation but adds an object output
 * specification derived from `input.schema`. Unlike `generateKloText`, there is
 * no shortcut for deterministic models here.
 *
 * @param input generation options plus the `schema` describing the expected object
 * @returns the `output` of the generation result, cast to `TOutput`
 * @throws Error when the generation result has no `output`
 */
export async function generateKloObject<TOutput, TSchema>(
  input: GenerateKloTextInput & { schema: TSchema },
): Promise<TOutput> {
  const { llmProvider, prompt, schema } = input;
  const model = llmProvider.getModel(input.role);

  const { messages, tools } = new KloMessageBuilder(llmProvider).wrapSimple({
    system: input.system,
    messages: [{ role: 'user', content: prompt }],
    tools: input.tools ?? {},
    model,
  });

  const run = input.generateText ?? generateText;
  const { output } = await run({
    model,
    temperature: input.temperature ?? 0,
    messages,
    tools: tools as ToolSet,
    output: Output.object({ schema: schema as FlexibleSchema<TOutput> }),
  });

  if (output == null) {
    throw new Error('KLO LLM object generation returned no output');
  }
  return output as TOutput;
}

View file

@ -0,0 +1,18 @@
// Public surface of this directory: re-exports only, no logic of its own.

// Adapters exposing @klo/llm embedding providers through the ingest and scan ports.
export { KloIngestEmbeddingPortAdapter, KloScanEmbeddingPortAdapter } from './embedding-port.js';
// Text and structured-object generation helpers.
export { generateKloObject, generateKloText } from './generation.js';
// Debug request recorder: type-only exports first, runtime values below.
export type {
KloLlmDebugProviderOptionsEntry,
KloLlmDebugRequest,
KloLlmDebugRequestRecorder,
SummarizeKloLlmDebugRequestInput,
} from './debug-request-recorder.js';
export {
createJsonlKloLlmDebugRequestRecorder,
summarizeKloLlmDebugRequest,
} from './debug-request-recorder.js';
// Resolution of project configuration into @klo/llm configs and providers.
export {
createLocalKloEmbeddingProviderFromConfig,
createLocalKloLlmProviderFromConfig,
resolveLocalKloEmbeddingConfig,
resolveLocalKloLlmConfig,
} from './local-config.js';

View file

@ -0,0 +1,127 @@
import { describe, expect, it, vi } from 'vitest';
import {
buildDefaultKloProjectConfig,
type KloProjectEmbeddingConfig,
type KloProjectLlmConfig,
} from '../project/config.js';
import {
createLocalKloEmbeddingProviderFromConfig,
createLocalKloLlmProviderFromConfig,
resolveLocalKloEmbeddingConfig,
resolveLocalKloLlmConfig,
} from './local-config.js';
// Covers resolution of the project-level LLM config into a @klo/llm config and
// construction of providers from it. The `pragma: allowlist secret` markers
// annotate the fixture credentials on the lines they appear on.
describe('local KLO LLM config', () => {
// Only `env:` references are exercised here; plain strings are expected to
// come back unchanged, and snake_case provider keys to be mapped to camelCase.
it('resolves env and file references into a KloLlmConfig', () => {
const config: KloProjectLlmConfig = {
provider: {
backend: 'gateway',
gateway: { api_key: 'env:AI_GATEWAY_API_KEY', base_url: 'https://gateway.example/v1' }, // pragma: allowlist secret
},
models: { default: 'env:KLO_MODEL', triage: 'anthropic/claude-haiku-4-5' },
promptCaching: { enabled: false },
};
expect(
resolveLocalKloLlmConfig(config, {
AI_GATEWAY_API_KEY: 'gateway-key', // pragma: allowlist secret
KLO_MODEL: 'anthropic/claude-sonnet-4-6',
}),
).toEqual({
backend: 'gateway',
gateway: { apiKey: 'gateway-key', baseURL: 'https://gateway.example/v1' }, // pragma: allowlist secret
modelSlots: { default: 'anthropic/claude-sonnet-4-6', triage: 'anthropic/claude-haiku-4-5' },
promptCaching: { enabled: false },
});
});
// Backend `none` short-circuits before any model slot is required.
it('returns null when the local LLM backend is disabled', () => {
expect(
createLocalKloLlmProviderFromConfig({
provider: { backend: 'none' },
models: {},
}),
).toBeNull();
});
// The factory is injected so the exact resolved config can be asserted;
// `promptCaching` is forwarded even when the project config omits it.
it('constructs providers through @klo/llm', () => {
const createKloLlmProvider = vi.fn(() => ({ getModel: vi.fn() }) as never);
const result = createLocalKloLlmProviderFromConfig(
{
provider: {
backend: 'anthropic',
anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret
},
models: { default: 'claude-sonnet-4-6' },
},
{ env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, createKloLlmProvider }, // pragma: allowlist secret
);
expect(result).not.toBeNull();
expect(createKloLlmProvider).toHaveBeenCalledWith({
backend: 'anthropic',
anthropic: { apiKey: 'sk-ant-test' }, // pragma: allowlist secret
modelSlots: { default: 'claude-sonnet-4-6' },
promptCaching: undefined,
});
});
// No factory override is passed, so the provider comes from the default
// factory and the asserted caching settings are the ones it supplies.
it('inherits enabled prompt caching from @klo/llm when local config omits promptCaching', () => {
const provider = createLocalKloLlmProviderFromConfig({
provider: {
backend: 'gateway',
gateway: { base_url: 'https://gateway.example/v1' },
},
models: { default: 'anthropic/claude-sonnet-4-6' },
});
expect(provider?.promptCachingConfig()).toMatchObject({
enabled: true,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
vertexFallbackTo5m: false,
});
});
});
// Covers resolution of the project-level embedding config and provider
// construction from it.
describe('local KLO embedding config', () => {
  it('resolves sentence-transformers config', () => {
    const config: KloProjectEmbeddingConfig = {
      backend: 'sentence-transformers',
      model: 'all-MiniLM-L6-v2',
      dimensions: 384,
      sentenceTransformers: { base_url: 'http://localhost:18081', pathPrefix: '' },
      batchSize: 16,
    };

    const resolved = resolveLocalKloEmbeddingConfig(config, {});

    // `base_url` is expected to surface as `baseURL`; everything else is copied.
    expect(resolved).toEqual({
      backend: 'sentence-transformers',
      model: 'all-MiniLM-L6-v2',
      dimensions: 384,
      sentenceTransformers: { baseURL: 'http://localhost:18081', pathPrefix: '' },
      batchSize: 16,
    });
  });

  it('constructs deterministic embeddings from the default project config', () => {
    // Injected factory so the resolved config can be inspected.
    const createKloEmbeddingProvider = vi.fn(() => ({}) as never);
    const defaultEmbeddings = buildDefaultKloProjectConfig('warehouse').ingest.embeddings;

    const provider = createLocalKloEmbeddingProviderFromConfig(defaultEmbeddings, { createKloEmbeddingProvider });

    expect(provider).not.toBeNull();
    expect(createKloEmbeddingProvider).toHaveBeenCalledWith(
      expect.objectContaining({
        backend: 'deterministic',
        model: 'deterministic',
        dimensions: 8,
      }),
    );
  });

  it('returns null when embeddings are disabled', () => {
    const provider = createLocalKloEmbeddingProviderFromConfig({ backend: 'none', dimensions: 8 });
    expect(provider).toBeNull();
  });
});

View file

@ -0,0 +1,122 @@
import {
createKloEmbeddingProvider,
createKloLlmProvider,
type KloEmbeddingConfig,
type KloEmbeddingProvider,
type KloLlmConfig,
type KloLlmProvider,
type KloModelRole,
} from '@klo/llm';
import { resolveKloConfigReference } from '../core/config-reference.js';
import type { KloProjectEmbeddingConfig, KloProjectLlmConfig } from '../project/config.js';
/** Optional overrides for the provider factory functions in this file, mainly useful in tests. */
interface LocalConfigDeps {
/** Environment used to resolve config references; `process.env` is used when omitted. */
env?: NodeJS.ProcessEnv;
/** Replacement for the `@klo/llm` LLM provider factory. */
createKloLlmProvider?: typeof createKloLlmProvider;
/** Replacement for the `@klo/llm` embedding provider factory. */
createKloEmbeddingProvider?: typeof createKloEmbeddingProvider;
}
/**
 * Resolves a config reference and normalizes "nothing" to `undefined`.
 *
 * @returns the resolved value, or `undefined` when resolution yields any falsy
 *   value (including the empty string)
 */
function resolveOptional(value: string | undefined, env: NodeJS.ProcessEnv): string | undefined {
  const resolved = resolveKloConfigReference(value, env);
  return resolved ? resolved : undefined;
}
/**
 * Resolves a config reference that must produce a value.
 *
 * @param message error text used when nothing is resolved
 * @returns the resolved, non-empty value
 * @throws Error with `message` when the reference resolves to nothing
 */
function resolveRequired(value: string | undefined, env: NodeJS.ProcessEnv, message: string): string {
  const resolved = resolveOptional(value, env);
  if (resolved) {
    return resolved;
  }
  throw new Error(message);
}
/**
 * Resolves every configured model reference into a concrete model id.
 *
 * Roles with an empty or missing value are skipped; configured roles must
 * resolve to a value.
 *
 * @returns the resolved model per role
 * @throws Error when a configured role resolves to nothing, or when no
 *   `default` model results
 */
function resolveModelSlots(
  models: KloProjectLlmConfig['models'],
  env: NodeJS.ProcessEnv,
): KloLlmConfig['modelSlots'] {
  const slots: Partial<Record<KloModelRole, string>> & { default?: string } = {};
  Object.entries(models).forEach(([role, reference]) => {
    if (!reference) {
      return;
    }
    slots[role as KloModelRole] = resolveRequired(reference, env, `llm.models.${role} is required`);
  });

  if (!slots.default) {
    throw new Error('llm.models.default is required when llm.provider.backend is not none');
  }
  return slots as KloLlmConfig['modelSlots'];
}
/**
 * Resolves a provider's `api_key` / `base_url` references and renames them to
 * `apiKey` / `baseURL`.
 *
 * @returns an object holding only the keys that resolved to a value, or
 *   `undefined` when `config` is absent or neither key resolved
 */
function resolvedProviderConfig(
  config: { api_key?: string; base_url?: string } | undefined,
  env: NodeJS.ProcessEnv,
): { apiKey?: string; baseURL?: string } | undefined {
  if (!config) {
    return undefined;
  }

  const apiKey = resolveOptional(config.api_key, env);
  const baseURL = resolveOptional(config.base_url, env);

  const resolved: { apiKey?: string; baseURL?: string } = {};
  if (apiKey) {
    resolved.apiKey = apiKey;
  }
  if (baseURL) {
    resolved.baseURL = baseURL;
  }
  return apiKey || baseURL ? resolved : undefined;
}
/**
 * Turns the project-level LLM config into a `KloLlmConfig`.
 *
 * Model references and the anthropic/gateway provider settings are resolved
 * against `env`; `vertex` and `promptCaching` are passed through as given.
 *
 * @returns the resolved config, or `null` when the backend is `'none'`
 * @throws Error when no default model can be resolved
 */
export function resolveLocalKloLlmConfig(config: KloProjectLlmConfig, env: NodeJS.ProcessEnv): KloLlmConfig | null {
  const { provider } = config;
  if (provider.backend === 'none') {
    return null;
  }

  const modelSlots = resolveModelSlots(config.models, env);
  const anthropic = resolvedProviderConfig(provider.anthropic, env);
  const gateway = resolvedProviderConfig(provider.gateway, env);

  // Provider sections are included only when they carry something.
  const vertexSection = provider.vertex ? { vertex: provider.vertex } : {};
  const anthropicSection = anthropic ? { anthropic } : {};
  const gatewaySection = gateway ? { gateway } : {};

  return {
    backend: provider.backend,
    ...vertexSection,
    ...anthropicSection,
    ...gatewaySection,
    modelSlots,
    promptCaching: config.promptCaching,
  };
}
/**
 * Builds an LLM provider from the project-level LLM config.
 *
 * @param config project LLM config
 * @param deps optional environment and factory overrides; `process.env` and
 *   the `@klo/llm` factory are used by default
 * @returns the provider, or `null` when the config resolves to nothing
 *   (backend `'none'`)
 */
export function createLocalKloLlmProviderFromConfig(
  config: KloProjectLlmConfig,
  deps: LocalConfigDeps = {},
): KloLlmProvider | null {
  const env = deps.env ?? process.env;
  const resolved = resolveLocalKloLlmConfig(config, env);
  if (!resolved) {
    return null;
  }
  const factory = deps.createKloLlmProvider ?? createKloLlmProvider;
  return factory(resolved);
}
/**
 * Turns the project-level embedding config into a `KloEmbeddingConfig`.
 *
 * `backend`, `dimensions` and `batchSize` are copied; `model` falls back to
 * `'deterministic'` when unset; the `openai` section is resolved against `env`
 * and included only when it yields a value; `sentenceTransformers.base_url` is
 * renamed to `baseURL`.
 *
 * @param config project embedding config
 * @param env environment used to resolve config references
 * @returns the resolved config, or `null` when the backend is `'none'`
 */
export function resolveLocalKloEmbeddingConfig(
  config: KloProjectEmbeddingConfig,
  env: NodeJS.ProcessEnv,
): KloEmbeddingConfig | null {
  if (config.backend === 'none') {
    return null;
  }

  // Resolve the OpenAI section exactly once: the condition and the emitted
  // value must come from the same resolution, and reference lookups should not
  // be repeated.
  const openai = resolvedProviderConfig(config.openai, env);

  return {
    backend: config.backend,
    model: config.model ?? 'deterministic',
    dimensions: config.dimensions,
    ...(openai ? { openai } : {}),
    // NOTE(review): unlike the openai section, `base_url` here is passed
    // through without reference resolution — confirm that is intended.
    ...(config.sentenceTransformers
      ? {
          sentenceTransformers: {
            baseURL: config.sentenceTransformers.base_url,
            pathPrefix: config.sentenceTransformers.pathPrefix,
          },
        }
      : {}),
    batchSize: config.batchSize,
  };
}
/**
 * Builds an embedding provider from the project-level embedding config.
 *
 * @param config project embedding config
 * @param deps optional environment and factory overrides; `process.env` and
 *   the `@klo/llm` factory are used by default
 * @returns the provider, or `null` when the config resolves to nothing
 *   (backend `'none'`)
 */
export function createLocalKloEmbeddingProviderFromConfig(
  config: KloProjectEmbeddingConfig,
  deps: LocalConfigDeps = {},
): KloEmbeddingProvider | null {
  const env = deps.env ?? process.env;
  const resolved = resolveLocalKloEmbeddingConfig(config, env);
  if (!resolved) {
    return null;
  }
  const factory = deps.createKloEmbeddingProvider ?? createKloEmbeddingProvider;
  return factory(resolved);
}