mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-28 08:49:38 +02:00
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. 
- Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. 
* refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. 
Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
This commit is contained in:
parent
a1cfb03d73
commit
2366b00301
1002 changed files with 2286 additions and 12051 deletions
337
packages/cli/src/context/llm/ai-sdk-runtime.test.ts
Normal file
337
packages/cli/src/context/llm/ai-sdk-runtime.test.ts
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Replace the `ai` package with lightweight stand-ins so these tests never reach a real model.
vi.mock('ai', () => ({
  generateText: vi.fn(),
  // Identity stubs: the value handed to the helper is what ends up on the generateText call,
  // which lets the tests assert on the raw step budget and the raw tool definition.
  stepCountIs: (n: number) => n,
  tool: (def: unknown) => def,
}));
|
||||
|
||||
import { generateText } from 'ai';
|
||||
import { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
|
||||
import type { RunLoopStepInfo } from './runtime-port.js';
|
||||
|
||||
describe('AiSdkKtxLlmRuntime.runAgentLoop', () => {
  let runtime: AiSdkKtxLlmRuntime;

  // Hand-rolled provider double; cast to `any` at the injection site because it only
  // implements the members these tests exercise.
  const llmProvider = {
    getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }),
    getModelByName: vi.fn(),
    cacheMarker: vi.fn(),
    repairToolCallHandler: vi.fn(),
    thinkingProviderOptions: vi.fn(),
    telemetryConfig: vi.fn(),
    promptCachingConfig: vi.fn(() => ({
      enabled: false,
      systemTtl: '1h',
      toolsTtl: '1h',
      historyTtl: '5m',
      cacheSystem: true,
      cacheTools: true,
      cacheHistory: true,
      vertexFallbackTo5m: false,
    })),
    activeBackend: vi.fn(() => 'anthropic'),
  };

  /**
   * Builds a runtime whose telemetry port copies `source`, `jobId`, and `unitKey` from the
   * run's tags into the telemetry metadata, defaulting `source` to 'RESEARCH'.
   * Shared by the telemetry tests below so each one only states its tags and its assertion.
   */
  function createRuntimeWithTelemetry(): AiSdkKtxLlmRuntime {
    return new AiSdkKtxLlmRuntime({
      llmProvider: llmProvider as any,
      telemetry: {
        createTelemetry: (tags) => ({
          isEnabled: true,
          metadata: {
            source: tags.source ?? 'RESEARCH',
            jobId: tags.jobId,
            unitKey: tags.unitKey,
          },
        }),
      },
    });
  }

  beforeEach(() => {
    vi.clearAllMocks();
    runtime = new AiSdkKtxLlmRuntime({ llmProvider: llmProvider as any });
  });

  afterEach(() => vi.clearAllMocks());

  it('passes systemPrompt, userPrompt, tools, and step budget through to generateText', async () => {
    (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
    const repairHandler = vi.fn();
    llmProvider.repairToolCallHandler.mockReturnValueOnce(repairHandler);
    const tools = { noop: { description: 'noop', inputSchema: {}, execute: vi.fn() } };
    await runtime.runAgentLoop({
      modelRole: 'candidateExtraction',
      systemPrompt: 'SYS',
      userPrompt: 'USR',
      toolSet: tools as any,
      stepBudget: 17,
      telemetryTags: { source: 'test' },
    });
    const call = (generateText as any).mock.calls[0][0];
    expect(call.system).toEqual({ role: 'system', content: 'SYS' });
    expect(call.messages).toEqual([{ role: 'user', content: 'USR' }]);
    expect(call.prompt).toBeUndefined();
    expect(call.tools.noop).toEqual(
      expect.objectContaining({
        description: 'noop',
        inputSchema: {},
        execute: expect.any(Function),
        toModelOutput: expect.any(Function),
      }),
    );
    // `stepCountIs` is mocked as identity, so the raw budget shows up here.
    expect(call.stopWhen).toBe(17);
    expect(call.temperature).toBe(0);
    expect(call.experimental_repairToolCall).toBe(repairHandler);
    expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction');
    expect(llmProvider.repairToolCallHandler).toHaveBeenCalledWith({ source: 'ktx-agent-runner' });
  });

  it('returns stopReason=natural when the loop completes without error', async () => {
    (generateText as any).mockResolvedValue({ text: 'done', toolCalls: [], steps: [] });
    const result = await runtime.runAgentLoop({
      modelRole: 'candidateExtraction',
      systemPrompt: 'system',
      userPrompt: 'user',
      toolSet: {},
      stepBudget: 10,
      telemetryTags: {},
    });
    expect(result.stopReason).toBe('natural');
    expect(result.error).toBeUndefined();
    expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction');
    expect(generateText).toHaveBeenCalledWith(
      expect.objectContaining({
        system: { role: 'system', content: 'system' },
        messages: [{ role: 'user', content: 'user' }],
      }),
    );
  });

  it('returns stopReason=error with the error on generateText failure', async () => {
    const err = new Error('LLM unavailable');
    (generateText as any).mockRejectedValue(err);
    const result = await runtime.runAgentLoop({
      modelRole: 'candidateExtraction',
      systemPrompt: '',
      userPrompt: '',
      toolSet: {},
      stepBudget: 10,
      telemetryTags: {},
    });
    expect(result.stopReason).toBe('error');
    expect(result.error).toBe(err);
  });

  it('invokes caller onStepFinish with incrementing stepIndex and total budget', async () => {
    const calls: RunLoopStepInfo[] = [];
    // Simulate the SDK finishing three steps before resolving.
    (generateText as any).mockImplementation(async (opts: any) => {
      for (let i = 0; i < 3; i++) {
        await opts.onStepFinish({});
      }
      return { text: 'ok', toolCalls: [], steps: [] };
    });

    await runtime.runAgentLoop({
      modelRole: 'candidateExtraction',
      systemPrompt: '',
      userPrompt: '',
      toolSet: {},
      stepBudget: 10,
      telemetryTags: {},
      onStepFinish: (info) => {
        calls.push(info);
      },
    });

    expect(calls).toEqual([
      { stepIndex: 1, stepBudget: 10 },
      { stepIndex: 2, stepBudget: 10 },
      { stepIndex: 3, stepBudget: 10 },
    ]);
  });

  it('swallows errors thrown from caller onStepFinish without aborting the loop', async () => {
    (generateText as any).mockImplementation(async (opts: any) => {
      await opts.onStepFinish({});
      return { text: 'ok', toolCalls: [], steps: [] };
    });

    const result = await runtime.runAgentLoop({
      modelRole: 'candidateExtraction',
      systemPrompt: '',
      userPrompt: '',
      toolSet: {},
      stepBudget: 10,
      telemetryTags: {},
      onStepFinish: () => {
        throw new Error('boom');
      },
    });

    expect(result.stopReason).toBe('natural');
  });

  it('forwards telemetryTags.source through experimental_telemetry metadata', async () => {
    (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
    await createRuntimeWithTelemetry().runAgentLoop({
      modelRole: 'candidateExtraction',
      systemPrompt: '',
      userPrompt: '',
      toolSet: {},
      stepBudget: 10,
      telemetryTags: { source: 'metabase', jobId: 'job-123', unitKey: 'u/1' },
    });
    const call = (generateText as any).mock.calls[0][0];
    expect(call.experimental_telemetry.metadata.source).toBe('metabase');
  });

  it('defaults to source=RESEARCH when telemetryTags omits source', async () => {
    (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
    await createRuntimeWithTelemetry().runAgentLoop({
      modelRole: 'candidateExtraction',
      systemPrompt: '',
      userPrompt: '',
      toolSet: {},
      stepBudget: 10,
      telemetryTags: { operationName: 'memory-agent-ingest' },
    });
    const call = (generateText as any).mock.calls[0][0];
    expect(call.experimental_telemetry.metadata.source).toBe('RESEARCH');
  });

  it('forwards jobId and unitKey through experimental_telemetry metadata', async () => {
    (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
    await createRuntimeWithTelemetry().runAgentLoop({
      modelRole: 'candidateExtraction',
      systemPrompt: '',
      userPrompt: '',
      toolSet: {},
      stepBudget: 10,
      telemetryTags: { source: 'metabase', jobId: 'job-777', unitKey: 'sources/users' },
    });
    const call = (generateText as any).mock.calls[0][0];
    expect(call.experimental_telemetry.metadata.jobId).toBe('job-777');
    expect(call.experimental_telemetry.metadata.unitKey).toBe('sources/users');
  });

  it('records a sanitized LLM debug request when a recorder is injected', async () => {
    (generateText as any).mockResolvedValue({ text: 'ok', toolCalls: [], steps: [] });
    const record = vi.fn();
    // Same provider double, but with prompt caching switched on and a real-looking cache marker.
    const provider = {
      ...llmProvider,
      cacheMarker: vi.fn((ttl: '5m' | '1h') => ({
        anthropic: { cacheControl: { type: 'ephemeral' as const, ttl } },
      })),
      promptCachingConfig: vi.fn(() => ({
        enabled: true,
        systemTtl: '1h',
        toolsTtl: '1h',
        historyTtl: '5m',
        cacheSystem: true,
        cacheTools: true,
        cacheHistory: true,
        vertexFallbackTo5m: false,
      })),
    };
    const runtimeWithDebug = new AiSdkKtxLlmRuntime({
      llmProvider: provider as any,
      debugRequestRecorder: { record },
    });

    await runtimeWithDebug.runAgentLoop({
      modelRole: 'candidateExtraction',
      systemPrompt: 'SECRET SYSTEM PROMPT',
      userPrompt: 'SECRET USER PROMPT',
      toolSet: {
        emit_candidate: {
          description: 'SECRET TOOL DESCRIPTION',
          inputSchema: {},
          execute: vi.fn(),
        } as any,
      },
      stepBudget: 10,
      telemetryTags: { operationName: 'ingest-bundle-wu', source: 'metabase', jobId: 'job-1', unitKey: 'cards/1' },
    });

    expect(record).toHaveBeenCalledTimes(1);
    expect(record).toHaveBeenCalledWith(
      expect.objectContaining({
        operationName: 'ingest-bundle-wu',
        source: 'metabase',
        jobId: 'job-1',
        unitKey: 'cards/1',
        modelRole: 'candidateExtraction',
        modelId: 'claude-sonnet-4-6',
        messageCount: 2,
        toolNames: ['emit_candidate'],
      }),
    );
    const providerOptions = record.mock.calls[0][0].providerOptions;
    expect(providerOptions).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ target: 'message', index: 0, role: 'system' }),
        expect.objectContaining({ target: 'message-part', index: 1, role: 'user', partIndex: 0 }),
        expect.objectContaining({ target: 'tool', name: 'emit_candidate' }),
      ]),
    );
    expect(providerOptions).toHaveLength(3);
    // The recorded summary must not carry any prompt or tool-description text.
    const serialized = JSON.stringify(record.mock.calls[0][0]);
    expect(serialized).not.toContain('SECRET SYSTEM PROMPT');
    expect(serialized).not.toContain('SECRET USER PROMPT');
    expect(serialized).not.toContain('SECRET TOOL DESCRIPTION');
  });
});
|
||||
165
packages/cli/src/context/llm/ai-sdk-runtime.ts
Normal file
165
packages/cli/src/context/llm/ai-sdk-runtime.ts
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
import { KtxMessageBuilder, splitKtxSystemMessages } from '../../llm/message-builder.js';
|
||||
import type { KtxLlmProvider } from '../../llm/types.js';
|
||||
import { generateText, Output, stepCountIs, type FlexibleSchema, type TelemetrySettings, type ToolSet } from 'ai';
|
||||
import type { z } from 'zod';
|
||||
import { noopLogger, type KtxLogger } from '../../context/core/config.js';
|
||||
import { summarizeKtxLlmDebugRequest, type KtxLlmDebugRequestRecorder } from './debug-request-recorder.js';
|
||||
import { createAiSdkToolSet } from './runtime-tools.js';
|
||||
import type {
|
||||
KtxGenerateObjectInput,
|
||||
KtxGenerateTextInput,
|
||||
KtxLlmRuntimePort,
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
} from './runtime-port.js';
|
||||
|
||||
/** Produces the AI SDK telemetry settings for one agent-loop run from that run's string tags. */
interface AgentTelemetryPort {
  createTelemetry(tags: Record<string, string>): TelemetrySettings;
}
|
||||
|
||||
/** Collaborators injected into the AI SDK backed runtime. Only `llmProvider` is required. */
export interface AiSdkKtxLlmRuntimeDeps {
  /** Resolves models and supplies the tool-call repair handler and fallback telemetry config. */
  llmProvider: KtxLlmProvider;
  /** When omitted, the agent loop falls back to `llmProvider.telemetryConfig()`. */
  telemetry?: AgentTelemetryPort;
  /** When omitted, a no-op logger is used. */
  logger?: KtxLogger;
  /** When provided, receives a summary of each agent-loop request before it is sent. */
  debugRequestRecorder?: KtxLlmDebugRequestRecorder;
}
|
||||
|
||||
/** Returns true when the tool record has at least one own enumerable key. */
function hasTools(tools: Record<string, unknown>): boolean {
  return Object.keys(tools).length > 0;
}
|
||||
|
||||
/**
 * `KtxLlmRuntimePort` implementation that drives the AI SDK `generateText` call for
 * single-shot text, structured output, and multi-step agent loops.
 */
export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
  private readonly logger: KtxLogger;

  constructor(private readonly deps: AiSdkKtxLlmRuntimeDeps) {
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Generates plain text for a single user prompt.
   *
   * @param input Role, prompt, and optional system text, temperature, and tools.
   * @returns The text returned by the SDK, or a canned description when the resolved
   *   model reports the `deterministic` provider.
   * @throws Error when the SDK result carries no string `text`.
   */
  async generateText(input: KtxGenerateTextInput): Promise<string> {
    const model = this.deps.llmProvider.getModel(input.role);
    // The deterministic provider never reaches the SDK: answer from the prompt prefix instead.
    if ((model as { provider?: string }).provider === 'deterministic') {
      return `Deterministic description for ${input.prompt.slice(0, 64).trim() || 'data source'}`;
    }
    const tools = createAiSdkToolSet(input.tools ?? {});
    const built = new KtxMessageBuilder(this.deps.llmProvider).wrapSimple({
      system: input.system,
      messages: [{ role: 'user', content: input.prompt }],
      tools,
      model,
    });
    const split = splitKtxSystemMessages(built.messages);
    const result = await generateText({
      model,
      temperature: input.temperature ?? 0,
      ...(split.system ? { system: split.system } : {}),
      messages: split.messages,
      tools: built.tools as ToolSet,
      // A repair handler is only attached when there is at least one tool to repair calls for.
      ...(hasTools(tools)
        ? {
            experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({
              source: `ktx-${input.role}`,
            }),
          }
        : {}),
    });
    if (typeof result.text !== 'string') {
      throw new Error('KTX LLM text generation returned no text');
    }
    return result.text;
  }

  /**
   * Generates a structured value for a single user prompt by requesting
   * `Output.object` with the caller's schema.
   *
   * @param input Role, prompt, schema, and optional system text, temperature, and tools.
   * @returns The SDK's `output` value.
   * @throws Error when the SDK result has no output (`null` or `undefined`).
   */
  async generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
    input: KtxGenerateObjectInput<TOutput, TSchema>,
  ): Promise<TOutput> {
    const model = this.deps.llmProvider.getModel(input.role);
    const tools = createAiSdkToolSet(input.tools ?? {});
    const built = new KtxMessageBuilder(this.deps.llmProvider).wrapSimple({
      system: input.system,
      messages: [{ role: 'user', content: input.prompt }],
      tools,
      model,
    });
    const split = splitKtxSystemMessages(built.messages);
    const result = await generateText({
      model,
      temperature: input.temperature ?? 0,
      ...(split.system ? { system: split.system } : {}),
      messages: split.messages,
      tools: built.tools as ToolSet,
      ...(hasTools(tools)
        ? {
            experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({
              source: `ktx-${input.role}`,
            }),
          }
        : {}),
      output: Output.object({ schema: input.schema as unknown as FlexibleSchema<TOutput> }),
    });
    if (result.output == null) {
      throw new Error('KTX LLM object generation returned no output');
    }
    return result.output as TOutput;
  }

  /**
   * Runs a tool-using agent loop bounded by `params.stepBudget`.
   *
   * This method does not throw: anything raised while preparing the request, recording the
   * debug summary, or running the SDK call is logged and returned as `stopReason: 'error'`.
   *
   * @param params Model role, prompts, tool set, step budget, telemetry tags, and an
   *   optional per-step callback.
   * @returns `{ stopReason: 'natural' }` on completion, otherwise
   *   `{ stopReason: 'error', error }`.
   */
  async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
    let stepIndex = 0;
    try {
      const model = this.deps.llmProvider.getModel(params.modelRole);
      const tools = createAiSdkToolSet(params.toolSet);
      const builder = new KtxMessageBuilder(this.deps.llmProvider);
      const built = builder.wrapSimple({
        system: params.systemPrompt,
        messages: [{ role: 'user', content: params.userPrompt }],
        tools,
        model,
      });
      const promptMessages = splitKtxSystemMessages(built.messages);

      // Record a summary of the outgoing request before the call is made.
      await this.deps.debugRequestRecorder?.record(
        summarizeKtxLlmDebugRequest({
          operationName: params.telemetryTags.operationName ?? 'ktx-agent-runner',
          source: params.telemetryTags.source,
          jobId: params.telemetryTags.jobId,
          unitKey: params.telemetryTags.unitKey,
          modelRole: params.modelRole,
          modelId: (model as { modelId?: string }).modelId ?? params.modelRole,
          messages: built.messages,
          tools: built.tools as Record<string, { providerOptions?: unknown }>,
        }),
      );

      await generateText({
        model,
        temperature: 0,
        stopWhen: stepCountIs(params.stepBudget),
        // Prefer the injected telemetry port; otherwise use the provider's own config.
        experimental_telemetry:
          this.deps.telemetry?.createTelemetry(params.telemetryTags) ?? this.deps.llmProvider.telemetryConfig(),
        experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({
          source: params.telemetryTags.operationName ?? 'ktx-agent-runner',
        }),
        ...(promptMessages.system ? { system: promptMessages.system } : {}),
        messages: promptMessages.messages,
        tools: built.tools as ToolSet,
        onStepFinish: async () => {
          // Count every finished step, whether or not the caller is listening.
          stepIndex += 1;
          if (!params.onStepFinish) {
            return;
          }
          try {
            await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
          } catch (err) {
            // A failing progress callback must not abort the loop; log and carry on.
            this.logger.warn(
              `[agent-runner] onStepFinish callback threw; ignoring: ${
                err instanceof Error ? err.message : String(err)
              }`,
            );
          }
        },
      });
      return { stopReason: 'natural' };
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      this.logger.warn(`[agent-runner] loop failed: ${err.message}`);
      return { stopReason: 'error', error: err };
    }
  }
}
|
||||
19
packages/cli/src/context/llm/claude-code-env.test.ts
Normal file
19
packages/cli/src/context/llm/claude-code-env.test.ts
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { CLAUDE_CODE_PROVIDER_ENV_DENYLIST, createKtxClaudeCodeEnv } from './claude-code-env.js';
|
||||
|
||||
describe('createKtxClaudeCodeEnv', () => {
  it('strips provider-routing credentials from the Claude Code child environment', () => {
    // Seed every denylisted name with a value so each one is proven to be removed.
    const seeded = Object.fromEntries(CLAUDE_CODE_PROVIDER_ENV_DENYLIST.map((key) => [key, `${key}-value`]));
    const env = createKtxClaudeCodeEnv({
      ...seeded,
      PATH: '/usr/bin',
      HOME: '/Users/test',
    });

    for (const key of CLAUDE_CODE_PROVIDER_ENV_DENYLIST) {
      expect(env).not.toHaveProperty(key);
    }
    // Unrelated variables pass through untouched.
    expect(env.PATH).toBe('/usr/bin');
    expect(env.HOME).toBe('/Users/test');
  });
});
|
||||
24
packages/cli/src/context/llm/claude-code-env.ts
Normal file
24
packages/cli/src/context/llm/claude-code-env.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
/**
 * Environment variable names removed from the environment handed to Claude Code.
 * Exported so the test suite can iterate the exact list.
 *
 * @internal
 */
export const CLAUDE_CODE_PROVIDER_ENV_DENYLIST = [
  'ANTHROPIC_API_KEY',
  'ANTHROPIC_AUTH_TOKEN',
  'ANTHROPIC_BASE_URL',
  'ANTHROPIC_MODEL',
  'ANTHROPIC_VERTEX_PROJECT_ID',
  'CLOUD_ML_REGION',
  'GOOGLE_APPLICATION_CREDENTIALS',
  'GOOGLE_CLOUD_PROJECT',
  'AWS_ACCESS_KEY_ID',
  'AWS_SECRET_ACCESS_KEY',
  'AWS_SESSION_TOKEN',
  'AWS_REGION',
  'AWS_PROFILE',
  'CLAUDE_CODE_USE_BEDROCK',
  'CLAUDE_CODE_USE_VERTEX',
] as const;

// Set form of the list above for constant-time membership checks. Every entry is upper-case.
const DENYLIST = new Set<string>(CLAUDE_CODE_PROVIDER_ENV_DENYLIST);

/**
 * Returns a copy of `env` without the denylisted variables.
 *
 * Environment variable names are case-insensitive on Windows, so on `win32` a key is
 * compared in upper-case; otherwise a differently-cased spelling such as
 * `anthropic_api_key` would survive the filter there. On every other platform names
 * are matched exactly, because differently-cased names are distinct variables.
 *
 * @param env Source environment; defaults to `process.env`. It is not mutated.
 * @param platform Platform whose naming rules apply; defaults to `process.platform`.
 * @returns A new object holding every entry of `env` whose name is not denylisted.
 */
export function createKtxClaudeCodeEnv(
  env: NodeJS.ProcessEnv = process.env,
  platform: NodeJS.Platform = process.platform,
): Record<string, string | undefined> {
  const caseInsensitiveNames = platform === 'win32';
  return Object.fromEntries(
    Object.entries(env).filter(([key]) => !DENYLIST.has(caseInsensitiveNames ? key.toUpperCase() : key)),
  );
}
|
||||
17
packages/cli/src/context/llm/claude-code-models.test.ts
Normal file
17
packages/cli/src/context/llm/claude-code-models.test.ts
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { resolveClaudeCodeModel } from './claude-code-models.js';
|
||||
|
||||
describe('resolveClaudeCodeModel', () => {
  // Short aliases expand to a full id; a full id is returned as-is.
  it.each([
    ['sonnet', 'claude-sonnet-4-6'],
    ['opus', 'claude-opus-4-7'],
    ['haiku', 'claude-haiku-4-5'],
    ['claude-sonnet-4-6', 'claude-sonnet-4-6'],
  ])('maps %s to %s', (input, expected) => {
    expect(resolveClaudeCodeModel(input)).toBe(expected);
  });

  it('rejects unsupported aliases', () => {
    expect(() => resolveClaudeCodeModel('gpt-5')).toThrow('Unsupported Claude Code model');
  });
});
|
||||
19
packages/cli/src/context/llm/claude-code-models.ts
Normal file
19
packages/cli/src/context/llm/claude-code-models.ts
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
// Short alias -> full model id. A Map is used rather than an object literal so that lookups
// only ever see these entries and never inherited keys such as `constructor` or `__proto__`.
const CLAUDE_CODE_MODEL_ALIASES: ReadonlyMap<string, string> = new Map([
  ['sonnet', 'claude-sonnet-4-6'],
  ['opus', 'claude-opus-4-7'],
  ['haiku', 'claude-haiku-4-5'],
]);

// Accepts ids of the form `claude-<family>-<number>-<number>`, e.g. `claude-sonnet-4-6`.
const FULL_MODEL_ID = /^claude-(sonnet|opus|haiku)-[0-9]+-[0-9]+$/;

/**
 * Resolves a user-supplied model name to a full Claude model id.
 *
 * Surrounding whitespace is ignored. A known alias (`sonnet`, `opus`, `haiku`) expands to
 * its full id; a string already shaped like a full id is returned unchanged.
 *
 * @param model Alias or full model id.
 * @returns The full model id.
 * @throws Error when `model` is neither a known alias nor a well-formed full id.
 */
export function resolveClaudeCodeModel(model: string): string {
  const normalized = model.trim();
  const alias = CLAUDE_CODE_MODEL_ALIASES.get(normalized);
  if (alias !== undefined) {
    return alias;
  }
  if (FULL_MODEL_ID.test(normalized)) {
    return normalized;
  }
  throw new Error(`Unsupported Claude Code model "${model}". Use sonnet, opus, haiku, or a claude-* model id.`);
}
|
||||
497
packages/cli/src/context/llm/claude-code-runtime.test.ts
Normal file
497
packages/cli/src/context/llm/claude-code-runtime.test.ts
Normal file
|
|
@ -0,0 +1,497 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { ClaudeCodeKtxLlmRuntime, mapClaudeCodeStopReason, runClaudeCodeAuthProbe } from './claude-code-runtime.js';
|
||||
|
||||
/** Test helper: replays a fixed list of SDK messages, in order, as an async generator. */
async function* stream(messages: SDKMessage[]): AsyncGenerator<SDKMessage, void> {
  for (const message of messages) {
    yield message;
  }
}
|
||||
|
||||
/**
 * Test helper: builds a `system`/`init` SDK message populated with fixed defaults.
 * Any field passed in `overrides` replaces the default of the same name.
 */
function initMessage(overrides: Partial<Extract<SDKMessage, { type: 'system'; subtype: 'init' }>> = {}): Extract<
  SDKMessage,
  { type: 'system'; subtype: 'init' }
> {
  return {
    type: 'system',
    subtype: 'init',
    apiKeySource: 'none' as never, // pragma: allowlist secret
    claude_code_version: '0.3.142',
    cwd: '/tmp/project',
    tools: [],
    mcp_servers: [],
    model: 'claude-sonnet-4-6',
    permissionMode: 'dontAsk',
    slash_commands: [],
    output_style: 'default',
    skills: [],
    plugins: [],
    uuid: '00000000-0000-4000-8000-000000000001',
    session_id: 'session-id',
    // Spread last so caller-supplied fields win over the defaults above.
    ...overrides,
  };
}
|
||||
|
||||
/**
 * Test helper: builds a successful `result` SDK message populated with fixed defaults.
 * Any field passed in `overrides` replaces the default of the same name.
 */
function resultMessage(overrides: Partial<Extract<SDKMessage, { type: 'result' }>> = {}): Extract<
  SDKMessage,
  { type: 'result' }
> {
  return {
    type: 'result',
    subtype: 'success',
    duration_ms: 1,
    duration_api_ms: 1,
    is_error: false,
    num_turns: 1,
    result: 'ok',
    stop_reason: null,
    total_cost_usd: 0,
    usage: {} as never,
    modelUsage: {},
    permission_denials: [],
    errors: [],
    uuid: '00000000-0000-4000-8000-000000000002',
    session_id: 'session-id',
    // Spread last so caller-supplied fields win over the defaults above.
    ...overrides,
  } as Extract<SDKMessage, { type: 'result' }>;
}
|
||||
|
||||
describe('ClaudeCodeKtxLlmRuntime', () => {
|
||||
it('passes isolation options and scrubbed env to text generation', async () => {
  // The host env carries an API key so the scrubbing assertion below is meaningful.
  const hostEnv = { ANTHROPIC_API_KEY: 'sk-ant-test', PATH: '/usr/bin' }; // pragma: allowlist secret
  const query = vi.fn((_input: any) => stream([initMessage(), resultMessage({ result: 'hello' })]));
  const runtime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query,
    env: hostEnv,
  });

  const text = await runtime.generateText({ role: 'default', prompt: 'say hello' });

  expect(text).toBe('hello');

  // No KTX tools were requested, so every allow-list must be empty.
  const isolatedOptions = expect.objectContaining({
    cwd: '/tmp/project',
    model: 'claude-sonnet-4-6',
    maxTurns: 1,
    settingSources: [],
    skills: [],
    plugins: [],
    tools: [],
    managedSettings: {
      allowManagedMcpServersOnly: true,
      allowedMcpServers: [],
    },
    strictMcpConfig: true,
    allowedTools: [],
    permissionMode: 'dontAsk',
    persistSession: false,
    env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
  });
  expect(query).toHaveBeenCalledWith({ prompt: 'say hello', options: isolatedOptions });
});
|
||||
|
||||
it('validates structured output with the caller schema', async () => {
  const answerSchema = z.object({ answer: z.string() });
  const structuredOutput = { answer: 'yes' };
  const query = vi.fn((_input: any) =>
    stream([initMessage(), resultMessage({ structured_output: structuredOutput })]),
  );
  const runtime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query,
    env: {},
  });

  const parsed = await runtime.generateObject({ role: 'default', prompt: 'json', schema: answerSchema });

  expect(parsed).toEqual({ answer: 'yes' });
  // The caller's zod schema must reach the SDK as a JSON-schema output format.
  const { outputFormat } = query.mock.calls[0][0].options;
  expect(outputFormat).toMatchObject({
    type: 'json_schema',
    schema: expect.objectContaining({ type: 'object' }),
  });
});
|
||||
|
||||
it('registers only exact KTX MCP tool ids and denies non-KTX tools', async () => {
  // One top-level assistant turn followed by a max-turns result.
  const assistantTurn = {
    type: 'assistant',
    message: { role: 'assistant', content: [] },
    parent_tool_use_id: null,
    uuid: '00000000-0000-4000-8000-000000000003',
    session_id: 'session-id',
  } as unknown as SDKMessage;
  const query = vi.fn((_input: any) =>
    stream([
      initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
      assistantTurn,
      resultMessage({ subtype: 'error_max_turns', is_error: true }),
    ]),
  );
  const runtime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query,
    env: {},
  });
  const onStepFinish = vi.fn();

  await runtime.runAgentLoop({
    modelRole: 'default',
    systemPrompt: 'system',
    userPrompt: 'user',
    toolSet: {
      load_skill: {
        name: 'load_skill',
        description: 'Load skill.',
        inputSchema: z.object({ name: z.string() }),
        execute: async () => ({ markdown: 'loaded' }),
      },
    },
    stepBudget: 1,
    telemetryTags: { operationName: 'test' },
    onStepFinish,
  });

  const options = query.mock.calls[0][0].options;
  const { signal } = new AbortController();

  expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
  expect(options.managedSettings).toEqual({
    allowManagedMcpServersOnly: true,
    allowedMcpServers: [{ serverName: 'ktx' }],
  });
  expect(options.strictMcpConfig).toBe(true);

  const ktxDecision = await options.canUseTool('mcp__ktx__load_skill', {}, { signal, toolUseID: '1' });
  expect(ktxDecision).toEqual({
    behavior: 'allow',
    toolUseID: '1',
  });

  const bashDecision = await options.canUseTool('Bash', {}, { signal, toolUseID: '2' });
  expect(bashDecision).toMatchObject({
    behavior: 'deny',
    toolUseID: '2',
  });

  expect(onStepFinish).toHaveBeenCalledWith({ stepIndex: 1, stepBudget: 1 });
});
|
||||
|
||||
it('treats host-discovered commands skills and agents as non-fatal init metadata for text and auth probe', async () => {
  // The init message reports host slash commands, skills, and agents, but no tools,
  // MCP servers, or plugins.
  const hostDiscoveredInit = initMessage({
    slash_commands: ['/help', '/compact', '/clear', '/user-command'],
    skills: ['pdf', 'docx'],
    agents: ['claude', 'Explore', 'general-purpose'],
  });
  const { signal } = new AbortController();

  // --- text generation ---
  const textQuery = vi.fn((_input: any) => stream([hostDiscoveredInit, resultMessage({ result: 'hello' })]));
  const runtime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query: textQuery,
    env: { ANTHROPIC_API_KEY: 'sk-ant-test', PATH: '/usr/bin' }, // pragma: allowlist secret
  });

  const text = await runtime.generateText({ role: 'default', prompt: 'say hello' });
  expect(text).toBe('hello');

  const textOptions = textQuery.mock.calls[0][0].options;
  expect(textOptions).toMatchObject({
    settingSources: [],
    skills: [],
    plugins: [],
    tools: [],
    managedSettings: {
      allowManagedMcpServersOnly: true,
      allowedMcpServers: [],
    },
    strictMcpConfig: true,
    allowedTools: [],
    permissionMode: 'dontAsk',
    persistSession: false,
    env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
  });
  expect(textOptions.disallowedTools).toEqual(expect.arrayContaining(['Agent', 'Task', 'Bash']));

  // The permission callback must still deny each host-provided entry point.
  const deniedTools = [
    ['Agent', 'agent'],
    ['Skill', 'skill'],
    ['SlashCommand', 'slash'],
  ] as const;
  for (const [toolName, toolUseID] of deniedTools) {
    const decision = await textOptions.canUseTool(toolName, {}, { signal, toolUseID });
    expect(decision).toMatchObject({
      behavior: 'deny',
      toolUseID,
    });
  }

  // --- auth probe ---
  const probeQuery = vi.fn((_input: any) => stream([hostDiscoveredInit, resultMessage({ result: 'ok' })]));
  const probeOutcome = await runClaudeCodeAuthProbe({
    projectDir: '/tmp/project',
    model: 'sonnet',
    query: probeQuery,
    env: { ANTHROPIC_AUTH_TOKEN: 'token', HOME: '/Users/test' },
  });
  expect(probeOutcome).toEqual({ ok: true });

  const probeOptions = probeQuery.mock.calls[0][0].options;
  expect(probeOptions).toMatchObject({
    settingSources: [],
    skills: [],
    plugins: [],
    tools: [],
    allowedTools: [],
    permissionMode: 'dontAsk',
    persistSession: false,
    env: expect.objectContaining({ HOME: '/Users/test' }),
  });
  expect(probeOptions.env).not.toEqual(
    expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token' }),
  );
});
|
||||
|
||||
it('allows host-discovered context during agent loops while requiring exact KTX MCP tools and servers', async () => {
  // The init message reports exactly the expected KTX tool and server, plus host
  // slash commands, skills, and agents.
  const hostDiscoveredInit = initMessage({
    tools: ['mcp__ktx__load_skill'],
    mcp_servers: [{ name: 'ktx', status: 'connected' }],
    slash_commands: ['/help', '/compact', '/clear'],
    skills: ['memory-agent', 'doc-reader'],
    agents: ['claude', 'Plan', 'Explore'],
  });
  const assistantTurn = {
    type: 'assistant',
    message: { role: 'assistant', content: [] },
    parent_tool_use_id: null,
    uuid: '00000000-0000-4000-8000-000000000006',
    session_id: 'session-id',
  } as unknown as SDKMessage;
  const query = vi.fn((_input: any) =>
    stream([hostDiscoveredInit, assistantTurn, resultMessage({ subtype: 'error_max_turns', is_error: true })]),
  );
  const runtime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query,
    env: {},
  });

  const outcome = await runtime.runAgentLoop({
    modelRole: 'default',
    systemPrompt: 'system',
    userPrompt: 'user',
    toolSet: {
      load_skill: {
        name: 'load_skill',
        description: 'Load skill.',
        inputSchema: z.object({ name: z.string() }),
        execute: async () => ({ markdown: 'loaded' }),
      },
    },
    stepBudget: 1,
    telemetryTags: { operationName: 'test' },
  });
  expect(outcome).toEqual({ stopReason: 'budget' });

  const options = query.mock.calls[0][0].options;
  const { signal } = new AbortController();

  expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
  expect(options.managedSettings).toEqual({
    allowManagedMcpServersOnly: true,
    allowedMcpServers: [{ serverName: 'ktx' }],
  });
  expect(options.strictMcpConfig).toBe(true);

  const ktxDecision = await options.canUseTool('mcp__ktx__load_skill', {}, { signal, toolUseID: '1' });
  expect(ktxDecision).toEqual({
    behavior: 'allow',
    toolUseID: '1',
  });

  const taskDecision = await options.canUseTool('Task', {}, { signal, toolUseID: '2' });
  expect(taskDecision).toMatchObject({
    behavior: 'deny',
    toolUseID: '2',
  });

  const skillDecision = await options.canUseTool('Skill', {}, { signal, toolUseID: '3' });
  expect(skillDecision).toMatchObject({
    behavior: 'deny',
    toolUseID: '3',
  });
});
|
||||
|
||||
it('still rejects unexpected tools, missing KTX tools, plugins, and non-KTX MCP servers from init messages', async () => {
  // The init message breaks isolation four ways: an unexpected tool, a missing
  // KTX tool, a foreign MCP server, and a host plugin.
  const violatingInit = initMessage({
    tools: ['Bash'],
    mcp_servers: [{ name: 'filesystem', status: 'connected' }],
    plugins: [{ name: 'host-plugin', path: '/tmp/plugin' }],
  });
  const query = vi.fn((_input: any) => stream([violatingInit, resultMessage({ result: 'hello' })]));
  const runtime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query,
    env: {},
  });

  const pendingText = runtime.generateText({
    role: 'default',
    prompt: 'say hello',
    tools: {
      load_skill: {
        name: 'load_skill',
        description: 'Load skill.',
        inputSchema: z.object({ name: z.string() }),
        execute: async () => ({ markdown: 'loaded' }),
      },
    },
  });

  await expect(pendingText).rejects.toThrow(
    /Claude Code runtime isolation failed: .*tools=Bash.*missing_tools=mcp__ktx__load_skill.*mcp_servers=filesystem.*plugins=host-plugin/,
  );
});
|
||||
|
||||
it('passes scrubbed env to object generation and agent loops', async () => {
  /**
   * Asserts that none of the given host entries reached the env handed to the SDK.
   * Each key is checked separately: a single `objectContaining` listing several keys
   * only matches when all of them are present, so its negation would still pass if
   * just one of them leaked.
   */
  const expectScrubbed = (env: unknown, hostOnly: Record<string, string>): void => {
    for (const [name, value] of Object.entries(hostOnly)) {
      expect(env).not.toEqual(expect.objectContaining({ [name]: value }));
    }
  };

  // --- object generation ---
  const schema = z.object({ answer: z.string() });
  const objectSecrets = { ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }; // pragma: allowlist secret
  const objectQuery = vi.fn((_input: any) =>
    stream([initMessage(), resultMessage({ structured_output: { answer: 'yes' } })]),
  );
  const objectRuntime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query: objectQuery,
    env: { ...objectSecrets, PATH: '/usr/bin' },
  });

  await expect(objectRuntime.generateObject({ role: 'default', prompt: 'json', schema })).resolves.toEqual({
    answer: 'yes',
  });

  const objectOptions = objectQuery.mock.calls[0][0].options;
  expect(objectOptions.env).toEqual(expect.objectContaining({ PATH: '/usr/bin' }));
  expect(objectOptions.managedSettings).toEqual({
    allowManagedMcpServersOnly: true,
    allowedMcpServers: [],
  });
  expectScrubbed(objectOptions.env, objectSecrets);

  // --- agent loop ---
  const agentSecrets = { ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' };
  const agentQuery = vi.fn((_input: any) =>
    stream([
      initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
      {
        type: 'assistant',
        message: { role: 'assistant', content: [] },
        parent_tool_use_id: null,
        uuid: '00000000-0000-4000-8000-000000000004',
        session_id: 'session-id',
      } as unknown as SDKMessage,
      resultMessage({ subtype: 'error_max_turns', is_error: true }),
    ]),
  );
  const agentRuntime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query: agentQuery,
    env: { ...agentSecrets, HOME: '/Users/test' },
  });

  await agentRuntime.runAgentLoop({
    modelRole: 'default',
    systemPrompt: 'system',
    userPrompt: 'user',
    toolSet: {
      load_skill: {
        name: 'load_skill',
        description: 'Load skill.',
        inputSchema: z.object({ name: z.string() }),
        execute: async () => ({ markdown: 'loaded' }),
      },
    },
    stepBudget: 1,
    telemetryTags: { operationName: 'test' },
  });

  const agentOptions = agentQuery.mock.calls[0][0].options;
  expect(agentOptions.env).toEqual(expect.objectContaining({ HOME: '/Users/test' }));
  expect(agentOptions.managedSettings).toEqual({
    allowManagedMcpServersOnly: true,
    allowedMcpServers: [{ serverName: 'ktx' }],
  });
  expectScrubbed(agentOptions.env, agentSecrets);
});
|
||||
|
||||
it('logs and ignores onStepFinish callback errors', async () => {
  const assistantTurn = {
    type: 'assistant',
    message: { role: 'assistant', content: [] },
    parent_tool_use_id: null,
    uuid: '00000000-0000-4000-8000-000000000005',
    session_id: 'session-id',
  } as unknown as SDKMessage;
  const query = vi.fn((_input: any) =>
    stream([initMessage(), assistantTurn, resultMessage({ subtype: 'success', terminal_reason: 'completed' })]),
  );
  const logger = {
    debug: vi.fn(),
    log: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
  };
  const runtime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query,
    env: {},
    logger,
  });

  // The callback always throws; the loop must still finish naturally.
  const explodingCallback = async (): Promise<void> => {
    throw new Error('callback exploded');
  };

  const outcome = await runtime.runAgentLoop({
    modelRole: 'default',
    systemPrompt: 'system',
    userPrompt: 'user',
    toolSet: {},
    stepBudget: 1,
    telemetryTags: { operationName: 'test' },
    onStepFinish: explodingCallback,
  });

  expect(outcome).toEqual({ stopReason: 'natural' });
  expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('callback exploded'));
});
|
||||
|
||||
it('maps max-turn terminal reasons to budget', () => {
  // Each row is [result overrides, expected loop stop reason].
  const cases: Array<[Parameters<typeof resultMessage>[0], string]> = [
    [{ subtype: 'error_max_turns' }, 'budget'],
    [{ terminal_reason: 'max_turns' }, 'budget'],
    [{ stop_reason: 'max_turns' }, 'budget'],
    [{ subtype: 'success', terminal_reason: 'completed' }, 'natural'],
    [{ subtype: 'error_during_execution' }, 'error'],
  ];

  for (const [overrides, expected] of cases) {
    expect(mapClaudeCodeStopReason(resultMessage(overrides))).toBe(expected);
  }
});
|
||||
|
||||
it('auth probe uses isolation options and a scrubbed env', async () => {
  const hostEnv = { ANTHROPIC_API_KEY: 'sk-ant-test' }; // pragma: allowlist secret
  const query = vi.fn((_input: any) => stream([initMessage(), resultMessage({ result: 'ok' })]));

  const outcome = await runClaudeCodeAuthProbe({
    projectDir: '/tmp/project',
    model: 'sonnet',
    query,
    env: hostEnv,
  });

  expect(outcome).toEqual({ ok: true });

  const probeOptions = query.mock.calls[0][0].options;
  expect(probeOptions).toMatchObject({
    settingSources: [],
    skills: [],
    plugins: [],
    tools: [],
    managedSettings: {
      allowManagedMcpServersOnly: true,
      allowedMcpServers: [],
    },
    strictMcpConfig: true,
    allowedTools: [],
    persistSession: false,
    env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
  });
});
|
||||
|
||||
it('reports unsupported Claude Code models without framing them as auth failures', async () => {
  const outcome = await runClaudeCodeAuthProbe({
    projectDir: '/tmp/project',
    model: 'gpt-5',
    query: vi.fn(),
    env: {},
  });

  // The message must be the model-resolution error verbatim, with no authentication preamble.
  expect(outcome).toEqual({
    ok: false,
    message: 'Unsupported Claude Code model "gpt-5". Use sonnet, opus, haiku, or a claude-* model id.',
  });
});
|
||||
});
|
||||
347
packages/cli/src/context/llm/claude-code-runtime.ts
Normal file
347
packages/cli/src/context/llm/claude-code-runtime.ts
Normal file
|
|
@ -0,0 +1,347 @@
|
|||
import {
|
||||
createSdkMcpServer,
|
||||
query as defaultQuery,
|
||||
type Options,
|
||||
type SDKMessage,
|
||||
type SDKResultMessage,
|
||||
} from '@anthropic-ai/claude-agent-sdk';
|
||||
import { z } from 'zod';
|
||||
import { noopLogger, type KtxLogger } from '../../context/core/config.js';
|
||||
import { createKtxClaudeCodeEnv } from './claude-code-env.js';
|
||||
import { resolveClaudeCodeModel } from './claude-code-models.js';
|
||||
import { createClaudeSdkTools, mcpToolIds } from './runtime-tools.js';
|
||||
import type {
|
||||
KtxGenerateObjectInput,
|
||||
KtxGenerateTextInput,
|
||||
KtxLlmRuntimePort,
|
||||
KtxRuntimeToolSet,
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
RunLoopStopReason,
|
||||
} from './runtime-port.js';
|
||||
|
||||
/**
 * Shape of the query function this module drives: it takes the same first
 * argument as the SDK's `query` and yields SDK messages. Narrower than the
 * SDK's own return type so tests can inject a plain async generator.
 */
type QueryFn = (params: Parameters<typeof defaultQuery>[0]) => AsyncIterable<SDKMessage>;
|
||||
|
||||
/** Construction-time dependencies for {@link ClaudeCodeKtxLlmRuntime}. */
export interface ClaudeCodeKtxLlmRuntimeDeps {
  /** Directory passed to the SDK as the query `cwd`. */
  projectDir: string;
  /** Model name per role; a role without an entry falls back to `default`. */
  modelSlots: { default: string } & Partial<Record<string, string>>;
  /** Query implementation; the SDK's `query` is used when omitted. */
  query?: QueryFn;
  /** Source environment handed to `createKtxClaudeCodeEnv` to build the env each query receives. */
  env?: NodeJS.ProcessEnv;
  /** Logger for runtime warnings; a no-op logger is used when omitted. */
  logger?: KtxLogger;
}
|
||||
|
||||
/**
 * Tool names that every query passes to the SDK as `disallowedTools`.
 * NOTE(review): presumably the set of Claude Code built-in tools, per the
 * constant's name — confirm against the SDK when new built-ins are added.
 */
const BUILTIN_TOOLS = [
  'Agent',
  'Task',
  'AskUserQuestion',
  'Bash',
  'Read',
  'Edit',
  'Write',
  'Glob',
  'Grep',
  'WebFetch',
  'WebSearch',
  'TodoWrite',
];
|
||||
|
||||
/** Name under which the in-process KTX MCP server is registered and allow-listed. */
const KTX_MCP_SERVER_NAME = 'ktx';
|
||||
|
||||
/** Type guard: true when `message` is the SDK's terminal `result` message. */
function isResult(message: SDKMessage): message is SDKResultMessage {
  const { type } = message;
  return type === 'result';
}
|
||||
|
||||
/**
 * Converts a non-success result message into an `Error`.
 *
 * @param result - Terminal result message from a query.
 * @returns `undefined` for a `success` result; otherwise an error naming the
 *   result subtype, followed by the reported errors joined with `; ` when any exist.
 */
function resultError(result: SDKResultMessage): Error | undefined {
  if (result.subtype !== 'success') {
    const suffix = result.errors.length === 0 ? '' : `: ${result.errors.join('; ')}`;
    return new Error(`Claude Code query failed (${result.subtype})${suffix}`);
  }
  return undefined;
}
|
||||
|
||||
/**
 * Maps a terminal result message onto the agent-loop stop reason.
 *
 * - `budget`: the subtype, `terminal_reason`, or `stop_reason` reports a max-turns stop.
 * - `natural`: a `success` result whose `terminal_reason` is absent or `completed`.
 * - `error`: everything else.
 *
 * @internal
 */
export function mapClaudeCodeStopReason(result: SDKResultMessage): RunLoopStopReason {
  const hitTurnLimit =
    result.subtype === 'error_max_turns' ||
    result.terminal_reason === 'max_turns' ||
    result.stop_reason === 'max_turns';
  if (hitTurnLimit) {
    return 'budget';
  }

  if (result.subtype !== 'success') {
    return 'error';
  }

  const endedCleanly = !result.terminal_reason || result.terminal_reason === 'completed';
  return endedCleanly ? 'natural' : 'error';
}
|
||||
|
||||
/** Converts a zod schema into a draft-7 JSON Schema object. */
function jsonSchema(schema: z.ZodType): Record<string, unknown> {
  const converted = z.toJSONSchema(schema, { target: 'draft-7' });
  return converted as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Resolves the model id for a role.
 *
 * @param modelSlots - Configured model names keyed by role.
 * @param role - Role to look up; the `default` slot is used when the role has no entry.
 * @returns Whatever `resolveClaudeCodeModel` yields for the configured name.
 */
function modelForRole(modelSlots: ClaudeCodeKtxLlmRuntimeDeps['modelSlots'], role: string): string {
  const configuredName = modelSlots[role] ?? modelSlots.default;
  return resolveClaudeCodeModel(configuredName);
}
|
||||
|
||||
/**
 * Checks the SDK `system`/`init` message against the expected isolation and
 * throws if the session exposes anything else. Other message kinds are ignored.
 *
 * The check fails when the init message reports a tool outside
 * `allowedToolIds`, lacks an allowed tool, reports an MCP server outside
 * `expectedMcpServerNames`, lacks an expected server, or reports any plugin.
 * Slash commands, skills, and agents never cause a failure; they are only
 * included in the error text for diagnosis.
 *
 * @param message - Any message from the query stream.
 * @param allowedToolIds - Exact tool ids the session must expose.
 * @param expectedMcpServerNames - Exact MCP server names the session must expose.
 * @throws Error describing every violated category.
 */
function assertInitIsolation(
  message: SDKMessage,
  allowedToolIds: Set<string>,
  expectedMcpServerNames: Set<string>,
): void {
  if (!(message.type === 'system' && message.subtype === 'init')) {
    return;
  }

  const listOrNone = (names: readonly string[]): string => names.join(',') || '(none)';

  const reportedToolIds = new Set(message.tools);
  const reportedServerNames = message.mcp_servers.map(({ name }) => name);

  const toolsOutsideAllowlist = message.tools.filter((id) => !allowedToolIds.has(id));
  const toolsNotReported = [...allowedToolIds].filter((id) => !reportedToolIds.has(id));
  const serversOutsideAllowlist = reportedServerNames.filter((name) => !expectedMcpServerNames.has(name));
  const serversNotReported = [...expectedMcpServerNames].filter((name) => !reportedServerNames.includes(name));
  const pluginNames = message.plugins.map(({ name }) => name);

  const violationGroups = [
    toolsOutsideAllowlist,
    toolsNotReported,
    serversOutsideAllowlist,
    serversNotReported,
    pluginNames,
  ];
  if (violationGroups.every((group) => group.length === 0)) {
    return;
  }

  const details = [
    `tools=${listOrNone(toolsOutsideAllowlist)}`,
    `missing_tools=${listOrNone(toolsNotReported)}`,
    `mcp_servers=${listOrNone(serversOutsideAllowlist)}`,
    `missing_mcp_servers=${listOrNone(serversNotReported)}`,
    `plugins=${listOrNone(pluginNames)}`,
    `host_slash_commands=${message.slash_commands.length}`,
    `host_skills=${message.skills.length}`,
    `host_agents=${message.agents?.join(',') || '(none)'}`,
  ];
  throw new Error(`Claude Code runtime isolation failed: ${details.join(' ')}`);
}
|
||||
|
||||
/**
 * Names of the MCP servers a query with the given tool set should expose:
 * the KTX server when at least one tool is supplied, otherwise none.
 */
function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set<string> {
  const names = new Set<string>();
  if (tools && Object.keys(tools).length > 0) {
    names.add(KTX_MCP_SERVER_NAME);
  }
  return names;
}
|
||||
|
||||
/**
 * Builds the `managedSettings` option: only managed MCP servers are allowed,
 * and the allow-list contains exactly `serverNames`.
 */
function managedMcpSettings(serverNames: string[]): NonNullable<Options['managedSettings']> {
  const allowedMcpServers = Array.from(serverNames, (serverName) => ({ serverName }));
  return { allowManagedMcpServersOnly: true, allowedMcpServers };
}
|
||||
|
||||
/**
 * Builds the locked-down SDK options shared by every query this module issues.
 *
 * The options pass empty lists for `settingSources`, `skills`, `plugins`, and
 * `tools`, allow only the KTX MCP tool ids derived from `input.tools`, list
 * every name in `BUILTIN_TOOLS` as disallowed, and install a `canUseTool`
 * callback that denies any tool id outside that allow-list. The KTX MCP
 * server is registered only when at least one tool is supplied.
 *
 * @param input.projectDir - Directory used as the query `cwd`.
 * @param input.model - Already-resolved model id.
 * @param input.env - Source environment; passed through `createKtxClaudeCodeEnv`.
 * @param input.maxTurns - Turn limit for the query.
 * @param input.tools - Optional KTX tools to expose through the in-process MCP server.
 * @returns A fresh options object for one query.
 */
function baseOptions(input: {
  projectDir: string;
  model: string;
  env: NodeJS.ProcessEnv | undefined;
  maxTurns: number;
  tools?: KtxRuntimeToolSet;
}): Options {
  const tools = input.tools ?? {};
  const hasTools = Object.keys(tools).length > 0;
  const toolIds = mcpToolIds(tools);
  const allowedToolIds = new Set(toolIds);
  const expectedServerNames = [...expectedMcpServerNames(input.tools)];
  return {
    cwd: input.projectDir,
    model: input.model,
    maxTurns: input.maxTurns,
    settingSources: [],
    skills: [],
    plugins: [],
    tools: [],
    managedSettings: managedMcpSettings(expectedServerNames),
    strictMcpConfig: true,
    allowedTools: toolIds,
    // Copy so a consumer that mutates its options cannot alter the shared
    // module-level list used by every later query.
    disallowedTools: [...BUILTIN_TOOLS],
    canUseTool: async (toolName, _toolInput, options) =>
      allowedToolIds.has(toolName)
        ? { behavior: 'allow', toolUseID: options.toolUseID }
        : {
            behavior: 'deny',
            message: `KTX claude-code runtime only permits current KTX MCP tools; denied ${toolName}.`,
            toolUseID: options.toolUseID,
          },
    permissionMode: 'dontAsk',
    persistSession: false,
    env: createKtxClaudeCodeEnv(input.env),
    ...(hasTools
      ? {
          mcpServers: {
            [KTX_MCP_SERVER_NAME]: createSdkMcpServer({
              name: KTX_MCP_SERVER_NAME,
              tools: createClaudeSdkTools(tools),
            }),
          },
        }
      : {}),
  };
}
|
||||
|
||||
/**
 * Runs one query to completion and returns its terminal result message.
 *
 * Every message is checked with `assertInitIsolation`. Each assistant message
 * whose `parent_tool_use_id` is `null` triggers `onAssistantTurn`, which is
 * awaited before the next message is read. The stream is drained fully; if
 * several result messages arrive, the last one is returned.
 *
 * @throws Error when isolation is violated or the stream ends without a result message.
 */
async function collectResult(params: {
  query: QueryFn;
  prompt: string;
  options: Options;
  allowedToolIds: Set<string>;
  expectedMcpServerNames: Set<string>;
  onAssistantTurn?: () => Promise<void>;
}): Promise<SDKResultMessage> {
  const messages = params.query({ prompt: params.prompt, options: params.options });

  let lastResult: SDKResultMessage | undefined;
  for await (const message of messages) {
    assertInitIsolation(message, params.allowedToolIds, params.expectedMcpServerNames);

    const isTopLevelAssistantTurn = message.type === 'assistant' && message.parent_tool_use_id === null;
    if (isTopLevelAssistantTurn) {
      await params.onAssistantTurn?.();
    }

    if (isResult(message)) {
      lastResult = message;
    }
  }

  if (lastResult) {
    return lastResult;
  }
  throw new Error('Claude Code query returned no result message');
}
|
||||
|
||||
/**
 * KTX LLM runtime that issues every request through a Claude Code SDK query
 * built from the locked-down options in `baseOptions`.
 */
export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
  private readonly runQuery: QueryFn;
  private readonly logger: KtxLogger;

  /**
   * @param deps - Project directory, model slots, and optional query/env/logger
   *   overrides; the SDK `query` and a no-op logger are used when omitted.
   */
  constructor(private readonly deps: ClaudeCodeKtxLlmRuntimeDeps) {
    this.runQuery = deps.query ?? defaultQuery;
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Runs a single-turn query and returns its text result.
   *
   * @throws Error when isolation is violated or the query does not succeed.
   */
  async generateText(input: KtxGenerateTextInput): Promise<string> {
    const result = await this.runSingleTurn({
      role: input.role,
      prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
      tools: input.tools,
    });
    return result.result;
  }

  /**
   * Runs a single-turn query that requests JSON-schema structured output and
   * validates that output with the caller's zod schema.
   *
   * @throws Error when isolation is violated or the query does not succeed;
   *   the schema's own parse error when the structured output does not match.
   */
  async generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
    input: KtxGenerateObjectInput<TOutput, TSchema>,
  ): Promise<TOutput> {
    const result = await this.runSingleTurn({
      role: input.role,
      prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
      tools: input.tools,
      outputFormat: { type: 'json_schema' as const, schema: jsonSchema(input.schema as z.ZodType) },
    });
    return (input.schema as z.ZodType<TOutput>).parse(result.structured_output);
  }

  /**
   * Runs a multi-turn agent loop limited to `params.stepBudget` turns.
   *
   * `onStepFinish` is invoked once per top-level assistant message with a
   * 1-based `stepIndex`; errors it throws are logged as warnings and ignored.
   * This method never rejects: any thrown error is returned as
   * `{ stopReason: 'error', error }`.
   */
  async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
    let stepIndex = 0;
    try {
      const options = baseOptions({
        projectDir: this.deps.projectDir,
        model: modelForRole(this.deps.modelSlots, params.modelRole),
        env: this.deps.env,
        maxTurns: params.stepBudget,
        tools: params.toolSet,
      });
      const result = await collectResult({
        query: this.runQuery,
        prompt: params.userPrompt,
        options: { ...options, systemPrompt: params.systemPrompt },
        allowedToolIds: new Set(mcpToolIds(params.toolSet)),
        expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
        onAssistantTurn: async () => {
          stepIndex += 1;
          if (!params.onStepFinish) {
            return;
          }
          try {
            await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
          } catch (error) {
            // A failing progress callback must not abort the loop.
            this.logger.warn(
              `[claude-code-runner] onStepFinish callback threw; ignoring: ${
                error instanceof Error ? error.message : String(error)
              }`,
            );
          }
        },
      });
      const stopReason = mapClaudeCodeStopReason(result);
      const error = resultError(result);
      // Attach the error only for an 'error' stop: a max-turns result maps to
      // 'budget' even though its subtype is not 'success'.
      return { stopReason, ...(stopReason === 'error' && error ? { error } : {}) };
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      return { stopReason: 'error', error: err };
    }
  }

  /**
   * Shared single-turn path for `generateText` and `generateObject`: builds
   * the isolated options, runs the query, and returns the result only when it
   * succeeded.
   *
   * @throws Error when isolation is violated or the result subtype is not `success`.
   */
  private async runSingleTurn(request: {
    role: string;
    prompt: string;
    tools: KtxRuntimeToolSet | undefined;
    outputFormat?: Options['outputFormat'];
  }): Promise<Extract<SDKResultMessage, { subtype: 'success' }>> {
    const options: Options = {
      ...baseOptions({
        projectDir: this.deps.projectDir,
        model: modelForRole(this.deps.modelSlots, request.role),
        env: this.deps.env,
        maxTurns: 1,
        tools: request.tools,
      }),
      // Add the key only when requested so plain text queries carry no outputFormat.
      ...(request.outputFormat ? { outputFormat: request.outputFormat } : {}),
    };
    const result = await collectResult({
      query: this.runQuery,
      prompt: request.prompt,
      options,
      allowedToolIds: new Set(mcpToolIds(request.tools ?? {})),
      expectedMcpServerNames: expectedMcpServerNames(request.tools),
    });
    const failure = resultError(result);
    if (failure) {
      throw failure;
    }
    // Unreachable at runtime (resultError covers every non-success subtype);
    // kept so the compiler narrows `result` to the success variant.
    if (result.subtype !== 'success') {
      throw new Error(`Claude Code query failed (${result.subtype})`);
    }
    return result;
  }
}
|
||||
|
||||
/**
 * Checks that Claude Code can be used by running a one-turn, tool-free query.
 *
 * Never rejects. A model that cannot be resolved is reported with the
 * resolver's own message. Any failure of the probe query itself is reported
 * with an authentication hint followed by the underlying message.
 *
 * @param input.projectDir - Directory used as the query `cwd`.
 * @param input.model - Model name to resolve and probe with.
 * @param input.query - Query implementation; the SDK's `query` when omitted.
 * @param input.env - Source environment for the probe.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, message }`.
 */
export async function runClaudeCodeAuthProbe(input: {
  projectDir: string;
  model: string;
  query?: QueryFn;
  env?: NodeJS.ProcessEnv;
}): Promise<{ ok: true } | { ok: false; message: string }> {
  const describeError = (error: unknown): string => (error instanceof Error ? error.message : String(error));

  // Resolve the model separately so an unsupported model is not reported as an auth problem.
  let model: string;
  try {
    model = resolveClaudeCodeModel(input.model);
  } catch (error) {
    return { ok: false, message: describeError(error) };
  }

  try {
    const result = await collectResult({
      options: baseOptions({
        projectDir: input.projectDir,
        model,
        env: input.env,
        maxTurns: 1,
      }),
      query: input.query ?? defaultQuery,
      prompt: 'Reply with exactly: ok',
      allowedToolIds: new Set(),
      expectedMcpServerNames: new Set(),
    });
    const failure = resultError(result);
    if (failure) {
      throw failure;
    }
    return { ok: true };
  } catch (error) {
    const message = describeError(error);
    return {
      ok: false,
      message: `Claude Code authentication is not usable. Authenticate Claude Code locally with the Claude Code CLI, then rerun setup or the command. ${message}`,
    };
  }
}
|
||||
123
packages/cli/src/context/llm/debug-request-recorder.test.ts
Normal file
123
packages/cli/src/context/llm/debug-request-recorder.test.ts
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
import { mkdtemp, readFile, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, describe, expect, it } from 'vitest';
|
||||
import {
|
||||
createJsonlKtxLlmDebugRequestRecorder,
|
||||
summarizeKtxLlmDebugRequest,
|
||||
} from './debug-request-recorder.js';
|
||||
|
||||
describe('summarizeKtxLlmDebugRequest', () => {
  it('records providerOptions positions without message text or tool schemas', () => {
    // Builds an Anthropic cache-control providerOptions payload with the given TTL.
    const ephemeral = (ttl: string) => ({ anthropic: { cacheControl: { type: 'ephemeral', ttl } } });

    // Marker strings that must never appear in the serialized summary.
    const systemText = 'SECRET SYSTEM PROMPT';
    const userText = 'SECRET USER PROMPT';
    const toolDescription = 'SECRET TOOL DESCRIPTION';

    const summary = summarizeKtxLlmDebugRequest({
      operationName: 'ingest-bundle-wu',
      source: 'metabase',
      jobId: 'job-1',
      unitKey: 'cards/1',
      modelRole: 'candidateExtraction',
      modelId: 'claude-sonnet-4-6',
      messages: [
        {
          role: 'system',
          content: systemText,
          providerOptions: ephemeral('1h'),
        },
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: userText,
              providerOptions: ephemeral('5m'),
            },
          ],
        },
      ],
      tools: {
        emit_candidate: {
          description: toolDescription,
          inputSchema: { secret: true },
          providerOptions: ephemeral('1h'),
        },
      },
    });

    expect(summary).toMatchObject({
      operationName: 'ingest-bundle-wu',
      source: 'metabase',
      jobId: 'job-1',
      unitKey: 'cards/1',
      modelRole: 'candidateExtraction',
      modelId: 'claude-sonnet-4-6',
      messageCount: 2,
      toolNames: ['emit_candidate'],
      providerOptions: [
        {
          target: 'message',
          index: 0,
          role: 'system',
          providerOptions: ephemeral('1h'),
        },
        {
          target: 'message-part',
          index: 1,
          role: 'user',
          partIndex: 0,
          providerOptions: ephemeral('5m'),
        },
        {
          target: 'tool',
          name: 'emit_candidate',
          providerOptions: ephemeral('1h'),
        },
      ],
    });

    const serialized = JSON.stringify(summary);
    for (const forbidden of [systemText, userText, toolDescription, 'inputSchema']) {
      expect(serialized).not.toContain(forbidden);
    }
  });
});
|
||||
|
||||
describe('createJsonlKtxLlmDebugRequestRecorder', () => {
  // Temp directory created by the current test, removed again in afterEach.
  let tempDir: string | undefined;

  afterEach(async () => {
    if (!tempDir) {
      return;
    }
    await rm(tempDir, { recursive: true, force: true });
    tempDir = undefined;
  });

  it('appends one JSON object per recorded request', async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-llm-debug-'));
    // The target sits in a sub-directory that does not exist yet.
    const filePath = join(tempDir, 'nested', 'llm-debug.jsonl');
    const recorder = createJsonlKtxLlmDebugRequestRecorder(filePath);

    // Fields identical in both recorded requests.
    const shared = { modelId: 'claude-sonnet-4-6', messageCount: 2, providerOptions: [] };

    await recorder.record({
      timestamp: '2026-05-04T00:00:00.000Z',
      operationName: 'ingest-bundle-wu',
      modelRole: 'candidateExtraction',
      toolNames: ['emit_candidate'],
      ...shared,
    });
    await recorder.record({
      timestamp: '2026-05-04T00:00:01.000Z',
      operationName: 'ingest-bundle-reconcile',
      modelRole: 'reconcile',
      toolNames: [],
      ...shared,
    });

    const fileText = await readFile(filePath, 'utf8');
    const lines = fileText
      .trim()
      .split('\n')
      .map((line) => JSON.parse(line));

    expect(lines).toHaveLength(2);
    const [first, second] = lines;
    expect(first).toMatchObject({ operationName: 'ingest-bundle-wu', modelRole: 'candidateExtraction' });
    expect(second).toMatchObject({ operationName: 'ingest-bundle-reconcile', modelRole: 'reconcile' });
  });
});
|
||||
132
packages/cli/src/context/llm/debug-request-recorder.ts
Normal file
132
packages/cli/src/context/llm/debug-request-recorder.ts
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
import { appendFile, mkdir } from 'node:fs/promises';
|
||||
import { dirname } from 'node:path';
|
||||
import type { ModelMessage } from 'ai';
|
||||
import type { KtxModelRole } from '../../llm/types.js';
|
||||
|
||||
/** Any object that may carry a `providerOptions` value alongside arbitrary other keys. */
type ProviderOptionsCarrier = {
  providerOptions?: unknown;
  [key: string]: unknown;
};

/** Tool definitions keyed by tool name. */
type ToolMap = Record<string, ProviderOptionsCarrier>;
|
||||
|
||||
/**
 * One `providerOptions` value found on a request, tagged with where it was found.
 *
 * The locator fields depend on `target`: message entries carry `index` and
 * `role`, message-part entries additionally carry `partIndex`, and tool
 * entries carry `name`.
 */
interface KtxLlmDebugProviderOptionsEntry {
  /** Kind of object the options were attached to. */
  target: 'message' | 'message-part' | 'tool';
  /** Position of the message in the request's message list. */
  index?: number;
  /** Role of that message. */
  role?: string;
  /** Position of the part inside the message's content array. */
  partIndex?: number;
  /** Tool name (key in the tool map). */
  name?: string;
  /** The provider options exactly as they were found. */
  providerOptions: unknown;
}
|
||||
|
||||
/**
 * Summary of one LLM request as written to the debug log.
 *
 * It holds identifiers, counts, tool names and provider options only; message
 * contents and tool definitions are not part of this shape.
 */
export interface KtxLlmDebugRequest {
  /** Timestamp string for the request (ISO-8601 when generated by the summarizer). */
  timestamp: string;
  operationName: string;
  source?: string;
  jobId?: string;
  unitKey?: string;
  modelRole: KtxModelRole;
  modelId: string;
  /** Number of messages in the request. */
  messageCount: number;
  /** Names of the tools offered with the request. */
  toolNames: string[];
  /** Every `providerOptions` value found on messages, message parts and tools. */
  providerOptions: KtxLlmDebugProviderOptionsEntry[];
}
|
||||
|
||||
/** Sink for LLM debug request summaries; implementations may be synchronous or asynchronous. */
export interface KtxLlmDebugRequestRecorder {
  /** Stores one request summary. */
  record(request: KtxLlmDebugRequest): Promise<void> | void;
}
|
||||
|
||||
/** Full request data from which a `KtxLlmDebugRequest` summary is derived. */
export interface SummarizeKtxLlmDebugRequestInput {
  operationName: string;
  source?: string;
  jobId?: string;
  unitKey?: string;
  modelRole: KtxModelRole;
  modelId: string;
  /** Messages of the request; only their count and provider options reach the summary. */
  messages: ModelMessage[];
  /** Tools of the request; only their names and provider options reach the summary. */
  tools: ToolMap;
  /** Overrides the timestamp; when omitted the summarizer uses the current time. */
  timestamp?: string;
}
|
||||
|
||||
/** Returns the message's role, or `'unknown'` when the role is not a string at runtime. */
function messageRole(message: ModelMessage): string {
  const { role } = message;
  if (typeof role !== 'string') {
    return 'unknown';
  }
  return role;
}
|
||||
|
||||
/** Type guard: true for any non-null, non-array object. */
function isProviderOptionsCarrier(value: unknown): value is ProviderOptionsCarrier {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
||||
|
||||
/**
 * Collects provider options attached to the individual content parts of one message.
 *
 * @param message Message to inspect; non-array content yields no entries.
 * @param index Position of the message in the request, copied into every entry.
 * @returns One `'message-part'` entry per part with truthy `providerOptions`, in part order.
 */
function contentPartProviderOptions(message: ModelMessage, index: number): KtxLlmDebugProviderOptionsEntry[] {
  const { content } = message;
  if (!Array.isArray(content)) {
    return [];
  }

  const found: KtxLlmDebugProviderOptionsEntry[] = [];
  content.forEach((part, partIndex) => {
    // Skip parts that are not plain objects or that carry no options.
    if (!isProviderOptionsCarrier(part) || !part.providerOptions) {
      return;
    }
    found.push({
      target: 'message-part',
      index,
      role: messageRole(message),
      partIndex,
      providerOptions: part.providerOptions,
    });
  });
  return found;
}
|
||||
|
||||
/**
 * Collects provider options from every message and from each message's content parts.
 *
 * For each message, in order, the message-level entry (if any) comes first,
 * followed by the entries of its parts.
 */
function messageProviderOptions(messages: ModelMessage[]): KtxLlmDebugProviderOptionsEntry[] {
  const collected: KtxLlmDebugProviderOptionsEntry[] = [];
  messages.forEach((message, index) => {
    const { providerOptions } = message as ProviderOptionsCarrier;
    if (providerOptions) {
      collected.push({ target: 'message', index, role: messageRole(message), providerOptions });
    }
    collected.push(...contentPartProviderOptions(message, index));
  });
  return collected;
}
|
||||
|
||||
/**
 * Collects provider options attached to tool definitions.
 *
 * @returns One `'tool'` entry per tool with truthy `providerOptions`, in the
 * tool map's own key order.
 */
function toolProviderOptions(tools: ToolMap): KtxLlmDebugProviderOptionsEntry[] {
  const entries: KtxLlmDebugProviderOptionsEntry[] = [];
  for (const [name, tool] of Object.entries(tools)) {
    const { providerOptions } = tool;
    if (!providerOptions) {
      continue;
    }
    entries.push({ target: 'tool', name, providerOptions });
  }
  return entries;
}
|
||||
|
||||
/**
 * Reduces a full LLM request to a loggable summary.
 *
 * Only identifiers, the message count, the sorted tool names and the provider
 * options are copied; message contents and tool definitions are left out.
 * `source`, `jobId` and `unitKey` appear in the result only when truthy.
 *
 * @param input Request data; `timestamp` defaults to the current time in ISO-8601 form.
 */
export function summarizeKtxLlmDebugRequest(input: SummarizeKtxLlmDebugRequestInput): KtxLlmDebugRequest {
  const { source, jobId, unitKey, messages, tools } = input;
  const toolNames = Object.keys(tools).sort();

  // Optional tags are omitted entirely (not set to undefined) when absent.
  const optionalTags = {
    ...(source ? { source } : {}),
    ...(jobId ? { jobId } : {}),
    ...(unitKey ? { unitKey } : {}),
  };

  return {
    timestamp: input.timestamp ?? new Date().toISOString(),
    operationName: input.operationName,
    ...optionalTags,
    modelRole: input.modelRole,
    modelId: input.modelId,
    messageCount: messages.length,
    toolNames,
    providerOptions: [...messageProviderOptions(messages), ...toolProviderOptions(tools)],
  };
}
|
||||
|
||||
/**
 * Creates a recorder that appends each request summary as one JSON line to `filePath`.
 *
 * The parent directory is created (recursively) before every append.
 *
 * @internal
 */
export function createJsonlKtxLlmDebugRequestRecorder(filePath: string): KtxLlmDebugRequestRecorder {
  const directory = dirname(filePath);

  const record = async (request: KtxLlmDebugRequest): Promise<void> => {
    await mkdir(directory, { recursive: true });
    const line = `${JSON.stringify(request)}\n`;
    await appendFile(filePath, line, 'utf8');
  };

  return { record };
}
|
||||
38
packages/cli/src/context/llm/embedding-port.test.ts
Normal file
38
packages/cli/src/context/llm/embedding-port.test.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { KtxIngestEmbeddingPortAdapter, KtxScanEmbeddingPortAdapter } from './embedding-port.js';
|
||||
|
||||
describe('KTX embedding port adapters', () => {
  // Property name of the provider's bulk method, assembled the same way the adapter module does.
  const bulkMethodName = ['embed', 'Many'].join('');

  it('adapts LLM modules embeddings to ingest embedding port shape', async () => {
    const bulkVectors = [
      [1, 2, 3],
      [4, 5, 6],
    ];
    const fakeProvider = {
      dimensions: 3,
      maxBatchSize: 2,
      embed: vi.fn(async () => [1, 2, 3]),
      [bulkMethodName]: vi.fn(async () => bulkVectors),
    };
    const ingestAdapter = new KtxIngestEmbeddingPortAdapter(fakeProvider as never);

    await expect(ingestAdapter.computeEmbedding('alpha')).resolves.toEqual([1, 2, 3]);
    await expect(ingestAdapter.computeEmbeddingsBulk(['alpha', 'beta'])).resolves.toEqual([
      [1, 2, 3],
      [4, 5, 6],
    ]);
    expect(ingestAdapter.maxBatchSize).toBe(2);
  });

  it('adapts LLM modules embeddings to scan embedding port shape', async () => {
    const fakeProvider = {
      dimensions: 3,
      maxBatchSize: 2,
      embed: vi.fn(),
      [bulkMethodName]: vi.fn(async () => [[1, 2, 3]]),
    };
    const scanAdapter = new KtxScanEmbeddingPortAdapter(fakeProvider as never);

    await expect(scanAdapter.embedBatch(['alpha'])).resolves.toEqual([[1, 2, 3]]);
    expect(scanAdapter.dimensions).toBe(3);
    expect(scanAdapter.maxBatchSize).toBe(2);
  });
});
|
||||
39
packages/cli/src/context/llm/embedding-port.ts
Normal file
39
packages/cli/src/context/llm/embedding-port.ts
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import type { KtxEmbeddingProvider } from '../../llm/types.js';
|
||||
import type { KtxEmbeddingPort as KtxIngestEmbeddingPort } from '../core/embedding.js';
|
||||
import type { KtxEmbeddingPort as KtxScanEmbeddingPort } from '../scan/types.js';
|
||||
|
||||
/** Name of the provider's bulk-embedding method, assembled at runtime from two string fragments. */
const bulkEmbeddingMethod = ['embed', 'Many'].join('') as keyof KtxEmbeddingProvider;

/**
 * Embeds several texts in one call through the provider's bulk method.
 *
 * The method is invoked on `provider`, so its `this` binding is preserved.
 */
function computeBulkEmbeddings(provider: KtxEmbeddingProvider, texts: string[]): Promise<number[][]> {
  type BulkEmbed = (items: string[]) => Promise<number[][]>;
  return (provider[bulkEmbeddingMethod] as BulkEmbed)(texts);
}
|
||||
|
||||
/** Exposes a `KtxEmbeddingProvider` through the ingest embedding port interface. */
export class KtxIngestEmbeddingPortAdapter implements KtxIngestEmbeddingPort {
  /** Copied from the provider at construction time. */
  readonly maxBatchSize: number;

  constructor(private readonly provider: KtxEmbeddingProvider) {
    const { maxBatchSize } = provider;
    this.maxBatchSize = maxBatchSize;
  }

  /** Embeds a single text via the provider's `embed`. */
  computeEmbedding(text: string): Promise<number[]> {
    return this.provider.embed(text);
  }

  /** Embeds several texts via the provider's bulk method. */
  computeEmbeddingsBulk(texts: string[]): Promise<number[][]> {
    return computeBulkEmbeddings(this.provider, texts);
  }
}
|
||||
|
||||
/** Exposes a `KtxEmbeddingProvider` through the scan embedding port interface. */
export class KtxScanEmbeddingPortAdapter implements KtxScanEmbeddingPort {
  /** Copied from the provider at construction time. */
  readonly dimensions: number;
  /** Copied from the provider at construction time. */
  readonly maxBatchSize: number;

  constructor(private readonly provider: KtxEmbeddingProvider) {
    const { dimensions, maxBatchSize } = provider;
    this.dimensions = dimensions;
    this.maxBatchSize = maxBatchSize;
  }

  /** Embeds a batch of texts via the provider's bulk method. */
  embedBatch(texts: string[]): Promise<number[][]> {
    return computeBulkEmbeddings(this.provider, texts);
  }
}
|
||||
211
packages/cli/src/context/llm/local-config.test.ts
Normal file
211
packages/cli/src/context/llm/local-config.test.ts
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
buildDefaultKtxProjectConfig,
|
||||
type KtxProjectEmbeddingConfig,
|
||||
type KtxProjectLlmConfig,
|
||||
} from '../project/config.js';
|
||||
import {
|
||||
createLocalKtxEmbeddingProviderFromConfig,
|
||||
createLocalKtxLlmProviderFromConfig,
|
||||
resolveLocalKtxEmbeddingConfig,
|
||||
resolveLocalKtxLlmConfig,
|
||||
} from './local-config.js';
|
||||
|
||||
describe('local KTX LLM config', () => {
  it('resolves env and file references into a KtxLlmConfig', () => {
    const projectConfig: KtxProjectLlmConfig = {
      provider: {
        backend: 'gateway',
        gateway: { api_key: 'env:AI_GATEWAY_API_KEY', base_url: 'https://gateway.example/v1' }, // pragma: allowlist secret
      },
      models: { default: 'env:KTX_MODEL', triage: 'anthropic/claude-haiku-4-5' },
      promptCaching: { enabled: false },
    };
    const env: NodeJS.ProcessEnv = {
      AI_GATEWAY_API_KEY: 'gateway-key', // pragma: allowlist secret
      KTX_MODEL: 'anthropic/claude-sonnet-4-6',
    };

    const resolved = resolveLocalKtxLlmConfig(projectConfig, env);

    expect(resolved).toEqual({
      backend: 'gateway',
      gateway: { apiKey: 'gateway-key', baseURL: 'https://gateway.example/v1' }, // pragma: allowlist secret
      modelSlots: { default: 'anthropic/claude-sonnet-4-6', triage: 'anthropic/claude-haiku-4-5' },
      promptCaching: { enabled: false },
    });
  });

  it('resolves Vertex AI env references into a KtxLlmConfig', () => {
    const projectConfig: KtxProjectLlmConfig = {
      provider: {
        backend: 'vertex',
        vertex: { project: 'env:GOOGLE_VERTEX_PROJECT', location: 'env:GOOGLE_VERTEX_LOCATION' },
      },
      models: { default: 'env:KTX_MODEL' },
      promptCaching: { enabled: true, vertexFallbackTo5m: true },
    };
    const env: NodeJS.ProcessEnv = {
      GOOGLE_VERTEX_PROJECT: 'local-gcp-project',
      GOOGLE_VERTEX_LOCATION: 'us-east5',
      KTX_MODEL: 'claude-sonnet-4-6',
    };

    const resolved = resolveLocalKtxLlmConfig(projectConfig, env);

    expect(resolved).toEqual({
      backend: 'vertex',
      vertex: { project: 'local-gcp-project', location: 'us-east5' },
      modelSlots: { default: 'claude-sonnet-4-6' },
      promptCaching: { enabled: true, vertexFallbackTo5m: true },
    });
  });

  it('ignores inactive Vertex AI references for non-Vertex backends', () => {
    // The vertex section points at an env var that is not set; it must not be resolved.
    const projectConfig: KtxProjectLlmConfig = {
      provider: {
        backend: 'anthropic',
        anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret
        vertex: { location: 'env:MISSING_VERTEX_LOCATION' },
      },
      models: { default: 'claude-sonnet-4-6' },
    };
    const env: NodeJS.ProcessEnv = {
      ANTHROPIC_API_KEY: 'sk-ant-test', // pragma: allowlist secret
    };

    const resolved = resolveLocalKtxLlmConfig(projectConfig, env);

    expect(resolved).toEqual({
      backend: 'anthropic',
      anthropic: { apiKey: 'sk-ant-test' }, // pragma: allowlist secret
      modelSlots: { default: 'claude-sonnet-4-6' },
      promptCaching: undefined,
    });
  });

  it('returns null when the local LLM backend is disabled', () => {
    const provider = createLocalKtxLlmProviderFromConfig({
      provider: { backend: 'none' },
      models: {},
    });

    expect(provider).toBeNull();
  });

  it('constructs providers through LLM modules', () => {
    const createKtxLlmProvider = vi.fn(() => ({ getModel: vi.fn() }) as never);
    const projectConfig: KtxProjectLlmConfig = {
      provider: {
        backend: 'anthropic',
        anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret
      },
      models: { default: 'claude-sonnet-4-6' },
    };

    const result = createLocalKtxLlmProviderFromConfig(
      projectConfig,
      { env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, createKtxLlmProvider }, // pragma: allowlist secret
    );

    expect(result).not.toBeNull();
    expect(createKtxLlmProvider).toHaveBeenCalledWith({
      backend: 'anthropic',
      anthropic: { apiKey: 'sk-ant-test' }, // pragma: allowlist secret
      modelSlots: { default: 'claude-sonnet-4-6' },
      promptCaching: undefined,
    });
  });

  it('inherits enabled prompt caching from LLM modules when local config omits promptCaching', () => {
    // No factory override here: the real provider factory is exercised.
    const provider = createLocalKtxLlmProviderFromConfig({
      provider: {
        backend: 'gateway',
        gateway: { base_url: 'https://gateway.example/v1' },
      },
      models: { default: 'anthropic/claude-sonnet-4-6' },
    });

    const caching = provider?.promptCachingConfig();

    expect(caching).toMatchObject({
      enabled: true,
      systemTtl: '1h',
      toolsTtl: '1h',
      historyTtl: '5m',
      vertexFallbackTo5m: false,
    });
  });
});
|
||||
|
||||
describe('local KTX embedding config', () => {
  const emptyEnv: NodeJS.ProcessEnv = {};

  it('resolves sentence-transformers config', () => {
    const projectConfig: KtxProjectEmbeddingConfig = {
      backend: 'sentence-transformers',
      model: 'all-MiniLM-L6-v2',
      dimensions: 384,
      sentenceTransformers: { base_url: 'http://localhost:18081', pathPrefix: '' },
      batchSize: 16,
    };

    const resolved = resolveLocalKtxEmbeddingConfig(projectConfig, emptyEnv);

    expect(resolved).toEqual({
      backend: 'sentence-transformers',
      model: 'all-MiniLM-L6-v2',
      dimensions: 384,
      sentenceTransformers: { baseURL: 'http://localhost:18081', pathPrefix: '' },
      batchSize: 16,
    });
  });

  it('returns null when sentence-transformers has no base_url (managed daemon delegation)', () => {
    const projectConfig: KtxProjectEmbeddingConfig = {
      backend: 'sentence-transformers',
      model: 'all-MiniLM-L6-v2',
      dimensions: 384,
      sentenceTransformers: {
        base_url: '',
        pathPrefix: '',
      },
    };

    const resolved = resolveLocalKtxEmbeddingConfig(projectConfig, emptyEnv);

    expect(resolved).toBeNull();
  });

  it('returns null when backend is openai but no apiKey is resolvable from env', () => {
    const projectConfig: KtxProjectEmbeddingConfig = {
      backend: 'openai',
      model: 'text-embedding-3-small',
      dimensions: 1536,
      openai: { api_key: 'env:OPENAI_API_KEY' }, // pragma: allowlist secret
    };

    const resolved = resolveLocalKtxEmbeddingConfig(projectConfig, emptyEnv);

    expect(resolved).toBeNull();
  });

  it('resolves openai embedding config from env', () => {
    const projectConfig: KtxProjectEmbeddingConfig = {
      backend: 'openai',
      model: 'text-embedding-3-small',
      dimensions: 1536,
      openai: { api_key: 'env:OPENAI_API_KEY' }, // pragma: allowlist secret
    };

    const resolved = resolveLocalKtxEmbeddingConfig(projectConfig, { OPENAI_API_KEY: 'sk-test' }); // pragma: allowlist secret

    expect(resolved).toEqual({
      backend: 'openai',
      model: 'text-embedding-3-small',
      dimensions: 1536,
      openai: { apiKey: 'sk-test' }, // pragma: allowlist secret
      batchSize: undefined,
    });
  });

  it('returns null for the default disabled project embedding config', () => {
    const createKtxEmbeddingProvider = vi.fn(() => ({}) as never);
    const defaultEmbeddings = buildDefaultKtxProjectConfig().ingest.embeddings;

    const provider = createLocalKtxEmbeddingProviderFromConfig(defaultEmbeddings, { createKtxEmbeddingProvider });

    expect(provider).toBeNull();
    expect(createKtxEmbeddingProvider).not.toHaveBeenCalled();
  });

  it('returns null when embeddings are disabled', () => {
    const provider = createLocalKtxEmbeddingProviderFromConfig({ backend: 'none', dimensions: 8 });

    expect(provider).toBeNull();
  });
});
|
||||
182
packages/cli/src/context/llm/local-config.ts
Normal file
182
packages/cli/src/context/llm/local-config.ts
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
import { createKtxEmbeddingProvider } from '../../llm/embedding-provider.js';
|
||||
import { createKtxLlmProvider } from '../../llm/model-provider.js';
|
||||
import type { KtxEmbeddingConfig, KtxEmbeddingProvider, KtxLlmConfig, KtxLlmProvider, KtxModelRole } from '../../llm/types.js';
|
||||
import { resolveKtxConfigReference } from '../core/config-reference.js';
|
||||
import type { KtxProjectEmbeddingConfig, KtxProjectLlmConfig } from '../project/config.js';
|
||||
import { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
|
||||
import { ClaudeCodeKtxLlmRuntime } from './claude-code-runtime.js';
|
||||
import type { KtxLlmRuntimePort } from './runtime-port.js';
|
||||
|
||||
/** Optional overrides for the local config factories; every field has a built-in default. */
interface LocalConfigDeps {
  /** Environment used to resolve config references; the factories fall back to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Project directory; required when the claude-code runtime is created. */
  projectDir?: string;
  /** Replacement for the LLM provider factory. */
  createKtxLlmProvider?: typeof createKtxLlmProvider;
  /** Replacement for the embedding provider factory. */
  createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider;
  /** Replacement for constructing `ClaudeCodeKtxLlmRuntime`. */
  createClaudeCodeRuntime?: (deps: ConstructorParameters<typeof ClaudeCodeKtxLlmRuntime>[0]) => KtxLlmRuntimePort;
  /** Replacement for constructing `AiSdkKtxLlmRuntime`. */
  createAiSdkRuntime?: (deps: { llmProvider: KtxLlmProvider }) => KtxLlmRuntimePort;
}
|
||||
|
||||
/** Resolves a config reference and maps every falsy result (e.g. an empty string) to `undefined`. */
function resolveOptional(value: string | undefined, env: NodeJS.ProcessEnv): string | undefined {
  const resolved = resolveKtxConfigReference(value, env);
  return resolved ? resolved : undefined;
}
|
||||
|
||||
/**
 * Resolves a config reference that must produce a non-empty value.
 *
 * @throws Error with `message` when the reference resolves to nothing.
 */
function resolveRequired(value: string | undefined, env: NodeJS.ProcessEnv, message: string): string {
  const resolved = resolveOptional(value, env);
  if (resolved) {
    return resolved;
  }
  throw new Error(message);
}
|
||||
|
||||
/**
 * Resolves every configured model reference into a concrete model id.
 *
 * Roles with a falsy value are skipped; roles with a value must resolve.
 *
 * @throws Error when a configured role resolves to nothing, or when no
 * `default` slot results.
 */
function resolveModelSlots(
  models: KtxProjectLlmConfig['models'],
  env: NodeJS.ProcessEnv,
): KtxLlmConfig['modelSlots'] {
  const slots: Partial<Record<KtxModelRole, string>> & { default?: string } = {};

  Object.entries(models).forEach(([role, reference]) => {
    if (!reference) {
      return;
    }
    slots[role as KtxModelRole] = resolveRequired(reference, env, `llm.models.${role} is required`);
  });

  if (slots.default) {
    return slots as KtxLlmConfig['modelSlots'];
  }
  throw new Error('llm.models.default is required when llm.provider.backend is not none');
}
|
||||
|
||||
/**
 * Resolves the `api_key` / `base_url` references of a provider section.
 *
 * @returns An object holding only the values that resolved, or `undefined`
 * when the section is absent or neither value resolved.
 */
function resolvedProviderConfig(
  config: { api_key?: string; base_url?: string } | undefined,
  env: NodeJS.ProcessEnv,
): { apiKey?: string; baseURL?: string } | undefined {
  if (!config) {
    return undefined;
  }

  const apiKey = resolveOptional(config.api_key, env);
  const baseURL = resolveOptional(config.base_url, env);
  if (!(apiKey || baseURL)) {
    return undefined;
  }

  const resolved: { apiKey?: string; baseURL?: string } = {};
  if (apiKey) {
    resolved.apiKey = apiKey;
  }
  if (baseURL) {
    resolved.baseURL = baseURL;
  }
  return resolved;
}
|
||||
|
||||
/**
 * Resolves the Vertex AI section: `location` is mandatory, `project` is optional.
 *
 * @returns `undefined` when the section is absent.
 * @throws Error when `location` resolves to nothing.
 */
function resolvedVertexConfig(
  config: { project?: string; location?: string } | undefined,
  env: NodeJS.ProcessEnv,
): { project?: string; location: string } | undefined {
  if (!config) {
    return undefined;
  }

  const project = resolveOptional(config.project, env);
  const location = resolveRequired(config.location, env, 'llm.provider.vertex.location is required');
  return project ? { project, location } : { location };
}
|
||||
|
||||
/**
 * Turns the project-file LLM section into a runtime `KtxLlmConfig`.
 *
 * Model slots are resolved first. The vertex section is resolved only when
 * vertex is the active backend; the anthropic and gateway sections are
 * resolved whenever they are present.
 *
 * @returns `null` when the backend is `'none'`.
 * @throws Error when required model or vertex references cannot be resolved.
 */
export function resolveLocalKtxLlmConfig(config: KtxProjectLlmConfig, env: NodeJS.ProcessEnv): KtxLlmConfig | null {
  const { provider } = config;
  if (provider.backend === 'none') {
    return null;
  }

  const modelSlots = resolveModelSlots(config.models, env);
  const usesVertex = provider.backend === 'vertex';
  const vertex = usesVertex ? resolvedVertexConfig(provider.vertex, env) : undefined;
  const anthropic = resolvedProviderConfig(provider.anthropic, env);
  const gateway = resolvedProviderConfig(provider.gateway, env);

  return {
    backend: provider.backend,
    ...(vertex ? { vertex } : {}),
    ...(anthropic ? { anthropic } : {}),
    ...(gateway ? { gateway } : {}),
    modelSlots,
    promptCaching: config.promptCaching,
  };
}
|
||||
|
||||
/**
 * Builds an LLM provider from the project LLM section.
 *
 * @returns `null` when the backend is `'none'` or `'claude-code'`.
 * @internal
 */
export function createLocalKtxLlmProviderFromConfig(
  config: KtxProjectLlmConfig,
  deps: LocalConfigDeps = {},
): KtxLlmProvider | null {
  const env = deps.env ?? process.env;
  const resolved = resolveLocalKtxLlmConfig(config, env);
  if (!resolved || resolved.backend === 'claude-code') {
    return null;
  }

  const createProvider = deps.createKtxLlmProvider ?? createKtxLlmProvider;
  return createProvider(resolved);
}
|
||||
|
||||
/**
 * Builds the LLM runtime selected by the project LLM section.
 *
 * The `'claude-code'` backend yields a Claude Code runtime and never creates
 * an LLM provider; every other enabled backend yields an AI SDK runtime
 * wrapped around a freshly created provider.
 *
 * @returns `null` when the backend is `'none'`.
 * @throws Error when the backend is `'claude-code'` and `deps.projectDir` is missing.
 */
export function createLocalKtxLlmRuntimeFromConfig(
  config: KtxProjectLlmConfig,
  deps: LocalConfigDeps = {},
): KtxLlmRuntimePort | null {
  const resolved = resolveLocalKtxLlmConfig(config, deps.env ?? process.env);
  if (!resolved) {
    return null;
  }

  if (resolved.backend === 'claude-code') {
    const { projectDir } = deps;
    if (!projectDir) {
      throw new Error('projectDir is required when creating the claude-code LLM runtime');
    }
    const createClaudeRuntime =
      deps.createClaudeCodeRuntime ?? ((runtimeDeps) => new ClaudeCodeKtxLlmRuntime(runtimeDeps));
    // `deps.env` is forwarded as given (possibly undefined), without the process.env fallback.
    return createClaudeRuntime({ projectDir, modelSlots: resolved.modelSlots, env: deps.env });
  }

  const createProvider = deps.createKtxLlmProvider ?? createKtxLlmProvider;
  const llmProvider = createProvider(resolved);
  const createSdkRuntime = deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps));
  return createSdkRuntime({ llmProvider });
}
|
||||
|
||||
/**
 * Turns the project-file embedding section into a runtime `KtxEmbeddingConfig`.
 *
 * @returns `null` when embeddings are disabled (`'none'`), when the
 * sentence-transformers backend has no `base_url`, or when the openai backend
 * has no resolvable API key.
 * @throws Error for any other backend value.
 */
export function resolveLocalKtxEmbeddingConfig(
  config: KtxProjectEmbeddingConfig,
  env: NodeJS.ProcessEnv,
): KtxEmbeddingConfig | null {
  switch (config.backend) {
    case 'none':
      return null;

    case 'sentence-transformers': {
      const settings = config.sentenceTransformers;
      // NOTE(review): `base_url` is used verbatim here (no reference resolution),
      // unlike the openai settings below — confirm that this is intended.
      const baseURL = settings?.base_url;
      if (!baseURL) {
        return null;
      }
      return {
        backend: config.backend,
        model: config.model ?? 'all-MiniLM-L6-v2',
        dimensions: config.dimensions,
        sentenceTransformers: {
          baseURL,
          pathPrefix: settings?.pathPrefix,
        },
        batchSize: config.batchSize,
      };
    }

    case 'openai': {
      const openai = resolvedProviderConfig(config.openai, env);
      if (!openai?.apiKey) {
        return null;
      }
      return {
        backend: config.backend,
        model: config.model ?? 'text-embedding-3-small',
        dimensions: config.dimensions,
        openai,
        batchSize: config.batchSize,
      };
    }

    default:
      throw new Error(`Unsupported KTX embedding backend: ${String((config as { backend?: string }).backend)}`);
  }
}
|
||||
|
||||
/**
 * Builds an embedding provider from the project embedding section.
 *
 * @returns `null` when the section resolves to no usable config; the provider
 * factory is not called in that case.
 * @internal
 */
export function createLocalKtxEmbeddingProviderFromConfig(
  config: KtxProjectEmbeddingConfig,
  deps: LocalConfigDeps = {},
): KtxEmbeddingProvider | null {
  const resolved = resolveLocalKtxEmbeddingConfig(config, deps.env ?? process.env);
  if (!resolved) {
    return null;
  }

  const createProvider = deps.createKtxEmbeddingProvider ?? createKtxEmbeddingProvider;
  return createProvider(resolved);
}
|
||||
25
packages/cli/src/context/llm/runtime-local-config.test.ts
Normal file
25
packages/cli/src/context/llm/runtime-local-config.test.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createLocalKtxLlmProviderFromConfig, createLocalKtxLlmRuntimeFromConfig } from './local-config.js';
|
||||
|
||||
describe('local KTX LLM runtime config', () => {
  it('creates a Claude Code runtime for claude-code backend without creating an AI SDK provider', () => {
    // The stub factory echoes its dependencies back so they can be inspected on the returned value.
    const claudeRuntime = createLocalKtxLlmRuntimeFromConfig(
      {
        provider: { backend: 'claude-code' },
        models: { default: 'sonnet', triage: 'haiku' },
      },
      {
        env: {},
        projectDir: '/tmp/project',
        createClaudeCodeRuntime: vi.fn((deps) => ({ deps }) as never),
      },
    );

    expect(claudeRuntime).toMatchObject({ deps: expect.objectContaining({ projectDir: '/tmp/project' }) });
  });

  it('returns null from the AI SDK provider factory for claude-code backend', () => {
    const provider = createLocalKtxLlmProviderFromConfig({
      provider: { backend: 'claude-code' },
      models: { default: 'sonnet' },
    });

    expect(provider).toBeNull();
  });
});
|
||||
76
packages/cli/src/context/llm/runtime-port.ts
Normal file
76
packages/cli/src/context/llm/runtime-port.ts
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
import type { KtxModelRole } from '../../llm/types.js';
|
||||
import type { z } from 'zod';
|
||||
|
||||
/** Result of a runtime tool call: a markdown rendering plus an optional structured payload. */
export interface KtxRuntimeToolOutput<TOutput = unknown> {
  /** Textual (markdown) form of the result. */
  markdown: string;
  /** Typed payload accompanying the markdown, when the tool provides one. */
  structured?: TOutput;
}
|
||||
|
||||
/** Backend-neutral description of a tool: its name, description, input schema and executor. */
export interface KtxRuntimeToolDescriptor<TInput = unknown, TOutput = unknown> {
  name: string;
  description: string;
  /** Zod object schema describing the tool input. */
  inputSchema: z.ZodObject<z.ZodRawShape>;
  /** Runs the tool for the given input. */
  execute(input: TInput): Promise<KtxRuntimeToolOutput<TOutput>>;
}
|
||||
|
||||
/** Tool descriptors keyed by name. */
export type KtxRuntimeToolSet = Record<string, KtxRuntimeToolDescriptor>;

/** Values allowed for `RunLoopResult.stopReason`. */
export type RunLoopStopReason = 'budget' | 'natural' | 'error';
|
||||
|
||||
/**
 * Payload passed to `RunLoopParams.onStepFinish`.
 *
 * @internal
 */
export interface RunLoopStepInfo {
  stepIndex: number;
  stepBudget: number;
}
|
||||
|
||||
/** Parameters of one agent loop run. */
export interface RunLoopParams {
  modelRole: KtxModelRole;
  systemPrompt: string;
  userPrompt: string;
  /** Tools made available to the loop. */
  toolSet: KtxRuntimeToolSet;
  stepBudget: number;
  telemetryTags: Record<string, string>;
  /** Optional step callback; it may be synchronous or return a promise. */
  onStepFinish?: (info: RunLoopStepInfo) => void | Promise<void>;
}
|
||||
|
||||
/** Outcome of an agent loop run. */
export interface RunLoopResult {
  stopReason: RunLoopStopReason;
  /** Optional error attached to the outcome. */
  error?: Error;
}
|
||||
|
||||
/** Input of `KtxLlmRuntimePort.generateText`. */
export interface KtxGenerateTextInput {
  role: KtxModelRole;
  prompt: string;
  system?: string;
  tools?: KtxRuntimeToolSet;
  temperature?: number;
}
|
||||
|
||||
/**
 * Input of `KtxLlmRuntimePort.generateObject`: the text-generation fields plus
 * the Zod schema that types the output.
 */
export interface KtxGenerateObjectInput<TOutput, TSchema extends z.ZodType<TOutput>> {
  role: KtxModelRole;
  prompt: string;
  system?: string;
  tools?: KtxRuntimeToolSet;
  temperature?: number;
  schema: TSchema;
}
|
||||
|
||||
/** Port implemented by each LLM runtime backend. */
export interface KtxLlmRuntimePort {
  /** Generates text for the given input. */
  generateText(input: KtxGenerateTextInput): Promise<string>;

  /** Generates a value typed by `input.schema`. */
  generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
    input: KtxGenerateObjectInput<TOutput, TSchema>,
  ): Promise<TOutput>;

  /** Runs an agent loop and reports why it stopped. */
  runAgentLoop(params: RunLoopParams): Promise<RunLoopResult>;
}
|
||||
|
||||
/** Narrow port exposing only the agent-loop entry point. */
export interface AgentRunnerPort {
  runLoop(params: RunLoopParams): Promise<RunLoopResult>;
}
|
||||
|
||||
/** `AgentRunnerPort` implementation that forwards every run to a `KtxLlmRuntimePort`. */
export class RuntimeAgentRunner implements AgentRunnerPort {
  constructor(private readonly runtime: KtxLlmRuntimePort) {}

  /** Delegates to the runtime's `runAgentLoop` with the parameters unchanged. */
  runLoop(params: RunLoopParams): Promise<RunLoopResult> {
    const { runtime } = this;
    return runtime.runAgentLoop(params);
  }
}
|
||||
43
packages/cli/src/context/llm/runtime-tools.test.ts
Normal file
43
packages/cli/src/context/llm/runtime-tools.test.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
import { createAiSdkToolSet, createClaudeSdkTools, normalizeKtxRuntimeToolOutput } from './runtime-tools.js';
|
||||
import type { KtxRuntimeToolDescriptor } from './runtime-port.js';
|
||||
|
||||
describe('runtime tool descriptors', () => {
  // Shared descriptor: echoes the requested id in markdown and returns a fixed structured payload.
  const descriptor: KtxRuntimeToolDescriptor<{ id: string }, { ok: boolean }> = {
    name: 'read_thing',
    description: 'Read one thing.',
    inputSchema: z.object({ id: z.string() }),
    execute: vi.fn(async (input) => ({
      markdown: `Read ${input.id}`,
      structured: { ok: true },
    })),
  };

  it('normalizes string and object tool outputs into markdown plus optional structured payload', () => {
    expect(normalizeKtxRuntimeToolOutput('plain text')).toEqual({ markdown: 'plain text' });
    expect(normalizeKtxRuntimeToolOutput({ markdown: 'shown', structured: { id: 1 } })).toEqual({
      markdown: 'shown',
      structured: { id: 1 },
    });
    // The fallback pretty-prints with JSON.stringify(value, null, 2), so nested
    // keys are indented by exactly two spaces in the expected markdown.
    expect(normalizeKtxRuntimeToolOutput({ name: 'skill', content: 'body' })).toEqual({
      markdown: '```json\n{\n  "name": "skill",\n  "content": "body"\n}\n```',
      structured: { name: 'skill', content: 'body' },
    });
  });

  it('builds AI SDK tools that expose markdown to the model', async () => {
    const tools = createAiSdkToolSet({ read_thing: descriptor });
    const output = await tools.read_thing.execute?.({ id: 'a' }, { toolCallId: 'call-1', messages: [] } as never);
    const modelOutput = tools.read_thing.toModelOutput?.({ output } as never);

    expect(modelOutput).toEqual({ type: 'text', value: 'Read a' });
  });

  it('builds Claude SDK tools that return text content only', async () => {
    const tools = createClaudeSdkTools({ read_thing: descriptor });
    const result = await tools[0].handler({ id: 'b' } as never, {});

    expect(result).toEqual({ content: [{ type: 'text', text: 'Read b' }] });
  });
});
|
||||
85
packages/cli/src/context/llm/runtime-tools.ts
Normal file
85
packages/cli/src/context/llm/runtime-tools.ts
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import { tool as aiTool, type Tool, type ToolSet } from 'ai';
|
||||
import { tool as claudeTool, type SdkMcpToolDefinition } from '@anthropic-ai/claude-agent-sdk';
|
||||
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
|
||||
import { z } from 'zod';
|
||||
import type { KtxRuntimeToolDescriptor, KtxRuntimeToolOutput, KtxRuntimeToolSet } from './runtime-port.js';
|
||||
|
||||
/** Type guard: true for any object (arrays included) whose `markdown` property is a string. */
function isRuntimeOutput(value: unknown): value is KtxRuntimeToolOutput {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  return 'markdown' in value && typeof (value as { markdown?: unknown }).markdown === 'string';
}

/** `JSON.stringify` replacer that renders bigint values as decimal strings. */
function bigintAsString(_key: string, item: unknown): unknown {
  return typeof item === 'bigint' ? item.toString() : item;
}

/**
 * Brings an arbitrary tool result into the `{ markdown, structured? }` shape.
 *
 * - Values that already have a string `markdown` are copied; `structured` is
 *   kept only when the key is present on the input.
 * - Plain strings become the markdown as-is, with no structured payload.
 * - Anything else is pretty-printed as a fenced JSON block and also returned
 *   unchanged as `structured`.
 *
 * Bigint values inside the fallback payload are rendered as decimal strings in
 * the markdown; plain `JSON.stringify` would throw a `TypeError` on them. The
 * `structured` payload keeps the original bigint values.
 *
 * @param value Raw tool result of any type.
 * @returns The normalized output.
 */
export function normalizeKtxRuntimeToolOutput(value: unknown): KtxRuntimeToolOutput {
  if (isRuntimeOutput(value)) {
    return 'structured' in value
      ? { markdown: value.markdown, structured: value.structured }
      : { markdown: value.markdown };
  }
  if (typeof value === 'string') {
    return { markdown: value };
  }
  return {
    markdown: `\`\`\`json\n${JSON.stringify(value, bigintAsString, 2)}\n\`\`\``,
    structured: value,
  };
}
|
||||
|
||||
/**
 * Assertion helper: passes only when `schema` is a `z.ZodObject` instance.
 *
 * @param name Tool name, used only in the error message.
 * @param schema Input schema to check.
 * @throws Error when `schema` is not a `z.ZodObject`.
 */
function assertObjectSchema(name: string, schema: z.ZodType): asserts schema is z.ZodObject<z.ZodRawShape> {
  const isObjectSchema = schema instanceof z.ZodObject;
  if (isObjectSchema) {
    return;
  }
  throw new Error(`KTX runtime tool "${name}" must use z.object input schema for claude-code`);
}
|
||||
|
||||
/**
 * Converts a KTX runtime tool set into an AI SDK `ToolSet`.
 *
 * Each entry keeps its key as the tool name. The wrapped tool forwards its
 * input to the descriptor's `execute`, and `toModelOutput` reports only the
 * normalized markdown as a text part.
 *
 * @param tools Runtime tool descriptors keyed by tool name; defaults to none.
 * @returns An AI SDK tool set with one tool per entry.
 */
export function createAiSdkToolSet(tools: KtxRuntimeToolSet = {}): ToolSet {
  const toAiSdkTool = (descriptor: KtxRuntimeToolSet[string]) =>
    aiTool({
      description: descriptor.description,
      inputSchema: descriptor.inputSchema,
      execute: async (input) => descriptor.execute(input),
      toModelOutput: ({ output }) => {
        const { markdown } = normalizeKtxRuntimeToolOutput(output);
        return { type: 'text', value: markdown };
      },
    });

  const entries = Object.entries(tools).map(
    ([toolName, descriptor]) => [toolName, toAiSdkTool(descriptor)] as const,
  );
  return Object.fromEntries(entries);
}
|
||||
|
||||
/**
 * Converts a KTX runtime tool set into Claude Agent SDK tool definitions.
 *
 * Each tool is registered under its key in `tools`, the same name that
 * `createAiSdkToolSet` exposes and that `mcpToolIds` builds ids from, so the
 * three stay in agreement even if a descriptor's own `name` differs from its key.
 * NOTE(review): presumably the ids from `mcpToolIds` are matched against these
 * registered names — confirm against the caller.
 *
 * The handler returns only the normalized markdown as a single text content part.
 *
 * @param tools Runtime tool descriptors keyed by tool name; defaults to none.
 * @returns One SDK tool definition per entry, in key order.
 * @throws Error when a descriptor's input schema is not a `z.object`.
 */
export function createClaudeSdkTools(tools: KtxRuntimeToolSet = {}): Array<SdkMcpToolDefinition<z.ZodRawShape>> {
  return Object.entries(tools).map(([name, descriptor]) => {
    // The SDK takes a raw shape, so only object schemas can be adapted.
    assertObjectSchema(name, descriptor.inputSchema);
    return claudeTool(
      name,
      descriptor.description,
      descriptor.inputSchema.shape,
      async (input): Promise<CallToolResult> => {
        const normalized = normalizeKtxRuntimeToolOutput(await descriptor.execute(input));
        return { content: [{ type: 'text', text: normalized.markdown }] };
      },
    );
  });
}
|
||||
|
||||
/**
 * Lists the prefixed tool ids for a runtime tool set.
 *
 * @param tools Runtime tool descriptors keyed by tool name; defaults to none.
 * @returns One id per key, formed as `mcp__ktx__` followed by the key, in key order.
 */
export function mcpToolIds(tools: KtxRuntimeToolSet = {}): string[] {
  const ids: string[] = [];
  for (const toolName of Object.keys(tools)) {
    ids.push(`mcp__ktx__${toolName}`);
  }
  return ids;
}
|
||||
|
||||
/**
 * Wraps an AI SDK tool as a KTX runtime tool descriptor.
 *
 * The descriptor reuses the tool's description (empty string when absent) and
 * input schema. Its `execute` calls the tool with a synthetic call context and
 * normalizes the result.
 *
 * @param name Name to give the descriptor; also used in the synthetic tool call id.
 * @param aiSdkTool The AI SDK tool to wrap.
 * @returns A runtime descriptor whose `execute` resolves to normalized output.
 * @throws Error (from `execute`, at call time) when the tool has no `execute` function.
 */
export function createRuntimeToolDescriptorFromAiTool(name: string, aiSdkTool: Tool): KtxRuntimeToolDescriptor {
  return {
    name,
    description: aiSdkTool.description ?? '',
    inputSchema: aiSdkTool.inputSchema as KtxRuntimeToolDescriptor['inputSchema'],
    execute: async (input) => {
      // Checked per call rather than at wrap time, so wrapping never throws.
      if (typeof aiSdkTool.execute !== 'function') {
        throw new Error(`KTX runtime tool "${name}" has no execute function`);
      }
      // There is no conversation at this call site, so pass an empty `messages`
      // array instead of leaving it undefined for tools that read it.
      // NOTE(review): the AI SDK call options appear to require `messages` — confirm
      // against the installed `ai` version.
      const rawOutput = await aiSdkTool.execute(
        input as never,
        { toolCallId: `runtime-${name}`, messages: [] } as never,
      );
      return normalizeKtxRuntimeToolOutput(rawOutput);
    },
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue