feat: add claude-code llm backend with runtime port (#115)

* docs: revise claude-code ingest backend spec

* docs: keep claude-code spec focused on ingest

* docs: expand claude-code spec to full llm parity

* Refine claude-code backend spec after adversarial review iteration 1

* Refine claude-code backend spec after adversarial review iteration 2

* Refine claude-code backend spec after adversarial review iteration 3

* feat: recognize claude-code llm backend

* feat: add ktx llm runtime port

* feat: add claude-code llm runtime

* feat: route non-agent llm calls through runtime

* feat: run ingest agents through llm runtime

* feat: support claude-code setup and status

* test: verify claude-code backend runtime

* docs: add claude-code backend v1 runtime plan

* fix: close claude-code runtime isolation checks

* fix: warn on claude-code prompt caching during setup

* chore: verify claude-code v1 closure

* docs: add claude-code backend v1 isolation closure plan

* fix: update claude-code ingest setup guidance

* docs: add claude-code backend v1 ingest guidance closure plan

* docs: align claude-code isolation spec with sdk metadata

* test: cover claude-code host discovery metadata

* fix: tolerate claude-code host discovery metadata

* docs: clarify claude-code host discovery metadata

* docs: add claude-code auth-probe isolation fix plan

* chore: prepare kaelio ktx rc1 release

* chore: add semantic release workflow

* fix: unblock ci checks

* chore(release): 0.1.0-rc.1

* feat: add Claude Code model selection to setup

* fix: keep git maintenance attached in local repos
This commit is contained in:
Andrey Avtomonov 2026-05-16 12:06:34 +02:00 committed by GitHub
parent e6d578c03f
commit b565e44a22
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
109 changed files with 10218 additions and 1093 deletions

View file

@ -3,14 +3,27 @@ name: KTX Release
on:
workflow_dispatch:
inputs:
release_kind:
description: "Release kind: rc publishes to next, stable publishes to latest"
required: true
type: choice
default: "rc"
options:
- rc
- stable
force_release:
description: "Force a patch release even if semantic-release finds no releasable commits"
required: false
type: boolean
default: false
publish_live:
description: "Publish @kaelio/ktx to npm instead of running a dry-run"
description: "Create the release and publish @kaelio/ktx to npm instead of running a dry-run"
required: true
type: boolean
default: false
permissions:
contents: read
contents: write
concurrency:
group: ktx-release-${{ github.ref }}
@ -22,6 +35,8 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0
- name: Setup pnpm
uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093 # v6.0.8
@ -34,6 +49,7 @@ jobs:
node-version: "24"
cache: "pnpm"
cache-dependency-path: "pnpm-lock.yaml"
registry-url: "https://registry.npmjs.org"
- name: Install TypeScript dependencies
run: pnpm install --frozen-lockfile
@ -52,18 +68,19 @@ jobs:
- name: Install Python dependencies
run: uv sync --all-packages
- name: Build and verify artifacts
run: pnpm run artifacts:check
- name: Check release readiness
run: pnpm run release:readiness
- name: Dry-run npm publish
- name: Dry-run semantic release
if: ${{ !inputs.publish_live }}
run: pnpm run release:npm-publish
- name: Publish npm package
if: ${{ inputs.publish_live }}
run: pnpm run release:npm-publish -- --publish
run: pnpm run semantic-release:dry-run
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
KTX_RELEASE_KIND: ${{ inputs.release_kind }}
FORCE_RELEASE: ${{ inputs.force_release }}
- name: Create semantic release
if: ${{ inputs.publish_live }}
run: pnpm run semantic-release
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
KTX_RELEASE_KIND: ${{ inputs.release_kind }}
FORCE_RELEASE: ${{ inputs.force_release }}
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}

3
.releaserc.cjs Normal file
View file

@ -0,0 +1,3 @@
// semantic-release configuration entry point. The actual configuration is
// built by scripts/semantic-release-config.cjs from the current process
// environment, so this file only wires the two together.
const { createReleaseConfig: buildReleaseConfig } = require('./scripts/semantic-release-config.cjs');

const releaseConfig = buildReleaseConfig(process.env);

module.exports = releaseConfig;

View file

@ -34,7 +34,7 @@ SQLite.
## Quick Start
```bash
npm install -g @kaelio/ktx
pnpm add --global @kaelio/ktx
ktx setup
ktx status
```

View file

@ -51,17 +51,21 @@ scripted project creation. They are not shown in `ktx setup --help`.
| Flag | Description |
|------|-------------|
| `--llm-backend <backend>` | LLM backend: `anthropic` or `vertex` |
| `--llm-backend <backend>` | LLM backend: `anthropic`, `vertex`, or `claude-code` |
| `--llm-backend claude-code` | Use the local Claude Code session for KTX LLM calls |
| `--llm-model <model>` | LLM model ID or backend model alias to validate and save |
| `--anthropic-api-key-env <name>` | Environment variable containing the Anthropic API key |
| `--anthropic-api-key-file <path>` | File containing the Anthropic API key |
| `--anthropic-model <model>` | Anthropic model ID to validate and save |
| `--anthropic-model <model>` | Legacy alias for `--llm-model` |
| `--vertex-project <project>` | Vertex AI project ID, `env:NAME`, or `file:/path` reference |
| `--vertex-location <location>` | Vertex AI location, `env:NAME`, or `file:/path` reference |
| `--skip-llm` | Leave LLM setup incomplete |
Choose only one Anthropic credential source. Anthropic credential flags are only
valid with the Anthropic backend; Vertex flags are only valid with the Vertex
backend.
backend. The `claude-code` backend uses local Claude Code authentication instead
of Anthropic API key or Vertex flags. For Claude Code, `--llm-model` accepts
`sonnet`, `opus`, `haiku`, or a full Claude model ID.
### Embeddings
@ -142,6 +146,12 @@ ktx setup
# Run setup for a specific project directory
ktx setup --project-dir ./analytics
# Use Claude Code with Opus for KTX LLM calls
ktx setup \
--project-dir ./analytics \
--llm-backend claude-code \
--llm-model opus
# Script a Postgres connection that reads its URL from the environment
ktx setup \
--project-dir ./analytics \

View file

@ -47,6 +47,10 @@ ktx status --project-dir ./analytics
`ktx status` prints grouped doctor checks. Agents should use
`ktx status --json --no-input` when they need to branch on readiness state.
For `llm.provider.backend: claude-code`, `ktx status` checks that the local
Claude Code session is usable. If auth fails, run the Claude Code CLI login
flow, then rerun `ktx status`.
```json
{
"title": "KTX project doctor",

View file

@ -59,12 +59,13 @@ setup progress under `.ktx/setup/` and resumes from the remaining work.
KTX uses a Claude model for ingest agents that turn schemas, SQL, BI metadata,
and documents into semantic-layer sources and wiki context.
Setup supports two LLM provider paths:
Setup supports three LLM provider paths:
| Provider | Use when | Credential model |
|----------|----------|------------------|
| Anthropic API | You have an Anthropic API key | `ANTHROPIC_API_KEY` or a local `file:` secret |
| Google Vertex AI for Anthropic Claude | Your organization runs Claude through Google Cloud | Application Default Credentials plus Vertex project and location |
| Claude Code | You want KTX to use your local Claude Code session | Claude Code local authentication |
For Anthropic API, setup can read the key from the environment or save a pasted
key to `.ktx/secrets/anthropic-api-key`. `ktx.yaml` stores an `env:` or `file:`
@ -74,6 +75,27 @@ For Vertex AI, setup uses Google Application Default Credentials. It can read
your active `gcloud` project, list visible projects, or accept explicit
`--vertex-project` and `--vertex-location` values.
To use your local Claude Code session instead of an API key, set:
```yaml
llm:
provider:
backend: claude-code
models:
default: sonnet
triage: haiku
candidateExtraction: sonnet
curator: sonnet
reconcile: sonnet
repair: sonnet
```
`claude-code` uses the Claude Code authentication already configured on your
machine. It doesn't use `ANTHROPIC_API_KEY`, Vertex credentials, AI Gateway
tokens, or Bedrock credentials. In non-interactive setup, pass
`--llm-model opus`, `--llm-model sonnet`, `--llm-model haiku`, or a full Claude
model ID to select the Claude Code model.
Setup checks the selected model before saving. Anthropic API setup fetches live
Claude model choices when possible and falls back to bundled defaults if model
discovery is unavailable.

View file

@ -58,6 +58,11 @@ ktx ingest --all --deep
Deep ingest needs LLM and embedding readiness. If those providers are not
configured, run `ktx setup` or use `--fast`.
When you use `claude-code`, KTX still controls the tool surface for ingest and
memory capture. Claude Code built-in tools, discovered MCP servers, plugins,
skills, agents, and slash commands are not invokable by KTX agent loops unless
they are exact KTX MCP tools for the current run.
## Query history
PostgreSQL, BigQuery, and Snowflake can add query-history context. This helps

View file

@ -0,0 +1,61 @@
---
title: LLM configuration
description: Configure KTX LLM providers, model roles, and prompt caching.
---
KTX uses the top-level `llm` block in `ktx.yaml` for text generation,
structured extraction, and ingest or memory agent loops.
## Backends
Set `llm.provider.backend` to one of these values:
- `anthropic`: Use the Anthropic API through `ANTHROPIC_API_KEY` or the
configured `api_key` reference.
- `vertex`: Use Vertex AI Anthropic models through Google Cloud credentials.
- `gateway`: Use AI Gateway-compatible Anthropic model IDs.
- `claude-code`: Use your local Claude Code session through the Claude Agent
SDK. KTX removes provider-routing environment variables from Claude Code
child processes, so this backend doesn't silently fall back to
`ANTHROPIC_API_KEY`, Vertex, Gateway, or Bedrock credentials.
## Claude Code
Use aliases or full Claude model IDs in `llm.models`:
```yaml
llm:
provider:
backend: claude-code
models:
default: sonnet
triage: haiku
candidateExtraction: sonnet
curator: sonnet
reconcile: sonnet
repair: sonnet
```
During setup, choose the Claude Code backend interactively or pass the model in
automation:
```bash
ktx setup --llm-backend claude-code --llm-model opus --no-input
```
For Claude Code, `sonnet`, `opus`, and `haiku` map to the current KTX defaults.
You can also pass a full Claude model ID, such as `claude-opus-4-7`.
`claude-code` keeps KTX tool boundaries intact. KTX exposes only the MCP tools
needed for the current KTX agent loop, disables Claude Code built-in tools,
keeps plugins empty, and denies every non-KTX tool request through
`canUseTool`. The Claude Agent SDK may still report host-discovered slash
commands, skills, and subagent names in init metadata; that metadata is not an
execution grant for KTX agent loops.
## Prompt caching
`llm.promptCaching` has partial parity on `claude-code`. KTX doesn't pass
Anthropic cache-control markers to the Claude Agent SDK. Status and doctor warn
when you configure prompt-cache TTL, tool, or history fields that the Claude
Agent SDK backend ignores.

View file

@ -1,5 +1,5 @@
{
"title": "Guides",
"defaultOpen": true,
"pages": ["building-context", "writing-context", "serving-agents"]
"pages": ["building-context", "llm-configuration", "writing-context", "serving-agents"]
}

View file

@ -15,6 +15,12 @@ const config = {
},
async redirects() {
return [
{
source: "/docs",
destination: "/docs/getting-started/introduction",
permanent: false,
basePath: false,
},
{
source: "/:path*",
has: [{ type: "host", value: "docs.ktx.sh" }],

99
docs/release.md Normal file
View file

@ -0,0 +1,99 @@
# KTX release runbook
This runbook covers the maintainer workflow for publishing `@kaelio/ktx` to
npm through GitHub Actions. The workflow uses semantic-release to choose the
next version, update release metadata, publish the package, create the GitHub
release, and commit the release files back to the repository.
## Release channels
KTX has two npm release channels:
- `rc` publishes prereleases such as `0.1.0-rc.2` to the npm `next` tag.
- `stable` publishes normal releases such as `0.1.0` to the npm `latest` tag.
Run stable releases only from `main`. The workflow rejects stable releases from
other branches.
## Prerequisites
Before you publish, confirm these requirements:
- The repository has an Actions secret named `NPM_TOKEN`.
- `NPM_TOKEN` is a granular npm token that can publish `@kaelio/ktx`.
- The token can publish non-interactively if the npm account or package uses
two-factor authentication for writes.
- The repository has a baseline semantic-release tag for the latest published
package version, such as `v0.1.0-rc.1`.
If no baseline tag exists, semantic-release treats the run as the first release
and may choose a version that doesn't match the currently published package.
## Dry-run a release
Use a dry-run to verify the next version and generated release notes without
publishing to npm.
1. Open **Actions** in GitHub.
2. Select **KTX Release**.
3. Select the branch to release from.
4. Set **release_kind** to `rc` or `stable`.
5. Leave **publish_live** set to `false`.
6. Optional: Set **force_release** to `true` when you need a patch release even
if semantic-release doesn't find a releasable commit.
7. Run the workflow.
The dry-run uses the same semantic-release configuration as a live release. It
doesn't publish to npm and doesn't commit release files.
## Publish an rc release
Publish an rc release when you need a prerelease package for validation before
promoting to `latest`.
1. Open **Actions** in GitHub.
2. Select **KTX Release**.
3. Select the branch to release from.
4. Set **release_kind** to `rc`.
5. Set **publish_live** to `true`.
6. Optional: Set **force_release** to `true`.
7. Run the workflow.
The workflow publishes `@kaelio/ktx` with `--access public --tag next`, runs the
published package smoke test, creates a GitHub release, and commits
`CHANGELOG.md`, `package.json`, and `release-policy.json`.
## Publish a stable release
Publish a stable release from `main` after you have validated an rc package.
1. Open **Actions** in GitHub.
2. Select **KTX Release**.
3. Select `main`.
4. Set **release_kind** to `stable`.
5. Set **publish_live** to `true`.
6. Optional: Set **force_release** to `true`.
7. Run the workflow.
The workflow publishes `@kaelio/ktx` with `--access public --tag latest`, runs
the published package smoke test, creates a GitHub release, and commits the
release metadata.
## Release metadata
semantic-release calls `scripts/update-public-release-version.mjs` during the
prepare step. That script updates:
- `package.json` with the semantic-release version.
- `release-policy.json` with `publicNpmPackageVersion`, npm publish settings,
and the published package smoke-test version.
The artifact packaging and readiness scripts read `publicNpmPackageVersion`
from `release-policy.json`, so manual version edits in build scripts aren't
needed for rc releases.
## Trusted Publishing follow-up
This workflow uses `NPM_TOKEN` today. Move to npm Trusted Publishing after the
final publish command path is verified for the package manager and workflow
filename configured in npm package settings.

View file

@ -0,0 +1,678 @@
# Claude Code Auth Probe Isolation Fix Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make the `claude-code` auth probe and runtime tolerate host-discovered
Claude Code init metadata while preserving KTX-owned tool, MCP, and plugin
restrictions.
**Architecture:** Keep the existing Claude Code runtime and SDK option tuple.
Change the init-message assertion from "no host discovery appears" to "only the
KTX-controlled execution surface is active." Align the design spec and user docs
with the pinned SDK behavior: `settingSources: []` disables filesystem settings,
`skills: []` is a context filter, and deny-by-default `canUseTool` is the
runtime enforcement boundary.
**Tech Stack:** TypeScript, pnpm, Vitest, Markdown, Fumadocs MDX,
`@anthropic-ai/claude-agent-sdk@0.3.142`.
---
## Audit result
The current strict isolation assertion is a v1-blocking bug. A real authenticated
Claude Code host can report non-empty `slash_commands`, `skills`, and `agents`
in the SDK init message even when KTX passes `settingSources: []`, `skills: []`,
`plugins: []`, `tools: []`, exact KTX MCP `allowedTools`, `disallowedTools`, and
deny-by-default `canUseTool`.
Spec findings:
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md:45-47`
requires host-discovered capabilities not to expand the KTX agent-loop tool
surface. That requirement is about invocation, not necessarily about zero
diagnostic metadata in the init message.
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md:254-265`
overreaches by asking the implementation to assert that unexpected
settings-derived commands, skills, agents, plugins, or MCP servers are
inactive from the SDK init message. In `@anthropic-ai/claude-agent-sdk@0.3.142`,
the available SDK controls cannot make `message.slash_commands`,
`message.skills`, or `message.agents` reliably empty on an authenticated host.
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md:266-267`
says skills are disabled with `skills: []`. The pinned SDK type definitions
document `skills` as a context filter, not a sandbox.
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md:543-545`
correctly requires the auth probe to pass the isolation option tuple and no
MCP servers. It does not require failing when host discovery metadata is
present.
SDK evidence from
`node_modules/.pnpm/@anthropic-ai+claude-agent-sdk@0.3.142_zod@4.4.3/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts`:
- Lines `1686-1695`: `settingSources: []` disables filesystem settings only.
- Lines `1697-1718`: `skills: []` is a context filter; unlisted skills are
hidden from listing and rejected by the Skill tool, but files remain on disk.
- Lines `1202-1213`: `allowedTools` is auto-approval, while `canUseTool` is the
permission handler for controlling tool execution.
- Lines `1224-1228`: `disallowedTools` removes listed tools from context and
prevents use.
- Lines `1255-1264`: `tools: []` disables built-in tools.
- Lines `1545-1558`: `plugins` loads plugins when supplied; KTX supplies `[]`.
- Lines `3465-3489`: the init message reports `agents`, `tools`,
`mcp_servers`, `slash_commands`, `skills`, and `plugins`.
Implemented plan audit:
- `2026-05-15-claude-code-backend-v1-runtime.md` is implemented for config,
runtime port, SDK dependency, model aliases, environment scrubbing, Claude Code
text/object/agent execution, setup/status/doctor support, docs, and LLM
call-site migration.
- `2026-05-15-claude-code-backend-v1-isolation-closure.md` is implemented, but
it converted the spec's ambiguous "assert inactive" line into an impossible
assertion against non-empty `slash_commands`, `skills`, and `agents`.
- `2026-05-15-claude-code-backend-v1-ingest-guidance-closure.md` is implemented
for the ingest missing-LLM guidance and associated CLI/context tests.
Remaining v1-blocking gaps:
- `packages/context/src/llm/claude-code-runtime.ts:94-101` throws on
host-discovered slash commands, skills, and agents.
- `packages/context/src/llm/claude-code-runtime.test.ts:158-178` encodes the
wrong behavior by requiring the runtime to reject any init message with
discovered agents.
- The auth probe has no regression coverage for an authenticated host whose init
message reports non-empty `slash_commands`, `skills`, and `agents`.
- User docs under `docs-site/content/docs/guides/` say KTX "disables" skills,
agents, hooks, and slash commands. That wording is stronger than the SDK
contract and must be changed to "not invokable by KTX agent loops."
Non-blocking gaps:
- Same-step AI SDK tool-call repair parity remains out of scope for v1.
- OTEL telemetry parity remains out of scope for v1.
- Embedding parity remains out of scope because embeddings are configured
separately.
- Full prompt-caching parity remains out of scope. V1 keeps warning on ignored
prompt-cache fields and avoids AI SDK cache markers on the Claude Code path.
Decision:
- Choose option (a): relax the assertion in code and align the spec text. Do not
rely on an invented SDK mechanism. The pinned type definitions expose
`settingSources`, `skills`, `plugins`, `tools`, `allowedTools`,
`disallowedTools`, and `canUseTool`, but they do not expose a query option that
disables all host-discovered slash commands or user-level subagent names in the
init message.
## File structure
Modify these files:
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md` aligns the
design with the real SDK contract.
- `packages/context/src/llm/claude-code-runtime.test.ts` adds the failing
regression tests for auth probe and runtime init metadata.
- `packages/context/src/llm/claude-code-runtime.ts` relaxes init metadata checks
while tightening exact tool equality.
- `docs-site/content/docs/guides/llm-configuration.mdx` changes user docs from
"disabled" to "not invokable."
- `docs-site/content/docs/guides/building-context.mdx` applies the same
user-facing wording at the ingest guide boundary.
### Task 1: Align the design spec with SDK reality
**Files:**
- Modify: `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md`
- [ ] **Step 1: Update the tool-boundary goal**
Replace the goal bullet at lines `45-47` with:
```markdown
- Preserve KTX's curated tool boundaries. Claude Code built-ins,
filesystem-discovered MCP servers, hooks, skills, plugins, agents, and slash
commands must not become invokable in KTX agent loops. The Agent SDK init
message may still report host-discovered slash commands, skills, and agents;
KTX treats that metadata as diagnostic only and restricts execution through
`tools: []`, exact KTX MCP `allowedTools`, `disallowedTools`, and
deny-by-default `canUseTool`.
```
- [ ] **Step 2: Replace the over-broad init assertion requirement**
Replace the bullet at lines `254-265` with:
```markdown
- Filesystem settings are not loaded. The SDK's documented default for an
omitted `settingSources` is `["user", "project", "local"]`
(`@anthropic-ai/claude-agent-sdk@0.3.142` `sdk.d.ts:1686-1695`),
which would inherit the user's Claude Code filesystem settings. Every KTX
`query()` call site - agent loops, text generation, object generation, and
the auth probe - MUST pass `settingSources: []` explicitly, along with
`skills: []`, `plugins: []`, `tools: []`, `persistSession: false`, and no
`mcpServers` entries other than the KTX MCP server (omitted entirely when
the call site does not expose tools). The implementation MUST assert from
the SDK init message that the controlled execution surface matches KTX's
expectations:
- `message.tools` equals the exact generated KTX MCP tool ids for the current
call.
- `message.mcp_servers` equals the expected KTX MCP server set: `[]` when the
call exposes no tools, or `["ktx"]` when it does.
- `message.plugins` is empty.
The implementation MUST NOT reject a run solely because
`message.slash_commands`, `message.skills`, or `message.agents` contain
host-discovered names. In `@anthropic-ai/claude-agent-sdk@0.3.142`, those
fields can report host discovery even when KTX passes the isolation options.
They are not part of the KTX execution surface when `tools: []`,
`allowedTools`, `disallowedTools`, and deny-by-default `canUseTool` are set.
```
- [ ] **Step 3: Replace the skills/plugin wording**
Replace the bullets at lines `266-289` with:
```markdown
- `skills: []` is a context filter in the pinned SDK
(`sdk.d.ts:1697-1718`): unlisted skills are hidden from the model's skill
listing and rejected by the Skill tool, but discovered skill names may still
appear in init metadata. KTX must still pass `skills: []`.
- Plugins are disabled with `plugins: []`, and the runtime asserts that
`message.plugins` is empty in the init message.
- Built-in tools are disabled by setting `tools: []`. The pinned SDK type
(`@anthropic-ai/claude-agent-sdk@0.3.142`, `sdk.d.ts:1255-1264`) documents
`tools` as the base set of built-in tools, with `[]` meaning "disable all
built-ins"; `tools` does not accept MCP tool ids and cannot be used to
restrict MCP availability.
- MCP tool availability is granted by registering the KTX MCP server through
`mcpServers`. The SDK does not document a wildcard like `mcp__ktx__*` for
any tool field; KTX must enumerate exact generated MCP tool ids of the form
`mcp__ktx__<toolName>` (derived from the tool map handed to
`createSdkMcpServer`) wherever a list of tool ids is required.
- Pre-approval under `permissionMode: "dontAsk"` is configured by listing those
same exact `mcp__ktx__<toolName>` ids in `allowedTools` (documented as
auto-allow without prompting). Treat `allowedTools` as auto-approval, not
restriction.
- Defense-in-depth restriction uses `canUseTool`. The KTX runtime supplies a
`canUseTool` handler that allows only tool names in the current KTX MCP tool
map and denies everything else, so host-discovered slash commands, skills,
agents, future SDK defaults, or a misconfigured MCP server cannot expand the
execution surface.
- `disallowedTools` MUST additionally list the current built-in tool names
(`Agent`, `Task`, `AskUserQuestion`, `Bash`, `Read`, `Edit`, `Write`, `Glob`,
`Grep`, `WebFetch`, `WebSearch`, `TodoWrite`) as redundant insurance.
```
- [ ] **Step 4: Update auth probe acceptance text**
After the auth probe option list at lines `543-545`, add:
```markdown
The auth probe MUST tolerate init messages with non-empty
`slash_commands`, `skills`, and `agents` when `message.tools` is empty,
`message.mcp_servers` is empty, `message.plugins` is empty, and the query
options contain the KTX isolation tuple. Host discovery metadata is not an
auth failure.
```
- [ ] **Step 5: Update verified evidence and open items**
Replace lines `621-623` with:
```markdown
- The Agent SDK skills docs say the `skills` option is a context filter rather
than a sandbox. KTX must pass `skills: []`, but must not assert that
`message.skills` is empty in the SDK init message.
```
Replace open item `8` at lines `648-649` with:
```markdown
8. Write tests proving a raw built-in Claude Code tool request is denied,
host-discovered Skill/Agent/SlashCommand requests are denied by `canUseTool`,
and only exact `mcp__ktx__<toolName>` tools are allowed during KTX agent loops.
```
Replace open item `9` at lines `650-654` with:
```markdown
9. Write a test that asserts every KTX-originated `query()` invocation
(agent loop, text generation, object generation, auth probe) is called
with `settingSources: []`, `skills: []`, `plugins: []`, `tools: []`, and
`persistSession: false`, by spying on the SDK entry point. The test must
fail if any path falls back to SDK defaults for those fields. The test must
also prove that non-empty host-discovered `slash_commands`, `skills`, and
`agents` in the init message do not fail the auth probe or runtime when the
controlled tool, MCP server, and plugin surfaces match KTX expectations.
```
- [ ] **Step 6: Commit the spec alignment**
Run:
```bash
git add docs/superpowers/specs/2026-05-15-claude-code-backend-design.md
git commit -m "docs: align claude-code isolation spec with sdk metadata"
```
Expected: the design spec no longer requires zero host-discovery metadata in
the SDK init message.
### Task 2: Add regression tests for host-discovered init metadata
**Files:**
- Modify: `packages/context/src/llm/claude-code-runtime.test.ts`
- [ ] **Step 1: Replace the invalid agent rejection test**
In `packages/context/src/llm/claude-code-runtime.test.ts`, replace the test named
`rejects settings-derived agents and non-KTX MCP servers from init messages`
with these tests:
```ts
it('treats host-discovered commands skills and agents as non-fatal init metadata for text and auth probe', async () => {
const hostDiscoveredInit = initMessage({
slash_commands: ['/help', '/compact', '/clear', '/user-command'],
skills: ['pdf', 'docx'],
agents: ['claude', 'Explore', 'general-purpose'],
});
const textQuery = vi.fn((_input: any) =>
stream([hostDiscoveredInit, resultMessage({ result: 'hello' })]),
);
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query: textQuery,
env: { ANTHROPIC_API_KEY: 'sk-ant-test', PATH: '/usr/bin' }, // pragma: allowlist secret
});
await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).resolves.toBe('hello');
const textOptions = textQuery.mock.calls[0][0].options;
expect(textOptions).toMatchObject({
settingSources: [],
skills: [],
plugins: [],
tools: [],
allowedTools: [],
permissionMode: 'dontAsk',
persistSession: false,
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
});
expect(textOptions.disallowedTools).toEqual(expect.arrayContaining(['Agent', 'Task', 'Bash']));
expect(await textOptions.canUseTool('Agent', {}, { signal: new AbortController().signal, toolUseID: 'agent' })).toMatchObject({
behavior: 'deny',
toolUseID: 'agent',
});
expect(await textOptions.canUseTool('Skill', {}, { signal: new AbortController().signal, toolUseID: 'skill' })).toMatchObject({
behavior: 'deny',
toolUseID: 'skill',
});
expect(
await textOptions.canUseTool('SlashCommand', {}, { signal: new AbortController().signal, toolUseID: 'slash' }),
).toMatchObject({
behavior: 'deny',
toolUseID: 'slash',
});
const probeQuery = vi.fn((_input: any) =>
stream([hostDiscoveredInit, resultMessage({ result: 'ok' })]),
);
await expect(
runClaudeCodeAuthProbe({
projectDir: '/tmp/project',
model: 'sonnet',
query: probeQuery,
env: { ANTHROPIC_AUTH_TOKEN: 'token', HOME: '/Users/test' },
}),
).resolves.toEqual({ ok: true });
expect(probeQuery.mock.calls[0][0].options).toMatchObject({
settingSources: [],
skills: [],
plugins: [],
tools: [],
allowedTools: [],
permissionMode: 'dontAsk',
persistSession: false,
env: expect.objectContaining({ HOME: '/Users/test' }),
});
expect(probeQuery.mock.calls[0][0].options.env).not.toEqual(
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token' }),
);
});
it('allows host-discovered context during agent loops while requiring exact KTX MCP tools and servers', async () => {
const query = vi.fn((_input: any) =>
stream([
initMessage({
tools: ['mcp__ktx__load_skill'],
mcp_servers: [{ name: 'ktx', status: 'connected' }],
slash_commands: ['/help', '/compact', '/clear'],
skills: ['memory-agent', 'doc-reader'],
agents: ['claude', 'Plan', 'Explore'],
}),
{
type: 'assistant',
message: { role: 'assistant', content: [] },
parent_tool_use_id: null,
uuid: '00000000-0000-4000-8000-000000000006',
session_id: 'session-id',
} as unknown as SDKMessage,
resultMessage({ subtype: 'error_max_turns', is_error: true }),
]),
);
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: {},
});
await expect(
runtime.runAgentLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
toolSet: {
load_skill: {
name: 'load_skill',
description: 'Load skill.',
inputSchema: z.object({ name: z.string() }),
execute: async () => ({ markdown: 'loaded' }),
},
},
stepBudget: 1,
telemetryTags: { operationName: 'test' },
}),
).resolves.toEqual({ stopReason: 'budget' });
const options = query.mock.calls[0][0].options;
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
behavior: 'allow',
toolUseID: '1',
});
expect(await options.canUseTool('Task', {}, { signal: new AbortController().signal, toolUseID: '2' })).toMatchObject({
behavior: 'deny',
toolUseID: '2',
});
expect(await options.canUseTool('Skill', {}, { signal: new AbortController().signal, toolUseID: '3' })).toMatchObject({
behavior: 'deny',
toolUseID: '3',
});
});
// Counterpart case: the init message reports a non-KTX tool, a non-KTX MCP
// server, and a plugin, while the requested KTX tool id is absent. The call
// must reject with an isolation error that names each of them.
it('still rejects unexpected tools, missing KTX tools, plugins, and non-KTX MCP servers from init messages', async () => {
const query = vi.fn((_input: any) =>
stream([
initMessage({
tools: ['Bash'],
mcp_servers: [{ name: 'filesystem', status: 'connected' }],
plugins: [{ name: 'host-plugin', path: '/tmp/plugin' }],
}),
resultMessage({ result: 'hello' }),
]),
);
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: {},
});
// `load_skill` is requested here, so `mcp__ktx__load_skill` is expected to be
// reported as a missing tool in the error text.
await expect(
runtime.generateText({
role: 'default',
prompt: 'say hello',
tools: {
load_skill: {
name: 'load_skill',
description: 'Load skill.',
inputSchema: z.object({ name: z.string() }),
execute: async () => ({ markdown: 'loaded' }),
},
},
}),
).rejects.toThrow(
/Claude Code runtime isolation failed: .*tools=Bash.*missing_tools=mcp__ktx__load_skill.*mcp_servers=filesystem.*plugins=host-plugin/,
);
});
```
- [ ] **Step 2: Run the runtime test to verify it fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts
```
Expected: FAIL. The first new test fails because `runClaudeCodeAuthProbe(...)`
returns `{ ok: false, ... }` and `generateText(...)` rejects when init metadata
contains non-empty `slash_commands`, `skills`, or `agents`. The second new test
fails because `runAgentLoop(...)` returns `{ stopReason: 'error', ... }` for the
same reason.
- [ ] **Step 3: Commit the failing regression test**
Run:
```bash
git add packages/context/src/llm/claude-code-runtime.test.ts
git commit -m "test: cover claude-code host discovery metadata"
```
Expected: the commit contains tests that fail before the runtime assertion is
fixed.
### Task 3: Relax init metadata assertions to the controlled execution surface
**Files:**
- Modify: `packages/context/src/llm/claude-code-runtime.ts`
- [ ] **Step 1: Replace `assertInitIsolation`**
In `packages/context/src/llm/claude-code-runtime.ts`, replace the full
`assertInitIsolation(...)` function with:
```ts
/**
 * Checks that an SDK init message exposes only the KTX-controlled execution
 * surface. Messages that are not `system`/`init` are ignored.
 *
 * The check fails when the init message reports a tool outside
 * `allowedToolIds`, omits an allowed tool, reports an MCP server outside
 * `expectedMcpServerNames`, omits an expected MCP server, or reports any
 * plugin. Slash commands, skills, and agents are included in the error text
 * for diagnostics only; they never cause the failure on their own.
 *
 * @param message SDK message to inspect.
 * @param allowedToolIds Tool ids that must be exactly the active tool set.
 * @param expectedMcpServerNames MCP server names that must be exactly the active set.
 * @throws Error describing every violation when the surface does not match.
 */
function assertInitIsolation(
  message: SDKMessage,
  allowedToolIds: Set<string>,
  expectedMcpServerNames: Set<string>,
): void {
  if (!(message.type === 'system' && message.subtype === 'init')) {
    return;
  }

  // Renders a name list for the error text, with a placeholder for "empty".
  const listOrNone = (names: string[]): string => names.join(',') || '(none)';

  const reportedTools = new Set(message.tools);
  const strayTools = message.tools.filter((id) => !allowedToolIds.has(id));
  const absentTools = Array.from(allowedToolIds).filter((id) => !reportedTools.has(id));

  const reportedServers = message.mcp_servers.map(({ name }) => name);
  const strayServers = reportedServers.filter((name) => !expectedMcpServerNames.has(name));
  const absentServers = Array.from(expectedMcpServerNames).filter((name) => !reportedServers.includes(name));

  // Any plugin at all is unexpected.
  const pluginNames = message.plugins.map(({ name }) => name);

  const violations = [strayTools, absentTools, strayServers, absentServers, pluginNames];
  if (violations.every((names) => names.length === 0)) {
    return;
  }

  const details = [
    `tools=${listOrNone(strayTools)}`,
    `missing_tools=${listOrNone(absentTools)}`,
    `mcp_servers=${listOrNone(strayServers)}`,
    `missing_mcp_servers=${listOrNone(absentServers)}`,
    `plugins=${listOrNone(pluginNames)}`,
    `host_slash_commands=${message.slash_commands.length}`,
    `host_skills=${message.skills.length}`,
    `host_agents=${listOrNone(message.agents ?? [])}`,
  ].join(' ');
  throw new Error(`Claude Code runtime isolation failed: ${details}`);
}
```
This preserves strict checks for the KTX-controlled execution surface:
- `message.tools` must exactly equal the generated KTX MCP tool ids for the
current call.
- `message.mcp_servers` must exactly equal the expected KTX MCP server names.
- `message.plugins` must be empty.
It deliberately stops treating `message.slash_commands`, `message.skills`, and
`message.agents` as fatal because those fields can contain host-discovered
metadata that KTX cannot disable through the pinned SDK options.
- [ ] **Step 2: Run the runtime test to verify it passes**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts
```
Expected: PASS.
- [ ] **Step 3: Commit the runtime fix**
Run:
```bash
git add packages/context/src/llm/claude-code-runtime.ts packages/context/src/llm/claude-code-runtime.test.ts
git commit -m "fix: tolerate claude-code host discovery metadata"
```
Expected: the auth probe and runtime no longer fail solely because the SDK init
message reports host-discovered slash commands, skills, or agents.
### Task 4: Correct user-facing docs wording
**Files:**
- Modify: `docs-site/content/docs/guides/llm-configuration.mdx`
- Modify: `docs-site/content/docs/guides/building-context.mdx`
- [ ] **Step 1: Update the LLM configuration guide wording**
In `docs-site/content/docs/guides/llm-configuration.mdx`, replace lines `39-41`
with:
```mdx
`claude-code` keeps KTX tool boundaries intact. KTX exposes only the MCP tools
needed for the current KTX agent loop, disables Claude Code built-in tools,
keeps plugins empty, and denies every non-KTX tool request through
`canUseTool`. The Claude Agent SDK may still report host-discovered slash
commands, skills, and subagent names in init metadata; that metadata is not an
execution grant for KTX agent loops.
```
- [ ] **Step 2: Update the building context guide wording**
In `docs-site/content/docs/guides/building-context.mdx`, replace lines `61-63`
with:
```mdx
When you use `claude-code`, KTX still controls the tool surface for ingest and
memory capture. Claude Code built-in tools, discovered MCP servers, plugins,
skills, agents, and slash commands are not invokable by KTX agent loops unless
they are exact KTX MCP tools for the current run.
```
- [ ] **Step 3: Run docs tests**
Run:
```bash
pnpm --filter ktx-docs run test
```
Expected: PASS.
- [ ] **Step 4: Commit docs wording**
Run:
```bash
git add docs-site/content/docs/guides/llm-configuration.mdx docs-site/content/docs/guides/building-context.mdx
git commit -m "docs: clarify claude-code host discovery metadata"
```
Expected: user docs describe invocation control rather than promising zero
host-discovery metadata.
### Task 5: Final verification
**Files:**
- Verify: `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md`
- Verify: `packages/context/src/llm/claude-code-runtime.ts`
- Verify: `packages/context/src/llm/claude-code-runtime.test.ts`
- Verify: `docs-site/content/docs/guides/llm-configuration.mdx`
- Verify: `docs-site/content/docs/guides/building-context.mdx`
- [ ] **Step 1: Run targeted runtime tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts src/llm/runtime-tools.test.ts src/llm/claude-code-env.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run package type-check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 3: Run docs verification**
Run:
```bash
pnpm --filter ktx-docs run test
```
Expected: PASS.
- [ ] **Step 4: Run dead-code checks**
Run:
```bash
pnpm run dead-code
```
Expected: PASS or only pre-existing unrelated findings. Investigate and fix any
finding caused by the runtime assertion or test changes.
- [ ] **Step 5: Inspect git status**
Run:
```bash
git status --short
```
Expected: only files from this plan are modified, or the working tree is clean
if each task was committed.
## Self-review
- Spec coverage: This plan addresses the v1-blocking auth probe failure,
aligns the spec with the SDK contract, preserves the real KTX execution
boundary, and adds regression coverage for non-empty host-discovered
`slash_commands`, `skills`, and `agents` in both auth probe and runtime paths.
- Placeholder scan: No placeholder markers remain. Every code-changing step
includes exact file paths, code blocks, commands, and expected results.
- Type consistency: The plan uses existing names from the codebase:
`ClaudeCodeKtxLlmRuntime`, `runClaudeCodeAuthProbe`, `initMessage`,
`resultMessage`, `assertInitIsolation`, `mcpToolIds`, `KtxRuntimeToolSet`, and
`canUseTool`.

View file

@ -0,0 +1,160 @@
# Claude Code Backend V1 Ingest Guidance Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Make the `ktx ingest` missing-LLM guidance treat `claude-code` as a first-class setup path and restore the CLI ingest test suite.
**Architecture:** Keep the existing Claude Code runtime implementation unchanged. Update the single local-ingest guard message so users see both the local Claude Code setup path and the Anthropic API setup path, then align the context and CLI tests with that user-facing copy.
**Tech Stack:** TypeScript, pnpm, Vitest.
---
## Audit summary
The May 15 Claude Code backend runtime and isolation plans are implemented for
the core runtime path: config accepts `claude-code`, runtime calls use
`KtxLlmRuntimePort`, Claude SDK calls pass isolation options and scrubbed env,
setup/status/doctor validate Claude Code auth, and docs describe the backend.
One v1-blocking issue remains: `packages/context/src/ingest/local-bundle-runtime.ts`
lists `claude-code` in the missing-LLM guard line but still tells users only to
"Configure an Anthropic provider." The full CLI ingest test suite currently
fails because `packages/cli/src/ingest.test.ts` still expects the old provider
list without `claude-code`. This is v1-blocking because CI is red and the
fallback guidance is not first-class for the new backend.
Non-blocking gaps from the original spec remain unchanged:
- Same-step AI SDK tool-call repair parity is out of scope for the Claude Code
runtime.
- OTEL telemetry parity is out of scope for the Claude Code runtime.
- Embedding parity is out of scope because embeddings stay independently
configured.
- Full prompt-caching parity for tools, history, and per-section TTLs is out of
scope; v1 only needs no AI SDK cache markers on `claude-code` and explicit
warnings for ignored fields.
## File structure
Modify these files:
- `packages/context/src/ingest/local-bundle-runtime.ts` owns the missing-LLM
guard message used by local ingest and MCP-triggered ingest.
- `packages/context/src/ingest/local-bundle-runtime.test.ts` verifies the guard
message at the context boundary.
- `packages/cli/src/ingest.test.ts` verifies the user-facing CLI output.
No `docs-site/` update is required because the existing public docs already
document `claude-code` setup and ingest behavior; this plan only fixes an
inline runtime error message.
### Task 1: Update ingest LLM setup guidance
**Files:**
- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
- Modify: `packages/cli/src/ingest.test.ts`
- Modify: `packages/context/src/ingest/local-bundle-runtime.ts`
- [ ] **Step 1: Update the context guard-message test**
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace the
expected message in `requires an agent runner or configured local ingest LLM`
with this exact array:
```ts
[
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
'Configure a local Claude Code session or API-backed LLM, then rerun ingest:',
` ktx setup --project-dir ${project.projectDir} --llm-backend claude-code --no-input`,
` ktx setup --project-dir ${project.projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
].join('\n')
```
- [ ] **Step 2: Update the CLI ingest test**
In `packages/cli/src/ingest.test.ts`, replace the stale provider-list
assertion in `prints provider setup guidance when a skip-llm setup project runs
ingest` with:
```ts
expect(runIo.stderr()).toContain(
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
);
expect(runIo.stderr()).toContain('Configure a local Claude Code session or API-backed LLM, then rerun ingest:');
expect(runIo.stderr()).toContain(`ktx setup --project-dir ${projectDir} --llm-backend claude-code --no-input`);
expect(runIo.stderr()).toContain(
`ktx setup --project-dir ${projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
);
```
- [ ] **Step 3: Run tests to verify the new expectations fail**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts
pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts
```
Expected: both suites fail because the source message still says
`Configure an Anthropic provider, then rerun ingest:` and does not include the
Claude Code setup command.
- [ ] **Step 4: Update the ingest guard message**
In `packages/context/src/ingest/local-bundle-runtime.ts`, replace
`localIngestLlmProviderGuardMessage` with:
```ts
/**
 * Builds the multi-line guidance shown when ingest has no usable LLM backend
 * and no injected agent runner.
 *
 * @param projectDir Project directory interpolated into the suggested setup commands.
 * @returns Four newline-joined lines: the requirement, the instruction, and one
 *   setup command each for the `claude-code` and `anthropic` backends.
 */
function localIngestLlmProviderGuardMessage(projectDir: string): string {
  const requirement =
    'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.';
  const instruction = 'Configure a local Claude Code session or API-backed LLM, then rerun ingest:';
  const claudeCodeSetup = ` ktx setup --project-dir ${projectDir} --llm-backend claude-code --no-input`;
  const anthropicSetup = ` ktx setup --project-dir ${projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`;

  return `${requirement}\n${instruction}\n${claudeCodeSetup}\n${anthropicSetup}`;
}
```
- [ ] **Step 5: Run the targeted tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts
pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts
```
Expected: both suites pass.
- [ ] **Step 6: Run package type-checks**
Run:
```bash
pnpm --filter @ktx/context run type-check
pnpm --filter @ktx/cli run type-check
```
Expected: both commands pass.
- [ ] **Step 7: Commit**
Run:
```bash
git add packages/context/src/ingest/local-bundle-runtime.ts packages/context/src/ingest/local-bundle-runtime.test.ts packages/cli/src/ingest.test.ts
git commit -m "fix: update claude-code ingest setup guidance"
```
## Self-review
- Spec coverage: This plan closes the only remaining v1-blocking audit finding:
ingest setup guidance and CLI test expectations now include `claude-code` as
a first-class backend.
- Placeholder scan: No placeholders remain; every step includes exact paths,
code, commands, and expected output.
- Type consistency: The exact guard string is identical across the source and
both test updates.

View file

@ -0,0 +1,575 @@
# Claude Code Backend V1 Isolation Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Close the remaining v1-blocking Claude Code backend gaps around SDK
init isolation assertions and setup-time prompt-caching warnings.
**Architecture:** Keep the existing runtime port and Claude Code runtime. Add
the missing init-message checks inside the Claude runtime, then share the
prompt-caching warning formatter between status/doctor and setup so all
user-facing readiness flows report ignored Claude Code cache knobs consistently.
**Tech Stack:** TypeScript, pnpm, Vitest, Zod, `@anthropic-ai/claude-agent-sdk@0.3.142`.
---
## Audit Summary
The May 15 Claude Code backend v1 plan is mostly implemented. Remaining
v1-blocking gaps from the original spec are:
- `packages/context/src/llm/claude-code-runtime.ts` asserts init-message tools,
slash commands, skills, and plugins, but does not assert `agents` or
unexpected `mcp_servers`. The spec requires asserting that settings-derived
commands, skills, agents, plugins, and MCP servers are inactive.
- `packages/cli/src/setup-models.ts` validates Claude Code auth but does not
surface ignored `llm.promptCaching` fields during setup. The spec requires
setup, status, and doctor to surface ignored prompt-caching fields for the
`claude-code` backend. Status and doctor already warn.
Non-blocking gaps:
- Same-step tool-call repair parity remains out of scope for v1.
- OTEL telemetry parity remains out of scope for v1.
- Embedding parity remains out of scope because embeddings are configured
independently.
- Full prompt-caching parity for tools, history, and per-section TTLs remains
out of scope; v1 only needs explicit warnings and no AI SDK cache markers on
the Claude Code path.
## File Structure
Modify these files:
- `packages/context/src/llm/claude-code-runtime.ts` adds complete init-message
isolation checks for agents and MCP servers.
- `packages/context/src/llm/claude-code-runtime.test.ts` adds regression tests
for rejected agents and MCP servers, env scrubbing in object generation and
agent loops, and `onStepFinish` callback error handling.
- `packages/cli/src/claude-code-prompt-caching.ts` is created as the shared
formatter for ignored prompt-caching fields.
- `packages/cli/src/status-project.ts` imports the shared formatter instead of
keeping a local helper.
- `packages/cli/src/setup-models.ts` emits the shared warning when setup saves
`llm.provider.backend: claude-code` and existing prompt-caching fields are
present.
- `packages/cli/src/setup-models.test.ts` covers setup warning output.
- `packages/cli/src/doctor.test.ts` keeps coverage for doctor output using the
shared formatter.
### Task 1: Complete Claude Code init isolation checks
**Files:**
- Modify: `packages/context/src/llm/claude-code-runtime.test.ts`
- Modify: `packages/context/src/llm/claude-code-runtime.ts`
- [ ] **Step 1: Add failing isolation and runtime behavior tests**
Add these tests inside `describe('ClaudeCodeKtxLlmRuntime', ...)` in
`packages/context/src/llm/claude-code-runtime.test.ts`:
```ts
// The init message reports an agent and a non-KTX MCP server while no tools are
// requested; the call must reject with an isolation error naming both.
it('rejects settings-derived agents and non-KTX MCP servers from init messages', async () => {
const query = vi.fn((_input: any) =>
stream([
initMessage({
agents: ['project-agent'],
mcp_servers: [{ name: 'filesystem', status: 'connected' }],
}),
resultMessage({ result: 'hello' }),
]),
);
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: {},
});
await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).rejects.toThrow(
/Claude Code runtime isolation failed: .*mcp_servers=filesystem.*agents=project-agent/,
);
});
// Checks the `env` option passed to the SDK query for two entry points:
// `generateObject` and `runAgentLoop`. Credential/provider variables must be
// absent while unrelated variables (PATH, HOME) are passed through.
it('passes scrubbed env to object generation and agent loops', async () => {
const schema = z.object({ answer: z.string() });
// Part 1: object generation.
const objectQuery = vi.fn((_input: any) =>
stream([initMessage(), resultMessage({ structured_output: { answer: 'yes' } })]),
);
const objectRuntime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query: objectQuery,
env: { ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod', PATH: '/usr/bin' }, // pragma: allowlist secret
});
await expect(objectRuntime.generateObject({ role: 'default', prompt: 'json', schema })).resolves.toEqual({
answer: 'yes',
});
expect(objectQuery.mock.calls[0][0].options.env).toEqual(
expect.objectContaining({ PATH: '/usr/bin' }),
);
expect(objectQuery.mock.calls[0][0].options.env).not.toEqual(
expect.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }), // pragma: allowlist secret
);
// Part 2: agent loop. The stream ends with an `error_max_turns` result; the
// return value is not asserted here, only the env passed to the query.
const agentQuery = vi.fn((_input: any) =>
stream([
initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
{
type: 'assistant',
message: { role: 'assistant', content: [] },
parent_tool_use_id: null,
uuid: '00000000-0000-4000-8000-000000000004',
session_id: 'session-id',
} as unknown as SDKMessage,
resultMessage({ subtype: 'error_max_turns', is_error: true }),
]),
);
const agentRuntime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query: agentQuery,
env: { ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1', HOME: '/Users/test' },
});
await agentRuntime.runAgentLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
toolSet: {
load_skill: {
name: 'load_skill',
description: 'Load skill.',
inputSchema: z.object({ name: z.string() }),
execute: async () => ({ markdown: 'loaded' }),
},
},
stepBudget: 1,
telemetryTags: { operationName: 'test' },
});
expect(agentQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ HOME: '/Users/test' }));
expect(agentQuery.mock.calls[0][0].options.env).not.toEqual(
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' }),
);
});
// A throwing `onStepFinish` callback must not fail the agent loop: the loop is
// expected to resolve normally and the error text to be passed to `logger.warn`.
it('logs and ignores onStepFinish callback errors', async () => {
// Stream with one assistant turn followed by a successful, completed result.
const query = vi.fn((_input: any) =>
stream([
initMessage(),
{
type: 'assistant',
message: { role: 'assistant', content: [] },
parent_tool_use_id: null,
uuid: '00000000-0000-4000-8000-000000000005',
session_id: 'session-id',
} as unknown as SDKMessage,
resultMessage({ subtype: 'success', terminal_reason: 'completed' }),
]),
);
// Spy logger so the warning can be asserted below.
const logger = {
debug: vi.fn(),
log: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
};
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: {},
logger,
});
await expect(
runtime.runAgentLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
toolSet: {},
stepBudget: 1,
telemetryTags: { operationName: 'test' },
onStepFinish: async () => {
throw new Error('callback exploded');
},
}),
).resolves.toEqual({ stopReason: 'natural' });
expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('callback exploded'));
});
```
- [ ] **Step 2: Run the Claude runtime test to verify it fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts
```
Expected: FAIL because the new agents/MCP-server isolation test resolves
successfully instead of throwing.
- [ ] **Step 3: Add expected MCP server metadata and complete init assertions**
In `packages/context/src/llm/claude-code-runtime.ts`, replace
`assertInitIsolation` and add the helper below it:
```ts
/**
 * Strict init-message isolation check. Messages that are not `system`/`init`
 * are ignored.
 *
 * The check fails when the init message reports a tool outside
 * `allowedToolIds`, an MCP server outside `expectedMcpServerNames`, omits an
 * expected MCP server, or reports any slash command, skill, plugin, or agent.
 *
 * @param message SDK message to inspect.
 * @param allowedToolIds Tool ids the init message may report.
 * @param expectedMcpServerNames MCP server names that must be exactly the active set.
 * @throws Error summarising every category when any violation is present.
 */
function assertInitIsolation(
  message: SDKMessage,
  allowedToolIds: Set<string>,
  expectedMcpServerNames: Set<string>,
): void {
  if (!(message.type === 'system' && message.subtype === 'init')) {
    return;
  }

  // Renders a name list for the error text, with a placeholder for "empty".
  const listOrNone = (names: string[]): string => names.join(',') || '(none)';

  const strayTools = message.tools.filter((id) => !allowedToolIds.has(id));
  const reportedServers = message.mcp_servers.map(({ name }) => name);
  const strayServers = reportedServers.filter((name) => !expectedMcpServerNames.has(name));
  const absentServers = Array.from(expectedMcpServerNames).filter((name) => !reportedServers.includes(name));
  // `agents` may be absent on the message; treat that as "no agents".
  const reportedAgents = message.agents ?? [];

  const violationCounts = [
    strayTools.length,
    strayServers.length,
    absentServers.length,
    message.slash_commands.length,
    message.skills.length,
    message.plugins.length,
    reportedAgents.length,
  ];
  if (violationCounts.every((count) => count === 0)) {
    return;
  }

  const details = [
    `tools=${listOrNone(strayTools)}`,
    `mcp_servers=${listOrNone(strayServers)}`,
    `missing_mcp_servers=${listOrNone(absentServers)}`,
    `slash_commands=${message.slash_commands.length}`,
    `skills=${message.skills.length}`,
    `plugins=${message.plugins.length}`,
    `agents=${listOrNone(reportedAgents)}`,
  ].join(' ');
  throw new Error(`Claude Code runtime isolation failed: ${details}`);
}
/**
 * Returns the MCP server names an init message is expected to report for a call.
 *
 * @param tools Tool set supplied to the call, if any.
 * @returns A set containing `'ktx'` when at least one tool is supplied, otherwise an empty set.
 */
function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set<string> {
  const serverNames = new Set<string>();
  if (tools && Object.keys(tools).length > 0) {
    serverNames.add('ktx');
  }
  return serverNames;
}
```
Update `collectResult` parameters:
```ts
async function collectResult(params: {
query: QueryFn;
prompt: string;
options: Options;
allowedToolIds: Set<string>;
expectedMcpServerNames: Set<string>;
onAssistantTurn?: () => Promise<void>;
}): Promise<SDKResultMessage> {
let result: SDKResultMessage | undefined;
for await (const message of params.query({ prompt: params.prompt, options: params.options })) {
assertInitIsolation(message, params.allowedToolIds, params.expectedMcpServerNames);
```
Update the four `collectResult(...)` calls. For calls that receive `input` (for example `generateText(...)`), use:
```ts
const tools = input.tools ?? {};
const result = await collectResult({
query: this.runQuery,
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
options,
allowedToolIds: new Set(mcpToolIds(tools)),
expectedMcpServerNames: expectedMcpServerNames(input.tools),
});
```
For `runAgentLoop(...)`, use:
```ts
const result = await collectResult({
query: this.runQuery,
prompt: params.userPrompt,
options: { ...options, systemPrompt: params.systemPrompt },
allowedToolIds: new Set(mcpToolIds(params.toolSet)),
expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
onAssistantTurn: async () => {
```
For `runClaudeCodeAuthProbe(...)`, use:
```ts
const result = await collectResult({
query: input.query ?? defaultQuery,
prompt: 'Reply with exactly: ok',
options,
allowedToolIds: new Set(),
expectedMcpServerNames: new Set(),
});
```
- [ ] **Step 4: Run the Claude runtime test to verify it passes**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts
```
Expected: PASS.
- [ ] **Step 5: Commit**
Run:
```bash
git add packages/context/src/llm/claude-code-runtime.ts packages/context/src/llm/claude-code-runtime.test.ts
git commit -m "fix: close claude-code runtime isolation checks"
```
### Task 2: Surface Claude Code prompt-caching warnings during setup
**Files:**
- Create: `packages/cli/src/claude-code-prompt-caching.ts`
- Modify: `packages/cli/src/status-project.ts`
- Modify: `packages/cli/src/setup-models.ts`
- Modify: `packages/cli/src/setup-models.test.ts`
- Modify: `packages/cli/src/doctor.test.ts`
- [ ] **Step 1: Add failing setup warning test**
Add this test to `packages/cli/src/setup-models.test.ts`:
```ts
// Starts from a project whose `ktx.yaml` uses the `anthropic` backend with
// prompt-caching fields set, runs the setup step with `llmBackend: 'claude-code'`
// and a stubbed successful auth probe, and expects the ignored-fields warning on
// stderr.
it('warns during Claude Code setup when existing prompt-caching fields will be ignored', async () => {
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'llm:',
' provider:',
' backend: anthropic',
' models:',
' default: claude-sonnet-4-6',
' promptCaching:',
' enabled: true',
' systemTtl: 1h',
' toolsTtl: 1h',
' historyTtl: 5m',
'',
].join('\n'),
'utf-8',
);
const io = makeIo();
const result = await runKtxSetupAnthropicModelStep(
{
projectDir: tempDir,
inputMode: 'disabled',
llmBackend: 'claude-code',
skipLlm: false,
},
io.io,
{
// Stub the auth probe so the step does not depend on a real Claude Code session.
claudeCodeAuthProbe: async () => ({ ok: true as const }),
},
);
expect(result.status).toBe('ready');
expect(io.stderr()).toContain('claude-code ignores llm.promptCaching.systemTtl');
expect(io.stderr()).toContain('Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers');
});
```
- [ ] **Step 2: Run setup tests to verify the new test fails**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-models.test.ts
```
Expected: FAIL because setup does not emit the ignored prompt-caching warning.
- [ ] **Step 3: Create the shared prompt-caching warning helper**
Create `packages/cli/src/claude-code-prompt-caching.ts`:
```ts
import type { KtxProjectLlmConfig } from '@ktx/context/project';
/** `llm.promptCaching` keys that are reported as ignored on the `claude-code` backend. */
const CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS = [
  'systemTtl',
  'toolsTtl',
  'historyTtl',
  'vertexFallbackTo5m',
] as const;

/**
 * Lists the prompt-caching settings present in `config` that have no effect on
 * the `claude-code` backend.
 *
 * @param config Project LLM configuration to inspect.
 * @returns Dotted paths such as `llm.promptCaching.systemTtl`, in the order of
 *   `CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS`. Empty when the backend is not
 *   `claude-code` or no `promptCaching` section is set.
 */
export function ignoredClaudeCodePromptCachingFields(config: KtxProjectLlmConfig): string[] {
  // Capture the section once so the callback below closes over a value that is
  // already known to be set, rather than re-reading a property on `config`.
  const promptCaching = config.promptCaching;
  if (config.provider.backend !== 'claude-code' || !promptCaching) {
    return [];
  }
  return CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS.filter((key) => key in promptCaching).map(
    (key) => `llm.promptCaching.${key}`,
  );
}
/**
 * Formats the warning shown when prompt-caching settings are ignored on the
 * `claude-code` backend.
 *
 * @param fields Dotted config paths that are ignored.
 * @returns The warning sentence, or `null` when `fields` is empty.
 */
export function formatClaudeCodePromptCachingWarning(fields: string[]): string | null {
  const hasIgnoredFields = fields.length > 0;
  return hasIgnoredFields
    ? `claude-code ignores ${fields.join(', ')} because the Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers.`
    : null;
}
/**
 * Returns the remediation hint that accompanies the ignored prompt-caching
 * warning.
 *
 * @returns A fixed sentence; it does not depend on which fields were ignored.
 */
export function formatClaudeCodePromptCachingFix(): string {
  const remediation =
    'Remove those promptCaching fields or use anthropic, vertex, or gateway when those cache knobs are required.';
  return remediation;
}
```
- [ ] **Step 4: Update status/doctor to use the shared helper**
In `packages/cli/src/status-project.ts`, add:
```ts
import {
formatClaudeCodePromptCachingFix,
formatClaudeCodePromptCachingWarning,
ignoredClaudeCodePromptCachingFields,
} from './claude-code-prompt-caching.js';
```
Delete the local `ignoredClaudeCodePromptCachingFields(...)` function.
Replace the warning block in `buildWarnings(...)` with:
```ts
const warning = formatClaudeCodePromptCachingWarning(ignoredClaudeCodePromptCachingFields(config.llm));
if (warning) {
warnings.push({
message: warning,
fix: formatClaudeCodePromptCachingFix(),
});
}
```
- [ ] **Step 5: Emit the setup warning before persisting Claude Code config**
In `packages/cli/src/setup-models.ts`, add:
```ts
import {
formatClaudeCodePromptCachingWarning,
ignoredClaudeCodePromptCachingFields,
} from './claude-code-prompt-caching.js';
```
Inside the `backendChoice.backend === 'claude-code'` branch, immediately before
`await persistLlmConfig(...)`, add:
```ts
const warning = formatClaudeCodePromptCachingWarning(
ignoredClaudeCodePromptCachingFields(buildProjectLlmConfig(project.config.llm, { backend: 'claude-code' }, model)),
);
if (warning) {
io.stderr.write(`${warning}\n`);
}
```
- [ ] **Step 6: Run CLI tests**
Run:
```bash
pnpm --filter @ktx/cli exec vitest run src/setup-models.test.ts src/doctor.test.ts
```
Expected: PASS.
- [ ] **Step 7: Commit**
Run:
```bash
git add packages/cli/src/claude-code-prompt-caching.ts packages/cli/src/status-project.ts packages/cli/src/setup-models.ts packages/cli/src/setup-models.test.ts packages/cli/src/doctor.test.ts
git commit -m "fix: warn on claude-code prompt caching during setup"
```
### Task 3: Final verification
**Files:**
- Verify: `packages/context/src/llm/claude-code-runtime.ts`
- Verify: `packages/cli/src/setup-models.ts`
- Verify: `packages/cli/src/status-project.ts`
- [ ] **Step 1: Run targeted tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts src/llm/runtime-tools.test.ts src/llm/claude-code-env.test.ts src/llm/claude-code-models.test.ts src/llm/runtime-local-config.test.ts
pnpm --filter @ktx/cli exec vitest run src/setup-models.test.ts src/doctor.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run package type-checks**
Run:
```bash
pnpm --filter @ktx/context run type-check
pnpm --filter @ktx/cli run type-check
```
Expected: PASS.
- [ ] **Step 3: Run the LLM boundary audit**
Run:
```bash
rg -n "generateKtxText\\(|generateKtxObject\\(|new AgentRunnerService\\(|AgentRunnerService\\b|llmProvider\\b|getModel\\(|getModelByName\\(" packages/context/src packages/cli/src packages/llm/src --glob '!**/*.test.ts'
```
Expected: remaining matches are limited to:
- `packages/llm/src/**`
- `packages/context/src/llm/ai-sdk-runtime.ts`
- `packages/context/src/llm/local-config.ts`
- `packages/context/src/agent/agent-runner.service.ts`
- type/export declarations that intentionally preserve the AI SDK adapter
boundary.
- [ ] **Step 4: Run dead-code check**
Run:
```bash
pnpm run dead-code
```
Expected: PASS or only pre-existing unrelated findings. Investigate and fix
any finding caused by the new helper file.
- [ ] **Step 5: Commit verification cleanup if needed**
If verification required small cleanup, run:
```bash
git add packages/context/src/llm/claude-code-runtime.ts packages/context/src/llm/claude-code-runtime.test.ts packages/cli/src/claude-code-prompt-caching.ts packages/cli/src/status-project.ts packages/cli/src/setup-models.ts packages/cli/src/setup-models.test.ts packages/cli/src/doctor.test.ts
git commit -m "chore: verify claude-code v1 closure"
```
If no files changed after verification, skip this commit.
## Self-Review
- Spec coverage: The plan closes the remaining v1-blocking isolation assertion
and setup-warning requirements from the original spec.
- Placeholder scan: No placeholders remain; every task includes file paths,
code, commands, and expected output.
- Type consistency: The helper names and runtime function signatures are used
consistently across tasks.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,698 @@
# Brainstorm: `claude-code` backend with full KTX LLM parity
Adds a `claude-code` backend that gives KTX full parity with the existing
`ANTHROPIC_API_KEY`-based `anthropic` backend for **all KTX LLM calls**. The
backend uses `@anthropic-ai/claude-agent-sdk` and reuses the user's existing
local Claude Code authentication. Users select it in `ktx.yaml`.
This is not an implementation plan. It is the revised design after expanding
the requirement from "`ktx ingest` works with Claude Code" to "every KTX LLM
call works with Claude Code." The follow-up implementation plan should be
written separately.
## Core decision
`claude-code` is a first-class global LLM backend. Any code path that currently
works with `llm.provider.backend: anthropic` must work with
`llm.provider.backend: claude-code`, unless it is not an LLM call at all.
This includes:
- Agent loops implemented through `AgentRunnerService.runLoop(...)`.
- Text generation through `generateKtxText(...)`.
- Structured object generation through `generateKtxObject(...)`.
- Local ingest and MCP-triggered local ingest flows.
- Page triage and light extraction.
- Context-candidate curation and reconciliation.
- Memory capture.
- Scan/enrichment internals and relationship LLM proposals.
- Future KTX LLM call sites that use the shared runtime boundary.
Commands that do not use LLMs do not need special Claude Code behavior. There
must be no silent fallback from `claude-code` to gateway, Anthropic API-key
execution, or deterministic output.
## Goals
- Let a KTX user run all KTX LLM-backed behavior through their existing local
Claude Code session without provisioning `ANTHROPIC_API_KEY`, Vertex
credentials, or an AI Gateway key.
- Preserve the existing user-facing CLI and MCP behavior. `claude-code` changes
how LLM calls execute, not which KTX workflows exist.
- Preserve role-based model selection. `llm.models.default`, `triage`,
`candidateExtraction`, `curator`, `reconcile`, and `repair` remain the source
of model selection for every LLM call.
- Preserve KTX's curated tool boundaries. Claude Code built-ins,
filesystem-discovered MCP servers, hooks, skills, plugins, agents, and slash
commands must not become invokable in KTX agent loops. The Agent SDK init
message may still report host-discovered slash commands, skills, and agents;
KTX treats that metadata as diagnostic only and restricts execution through
`tools: []`, exact KTX MCP `allowedTools`, `disallowedTools`, and
deny-by-default `canUseTool`.
- Keep embeddings independent. Claude does not provide embeddings; users keep
configuring `ingest.embeddings` and scan/enrichment embeddings as they do
today.
- Fail fast with a clear message if local Claude Code authentication is not
usable.
## Non-goals
- **Embedding parity.** Embeddings remain separate from LLM execution.
- **Tool-call repair parity in the first pass.** The AI SDK runner uses
`experimental_repairToolCall` (`packages/llm/src/repair.ts:35-88`). The Claude
Agent SDK has no transparent same-step repair hook. In the MVP, the model
self-corrects on the next turn from the returned schema error; a call that
still fails counts toward the normal tool-failure count.
- **OTEL telemetry parity in the first pass.** The AI SDK runner uses
`experimental_telemetry`. The Agent SDK exposes hooks such as
`PostToolUseFailure` and `SessionEnd`, but no drop-in OTEL switch. MVP ships
without telemetry parity on this backend.
- **Productizing Claude subscription limits.** Documentation must frame this as
"use your own local Claude Code session," not as a third-party Claude Max or
Claude.ai product feature.
## Approaches considered
### Recommended: global LLM runtime port
Introduce a backend-neutral KTX LLM runtime port for operations, not just model
construction:
```ts
interface KtxLlmRuntimePort {
generateText(input: KtxGenerateTextInput): Promise<string>;
generateObject<T>(input: KtxGenerateObjectInput<T>): Promise<T>;
runAgentLoop(params: RunLoopParams): Promise<RunLoopResult>;
}
```
The existing `anthropic`, `vertex`, and `gateway` backends implement the runtime
through the AI SDK and existing `KtxLlmProvider`. The new `claude-code` backend
implements the same runtime through `@anthropic-ai/claude-agent-sdk`.
This is the recommended approach because KTX call sites need operations:
"generate text," "generate a structured object," and "run an agent loop." They
do not inherently need direct access to an AI SDK `LanguageModel`. The Agent SDK
is a session/agent API, not an AI SDK model factory, so the runtime port avoids
pretending those APIs are the same.
### Rejected: fake AI SDK `LanguageModel` for Claude Code
Trying to make Claude Code look like an AI SDK `LanguageModel` would be brittle.
The Agent SDK owns session execution, permissions, MCP tools, structured output,
and result messages. Those semantics do not map cleanly onto a normal
`getModel(...)` return value.
### Rejected: branch at every call site
Adding `if backend === "claude-code"` around each LLM call would work briefly
but would duplicate prompt wrapping, structured output handling, debug logging,
tool conversion, auth checks, and error mapping. It would also make future LLM
call sites easy to miss.
## Architecture
```text
ktx.yaml
llm.provider.backend: anthropic | vertex | gateway | claude-code
llm.models.<role>: model alias or model ID
createLocalKtxLlmRuntimeFromConfig(project.config.llm)
-> AiSdkKtxLlmRuntime
- wraps existing KtxLlmProvider
- generateText / Output.object / AgentRunnerService
-> ClaudeCodeKtxLlmRuntime
- uses @anthropic-ai/claude-agent-sdk query()
- implements text, object, and agent-loop operations
All KTX LLM call sites
-> KtxLlmRuntimePort
```
The runtime is selected at the same boundaries that currently construct an
`llmProvider` or `AgentRunnerService`:
- `packages/context/src/llm/local-config.ts`
- `packages/context/src/ingest/local-bundle-runtime.ts`
- `packages/context/src/memory/local-memory.ts`
- `packages/context/src/scan/local-scan.ts`
- `packages/context/src/mcp/local-project-ports.ts`
- Any CLI setup/status/doctor code that validates LLM readiness
After the change, services should not need to know whether the configured
backend is AI SDK based or Claude Code based. They call the runtime operation
they need.
## LLM call-site migration
The implementation plan must migrate every current KTX LLM call site to the
runtime port:
- `packages/context/src/llm/generation.ts`: `generateKtxText` and
`generateKtxObject` become runtime-backed helpers or are folded into the
runtime.
- `packages/context/src/agent/agent-runner.service.ts`: the AI SDK agent loop
becomes the AI SDK implementation of `runAgentLoop`.
- `packages/context/src/ingest/page-triage/page-triage.service.ts`: page triage
and light extraction depend on `KtxLlmRuntimePort`, not raw `KtxLlmProvider`.
- `packages/context/src/scan/description-generation.ts`: AI descriptions use
the runtime text-generation operation.
- `packages/context/src/scan/relationship-llm-proposal.ts`: relationship
proposals use the runtime object-generation operation.
- `packages/context/src/ingest/stages/stage-3-work-units.ts`,
`packages/context/src/ingest/stages/stage-4-reconciliation.ts`,
`packages/context/src/ingest/context-candidates/curator-pagination.service.ts`,
and `packages/context/src/memory/memory-agent.service.ts`: agent loops use the
runtime agent-loop operation or a thin `AgentRunnerPort` backed by it.
- Test helpers and MCP local project ports that inject `llmProvider` or
`agentRunner` must either inject the runtime port or use compatibility test
adapters during the migration.
The plan must include a grep-based audit so new or overlooked `getModel(...)`,
`generateKtxText(...)`, `generateKtxObject(...)`, `AgentRunnerService`, and
`llmProvider` usages are either migrated or explicitly proven non-runtime.
## Config design
The config should make `claude-code` a first-class backend:
```yaml
llm:
provider:
backend: claude-code
models:
default: sonnet
triage: haiku
candidateExtraction: sonnet
curator: sonnet
reconcile: sonnet
repair: sonnet
```
Implementation implications:
- Extend `KTX_LLM_BACKENDS` in `packages/context/src/project/config.ts` and
`KtxLlmBackend` in `packages/llm/src/types.ts`.
- Update setup, status, doctor, schema generation, examples, and docs so
`claude-code` is understood everywhere `anthropic` is understood.
- Update `createKtxLlmProvider` / `createModelFactory` so unsupported backend
values throw instead of falling through to gateway.
- Keep `llm.models` as the per-role binding source. The Claude Code runtime maps
each KTX role to the configured model string for the current call.
- Define accepted model aliases, such as `sonnet`, `opus`, and `haiku`, and full
model IDs supported by the pinned SDK version.
## Claude Agent SDK runtime behavior
Every Agent SDK call must be isolated enough for KTX execution. Use explicit
options even when SDK defaults currently match the desired value.
For agent loops with tools:
```ts
query({
prompt,
options: {
cwd: project.projectDir,
systemPrompt,
model: resolveModel(modelRole),
maxTurns: stepBudget,
settingSources: [],
skills: [],
plugins: [],
mcpServers: { ktx: createSdkMcpServer({ name: "ktx", tools }) },
tools: [],
allowedTools: [/* exact mcp__ktx__<toolName> ids generated from the tool map */],
canUseTool: ktxCanUseTool,
permissionMode: "dontAsk",
persistSession: false,
env: ktxClaudeCodeEnv
}
});
```
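`ktxClaudeCodeEnv` is the controlled environment described in
"Agent SDK environment and auth boundary" below; it must be passed on every
KTX `query()` call. The snippet does not show `disallowedTools`; agent-loop
calls must also pass it, listing the built-in tool names given in the required
outcomes below.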
For plain text generation:
- Use the same `query()` runtime with `maxTurns: 1`.
- Pass `settingSources: []`, `skills: []`, `plugins: []`, `tools: []`,
`permissionMode: "dontAsk"`, `persistSession: false`, and
`env: ktxClaudeCodeEnv`.
- Do not expose MCP tools unless the KTX call explicitly passed tools.
- Return the final result message text.
For structured object generation:
- Use the same `query()` runtime with the Agent SDK structured output option
for JSON schema output, plus the same isolation tuple including
`env: ktxClaudeCodeEnv`.
- Convert KTX Zod schemas at the runtime boundary.
- Parse and validate the returned object with the original KTX schema before
returning it to the caller.
The plan must confirm the exact option names against the pinned SDK version, but
the required outcome is fixed:
- Filesystem settings are not loaded. The SDK's documented default for an
omitted `settingSources` is `["user", "project", "local"]`
(`@anthropic-ai/claude-agent-sdk@0.3.142` `sdk.d.ts:1686-1695`),
which would inherit the user's Claude Code filesystem settings. Every KTX
`query()` call site - agent loops, text generation, object generation, and
the auth probe - MUST pass `settingSources: []` explicitly, along with
`skills: []`, `plugins: []`, `tools: []`, `persistSession: false`, and no
`mcpServers` entries other than the KTX MCP server (omitted entirely when
the call site does not expose tools). The implementation MUST assert from
the SDK init message that the controlled execution surface matches KTX's
expectations:
- `message.tools` equals the exact generated KTX MCP tool ids for the current
call.
- `message.mcp_servers` equals the expected KTX MCP server set: `[]` when the
call exposes no tools, or `["ktx"]` when it does.
- `message.plugins` is empty.
The implementation MUST NOT reject a run solely because
`message.slash_commands`, `message.skills`, or `message.agents` contain
host-discovered names. In `@anthropic-ai/claude-agent-sdk@0.3.142`, those
fields can report host discovery even when KTX passes the isolation options.
They are not part of the KTX execution surface when `tools: []`,
`allowedTools`, `disallowedTools`, and deny-by-default `canUseTool` are set.
- `skills: []` is a context filter in the pinned SDK
(`sdk.d.ts:1697-1718`): unlisted skills are hidden from the model's skill
listing and rejected by the Skill tool, but discovered skill names may still
appear in init metadata. KTX must still pass `skills: []`.
- Plugins are disabled with `plugins: []`, and the runtime asserts that
`message.plugins` is empty in the init message.
- Built-in tools are disabled by setting `tools: []`. The pinned SDK type
(`@anthropic-ai/claude-agent-sdk@0.3.142`, `sdk.d.ts`) documents `tools` as
the base set of built-in tools, with `[]` meaning "disable all built-ins";
`tools` does not accept MCP tool ids and cannot be used to restrict MCP
availability.
- MCP tool availability is granted by registering the KTX MCP server through
`mcpServers`. The SDK does not document a wildcard like `mcp__ktx__*` for
any tool field; KTX must enumerate exact generated MCP tool ids of the form
`mcp__ktx__<toolName>` (derived from the tool map handed to
`createSdkMcpServer`) wherever a list of tool ids is required.
- Pre-approval under `permissionMode: "dontAsk"` is configured by listing those
same exact `mcp__ktx__<toolName>` ids in `allowedTools` (documented as
auto-allow without prompting). Treat `allowedTools` as auto-approval, not
restriction.
- Defense-in-depth restriction uses `canUseTool`. The KTX runtime supplies a
`canUseTool` handler that allows only tool names in the current KTX MCP tool
map and denies everything else, so host-discovered slash commands, skills,
agents, future SDK defaults, or a misconfigured MCP server cannot expand the
execution surface.
- `disallowedTools` MUST additionally list the current built-in tool names
(`Agent`, `Task`, `AskUserQuestion`, `Bash`, `Read`, `Edit`, `Write`, `Glob`,
`Grep`, `WebFetch`, `WebSearch`, `TodoWrite`) as redundant insurance.
- `cwd` is `project.projectDir`, resolved at startup via `resolveKtxProjectDir`,
not `process.cwd()`.
- Sessions are not persisted unless the plan identifies a concrete debugging
feature that needs persistence.
## Agent SDK environment and auth boundary
The Agent SDK's `query()` option `env` (`@anthropic-ai/claude-agent-sdk@0.3.142`
`sdk.d.ts:1265-1279`) is the environment passed to the Claude Code child
process and defaults to `process.env`. Without an explicit `env`, the SDK
inherits the parent's environment, including any `ANTHROPIC_API_KEY`,
`ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_BASE_URL`, gateway/AI-Gateway tokens,
`GOOGLE_APPLICATION_CREDENTIALS` / `CLOUD_ML_REGION` (Vertex), and
`AWS_*` (Bedrock) credentials — any of which can switch the Claude Code CLI's
authentication source to API-key or another provider, bypassing the user's
local Claude Code session. That would silently violate the core requirement
that `claude-code` runs through the user's existing local Claude Code session
and that there is no silent fallback to gateway, Anthropic API-key, or other
provider execution.
Every `claude-code` `query()` call site - agent loops, text generation,
object generation, and the auth probe - MUST pass an explicit `env`
(`ktxClaudeCodeEnv`) constructed by copying `process.env` and removing the
following denylisted variables:
- `ANTHROPIC_API_KEY`
- `ANTHROPIC_AUTH_TOKEN`
- `ANTHROPIC_BASE_URL`
- `ANTHROPIC_MODEL` (provider-routing override)
- `ANTHROPIC_VERTEX_PROJECT_ID`, `CLOUD_ML_REGION`,
`GOOGLE_APPLICATION_CREDENTIALS`, `GOOGLE_CLOUD_PROJECT`
- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`,
`AWS_REGION`, `AWS_PROFILE`
- `CLAUDE_CODE_USE_BEDROCK`, `CLAUDE_CODE_USE_VERTEX`
- Any future provider-routing variables the pinned SDK version documents
The denylist is the source of truth and lives next to the runtime constructor
so adding a variable is a single-file change.
Acceptance criteria:
- The constructed `ktxClaudeCodeEnv` does not contain any denylisted key, and
this is verified by a unit test that seeds each denylisted key in a fake
`process.env`.
- The auth probe fails with the same "authenticate Claude Code locally"
message even when `ANTHROPIC_API_KEY` (or any other denylisted credential)
is present in `process.env` and no valid local Claude Code session exists.
- A test spies on every KTX-originated `query()` invocation and asserts that
`env` was passed and that it does not contain any denylisted key; the test
fails if any code path falls back to the SDK default `process.env`.
- The "no silent fallback" rule is preserved end-to-end: a machine with
`ANTHROPIC_API_KEY` set but no local Claude Code authentication still fails
setup/status/doctor on `claude-code`.
## Tool boundary
Agent-loop tools cannot remain only raw AI SDK `Record<string, Tool>` values if
two backends must consume them. The plan must define a backend-neutral tool
descriptor for the final tool map handed to an agent loop:
```ts
interface KtxRuntimeToolDescriptor<TInput, TOutput> {
name: string;
description: string;
inputSchema: z.ZodObject<z.ZodRawShape>;
execute(input: TInput): Promise<KtxRuntimeToolOutput<TOutput>>;
}
interface KtxRuntimeToolOutput<TOutput> {
// What the model sees as the tool_result content. Always a markdown string;
// never a raw JS object. This matches BaseTool's existing
// `toModelOutput` contract (`packages/context/src/tools/base-tool.ts:154-162`)
// which sends only markdown to the LLM.
markdown: string;
// Out-of-band payload preserved for tool callers (transcripts, debug,
// verification ledger, downstream KTX consumers). Not sent to the model.
structured?: TOutput;
}
```
Every composed tool entry must produce this descriptor shape, including:
- `BaseTool` outputs from factory toolsets, which already return
`{ markdown, structured }`.
- Source-specific raw tools such as `emit_historic_sql_evidence` in
`packages/context/src/ingest/local-bundle-runtime.ts`.
- Stage-local tools in `buildWuToolSet` and `buildReconcileToolSet`.
- Inline `load_skill`, read/raw/span, stage/diff, eviction, and emit tools in
`packages/context/src/ingest/ingest-bundle.runner.ts`.
- Memory-agent `load_skill` in
`packages/context/src/memory/memory-agent.service.ts`.
- The `withVerificationLedger` wrapping layer, whose markdown/structured
guard outputs (`packages/context/src/ingest/tools/verification-ledger.tool.ts:40-97`)
already match the contract.
### Tool output contract
The runtime defines a single output contract for both backends so the model
sees the same content regardless of provider:
- **Model-visible content**: the `markdown` field, mapped to the Agent SDK
tool handler return as `{ content: [{ type: "text", text: markdown }] }` for
`claude-code`, and surfaced through the existing `toModelOutput` markdown
path for AI SDK backends. The model never sees raw JS objects.
- **Structured payload**: the optional `structured` field, preserved on the
in-process tool-result envelope for transcript/debug capture, the
verification ledger, and any KTX caller that introspects results. The
Claude adapter does not put structured JSON into model-visible content
unless an individual call site explicitly opts in.
- **Normalization of existing raw tools**: tools that today return a bare
string (e.g. `load_skill` "Skill not available" responses in
`packages/context/src/ingest/ingest-bundle.runner.ts:697-721` and
`:924-936`, and `packages/context/src/memory/memory-agent.service.ts:128-152`)
must be wrapped at the descriptor boundary so `markdown` is the string and
`structured` is omitted. Tools that today return a plain object (e.g.
skill payload `{ name, content, skillDirectory }`) must be wrapped so
`markdown` is a deterministic human-readable rendering (e.g. the skill
body with a header) and the original object is preserved on `structured`.
No KTX tool may return a raw object as the model-visible payload on the
Claude Code backend, because the Agent SDK MCP handler will otherwise
stringify it and drop the structured fields.
- **AI SDK parity**: the AI SDK adapter MUST preserve BaseTool's existing
`toModelOutput` markdown-only behavior. Migrating BaseTool-derived tools
to the descriptor must not start sending structured JSON to the model.
The AI SDK adapter converts descriptors to `tool(...)` with a `toModelOutput`
that emits `markdown` only. The Claude Code adapter converts descriptors to
Agent SDK `tool(name, description, schema.shape, handler)` entries inside
`createSdkMcpServer(...)` and returns `{ content: [{ type: "text", text:
markdown }] }`.
Non-object schemas are unsupported for `claude-code` and must be rejected at
startup with a clear error. In practice KTX tool inputs are already `z.object`.
## Stop reasons and failures
The Claude runner maps the SDK's typed `SDKResultMessage` (union of
`SDKResultSuccess` and `SDKResultError` in
`@anthropic-ai/claude-agent-sdk@0.3.142`, `sdk.d.ts`) to
`RunLoopStopReason = "budget" | "natural" | "error"`. The mapping must consider
three typed signals in this precedence order, because each successive signal
may be present where the previous one is absent:
1. `subtype`: `"error_max_turns"` -> `"budget"`; `"success"` -> `"natural"`;
other error subtypes (`"error_during_execution"`,
`"error_max_budget_usd"`, `"error_max_structured_output_retries"`) ->
`"error"`.
2. `terminal_reason` (optional `TerminalReason` field on both success and
error results): `"max_turns"` -> `"budget"`; `"completed"` -> `"natural"`;
any other terminal reason such as `"blocking_limit"`,
`"rapid_refill_breaker"`, `"prompt_too_long"`, `"image_error"`,
`"model_error"`, `"aborted_streaming"`, `"aborted_tools"`,
`"stop_hook_prevented"`, `"hook_stopped"`, or `"tool_deferred"` ->
`"error"`.
3. The assistant message `stop_reason`: `"max_turns"` -> `"budget"`; any
other non-null unsuccessful stop reason -> `"error"`.
A `max_turns` signal arriving through any of the three sources must map to
`"budget"`; the runner MUST NOT classify a max-turn termination as
`"natural"` or as a generic `"error"` because it was reported via
`terminal_reason` instead of `subtype`.
`Stop` hooks are not the authoritative stop-reason source because they do not
carry the terminal reason. They remain useful for lifecycle logging. Tool failure
counting should use `PostToolUseFailure` and feed the same mechanism that
`stage-3-work-units.ts` checks through `toolFailureCount?(wu.unitKey)`.
For text and object generation, SDK authentication, billing, rate-limit,
permission, max-turn, structured-output, and execution errors must map to the
same error surfaces that KTX uses for the Anthropic API-key backend.
## Agent-loop progress callbacks
`RunLoopParams.onStepFinish`
(`packages/context/src/agent/agent-runner.service.ts:20`) is part of the
current agent-loop contract. The AI SDK runner increments `stepIndex` on each
`generateText` step and invokes the callback
(`agent-runner.service.ts:83-97`). KTX consumers depend on this:
`packages/context/src/ingest/ingest-bundle.runner.ts:782` emits
`work_unit_step` events from it, and `:1036` / `:1089` update reconciliation
progress for the user-visible "Reconciling results · step N" status.
The `claude-code` runner MUST preserve `onStepFinish` semantics:
- It MUST invoke `onStepFinish` exactly once per assistant turn (i.e. once per
step the SDK reports), incrementing `stepIndex` starting at 1.
- The plan MUST name the concrete SDK stream event used as the step boundary
(the implementation plan picks one of the documented assistant/result
message events from the pinned SDK version and justifies it). The chosen
event must produce the same `stepIndex` count as the AI SDK runner for an
equivalent run: N tool-using turns yield N callbacks.
- Callback errors MUST be caught and logged at `warn` level without aborting
the loop, matching `agent-runner.service.ts:90-96`.
- `stepBudget` passed to the callback MUST equal the `maxTurns` configured on
the SDK `query()` call.
Acceptance criteria:
- A `claude-code` agent loop run with `stepBudget: N` produces N
`work_unit_step` events when the loop runs to budget.
- A reconciliation run under `claude-code` produces the same
`updateProgress` calls (count and `stepIndex / stepBudget` ratio) as the
Anthropic API-key backend for an equivalent fixture.
- An `onStepFinish` callback that throws does not surface the error as the
loop result.
## Prompt caching parity
`packages/llm/src/types.ts:44, :61` exposes `llm.promptCaching` as a config
field, and the AI SDK message builder
(`packages/llm/src/message-builder.ts:62-114, :141-218`) applies
`anthropic.cacheControl: { type: "ephemeral", ttl }` markers to the system
message, the last history message, and sorted tools, with TTLs split into
`systemTtl`, `toolsTtl`, and `historyTtl`. `model-provider.test.ts:276`
verifies caching is enabled by default with those three TTLs.
The Agent SDK does not expose KTX's marker-based contract. The closest
mechanism is `systemPrompt: string[]` with
`SYSTEM_PROMPT_DYNAMIC_BOUNDARY` (`sdk.d.ts:1746-1799`), which marks a static
prefix as cacheable but provides no per-tool, per-history, or per-TTL knobs.
For the `claude-code` backend, the spec treats `llm.promptCaching` as
**partial parity**:
- The Claude runtime MAY map a non-empty static system prefix to a cacheable
`systemPrompt` array using `SYSTEM_PROMPT_DYNAMIC_BOUNDARY` when
`cacheSystem` is enabled in the resolved `KtxPromptCachingConfig`. The
implementation plan decides whether to ship this mapping in the first pass
or defer it.
- `cacheTools`, `cacheHistory`, and the `systemTtl` / `toolsTtl` /
`historyTtl` fields have no Agent SDK equivalent. The runtime MUST NOT
silently drop them: when a user sets non-default values under
`llm.promptCaching` and the backend is `claude-code`, status/doctor and the
setup wizard MUST surface that these fields are ignored on this backend.
- Docs under `docs-site/content/docs/` MUST document this divergence in the
same pages that describe `claude-code` setup, so users do not assume the
TTL/tool/history knobs apply.
Acceptance criteria:
- A `claude-code` runtime constructed from a config with default
`promptCaching` does not throw and does not pass KTX `cacheControl`
markers to the Agent SDK (the AI-SDK-only markers stay on the AI SDK
path).
- A `claude-code` runtime constructed from a config with non-default
`promptCaching` values yields a warning surfaced through doctor/status
output identifying the ignored fields.
## Auth and setup
`ktx setup`, status, and doctor flows must validate that Claude Code SDK auth is
usable, not just that `~/.claude/` exists. Acceptable validation strategies:
- A minimal SDK probe call with `settingSources: []`, `skills: []`,
`plugins: []`, `tools: []`, `persistSession: false`, no `mcpServers`,
`env: ktxClaudeCodeEnv`, and `maxTurns: 1`. The probe MUST NOT rely on
the SDK's documented default for any of these fields, because the default
for `settingSources` is `["user", "project", "local"]` (loads filesystem
settings) and the default for `env` is `process.env` (can route auth
through `ANTHROPIC_API_KEY` or other provider credentials and hide a
missing local Claude Code session). See "Agent SDK environment and auth
boundary" above for the `env` denylist.
The auth probe MUST tolerate init messages with non-empty `slash_commands`,
`skills`, and `agents` when `message.tools` is empty, `message.mcp_servers`
is empty, `message.plugins` is empty, and the query options contain the KTX
isolation tuple. Host discovery metadata is not an auth failure.
- An SDK-provided account/auth status method if the pinned version exposes one.
- A docs-endorsed file-presence check only if the official SDK docs explicitly
state that it proves auth usability.
Failure copy should tell the user to authenticate Claude Code locally with the
Claude Code CLI, then rerun setup or the command they attempted.
## Documentation impact
Docs updates are required because this changes user-visible setup and LLM
provider behavior:
- `docs-site/content/docs/getting-started/quickstart.mdx`
- `docs-site/content/docs/cli-reference/ktx-setup.mdx`
- `docs-site/content/docs/guides/building-context.mdx`
- Any config reference page that documents `llm.provider.backend`
- Any status or doctor docs that describe LLM readiness
The docs must say that `claude-code` uses the user's own local Claude Code
session. Do not describe it as a way for KTX to resell, pool, or productize
Claude subscription limits.
## Verified evidence
- Current `KtxLlmProvider` returns AI SDK `LanguageModel` instances and only
supports `anthropic`, `vertex`, and `gateway`
(`packages/llm/src/types.ts`, `packages/llm/src/model-provider.ts`).
- Project config currently accepts `llm.provider.backend: none | anthropic |
vertex | gateway` (`packages/context/src/project/config.ts`).
- `generateKtxText` and `generateKtxObject` are shared non-agent generation
helpers (`packages/context/src/llm/generation.ts`).
- `AgentRunnerService` is the shared AI SDK agent-loop implementation
(`packages/context/src/agent/agent-runner.service.ts`).
- Page triage and light extraction currently use raw `KtxLlmProvider`
(`packages/context/src/ingest/page-triage/page-triage.service.ts`).
- Scan/enrichment internals currently use `createLocalKtxLlmProviderFromConfig`,
`generateKtxText`, and `generateKtxObject`
(`packages/context/src/scan/local-scan.ts`,
`packages/context/src/scan/description-generation.ts`,
`packages/context/src/scan/relationship-llm-proposal.ts`).
- Local ingest and MCP local project ports inject `llmProvider` and
`agentRunner` today (`packages/context/src/ingest/local-bundle-runtime.ts`,
`packages/context/src/mcp/local-project-ports.ts`).
- The Agent SDK TypeScript reference (`@anthropic-ai/claude-agent-sdk@0.3.142`,
`sdk.d.ts:1686-1695` and the `sdk.mjs` runtime default
`["user","project","local"]`) documents `settingSources` **defaulting to
loading user, project, and local filesystem settings** when omitted; passing
`[]` is the explicit opt-out ("SDK isolation mode"). The same reference
documents `allowedTools` as auto-approval rather than restriction,
`canUseTool` as the programmatic permission handler,
`permissionMode: "dontAsk"`, `tools` as the base built-in set with `[]`
meaning "disable all built-ins" and no MCP-id support, `disallowedTools`,
`maxTurns`, `mcpServers`, `cwd`, `persistSession`, and SDK result/hook
message shapes.
- `SDKResultMessage = SDKResultSuccess | SDKResultError` in
`@anthropic-ai/claude-agent-sdk@0.3.142` (`sdk.d.ts`); both variants expose
an optional `terminal_reason: TerminalReason`, where `TerminalReason`
includes `'max_turns' | 'completed'` alongside other terminal reasons.
- The Agent SDK MCP docs and SDK examples (e.g. Context7
`/nothflare/claude-agent-sdk-docs` custom-tools guide) show registering MCP
servers in `query()` options and listing exact `mcp__<server>__<tool>` ids
in `allowedTools`; no SDK doc or type currently documents a wildcard form.
- BaseTool's `toModelOutput` already sends only `markdown` to the model while
preserving structured output for callers
(`packages/context/src/tools/base-tool.ts:154-162`); some raw AI SDK tools
in `packages/context/src/ingest/ingest-bundle.runner.ts:697-721, :924-936`
and `packages/context/src/memory/memory-agent.service.ts:128-152` currently
return bare strings or plain objects and must be normalized at the
descriptor boundary so both backends preserve the contract.
- The Agent SDK skills docs say the `skills` option is a context filter rather
than a sandbox. KTX must pass `skills: []`, but must not assert that
`message.skills` is empty in the SDK init message.
- `Options.env` in `@anthropic-ai/claude-agent-sdk@0.3.142`
(`sdk.d.ts:1265-1279`) is the environment passed to the Claude Code
process and defaults to `process.env`. Without an explicit `env`, the SDK
inherits the parent environment, including any provider-routing variables
(`ANTHROPIC_API_KEY`, Vertex/Bedrock credentials, gateway tokens) that
could change the active authentication source of the Claude Code CLI and
hide a missing local Claude Code session.
## Open items for the implementation plan
1. Confirm exact TypeScript option names and result-message discriminants
against the pinned `@anthropic-ai/claude-agent-sdk` version.
2. Define the final `KtxLlmRuntimePort` file location and package exports.
3. Define model alias validation for `sonnet`, `opus`, `haiku`, and full model
IDs.
4. Define the auth probe and make setup/status/doctor report actionable
messages.
5. Run a repo-wide audit for all LLM call sites and migrate each one to the
runtime boundary.
6. Write tests proving `claude-code` works for text generation, structured
object generation, and agent-loop execution.
7. Write tests proving page triage, scan/enrichment internals, memory capture,
MCP-triggered local ingest, and normal local ingest all use the
`claude-code` runtime when configured.
8. Write tests proving a raw built-in Claude Code tool request is denied,
host-discovered Skill/Agent/SlashCommand requests are denied by `canUseTool`,
and only exact `mcp__ktx__<toolName>` tool ids are allowed during KTX agent loops.
9. Write a test that asserts every KTX-originated `query()` invocation
(agent loop, text generation, object generation, auth probe) is called
with `settingSources: []`, `skills: []`, `plugins: []`, `tools: []`, and
`persistSession: false`, by spying on the SDK entry point. The test must
fail if any path falls back to SDK defaults for those fields. The test must
also prove that non-empty host-discovered `slash_commands`, `skills`, and
`agents` in the init message do not fail the auth probe or runtime when the
controlled tool, MCP server, and plugin surfaces match KTX expectations.
10. Write a test that asserts `onStepFinish` is invoked the expected number
of times for a fixed-budget `claude-code` agent loop, including the
work-unit and reconciliation progress paths.
11. Write a test that asserts every KTX-originated `query()` invocation
(agent loop, text generation, object generation, auth probe) is called
with an explicit `env` and that none of the denylisted provider-routing
variables (`ANTHROPIC_API_KEY`, `ANTHROPIC_AUTH_TOKEN`,
`ANTHROPIC_BASE_URL`, `ANTHROPIC_MODEL`, `ANTHROPIC_VERTEX_PROJECT_ID`,
`CLOUD_ML_REGION`, `GOOGLE_APPLICATION_CREDENTIALS`,
`GOOGLE_CLOUD_PROJECT`, `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`,
`AWS_SESSION_TOKEN`, `AWS_REGION`, `AWS_PROFILE`,
`CLAUDE_CODE_USE_BEDROCK`, `CLAUDE_CODE_USE_VERTEX`) are present in
that env, by seeding each variable in a fake `process.env`. The test
must also assert that the auth probe still fails when
`ANTHROPIC_API_KEY` is set in `process.env` but no local Claude Code
session exists.

View file

@ -3,7 +3,13 @@
"workspaces": {
".": {
"entry": ["scripts/**/*.mjs"],
"project": ["scripts/**/*.mjs"]
"project": ["scripts/**/*.mjs"],
"ignoreDependencies": [
"@semantic-release/commit-analyzer",
"@semantic-release/github",
"@semantic-release/release-notes-generator",
"conventional-changelog-conventionalcommits"
]
},
"packages/cli": {
"entry": [

View file

@ -1,6 +1,6 @@
{
"name": "ktx-workspace",
"version": "0.0.0-private",
"version": "0.1.0-rc.1",
"description": "Workspace root for ktx packages",
"private": true,
"type": "module",
@ -30,10 +30,14 @@
"release:local-embeddings-smoke": "node scripts/local-embeddings-runtime-smoke.mjs --require-opt-in",
"release:npm-publish": "node scripts/publish-public-npm-package.mjs",
"release:readiness": "node scripts/release-readiness.mjs",
"release:update-version": "node scripts/update-public-release-version.mjs",
"relationships:acquire-public-fixtures": "node scripts/acquire-public-benchmark-fixtures.mjs",
"relationships:rebuild-public-snapshots": "node scripts/build-benchmark-snapshot.mjs --rebuild-all",
"relationships:build-adventureworks-oltp": "node scripts/build-adventureworks-oltp-fixture.mjs",
"relationships:verify-orbit": "node scripts/relationship-orbit-verification.mjs",
"semantic-release": "semantic-release",
"semantic-release:debug": "semantic-release --dry-run --debug",
"semantic-release:dry-run": "semantic-release --dry-run --no-ci",
"smoke": "pnpm run build && pnpm --filter @ktx/cli run smoke",
"test": "node --test scripts/*.test.mjs && pnpm --filter './packages/*' run test",
"test:coverage": "pnpm run test:coverage:ts && pnpm run test:coverage:py",
@ -44,9 +48,17 @@
},
"devDependencies": {
"@biomejs/biome": "^2.4.15",
"@semantic-release/changelog": "^6.0.3",
"@semantic-release/commit-analyzer": "^13.0.1",
"@semantic-release/exec": "^7.1.0",
"@semantic-release/git": "^10.0.1",
"@semantic-release/github": "^12.0.8",
"@semantic-release/release-notes-generator": "^14.1.1",
"@types/node": "^25.7.0",
"better-sqlite3": "^12.10.0",
"conventional-changelog-conventionalcommits": "^9.3.1",
"knip": "^6.12.2",
"semantic-release": "^25.0.3",
"typescript": "^6.0.3",
"yaml": "^2.9.0"
},

View file

@ -0,0 +1,29 @@
import type { KtxProjectLlmConfig } from '@ktx/context/project';
/**
 * `llm.promptCaching` keys that the `claude-code` backend is reported to ignore.
 * The order of this list is the order in which matching paths are returned.
 */
const CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS = [
  'systemTtl',
  'toolsTtl',
  'historyTtl',
  'vertexFallbackTo5m',
] as const;

/**
 * Collects the dotted config paths of prompt-caching keys that are present in
 * the config but ignored under the `claude-code` backend.
 *
 * @param config - Project LLM configuration to inspect.
 * @returns `llm.promptCaching.<key>` for each listed key present on
 *   `config.promptCaching`; an empty array when the backend is not
 *   `claude-code` or no `promptCaching` section exists.
 */
export function ignoredClaudeCodePromptCachingFields(config: KtxProjectLlmConfig): string[] {
  const usesClaudeCode = config.provider.backend === 'claude-code';
  const cachingSection = config.promptCaching;
  if (!usesClaudeCode || !cachingSection) {
    return [];
  }

  const ignoredPaths: string[] = [];
  for (const field of CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS) {
    // Presence of the key is what matters here, not its value.
    if (field in cachingSection) {
      ignoredPaths.push(`llm.promptCaching.${field}`);
    }
  }
  return ignoredPaths;
}
/**
 * Builds the one-line warning that names the prompt-caching paths ignored by
 * the `claude-code` backend.
 *
 * @param fields - Dotted config paths to mention, joined with `, `.
 * @returns The warning sentence, or `null` when `fields` is empty.
 */
export function formatClaudeCodePromptCachingWarning(fields: string[]): string | null {
  const hasIgnoredFields = fields.length !== 0;
  if (!hasIgnoredFields) {
    return null;
  }

  const fieldList = fields.join(', ');
  return `claude-code ignores ${fieldList} because the Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers.`;
}
/**
 * Returns the fixed remediation hint shown next to the claude-code
 * prompt-caching warning.
 *
 * @returns A constant sentence; it takes no input and does not vary.
 */
export function formatClaudeCodePromptCachingFix(): string {
  const remediation =
    'Remove those promptCaching fields or use anthropic, vertex, or gateway when those cache knobs are required.';
  return remediation;
}

View file

@ -29,7 +29,7 @@ function embeddingBackend(value: string): 'openai' | 'sentence-transformers' {
}
function llmBackend(value: string): KtxSetupLlmBackend {
if (value === 'anthropic' || value === 'vertex') {
if (value === 'anthropic' || value === 'vertex' || value === 'claude-code') {
return value;
}
throw new InvalidArgumentError(`invalid choice '${value}'`);
@ -97,6 +97,7 @@ function shouldShowSetupEntryMenu(
llmBackend?: KtxSetupLlmBackend;
anthropicApiKeyEnv?: string;
anthropicApiKeyFile?: string;
llmModel?: string;
anthropicModel?: string;
vertexProject?: string;
vertexLocation?: string;
@ -171,6 +172,7 @@ function shouldShowSetupEntryMenu(
'llmBackend',
'anthropicApiKeyEnv',
'anthropicApiKeyFile',
'llmModel',
'anthropicModel',
'vertexProject',
'vertexLocation',
@ -237,6 +239,7 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
.addOption(
new Option('--anthropic-api-key-file <path>', 'File containing the Anthropic API key').hideHelp(),
)
.addOption(new Option('--llm-model <model>', 'LLM model ID or backend model alias').hideHelp())
.addOption(new Option('--anthropic-model <model>', 'Anthropic model ID to validate and save').hideHelp())
.addOption(new Option('--vertex-project <project>', 'Google Vertex AI project ID, env:NAME, or file:/path').hideHelp())
.addOption(new Option('--vertex-location <location>', 'Google Vertex AI location, env:NAME, or file:/path').hideHelp())
@ -362,12 +365,21 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
context.setExitCode(1);
return;
}
if (options.llmBackend === 'vertex' && (options.anthropicApiKeyEnv || options.anthropicApiKeyFile)) {
if (options.llmModel && options.anthropicModel) {
context.io.stderr.write('Choose only one LLM model flag: --llm-model or --anthropic-model.\n');
context.setExitCode(1);
return;
}
if (
options.llmBackend &&
options.llmBackend !== 'anthropic' &&
(options.anthropicApiKeyEnv || options.anthropicApiKeyFile)
) {
context.io.stderr.write('Anthropic API key flags are only valid with --llm-backend anthropic.\n');
context.setExitCode(1);
return;
}
if (options.llmBackend === 'anthropic' && (options.vertexProject || options.vertexLocation)) {
if (options.llmBackend && options.llmBackend !== 'vertex' && (options.vertexProject || options.vertexLocation)) {
context.io.stderr.write('Vertex AI flags are only valid with --llm-backend vertex.\n');
context.setExitCode(1);
return;
@ -423,6 +435,7 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
...(options.llmBackend ? { llmBackend: options.llmBackend } : {}),
...(options.anthropicApiKeyEnv ? { anthropicApiKeyEnv: options.anthropicApiKeyEnv } : {}),
...(options.anthropicApiKeyFile ? { anthropicApiKeyFile: options.anthropicApiKeyFile } : {}),
...(options.llmModel ? { llmModel: options.llmModel } : {}),
...(options.anthropicModel ? { anthropicModel: options.anthropicModel } : {}),
...(options.vertexProject ? { vertexProject: options.vertexProject } : {}),
...(options.vertexLocation ? { vertexLocation: options.vertexLocation } : {}),

View file

@ -464,6 +464,44 @@ describe('runKtxDoctor', () => {
delete process.env.OPENAI_API_KEY;
});
// Project doctor must surface a failed Claude Code auth probe together with the
// warning about prompt-caching keys that the claude-code backend ignores.
it('reports Claude Code auth failures and ignored prompt-caching fields in project doctor output', async () => {
// Config selects claude-code while still carrying TTL-style prompt-caching keys.
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'llm:',
' provider:',
' backend: claude-code',
' models:',
' default: sonnet',
' promptCaching:',
' enabled: true',
' systemTtl: 1h',
' toolsTtl: 1h',
' historyTtl: 5m',
'',
].join('\n'),
'utf-8',
);
const testIo = makeIo();
await expect(
runKtxDoctor(
{ command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
testIo.io,
{
// Injected probe always reports failure, so no real Claude Code session is consulted.
claudeCodeAuthProbe: async () => ({
ok: false as const,
message: 'Authenticate Claude Code locally.',
}),
},
),
// The run resolves to 1 (presumably the failing exit code — confirm against runKtxDoctor).
).resolves.toBe(1);
// Backend name, probe message, and the ignored-field warning must all reach stdout.
expect(testIo.stdout()).toContain('claude-code');
expect(testIo.stdout()).toContain('Authenticate Claude Code locally');
expect(testIo.stdout()).toContain('claude-code ignores llm.promptCaching');
});
it('includes Postgres query-history readiness in project doctor output', async () => {
process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret
process.env.OPENAI_API_KEY = 'test-key'; // pragma: allowlist secret

View file

@ -1074,6 +1074,41 @@ describe('runKtxCli', () => {
);
});
// `--llm-model` combined with `--llm-backend claude-code` must be forwarded to the
// setup runner as `llmModel` / `llmBackend`.
it('dispatches the provider-neutral LLM model setup flag to the setup runner', async () => {
// Stub setup runner that resolves to 0 so only argument dispatch is under test.
const setup = vi.fn(async () => 0);
const setupIo = makeIo();
await expect(
runKtxCli(
[
'--project-dir',
tempDir,
'setup',
'--no-input',
'--llm-backend',
'claude-code',
'--llm-model',
'opus',
],
setupIo.io,
{ setup },
),
).resolves.toBe(0);
// `--no-input` is expected to arrive as inputMode 'disabled'.
// NOTE(review): cliVersion is pinned to '0.0.0-private'; confirm it still matches the
// CLI package version after the release version bump.
expect(setup).toHaveBeenCalledWith(
expect.objectContaining({
command: 'run',
projectDir: tempDir,
inputMode: 'disabled',
cliVersion: '0.0.0-private',
llmBackend: 'claude-code',
llmModel: 'opus',
skipLlm: false,
}),
setupIo.io,
);
});
it('rejects conflicting Anthropic credential setup flags', async () => {
const setup = vi.fn(async () => 0);
const setupIo = makeIo();

View file

@ -1,7 +1,7 @@
import { EventEmitter } from 'node:events';
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent';
import type { AgentRunnerPort, RunLoopParams } from '@ktx/context';
import {
KtxYamlMetabaseSourceStateReader,
LocalMetabaseDiscoveryCache,
@ -255,8 +255,8 @@ export function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string
};
}
export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: RunLoopParams) => {
export class CliLookerSlWritingAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
@ -265,53 +265,39 @@ export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
}
await ledger.execute(
{
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
unverifiedIdentifiers: [],
},
{ toolCallId: 'cli-looker-verification-ledger', messages: [] },
);
await ledger.execute({
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
unverifiedIdentifiers: [],
});
const slWrite = params.toolSet.sl_write_source;
if (!slWrite?.execute) {
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
}
const result = await slWrite.execute(
{
connectionId: 'prod-warehouse',
sourceName: 'looker__ecommerce__orders',
source: {
name: 'looker__ecommerce__orders',
table: 'public.orders',
grain: ['id'],
columns: [
{ name: 'id', type: 'number' },
{ name: 'revenue', type: 'number' },
],
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
},
const result = await slWrite.execute({
connectionId: 'prod-warehouse',
sourceName: 'looker__ecommerce__orders',
source: {
name: 'looker__ecommerce__orders',
table: 'public.orders',
grain: ['id'],
columns: [
{ name: 'id', type: 'number' },
{ name: 'revenue', type: 'number' },
],
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
},
{ toolCallId: 'cli-looker-sl-write', messages: [] },
);
if (!result.structured.success) {
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
export class CliMetabaseAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async () => ({ stopReason: 'natural' as const }));
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
export class CliMetabaseAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async () => ({ stopReason: 'natural' as const }));
}
export class CliMetabaseSourceAdapter implements SourceAdapter {

View file

@ -311,10 +311,12 @@ describe('runKtxIngest', () => {
expect(runIo.stdout()).toBe('');
expect(runIo.stderr()).toContain(
'ktx ingest requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.',
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
);
expect(runIo.stderr()).toContain('Configure a local Claude Code session or API-backed LLM, then rerun ingest:');
expect(runIo.stderr()).toContain(`ktx setup --project-dir ${projectDir} --llm-backend claude-code --no-input`);
expect(runIo.stderr()).toContain(
`ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
`ktx setup --project-dir ${projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
);
});

View file

@ -86,11 +86,11 @@ export interface KtxIngestDeps {
renderStoredMemoryFlow?: typeof renderMemoryFlowTui;
startLiveMemoryFlow?: typeof startLiveMemoryFlowTui;
env?: NodeJS.ProcessEnv;
localIngestOptions?: Pick<
RunLocalIngestOptions,
| 'agentRunner'
| 'llmProvider'
| 'memoryModel'
localIngestOptions?: Pick<
RunLocalIngestOptions,
| 'agentRunner'
| 'llmRuntime'
| 'memoryModel'
| 'semanticLayerCompute'
| 'queryExecutor'
| 'logger'

View file

@ -61,7 +61,12 @@ function makePromptAdapter(options: {
if (message.includes('LLM provider')) {
providerPromptCount += 1;
const nextProviderChoice = selectValues[0];
if (nextProviderChoice === 'anthropic' || nextProviderChoice === 'vertex' || nextProviderChoice === 'back') {
if (
nextProviderChoice === 'anthropic' ||
nextProviderChoice === 'vertex' ||
nextProviderChoice === 'claude-code' ||
nextProviderChoice === 'back'
) {
return selectValues.shift() ?? nextProviderChoice;
}
if (options.credentialChoice === 'back' && providerPromptCount > 1) {
@ -180,6 +185,100 @@ describe('setup Anthropic model step', () => {
);
});
// Non-interactive setup with `llmBackend: 'claude-code'` and no explicit model must
// persist the claude-code backend with the `sonnet` default and run the auth probe.
it('configures Claude Code backend and validates local auth', async () => {
const io = makeIo();
// Stubbed probe reports success so the step can reach the persist stage.
const authProbe = vi.fn(async () => ({ ok: true as const }));
const result = await runKtxSetupAnthropicModelStep(
{
projectDir: tempDir,
inputMode: 'disabled',
llmBackend: 'claude-code',
skipLlm: false,
},
io.io,
{ claudeCodeAuthProbe: authProbe },
);
expect(result.status).toBe('ready');
// Re-read ktx.yaml from disk to verify what was actually written.
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
expect(config.llm).toMatchObject({
provider: { backend: 'claude-code' },
models: { default: 'sonnet' },
});
// The probe must receive the project directory and the defaulted model.
expect(authProbe).toHaveBeenCalledWith(expect.objectContaining({ projectDir: tempDir, model: 'sonnet' }));
});
// Interactive setup must ask which Claude Code model to use and persist the answer.
it('prompts for the Claude Code model during interactive setup', async () => {
const io = makeIo();
// Scripted answers: 'claude-code' for the provider prompt, then 'opus' for the model prompt.
const prompts = makePromptAdapter({ selectValues: ['claude-code', 'opus'] });
const authProbe = vi.fn(async () => ({ ok: true as const }));
const result = await runKtxSetupAnthropicModelStep(
{ projectDir: tempDir, inputMode: 'auto', skipLlm: false },
io.io,
{ prompts, claudeCodeAuthProbe: authProbe },
);
expect(result.status).toBe('ready');
// Pins the exact option list (order, labels, and the 'recommended' hint on sonnet).
expect(prompts.select).toHaveBeenCalledWith(
expect.objectContaining({
message: expect.stringContaining('Which Claude Code model should KTX use?'),
options: [
{ value: 'sonnet', label: 'Claude Sonnet', hint: 'recommended' },
{ value: 'opus', label: 'Claude Opus' },
{ value: 'haiku', label: 'Claude Haiku' },
{ value: 'manual', label: 'Enter a Claude Code model ID manually' },
{ value: 'back', label: 'Back' },
],
}),
);
// The selected model must be both written to ktx.yaml and passed to the auth probe.
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
expect(config.llm).toMatchObject({
provider: { backend: 'claude-code' },
models: { default: 'opus' },
});
expect(authProbe).toHaveBeenCalledWith(expect.objectContaining({ projectDir: tempDir, model: 'opus' }));
});
// Switching an existing project to claude-code must warn on stderr about
// prompt-caching keys that will no longer take effect.
it('warns during Claude Code setup when existing prompt-caching fields will be ignored', async () => {
// Pre-existing config uses the anthropic backend with TTL-style prompt-caching keys.
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'llm:',
' provider:',
' backend: anthropic',
' models:',
' default: claude-sonnet-4-6',
' promptCaching:',
' enabled: true',
' systemTtl: 1h',
' toolsTtl: 1h',
' historyTtl: 5m',
'',
].join('\n'),
'utf-8',
);
const io = makeIo();
const result = await runKtxSetupAnthropicModelStep(
{
projectDir: tempDir,
inputMode: 'disabled',
llmBackend: 'claude-code',
skipLlm: false,
},
io.io,
{
// Auth succeeds, so the warning is the only stderr output under test.
claudeCodeAuthProbe: async () => ({ ok: true as const }),
},
);
// The warning is advisory: setup still finishes as 'ready'.
expect(result.status).toBe('ready');
expect(io.stderr()).toContain('claude-code ignores llm.promptCaching.systemTtl');
expect(io.stderr()).toContain('Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers');
});
it('returns from Anthropic credential Back to provider selection', async () => {
const prompts = makePromptAdapter({ selectValues: ['anthropic', 'back', 'back'] });
@ -649,7 +748,7 @@ describe('setup Anthropic model step', () => {
expect(io.stderr()).not.toContain('--skip-llm');
});
it('does not recommend skipping when non-interactive setup is missing an Anthropic model', async () => {
it('does not recommend skipping when non-interactive setup is missing an LLM model', async () => {
const io = makeIo();
const healthCheck = vi.fn(async () => ({ ok: true as const }));
@ -666,7 +765,7 @@ describe('setup Anthropic model step', () => {
expect(result.status).toBe('missing-input');
expect(healthCheck).not.toHaveBeenCalled();
expect(io.stderr()).toContain('Missing Anthropic model: pass --anthropic-model.');
expect(io.stderr()).toContain('Missing LLM model: pass --llm-model.');
expect(io.stderr()).not.toContain('--skip-llm');
});

View file

@ -1,7 +1,7 @@
import { execFile } from 'node:child_process';
import { writeFile } from 'node:fs/promises';
import { promisify } from 'node:util';
import { resolveLocalKtxLlmConfig } from '@ktx/context';
import { resolveLocalKtxLlmConfig, runClaudeCodeAuthProbe } from '@ktx/context';
import { resolveKtxConfigReference } from '@ktx/context/core';
import {
type KtxProjectConfig,
@ -11,6 +11,10 @@ import {
serializeKtxProjectConfig,
} from '@ktx/context/project';
import { type KtxLlmConfig, type KtxLlmHealthCheckResult, runKtxLlmHealthCheck } from '@ktx/llm';
import {
formatClaudeCodePromptCachingWarning,
ignoredClaudeCodePromptCachingFields,
} from './claude-code-prompt-caching.js';
import { createClackSpinner, type KtxCliSpinner } from './clack.js';
import type { KtxCliIo } from './cli-runtime.js';
import { withTextInputNavigation } from './prompt-navigation.js';
@ -32,6 +36,7 @@ export interface KtxSetupModelArgs {
llmBackend?: KtxSetupLlmBackend;
anthropicApiKeyEnv?: string;
anthropicApiKeyFile?: string;
llmModel?: string;
anthropicModel?: string;
vertexProject?: string;
vertexLocation?: string;
@ -53,7 +58,7 @@ export interface AnthropicModelChoice {
recommended: boolean;
}
export type KtxSetupLlmBackend = 'anthropic' | 'vertex';
export type KtxSetupLlmBackend = 'anthropic' | 'vertex' | 'claude-code';
export interface KtxSetupModelPromptAdapter {
select(options: { message: string; options: KtxSetupPromptOption[] }): Promise<string>;
@ -68,6 +73,11 @@ export interface KtxSetupModelDeps {
prompts?: KtxSetupModelPromptAdapter;
listModels?: (apiKey: string) => Promise<AnthropicModelChoice[]>;
healthCheck?: (config: KtxLlmConfig) => Promise<KtxLlmHealthCheckResult>;
claudeCodeAuthProbe?: (input: {
projectDir: string;
model: string;
env?: NodeJS.ProcessEnv;
}) => Promise<{ ok: true } | { ok: false; message: string }>;
readGcloudProject?: () => Promise<string | undefined>;
listGcloudProjects?: () => Promise<GcloudProjectChoice[]>;
spinner?: () => KtxCliSpinner;
@ -91,6 +101,12 @@ const VERTEX_ANTHROPIC_MODELS: AnthropicModelChoice[] = [
{ id: 'claude-opus-4-1', label: 'Claude Opus 4.1', recommended: false },
];
// Model choices offered by the interactive Claude Code model prompt, in display order.
// The entry flagged `recommended` is shown with a "recommended" hint and is also used
// as the placeholder for manual model entry.
// NOTE(review): the ids look like short Claude Code model aliases rather than full
// model IDs — confirm against the Claude Code / Agent SDK model documentation.
const CLAUDE_CODE_MODELS: AnthropicModelChoice[] = [
{ id: 'sonnet', label: 'Claude Sonnet', recommended: true },
{ id: 'opus', label: 'Claude Opus', recommended: false },
{ id: 'haiku', label: 'Claude Haiku', recommended: false },
];
const HIDDEN_ANTHROPIC_MODEL_PATTERNS = [
/^claude-sonnet-4$/i,
/^claude-opus-4$/i,
@ -252,7 +268,7 @@ export function isKtxSetupLlmConfigReady(config: KtxProjectLlmConfig): boolean {
return typeof resolved.vertex?.location === 'string' && resolved.vertex.location.trim().length > 0;
}
return resolved.backend === 'anthropic' || resolved.backend === 'gateway';
return resolved.backend === 'anthropic' || resolved.backend === 'gateway' || resolved.backend === 'claude-code';
}
function hasUsableConfiguredLlm(config: KtxProjectConfig): boolean {
@ -263,9 +279,18 @@ function buildProjectLlmConfig(
existing: KtxProjectLlmConfig,
provider:
| { backend: 'anthropic'; credentialRef: string }
| { backend: 'vertex'; vertex: { project?: string; location: string } },
| { backend: 'vertex'; vertex: { project?: string; location: string } }
| { backend: 'claude-code' },
model: string,
): KtxProjectLlmConfig {
if (provider.backend === 'claude-code') {
return {
provider: { backend: 'claude-code' },
models: { ...existing.models, default: model },
promptCaching: existing.promptCaching,
};
}
if (provider.backend === 'vertex') {
return {
provider: {
@ -453,12 +478,16 @@ function requestedBackend(args: KtxSetupModelArgs): KtxSetupLlmBackend | undefin
if (args.vertexProject || args.vertexLocation) {
return 'vertex';
}
if (args.anthropicApiKeyEnv || args.anthropicApiKeyFile || args.anthropicModel) {
if (args.anthropicApiKeyEnv || args.anthropicApiKeyFile || args.llmModel || args.anthropicModel) {
return 'anthropic';
}
return undefined;
}
/**
 * Resolves the model passed explicitly on the command line.
 *
 * @param args - Setup arguments carrying the optional model flags.
 * @returns `args.llmModel` when it is neither `null` nor `undefined`,
 *   otherwise `args.anthropicModel` (which may itself be `undefined`).
 */
function requestedModel(args: KtxSetupModelArgs): string | undefined {
  const { llmModel, anthropicModel } = args;
  // The provider-neutral flag takes precedence over the Anthropic-specific one.
  if (llmModel == null) {
    return anthropicModel;
  }
  return llmModel;
}
async function chooseBackend(
args: KtxSetupModelArgs,
io: KtxCliIo,
@ -480,16 +509,21 @@ async function chooseBackend(
}
const choice = await prompts.select({
message: 'Which LLM provider should KTX use?',
options: [
{ value: 'anthropic', label: 'Anthropic API' },
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
{ value: 'back', label: 'Back' },
],
options: [
{ value: 'anthropic', label: 'Anthropic API' },
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
{ value: 'claude-code', label: 'Local Claude Code session' },
{ value: 'back', label: 'Back' },
],
});
if (choice === 'back') {
return { status: 'back' };
}
return { status: 'ready', backend: choice === 'vertex' ? 'vertex' : 'anthropic', prompted: true };
return {
status: 'ready',
backend: choice === 'vertex' || choice === 'claude-code' ? choice : 'anthropic',
prompted: true,
};
}
function resolveProvidedVertexRef(
@ -708,11 +742,12 @@ async function chooseModel(
io: KtxCliIo,
deps: KtxSetupModelDeps,
): Promise<ChooseModelResult> {
if (args.anthropicModel) {
return { status: 'ready', model: args.anthropicModel };
const providedModel = requestedModel(args);
if (providedModel) {
return { status: 'ready', model: providedModel };
}
if (args.inputMode === 'disabled') {
io.stderr.write('Missing Anthropic model: pass --anthropic-model.\n');
io.stderr.write('Missing LLM model: pass --llm-model.\n');
return { status: 'missing-input' };
}
@ -765,11 +800,12 @@ async function chooseModel(
}
async function chooseVertexModel(args: KtxSetupModelArgs, io: KtxCliIo, deps: KtxSetupModelDeps): Promise<ChooseModelResult> {
if (args.anthropicModel) {
return { status: 'ready', model: args.anthropicModel };
const providedModel = requestedModel(args);
if (providedModel) {
return { status: 'ready', model: providedModel };
}
if (args.inputMode === 'disabled') {
io.stderr.write('Missing Anthropic model: pass --anthropic-model.\n');
io.stderr.write('Missing LLM model: pass --llm-model.\n');
return { status: 'missing-input' };
}
@ -803,11 +839,50 @@ async function chooseVertexModel(args: KtxSetupModelArgs, io: KtxCliIo, deps: Kt
return { status: 'ready', model: choice };
}
/**
 * Decides which model the `claude-code` backend should use.
 *
 * Resolution order:
 * 1. a model supplied through the setup arguments;
 * 2. `'sonnet'` when input is disabled (no prompting possible);
 * 3. the user's answer to an interactive select prompt, which also offers a
 *    manual-entry option and a "Back" option.
 *
 * @param args - Setup arguments (explicit model flags and input mode).
 * @param deps - Optional injected prompt adapter; a default adapter is created
 *   when none is supplied.
 * @returns `ready` with the chosen model, `back` when the user backs out of
 *   either prompt, or `missing-input` when manual entry is blank.
 */
async function chooseClaudeCodeModel(args: KtxSetupModelArgs, deps: KtxSetupModelDeps): Promise<ChooseModelResult> {
  const explicitModel = requestedModel(args);
  if (explicitModel) {
    return { status: 'ready', model: explicitModel };
  }
  if (args.inputMode === 'disabled') {
    return { status: 'ready', model: 'sonnet' };
  }

  const promptAdapter = deps.prompts ?? createPromptAdapter();
  // Only the recommended entry carries a `hint` key; the others omit it entirely.
  const modelOptions = CLAUDE_CODE_MODELS.map((entry) =>
    entry.recommended
      ? { value: entry.id, label: entry.label, hint: 'recommended' }
      : { value: entry.id, label: entry.label },
  );
  const selection = await promptAdapter.select({
    message: `Which Claude Code model should KTX use?\n\n${ANTHROPIC_MODEL_PROMPT_CONTEXT}`,
    options: [
      ...modelOptions,
      { value: 'manual', label: 'Enter a Claude Code model ID manually' },
      { value: 'back', label: 'Back' },
    ],
  });

  switch (selection) {
    case 'back':
      return { status: 'back' };
    case 'manual': {
      const message = withTextInputNavigation('Claude Code model ID');
      const placeholder =
        CLAUDE_CODE_MODELS.find((model) => model.recommended)?.id ?? CLAUDE_CODE_MODELS[0]?.id;
      const typed = await promptAdapter.text({ message, placeholder });
      // An undefined answer from the text prompt is treated as "go back".
      if (typed === undefined) {
        return { status: 'back' };
      }
      const trimmed = typed.trim();
      if (trimmed) {
        return { status: 'ready', model: trimmed };
      }
      return { status: 'missing-input' };
    }
    default:
      return { status: 'ready', model: selection };
  }
}
async function persistLlmConfig(
projectDir: string,
provider:
| { backend: 'anthropic'; credentialRef: string }
| { backend: 'vertex'; vertex: { project?: string; location: string } },
| { backend: 'vertex'; vertex: { project?: string; location: string } }
| { backend: 'claude-code' },
model: string,
): Promise<void> {
const project = await loadKtxProject({ projectDir });
@ -853,6 +928,7 @@ export async function runKtxSetupAnthropicModelStep(
!args.llmBackend &&
!args.anthropicApiKeyEnv &&
!args.anthropicApiKeyFile &&
!args.llmModel &&
!args.anthropicModel &&
!args.vertexProject &&
!args.vertexLocation
@ -918,6 +994,37 @@ export async function runKtxSetupAnthropicModelStep(
continue;
}
if (backendChoice.backend === 'claude-code') {
const model = await chooseClaudeCodeModel(backendArgs, deps);
if (model.status === 'back' && backendChoice.prompted) {
attemptArgs = buildInteractiveRetryArgs(args);
continue;
}
if (model.status === 'invalid-credential') {
return { status: 'failed', projectDir: args.projectDir };
}
if (model.status !== 'ready') {
return { status: model.status, projectDir: args.projectDir };
}
const probe = deps.claudeCodeAuthProbe ?? runClaudeCodeAuthProbe;
const health = await probe({ projectDir: args.projectDir, model: model.model, env: deps.env ?? process.env });
if (!health.ok) {
io.stderr.write(`${health.message}\n`);
return { status: 'failed', projectDir: args.projectDir };
}
const warning = formatClaudeCodePromptCachingWarning(
ignoredClaudeCodePromptCachingFields(
buildProjectLlmConfig(project.config.llm, { backend: 'claude-code' }, model.model),
),
);
if (warning) {
io.stderr.write(`${warning}\n`);
}
await persistLlmConfig(args.projectDir, { backend: 'claude-code' }, model.model);
io.stdout.write(`│ LLM ready: yes (${model.model})\n`);
return { status: 'ready', projectDir: args.projectDir };
}
const credential = await chooseCredentialRef(backendArgs, io, deps);
if (credential.status === 'back' && backendChoice.prompted) {
attemptArgs = buildInteractiveRetryArgs(args);

View file

@ -77,6 +77,7 @@ export type KtxSetupArgs =
llmBackend?: KtxSetupLlmBackend;
anthropicApiKeyEnv?: string;
anthropicApiKeyFile?: string;
llmModel?: string;
anthropicModel?: string;
vertexProject?: string;
vertexLocation?: string;
@ -547,6 +548,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
...(args.llmBackend ? { llmBackend: args.llmBackend } : {}),
...(args.anthropicApiKeyEnv ? { anthropicApiKeyEnv: args.anthropicApiKeyEnv } : {}),
...(args.anthropicApiKeyFile ? { anthropicApiKeyFile: args.anthropicApiKeyFile } : {}),
...(args.llmModel ? { llmModel: args.llmModel } : {}),
...(args.anthropicModel ? { anthropicModel: args.anthropicModel } : {}),
...(args.vertexProject ? { vertexProject: args.vertexProject } : {}),
...(args.vertexLocation ? { vertexLocation: args.vertexLocation } : {}),

View file

@ -1,4 +1,5 @@
import { basename } from 'node:path';
import { runClaudeCodeAuthProbe } from '@ktx/context';
import type {
KtxConfigIssue,
KtxLocalProject,
@ -8,6 +9,11 @@ import type {
KtxProjectLlmConfig,
} from '@ktx/context/project';
import type { PostgresPgssProbeResult } from '@ktx/context/ingest';
import {
formatClaudeCodePromptCachingFix,
formatClaudeCodePromptCachingWarning,
ignoredClaudeCodePromptCachingFields,
} from './claude-code-prompt-caching.js';
import type { DoctorCheck } from './doctor.js';
import {
bold as _bold,
@ -77,6 +83,12 @@ interface WarningItem {
fix?: string;
}
// Signature of the Claude Code auth check used by the LLM status builder.
// It takes the project directory, the model name, and an optional environment,
// and resolves to `{ ok: true }` or to `{ ok: false, message }`, where `message`
// is shown as the status detail.
// Injectable through the status options so tests can replace the real probe.
type ClaudeCodeAuthProbe = (input: {
projectDir: string;
model: string;
env?: NodeJS.ProcessEnv;
}) => Promise<{ ok: true } | { ok: false; message: string }>;
const PROJECT_READY_COMMANDS = KTX_NEXT_STEP_DIRECT_COMMANDS.map((step) => step.command);
function isRecord(value: unknown): value is Record<string, unknown> {
@ -134,7 +146,15 @@ function envHint(value: unknown): string | undefined {
return undefined;
}
function buildLlmStatus(config: KtxProjectLlmConfig, env: NodeJS.ProcessEnv): LlmStatus {
async function buildLlmStatus(
config: KtxProjectLlmConfig,
options: {
projectDir: string;
env: NodeJS.ProcessEnv;
claudeCodeAuthProbe?: ClaudeCodeAuthProbe;
},
): Promise<LlmStatus> {
const env = options.env;
const backend = config.provider.backend;
const model = config.models?.default;
if (backend === 'none') {
@ -186,6 +206,26 @@ function buildLlmStatus(config: KtxProjectLlmConfig, env: NodeJS.ProcessEnv): Ll
fix: hint ? `Set ${hint}` : 'Set the gateway api_key or rerun `ktx setup`',
};
}
if (backend === 'claude-code') {
const modelName = model ?? 'sonnet';
const probe = options.claudeCodeAuthProbe ?? runClaudeCodeAuthProbe;
const auth = await probe({ projectDir: options.projectDir, model: modelName, env });
if (auth.ok) {
return {
backend,
model: modelName,
status: 'ok',
detail: 'local Claude Code session authenticated',
};
}
return {
backend,
model: modelName,
status: 'fail',
detail: auth.message,
fix: 'Authenticate Claude Code locally with the Claude Code CLI, then rerun `ktx status`.',
};
}
return { backend, model, status: 'warn', detail: 'unknown LLM backend' };
}
@ -568,6 +608,14 @@ function buildWarnings(
});
}
const warning = formatClaudeCodePromptCachingWarning(ignoredClaudeCodePromptCachingFields(config.llm));
if (warning) {
warnings.push({
message: warning,
fix: formatClaudeCodePromptCachingFix(),
});
}
return warnings;
}
@ -629,6 +677,7 @@ function buildVerdict(
export interface BuildProjectStatusOptions {
env?: NodeJS.ProcessEnv;
postgresQueryHistoryProbe?: PostgresQueryHistoryProbe;
claudeCodeAuthProbe?: ClaudeCodeAuthProbe;
configIssues?: KtxConfigIssue[];
}
@ -649,7 +698,11 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil
const config = project.config;
const configStatus = buildConfigStatus(options.configIssues);
const llm = buildLlmStatus(config.llm, env);
const llm = await buildLlmStatus(config.llm, {
projectDir: project.projectDir,
env,
claudeCodeAuthProbe: options.claudeCodeAuthProbe,
});
const embeddings = buildEmbeddingsStatus(config.ingest.embeddings, env);
const storage = buildStorageStatus(config);
const connections = Object.entries(config.connections).map(([name, conn]) =>

View file

@ -129,6 +129,7 @@
"type-check": "tsc -p tsconfig.json --noEmit"
},
"dependencies": {
"@anthropic-ai/claude-agent-sdk": "0.3.142",
"@ktx/llm": "workspace:*",
"@looker/sdk": "^26.8.0",
"@looker/sdk-node": "^26.8.0",

View file

@ -55,7 +55,14 @@ describe('AgentRunnerService.runLoop', () => {
expect(call.system).toEqual({ role: 'system', content: 'SYS' });
expect(call.messages).toEqual([{ role: 'user', content: 'USR' }]);
expect(call.prompt).toBeUndefined();
expect(call.tools).toEqual(tools);
expect(call.tools.noop).toEqual(
expect.objectContaining({
description: 'noop',
inputSchema: {},
execute: expect.any(Function),
toModelOutput: expect.any(Function),
}),
);
expect(call.stopWhen).toBe(17);
expect(call.temperature).toBe(0);
expect(call.experimental_repairToolCall).toBe(repairHandler);

View file

@ -1,33 +1,15 @@
import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider, type KtxModelRole } from '@ktx/llm';
import { generateText, stepCountIs, type TelemetrySettings, type Tool } from 'ai';
import { noopLogger, type KtxLogger } from '../core/index.js';
import { summarizeKtxLlmDebugRequest, type KtxLlmDebugRequestRecorder } from '../llm/index.js';
export type RunLoopStopReason = 'budget' | 'natural' | 'error';
export interface RunLoopStepInfo {
stepIndex: number;
stepBudget: number;
}
export interface RunLoopParams {
modelRole: KtxModelRole;
systemPrompt: string;
userPrompt: string;
toolSet: Record<string, Tool>;
stepBudget: number;
telemetryTags: Record<string, string>;
onStepFinish?: (info: RunLoopStepInfo) => void | Promise<void>;
}
export interface RunLoopResult {
stopReason: RunLoopStopReason;
error?: Error;
}
export interface AgentTelemetryPort {
createTelemetry(tags: Record<string, string>): TelemetrySettings;
}
import type { KtxLlmProvider } from '@ktx/llm';
import type { KtxLogger } from '../core/index.js';
import { AiSdkKtxLlmRuntime, type AgentTelemetryPort } from '../llm/ai-sdk-runtime.js';
import type { KtxLlmDebugRequestRecorder } from '../llm/debug-request-recorder.js';
import type { AgentRunnerPort, RunLoopParams, RunLoopResult } from '../llm/runtime-port.js';
export type {
RunLoopParams,
RunLoopResult,
RunLoopStepInfo,
RunLoopStopReason,
} from '../llm/runtime-port.js';
export type { AgentTelemetryPort } from '../llm/ai-sdk-runtime.js';
export interface AgentRunnerServiceDeps {
llmProvider: KtxLlmProvider;
@ -36,71 +18,14 @@ export interface AgentRunnerServiceDeps {
logger?: KtxLogger;
}
export class AgentRunnerService {
private readonly logger: KtxLogger;
export class AgentRunnerService implements AgentRunnerPort {
private readonly runtime: AiSdkKtxLlmRuntime;
constructor(private readonly deps: AgentRunnerServiceDeps) {
this.logger = deps.logger ?? noopLogger;
constructor(deps: AgentRunnerServiceDeps) {
this.runtime = new AiSdkKtxLlmRuntime(deps);
}
async runLoop(params: RunLoopParams): Promise<RunLoopResult> {
let stepIndex = 0;
try {
const model = this.deps.llmProvider.getModel(params.modelRole);
const builder = new KtxMessageBuilder(this.deps.llmProvider);
const built = builder.wrapSimple({
system: params.systemPrompt,
messages: [{ role: 'user', content: params.userPrompt }],
tools: params.toolSet,
model,
});
const promptMessages = splitKtxSystemMessages(built.messages);
await this.deps.debugRequestRecorder?.record(
summarizeKtxLlmDebugRequest({
operationName: params.telemetryTags.operationName ?? 'ktx-agent-runner',
source: params.telemetryTags.source,
jobId: params.telemetryTags.jobId,
unitKey: params.telemetryTags.unitKey,
modelRole: params.modelRole,
modelId: (model as { modelId?: string }).modelId ?? params.modelRole,
messages: built.messages,
tools: built.tools as Record<string, { providerOptions?: unknown }>,
}),
);
await generateText({
model,
temperature: 0,
stopWhen: stepCountIs(params.stepBudget),
experimental_telemetry: this.deps.telemetry?.createTelemetry(params.telemetryTags),
experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({
source: params.telemetryTags.operationName ?? 'ktx-agent-runner',
}),
...(promptMessages.system ? { system: promptMessages.system } : {}),
messages: promptMessages.messages,
tools: built.tools as Record<string, Tool>,
onStepFinish: async () => {
stepIndex += 1;
if (!params.onStepFinish) {
return;
}
try {
await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
} catch (err) {
this.logger.warn(
`[agent-runner] onStepFinish callback threw; ignoring: ${
err instanceof Error ? err.message : String(err)
}`,
);
}
},
});
return { stopReason: 'natural' };
} catch (error) {
const err = error instanceof Error ? error : new Error(String(error));
this.logger.warn(`[agent-runner] loop failed: ${err.message}`);
return { stopReason: 'error', error: err };
}
runLoop(params: RunLoopParams): Promise<RunLoopResult> {
return this.runtime.runAgentLoop(params);
}
}

View file

@ -1,4 +1,4 @@
import { mkdtemp, realpath, rm, writeFile } from 'node:fs/promises';
import { mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
@ -52,6 +52,13 @@ describe('GitService', () => {
const after = await service.revParseHead();
expect(after).toBe(before);
});
// Reads the repository's .git/config directly and checks that both the
// gc and maintenance sections record `autoDetach = false`.
it('keeps git auto-maintenance attached for deterministic cleanup', async () => {
  const gitConfigPath = join(tempDir, '.git', 'config');
  const gitConfigText = await readFile(gitConfigPath, 'utf-8');

  expect(gitConfigText).toMatch(/\[gc]\n\s+autoDetach = false/);
  expect(gitConfigText).toMatch(/\[maintenance]\n\s+autoDetach = false/);
});
});
describe('commitFile `created` flag', () => {

View file

@ -105,6 +105,12 @@ export class GitService {
this.logger.log('Initialized git repository');
}
// Run any auto-maintenance triggered by writes attached (in the foreground) rather than
// detached. Detached maintenance can keep object-pack directories alive briefly after
// awaited git commands complete, which makes temp-project cleanup flaky in CI.
await this.git.addConfig('gc.autoDetach', 'false');
await this.git.addConfig('maintenance.autoDetach', 'false');
// Ensure HEAD always resolves to a commit so callers (e.g., the memory-agent squash flow)
// can rely on `revParseHead()` returning a SHA. Idempotent: skip if HEAD already exists.
const head = await this.revParseHead();

View file

@ -2,7 +2,7 @@ import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import YAML from 'yaml';
import { AgentRunnerService } from '../../../agent/index.js';
import type { AgentRunnerPort, RunLoopParams } from '../../../llm/index.js';
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../../../project/index.js';
import {
type SqlAnalysisBatchItem,
@ -47,8 +47,8 @@ class AcceptanceHistoricSqlReader implements HistoricSqlReader {
}
}
class HistoricSqlAcceptanceAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class HistoricSqlAcceptanceAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (params.telemetryTags?.operationName !== 'ingest-bundle-wu') {
return { stopReason: 'natural' as const };
}
@ -59,78 +59,65 @@ class HistoricSqlAcceptanceAgentRunner extends AgentRunnerService {
}
if (params.telemetryTags.unitKey === 'historic-sql-table-public-orders') {
const result = await emitEvidence.execute(
{
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Analysts repeatedly inspect paid order lifecycle by customer segment.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status', 'segment'],
commonJoins: [{ table: 'public.customers', on: ['customer_id', 'id'] }],
staleSince: null,
},
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Analysts repeatedly inspect paid order lifecycle by customer segment.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status', 'segment'],
commonJoins: [{ table: 'public.customers', on: ['customer_id', 'id'] }],
staleSince: null,
},
{ toolCallId: 'historic-sql-orders-usage' },
);
if (!String(result).includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected orders evidence result: ${String(result)}`);
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected orders evidence result: ${result.markdown}`);
}
}
if (params.telemetryTags.unitKey === 'historic-sql-table-public-customers') {
const result = await emitEvidence.execute(
{
kind: 'table_usage',
table: 'public.customers',
rawPath: 'tables/public.customers.json',
usage: {
narrative: 'Customers provide segment context for paid order lifecycle analysis.',
frequencyTier: 'mid',
commonFilters: [],
commonGroupBys: ['segment'],
commonJoins: [{ table: 'public.orders', on: ['id', 'customer_id'] }],
staleSince: null,
},
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.customers',
rawPath: 'tables/public.customers.json',
usage: {
narrative: 'Customers provide segment context for paid order lifecycle analysis.',
frequencyTier: 'mid',
commonFilters: [],
commonGroupBys: ['segment'],
commonJoins: [{ table: 'public.orders', on: ['id', 'customer_id'] }],
staleSince: null,
},
{ toolCallId: 'historic-sql-customers-usage' },
);
if (!String(result).includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected customers evidence result: ${String(result)}`);
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected customers evidence result: ${result.markdown}`);
}
}
if (params.telemetryTags.unitKey === 'historic-sql-patterns-part-0001') {
const result = await emitEvidence.execute(
{
kind: 'pattern',
rawPath: 'patterns-input/part-0001.json',
pattern: {
slug: 'paid-order-lifecycle',
title: 'Paid Order Lifecycle',
narrative: 'Analysts join orders and customers to compare paid order lifecycle by segment.',
definitionSql:
'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status, c.segment',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:orders-lifecycle'],
},
const result = await emitEvidence.execute({
kind: 'pattern',
rawPath: 'patterns-input/part-0001.json',
pattern: {
slug: 'paid-order-lifecycle',
title: 'Paid Order Lifecycle',
narrative: 'Analysts join orders and customers to compare paid order lifecycle by segment.',
definitionSql:
'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status, c.segment',
tablesInvolved: ['public.orders', 'public.customers'],
slRefs: ['orders', 'customers'],
constituentTemplateIds: ['pg:orders-lifecycle'],
},
{ toolCallId: 'historic-sql-pattern' },
);
if (!String(result).includes('Recorded historic-SQL pattern evidence')) {
throw new Error(`Unexpected pattern evidence result: ${String(result)}`);
});
if (!result.markdown.includes('Recorded historic-SQL pattern evidence')) {
throw new Error(`Unexpected pattern evidence result: ${result.markdown}`);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
function acceptanceSqlAnalysis(): SqlAnalysisPort {

View file

@ -1,7 +1,6 @@
import type { KtxModelRole } from '@ktx/llm';
import type { ToolSet } from 'ai';
import type { AgentRunnerService } from '../../agent/index.js';
import { type KtxLogger, noopLogger } from '../../core/index.js';
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../llm/index.js';
import type { MemoryAction } from '../../memory/index.js';
import type { ContextCandidateForDedup, CuratorPaginationPort, CuratorPaginationReport } from '../ports.js';
import type {
@ -38,7 +37,7 @@ export interface CuratorPaginationInput {
modelRole: KtxModelRole;
buildSystemPrompt: () => string;
buildUserPrompt: (input: CuratorPaginationPromptInput) => string;
buildToolSet: (passNumber: number) => ToolSet;
buildToolSet: (passNumber: number) => KtxRuntimeToolSet;
getReconciliationActions: () => MemoryAction[];
onStepFinish?: (info: { passNumber: number; stepIndex: number; stepBudget: number }) => void;
}
@ -50,7 +49,7 @@ interface CuratorPaginationResult extends ReconciliationOutcome {
export interface CuratorPaginationServiceDeps {
store: ContextCandidateStorePort;
agentRunner: AgentRunnerService;
agentRunner: AgentRunnerPort;
settings: CuratorPaginationSettings;
logger?: KtxLogger;
}

View file

@ -200,7 +200,7 @@ const makeDeps = () => {
const slValidator = { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) };
const toolsetFactory = {
createIngestWuToolset: vi.fn().mockReturnValue({
toAiSdkTools: vi.fn().mockReturnValue({}),
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
}),
@ -419,7 +419,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
sessions.push(toolSession);
return {
toAiSdkTools: vi.fn().mockReturnValue({}),
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
@ -591,7 +591,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toAiSdkTools: vi.fn().mockReturnValue({}),
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
@ -663,7 +663,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toAiSdkTools: vi.fn().mockReturnValue({}),
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
@ -834,7 +834,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
it('stores memory-flow provenance and transcript summaries in the ingest report body', async () => {
const deps = makeDeps();
deps.toolsetFactory.createIngestWuToolset.mockReturnValue({
toAiSdkTools: vi.fn().mockReturnValue({
toRuntimeTools: vi.fn().mockReturnValue({
read_raw_span: {
description: 'read a raw span',
inputSchema: {},
@ -1376,7 +1376,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toAiSdkTools: vi.fn().mockReturnValue({}),
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
@ -1933,7 +1933,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toAiSdkTools: vi.fn().mockReturnValue({}),
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};

View file

@ -1,9 +1,9 @@
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import { type Tool, tool } from 'ai';
import pLimit from 'p-limit';
import { z } from 'zod';
import { type KtxLogger, noopLogger } from '../core/index.js';
import { createRuntimeToolDescriptorFromAiTool, type KtxRuntimeToolSet } from '../llm/index.js';
import type { CaptureSession, MemoryAction } from '../memory/index.js';
import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
import { createTouchedSlSources, type ToolContext, type ToolSession } from '../tools/index.js';
@ -694,8 +694,9 @@ export class IngestBundleRunner {
};
const skillsLoadedPerWu: string[] = [];
const loadSkillTool: Record<string, Tool> = {
load_skill: tool({
const loadSkillTool: KtxRuntimeToolSet = {
load_skill: {
name: 'load_skill',
description:
'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.',
inputSchema: z.object({ name: z.string() }),
@ -705,19 +706,23 @@ export class IngestBundleRunner {
const available =
(await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') ||
'(none)';
return `Skill "${name}" not available. Available: ${available}`;
return { markdown: `Skill "${name}" not available. Available: ${available}` };
}
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
if (!skillsLoadedPerWu.includes(skill.name)) {
skillsLoadedPerWu.push(skill.name);
}
return {
const structured = {
name: skill.name,
skillDirectory: skill.path,
content: this.deps.skillsRegistry.stripFrontmatter(body),
};
return {
markdown: `# ${structured.name}\n\n${structured.content}`,
structured,
};
},
}),
},
};
const priorProvenance = await this.deps.provenance.findLatestArtifactsForRawPaths(
@ -726,12 +731,15 @@ export class IngestBundleRunner {
wu.rawFiles,
);
const wuEmitUnmappedFallbackTool = {
emit_unmapped_fallback: createEmitUnmappedFallbackTool({
stageIndex,
allowedPaths: new Set(wu.rawFiles),
tableRefExists: (tableRef) =>
this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef),
}),
emit_unmapped_fallback: createRuntimeToolDescriptorFromAiTool(
'emit_unmapped_fallback',
createEmitUnmappedFallbackTool({
stageIndex,
allowedPaths: new Set(wu.rawFiles),
tableRefExists: (tableRef) =>
this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef),
}),
),
};
const systemPrompt = buildWuSystemPrompt({
@ -765,7 +773,7 @@ export class IngestBundleRunner {
wu: wuInner,
loadSkillTool,
emitUnmappedFallbackTool: wuEmitUnmappedFallbackTool,
toolsetTools: wuToolset.toAiSdkTools(wuToolContext),
toolsetTools: wuToolset.toRuntimeTools(wuToolContext),
}),
join(transcriptDir, `${wuInner.unitKey}.jsonl`),
wuInner.unitKey,
@ -921,53 +929,79 @@ export class IngestBundleRunner {
ingest: ingestToolMetadata,
session: rcToolSession,
};
const rcLoadSkill: Record<string, Tool> = {
load_skill: tool({
const rcLoadSkill: KtxRuntimeToolSet = {
load_skill: {
name: 'load_skill',
description: 'Load a skill.',
inputSchema: z.object({ name: z.string() }),
execute: async ({ name }) => {
const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent');
if (!skill) {
return `Skill "${name}" not found`;
return { markdown: `Skill "${name}" not found` };
}
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
return { name: skill.name, content: this.deps.skillsRegistry.stripFrontmatter(body) };
const structured = { name: skill.name, content: this.deps.skillsRegistry.stripFrontmatter(body) };
return { markdown: `# ${structured.name}\n\n${structured.content}`, structured };
},
}),
},
};
const allStagedPaths = new Set<string>([...currentHashes.keys()]);
const rcRawSpanTool = { read_raw_span: createReadRawSpanTool({ stagedDir, allowedPaths: allStagedPaths }) };
const rcStageListTool = { stage_list: createStageListTool({ stageIndex }) };
const rcStageDiffTool = { stage_diff: createStageDiffTool({ stageIndex }) };
const rcRawSpanTool = {
read_raw_span: createRuntimeToolDescriptorFromAiTool(
'read_raw_span',
createReadRawSpanTool({ stagedDir, allowedPaths: allStagedPaths }),
),
};
const rcStageListTool = {
stage_list: createRuntimeToolDescriptorFromAiTool('stage_list', createStageListTool({ stageIndex })),
};
const rcStageDiffTool = {
stage_diff: createRuntimeToolDescriptorFromAiTool('stage_diff', createStageDiffTool({ stageIndex })),
};
const rcEvictionListTool = {
eviction_list: createEvictionListTool({
provenance: this.deps.provenance,
connectionId: job.connectionId,
sourceKey: job.sourceKey,
deletedRawPaths: eviction?.deletedRawPaths ?? [],
}),
eviction_list: createRuntimeToolDescriptorFromAiTool(
'eviction_list',
createEvictionListTool({
provenance: this.deps.provenance,
connectionId: job.connectionId,
sourceKey: job.sourceKey,
deletedRawPaths: eviction?.deletedRawPaths ?? [],
}),
),
};
const rcEmitConflictResolutionTool = {
emit_conflict_resolution: createEmitConflictResolutionTool({ stageIndex }),
emit_conflict_resolution: createRuntimeToolDescriptorFromAiTool(
'emit_conflict_resolution',
createEmitConflictResolutionTool({ stageIndex }),
),
};
const rcEmitEvictionDecisionTool = {
emit_eviction_decision: createEmitEvictionDecisionTool({
stageIndex,
deletedRawPaths: eviction?.deletedRawPaths ?? [],
}),
emit_eviction_decision: createRuntimeToolDescriptorFromAiTool(
'emit_eviction_decision',
createEmitEvictionDecisionTool({
stageIndex,
deletedRawPaths: eviction?.deletedRawPaths ?? [],
}),
),
};
const rcEmitArtifactResolutionTool = {
emit_artifact_resolution: createEmitArtifactResolutionTool({
stageIndex,
allowedPaths: allStagedPaths,
}),
emit_artifact_resolution: createRuntimeToolDescriptorFromAiTool(
'emit_artifact_resolution',
createEmitArtifactResolutionTool({
stageIndex,
allowedPaths: allStagedPaths,
}),
),
};
const rcEmitUnmappedFallbackTool = {
emit_unmapped_fallback: createEmitUnmappedFallbackTool({
stageIndex,
allowedPaths: allStagedPaths,
tableRefExists: (tableRef) => this.tableRefExistsInSemanticLayer(rcScopedSl, slConnectionIds, tableRef),
}),
emit_unmapped_fallback: createRuntimeToolDescriptorFromAiTool(
'emit_unmapped_fallback',
createEmitUnmappedFallbackTool({
stageIndex,
allowedPaths: allStagedPaths,
tableRefExists: (tableRef) => this.tableRefExistsInSemanticLayer(rcScopedSl, slConnectionIds, tableRef),
}),
),
};
const reconcileBaseFraming = await this.deps.promptService.loadPrompt('memory_agent_bundle_ingest_reconcile');
@ -1026,7 +1060,7 @@ export class IngestBundleRunner {
emitArtifactResolutionTool: rcEmitArtifactResolutionTool,
emitUnmappedFallbackTool: rcEmitUnmappedFallbackTool,
readRawSpanTool: rcRawSpanTool,
toolsetTools: rcToolset.toAiSdkTools(rcToolContext),
toolsetTools: rcToolset.toRuntimeTools(rcToolContext),
}),
join(transcriptDir, 'reconcile.jsonl'),
'reconcile',
@ -1075,7 +1109,7 @@ export class IngestBundleRunner {
emitArtifactResolutionTool: rcEmitArtifactResolutionTool,
emitUnmappedFallbackTool: rcEmitUnmappedFallbackTool,
readRawSpanTool: rcRawSpanTool,
toolsetTools: rcToolset.toAiSdkTools(rcToolContext),
toolsetTools: rcToolset.toRuntimeTools(rcToolContext),
}),
join(transcriptDir, 'reconcile.jsonl'),
'reconcile',

View file

@ -3,7 +3,7 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import Database from 'better-sqlite3';
import YAML from 'yaml';
import { AgentRunnerService } from '../agent/index.js';
import type { AgentRunnerPort, RunLoopParams } from '../llm/index.js';
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js';
import { makeLocalGitRepo } from '../test/make-local-git-repo.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
@ -13,16 +13,12 @@ import { createDefaultLocalIngestAdapters, localPullConfigForAdapter } from './l
import { getLocalIngestStatus, runLocalIngest } from './local-ingest.js';
import type { ChunkResult, DiffSet, SourceAdapter } from './types.js';
class TestAgentRunner extends AgentRunnerService {
override runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
class TestAgentRunner implements AgentRunnerPort {
runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
}
class LookerSlWritingAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class LookerSlWritingAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
@ -31,130 +27,100 @@ class LookerSlWritingAgentRunner extends AgentRunnerService {
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
}
await ledger.execute(
{
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
unverifiedIdentifiers: [],
},
{ toolCallId: 'looker-verification-ledger', messages: [] },
);
await ledger.execute({
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
unverifiedIdentifiers: [],
});
const slWrite = params.toolSet.sl_write_source;
if (!slWrite?.execute) {
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
}
const result = await slWrite.execute(
{
connectionId: 'prod-warehouse',
sourceName: 'looker__ecommerce__orders',
source: {
name: 'looker__ecommerce__orders',
table: 'public.orders',
grain: ['id'],
columns: [
{ name: 'id', type: 'number' },
{ name: 'revenue', type: 'number' },
],
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
},
const result = await slWrite.execute({
connectionId: 'prod-warehouse',
sourceName: 'looker__ecommerce__orders',
source: {
name: 'looker__ecommerce__orders',
table: 'public.orders',
grain: ['id'],
columns: [
{ name: 'id', type: 'number' },
{ name: 'revenue', type: 'number' },
],
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
},
{ toolCallId: 'looker-sl-write' },
);
if (!result.structured.success) {
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class WikiWritingAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class WikiWritingAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
const ledger = params.toolSet.record_verification_ledger;
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
}
await ledger.execute(
{
summary: 'Test fixture writes wiki-only context with no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
},
{ toolCallId: 'wiki-verification-ledger', messages: [] },
);
await ledger.execute({
summary: 'Test fixture writes wiki-only context with no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
});
const wikiWrite = params.toolSet.wiki_write;
if (!wikiWrite?.execute) {
throw new Error('wiki_write tool was not available to the WorkUnit');
}
const result = await wikiWrite.execute(
{
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
},
{ toolCallId: 'wiki-write' },
);
if (!result.structured.success) {
const result = await wikiWrite.execute({
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class WikiWritingWithRawPathAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class WikiWritingWithRawPathAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
const ledger = params.toolSet.record_verification_ledger;
if (!ledger?.execute) {
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
}
await ledger.execute(
{
summary: 'Test fixture writes wiki-only context with explicit raw provenance and no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
},
{ toolCallId: 'wiki-raw-path-verification-ledger', messages: [] },
);
await ledger.execute({
summary: 'Test fixture writes wiki-only context with explicit raw provenance and no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
});
const wikiWrite = params.toolSet.wiki_write;
if (!wikiWrite?.execute) {
throw new Error('wiki_write tool was not available to the WorkUnit');
}
const result = await wikiWrite.execute(
{
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
rawPaths: ['orders/orders.json'],
},
{ toolCallId: 'wiki-write' },
);
if (!result.structured.success) {
const result = await wikiWrite.execute({
key: 'orders_context',
summary: 'Orders source context',
content: 'Orders are purchase records used for revenue analysis.',
tags: ['orders'],
rawPaths: ['orders/orders.json'],
});
if (!(result.structured as { success?: boolean } | undefined)?.success) {
throw new Error(result.markdown);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class HistoricSqlEvidenceAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: any) => {
class HistoricSqlEvidenceAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
if (
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
params.telemetryTags?.unitKey === 'historic-sql-table-public-orders'
@ -163,31 +129,24 @@ class HistoricSqlEvidenceAgentRunner extends AgentRunnerService {
if (!emitEvidence?.execute) {
throw new Error('emit_historic_sql_evidence tool was not available to the historic-SQL WorkUnit');
}
const result = await emitEvidence.execute(
{
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [],
staleSince: null,
},
const result = await emitEvidence.execute({
kind: 'table_usage',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [],
staleSince: null,
},
{ toolCallId: 'historic-sql-evidence' },
);
if (!String(result).includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected historic-SQL evidence result: ${String(result)}`);
});
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
throw new Error(`Unexpected historic-SQL evidence result: ${result.markdown}`);
}
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class HistoricSqlEvidenceTestAdapter implements SourceAdapter {

View file

@ -1,7 +1,7 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { AgentRunnerService } from '../agent/index.js';
import type { AgentRunnerPort } from '../llm/index.js';
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js';
@ -17,6 +17,10 @@ type RuntimeWithConnectionDeps = {
};
};
/**
 * Builds a stub agent runner for these tests.
 *
 * @returns an object whose `runLoop` is a vitest mock that resolves to
 *   `{ stopReason: 'natural' }` on every call.
 */
function testAgentRunner(): AgentRunnerPort {
  const runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
  return { runLoop };
}
describe('createLocalBundleIngestRuntime', () => {
let tempDir: string;
let project: KtxLocalProject;
@ -55,15 +59,42 @@ describe('createLocalBundleIngestRuntime', () => {
}),
).toThrow(
[
'ktx ingest requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.',
`Configure an Anthropic provider, then rerun ingest:`,
` ktx setup --project-dir ${project.projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
'Configure a local Claude Code session or API-backed LLM, then rerun ingest:',
` ktx setup --project-dir ${project.projectDir} --llm-backend claude-code --no-input`,
` ktx setup --project-dir ${project.projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
].join('\n'),
);
});
// With the claude-code backend configured and no agentRunner injected, the
// ingest runtime is expected to obtain its LLM runtime from the supplied
// `createLlmRuntime` factory, called with the llm config and the project dir.
it('uses a runtime-backed agent runner when claude-code is configured', () => {
  project.config.llm = {
    provider: { backend: 'claude-code' },
    models: { default: 'sonnet' },
    promptCaching: { enabled: false },
  };

  const stubRuntime = {
    generateText: vi.fn(),
    generateObject: vi.fn(),
    runAgentLoop: vi.fn(async () => ({ stopReason: 'natural' as const })),
  };
  const createLlmRuntime = vi.fn(() => stubRuntime);

  const ingestRuntime = createLocalBundleIngestRuntime({
    project,
    adapters: [new FakeSourceAdapter()],
    createLlmRuntime,
  });

  expect(ingestRuntime).toBeDefined();

  const expectedFactoryOptions = expect.objectContaining({ projectDir: project.projectDir });
  expect(createLlmRuntime).toHaveBeenCalledWith(project.config.llm, expectedFactoryOptions);
});
it('builds runner deps with local SQLite stores and context tools enabled', async () => {
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
const agentRunner = testAgentRunner();
const runtime = createLocalBundleIngestRuntime({
project,
@ -94,7 +125,7 @@ describe('createLocalBundleIngestRuntime', () => {
project_id: 'acme',
dataset_id: 'warehouse',
};
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
const agentRunner = testAgentRunner();
const runtime = createLocalBundleIngestRuntime({
project,
@ -114,7 +145,7 @@ describe('createLocalBundleIngestRuntime', () => {
});
it('passes project connection config to local ingest query executors', async () => {
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
const agentRunner = testAgentRunner();
const queryExecutor = {
execute: vi.fn(async () => ({
headers: ['answer'],

View file

@ -1,20 +1,20 @@
import { mkdirSync } from 'node:fs';
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import type { KtxLlmProvider } from '@ktx/llm';
import type { Tool } from 'ai';
import YAML from 'yaml';
import type { AgentRunnerService } from '../agent/index.js';
import { AgentRunnerService as DefaultAgentRunnerService } from '../agent/index.js';
import { localConnectionInfoFromConfig, type KtxSqlQueryExecutorPort } from '../connections/index.js';
import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js';
import { noopLogger, SessionWorktreeService } from '../core/index.js';
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
import {
createJsonlKtxLlmDebugRequestRecorder,
createRuntimeToolDescriptorFromAiTool,
createLocalKtxEmbeddingProviderFromConfig,
createLocalKtxLlmProviderFromConfig,
createLocalKtxLlmRuntimeFromConfig,
KtxIngestEmbeddingPortAdapter,
RuntimeAgentRunner,
type AgentRunnerPort,
type KtxLlmRuntimePort,
type KtxRuntimeToolSet,
} from '../llm/index.js';
import type { KtxLocalProject } from '../project/index.js';
import { ktxLocalStateDbPath } from '../project/index.js';
@ -100,8 +100,9 @@ const LOCAL_SHAPE_WARNING = 'Local ingest validates semantic-layer YAML shape on
export interface CreateLocalBundleIngestRuntimeOptions {
project: KtxLocalProject;
adapters: SourceAdapter[];
agentRunner?: AgentRunnerService;
llmProvider?: KtxLlmProvider;
agentRunner?: AgentRunnerPort;
llmRuntime?: KtxLlmRuntimePort;
createLlmRuntime?: typeof createLocalKtxLlmRuntimeFromConfig;
llmDebugRequestFile?: string;
memoryModel?: string;
semanticLayerCompute?: KtxSemanticLayerComputePort;
@ -456,12 +457,12 @@ class NoopKnowledgeEventPort implements KnowledgeEventPort {
class LocalIngestToolSet implements IngestToolsetLike {
constructor(
private readonly tools: BaseTool[],
private readonly sourceTools: Record<string, Tool> = {},
private readonly sourceTools: KtxRuntimeToolSet = {},
) {}
toAiSdkTools(context: ToolContext) {
toRuntimeTools(context: ToolContext): KtxRuntimeToolSet {
return {
...Object.fromEntries(this.tools.map((tool) => [tool.name, tool.toAiSdkTool(context)])),
...Object.fromEntries(this.tools.map((tool) => [tool.name, tool.toRuntimeTool(context)])),
...this.sourceTools,
};
}
@ -541,13 +542,16 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
}
createIngestWuToolset(session: ToolSession, options?: { includeContextEvidenceTools?: boolean }): IngestToolsetLike {
const sourceTools: Record<string, Tool> =
const sourceTools: KtxRuntimeToolSet =
session.ingest?.sourceKey === 'historic-sql'
? {
emit_historic_sql_evidence: createEmitHistoricSqlEvidenceTool({
connectionId: session.connectionId,
session,
}),
emit_historic_sql_evidence: createRuntimeToolDescriptorFromAiTool(
'emit_historic_sql_evidence',
createEmitHistoricSqlEvidenceTool({
connectionId: session.connectionId,
session,
}),
),
}
: {};
return new LocalIngestToolSet(
@ -571,36 +575,36 @@ function nextLocalJobId(): string {
/**
 * Builds the newline-joined guidance text used as the error message when local ingest has neither
 * an LLM runtime nor an injected agent runner.
 *
 * NOTE(review): this span is rendered from a diff, so the superseded message lines appear next to
 * their replacements inside the same array.
 *
 * @param projectDir - Project directory interpolated into the suggested `ktx setup` commands.
 * @returns The message lines joined with `\n`.
 */
function localIngestLlmProviderGuardMessage(projectDir: string): string {
return [
'ktx ingest requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.',
'Configure an Anthropic provider, then rerun ingest:',
` ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
'Configure a local Claude Code session or API-backed LLM, then rerun ingest:',
` ktx setup --project-dir ${projectDir} --llm-backend claude-code --no-input`,
` ktx setup --project-dir ${projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
].join('\n');
}
/**
 * Resolves the agent runner (and, when available, the LLM runtime) used by local ingest.
 *
 * NOTE(review): this span is rendered from a diff, so superseded `llmProvider` lines appear next to
 * their `llmRuntime` replacements; the description below follows the `llmRuntime` lines.
 *
 * The runtime is `options.llmRuntime` when provided; otherwise it is built by
 * `options.createLlmRuntime` (falling back to `createLocalKtxLlmRuntimeFromConfig`) from the
 * project's `llm` config, with the project directory and `process.env` passed along. An injected
 * `options.agentRunner` is returned as-is together with the runtime when one exists. Without an
 * injected runner, a missing runtime throws the setup guidance error, and an available runtime is
 * wrapped in `RuntimeAgentRunner`.
 */
function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): {
agentRunner: AgentRunnerService;
llmProvider?: KtxLlmProvider;
agentRunner: AgentRunnerPort;
llmRuntime?: KtxLlmRuntimePort;
} {
const llmProvider =
options.llmProvider ?? createLocalKtxLlmProviderFromConfig(options.project.config.llm) ?? undefined;
const llmRuntime =
options.llmRuntime ??
(options.createLlmRuntime ?? createLocalKtxLlmRuntimeFromConfig)(options.project.config.llm, {
projectDir: options.project.projectDir,
env: process.env,
}) ??
undefined;
if (options.agentRunner) {
return { agentRunner: options.agentRunner, ...(llmProvider ? { llmProvider } : {}) };
return { agentRunner: options.agentRunner, ...(llmRuntime ? { llmRuntime } : {}) };
}
if (!llmProvider) {
if (!llmRuntime) {
throw new Error(localIngestLlmProviderGuardMessage(options.project.projectDir));
}
return {
agentRunner: new DefaultAgentRunnerService({
llmProvider,
logger: options.logger ?? noopLogger,
...(options.llmDebugRequestFile
? { debugRequestRecorder: createJsonlKtxLlmDebugRequestRecorder(options.llmDebugRequestFile) }
: {}),
}),
llmProvider,
agentRunner: new RuntimeAgentRunner(llmRuntime),
llmRuntime,
};
}
@ -627,7 +631,7 @@ export function createLocalBundleIngestRuntime(
const knowledgeIndex = new LocalKnowledgeIndex(options.project, embedding);
const knowledgeEvents = new NoopKnowledgeEventPort();
const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, options.project.git, logger);
const { agentRunner, llmProvider } = resolveAgentRunner(options);
const { agentRunner, llmRuntime } = resolveAgentRunner(options);
const promptService = new PromptService({ promptsDir, partials: [], logger });
const storage = new LocalIngestStorage(options.project);
const registry = registerAdapters(options.adapters);
@ -681,10 +685,11 @@ export function createLocalBundleIngestRuntime(
commitMessages: new LocalCommitMessagePort(),
embedding,
contextEvidenceIndex: new ContextEvidenceIndexService({ store: contextStore, embeddings: embedding, logger }),
pageTriage: llmProvider
llmRuntime,
pageTriage: llmRuntime
? new PageTriageService({
store: contextStore,
llmProvider,
llmRuntime,
settings: {
enabled: true,
maxConcurrency: 2,

View file

@ -1,11 +1,10 @@
import { randomUUID } from 'node:crypto';
import { cp, mkdir, rm } from 'node:fs/promises';
import { isAbsolute, resolve } from 'node:path';
import type { KtxLlmProvider } from '@ktx/llm';
import type { AgentRunnerService } from '../agent/index.js';
import type { KtxSqlQueryExecutorPort } from '../connections/index.js';
import type { KtxLogger } from '../core/index.js';
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
import type { AgentRunnerPort, KtxLlmRuntimePort } from '../llm/index.js';
import type { KtxLocalProject } from '../project/index.js';
import { ktxLocalStateDbPath } from '../project/index.js';
import { planMetabaseFanoutChildren } from './adapters/metabase/fanout-planner.js';
@ -28,8 +27,8 @@ export interface RunLocalIngestOptions {
trigger?: IngestTrigger;
jobId?: string;
memoryFlow?: MemoryFlowEventSink;
agentRunner?: AgentRunnerService;
llmProvider?: KtxLlmProvider;
agentRunner?: AgentRunnerPort;
llmRuntime?: KtxLlmRuntimePort;
llmDebugRequestFile?: string;
memoryModel?: string;
semanticLayerCompute?: KtxSemanticLayerComputePort;
@ -41,7 +40,7 @@ export interface LocalIngestMcpOptions
extends Pick<
RunLocalIngestOptions,
| 'agentRunner'
| 'llmProvider'
| 'llmRuntime'
| 'memoryModel'
| 'semanticLayerCompute'
| 'queryExecutor'
@ -167,8 +166,8 @@ async function runScheduledPullJob(options: {
trigger?: IngestTrigger;
jobId?: string;
memoryFlow?: MemoryFlowEventSink;
agentRunner?: AgentRunnerService;
llmProvider?: KtxLlmProvider;
agentRunner?: AgentRunnerPort;
llmRuntime?: KtxLlmRuntimePort;
memoryModel?: string;
semanticLayerCompute?: KtxSemanticLayerComputePort;
queryExecutor?: KtxSqlQueryExecutorPort;
@ -221,7 +220,7 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise<Lo
jobId,
memoryFlow: options.memoryFlow,
agentRunner: options.agentRunner,
llmProvider: options.llmProvider,
llmRuntime: options.llmRuntime,
memoryModel: options.memoryModel,
semanticLayerCompute: options.semanticLayerCompute,
queryExecutor: options.queryExecutor,
@ -406,7 +405,7 @@ export async function runLocalMetabaseIngest(
jobId: childJobId,
memoryFlow: options.memoryFlow,
agentRunner: options.agentRunner,
llmProvider: options.llmProvider,
llmRuntime: options.llmRuntime,
memoryModel: options.memoryModel,
semanticLayerCompute: options.semanticLayerCompute,
queryExecutor: options.queryExecutor,

View file

@ -1,24 +1,20 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { AgentRunnerService } from '../agent/index.js';
import type { AgentRunnerPort, RunLoopParams } from '../llm/index.js';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
import { LocalMetabaseDiscoveryCache } from './adapters/metabase/local-source-state-store.js';
import { getLocalIngestStatus, runLocalMetabaseIngest } from './local-ingest.js';
import type { ChunkResult, FetchContext, SourceAdapter } from './types.js';
/**
 * Agent runner test double whose mocked `runLoop` fails for one specific database and succeeds
 * otherwise.
 *
 * NOTE(review): this span is rendered from a diff, so the superseded `extends AgentRunnerService`
 * header and constructor appear next to the `implements AgentRunnerPort` replacement.
 */
class TestAgentRunner extends AgentRunnerService {
override runLoop = vi.fn(async (params: Parameters<AgentRunnerService['runLoop']>[0]) => {
class TestAgentRunner implements AgentRunnerPort {
runLoop = vi.fn(async (params: RunLoopParams) => {
// Simulate a failed run only when the user prompt mentions the second Metabase database.
if (params.userPrompt.includes('metabase-db-2')) {
return { stopReason: 'error' as const, error: new Error('database 2 failed') };
}
return { stopReason: 'natural' as const };
});
constructor() {
super({ llmProvider: { getModel: () => ({}) as never } as never });
}
}
class FakeMetabaseSourceAdapter implements SourceAdapter {

View file

@ -21,7 +21,11 @@ describe('PageTriageService', () => {
};
let promptService: { loadPrompt: ReturnType<typeof vi.fn<(name: string) => Promise<string>>> };
let adapter: { triageSupported: true; getTriageSignals: ReturnType<typeof vi.fn> };
let generateTextMock: ReturnType<typeof vi.fn>;
let llmRuntime: {
generateText: ReturnType<typeof vi.fn>;
generateObject: ReturnType<typeof vi.fn>;
runAgentLoop: ReturnType<typeof vi.fn>;
};
beforeEach(async () => {
stagedDir = await mkdtemp(join(tmpdir(), 'page-triage-'));
@ -88,31 +92,16 @@ describe('PageTriageService', () => {
.fn<(name: string) => Promise<string>>()
.mockImplementation((name) => Promise.resolve(`prompt:${name}`)),
};
generateTextMock = vi.fn();
llmRuntime = {
generateText: vi.fn(),
generateObject: vi.fn(),
runAgentLoop: vi.fn(),
};
service = new PageTriageService({
store: repository as any,
llmProvider: {
getModel: vi.fn().mockReturnValue('model'),
getModelByName: vi.fn(),
cacheMarker: vi.fn(),
repairToolCallHandler: vi.fn(),
thinkingProviderOptions: vi.fn(),
telemetryConfig: vi.fn(),
promptCachingConfig: vi.fn(() => ({
enabled: false,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
})),
activeBackend: vi.fn(() => 'anthropic'),
} as any,
llmRuntime: llmRuntime as any,
settings: triageSettings,
promptService: promptService as any,
generateText: generateTextMock as any,
});
});
@ -121,10 +110,10 @@ describe('PageTriageService', () => {
});
it('writes light-lane candidates and keeps the page out of full WorkUnits', async () => {
generateTextMock
.mockResolvedValueOnce({ text: JSON.stringify({ lane: 'light', reason: 'short durable policy' }) } as any)
.mockResolvedValueOnce({
text: JSON.stringify({
llmRuntime.generateText
.mockResolvedValueOnce(JSON.stringify({ lane: 'light', reason: 'short durable policy' }))
.mockResolvedValueOnce(
JSON.stringify({
candidates: [
{
candidateKey: 'support-handoff-owner',
@ -142,7 +131,7 @@ describe('PageTriageService', () => {
},
],
}),
} as any);
);
const result = await service.triageRun({
stagedDir,
@ -171,6 +160,7 @@ describe('PageTriageService', () => {
});
expect(result.fullRawPaths.has('pages/page-1/page.md')).toBe(false);
expect(adapter.getTriageSignals).toHaveBeenCalledWith(stagedDir, 'page-1');
expect(llmRuntime.generateText).toHaveBeenCalledWith(expect.objectContaining({ role: 'triage' }));
expect(repository.setDocumentTriageLane).toHaveBeenCalledWith('run-1', 'pages/page-1/page.md', 'light');
expect(repository.insertCandidate).toHaveBeenCalledWith(
expect.objectContaining({
@ -225,23 +215,20 @@ describe('PageTriageService', () => {
}
return Promise.resolve(`prompt:${name}`);
});
generateTextMock
llmRuntime.generateText
.mockImplementationOnce((args: any) => {
const systemMessage = args.system ?? args.messages.find((m: { role: string }) => m.role === 'system');
const userMessage = args.messages.find((m: { role: string }) => m.role === 'user');
const systemText =
typeof systemMessage === 'string' ? systemMessage : (systemMessage.content as string);
const userText = userMessage.content as string;
const systemText = args.system as string;
const userText = args.prompt as string;
expect(systemText).toContain(
'Reusable templates and scripts are durable knowledge regardless of subject matter.',
);
expect(systemText).toContain('Date-titled standups are still skip; named templates and scripts are not.');
expect(userText).toContain('Cold Call Script');
expect(userText).not.toContain('Reusable templates and scripts are durable knowledge');
return { text: JSON.stringify({ lane: 'light', reason: 'reusable sales script' }) } as any;
return JSON.stringify({ lane: 'light', reason: 'reusable sales script' });
})
.mockResolvedValueOnce({
text: JSON.stringify({
.mockResolvedValueOnce(
JSON.stringify({
candidates: [
{
candidateKey: 'cold-call-script',
@ -259,7 +246,7 @@ describe('PageTriageService', () => {
},
],
}),
} as any);
);
const result = await service.triageRun({
stagedDir,
@ -312,9 +299,7 @@ describe('PageTriageService', () => {
'utf-8',
);
generateTextMock.mockResolvedValue({
text: JSON.stringify({ lane: 'full', reason: 'durable policy page' }),
} as any);
llmRuntime.generateText.mockResolvedValue(JSON.stringify({ lane: 'full', reason: 'durable policy page' }));
const result = await service.triageRun({
stagedDir,
@ -351,7 +336,7 @@ describe('PageTriageService', () => {
});
it('falls back to full when classifier output is malformed', async () => {
generateTextMock.mockResolvedValueOnce({ text: 'not-json' } as any);
llmRuntime.generateText.mockResolvedValueOnce('not-json');
const result = await service.triageRun({
stagedDir,
@ -370,8 +355,8 @@ describe('PageTriageService', () => {
});
it('promotes a light page to full when light extraction fails', async () => {
generateTextMock
.mockResolvedValueOnce({ text: JSON.stringify({ lane: 'light', reason: 'short durable policy' }) } as any)
llmRuntime.generateText
.mockResolvedValueOnce(JSON.stringify({ lane: 'light', reason: 'short durable policy' }))
.mockRejectedValueOnce(new Error('provider unavailable'));
const result = await service.triageRun({
@ -405,7 +390,7 @@ describe('PageTriageService', () => {
});
expect(result).toEqual({ enabled: false, report: undefined, fullRawPaths: new Set<string>(), warnings: [] });
expect(generateTextMock).not.toHaveBeenCalled();
expect(llmRuntime.generateText).not.toHaveBeenCalled();
expect(repository.setDocumentTriageLane).not.toHaveBeenCalled();
});
});

View file

@ -1,11 +1,10 @@
import { createHash } from 'node:crypto';
import { readdir, readFile } from 'node:fs/promises';
import { dirname, join, relative } from 'node:path';
import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider } from '@ktx/llm';
import { generateText, type ToolSet } from 'ai';
import pLimit from 'p-limit';
import { z } from 'zod';
import { type KtxLogger, noopLogger } from '../../core/index.js';
import type { KtxLlmRuntimePort } from '../../llm/index.js';
import type { PromptService } from '../../prompts/index.js';
import type { InsertContextCandidateInput } from '../context-candidates/index.js';
import type { JsonValue } from '../ports.js';
@ -100,20 +99,17 @@ export interface PageTriageSettings {
/**
 * Constructor dependencies for `PageTriageService`.
 *
 * NOTE(review): this span is rendered from a diff, so the superseded `llmProvider` and
 * `generateText` members appear next to the `llmRuntime` replacement.
 */
export interface PageTriageServiceDeps {
store: PageTriageStorePort;
llmProvider: KtxLlmProvider;
// Runtime the service calls `generateText` on with the `triage` role.
llmRuntime: KtxLlmRuntimePort;
settings: PageTriageSettings;
promptService: PromptService;
// Optional; the service falls back to `noopLogger` when omitted.
logger?: KtxLogger;
generateText?: typeof generateText;
}
export class PageTriageService {
private readonly logger: KtxLogger;
private readonly runGenerateText: typeof generateText;
constructor(private readonly deps: PageTriageServiceDeps) {
this.logger = deps.logger ?? noopLogger;
this.runGenerateText = deps.generateText ?? generateText;
}
async triageRun(args: PageTriageRunArgs): Promise<PageTriageRunResult> {
@ -339,22 +335,12 @@ export class PageTriageService {
jobId: string;
unitKey: string;
}): Promise<string> {
const model = this.deps.llmProvider.getModel('triage');
const built = new KtxMessageBuilder(this.deps.llmProvider).wrapSimple({
return this.deps.llmRuntime.generateText({
role: 'triage',
system: params.system,
messages: [{ role: 'user', content: params.prompt }],
tools: {},
model,
});
const split = splitKtxSystemMessages(built.messages);
const result = await this.runGenerateText({
model,
prompt: params.prompt,
temperature: 0,
...(split.system ? { system: split.system } : {}),
messages: split.messages,
tools: built.tools as ToolSet,
});
return result.text;
}
private async buildClassifierSystem(): Promise<string> {

View file

@ -1,8 +1,7 @@
import type { ToolSet } from 'ai';
import type { KtxModelRole } from '@ktx/llm';
import type { AgentRunnerService } from '../agent/index.js';
import type { KtxEmbeddingPort } from '../core/embedding.js';
import type { GitService, KtxFileStorePort, KtxLogger, SessionOutcome } from '../core/index.js';
import type { AgentRunnerPort, KtxLlmRuntimePort, KtxRuntimeToolSet } from '../llm/index.js';
import type { CaptureSession, MemoryAction, MemoryKnowledgeSlRefsPort } from '../memory/index.js';
import type { PromptService } from '../prompts/index.js';
import type { SkillsRegistryService } from '../skills/index.js';
@ -163,7 +162,7 @@ export interface IngestCommitMessagePort {
}
/**
 * A toolset that can be materialized into callable tools for a given tool context.
 *
 * NOTE(review): this span is rendered from a diff, so the superseded `toAiSdkTools` member appears
 * next to its `toRuntimeTools` replacement.
 */
export interface IngestToolsetLike {
toAiSdkTools(context: ToolContext): ToolSet;
toRuntimeTools(context: ToolContext): KtxRuntimeToolSet;
}
export interface IngestToolsetFactoryPort {
@ -315,7 +314,7 @@ export interface CuratorPaginationPort {
items: ReconcileCandidateForPrompt[];
runState: ReconcilePromptRunState;
}) => string;
buildToolSet: (passNumber: number) => ToolSet;
buildToolSet: (passNumber: number) => KtxRuntimeToolSet;
getReconciliationActions: () => MemoryAction[];
onStepFinish?: (info: { passNumber: number; stepIndex: number; stepBudget: number }) => void;
}): Promise<ReconciliationOutcome & { report: CuratorPaginationReport; warnings: string[] }>;
@ -350,7 +349,8 @@ export interface IngestBundleRunnerDeps {
registry: SourceAdapterRegistryPort;
diffSetService: DiffSetComputerPort;
sessionWorktreeService: IngestSessionWorktreePort;
agentRunner: AgentRunnerService;
agentRunner: AgentRunnerPort;
llmRuntime?: KtxLlmRuntimePort;
gitService: GitService;
lockingService: IngestLockPort;
storage: IngestStoragePort;

View file

@ -141,26 +141,17 @@ describe('buildReconcileToolSet', () => {
toolsetTools: { sl_write_source: { description: 'sl write', inputSchema: {} as any, execute: slWrite } as any },
});
const correction = await toolSet.sl_write_source.execute?.(
{ connectionId: 'warehouse', sourceName: 'accounts' },
{ toolCallId: 't1' } as any,
);
const correction = await toolSet.sl_write_source.execute?.({ connectionId: 'warehouse', sourceName: 'accounts' });
expect(slWrite).not.toHaveBeenCalled();
expect(correction).toMatchObject({ structured: { success: false, reason: 'verification_ledger_required' } });
await toolSet.record_verification_ledger.execute?.(
{
summary: 'Verified warehouse.accounts with entity_details.',
verifiedIdentifiers: ['warehouse.accounts'],
unverifiedIdentifiers: [],
},
{ toolCallId: 't2' } as any,
);
const written = await toolSet.sl_write_source.execute?.(
{ connectionId: 'warehouse', sourceName: 'accounts' },
{ toolCallId: 't3' } as any,
);
await toolSet.record_verification_ledger.execute?.({
summary: 'Verified warehouse.accounts with entity_details.',
verifiedIdentifiers: ['warehouse.accounts'],
unverifiedIdentifiers: [],
});
const written = await toolSet.sl_write_source.execute?.({ connectionId: 'warehouse', sourceName: 'accounts' });
expect(slWrite).toHaveBeenCalledTimes(1);
expect(written).toMatchObject({ structured: { success: true } });

View file

@ -1,5 +1,5 @@
import type { Tool, ToolSet } from 'ai';
import { buildCanonicalPinsPromptBlock, type CanonicalPin } from '../canonical-pins.js';
import type { KtxRuntimeToolSet } from '../../llm/index.js';
import {
createVerificationLedgerState,
VERIFICATION_LEDGER_PROMPT,
@ -181,19 +181,19 @@ export function buildReconcileUserPrompt(
}
/**
 * Input accepted by `buildReconcileToolSet`: one tool set per named tool group plus the general
 * `toolsetTools`.
 *
 * NOTE(review): this span is rendered from a diff, so each member appears twice, first with its
 * superseded AI SDK type and then with the `KtxRuntimeToolSet` replacement.
 */
export interface ReconcileToolSetInput {
loadSkillTool: Record<string, Tool>;
stageListTool: Record<string, Tool>;
stageDiffTool: Record<string, Tool>;
evictionListTool: Record<string, Tool>;
emitConflictResolutionTool: Record<string, Tool>;
emitEvictionDecisionTool: Record<string, Tool>;
emitArtifactResolutionTool: Record<string, Tool>;
emitUnmappedFallbackTool: Record<string, Tool>;
readRawSpanTool: Record<string, Tool>;
toolsetTools: ToolSet;
loadSkillTool: KtxRuntimeToolSet;
stageListTool: KtxRuntimeToolSet;
stageDiffTool: KtxRuntimeToolSet;
evictionListTool: KtxRuntimeToolSet;
emitConflictResolutionTool: KtxRuntimeToolSet;
emitEvictionDecisionTool: KtxRuntimeToolSet;
emitArtifactResolutionTool: KtxRuntimeToolSet;
emitUnmappedFallbackTool: KtxRuntimeToolSet;
readRawSpanTool: KtxRuntimeToolSet;
toolsetTools: KtxRuntimeToolSet;
}
export function buildReconcileToolSet(input: ReconcileToolSetInput): ToolSet {
export function buildReconcileToolSet(input: ReconcileToolSetInput): KtxRuntimeToolSet {
const state = createVerificationLedgerState();
return withVerificationLedger(
{

View file

@ -87,21 +87,18 @@ describe('buildWuToolSet', () => {
toolsetTools: { wiki_write: { description: 'write', inputSchema: {} as any, execute: wikiWrite } as any },
});
const correction = await toolSet.wiki_write.execute?.({ key: 'customer-rules' }, { toolCallId: 't1' } as any);
const correction = await toolSet.wiki_write.execute?.({ key: 'customer-rules' });
expect(wikiWrite).not.toHaveBeenCalled();
expect(correction).toMatchObject({ structured: { success: false, reason: 'verification_ledger_required' } });
expect(String((correction as any).markdown)).toContain('record_verification_ledger');
await toolSet.record_verification_ledger.execute?.(
{
summary: 'No warehouse identifiers will be emitted in this wiki write.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
},
{ toolCallId: 't2' } as any,
);
const written = await toolSet.wiki_write.execute?.({ key: 'customer-rules' }, { toolCallId: 't3' } as any);
await toolSet.record_verification_ledger.execute?.({
summary: 'No warehouse identifiers will be emitted in this wiki write.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
});
const written = await toolSet.wiki_write.execute?.({ key: 'customer-rules' });
expect(wikiWrite).toHaveBeenCalledTimes(1);
expect(written).toMatchObject({ structured: { success: true } });

View file

@ -1,6 +1,6 @@
import type { Tool, ToolSet } from 'ai';
import { buildCanonicalPinsPromptBlock, type CanonicalPin } from '../canonical-pins.js';
import { createLookerQueryToSlTool } from '../adapters/looker/tools/looker-query-to-sl.tool.js';
import { createRuntimeToolDescriptorFromAiTool, type KtxRuntimeToolSet } from '../../llm/index.js';
import type { IngestProvenanceRow } from '../ports.js';
import { createReadRawFileTool } from '../tools/read-raw-file.tool.js';
import { createReadRawSpanTool } from '../tools/read-raw-span.tool.js';
@ -88,12 +88,12 @@ export interface BuildWuToolSetInput {
sourceKey?: string;
stagedDir: string;
wu: WorkUnit;
loadSkillTool: Record<string, Tool>;
emitUnmappedFallbackTool: Record<string, Tool>;
toolsetTools: ToolSet;
loadSkillTool: KtxRuntimeToolSet;
emitUnmappedFallbackTool: KtxRuntimeToolSet;
toolsetTools: KtxRuntimeToolSet;
}
function withoutWriteSlTools(toolset: ToolSet, wu: WorkUnit): ToolSet {
function withoutWriteSlTools(toolset: KtxRuntimeToolSet, wu: WorkUnit): KtxRuntimeToolSet {
if (!wu.slDisallowed) {
return toolset;
}
@ -103,9 +103,12 @@ function withoutWriteSlTools(toolset: ToolSet, wu: WorkUnit): ToolSet {
return next;
}
export function buildWuToolSet(input: BuildWuToolSetInput): ToolSet {
export function buildWuToolSet(input: BuildWuToolSetInput): KtxRuntimeToolSet {
const allowedPaths = new Set<string>([...input.wu.rawFiles, ...input.wu.dependencyPaths]);
const lookerTools: ToolSet = input.sourceKey === 'looker' ? { looker_query_to_sl: createLookerQueryToSlTool() } : {};
const lookerTools: KtxRuntimeToolSet =
input.sourceKey === 'looker'
? { looker_query_to_sl: createRuntimeToolDescriptorFromAiTool('looker_query_to_sl', createLookerQueryToSlTool()) }
: {};
const state = createVerificationLedgerState();
return withVerificationLedger(
withoutWriteSlTools(
@ -114,8 +117,14 @@ export function buildWuToolSet(input: BuildWuToolSetInput): ToolSet {
...lookerTools,
...input.loadSkillTool,
...input.emitUnmappedFallbackTool,
read_raw_file: createReadRawFileTool({ stagedDir: input.stagedDir, allowedPaths }),
read_raw_span: createReadRawSpanTool({ stagedDir: input.stagedDir, allowedPaths }),
read_raw_file: createRuntimeToolDescriptorFromAiTool(
'read_raw_file',
createReadRawFileTool({ stagedDir: input.stagedDir, allowedPaths }),
),
read_raw_span: createRuntimeToolDescriptorFromAiTool(
'read_raw_span',
createReadRawSpanTool({ stagedDir: input.stagedDir, allowedPaths }),
),
},
input.wu,
),

View file

@ -1,6 +1,5 @@
import type { AgentRunnerService } from '@ktx/context/agent';
import type { KtxModelRole } from '@ktx/llm';
import type { Tool } from 'ai';
import type { AgentRunnerPort, KtxRuntimeToolSet } from '@ktx/context';
import type { CaptureSession, MemoryAction } from '../../memory/index.js';
import { listTouchedSlSources, type TouchedSlSource } from '../../tools/index.js';
import type { WorkUnit } from '../types.js';
@ -14,12 +13,12 @@ export interface TouchedValidationResult {
export interface WorkUnitExecutionDeps {
sessionWorktreeGit: { revParseHead(): Promise<string | null> };
agentRunner: AgentRunnerService;
agentRunner: AgentRunnerPort;
validateTouchedSources: (touched: TouchedSlSource[]) => Promise<TouchedValidationResult>;
resetHardTo: (targetSha: string) => Promise<void>;
buildSystemPrompt: (wu: WorkUnit) => string;
buildUserPrompt: (wu: WorkUnit) => string;
buildToolSet: (wu: WorkUnit) => Record<string, Tool>;
buildToolSet: (wu: WorkUnit) => KtxRuntimeToolSet;
captureSession: CaptureSession;
sessionActions: MemoryAction[];
modelRole: KtxModelRole;

View file

@ -1,16 +1,15 @@
import type { AgentRunnerService } from '@ktx/context/agent';
import type { AgentRunnerPort, KtxRuntimeToolSet } from '@ktx/context';
import type { KtxModelRole } from '@ktx/llm';
import type { ToolSet } from 'ai';
import type { EvictionUnit } from '../types.js';
import type { StageIndex } from './stage-index.types.js';
export interface ReconciliationContext {
stageIndex: StageIndex;
evictionUnit: EvictionUnit | undefined;
agentRunner: AgentRunnerService;
agentRunner: AgentRunnerPort;
buildSystemPrompt: (idx: StageIndex, ev: EvictionUnit | undefined) => string;
buildUserPrompt: (idx: StageIndex, ev: EvictionUnit | undefined) => string;
buildToolSet: () => ToolSet;
buildToolSet: () => KtxRuntimeToolSet;
modelRole: KtxModelRole;
stepBudget: number;
sourceKey: string;

View file

@ -1,6 +1,6 @@
import { appendFile, mkdir } from 'node:fs/promises';
import { dirname } from 'node:path';
import type { ToolExecuteFunction, ToolExecutionOptions, ToolSet } from 'ai';
import type { KtxRuntimeToolSet } from '../../llm/index.js';
export interface ToolCallLogEntry {
ts: string;
@ -31,7 +31,7 @@ interface ToolCallLoggerOptions {
* sequential (`generateText` awaits each tool result), so per-WU files are
* effectively single-writer and lines land in call order.
*/
export function wrapToolsWithLogger<T extends ToolSet>(
export function wrapToolsWithLogger<T extends KtxRuntimeToolSet>(
tools: T,
logFilePath: string,
wuKey: string,
@ -44,17 +44,13 @@ export function wrapToolsWithLogger<T extends ToolSet>(
wrapped[name] = original;
continue;
}
const wrappedExecute: ToolExecuteFunction<unknown, unknown> = async (
input: unknown,
opts: ToolExecutionOptions,
) => {
const wrappedExecute = async (input: unknown) => {
const start = Date.now();
try {
const output = await (originalExecute as ToolExecuteFunction<unknown, unknown>)(input, opts);
const output = await originalExecute(input);
const entry: ToolCallLogEntry = {
ts: new Date().toISOString(),
wuKey,
toolCallId: opts.toolCallId,
toolName: name,
durationMs: Date.now() - start,
input,
@ -67,7 +63,6 @@ export function wrapToolsWithLogger<T extends ToolSet>(
const entry: ToolCallLogEntry = {
ts: new Date().toISOString(),
wuKey,
toolCallId: opts.toolCallId,
toolName: name,
durationMs: Date.now() - start,
input,

View file

@ -1,5 +1,5 @@
import { tool, type ToolExecuteFunction, type ToolExecutionOptions, type ToolSet } from 'ai';
import { z } from 'zod';
import type { KtxRuntimeToolDescriptor, KtxRuntimeToolSet } from '../../llm/index.js';
const verificationLedgerInputSchema = z.object({
summary: z.string().min(1).max(2000),
@ -37,22 +37,19 @@ export function createVerificationLedgerState(): VerificationLedgerState {
return { entries: [] };
}
export function withVerificationLedger(tools: ToolSet, state: VerificationLedgerState): ToolSet {
const wrapped: ToolSet = {};
export function withVerificationLedger(tools: KtxRuntimeToolSet, state: VerificationLedgerState): KtxRuntimeToolSet {
const wrapped: KtxRuntimeToolSet = {};
for (const [name, original] of Object.entries(tools)) {
if (!WRITE_TOOL_NAMES.has(name) || typeof original.execute !== 'function') {
wrapped[name] = original;
continue;
}
const originalExecute = original.execute;
const guardedExecute: ToolExecuteFunction<unknown, unknown> = async (
input: unknown,
opts: ToolExecutionOptions,
) => {
const guardedExecute = async (input: unknown) => {
if (state.entries.length === 0) {
return verificationRequiredOutput(name);
}
return (originalExecute as ToolExecuteFunction<unknown, unknown>)(input, opts);
return originalExecute(input);
};
wrapped[name] = { ...original, execute: guardedExecute };
}
@ -60,8 +57,9 @@ export function withVerificationLedger(tools: ToolSet, state: VerificationLedger
return wrapped;
}
function createRecordVerificationLedgerTool(state: VerificationLedgerState) {
return tool({
function createRecordVerificationLedgerTool(state: VerificationLedgerState): KtxRuntimeToolDescriptor {
return {
name: 'record_verification_ledger',
description:
'Record the pre-write verification ledger required by loaded ingest skills. Call this before wiki/SL/fallback writes to state what was verified, which tool calls support it, and what remains intentionally unverified.',
inputSchema: verificationLedgerInputSchema,
@ -78,7 +76,7 @@ function createRecordVerificationLedgerTool(state: VerificationLedgerState) {
structured: { success: true, entry },
};
},
});
};
}
function verificationRequiredOutput(toolName: string) {

View file

@ -0,0 +1,164 @@
import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider } from '@ktx/llm';
import { generateText, Output, stepCountIs, type FlexibleSchema, type TelemetrySettings, type ToolSet } from 'ai';
import type { z } from 'zod';
import { noopLogger, type KtxLogger } from '../core/index.js';
import { summarizeKtxLlmDebugRequest, type KtxLlmDebugRequestRecorder } from './debug-request-recorder.js';
import { createAiSdkToolSet } from './runtime-tools.js';
import type {
KtxGenerateObjectInput,
KtxGenerateTextInput,
KtxLlmRuntimePort,
RunLoopParams,
RunLoopResult,
} from './runtime-port.js';
/** Port that turns a flat map of string tags into AI SDK telemetry settings. */
export interface AgentTelemetryPort {
createTelemetry(tags: Record<string, string>): TelemetrySettings;
}
/** Dependencies accepted by the AiSdkKtxLlmRuntime constructor. */
export interface AiSdkKtxLlmRuntimeDeps {
// Resolves models per role and supplies message wrapping, tool-call repair, and fallback telemetry config.
llmProvider: KtxLlmProvider;
// Optional: when present, agent loops build their telemetry settings through it.
telemetry?: AgentTelemetryPort;
// Optional: the runtime falls back to noopLogger when omitted.
logger?: KtxLogger;
// Optional: when present, a summary of each agent-loop request is recorded before the request is sent.
debugRequestRecorder?: KtxLlmDebugRequestRecorder;
}
/** Tells whether the given tool map contains at least one own enumerable key. */
function hasTools(tools: Record<string, unknown>): boolean {
  const toolNames = Object.keys(tools);
  return toolNames.length !== 0;
}
/**
 * KtxLlmRuntimePort implementation that routes every request through the AI SDK
 * `generateText` call, using models resolved from the injected KtxLlmProvider.
 */
export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
  private readonly logger: KtxLogger;

  constructor(private readonly deps: AiSdkKtxLlmRuntimeDeps) {
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Generates plain text for a single user prompt.
   *
   * Models whose `provider` is `'deterministic'` short-circuit with a string
   * derived from the first 64 characters of the prompt and never reach the SDK.
   *
   * @throws Error when the SDK result carries no string `text`.
   */
  async generateText(input: KtxGenerateTextInput): Promise<string> {
    const { llmProvider } = this.deps;
    const model = llmProvider.getModel(input.role);
    if ((model as { provider?: string }).provider === 'deterministic') {
      const promptPreview = input.prompt.slice(0, 64).trim();
      return `Deterministic description for ${promptPreview || 'data source'}`;
    }

    const sdkTools = createAiSdkToolSet(input.tools ?? {});
    const wrapped = new KtxMessageBuilder(llmProvider).wrapSimple({
      system: input.system,
      messages: [{ role: 'user', content: input.prompt }],
      tools: sdkTools,
      model,
    });
    const prompt = splitKtxSystemMessages(wrapped.messages);
    const systemOption = prompt.system ? { system: prompt.system } : {};
    // Tool-call repair is only wired in when the caller actually supplied tools.
    const repairOption = hasTools(sdkTools)
      ? {
          experimental_repairToolCall: llmProvider.repairToolCallHandler({
            source: `ktx-${input.role}`,
          }),
        }
      : {};

    const response = await generateText({
      model,
      temperature: input.temperature ?? 0,
      ...systemOption,
      messages: prompt.messages,
      tools: wrapped.tools as ToolSet,
      ...repairOption,
    });

    if (typeof response.text !== 'string') {
      throw new Error('KTX LLM text generation returned no text');
    }
    return response.text;
  }

  /**
   * Generates a structured object by asking the SDK for output matching `input.schema`.
   *
   * @throws Error when the SDK result has a null or undefined `output`.
   */
  async generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
    input: KtxGenerateObjectInput<TOutput, TSchema>,
  ): Promise<TOutput> {
    const { llmProvider } = this.deps;
    const model = llmProvider.getModel(input.role);
    const sdkTools = createAiSdkToolSet(input.tools ?? {});
    const wrapped = new KtxMessageBuilder(llmProvider).wrapSimple({
      system: input.system,
      messages: [{ role: 'user', content: input.prompt }],
      tools: sdkTools,
      model,
    });
    const prompt = splitKtxSystemMessages(wrapped.messages);
    const systemOption = prompt.system ? { system: prompt.system } : {};
    const repairOption = hasTools(sdkTools)
      ? {
          experimental_repairToolCall: llmProvider.repairToolCallHandler({
            source: `ktx-${input.role}`,
          }),
        }
      : {};

    const response = await generateText({
      model,
      temperature: input.temperature ?? 0,
      ...systemOption,
      messages: prompt.messages,
      tools: wrapped.tools as ToolSet,
      ...repairOption,
      output: Output.object({ schema: input.schema as unknown as FlexibleSchema<TOutput> }),
    });

    if (response.output == null) {
      throw new Error('KTX LLM object generation returned no output');
    }
    return response.output as TOutput;
  }

  /**
   * Runs a tool-using agent loop bounded by `params.stepBudget` steps.
   *
   * Any failure inside the loop is logged as a warning and reported as
   * `{ stopReason: 'error', error }` instead of being thrown. Errors thrown by the
   * caller's `onStepFinish` hook are logged and ignored so they cannot abort the loop.
   */
  async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
    let finishedSteps = 0;

    // Counts each finished step, then forwards progress to the optional caller hook.
    const reportStep = async (): Promise<void> => {
      finishedSteps += 1;
      if (!params.onStepFinish) {
        return;
      }
      try {
        await params.onStepFinish({ stepIndex: finishedSteps, stepBudget: params.stepBudget });
      } catch (err) {
        this.logger.warn(
          `[agent-runner] onStepFinish callback threw; ignoring: ${
            err instanceof Error ? err.message : String(err)
          }`,
        );
      }
    };

    try {
      const { llmProvider, telemetry, debugRequestRecorder } = this.deps;
      const model = llmProvider.getModel(params.modelRole);
      const sdkTools = createAiSdkToolSet(params.toolSet);
      const wrapped = new KtxMessageBuilder(llmProvider).wrapSimple({
        system: params.systemPrompt,
        messages: [{ role: 'user', content: params.userPrompt }],
        tools: sdkTools,
        model,
      });
      const prompt = splitKtxSystemMessages(wrapped.messages);
      const operationName = params.telemetryTags.operationName ?? 'ktx-agent-runner';

      // Record the outgoing request (when a recorder is configured) before calling the model.
      await debugRequestRecorder?.record(
        summarizeKtxLlmDebugRequest({
          operationName,
          source: params.telemetryTags.source,
          jobId: params.telemetryTags.jobId,
          unitKey: params.telemetryTags.unitKey,
          modelRole: params.modelRole,
          modelId: (model as { modelId?: string }).modelId ?? params.modelRole,
          messages: wrapped.messages,
          tools: wrapped.tools as Record<string, { providerOptions?: unknown }>,
        }),
      );

      const telemetrySettings = telemetry?.createTelemetry(params.telemetryTags) ?? llmProvider.telemetryConfig();
      const repairToolCall = llmProvider.repairToolCallHandler({ source: operationName });
      const systemOption = prompt.system ? { system: prompt.system } : {};

      await generateText({
        model,
        temperature: 0,
        stopWhen: stepCountIs(params.stepBudget),
        experimental_telemetry: telemetrySettings,
        experimental_repairToolCall: repairToolCall,
        ...systemOption,
        messages: prompt.messages,
        tools: wrapped.tools as ToolSet,
        onStepFinish: reportStep,
      });
      return { stopReason: 'natural' };
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      this.logger.warn(`[agent-runner] loop failed: ${err.message}`);
      return { stopReason: 'error', error: err };
    }
  }
}

View file

@ -0,0 +1,19 @@
import { describe, expect, it } from 'vitest';
import { CLAUDE_CODE_PROVIDER_ENV_DENYLIST, createKtxClaudeCodeEnv } from './claude-code-env.js';
// Verifies that every denylisted provider variable is removed while unrelated variables survive.
describe('createKtxClaudeCodeEnv', () => {
  it('strips provider-routing credentials from the Claude Code child environment', () => {
    // Seed one value per denylisted name so each of them is exercised.
    const deniedPairs = CLAUDE_CODE_PROVIDER_ENV_DENYLIST.map((name) => [name, `${name}-value`]);
    const seeded = Object.fromEntries(deniedPairs);

    const env = createKtxClaudeCodeEnv({
      ...seeded,
      PATH: '/usr/bin',
      HOME: '/Users/test',
    });

    CLAUDE_CODE_PROVIDER_ENV_DENYLIST.forEach((name) => {
      expect(env).not.toHaveProperty(name);
    });
    expect(env.PATH).toBe('/usr/bin');
    expect(env.HOME).toBe('/Users/test');
  });
});

View file

@ -0,0 +1,23 @@
// Environment variable names that createKtxClaudeCodeEnv removes from the environment it returns.
// Judging by the names, they cover Anthropic credentials/routing, Google Cloud / Vertex settings,
// AWS credentials, and the Claude Code Bedrock/Vertex switches.
export const CLAUDE_CODE_PROVIDER_ENV_DENYLIST = [
'ANTHROPIC_API_KEY',
'ANTHROPIC_AUTH_TOKEN',
'ANTHROPIC_BASE_URL',
'ANTHROPIC_MODEL',
'ANTHROPIC_VERTEX_PROJECT_ID',
'CLOUD_ML_REGION',
'GOOGLE_APPLICATION_CREDENTIALS',
'GOOGLE_CLOUD_PROJECT',
'AWS_ACCESS_KEY_ID',
'AWS_SECRET_ACCESS_KEY',
'AWS_SESSION_TOKEN',
'AWS_REGION',
'AWS_PROFILE',
'CLAUDE_CODE_USE_BEDROCK',
'CLAUDE_CODE_USE_VERTEX',
] as const;
// Set view of the denylist used for exact, case-sensitive membership checks.
const DENYLIST = new Set<string>(CLAUDE_CODE_PROVIDER_ENV_DENYLIST);
/**
 * Builds a copy of `env` without any variable whose name is in the denylist.
 *
 * @param env Source environment; defaults to `process.env`.
 * @returns A new object holding every remaining entry of `env`, values untouched.
 */
export function createKtxClaudeCodeEnv(env: NodeJS.ProcessEnv = process.env): Record<string, string | undefined> {
  const keptEntries: Array<[string, string | undefined]> = [];
  for (const entry of Object.entries(env)) {
    if (DENYLIST.has(entry[0])) {
      continue;
    }
    keptEntries.push(entry);
  }
  return Object.fromEntries(keptEntries);
}

View file

@ -0,0 +1,17 @@
import { describe, expect, it } from 'vitest';
import { resolveClaudeCodeModel } from './claude-code-models.js';
// Covers alias expansion, pass-through of full model ids, and rejection of unknown names.
describe('resolveClaudeCodeModel', () => {
  const supportedCases: Array<[string, string]> = [
    ['sonnet', 'claude-sonnet-4-6'],
    ['opus', 'claude-opus-4-7'],
    ['haiku', 'claude-haiku-4-5'],
    ['claude-sonnet-4-6', 'claude-sonnet-4-6'],
  ];

  it.each(supportedCases)('maps %s to %s', (input, expected) => {
    const resolved = resolveClaudeCodeModel(input);
    expect(resolved).toBe(expected);
  });

  it('rejects unsupported aliases', () => {
    const resolveUnsupported = () => resolveClaudeCodeModel('gpt-5');
    expect(resolveUnsupported).toThrow('Unsupported Claude Code model');
  });
});

View file

@ -0,0 +1,19 @@
// Short model names accepted by resolveClaudeCodeModel, mapped to the full model id they expand to.
const CLAUDE_CODE_MODEL_ALIASES: Record<string, string> = {
sonnet: 'claude-sonnet-4-6',
opus: 'claude-opus-4-7',
haiku: 'claude-haiku-4-5',
};
// Shape of a full model id that is accepted as-is: claude-<sonnet|opus|haiku>-<digits>-<digits>.
const FULL_MODEL_ID = /^claude-(sonnet|opus|haiku)-[0-9]+-[0-9]+$/;
/**
 * Resolves a configured model name to a full Claude Code model id.
 *
 * Surrounding whitespace is ignored. Known aliases are expanded through
 * CLAUDE_CODE_MODEL_ALIASES; names that already match FULL_MODEL_ID are
 * returned unchanged.
 *
 * @param model Alias or full model id as configured by the user.
 * @returns The full model id.
 * @throws Error when the name is neither a known alias nor a full model id.
 */
export function resolveClaudeCodeModel(model: string): string {
  const normalized = model.trim();
  // Only own keys count as aliases. A bare index lookup would also resolve inherited
  // members such as "constructor", "toString" or "__proto__" and return a non-string.
  const alias = Object.prototype.hasOwnProperty.call(CLAUDE_CODE_MODEL_ALIASES, normalized)
    ? CLAUDE_CODE_MODEL_ALIASES[normalized]
    : undefined;
  if (alias) {
    return alias;
  }
  if (FULL_MODEL_ID.test(normalized)) {
    return normalized;
  }
  throw new Error(`Unsupported Claude Code model "${model}". Use sonnet, opus, haiku, or a claude-* model id.`);
}

View file

@ -0,0 +1,464 @@
import { describe, expect, it, vi } from 'vitest';
import { z } from 'zod';
import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk';
import { ClaudeCodeKtxLlmRuntime, mapClaudeCodeStopReason, runClaudeCodeAuthProbe } from './claude-code-runtime.js';
/** Test helper: replays the given messages, in order, as an async generator. */
async function* stream(messages: SDKMessage[]): AsyncGenerator<SDKMessage, void> {
  yield* messages;
}
// Test fixture: builds a `system`/`init` SDK message with empty tools, MCP servers, skills and
// plugins by default. Any field can be replaced through `overrides`, which is spread last.
function initMessage(overrides: Partial<Extract<SDKMessage, { type: 'system'; subtype: 'init' }>> = {}): Extract<
SDKMessage,
{ type: 'system'; subtype: 'init' }
> {
return {
type: 'system',
subtype: 'init',
apiKeySource: 'none' as never, // pragma: allowlist secret
claude_code_version: '0.3.142',
cwd: '/tmp/project',
tools: [],
mcp_servers: [],
model: 'claude-sonnet-4-6',
permissionMode: 'dontAsk',
slash_commands: [],
output_style: 'default',
skills: [],
plugins: [],
uuid: '00000000-0000-4000-8000-000000000001',
session_id: 'session-id',
...overrides,
};
}
// Test fixture: builds a `result` SDK message that defaults to a successful single-turn run with
// result text 'ok'. `overrides` is spread last; the cast lets tests produce error-subtype variants.
function resultMessage(overrides: Partial<Extract<SDKMessage, { type: 'result' }>> = {}): Extract<
SDKMessage,
{ type: 'result' }
> {
return {
type: 'result',
subtype: 'success',
duration_ms: 1,
duration_api_ms: 1,
is_error: false,
num_turns: 1,
result: 'ok',
stop_reason: null,
total_cost_usd: 0,
usage: {} as never,
modelUsage: {},
permission_denials: [],
errors: [],
uuid: '00000000-0000-4000-8000-000000000002',
session_id: 'session-id',
...overrides,
} as Extract<SDKMessage, { type: 'result' }>;
}
describe('ClaudeCodeKtxLlmRuntime', () => {
// Text generation must send the bare prompt plus the isolation options, and the env handed to
// the query must not carry the seeded ANTHROPIC_API_KEY.
it('passes isolation options and scrubbed env to text generation', async () => {
const query = vi.fn((_input: any) => stream([initMessage(), resultMessage({ result: 'hello' })]));
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: { ANTHROPIC_API_KEY: 'sk-ant-test', PATH: '/usr/bin' }, // pragma: allowlist secret
});
await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).resolves.toBe('hello');
expect(query).toHaveBeenCalledWith({
prompt: 'say hello',
options: expect.objectContaining({
cwd: '/tmp/project',
model: 'claude-sonnet-4-6',
maxTurns: 1,
settingSources: [],
skills: [],
plugins: [],
tools: [],
allowedTools: [],
permissionMode: 'dontAsk',
persistSession: false,
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
}),
});
});
// Object generation returns the structured output parsed by the caller's schema and forwards a
// JSON-schema output format to the query.
it('validates structured output with the caller schema', async () => {
const schema = z.object({ answer: z.string() });
const query = vi.fn((_input: any) => stream([initMessage(), resultMessage({ structured_output: { answer: 'yes' } })]));
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: {},
});
await expect(runtime.generateObject({ role: 'default', prompt: 'json', schema })).resolves.toEqual({ answer: 'yes' });
expect(query.mock.calls[0][0].options.outputFormat).toMatchObject({
type: 'json_schema',
schema: expect.objectContaining({ type: 'object' }),
});
});
// Agent loops allow exactly the `mcp__ktx__<tool>` ids for the supplied tool set, deny anything
// else through canUseTool, and report one step for the single top-level assistant message.
it('registers only exact KTX MCP tool ids and denies non-KTX tools', async () => {
const query = vi.fn((_input: any) =>
stream([
initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
{
type: 'assistant',
message: { role: 'assistant', content: [] },
parent_tool_use_id: null,
uuid: '00000000-0000-4000-8000-000000000003',
session_id: 'session-id',
} as unknown as SDKMessage,
resultMessage({ subtype: 'error_max_turns', is_error: true }),
]),
);
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: {},
});
const onStepFinish = vi.fn();
await runtime.runAgentLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
toolSet: {
load_skill: {
name: 'load_skill',
description: 'Load skill.',
inputSchema: z.object({ name: z.string() }),
execute: async () => ({ markdown: 'loaded' }),
},
},
stepBudget: 1,
telemetryTags: { operationName: 'test' },
onStepFinish,
});
const options = query.mock.calls[0][0].options;
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
behavior: 'allow',
toolUseID: '1',
});
expect(await options.canUseTool('Bash', {}, { signal: new AbortController().signal, toolUseID: '2' })).toMatchObject({
behavior: 'deny',
toolUseID: '2',
});
expect(onStepFinish).toHaveBeenCalledWith({ stepIndex: 1, stepBudget: 1 });
});
// Slash commands, skills and agents reported in the init message must not fail the isolation
// check for text generation or the auth probe; the Agent, Skill and SlashCommand tools stay denied.
it('treats host-discovered commands skills and agents as non-fatal init metadata for text and auth probe', async () => {
const hostDiscoveredInit = initMessage({
slash_commands: ['/help', '/compact', '/clear', '/user-command'],
skills: ['pdf', 'docx'],
agents: ['claude', 'Explore', 'general-purpose'],
});
const textQuery = vi.fn((_input: any) => stream([hostDiscoveredInit, resultMessage({ result: 'hello' })]));
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query: textQuery,
env: { ANTHROPIC_API_KEY: 'sk-ant-test', PATH: '/usr/bin' }, // pragma: allowlist secret
});
await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).resolves.toBe('hello');
const textOptions = textQuery.mock.calls[0][0].options;
expect(textOptions).toMatchObject({
settingSources: [],
skills: [],
plugins: [],
tools: [],
allowedTools: [],
permissionMode: 'dontAsk',
persistSession: false,
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
});
expect(textOptions.disallowedTools).toEqual(expect.arrayContaining(['Agent', 'Task', 'Bash']));
expect(await textOptions.canUseTool('Agent', {}, { signal: new AbortController().signal, toolUseID: 'agent' })).toMatchObject({
behavior: 'deny',
toolUseID: 'agent',
});
expect(await textOptions.canUseTool('Skill', {}, { signal: new AbortController().signal, toolUseID: 'skill' })).toMatchObject({
behavior: 'deny',
toolUseID: 'skill',
});
expect(
await textOptions.canUseTool('SlashCommand', {}, { signal: new AbortController().signal, toolUseID: 'slash' }),
).toMatchObject({
behavior: 'deny',
toolUseID: 'slash',
});
// Same init metadata, this time through the standalone auth probe.
const probeQuery = vi.fn((_input: any) => stream([hostDiscoveredInit, resultMessage({ result: 'ok' })]));
await expect(
runClaudeCodeAuthProbe({
projectDir: '/tmp/project',
model: 'sonnet',
query: probeQuery,
env: { ANTHROPIC_AUTH_TOKEN: 'token', HOME: '/Users/test' },
}),
).resolves.toEqual({ ok: true });
expect(probeQuery.mock.calls[0][0].options).toMatchObject({
settingSources: [],
skills: [],
plugins: [],
tools: [],
allowedTools: [],
permissionMode: 'dontAsk',
persistSession: false,
env: expect.objectContaining({ HOME: '/Users/test' }),
});
expect(probeQuery.mock.calls[0][0].options.env).not.toEqual(
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token' }),
);
});
// Host-reported slash commands, skills and agents are tolerated during agent loops as long as the
// active tools and MCP servers match the KTX set exactly; a max-turns result maps to 'budget'.
it('allows host-discovered context during agent loops while requiring exact KTX MCP tools and servers', async () => {
const query = vi.fn((_input: any) =>
stream([
initMessage({
tools: ['mcp__ktx__load_skill'],
mcp_servers: [{ name: 'ktx', status: 'connected' }],
slash_commands: ['/help', '/compact', '/clear'],
skills: ['memory-agent', 'doc-reader'],
agents: ['claude', 'Plan', 'Explore'],
}),
{
type: 'assistant',
message: { role: 'assistant', content: [] },
parent_tool_use_id: null,
uuid: '00000000-0000-4000-8000-000000000006',
session_id: 'session-id',
} as unknown as SDKMessage,
resultMessage({ subtype: 'error_max_turns', is_error: true }),
]),
);
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: {},
});
await expect(
runtime.runAgentLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
toolSet: {
load_skill: {
name: 'load_skill',
description: 'Load skill.',
inputSchema: z.object({ name: z.string() }),
execute: async () => ({ markdown: 'loaded' }),
},
},
stepBudget: 1,
telemetryTags: { operationName: 'test' },
}),
).resolves.toEqual({ stopReason: 'budget' });
const options = query.mock.calls[0][0].options;
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
behavior: 'allow',
toolUseID: '1',
});
expect(await options.canUseTool('Task', {}, { signal: new AbortController().signal, toolUseID: '2' })).toMatchObject({
behavior: 'deny',
toolUseID: '2',
});
expect(await options.canUseTool('Skill', {}, { signal: new AbortController().signal, toolUseID: '3' })).toMatchObject({
behavior: 'deny',
toolUseID: '3',
});
});
// An init message that reports a non-KTX tool, lacks the expected KTX tool, and lists a foreign
// MCP server and a plugin must fail with an isolation error naming each of them.
it('still rejects unexpected tools, missing KTX tools, plugins, and non-KTX MCP servers from init messages', async () => {
const query = vi.fn((_input: any) =>
stream([
initMessage({
tools: ['Bash'],
mcp_servers: [{ name: 'filesystem', status: 'connected' }],
plugins: [{ name: 'host-plugin', path: '/tmp/plugin' }],
}),
resultMessage({ result: 'hello' }),
]),
);
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: {},
});
await expect(
runtime.generateText({
role: 'default',
prompt: 'say hello',
tools: {
load_skill: {
name: 'load_skill',
description: 'Load skill.',
inputSchema: z.object({ name: z.string() }),
execute: async () => ({ markdown: 'loaded' }),
},
},
}),
).rejects.toThrow(
/Claude Code runtime isolation failed: .*tools=Bash.*missing_tools=mcp__ktx__load_skill.*mcp_servers=filesystem.*plugins=host-plugin/,
);
});
// Both generateObject and runAgentLoop must hand the query an env that keeps harmless variables
// (PATH, HOME) and drops the seeded provider credentials and switches.
it('passes scrubbed env to object generation and agent loops', async () => {
const schema = z.object({ answer: z.string() });
const objectQuery = vi.fn((_input: any) =>
stream([initMessage(), resultMessage({ structured_output: { answer: 'yes' } })]),
);
const objectRuntime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query: objectQuery,
env: { ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod', PATH: '/usr/bin' }, // pragma: allowlist secret
});
await expect(objectRuntime.generateObject({ role: 'default', prompt: 'json', schema })).resolves.toEqual({
answer: 'yes',
});
expect(objectQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ PATH: '/usr/bin' }));
expect(objectQuery.mock.calls[0][0].options.env).not.toEqual(
expect.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }), // pragma: allowlist secret
);
// Second half: the same env expectations for an agent loop.
const agentQuery = vi.fn((_input: any) =>
stream([
initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
{
type: 'assistant',
message: { role: 'assistant', content: [] },
parent_tool_use_id: null,
uuid: '00000000-0000-4000-8000-000000000004',
session_id: 'session-id',
} as unknown as SDKMessage,
resultMessage({ subtype: 'error_max_turns', is_error: true }),
]),
);
const agentRuntime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query: agentQuery,
env: { ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1', HOME: '/Users/test' },
});
await agentRuntime.runAgentLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
toolSet: {
load_skill: {
name: 'load_skill',
description: 'Load skill.',
inputSchema: z.object({ name: z.string() }),
execute: async () => ({ markdown: 'loaded' }),
},
},
stepBudget: 1,
telemetryTags: { operationName: 'test' },
});
expect(agentQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ HOME: '/Users/test' }));
expect(agentQuery.mock.calls[0][0].options.env).not.toEqual(
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' }),
);
});
// A throwing onStepFinish hook must not fail the loop: the run still ends as 'natural' and the
// hook's error message is passed to logger.warn.
it('logs and ignores onStepFinish callback errors', async () => {
const query = vi.fn((_input: any) =>
stream([
initMessage(),
{
type: 'assistant',
message: { role: 'assistant', content: [] },
parent_tool_use_id: null,
uuid: '00000000-0000-4000-8000-000000000005',
session_id: 'session-id',
} as unknown as SDKMessage,
resultMessage({ subtype: 'success', terminal_reason: 'completed' }),
]),
);
const logger = {
debug: vi.fn(),
log: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
};
const runtime = new ClaudeCodeKtxLlmRuntime({
projectDir: '/tmp/project',
modelSlots: { default: 'sonnet' },
query,
env: {},
logger,
});
await expect(
runtime.runAgentLoop({
modelRole: 'default',
systemPrompt: 'system',
userPrompt: 'user',
toolSet: {},
stepBudget: 1,
telemetryTags: { operationName: 'test' },
onStepFinish: async () => {
throw new Error('callback exploded');
},
}),
).resolves.toEqual({ stopReason: 'natural' });
expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('callback exploded'));
});
// Pins the stop-reason mapping: any max-turns signal is 'budget', a completed success is
// 'natural', and other error subtypes are 'error'.
it('maps max-turn terminal reasons to budget', () => {
expect(mapClaudeCodeStopReason(resultMessage({ subtype: 'error_max_turns' }))).toBe('budget');
expect(mapClaudeCodeStopReason(resultMessage({ terminal_reason: 'max_turns' }))).toBe('budget');
expect(mapClaudeCodeStopReason(resultMessage({ stop_reason: 'max_turns' }))).toBe('budget');
expect(mapClaudeCodeStopReason(resultMessage({ subtype: 'success', terminal_reason: 'completed' }))).toBe('natural');
expect(mapClaudeCodeStopReason(resultMessage({ subtype: 'error_during_execution' }))).toBe('error');
});
// The auth probe must run with the same isolation options as regular calls and must not forward
// the seeded ANTHROPIC_API_KEY.
it('auth probe uses isolation options and a scrubbed env', async () => {
const query = vi.fn((_input: any) => stream([initMessage(), resultMessage({ result: 'ok' })]));
await expect(
runClaudeCodeAuthProbe({ projectDir: '/tmp/project', model: 'sonnet', query, env: { ANTHROPIC_API_KEY: 'sk-ant-test' } }), // pragma: allowlist secret
).resolves.toEqual({ ok: true });
expect(query.mock.calls[0][0].options).toMatchObject({
settingSources: [],
skills: [],
plugins: [],
tools: [],
allowedTools: [],
persistSession: false,
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
});
});
// An unsupported model name is reported with the resolver's own message, without the
// authentication guidance that wraps other probe failures.
it('reports unsupported Claude Code models without framing them as auth failures', async () => {
await expect(
runClaudeCodeAuthProbe({
projectDir: '/tmp/project',
model: 'gpt-5',
query: vi.fn(),
env: {},
}),
).resolves.toEqual({
ok: false,
message: 'Unsupported Claude Code model "gpt-5". Use sonnet, opus, haiku, or a claude-* model id.',
});
});
});

View file

@ -0,0 +1,327 @@
import {
createSdkMcpServer,
query as defaultQuery,
type Options,
type SDKMessage,
type SDKResultMessage,
} from '@anthropic-ai/claude-agent-sdk';
import { z } from 'zod';
import { noopLogger, type KtxLogger } from '../core/index.js';
import { createKtxClaudeCodeEnv } from './claude-code-env.js';
import { resolveClaudeCodeModel } from './claude-code-models.js';
import { createClaudeSdkTools, mcpToolIds } from './runtime-tools.js';
import type {
KtxGenerateObjectInput,
KtxGenerateTextInput,
KtxLlmRuntimePort,
KtxRuntimeToolSet,
RunLoopParams,
RunLoopResult,
RunLoopStopReason,
} from './runtime-port.js';
// Signature of the query function the runtime calls: it takes the SDK `query` parameters and
// yields SDK messages. Declared separately so callers can inject a replacement.
type QueryFn = (params: Parameters<typeof defaultQuery>[0]) => AsyncIterable<SDKMessage>;
/** Dependencies accepted by the ClaudeCodeKtxLlmRuntime constructor. */
export interface ClaudeCodeKtxLlmRuntimeDeps {
// Passed to the SDK as `cwd` for every query.
projectDir: string;
// Model name per role; roles without an entry fall back to `default`.
modelSlots: { default: string } & Partial<Record<string, string>>;
// Optional replacement for the SDK `query` function.
query?: QueryFn;
// Optional source environment; it is passed through createKtxClaudeCodeEnv before reaching the SDK.
env?: NodeJS.ProcessEnv;
// Optional; the runtime falls back to noopLogger when omitted.
logger?: KtxLogger;
}
// Tool names passed to the SDK as `disallowedTools` on every query built by baseOptions.
const BUILTIN_TOOLS = [
'Agent',
'Task',
'AskUserQuestion',
'Bash',
'Read',
'Edit',
'Write',
'Glob',
'Grep',
'WebFetch',
'WebSearch',
'TodoWrite',
];
/** Type guard: true when the SDK message is a `result` message. */
function isResult(message: SDKMessage): message is SDKResultMessage {
  const { type } = message;
  return type === 'result';
}
/**
 * Converts a non-success result message into an Error.
 *
 * @returns `undefined` for a `success` result; otherwise an Error naming the
 *   subtype, followed by the reported errors joined with "; " when there are any.
 */
function resultError(result: SDKResultMessage): Error | undefined {
  if (result.subtype !== 'success') {
    const suffix = result.errors.length === 0 ? '' : `: ${result.errors.join('; ')}`;
    return new Error(`Claude Code query failed (${result.subtype})${suffix}`);
  }
  return undefined;
}
/**
 * Maps a Claude Code result message to a run-loop stop reason.
 *
 * - `'budget'` when the subtype, terminal reason or stop reason signals max turns.
 * - `'natural'` for a success whose terminal reason is absent or `'completed'`.
 * - `'error'` for everything else.
 */
export function mapClaudeCodeStopReason(result: SDKResultMessage): RunLoopStopReason {
  const hitTurnLimit =
    result.subtype === 'error_max_turns' ||
    result.terminal_reason === 'max_turns' ||
    result.stop_reason === 'max_turns';
  if (hitTurnLimit) {
    return 'budget';
  }
  if (result.subtype !== 'success') {
    return 'error';
  }
  const endedCleanly = !result.terminal_reason || result.terminal_reason === 'completed';
  return endedCleanly ? 'natural' : 'error';
}
/** Converts a zod schema to a draft-7 JSON Schema object via `z.toJSONSchema`. */
function jsonSchema(schema: z.ZodType): Record<string, unknown> {
  const converted = z.toJSONSchema(schema, { target: 'draft-7' });
  return converted as Record<string, unknown>;
}
/**
 * Picks the configured model for `role`, falling back to the `default` slot when the
 * role has no entry, and resolves it to a full model id.
 */
function modelForRole(modelSlots: ClaudeCodeKtxLlmRuntimeDeps['modelSlots'], role: string): string {
  const roleSlot = modelSlots[role];
  const configured = roleSlot ?? modelSlots.default;
  return resolveClaudeCodeModel(configured);
}
/**
 * Checks a `system`/`init` message against the expected isolation state; every other
 * message is ignored.
 *
 * The check fails when the session reports a tool outside `allowedToolIds`, lacks an
 * allowed tool, reports an MCP server outside `expectedMcpServerNames`, lacks an
 * expected server, or reports any plugin. Slash commands, skills and agents are only
 * included in the error text and never cause a failure on their own.
 *
 * @throws Error listing every violation found.
 */
function assertInitIsolation(
  message: SDKMessage,
  allowedToolIds: Set<string>,
  expectedMcpServerNames: Set<string>,
): void {
  if (!(message.type === 'system' && message.subtype === 'init')) {
    return;
  }

  const listOrNone = (items: string[]): string => items.join(',') || '(none)';

  const reportedTools = new Set(message.tools);
  const reportedServers = message.mcp_servers.map((server) => server.name);

  const unexpectedTools = message.tools.filter((toolName) => !allowedToolIds.has(toolName));
  const missingTools = Array.from(allowedToolIds).filter((toolName) => !reportedTools.has(toolName));
  const unexpectedMcpServers = reportedServers.filter((name) => !expectedMcpServerNames.has(name));
  const missingMcpServers = Array.from(expectedMcpServerNames).filter((name) => !reportedServers.includes(name));
  const unexpectedPlugins = message.plugins.map((plugin) => plugin.name);

  const violationCount =
    unexpectedTools.length +
    missingTools.length +
    unexpectedMcpServers.length +
    missingMcpServers.length +
    unexpectedPlugins.length;
  if (violationCount === 0) {
    return;
  }

  throw new Error(
    `Claude Code runtime isolation failed: tools=${listOrNone(unexpectedTools)} missing_tools=${listOrNone(
      missingTools,
    )} mcp_servers=${listOrNone(unexpectedMcpServers)} missing_mcp_servers=${listOrNone(
      missingMcpServers,
    )} plugins=${listOrNone(unexpectedPlugins)} host_slash_commands=${
      message.slash_commands.length
    } host_skills=${message.skills.length} host_agents=${listOrNone(message.agents ?? [])}`,
  );
}
/**
 * Names of the MCP servers a query is expected to report: just `ktx` when at least one
 * runtime tool is supplied, otherwise none.
 */
function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set<string> {
  const names = new Set<string>();
  if (tools && Object.keys(tools).length > 0) {
    names.add('ktx');
  }
  return names;
}
/**
 * Builds the SDK options shared by every Claude Code query issued by this module.
 *
 * The options disable setting sources, skills, plugins and built-in tools, allow only
 * the MCP ids derived from `input.tools`, deny everything else through `canUseTool`,
 * turn off session persistence, and pass an environment filtered by
 * createKtxClaudeCodeEnv. An in-process `ktx` MCP server is attached only when at
 * least one runtime tool is supplied.
 */
function baseOptions(input: {
  projectDir: string;
  model: string;
  env: NodeJS.ProcessEnv | undefined;
  maxTurns: number;
  tools?: KtxRuntimeToolSet;
}): Options {
  const toolIds = mcpToolIds(input.tools ?? {});
  const allowedToolIds = new Set(toolIds);

  // Permission gate: only the exact KTX MCP tool ids for this call are allowed.
  const canUseTool: NonNullable<Options['canUseTool']> = async (toolName, _toolInput, options) => {
    if (allowedToolIds.has(toolName)) {
      return { behavior: 'allow', toolUseID: options.toolUseID };
    }
    return {
      behavior: 'deny',
      message: `KTX claude-code runtime only permits current KTX MCP tools; denied ${toolName}.`,
      toolUseID: options.toolUseID,
    };
  };

  const env = createKtxClaudeCodeEnv(input.env);
  const ktxTools = input.tools;
  const mcpServers =
    ktxTools && Object.keys(ktxTools).length > 0
      ? { ktx: createSdkMcpServer({ name: 'ktx', tools: createClaudeSdkTools(ktxTools) }) }
      : undefined;

  return {
    cwd: input.projectDir,
    model: input.model,
    maxTurns: input.maxTurns,
    settingSources: [],
    skills: [],
    plugins: [],
    tools: [],
    allowedTools: toolIds,
    disallowedTools: BUILTIN_TOOLS,
    canUseTool,
    permissionMode: 'dontAsk',
    persistSession: false,
    env,
    ...(mcpServers ? { mcpServers } : {}),
  };
}
/**
 * Drains a Claude Code query and returns its result message.
 *
 * Every streamed message goes through the init isolation check. Each top-level
 * assistant message (one whose `parent_tool_use_id` is null) triggers the optional
 * `onAssistantTurn` hook. When several result messages arrive, the last one wins.
 *
 * @throws Error when the stream ends without a result message, or when the
 *   isolation check fails.
 */
async function collectResult(params: {
  query: QueryFn;
  prompt: string;
  options: Options;
  allowedToolIds: Set<string>;
  expectedMcpServerNames: Set<string>;
  onAssistantTurn?: () => Promise<void>;
}): Promise<SDKResultMessage> {
  let finalResult: SDKResultMessage | undefined;
  const messages = params.query({ prompt: params.prompt, options: params.options });

  for await (const message of messages) {
    assertInitIsolation(message, params.allowedToolIds, params.expectedMcpServerNames);

    const isTopLevelAssistantTurn = message.type === 'assistant' && message.parent_tool_use_id === null;
    if (isTopLevelAssistantTurn) {
      await params.onAssistantTurn?.();
    }

    if (isResult(message)) {
      finalResult = message;
    }
  }

  if (finalResult) {
    return finalResult;
  }
  throw new Error('Claude Code query returned no result message');
}
/**
 * KtxLlmRuntimePort implementation that runs every request as an isolated Claude Code
 * query (see baseOptions), using the injected `query` function or the SDK default.
 */
export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
  private readonly runQuery: QueryFn;
  private readonly logger: KtxLogger;

  constructor(private readonly deps: ClaudeCodeKtxLlmRuntimeDeps) {
    this.runQuery = deps.query ?? defaultQuery;
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Generates plain text in a single turn.
   *
   * The optional system text and the prompt are joined with a blank line and sent as
   * one prompt.
   *
   * @throws Error when the query reports a non-success result, returns no result
   *   message, or fails the init isolation check.
   */
  async generateText(input: KtxGenerateTextInput): Promise<string> {
    const options = baseOptions({
      projectDir: this.deps.projectDir,
      model: modelForRole(this.deps.modelSlots, input.role),
      env: this.deps.env,
      maxTurns: 1,
      tools: input.tools,
    });
    const result = await collectResult({
      query: this.runQuery,
      prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
      options,
      allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
      expectedMcpServerNames: expectedMcpServerNames(input.tools),
    });
    const error = resultError(result);
    if (error) {
      throw error;
    }
    // Unreachable after the check above; kept so the type narrows to the success variant.
    if (result.subtype !== 'success') {
      throw new Error(`Claude Code query failed (${result.subtype})`);
    }
    return result.result;
  }

  /**
   * Generates a structured object in a single turn.
   *
   * The caller's zod schema is sent as a JSON-schema output format, and the returned
   * `structured_output` is parsed with the same schema before being returned.
   *
   * @throws Error on a non-success result, a missing result message, or an isolation
   *   failure; also whatever `schema.parse` throws on invalid output.
   */
  async generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
    input: KtxGenerateObjectInput<TOutput, TSchema>,
  ): Promise<TOutput> {
    const options = {
      ...baseOptions({
        projectDir: this.deps.projectDir,
        model: modelForRole(this.deps.modelSlots, input.role),
        env: this.deps.env,
        maxTurns: 1,
        tools: input.tools,
      }),
      outputFormat: { type: 'json_schema' as const, schema: jsonSchema(input.schema as z.ZodType) },
    };
    const result = await collectResult({
      query: this.runQuery,
      prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
      options,
      allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
      expectedMcpServerNames: expectedMcpServerNames(input.tools),
    });
    const error = resultError(result);
    if (error) {
      throw error;
    }
    // Unreachable after the check above; kept so the type narrows to the success variant.
    if (result.subtype !== 'success') {
      throw new Error(`Claude Code query failed (${result.subtype})`);
    }
    return (input.schema as z.ZodType<TOutput>).parse(result.structured_output);
  }

  /**
   * Runs a tool-using agent loop limited to `params.stepBudget` turns.
   *
   * Never throws: failures are returned as `{ stopReason: 'error', error }` and logged
   * as a warning. Every `'error'` outcome carries an Error, including a success result
   * that ended with a terminal reason other than "completed". Errors thrown by the
   * caller's `onStepFinish` hook are logged and ignored.
   */
  async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
    let stepIndex = 0;
    try {
      const options = baseOptions({
        projectDir: this.deps.projectDir,
        model: modelForRole(this.deps.modelSlots, params.modelRole),
        env: this.deps.env,
        maxTurns: params.stepBudget,
        tools: params.toolSet,
      });
      const result = await collectResult({
        query: this.runQuery,
        prompt: params.userPrompt,
        options: { ...options, systemPrompt: params.systemPrompt },
        allowedToolIds: new Set(mcpToolIds(params.toolSet)),
        expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
        onAssistantTurn: async () => {
          stepIndex += 1;
          if (!params.onStepFinish) {
            return;
          }
          try {
            await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
          } catch (error) {
            this.logger.warn(
              `[claude-code-runner] onStepFinish callback threw; ignoring: ${
                error instanceof Error ? error.message : String(error)
              }`,
            );
          }
        },
      });
      const stopReason = mapClaudeCodeStopReason(result);
      if (stopReason !== 'error') {
        return { stopReason };
      }
      // A success result with an abnormal terminal reason has no SDK error attached,
      // so synthesize one instead of returning an 'error' outcome without a cause.
      const error =
        resultError(result) ??
        new Error(`Claude Code query stopped with terminal reason "${result.terminal_reason ?? 'unknown'}"`);
      this.logger.warn(`[claude-code-runner] loop failed: ${error.message}`);
      return { stopReason, error };
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      this.logger.warn(`[claude-code-runner] loop failed: ${err.message}`);
      return { stopReason: 'error', error: err };
    }
  }
}
/**
 * Checks that Claude Code can answer a trivial single-turn prompt under the runtime's
 * isolation options.
 *
 * Never throws. An unsupported model name is reported with the resolver's own message.
 * Any other failure is reported with authentication guidance followed by the
 * underlying error message.
 */
export async function runClaudeCodeAuthProbe(input: {
  projectDir: string;
  model: string;
  query?: QueryFn;
  env?: NodeJS.ProcessEnv;
}): Promise<{ ok: true } | { ok: false; message: string }> {
  const describeError = (error: unknown): string => (error instanceof Error ? error.message : String(error));

  // Resolve the model first so a bad model name is not framed as an authentication problem.
  let resolvedModel: string;
  try {
    resolvedModel = resolveClaudeCodeModel(input.model);
  } catch (error) {
    return { ok: false, message: describeError(error) };
  }

  try {
    const probeOptions = baseOptions({
      projectDir: input.projectDir,
      model: resolvedModel,
      env: input.env,
      maxTurns: 1,
    });
    const probeResult = await collectResult({
      query: input.query ?? defaultQuery,
      prompt: 'Reply with exactly: ok',
      options: probeOptions,
      allowedToolIds: new Set(),
      expectedMcpServerNames: new Set(),
    });
    const failure = resultError(probeResult);
    if (failure) {
      throw failure;
    }
    return { ok: true };
  } catch (error) {
    return {
      ok: false,
      message: `Claude Code authentication is not usable. Authenticate Claude Code locally with the Claude Code CLI, then rerun setup or the command. ${describeError(error)}`,
    };
  }
}

View file

@ -1,85 +1,12 @@
import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider, type KtxModelRole } from '@ktx/llm';
import { generateText, Output, type FlexibleSchema, type ToolSet } from 'ai';
import type { z } from 'zod';
import type { KtxGenerateObjectInput, KtxGenerateTextInput, KtxLlmRuntimePort } from './runtime-port.js';
type GenerateTextInput = Parameters<typeof generateText>[0];
type GenerateTextFn = (input: GenerateTextInput) => Promise<{ text?: string; output?: unknown }>;
function hasTools(tools: ToolSet): boolean {
return Object.keys(tools).length > 0;
export async function generateKtxText(input: KtxGenerateTextInput & { runtime: KtxLlmRuntimePort }): Promise<string> {
return input.runtime.generateText(input);
}
interface GenerateKtxTextInput {
llmProvider: KtxLlmProvider;
role: KtxModelRole;
prompt: string;
system?: string;
tools?: ToolSet;
temperature?: number;
generateText?: GenerateTextFn;
}
export async function generateKtxText(input: GenerateKtxTextInput): Promise<string> {
const model = input.llmProvider.getModel(input.role);
if ((model as { provider?: string }).provider === 'deterministic') {
return `Deterministic description for ${input.prompt.slice(0, 64).trim() || 'data source'}`;
}
const built = new KtxMessageBuilder(input.llmProvider).wrapSimple({
system: input.system,
messages: [{ role: 'user', content: input.prompt }],
tools: input.tools ?? {},
model,
});
const split = splitKtxSystemMessages(built.messages);
const result = await (input.generateText ?? generateText)({
model,
temperature: input.temperature ?? 0,
...(split.system ? { system: split.system } : {}),
messages: split.messages,
tools: built.tools as ToolSet,
...(hasTools(built.tools as ToolSet)
? {
experimental_repairToolCall: input.llmProvider.repairToolCallHandler({
source: `ktx-${input.role}`,
}),
}
: {}),
});
if (typeof result.text !== 'string') {
throw new Error('KTX LLM text generation returned no text');
}
return result.text;
}
export async function generateKtxObject<TOutput, TSchema>(
input: GenerateKtxTextInput & { schema: TSchema },
export async function generateKtxObject<TOutput, TSchema extends z.ZodType<TOutput>>(
input: KtxGenerateObjectInput<TOutput, TSchema> & { runtime: KtxLlmRuntimePort },
): Promise<TOutput> {
const model = input.llmProvider.getModel(input.role);
const built = new KtxMessageBuilder(input.llmProvider).wrapSimple({
system: input.system,
messages: [{ role: 'user', content: input.prompt }],
tools: input.tools ?? {},
model,
});
const split = splitKtxSystemMessages(built.messages);
const result = await (input.generateText ?? generateText)({
model,
temperature: input.temperature ?? 0,
...(split.system ? { system: split.system } : {}),
messages: split.messages,
tools: built.tools as ToolSet,
...(hasTools(built.tools as ToolSet)
? {
experimental_repairToolCall: input.llmProvider.repairToolCallHandler({
source: `ktx-${input.role}`,
}),
}
: {}),
output: Output.object({
schema: input.schema as FlexibleSchema<TOutput>,
}),
});
if (result.output == null) {
throw new Error('KTX LLM object generation returned no output');
}
return result.output as TOutput;
return input.runtime.generateObject(input);
}

View file

@ -1,5 +1,31 @@
export { KtxIngestEmbeddingPortAdapter, KtxScanEmbeddingPortAdapter } from './embedding-port.js';
export { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
export type { AgentTelemetryPort, AiSdkKtxLlmRuntimeDeps } from './ai-sdk-runtime.js';
export { createKtxClaudeCodeEnv, CLAUDE_CODE_PROVIDER_ENV_DENYLIST } from './claude-code-env.js';
export { resolveClaudeCodeModel } from './claude-code-models.js';
export { ClaudeCodeKtxLlmRuntime, mapClaudeCodeStopReason, runClaudeCodeAuthProbe } from './claude-code-runtime.js';
export { generateKtxObject, generateKtxText } from './generation.js';
export type {
AgentRunnerPort,
KtxGenerateObjectInput,
KtxGenerateTextInput,
KtxLlmRuntimePort,
KtxRuntimeToolDescriptor,
KtxRuntimeToolOutput,
KtxRuntimeToolSet,
RunLoopParams,
RunLoopResult,
RunLoopStepInfo,
RunLoopStopReason,
} from './runtime-port.js';
export { RuntimeAgentRunner } from './runtime-port.js';
export {
createAiSdkToolSet,
createClaudeSdkTools,
createRuntimeToolDescriptorFromAiTool,
createRuntimeToolSetFromAiSdkTools,
normalizeKtxRuntimeToolOutput,
} from './runtime-tools.js';
export type {
KtxLlmDebugProviderOptionsEntry,
KtxLlmDebugRequest,
@ -15,6 +41,7 @@ export {
MANAGED_SENTENCE_TRANSFORMERS_BASE_URL_ENV,
createLocalKtxEmbeddingProviderFromConfig,
createLocalKtxLlmProviderFromConfig,
createLocalKtxLlmRuntimeFromConfig,
resolveLocalKtxEmbeddingConfig,
resolveLocalKtxLlmConfig,
} from './local-config.js';

View file

@ -9,11 +9,17 @@ import {
} from '@ktx/llm';
import { resolveKtxConfigReference } from '../core/config-reference.js';
import type { KtxProjectEmbeddingConfig, KtxProjectLlmConfig } from '../project/config.js';
import { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
import { ClaudeCodeKtxLlmRuntime } from './claude-code-runtime.js';
import type { KtxLlmRuntimePort } from './runtime-port.js';
/**
 * Optional overrides accepted by the local config factories, mainly so callers and tests
 * can substitute the environment and the provider/runtime constructors.
 */
interface LocalConfigDeps {
/** Environment used for config resolution (falls back to `process.env`) and forwarded as-is to the Claude Code runtime. */
env?: NodeJS.ProcessEnv;
/** Project directory for the Claude Code runtime; the runtime factory throws if it is missing for the `claude-code` backend. */
projectDir?: string;
/** Replaces the default `createKtxLlmProvider` factory. */
createKtxLlmProvider?: typeof createKtxLlmProvider;
/** Presumably replaces the default `createKtxEmbeddingProvider` factory; its use is outside this view. */
createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider;
/** Replaces the default `new ClaudeCodeKtxLlmRuntime(deps)` construction. */
createClaudeCodeRuntime?: (deps: ConstructorParameters<typeof ClaudeCodeKtxLlmRuntime>[0]) => KtxLlmRuntimePort;
/** Replaces the default `new AiSdkKtxLlmRuntime(deps)` construction. */
createAiSdkRuntime?: (deps: { llmProvider: KtxLlmProvider }) => KtxLlmRuntimePort;
}
export const MANAGED_SENTENCE_TRANSFORMERS_BASE_URL = 'managed:local-embeddings';
@ -106,7 +112,33 @@ export function createLocalKtxLlmProviderFromConfig(
deps: LocalConfigDeps = {},
): KtxLlmProvider | null {
const resolved = resolveLocalKtxLlmConfig(config, deps.env ?? process.env);
return resolved ? (deps.createKtxLlmProvider ?? createKtxLlmProvider)(resolved) : null;
if (!resolved || resolved.backend === 'claude-code') {
return null;
}
return (deps.createKtxLlmProvider ?? createKtxLlmProvider)(resolved);
}
/**
 * Builds the LLM runtime that matches the project's LLM configuration.
 *
 * - Returns `null` when the config resolves to nothing.
 * - For the `claude-code` backend, builds a Claude Code runtime and requires `deps.projectDir`.
 * - For every other resolved backend, creates an LLM provider and wraps it in the AI SDK runtime.
 *
 * @param config - The project's `llm` configuration.
 * @param deps - Optional environment, project directory and factory overrides.
 * @returns The runtime, or `null` when no LLM config resolves.
 * @throws Error when the backend is `claude-code` and `deps.projectDir` is missing.
 */
export function createLocalKtxLlmRuntimeFromConfig(
  config: KtxProjectLlmConfig,
  deps: LocalConfigDeps = {},
): KtxLlmRuntimePort | null {
  const resolved = resolveLocalKtxLlmConfig(config, deps.env ?? process.env);
  if (!resolved) {
    return null;
  }

  if (resolved.backend !== 'claude-code') {
    const buildProvider = deps.createKtxLlmProvider ?? createKtxLlmProvider;
    const llmProvider = buildProvider(resolved);
    const buildAiSdkRuntime = deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps));
    return buildAiSdkRuntime({ llmProvider });
  }

  // Claude Code runs against a concrete project directory, so it cannot be defaulted.
  const { projectDir } = deps;
  if (!projectDir) {
    throw new Error('projectDir is required when creating the claude-code LLM runtime');
  }
  const buildClaudeCodeRuntime =
    deps.createClaudeCodeRuntime ?? ((runtimeDeps) => new ClaudeCodeKtxLlmRuntime(runtimeDeps));
  return buildClaudeCodeRuntime({
    projectDir,
    modelSlots: resolved.modelSlots,
    env: deps.env,
  });
}
function resolveSentenceTransformersBaseUrl(

View file

@ -0,0 +1,25 @@
import { describe, expect, it, vi } from 'vitest';
import { createLocalKtxLlmProviderFromConfig, createLocalKtxLlmRuntimeFromConfig } from './local-config.js';
// Verifies backend routing for the claude-code backend: the runtime factory builds a
// Claude Code runtime, while the AI SDK provider factory opts out by returning null.
describe('local KTX LLM runtime config', () => {
it('creates a Claude Code runtime for claude-code backend without creating an AI SDK provider', () => {
const runtime = createLocalKtxLlmRuntimeFromConfig(
{
provider: { backend: 'claude-code' },
models: { default: 'sonnet', triage: 'haiku' },
},
// The stub factory echoes the deps it receives so they can be asserted on below.
{ env: {}, projectDir: '/tmp/project', createClaudeCodeRuntime: vi.fn((deps) => ({ deps }) as never) },
);
expect(runtime).toMatchObject({ deps: expect.objectContaining({ projectDir: '/tmp/project' }) });
});
it('returns null from the AI SDK provider factory for claude-code backend', () => {
expect(
createLocalKtxLlmProviderFromConfig({
provider: { backend: 'claude-code' },
models: { default: 'sonnet' },
}),
).toBeNull();
});
});

View file

@ -0,0 +1,75 @@
import type { KtxModelRole } from '@ktx/llm';
import type { z } from 'zod';
/**
 * Normalized result of a runtime tool call.
 */
export interface KtxRuntimeToolOutput<TOutput = unknown> {
/** Text rendering of the result; the tool adapters in this change forward only this field to the model. */
markdown: string;
/** Optional machine-readable payload that accompanies the markdown. */
structured?: TOutput;
}
/**
 * Backend-neutral description of a tool that an LLM runtime can expose to the model.
 */
export interface KtxRuntimeToolDescriptor<TInput = unknown, TOutput = unknown> {
/** Tool name. */
name: string;
/** Human-readable description of what the tool does. */
description: string;
/** Zod object schema describing the tool input. */
inputSchema: z.ZodObject<z.ZodRawShape>;
/** Runs the tool and resolves with its normalized output. */
execute(input: TInput): Promise<KtxRuntimeToolOutput<TOutput>>;
}
/** A set of runtime tools keyed by tool name. */
export type KtxRuntimeToolSet = Record<string, KtxRuntimeToolDescriptor>;
/**
 * Why an agent loop ended.
 * NOTE(review): presumably 'budget' means the step budget was exhausted and 'natural' means
 * the model finished on its own; confirm against the runtime implementations.
 */
export type RunLoopStopReason = 'budget' | 'natural' | 'error';
/** Progress information passed to `RunLoopParams.onStepFinish`. */
export interface RunLoopStepInfo {
/** Index of the step that just finished. */
stepIndex: number;
/** The step budget of the run. */
stepBudget: number;
}
/**
 * Input for one agent loop run.
 */
export interface RunLoopParams {
/** Model role used to select which configured model runs the loop. */
modelRole: KtxModelRole;
/** System prompt for the run. */
systemPrompt: string;
/** User prompt that starts the run. */
userPrompt: string;
/** Tools the agent may call during the run. */
toolSet: KtxRuntimeToolSet;
/** Maximum number of steps the run may take. */
stepBudget: number;
/** String tags attached to the run for telemetry; their consumers are not visible here. */
telemetryTags: Record<string, string>;
/** Optional progress callback invoked as steps finish; may be sync or async. */
onStepFinish?: (info: RunLoopStepInfo) => void | Promise<void>;
}
/**
 * Outcome of an agent loop run.
 */
export interface RunLoopResult {
/** Why the loop stopped. */
stopReason: RunLoopStopReason;
/** Optional error describing the failure; presumably set only when `stopReason` is 'error'. */
error?: Error;
}
/**
 * Input for a single text generation request.
 */
export interface KtxGenerateTextInput {
/** Model role used to select the model. */
role: KtxModelRole;
/** User prompt. */
prompt: string;
/** Optional system prompt. */
system?: string;
/** Optional tools made available to the model. */
tools?: KtxRuntimeToolSet;
/** Optional sampling temperature; how (or whether) it is honored is up to the runtime. */
temperature?: number;
}
/**
 * Input for a structured (schema-constrained) generation request.
 * Carries the same fields as a text generation request plus the output schema.
 */
export interface KtxGenerateObjectInput<TOutput, TSchema extends z.ZodType<TOutput>> {
/** Model role used to select the model. */
role: KtxModelRole;
/** User prompt. */
prompt: string;
/** Optional system prompt. */
system?: string;
/** Optional tools made available to the model. */
tools?: KtxRuntimeToolSet;
/** Optional sampling temperature; how (or whether) it is honored is up to the runtime. */
temperature?: number;
/** Zod schema describing the expected output. */
schema: TSchema;
}
/**
 * Port implemented by each LLM backend runtime: plain text generation,
 * schema-typed object generation, and tool-using agent loops.
 */
export interface KtxLlmRuntimePort {
/** Generates text for the given prompt. */
generateText(input: KtxGenerateTextInput): Promise<string>;
/** Generates a value typed by `input.schema`. */
generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
input: KtxGenerateObjectInput<TOutput, TSchema>,
): Promise<TOutput>;
/** Runs a tool-using agent loop and reports why it stopped. */
runAgentLoop(params: RunLoopParams): Promise<RunLoopResult>;
}
/**
 * Minimal agent-runner contract: run one agent loop and report its result.
 */
export interface AgentRunnerPort {
/** Runs the agent loop described by `params`. */
runLoop(params: RunLoopParams): Promise<RunLoopResult>;
}
/**
 * Adapts a `KtxLlmRuntimePort` to the narrower `AgentRunnerPort` by forwarding
 * `runLoop` calls to the runtime's `runAgentLoop`.
 */
export class RuntimeAgentRunner implements AgentRunnerPort {
  private readonly runtime: KtxLlmRuntimePort;

  /** @param runtime - The LLM runtime that executes the agent loops. */
  constructor(runtime: KtxLlmRuntimePort) {
    this.runtime = runtime;
  }

  /** Delegates to the wrapped runtime and returns its promise unchanged. */
  runLoop(params: RunLoopParams): Promise<RunLoopResult> {
    return this.runtime.runAgentLoop(params);
  }
}

View file

@ -0,0 +1,43 @@
import { describe, expect, it, vi } from 'vitest';
import { z } from 'zod';
import { createAiSdkToolSet, createClaudeSdkTools, normalizeKtxRuntimeToolOutput } from './runtime-tools.js';
import type { KtxRuntimeToolDescriptor } from './runtime-port.js';
// Covers the runtime tool helpers: output normalization and the two SDK adapters.
describe('runtime tool descriptors', () => {
  // Shared descriptor; its execute echoes the input id so each adapter's output is recognizable.
  const descriptor: KtxRuntimeToolDescriptor<{ id: string }, { ok: boolean }> = {
    name: 'read_thing',
    description: 'Read one thing.',
    inputSchema: z.object({ id: z.string() }),
    execute: vi.fn(async (input) => ({
      markdown: `Read ${input.id}`,
      structured: { ok: true },
    })),
  };

  it('normalizes string and object tool outputs into markdown plus optional structured payload', () => {
    expect(normalizeKtxRuntimeToolOutput('plain text')).toEqual({ markdown: 'plain text' });
    expect(normalizeKtxRuntimeToolOutput({ markdown: 'shown', structured: { id: 1 } })).toEqual({
      markdown: 'shown',
      structured: { id: 1 },
    });
    // Arbitrary objects are rendered with JSON.stringify(value, null, 2), so nested keys
    // are indented by exactly two spaces inside the fenced block.
    expect(normalizeKtxRuntimeToolOutput({ name: 'skill', content: 'body' })).toEqual({
      markdown: '```json\n{\n  "name": "skill",\n  "content": "body"\n}\n```',
      structured: { name: 'skill', content: 'body' },
    });
  });

  it('builds AI SDK tools that expose markdown to the model', async () => {
    const tools = createAiSdkToolSet({ read_thing: descriptor });
    const output = await tools.read_thing.execute?.({ id: 'a' }, { toolCallId: 'call-1', messages: [] } as never);
    const modelOutput = tools.read_thing.toModelOutput?.({ output } as never);
    expect(modelOutput).toEqual({ type: 'text', value: 'Read a' });
  });

  it('builds Claude SDK tools that return text content only', async () => {
    const tools = createClaudeSdkTools({ read_thing: descriptor });
    const result = await tools[0].handler({ id: 'b' } as never, {});
    expect(result).toEqual({ content: [{ type: 'text', text: 'Read b' }] });
  });
});

View file

@ -0,0 +1,91 @@
import { tool as aiTool, type Tool, type ToolSet } from 'ai';
import { tool as claudeTool, type SdkMcpToolDefinition } from '@anthropic-ai/claude-agent-sdk';
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import { z } from 'zod';
import type { KtxRuntimeToolDescriptor, KtxRuntimeToolOutput, KtxRuntimeToolSet } from './runtime-port.js';
/** Type guard: true when `value` is a non-null object carrying a string `markdown` field. */
function isRuntimeOutput(value: unknown): value is KtxRuntimeToolOutput {
  return (
    typeof value === 'object' &&
    value !== null &&
    'markdown' in value &&
    typeof (value as { markdown?: unknown }).markdown === 'string'
  );
}

/**
 * Coerces an arbitrary tool result into the `{ markdown, structured? }` runtime shape.
 *
 * - Values that already look like runtime output are copied; `structured` is kept only
 *   when the key is present.
 * - Strings become the markdown as-is.
 * - Anything else is rendered as a fenced JSON block and also kept as `structured`.
 *
 * Serialization never throws: BigInt values are rendered as decimal strings, and values
 * JSON cannot represent at all (for example circular structures) fall back to their
 * `Object.prototype.toString` tag instead of crashing the tool call.
 *
 * @param value - Raw value returned by a tool.
 * @returns The normalized tool output.
 */
export function normalizeKtxRuntimeToolOutput(value: unknown): KtxRuntimeToolOutput {
  if (isRuntimeOutput(value)) {
    return 'structured' in value ? { markdown: value.markdown, structured: value.structured } : { markdown: value.markdown };
  }
  if (typeof value === 'string') {
    return { markdown: value };
  }

  let rendered: string | undefined;
  try {
    // JSON.stringify throws a TypeError on BigInt, so convert those in the replacer.
    rendered = JSON.stringify(value, (_key, item) => (typeof item === 'bigint' ? item.toString() : item), 2);
  } catch {
    // Circular references or a throwing toJSON: degrade to a type tag rather than failing.
    rendered = Object.prototype.toString.call(value);
  }
  return {
    // `rendered` is undefined for values JSON omits (e.g. undefined); interpolation keeps
    // the existing "undefined" text for those inputs.
    markdown: `\`\`\`json\n${rendered}\n\`\`\``,
    structured: value,
  };
}
/**
 * Asserts that a tool's input schema is a Zod object schema.
 *
 * @param name - Tool name, used only in the error message.
 * @param schema - Schema to check.
 * @throws Error when `schema` is not a `z.ZodObject` instance.
 */
function assertObjectSchema(name: string, schema: z.ZodType): asserts schema is z.ZodObject<z.ZodRawShape> {
  if (schema instanceof z.ZodObject) {
    return;
  }
  throw new Error(`KTX runtime tool "${name}" must use z.object input schema for claude-code`);
}
/**
 * Converts runtime tool descriptors into an AI SDK `ToolSet`.
 *
 * Each tool keeps its key from `tools` as its name. Executing a tool returns the
 * descriptor's output unchanged, while the model only sees the normalized markdown text.
 *
 * @param tools - Runtime tools keyed by name; defaults to an empty set.
 * @returns The equivalent AI SDK tool set.
 */
export function createAiSdkToolSet(tools: KtxRuntimeToolSet = {}): ToolSet {
  return Object.fromEntries(
    Object.entries(tools).map(([toolName, runtimeTool]) => [
      toolName,
      aiTool({
        description: runtimeTool.description,
        inputSchema: runtimeTool.inputSchema,
        execute: async (toolInput) => runtimeTool.execute(toolInput),
        // Only the markdown rendering is sent back to the model.
        toModelOutput: ({ output }) => ({ type: 'text', value: normalizeKtxRuntimeToolOutput(output).markdown }),
      }),
    ]),
  );
}
/**
 * Converts runtime tool descriptors into Claude Agent SDK MCP tool definitions.
 *
 * Tools are registered under `descriptor.name` with the shape of their Zod object schema.
 * Each handler returns a single text content item holding the normalized markdown.
 *
 * @param tools - Runtime tools; defaults to an empty set.
 * @returns One SDK tool definition per descriptor, in `Object.values` order.
 * @throws Error when a descriptor's input schema is not a Zod object schema.
 */
export function createClaudeSdkTools(tools: KtxRuntimeToolSet = {}): Array<SdkMcpToolDefinition<z.ZodRawShape>> {
  return Object.values(tools).map((runtimeTool) => {
    assertObjectSchema(runtimeTool.name, runtimeTool.inputSchema);
    return claudeTool(
      runtimeTool.name,
      runtimeTool.description,
      runtimeTool.inputSchema.shape,
      async (toolInput): Promise<CallToolResult> => {
        const rawOutput = await runtimeTool.execute(toolInput);
        const { markdown } = normalizeKtxRuntimeToolOutput(rawOutput);
        return { content: [{ type: 'text', text: markdown }] };
      },
    );
  });
}
/**
 * Lists the MCP tool ids for a runtime tool set.
 *
 * @param tools - Runtime tools keyed by name; defaults to an empty set.
 * @returns One id per key, formed as `mcp__ktx__<key>`, in key order.
 */
export function mcpToolIds(tools: KtxRuntimeToolSet = {}): string[] {
  const ids: string[] = [];
  for (const toolName of Object.keys(tools)) {
    ids.push(`mcp__ktx__${toolName}`);
  }
  return ids;
}
/**
 * Wraps an AI SDK tool as a runtime tool descriptor.
 *
 * The description defaults to an empty string and the input schema is passed through
 * with a cast. Whether the tool is executable is checked on every call, not at wrap time.
 *
 * @param name - Name for the resulting descriptor.
 * @param aiSdkTool - The AI SDK tool to wrap.
 * @returns A descriptor whose `execute` resolves with the normalized tool output and
 *   rejects when the wrapped tool has no `execute` function.
 */
export function createRuntimeToolDescriptorFromAiTool(name: string, aiSdkTool: Tool): KtxRuntimeToolDescriptor {
  const execute: KtxRuntimeToolDescriptor['execute'] = async (input) => {
    if (typeof aiSdkTool.execute === 'function') {
      const rawOutput = await aiSdkTool.execute(input as never, { toolCallId: `runtime-${name}` } as never);
      return normalizeKtxRuntimeToolOutput(rawOutput);
    }
    throw new Error(`KTX runtime tool "${name}" has no execute function`);
  };

  return {
    name,
    description: aiSdkTool.description ?? '',
    inputSchema: aiSdkTool.inputSchema as KtxRuntimeToolDescriptor['inputSchema'],
    execute,
  };
}
/**
 * Converts an AI SDK `ToolSet` into a runtime tool set, wrapping each tool under its key.
 *
 * @param tools - AI SDK tools keyed by name; defaults to an empty set.
 * @returns Runtime tool descriptors under the same keys.
 */
export function createRuntimeToolSetFromAiSdkTools(tools: ToolSet = {}): KtxRuntimeToolSet {
  const entries: Array<[string, KtxRuntimeToolDescriptor]> = [];
  for (const [toolName, sdkTool] of Object.entries(tools)) {
    entries.push([toolName, createRuntimeToolDescriptorFromAiTool(toolName, sdkTool as Tool)]);
  }
  return Object.fromEntries(entries);
}

View file

@ -1,13 +1,17 @@
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import type { KtxLlmProvider } from '@ktx/llm';
import YAML from 'yaml';
import { AgentRunnerService } from '../agent/index.js';
import { localConnectionInfoFromConfig } from '../connections/index.js';
import type { KtxEmbeddingPort, KtxFileStorePort, KtxFileWriteResult } from '../core/index.js';
import { type KtxLogger, noopLogger, SessionWorktreeService } from '../core/index.js';
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
import { createLocalKtxLlmProviderFromConfig } from '../llm/index.js';
import {
createLocalKtxLlmRuntimeFromConfig,
RuntimeAgentRunner,
type AgentRunnerPort,
type KtxLlmRuntimePort,
type KtxRuntimeToolSet,
} from '../llm/index.js';
import type { KtxLocalProject } from '../project/index.js';
import { PromptService } from '../prompts/index.js';
import { SkillsRegistryService } from '../skills/index.js';
@ -63,8 +67,8 @@ const LOCAL_AUTHOR = { name: 'KTX Local', email: 'local@ktx.local' };
const LOCAL_SHAPE_WARNING = 'Local memory ingest validates semantic-layer YAML shape only.';
export interface CreateLocalProjectMemoryIngestOptions {
llmProvider?: KtxLlmProvider;
agentRunner?: AgentRunnerService;
llmRuntime?: KtxLlmRuntimePort;
agentRunner?: AgentRunnerPort;
memoryModel?: string;
semanticLayerCompute?: KtxSemanticLayerComputePort;
queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KtxQueryResult> };
@ -89,7 +93,8 @@ export function createLocalProjectMemoryIngest(
const slSearchService = new SlSearchService(embedding, slSourcesRepository, logger);
const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, project.git, logger);
const authorResolver = new LocalAuthorResolver();
const llmProvider = options.llmProvider ?? createLocalKtxLlmProviderFromConfig(project.config.llm);
const llmRuntime =
options.llmRuntime ?? createLocalKtxLlmRuntimeFromConfig(project.config.llm, { projectDir: project.projectDir });
const toolsetFactory = new LocalMemoryToolsetFactory({
project,
embedding,
@ -104,10 +109,7 @@ export function createLocalProjectMemoryIngest(
});
const agentRunner =
options.agentRunner ??
new AgentRunnerService({
llmProvider: requireLlmProvider(llmProvider),
logger,
});
new RuntimeAgentRunner(requireLlmRuntime(llmRuntime));
const memoryAgent = new MemoryAgentService({
settings: {
knowledge: { userScopedKnowledgeEnabled: false },
@ -143,11 +145,11 @@ export function createLocalProjectMemoryIngest(
});
}
function requireLlmProvider(provider: KtxLlmProvider | null | undefined): KtxLlmProvider {
if (!provider) {
function requireLlmRuntime(runtime: KtxLlmRuntimePort | null | undefined): KtxLlmRuntimePort {
if (!runtime) {
throw new Error('createLocalProjectMemoryIngest requires llm.provider.backend or an injected agentRunner');
}
return provider;
return runtime;
}
class LocalMemoryFileStore implements MemoryFileStorePort {
@ -386,8 +388,8 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
class LocalMemoryToolSet implements MemoryToolSetLike {
constructor(private readonly tools: BaseTool[]) {}
toAiSdkTools(context: ToolContext) {
return Object.fromEntries(this.tools.map((tool) => [tool.name, tool.toAiSdkTool(context)]));
toRuntimeTools(context: ToolContext): KtxRuntimeToolSet {
return Object.fromEntries(this.tools.map((tool) => [tool.name, tool.toRuntimeTool(context)]));
}
}

View file

@ -1,3 +1,6 @@
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
// Module-level mock for 'ai' so generateText is a stub. This file is separate from
@ -15,7 +18,6 @@ import { MemoryAgentService } from './memory-agent.service.js';
interface BuiltMocks {
appSettings: any;
llmProvider: any;
prompt: any;
eventTracker: any;
telemetry: any;
@ -63,7 +65,6 @@ const buildMocks = (overrides: Partial<BuiltMocks> = {}): BuiltMocks => {
llm: { memoryIngestionModel: 'test-model' },
},
},
llmProvider: { getModel: vi.fn().mockReturnValue({}) },
prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') },
eventTracker: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) },
telemetry: {
@ -124,11 +125,11 @@ const buildMocks = (overrides: Partial<BuiltMocks> = {}): BuiltMocks => {
slValidator: { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) },
toolsetFactory: {
createIngestWuToolset: vi.fn().mockReturnValue({
toAiSdkTools: vi.fn().mockReturnValue({}),
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
}),
createToolset: vi.fn().mockReturnValue({
toAiSdkTools: vi.fn().mockReturnValue({}),
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
}),
},
@ -241,6 +242,39 @@ describe('MemoryAgentService.ingest — session-branch orchestration', () => {
expect(result.commitHash).toBe('cafebabe');
});
// Exercises the load_skill runtime tool end to end: a real SKILL.md is written to a temp
// directory and the stubbed agent runner calls the tool the service hands it.
it('normalizes load_skill output to markdown while preserving structured payload', async () => {
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-memory-skill-'));
const skillDir = join(tempDir, 'memory_agent');
await mkdir(skillDir, { recursive: true });
await writeFile(join(skillDir, 'SKILL.md'), '---\nname: memory_agent\n---\nSkill body', 'utf-8');
try {
const agentRunner = {
// The assertions run inside the stub because the tool set is only reachable through runLoop's params.
runLoop: vi.fn(async (params: any) => {
const result = await params.toolSet.load_skill.execute({ name: 'memory_agent' });
expect(result.markdown).toContain('memory_agent');
expect(result.structured).toMatchObject({ name: 'memory_agent' });
return { stopReason: 'natural' as const };
}),
};
const mocks = buildMocks({
agentRunner,
skillsRegistry: {
listSkills: vi.fn().mockResolvedValue([{ name: 'memory_agent', path: skillDir }]),
buildSkillsPrompt: vi.fn().mockReturnValue(''),
getSkill: vi.fn().mockResolvedValue({ name: 'memory_agent', path: skillDir }),
stripFrontmatter: vi.fn().mockReturnValue('Skill body'),
},
});
const svc = buildService(mocks);
await svc.ingest(baseInput);
// Guards against a vacuous pass: the in-stub assertions only count if runLoop actually ran.
expect(agentRunner.runLoop).toHaveBeenCalled();
} finally {
// Always remove the temp skill directory, even when an assertion fails.
await rm(tempDir, { recursive: true, force: true });
}
});
it('logs prompt debug output when KTX_MEMORY_AGENT_DEBUG_PROMPTS is enabled', async () => {
const previousDebugPrompts = process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS;
const mocks = buildMocks();

View file

@ -1,10 +1,10 @@
import { createHash } from 'node:crypto';
import { readFile } from 'node:fs/promises';
import { join } from 'node:path';
import { tool } from 'ai';
import * as YAML from 'yaml';
import { z } from 'zod';
import { type KtxLogger, noopLogger } from '../core/index.js';
import type { KtxRuntimeToolSet } from '../llm/index.js';
import {
revertSourceToPreHead,
type SemanticLayerSource,
@ -125,8 +125,9 @@ export class MemoryAgentService {
session: toolSession,
};
const loadSkillTool = {
load_skill: tool({
const loadSkillTool: KtxRuntimeToolSet = {
load_skill: {
name: 'load_skill',
description:
'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.',
inputSchema: z.object({
@ -137,23 +138,27 @@ export class MemoryAgentService {
if (!skill) {
const available =
(await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') || '(none)';
return `Skill "${name}" not available to the memory agent. Available: ${available}`;
return { markdown: `Skill "${name}" not available to the memory agent. Available: ${available}` };
}
try {
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
if (!skillsLoaded.includes(skill.name)) {
skillsLoaded.push(skill.name);
}
return {
const structured = {
name: skill.name,
skillDirectory: skill.path,
content: this.deps.skillsRegistry.stripFrontmatter(body),
};
return {
markdown: `# ${structured.name}\n\n${structured.content}`,
structured,
};
} catch (e) {
return `Error loading skill "${name}": ${e instanceof Error ? e.message : String(e)}`;
return { markdown: `Error loading skill "${name}": ${e instanceof Error ? e.message : String(e)}` };
}
},
}),
},
};
const skillNames: string[] = [...DEFAULT_SKILL_NAMES];
@ -212,7 +217,7 @@ export class MemoryAgentService {
modelRole: 'candidateExtraction',
systemPrompt,
userPrompt: prompt,
toolSet: { ...toolset.toAiSdkTools(toolContext), ...loadSkillTool },
toolSet: { ...toolset.toRuntimeTools(toolContext), ...loadSkillTool },
stepBudget,
telemetryTags: {
operationName: 'memory-agent-ingest',

View file

@ -1,5 +1,4 @@
import type { Tool } from 'ai';
import type { AgentRunnerService } from '../agent/index.js';
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../llm/index.js';
import type { GitService, KtxFileStorePort, KtxLogger, SessionWorktreeService } from '../core/index.js';
import type { PromptService } from '../prompts/index.js';
import type { SkillsRegistryService } from '../skills/index.js';
@ -118,7 +117,7 @@ export interface MemoryCommitMessagePort {
export interface MemoryFileStorePort extends KtxFileStorePort<MemoryFileStorePort>, MemoryCommitMessagePort {}
export interface MemoryToolSetLike {
toAiSdkTools(context: ToolContext): Record<string, Tool>;
toRuntimeTools(context: ToolContext): KtxRuntimeToolSet;
}
export interface MemoryToolsetFactoryPort {
@ -150,7 +149,7 @@ export interface MemoryAgentServiceDeps {
slSourcesRepository: SlSourcesIndexPort;
sessionWorktreeService: SessionWorktreeService<MemoryFileStorePort>;
semanticLayerSourceReconciler: MemorySlSourceReconcilerPort;
agentRunner: AgentRunnerService;
agentRunner: AgentRunnerPort;
slValidator: SlValidatorPort<SlValidationDeps>;
toolsetFactory: MemoryToolsetFactoryPort;
telemetry?: MemoryTelemetryPort;

View file

@ -180,6 +180,31 @@ llm:
});
});
// Checks that `claude-code` is accepted as a backend and that per-role model names
// are parsed unchanged.
it('parses Claude Code as a first-class LLM backend', () => {
  // YAML nesting is indentation-sensitive: `provider` and `models` must be indented under
  // `llm`, and their entries under them, for the assertions below to address the right keys.
  const config = parseKtxProjectConfig(`
llm:
  provider:
    backend: claude-code
  models:
    default: sonnet
    triage: haiku
    candidateExtraction: sonnet
    curator: sonnet
    reconcile: sonnet
    repair: opus
`);
  expect(config.llm.provider.backend).toBe('claude-code');
  expect(config.llm.models).toEqual({
    default: 'sonnet',
    triage: 'haiku',
    candidateExtraction: 'sonnet',
    curator: 'sonnet',
    reconcile: 'sonnet',
    repair: 'opus',
  });
});
it('parses gateway LLM, OpenAI scan embeddings, and sentence-transformers ingest embeddings', () => {
const config = parseKtxProjectConfig(`
llm:
@ -497,7 +522,7 @@ describe('generateKtxProjectConfigJsonSchema', () => {
const llm = (schema.properties as Record<string, { properties?: Record<string, unknown> }>).llm;
const provider = llm?.properties?.provider as { properties?: Record<string, unknown> };
const backend = provider?.properties?.backend as { enum?: readonly string[] };
expect(backend?.enum).toEqual(['none', 'anthropic', 'vertex', 'gateway']);
expect(backend?.enum).toEqual(['none', 'anthropic', 'vertex', 'gateway', 'claude-code']);
const storage = (schema.properties as Record<string, { properties?: Record<string, unknown> }>).storage;
const state = storage?.properties?.state as { enum?: readonly string[] };

View file

@ -3,7 +3,7 @@ import YAML from 'yaml';
import * as z from 'zod';
import { connectionConfigSchema } from './driver-schemas.js';
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway'] as const;
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway', 'claude-code'] as const;
const KTX_EMBEDDING_BACKENDS = ['none', 'deterministic', 'openai', 'sentence-transformers'] as const;
const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
@ -46,7 +46,9 @@ const llmProviderSchema = z
backend: z
.enum(KTX_LLM_BACKENDS)
.default('none')
.describe('LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block.'),
.describe(
'LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block; "claude-code" uses the local Claude Code session.',
),
vertex: vertexProviderSchema.optional().describe('Vertex AI credentials, used when backend is "vertex".'),
anthropic: apiCredentialsSchema.optional().describe('Anthropic API credentials, used when backend is "anthropic".'),
gateway: apiCredentialsSchema.optional().describe('AI Gateway credentials, used when backend is "gateway".'),

View file

@ -31,46 +31,32 @@ function createCache(initial: Record<string, string> = {}): KtxDescriptionCacheP
function createLlmProvider(text = 'generated description') {
vi.mocked(generateText).mockResolvedValue({ text } as never);
return {
getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }),
getModelByName: vi.fn(),
cacheMarker: vi.fn(),
repairToolCallHandler: vi.fn(),
thinkingProviderOptions: vi.fn(),
telemetryConfig: vi.fn(),
promptCachingConfig: vi.fn(() => ({
enabled: false,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
})),
activeBackend: vi.fn(() => 'anthropic'),
generateText: vi.fn(async (input) => {
const result = await generateText({
system: input.system ? { role: 'system', content: input.system } : undefined,
messages: [{ role: 'user', content: input.prompt }],
temperature: input.temperature,
} as never);
return result.text;
}),
generateObject: vi.fn(),
runAgentLoop: vi.fn(),
} as any;
}
function createFailingLlmProvider(message = 'timeout exceeded when trying to connect') {
vi.mocked(generateText).mockRejectedValue(new Error(message) as never);
return {
getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }),
getModelByName: vi.fn(),
cacheMarker: vi.fn(),
repairToolCallHandler: vi.fn(),
thinkingProviderOptions: vi.fn(),
telemetryConfig: vi.fn(),
promptCachingConfig: vi.fn(() => ({
enabled: false,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
})),
activeBackend: vi.fn(() => 'anthropic'),
generateText: vi.fn(async (input) => {
const result = await generateText({
system: input.system ? { role: 'system', content: input.system } : undefined,
messages: [{ role: 'user', content: input.prompt }],
temperature: input.temperature,
} as never);
return result.text;
}),
generateObject: vi.fn(),
runAgentLoop: vi.fn(),
} as any;
}
@ -158,10 +144,10 @@ describe('KTX description prompt builders', () => {
describe('KtxDescriptionGenerator', () => {
it('generates column descriptions with pre-fetched values, cache hits, and word-limit metadata', async () => {
const cache = createCache({ 'warehouse.public.orders.cached_status': 'Cached status description' });
const llmProvider = createLlmProvider('Payment state');
const llmRuntime = createLlmProvider('Payment state');
const connector = createConnector();
const generator = new KtxDescriptionGenerator({
llmProvider,
llmRuntime,
cache,
settings: {
columnMaxWords: 12,
@ -222,7 +208,7 @@ describe('KtxDescriptionGenerator', () => {
it('samples through the connector when column values are not pre-fetched', async () => {
const connector = createConnector();
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('Current order state'),
llmRuntime: createLlmProvider('Current order state'),
settings: {
columnMaxWords: 12,
tableMaxWords: 18,
@ -271,7 +257,7 @@ describe('KtxDescriptionGenerator', () => {
})),
};
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('Generated through sampler'),
llmRuntime: createLlmProvider('Generated through sampler'),
settings: {
columnMaxWords: 12,
tableMaxWords: 18,
@ -310,7 +296,7 @@ describe('KtxDescriptionGenerator', () => {
const cache = createCache();
const connector = createConnector();
const generator = new KtxDescriptionGenerator({
llmProvider: createFailingLlmProvider(),
llmRuntime: createFailingLlmProvider(),
cache,
settings: {
columnMaxWords: 12,
@ -355,7 +341,7 @@ describe('KtxDescriptionGenerator', () => {
const cache = createCache();
const connector = createConnector();
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('Commerce orders'),
llmRuntime: createLlmProvider('Commerce orders'),
cache,
settings: {
columnMaxWords: 12,
@ -424,7 +410,7 @@ describe('KtxDescriptionGenerator resilience', () => {
const logger = createLogger();
const warnings: Array<{ code: string; table?: string }> = [];
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('Commerce orders'),
llmRuntime: createLlmProvider('Commerce orders'),
logger,
onWarning: (warning) => warnings.push({ code: warning.code, ...(warning.table ? { table: warning.table } : {}) }),
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24, concurrencyLimit: 2 },
@ -455,7 +441,7 @@ describe('KtxDescriptionGenerator resilience', () => {
const logger = createLogger();
const warnings: Array<{ code: string; table?: string; metadata?: Record<string, unknown> }> = [];
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('Customer reference data'),
llmRuntime: createLlmProvider('Customer reference data'),
logger,
onWarning: (warning) =>
warnings.push({
@ -503,7 +489,7 @@ describe('KtxDescriptionGenerator resilience', () => {
};
const warnings: string[] = [];
const generator = new KtxDescriptionGenerator({
llmProvider: createFailingLlmProvider(),
llmRuntime: createFailingLlmProvider(),
onWarning: (warning) => warnings.push(warning.code),
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
});
@ -528,7 +514,7 @@ describe('KtxDescriptionGenerator resilience', () => {
};
const warnings: string[] = [];
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('Orders mart'),
llmRuntime: createLlmProvider('Orders mart'),
onWarning: (warning) => warnings.push(warning.code),
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
});
@ -562,7 +548,7 @@ describe('KtxDescriptionGenerator resilience', () => {
};
const warnings: string[] = [];
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('should not be called'),
llmRuntime: createLlmProvider('should not be called'),
onWarning: (warning) => warnings.push(warning.code),
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
});
@ -588,7 +574,7 @@ describe('KtxDescriptionGenerator resilience', () => {
};
const logger = createLogger();
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('Payment lifecycle state'),
llmRuntime: createLlmProvider('Payment lifecycle state'),
logger,
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
});
@ -625,7 +611,7 @@ describe('KtxDescriptionGenerator resilience', () => {
sampleColumn,
};
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('Customer reference identifier'),
llmRuntime: createLlmProvider('Customer reference identifier'),
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
});
@ -657,7 +643,7 @@ describe('KtxDescriptionGenerator resilience', () => {
};
vi.mocked(generateText).mockClear();
const generator = new KtxDescriptionGenerator({
llmProvider: createLlmProvider('should not be called'),
llmRuntime: createLlmProvider('should not be called'),
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
});

View file

@ -1,5 +1,4 @@
import type { KtxLlmProvider } from '@ktx/llm';
import { generateKtxText } from '../llm/index.js';
import type { KtxLlmRuntimePort } from '../llm/index.js';
import type {
KtxColumnSampleInput,
KtxColumnSampleResult,
@ -120,7 +119,7 @@ export interface KtxGenerateDataSourceDescriptionInput {
}
export interface KtxDescriptionGeneratorOptions {
llmProvider: KtxLlmProvider;
llmRuntime: KtxLlmRuntimePort;
cache?: KtxDescriptionCachePort;
logger?: KtxScanLoggerPort;
onWarning?: (warning: KtxScanWarning) => void;
@ -400,14 +399,14 @@ Data source type: ${input.dataSourceType}`;
}
export class KtxDescriptionGenerator {
private readonly llmProvider: KtxLlmProvider;
private readonly llmRuntime: KtxLlmRuntimePort;
private readonly cache?: KtxDescriptionCachePort;
private readonly logger?: KtxScanLoggerPort;
private readonly onWarning?: (warning: KtxScanWarning) => void;
private readonly settings: ResolvedKtxDescriptionGenerationSettings;
constructor(options: KtxDescriptionGeneratorOptions) {
this.llmProvider = options.llmProvider;
this.llmRuntime = options.llmRuntime;
this.cache = options.cache;
this.logger = options.logger;
this.onWarning = options.onWarning;
@ -779,8 +778,7 @@ export class KtxDescriptionGenerator {
private async generateAiDescription(prompt: KtxDescriptionPrompt, _operationName: string): Promise<string | null> {
try {
const text = await generateKtxText({
llmProvider: this.llmProvider,
const text = await this.llmRuntime.generateText({
role: 'candidateExtraction',
system: prompt.system,
prompt: prompt.user,

View file

@ -264,7 +264,6 @@ export type {
} from './relationship-graph-resolver.js';
export { resolveKtxRelationshipGraph } from './relationship-graph-resolver.js';
export type {
KtxRelationshipLlmProposalGenerateText,
KtxRelationshipLlmProposalResult,
KtxRelationshipLlmProposalSettings,
ProposeKtxRelationshipCandidatesWithLlmInput,

View file

@ -356,7 +356,7 @@ describe('local scan enrichment', () => {
it('honors scan relationship config when LLM proposals are disabled', async () => {
const providers = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 3 });
const getModel = vi.fn(() => ({ modelId: 'provider/language-model', provider: 'gateway' }));
const generateObject = vi.fn();
const result = await runLocalScanEnrichment({
connectionId: 'warehouse',
mode: 'relationships',
@ -365,9 +365,9 @@ describe('local scan enrichment', () => {
context: { runId: 'scan-run-llm-disabled' },
providers: {
...providers,
llm: {
...providers.llm,
getModel: getModel as never,
llmRuntime: {
...providers.llmRuntime,
generateObject: generateObject as never,
},
},
relationshipSettings: {
@ -378,7 +378,7 @@ describe('local scan enrichment', () => {
});
expect(result.summary.llmRelationshipValidation).toBe('skipped');
expect(getModel).not.toHaveBeenCalledWith('candidateExtraction');
expect(generateObject).not.toHaveBeenCalled();
});
it('skips relationship detection when scan relationships are disabled', async () => {
@ -628,7 +628,7 @@ describe('local scan enrichment', () => {
connector: scanConnector,
context: { runId: 'scan-run-batched-embeddings' },
providers: {
llm: deterministicProviders.llm,
llmRuntime: deterministicProviders.llmRuntime,
embedding: {
dimensions: 3,
maxBatchSize: 2,
@ -658,7 +658,7 @@ describe('local scan enrichment', () => {
providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 },
});
const getModel = vi.spyOn(providers.llm, 'getModel');
const generateText = vi.spyOn(providers.llmRuntime, 'generateText');
const embedBatch = vi.spyOn(providers.embedding, 'embedBatch');
const second = await runLocalScanEnrichment({
connectionId: 'warehouse',
@ -676,7 +676,7 @@ describe('local scan enrichment', () => {
expect(first.state.resumedStages).toEqual([]);
expect(second.state.resumedStages).toEqual(['descriptions', 'embeddings', 'relationships']);
expect(second.state.completedStages).toEqual(['descriptions', 'embeddings', 'relationships']);
expect(getModel).not.toHaveBeenCalled();
expect(generateText).not.toHaveBeenCalled();
expect(embedBatch).not.toHaveBeenCalled();
expect(second.descriptionUpdates).toEqual(first.descriptionUpdates);
expect(second.embeddingUpdates).toEqual(first.embeddingUpdates);
@ -711,7 +711,7 @@ describe('local scan enrichment', () => {
tables: [{ ...firstTable, name: 'customers' }],
})),
};
const getModel = vi.spyOn(providers.llm, 'getModel');
const generateText = vi.spyOn(providers.llmRuntime, 'generateText');
const result = await runLocalScanEnrichment({
connectionId: 'warehouse',
@ -727,7 +727,7 @@ describe('local scan enrichment', () => {
expect(result.state.resumedStages).toEqual([]);
expect(result.state.completedStages).toEqual(['descriptions', 'embeddings', 'relationships']);
expect(getModel).toHaveBeenCalled();
expect(generateText).toHaveBeenCalled();
});
it('runs providerless enriched scans as relationship-only discovery enrichment', async () => {

View file

@ -1,5 +1,5 @@
import type { KtxLlmProvider } from '@ktx/llm';
import pLimit from 'p-limit';
import type { KtxLlmRuntimePort } from '../llm/index.js';
import { buildDefaultKtxProjectConfig, type KtxScanRelationshipConfig } from '../project/config.js';
import { type KtxDescriptionColumnTable, KtxDescriptionGenerator } from './description-generation.js';
import { buildKtxColumnEmbeddingText } from './embedding-text.js';
@ -49,7 +49,7 @@ export interface DeterministicLocalScanEnrichmentProviderOptions {
}
export interface KtxLocalScanEnrichmentProviders {
llm: KtxLlmProvider;
llmRuntime: KtxLlmRuntimePort;
embedding: KtxEmbeddingPort;
}
@ -190,7 +190,7 @@ export function createDeterministicLocalScanEnrichmentProviders(
const dimensions = options.embeddingDimensions ?? 8;
const maxBatchSize = options.maxBatchSize ?? 64;
return {
llm: deterministicLlmProvider(),
llmRuntime: deterministicLlmRuntime(),
embedding: {
dimensions,
maxBatchSize,
@ -201,41 +201,16 @@ export function createDeterministicLocalScanEnrichmentProviders(
};
}
function deterministicLlmProvider(): KtxLlmProvider {
const model = { modelId: 'deterministic-scan', provider: 'deterministic' };
function deterministicLlmRuntime(): KtxLlmRuntimePort {
return {
getModel() {
return model as ReturnType<KtxLlmProvider['getModel']>;
async generateText(input) {
return `Deterministic description for ${input.prompt.slice(0, 64).trim() || 'data source'}`;
},
getModelByName() {
return model as ReturnType<KtxLlmProvider['getModelByName']>;
async generateObject() {
return { pkCandidates: [], fkCandidates: [] } as never;
},
cacheMarker() {
return undefined;
},
repairToolCallHandler() {
throw new Error('deterministic scan provider does not support tool-call repair');
},
thinkingProviderOptions() {
return {};
},
telemetryConfig() {
return undefined;
},
promptCachingConfig() {
return {
enabled: false,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
};
},
activeBackend() {
return 'gateway';
async runAgentLoop() {
return { stopReason: 'natural' };
},
};
}
@ -324,7 +299,7 @@ async function generateDescriptions(input: {
}): Promise<KtxLocalScanEnrichmentResult['descriptionUpdates']> {
const warningSink = input.warnings;
const generator = new KtxDescriptionGenerator({
llmProvider: input.providers.llm,
llmRuntime: input.providers.llmRuntime,
...(input.context.logger ? { logger: input.context.logger } : {}),
...(warningSink
? {
@ -643,7 +618,7 @@ export async function runLocalScanEnrichment(
schema,
context: input.context,
settings: relationshipSettings,
llmProvider: input.providers?.llm ?? null,
llmRuntime: input.providers?.llmRuntime ?? null,
});
await relationshipProgress?.update(

View file

@ -1,10 +1,10 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { KtxLlmProvider } from '@ktx/llm';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import YAML from 'yaml';
import type { SourceAdapter } from '../ingest/index.js';
import type { KtxLlmRuntimePort } from '../llm/index.js';
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js';
import { filterSnapshotTables, getLocalScanReport, getLocalScanStatus, resolveEnabledTables, runLocalScan } from './local-scan.js';
import type { KtxQueryResult, KtxReadOnlyQueryInput, KtxSchemaSnapshot, KtxSchemaTable } from './types.js';
@ -79,25 +79,11 @@ function relationshipSqlResult(
throw new Error(`Unexpected relationship SQL: ${input.sql}`);
}
function deterministicLlmProvider(): KtxLlmProvider {
function deterministicLlmRuntime(): KtxLlmRuntimePort {
return {
getModel: () => ({ provider: 'deterministic', modelId: 'deterministic' }) as never,
getModelByName: () => ({ provider: 'deterministic', modelId: 'deterministic' }) as never,
cacheMarker: () => undefined,
repairToolCallHandler: (() => undefined) as never,
thinkingProviderOptions: () => ({}),
telemetryConfig: () => undefined,
promptCachingConfig: () => ({
enabled: false,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
}),
activeBackend: () => 'gateway',
generateText: vi.fn(async (input) => `Deterministic description for ${input.prompt.slice(0, 64).trim() || 'data source'}`),
generateObject: vi.fn(async () => ({ pkCandidates: [], fkCandidates: [] }) as never),
runAgentLoop: vi.fn(),
};
}
@ -571,7 +557,7 @@ describe('local scan', () => {
llmProposals: false,
maxLlmTablesPerBatch: 7,
};
const getModel = vi.fn(() => ({ modelId: 'provider/language-model', provider: 'gateway' }));
const generateObject = vi.fn(async () => ({ pkCandidates: [], fkCandidates: [] }));
const connector = {
id: 'test:warehouse',
driver: 'postgres' as const,
@ -650,9 +636,9 @@ describe('local scan', () => {
detectRelationships: true,
connector,
enrichmentProviders: {
llm: {
...deterministicLlmProvider(),
getModel: getModel as never,
llmRuntime: {
...deterministicLlmRuntime(),
generateObject: generateObject as never,
},
embedding: {
dimensions: 8,
@ -668,7 +654,7 @@ describe('local scan', () => {
expect(result.report.relationships.accepted).toBe(1);
expect(result.report.enrichment.llmRelationshipValidation).toBe('skipped');
expect(getModel).not.toHaveBeenCalledWith('candidateExtraction');
expect(generateObject).not.toHaveBeenCalled();
});
it('accepts no-declared-constraint relationships and writes relationship artifacts', async () => {
@ -1206,7 +1192,7 @@ describe('local scan', () => {
mode: 'enriched',
connector,
enrichmentProviders: {
llm: deterministicLlmProvider(),
llmRuntime: deterministicLlmRuntime(),
embedding: {
dimensions: 8,
maxBatchSize: 64,
@ -1314,7 +1300,7 @@ describe('local scan', () => {
return { values: ['1'], nullCount: 0, distinctCount: 1 };
},
};
const llm = deterministicLlmProvider();
const llmRuntime = deterministicLlmRuntime();
const first = await runLocalScan({
project,
@ -1323,7 +1309,7 @@ describe('local scan', () => {
mode: 'enriched',
connector,
enrichmentProviders: {
llm,
llmRuntime,
embedding: {
dimensions: 8,
maxBatchSize: 64,
@ -1344,7 +1330,7 @@ describe('local scan', () => {
});
expect(first.report.enrichment.embeddings).toBe('failed');
const getModel = vi.spyOn(llm, 'getModel');
const generateObject = vi.spyOn(llmRuntime, 'generateObject');
const retry = await runLocalScan({
project,
adapters: [fetchOnlyAdapter()],
@ -1352,7 +1338,7 @@ describe('local scan', () => {
mode: 'enriched',
connector,
enrichmentProviders: {
llm,
llmRuntime,
embedding: {
dimensions: 8,
maxBatchSize: 64,
@ -1373,8 +1359,8 @@ describe('local scan', () => {
failedStages: [],
});
expect(retry.report.enrichment.embeddings).toBe('completed');
expect(getModel).toHaveBeenCalledTimes(1);
expect(getModel).toHaveBeenCalledWith('candidateExtraction');
expect(generateObject).toHaveBeenCalledTimes(1);
expect(generateObject).toHaveBeenCalledWith(expect.objectContaining({ role: 'candidateExtraction' }));
expect(embeddingAttempts).toBe(2);
const reportPath = retry.report.artifactPaths.reportPath;

View file

@ -8,7 +8,7 @@ import {
} from '../ingest/index.js';
import {
createLocalKtxEmbeddingProviderFromConfig,
createLocalKtxLlmProviderFromConfig,
createLocalKtxLlmRuntimeFromConfig,
KtxScanEmbeddingPortAdapter,
} from '../llm/index.js';
import type { KtxProjectLlmConfig, KtxScanEnrichmentConfig, KtxScanRelationshipConfig } from '../project/config.js';
@ -150,6 +150,7 @@ interface LocalScanEnrichmentProviderDeps {
createKtxLlmProvider?: typeof createKtxLlmProvider;
createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider;
env?: NodeJS.ProcessEnv;
projectDir?: string;
}
export function createLocalScanEnrichmentProvidersFromConfig(
@ -165,14 +166,17 @@ export function createLocalScanEnrichmentProvidersFromConfig(
return null;
}
const llm = createLocalKtxLlmProviderFromConfig(llmConfig, deps);
const llmRuntime = createLocalKtxLlmRuntimeFromConfig(llmConfig, {
...deps,
projectDir: deps.projectDir,
});
const embeddingProvider = createLocalKtxEmbeddingProviderFromConfig(config.embeddings, deps);
if (!llm || !embeddingProvider) {
if (!llmRuntime || !embeddingProvider) {
return null;
}
return {
llm,
llmRuntime,
embedding: new KtxScanEmbeddingPortAdapter(embeddingProvider),
};
}
@ -378,7 +382,9 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise<LocalS
connector && (mode !== 'structural' || options.detectRelationships)
? options.enrichmentProviders !== undefined
? options.enrichmentProviders
: createLocalScanEnrichmentProvidersFromConfig(options.project.config.scan.enrichment, options.project.config.llm)
: createLocalScanEnrichmentProvidersFromConfig(options.project.config.scan.enrichment, options.project.config.llm, {
projectDir: options.project.projectDir,
})
: null;
await options.progress?.update(0.15, 'Inspecting database schema');

View file

@ -6,6 +6,7 @@ import { gunzipSync } from 'node:zlib';
import Database from 'better-sqlite3';
import YAML from 'yaml';
import { z } from 'zod';
import type { KtxLlmRuntimePort } from '../llm/index.js';
import type { KtxEnrichedRelationship, KtxEnrichedSchema, KtxRelationshipType } from './enrichment-types.js';
import { snapshotToKtxEnrichedSchema } from './local-enrichment.js';
import type { KtxRelationshipDiscoveryCandidate } from './relationship-candidates.js';
@ -13,7 +14,6 @@ import {
generateKtxRelationshipDiscoveryCandidates,
mergeKtxRelationshipDiscoveryCandidates,
} from './relationship-candidates.js';
import type { KtxLlmProvider } from '@ktx/llm';
import { proposeKtxRelationshipCandidatesWithLlm } from './relationship-llm-proposal.js';
import {
discoverKtxCompositeRelationships,
@ -527,7 +527,7 @@ export function isKtxRelationshipBenchmarkTuningEligible(input: {
}
export function ktxRelationshipBenchmarkDetectorWithLlm(
llmProvider: KtxLlmProvider,
llmRuntime: KtxLlmRuntimePort,
): KtxRelationshipBenchmarkDetector {
return {
async detect(input) {
@ -566,7 +566,7 @@ export function ktxRelationshipBenchmarkDetectorWithLlm(
connectionId: input.snapshot.connectionId,
schema: input.schema,
profile: profiles,
llmProvider,
llmRuntime,
});
const candidates = mergeKtxRelationshipDiscoveryCandidates([
...broadRelationshipCandidates,

View file

@ -1,6 +1,6 @@
import type { KtxLlmProvider } from '@ktx/llm';
import Database from 'better-sqlite3';
import { afterEach, describe, expect, it, vi } from 'vitest';
import type { KtxLlmRuntimePort } from '../llm/index.js';
import { buildDefaultKtxProjectConfig } from '../project/config.js';
import { snapshotToKtxEnrichedSchema } from './local-enrichment.js';
import {
@ -216,29 +216,11 @@ function connector(executor: InMemorySqliteExecutor | null): KtxScanConnector {
};
}
function llmProvider(): KtxLlmProvider {
const model = { modelId: 'claude-sonnet-4-6', provider: 'anthropic' };
function llmRuntime(output: unknown): KtxLlmRuntimePort {
return {
getModel: vi.fn(() => model as ReturnType<KtxLlmProvider['getModel']>),
getModelByName: vi.fn(() => model as ReturnType<KtxLlmProvider['getModelByName']>),
cacheMarker: vi.fn(),
repairToolCallHandler: vi.fn(),
thinkingProviderOptions: vi.fn(() => ({})),
telemetryConfig: vi.fn(() => undefined),
promptCachingConfig: vi.fn(
() =>
({
enabled: false,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
}) as ReturnType<KtxLlmProvider['promptCachingConfig']>,
),
activeBackend: vi.fn(() => 'anthropic' as ReturnType<KtxLlmProvider['activeBackend']>),
generateText: vi.fn(),
generateObject: vi.fn(async () => output) as KtxLlmRuntimePort['generateObject'],
runAgentLoop: vi.fn(),
};
}
@ -505,21 +487,19 @@ describe('production relationship discovery', () => {
INSERT INTO customers (id) VALUES (1), (2);
INSERT INTO orders (id, buyer_ref) VALUES (10, 1), (11, 2);
`);
const generateText = vi.fn(async () => ({
output: {
pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.91, rationale: 'Unique customer key.' }],
fkCandidates: [
{
fromTable: 'orders',
fromColumn: 'buyer_ref',
toTable: 'customers',
toColumn: 'id',
confidence: 0.89,
rationale: 'Buyer reference values align with customer identifiers.',
},
],
},
}));
const llmOutput = {
pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.91, rationale: 'Unique customer key.' }],
fkCandidates: [
{
fromTable: 'orders',
fromColumn: 'buyer_ref',
toTable: 'customers',
toColumn: 'id',
confidence: 0.89,
rationale: 'Buyer reference values align with customer identifiers.',
},
],
};
const result = await discoverKtxRelationships({
connectionId: 'warehouse',
@ -528,8 +508,7 @@ describe('production relationship discovery', () => {
schema: snapshotToKtxEnrichedSchema(llmOnlyRelationshipSnapshot()),
context: { runId: 'llm-relationship-orchestrator' },
settings: relationshipSettings(),
llmProvider: llmProvider(),
generateText,
llmRuntime: llmRuntime(llmOutput),
});
expect(result.llmRelationshipValidation).toBe('completed');

View file

@ -1,4 +1,4 @@
import type { KtxLlmProvider } from '@ktx/llm';
import type { KtxLlmRuntimePort } from '../llm/index.js';
import type { KtxScanRelationshipConfig } from '../project/config.js';
import type { KtxEnrichedRelationship, KtxEnrichedSchema, KtxRelationshipUpdate } from './enrichment-types.js';
import {
@ -15,10 +15,7 @@ import {
type KtxResolvedRelationshipDiscoveryCandidate,
resolveKtxRelationshipGraph,
} from './relationship-graph-resolver.js';
import {
type KtxRelationshipLlmProposalGenerateText,
proposeKtxRelationshipCandidatesWithLlm,
} from './relationship-llm-proposal.js';
import { proposeKtxRelationshipCandidatesWithLlm } from './relationship-llm-proposal.js';
import {
createKtxRelationshipProfileCache,
type KtxRelationshipProfileArtifact,
@ -42,8 +39,7 @@ export interface DiscoverKtxRelationshipsInput {
schema: KtxEnrichedSchema;
context: KtxScanContext;
settings: KtxScanRelationshipConfig;
llmProvider?: KtxLlmProvider | null;
generateText?: KtxRelationshipLlmProposalGenerateText;
llmRuntime?: KtxLlmRuntimePort | null;
}
export interface DiscoverKtxRelationshipsResult {
@ -246,11 +242,10 @@ export async function discoverKtxRelationships(
connectionId: input.connectionId,
schema: input.schema,
profile,
llmProvider: input.llmProvider ?? null,
llmRuntime: input.llmRuntime ?? null,
settings: {
maxTablesPerBatch: input.settings.maxLlmTablesPerBatch,
},
generateText: input.generateText,
})
: { candidates: [], warnings: [], llmCalls: 0, summary: 'skipped' as const };
const candidates = mergeKtxRelationshipDiscoveryCandidates([

View file

@ -1,32 +1,14 @@
import type { KtxLlmProvider } from '@ktx/llm';
import { describe, expect, it, vi } from 'vitest';
import type { KtxLlmRuntimePort } from '../llm/index.js';
import type { KtxEnrichedColumn, KtxEnrichedSchema, KtxEnrichedTable } from './enrichment-types.js';
import type { KtxRelationshipProfileArtifact } from './relationship-profiling.js';
import { proposeKtxRelationshipCandidatesWithLlm } from './relationship-llm-proposal.js';
function llmProvider(provider = 'anthropic'): KtxLlmProvider {
const model = { modelId: 'claude-sonnet-4-6', provider };
function llmRuntime(output?: unknown): KtxLlmRuntimePort {
return {
getModel: vi.fn(() => model as ReturnType<KtxLlmProvider['getModel']>),
getModelByName: vi.fn(() => model as ReturnType<KtxLlmProvider['getModelByName']>),
cacheMarker: vi.fn(),
repairToolCallHandler: vi.fn(),
thinkingProviderOptions: vi.fn(() => ({})),
telemetryConfig: vi.fn(() => undefined),
promptCachingConfig: vi.fn(
() =>
({
enabled: false,
systemTtl: '1h',
toolsTtl: '1h',
historyTtl: '5m',
cacheSystem: true,
cacheTools: true,
cacheHistory: true,
vertexFallbackTo5m: false,
}) as ReturnType<KtxLlmProvider['promptCachingConfig']>,
),
activeBackend: vi.fn(() => provider as ReturnType<KtxLlmProvider['activeBackend']>),
generateText: vi.fn(),
generateObject: vi.fn(async () => output) as KtxLlmRuntimePort['generateObject'],
runAgentLoop: vi.fn(),
};
}
@ -125,28 +107,25 @@ function profile(): KtxRelationshipProfileArtifact {
describe('relationship LLM proposals', () => {
it('maps valid structured FK proposals into review candidates with rationale evidence', async () => {
const generateText = vi.fn(async () => ({
output: {
pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.94, rationale: 'Unique customer identifier.' }],
fkCandidates: [
{
fromTable: 'orders',
fromColumn: 'buyer_ref',
toTable: 'customers',
toColumn: 'id',
confidence: 0.88,
rationale: 'Buyer reference values match customer identifiers.',
},
],
},
}));
const runtime = llmRuntime({
pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.94, rationale: 'Unique customer identifier.' }],
fkCandidates: [
{
fromTable: 'orders',
fromColumn: 'buyer_ref',
toTable: 'customers',
toColumn: 'id',
confidence: 0.88,
rationale: 'Buyer reference values match customer identifiers.',
},
],
});
const result = await proposeKtxRelationshipCandidatesWithLlm({
connectionId: 'warehouse',
schema: schema(),
profile: profile(),
llmProvider: llmProvider(),
generateText,
llmRuntime: runtime,
});
expect(result.summary).toBe('completed');
@ -164,42 +143,27 @@ describe('relationship LLM proposals', () => {
reasons: ['llm_proposal', 'llm_pk_proposal'],
},
});
expect(generateText).toHaveBeenCalledWith(
expect(runtime.generateObject).toHaveBeenCalledWith(
expect.objectContaining({
system: expect.objectContaining({
role: 'system',
content: expect.stringContaining('You are helping KTX review possible SQL relationships'),
}),
messages: expect.arrayContaining([
expect.objectContaining({
role: 'user',
content: expect.stringContaining('"tables"'),
}),
]),
role: 'candidateExtraction',
system: expect.stringContaining('You are helping KTX review possible SQL relationships'),
prompt: expect.stringContaining('"tables"'),
}),
);
const call = (
generateText.mock.calls as unknown as Array<[{ messages: Array<{ role: string; content: string }> }]>
)[0]?.[0];
const userMessage = call?.messages.find((m) => m.role === 'user');
expect(userMessage?.content).not.toContain('You are helping KTX review possible SQL relationships');
expect(call?.messages.some((m) => m.role === 'system')).toBe(false);
const call = vi.mocked(runtime.generateObject).mock.calls[0]?.[0];
expect(call?.prompt).not.toContain('You are helping KTX review possible SQL relationships');
});
it('skips deterministic providers without calling generateText', async () => {
const generateText = vi.fn();
it('skips when no runtime is configured', async () => {
const result = await proposeKtxRelationshipCandidatesWithLlm({
connectionId: 'warehouse',
schema: schema(),
profile: profile(),
llmProvider: llmProvider('deterministic'),
generateText,
llmRuntime: null,
});
expect(result).toMatchObject({ candidates: [], llmCalls: 0, summary: 'skipped' });
expect(result.warnings).toEqual([]);
expect(generateText).not.toHaveBeenCalled();
});
it('returns recoverable warnings for invalid references and generation failures', async () => {
@ -207,22 +171,19 @@ describe('relationship LLM proposals', () => {
connectionId: 'warehouse',
schema: schema(),
profile: profile(),
llmProvider: llmProvider(),
generateText: vi.fn(async () => ({
output: {
pkCandidates: [],
fkCandidates: [
{
fromTable: 'orders',
fromColumn: 'missing_column',
toTable: 'customers',
toColumn: 'id',
confidence: 0.7,
rationale: 'Invalid source column.',
},
],
},
})),
llmRuntime: llmRuntime({
pkCandidates: [],
fkCandidates: [
{
fromTable: 'orders',
fromColumn: 'missing_column',
toTable: 'customers',
toColumn: 'id',
confidence: 0.7,
rationale: 'Invalid source column.',
},
],
}),
});
expect(invalidReference.candidates).toEqual([]);
expect(invalidReference.summary).toBe('completed');
@ -235,10 +196,13 @@ describe('relationship LLM proposals', () => {
connectionId: 'warehouse',
schema: schema(),
profile: profile(),
llmProvider: llmProvider(),
generateText: vi.fn(async () => {
throw new Error('model unavailable');
}),
llmRuntime: {
generateText: vi.fn(),
generateObject: vi.fn(async () => {
throw new Error('model unavailable');
}),
runAgentLoop: vi.fn(),
},
});
expect(failed).toMatchObject({ candidates: [], llmCalls: 1, summary: 'failed' });
expect(failed.warnings[0]).toMatchObject({

View file

@ -1,7 +1,5 @@
import type { KtxLlmProvider } from '@ktx/llm';
import type { generateText } from 'ai';
import { z } from 'zod';
import { generateKtxObject } from '../llm/index.js';
import { generateKtxObject, type KtxLlmRuntimePort } from '../llm/index.js';
import type { KtxEnrichedColumn, KtxEnrichedSchema, KtxEnrichedTable } from './enrichment-types.js';
import {
normalizeKtxRelationshipName,
@ -32,10 +30,6 @@ const relationshipLlmProposalSchema = z.object({
});
type KtxRelationshipLlmProposalOutput = z.infer<typeof relationshipLlmProposalSchema>;
type GenerateTextInput = Parameters<typeof generateText>[0];
export type KtxRelationshipLlmProposalGenerateText = (
input: GenerateTextInput,
) => Promise<{ text?: string; output?: unknown }>;
export interface KtxRelationshipLlmProposalSettings {
maxTablesPerBatch: number;
@ -48,9 +42,8 @@ export interface ProposeKtxRelationshipCandidatesWithLlmInput {
connectionId: string;
schema: KtxEnrichedSchema;
profile: KtxRelationshipProfileArtifact;
llmProvider: KtxLlmProvider | null;
llmRuntime: KtxLlmRuntimePort | null;
settings?: Partial<KtxRelationshipLlmProposalSettings>;
generateText?: KtxRelationshipLlmProposalGenerateText;
}
export interface KtxRelationshipLlmProposalResult {
@ -77,11 +70,6 @@ function clampConfidence(value: number): number {
return Number(Math.max(0, Math.min(1, value)).toFixed(3));
}
/**
 * Reports whether the model resolved for the `candidateExtraction` role
 * identifies its provider as `'deterministic'`.
 */
function modelIsDeterministic(llmProvider: KtxLlmProvider): boolean {
  // The resolved model is only inspected for an optional `provider` field.
  const { provider } = llmProvider.getModel('candidateExtraction') as { provider?: string };
  return provider === 'deterministic';
}
function findTable(schema: KtxEnrichedSchema, name: string): KtxEnrichedTable | null {
const normalized = name.toLowerCase();
return schema.tables.find((table) => table.ref.name.toLowerCase() === normalized) ?? null;
@ -238,7 +226,7 @@ function generationFailureWarning(error: unknown): KtxScanWarning {
export async function proposeKtxRelationshipCandidatesWithLlm(
input: ProposeKtxRelationshipCandidatesWithLlmInput,
): Promise<KtxRelationshipLlmProposalResult> {
if (!input.llmProvider || modelIsDeterministic(input.llmProvider)) {
if (!input.llmRuntime) {
return { candidates: [], warnings: [], llmCalls: 0, summary: 'skipped' };
}
@ -256,12 +244,11 @@ export async function proposeKtxRelationshipCandidatesWithLlm(
KtxRelationshipLlmProposalOutput,
typeof relationshipLlmProposalSchema
>({
llmProvider: input.llmProvider,
runtime: input.llmRuntime,
role: 'candidateExtraction',
system,
prompt,
schema: relationshipLlmProposalSchema,
generateText: input.generateText,
});
const output = relationshipLlmProposalSchema.parse(generated);
const mapped = mapValidProposals(input.schema, output, settings);

View file

@ -1,6 +1,8 @@
import { tool } from 'ai';
import { z, type ZodType } from 'zod';
import { noopLogger, type KtxLogger } from '../core/index.js';
import type { KtxRuntimeToolDescriptor } from '../llm/runtime-port.js';
import { normalizeKtxRuntimeToolOutput } from '../llm/runtime-tools.js';
import type { IngestToolMetadata, ToolSession } from './tool-session.js';
export interface ToolOutput<T = unknown> {
@ -164,6 +166,23 @@ export abstract class BaseTool<TInput extends ZodType = ZodType> {
});
}
/**
 * Builds the runtime-port descriptor for this tool, bound to `context`.
 *
 * The descriptor exposes the tool's name, description and input schema. Its
 * `execute` handler throws when the context carries no `userId`; otherwise it
 * validates the raw params with `parseInput`, awaits `call`, and returns the
 * result passed through `normalizeKtxRuntimeToolOutput`.
 */
toRuntimeTool(context: ToolContext): KtxRuntimeToolDescriptor {
  return {
    name: this.name,
    description: this.description,
    inputSchema: this.inputSchema as unknown as KtxRuntimeToolDescriptor['inputSchema'],
    execute: async (params) => {
      // Every invocation works on its own shallow copy of the bound context.
      const invocationContext = { ...context };
      if (invocationContext.userId) {
        const validatedInput = this.parseInput(params as Record<string, any>);
        const rawOutput = await this.call(validatedInput, invocationContext);
        return normalizeKtxRuntimeToolOutput(rawOutput);
      }
      throw new Error('Authentication required: userId must be provided in ToolContext');
    },
  };
}
/**
 * Validates raw tool input against this tool's `inputSchema` and returns the
 * parsed value; any error raised by the schema's `parse` propagates to the caller.
 */
parseInput(input: Record<string, any>): z.infer<TInput> {
  const validated = this.inputSchema.parse(input);
  return validated;
}

View file

@ -61,4 +61,17 @@ describe('KTX LLM health check', () => {
message: '401 invalid x-api-key [redacted]',
});
});
// The health check is awaited with a claude-code backend config and is expected to resolve
// (rather than reject) with ok: false and a message naming claude-code as not being an
// AI SDK LanguageModel backend.
it('reports claude-code as unsupported by the AI SDK health check', async () => {
const result = await runKtxLlmHealthCheck({
backend: 'claude-code',
modelSlots: { default: 'sonnet' },
promptCaching: { enabled: false },
});
expect(result).toEqual({
ok: false,
// Only a fragment of the message is pinned, so surrounding wording may vary.
message: expect.stringContaining('claude-code is not an AI SDK LanguageModel backend'),
});
});
});

View file

@ -302,4 +302,14 @@ describe('createKtxLlmProvider', () => {
expect(provider.promptCachingConfig().enabled).toBe(false);
expect(provider.cacheMarker('1h', 'claude-sonnet-4-6')).toBeUndefined();
});
// The AI SDK provider factory should throw for the claude-code backend.
it('throws instead of falling through when an unsupported LLM backend is passed to the AI SDK provider factory', () => {
  const buildClaudeCodeProvider = () =>
    createKtxLlmProvider({
      backend: 'claude-code',
      modelSlots: { default: 'sonnet' },
      promptCaching: { enabled: false },
    });

  expect(buildClaudeCodeProvider).toThrow('claude-code is not an AI SDK LanguageModel backend');
});
});

View file

@ -175,14 +175,18 @@ class DefaultKtxLlmProvider implements KtxLlmProvider {
return (modelId) => vertex(modelId);
}
const gateway = (deps.createGateway ?? createGateway)({
...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}),
...(config.gateway?.baseURL ? { baseURL: config.gateway.baseURL } : {}),
headers: {
'anthropic-beta': ANTHROPIC_BETA_HEADER,
},
});
return (modelId) => gateway(modelId);
if (config.backend === 'gateway') {
const gateway = (deps.createGateway ?? createGateway)({
...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}),
...(config.gateway?.baseURL ? { baseURL: config.gateway.baseURL } : {}),
headers: {
'anthropic-beta': ANTHROPIC_BETA_HEADER,
},
});
return (modelId) => gateway(modelId);
}
throw new Error(`${config.backend} is not an AI SDK LanguageModel backend; use KtxLlmRuntimePort`);
}
}

View file

@ -3,7 +3,7 @@ import type { LanguageModel, TelemetrySettings, ToolCallRepairFunction, ToolSet
export const KTX_MODEL_ROLES = ['default', 'triage', 'candidateExtraction', 'curator', 'reconcile', 'repair'] as const;
export type KtxModelRole = (typeof KTX_MODEL_ROLES)[number];
export type KtxLlmBackend = 'anthropic' | 'vertex' | 'gateway';
export type KtxLlmBackend = 'anthropic' | 'vertex' | 'gateway' | 'claude-code';
export type KtxPromptCacheTtl = '5m' | '1h';
export type KtxJsonValue =

2245
pnpm-lock.yaml generated

File diff suppressed because it is too large Load diff

View file

@ -1,21 +1,26 @@
{
"schemaVersion": 1,
"publicNpmPackageVersion": "0.1.0-rc.1",
"releaseMode": "npm-public-release-ready",
"npm": {
"publish": true,
"registry": null,
"access": "public",
"tag": "next",
"packages": ["@kaelio/ktx"]
"packages": [
"@kaelio/ktx"
]
},
"python": {
"publish": false,
"repository": null,
"packages": ["kaelio-ktx"]
"packages": [
"kaelio-ktx"
]
},
"publishedPackageSmoke": {
"packageName": "@kaelio/ktx",
"version": "0.1.0-rc.0",
"version": "0.1.0-rc.1",
"registry": null
},
"runtimeInstaller": {

View file

@ -6,10 +6,15 @@ import { dirname, join, resolve } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { promisify } from 'node:util';
import {
PUBLIC_NPM_PACKAGE_NAME,
publicNpmPackageVersion,
} from './public-npm-release-metadata.mjs';
const execFileAsync = promisify(execFile);
export const PUBLIC_NPM_PACKAGE_NAME = '@kaelio/ktx';
export const PUBLIC_NPM_PACKAGE_VERSION = '0.1.0-rc.0';
export const PUBLIC_NPM_PACKAGE_VERSION = publicNpmPackageVersion();
export { PUBLIC_NPM_PACKAGE_NAME };
export function publicNpmPackageTarballName(version = PUBLIC_NPM_PACKAGE_VERSION) {
return `kaelio-ktx-${version}.tgz`;

View file

@ -142,9 +142,9 @@ describe('publicNpmPackageLayout', () => {
it('uses the first public npm release version for the tarball name', () => {
const layout = publicNpmPackageLayout('/repo/ktx');
assert.equal(PUBLIC_NPM_PACKAGE_VERSION, '0.1.0-rc.0');
assert.equal(publicNpmPackageTarballName(), 'kaelio-ktx-0.1.0-rc.0.tgz');
assert.equal(layout.tarballPath, '/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.0.tgz');
assert.equal(PUBLIC_NPM_PACKAGE_VERSION, '0.1.0-rc.1');
assert.equal(publicNpmPackageTarballName(), 'kaelio-ktx-0.1.0-rc.1.tgz');
assert.equal(layout.tarballPath, '/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.1.tgz');
});
});
@ -211,7 +211,7 @@ describe('publicNpmPackageJson', () => {
);
assert.equal(packageJson.name, PUBLIC_NPM_PACKAGE_NAME);
assert.equal(packageJson.version, '0.1.0-rc.0');
assert.equal(packageJson.version, '0.1.0-rc.1');
assert.equal(packageJson.private, false);
assert.deepEqual(packageJson.bin, { ktx: './dist/bin.js' });
assert.deepEqual(packageJson.dependencies, { commander: '14.0.3' });
@ -267,7 +267,7 @@ describe('publicNpmPackCommand', () => {
'--config.node-linker=hoisted',
'pack',
'--out',
'/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.0.tgz',
'/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.1.tgz',
],
cwd: '/repo/ktx/dist/public-npm-package',
});

View file

@ -9,7 +9,7 @@ const runtimeAssetPatterns = [/^packages\/[^/]+\/prompts\/.+\.md$/, /^packages\/
const identifierSkipPrefixes = ['docs/', 'docs-site/', 'examples/', 'python/ktx-sl/plans/', 'python/ktx-sl/openspec/'];
const identifierAllowPatterns = [
/^packages\/cli\/src\/(?:index|managed-local-embeddings|managed-python-command|managed-python-daemon|managed-python-runtime|runtime)(?:\.test)?\.ts$/,
/^scripts\/(?:build-public-npm-package|build-python-runtime-wheel|local-embeddings-runtime-smoke|package-artifacts|publish-public-npm-package|published-package-smoke|release-readiness)(?:\.test)?\.mjs$/,
/^scripts\/(?:build-public-npm-package|build-python-runtime-wheel|local-embeddings-runtime-smoke|package-artifacts|public-npm-release-metadata|publish-public-npm-package|published-package-smoke|release-readiness)(?:\.test)?\.mjs$/,
];
const forbiddenIdentifierTerms = ['kae' + 'lio', 'Kae' + 'lio', 'KAE' + 'LIO_'];

View file

@ -77,6 +77,7 @@ describe('scanFileContent', () => {
assert.equal(scanFileContent('scripts/local-embeddings-runtime-smoke.mjs', `@${name}/ktx`).length, 0);
assert.equal(scanFileContent('scripts/package-artifacts.test.mjs', `${name}-ktx`).length, 0);
assert.equal(scanFileContent('scripts/public-npm-release-metadata.mjs', `@${name}/ktx`).length, 0);
assert.equal(scanFileContent('scripts/publish-public-npm-package.test.mjs', `@${name}/ktx`).length, 0);
assert.equal(scanFileContent('packages/cli/src/managed-python-runtime.ts', `${name}_ktx`).length, 0);
});

View file

@ -36,8 +36,8 @@ describe('localEmbeddingsSmokeOptIn', () => {
describe('publicKtxTarballName', () => {
it('selects the public @kaelio/ktx tarball name', () => {
assert.equal(
publicKtxTarballName(['kaelio-ktx-0.1.0-rc.0.tgz', 'ignore-me.tgz']),
'kaelio-ktx-0.1.0-rc.0.tgz',
publicKtxTarballName(['kaelio-ktx-0.1.0-rc.1.tgz', 'ignore-me.tgz']),
'kaelio-ktx-0.1.0-rc.1.tgz',
);
});
@ -50,7 +50,7 @@ describe('publicKtxTarballName', () => {
it('fails when multiple public package tarballs are present', () => {
assert.throws(
() => publicKtxTarballName(['kaelio-ktx-0.1.0-rc.0.tgz', 'kaelio-ktx-0.2.0.tgz']),
() => publicKtxTarballName(['kaelio-ktx-0.1.0-rc.1.tgz', 'kaelio-ktx-0.2.0.tgz']),
/Expected exactly one @kaelio\/ktx tarball/,
);
});
@ -60,7 +60,7 @@ describe('expectedPublicKtxVersionPattern', () => {
it('matches the public package version and rejects the private workspace version', () => {
const pattern = expectedPublicKtxVersionPattern();
assert.match('@kaelio/ktx 0.1.0-rc.0\n', pattern);
assert.match('@kaelio/ktx 0.1.0-rc.1\n', pattern);
assert.doesNotMatch('@kaelio/ktx 0.0.0-private\n', pattern);
});
});

View file

@ -14,9 +14,9 @@ import {
} from './build-python-runtime-wheel.mjs';
import {
PUBLIC_NPM_PACKAGE_NAME,
PUBLIC_NPM_PACKAGE_VERSION,
publicNpmPackageTarballName,
} from './build-public-npm-package.mjs';
import { publicNpmPackageVersion } from './public-npm-release-metadata.mjs';
export {
RUNTIME_WHEEL_DISTRIBUTION_NAME,
@ -45,24 +45,27 @@ function scriptRootDir() {
return resolve(dirname(fileURLToPath(import.meta.url)), '..');
}
function npmPackageTarballName(packageName) {
function npmPackageTarballName(packageName, version) {
if (packageName !== PUBLIC_NPM_PACKAGE_NAME) {
throw new Error(`Unsupported npm artifact package: ${packageName}`);
}
return publicNpmPackageTarballName(PUBLIC_NPM_PACKAGE_VERSION);
return publicNpmPackageTarballName(version);
}
function npmPackageTarballs(npmDir) {
function npmPackageTarballs(npmDir, version) {
return Object.fromEntries(
NPM_ARTIFACT_PACKAGES.map((packageInfo) => [packageInfo.name, join(npmDir, npmPackageTarballName(packageInfo.name))]),
NPM_ARTIFACT_PACKAGES.map((packageInfo) => [
packageInfo.name,
join(npmDir, npmPackageTarballName(packageInfo.name, version)),
]),
);
}
export function packageArtifactLayout(rootDir = scriptRootDir()) {
export function packageArtifactLayout(rootDir = scriptRootDir(), version = publicNpmPackageVersion(rootDir)) {
const artifactDir = join(rootDir, 'dist', 'artifacts');
const npmDir = join(artifactDir, 'npm');
const pythonDir = join(artifactDir, 'python');
const npmTarballs = npmPackageTarballs(npmDir);
const npmTarballs = npmPackageTarballs(npmDir, version);
return {
rootDir,
@ -170,7 +173,7 @@ function releaseMetadataEntry({ ecosystem, packageName, packageRoot, packageVers
};
}
async function readNpmPackageMetadata(rootDir, packageInfo) {
async function readNpmPackageMetadata(rootDir, packageInfo, version) {
const packageJson = await readJson(join(rootDir, packageInfo.packageRoot, 'package.json'));
const expectedSourceName = packageInfo.name === PUBLIC_NPM_PACKAGE_NAME ? '@ktx/cli' : packageInfo.name;
if (packageJson.name !== expectedSourceName) {
@ -183,14 +186,14 @@ async function readNpmPackageMetadata(rootDir, packageInfo) {
ecosystem: 'npm',
packageName: packageInfo.name,
packageRoot: packageInfo.packageRoot,
packageVersion: isPublicKtxPackage ? PUBLIC_NPM_PACKAGE_VERSION : packageJson.version,
packageVersion: isPublicKtxPackage ? version : packageJson.version,
privatePackage: isPublicKtxPackage ? false : packageJson.private === true,
});
}
export async function packageReleaseMetadata(rootDir = scriptRootDir()) {
export async function packageReleaseMetadata(rootDir = scriptRootDir(), version = publicNpmPackageVersion(rootDir)) {
const npmPackages = await Promise.all(
NPM_ARTIFACT_PACKAGES.map((packageInfo) => readNpmPackageMetadata(rootDir, packageInfo)),
NPM_ARTIFACT_PACKAGES.map((packageInfo) => readNpmPackageMetadata(rootDir, packageInfo, version)),
);
return [

View file

@ -5,6 +5,7 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, it } from 'node:test';
import { PUBLIC_NPM_PACKAGE_VERSION } from './build-public-npm-package.mjs';
import {
CLI_PYTHON_ASSET_MANIFEST,
INTERNAL_NPM_WORKSPACE_PACKAGES,
@ -32,6 +33,35 @@ async function writeJson(path, value) {
}
async function writeReleaseMetadataInputs(root) {
await writeJson(join(root, 'release-policy.json'), {
schemaVersion: 1,
publicNpmPackageVersion: PUBLIC_NPM_PACKAGE_VERSION,
releaseMode: 'ci-artifact-only',
npm: {
publish: false,
registry: null,
access: 'public',
tag: 'next',
packages: ['@kaelio/ktx'],
},
python: {
publish: false,
repository: null,
packages: ['kaelio-ktx'],
},
publishedPackageSmoke: {
packageName: '@kaelio/ktx',
version: PUBLIC_NPM_PACKAGE_VERSION,
registry: null,
},
runtimeInstaller: {
uvStrategy: 'path-prerequisite',
bootstrapUv: false,
missingUvBehavior: 'focused-error',
},
requiredBeforePublishing: ['Choose public release version.'],
});
for (const packageInfo of INTERNAL_NPM_WORKSPACE_PACKAGES) {
await mkdir(join(root, packageInfo.packageRoot), { recursive: true });
await writeJson(join(root, packageInfo.packageRoot, 'package.json'), {
@ -64,19 +94,19 @@ async function writeUploadableArtifactFixtures(layout) {
describe('packageArtifactLayout', () => {
it('uses stable artifact paths under ktx/dist/artifacts', () => {
const layout = packageArtifactLayout('/repo/ktx');
const layout = packageArtifactLayout('/repo/ktx', PUBLIC_NPM_PACKAGE_VERSION);
assert.equal(layout.artifactDir, '/repo/ktx/dist/artifacts');
assert.equal(layout.npmDir, '/repo/ktx/dist/artifacts/npm');
assert.equal(layout.pythonDir, '/repo/ktx/dist/artifacts/python');
assert.equal(layout.cliTarball, '/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.0.tgz');
assert.equal(layout.cliTarball, '/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.1.tgz');
assert.deepEqual(Object.keys(layout.npmTarballs), ['@kaelio/ktx']);
});
});
describe('buildArtifactCommands', () => {
it('builds TypeScript packages in parallel topology, then the runtime wheel, then packs npm artifacts', () => {
const layout = packageArtifactLayout('/repo/ktx');
const layout = packageArtifactLayout('/repo/ktx', PUBLIC_NPM_PACKAGE_VERSION);
const commands = buildArtifactCommands(layout);
assert.deepEqual(
@ -101,7 +131,7 @@ describe('packageReleaseMetadata', () => {
ecosystem: 'npm',
packageName: '@kaelio/ktx',
packageRoot: 'packages/cli',
packageVersion: '0.1.0-rc.0',
packageVersion: '0.1.0-rc.1',
private: false,
releaseMode: 'ci-artifact-only',
},
@ -147,7 +177,7 @@ describe('findPythonArtifacts', () => {
describe('artifact manifest', () => {
it('writes release metadata, source revision, checksums, and byte counts for every uploadable artifact', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-manifest-test-'));
const layout = packageArtifactLayout(root);
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
try {
await writeReleaseMetadataInputs(root);
await writeUploadableArtifactFixtures(layout);
@ -167,7 +197,7 @@ describe('artifact manifest', () => {
ecosystem: 'npm',
packageName: '@kaelio/ktx',
packageRoot: 'packages/cli',
packageVersion: '0.1.0-rc.0',
packageVersion: '0.1.0-rc.1',
private: false,
releaseMode: 'ci-artifact-only',
},
@ -202,8 +232,8 @@ describe('artifact manifest', () => {
artifactKind: 'tarball',
ecosystem: 'npm',
packageName: '@kaelio/ktx',
packageVersion: '0.1.0-rc.0',
path: 'npm/kaelio-ktx-0.1.0-rc.0.tgz',
packageVersion: '0.1.0-rc.1',
path: 'npm/kaelio-ktx-0.1.0-rc.1.tgz',
},
],
);
@ -228,7 +258,7 @@ describe('artifact manifest', () => {
],
);
const npmEntry = manifest.files.find((file) => file.path === 'npm/kaelio-ktx-0.1.0-rc.0.tgz');
const npmEntry = manifest.files.find((file) => file.path === 'npm/kaelio-ktx-0.1.0-rc.1.tgz');
assert.ok(npmEntry);
assert.equal(npmEntry.bytes, Buffer.byteLength('@kaelio/ktx-tarball'));
assert.equal(npmEntry.sha256, createHash('sha256').update('@kaelio/ktx-tarball').digest('hex'));
@ -244,7 +274,7 @@ describe('artifact manifest', () => {
describe('verifyArtifactManifest', () => {
it('accepts a schema version 2 manifest that matches the artifact directory', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-verify-manifest-test-'));
const layout = packageArtifactLayout(root);
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
try {
await writeReleaseMetadataInputs(root);
await writeUploadableArtifactFixtures(layout);
@ -266,7 +296,7 @@ describe('verifyArtifactManifest', () => {
it('rejects a manifest when a file checksum has drifted', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-checksum-drift-test-'));
const layout = packageArtifactLayout(root);
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
try {
await writeReleaseMetadataInputs(root);
await writeUploadableArtifactFixtures(layout);
@ -286,7 +316,7 @@ describe('verifyArtifactManifest', () => {
it('rejects a manifest with an unsafe artifact path', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-path-test-'));
const layout = packageArtifactLayout(root);
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
try {
await writeReleaseMetadataInputs(root);
await writeUploadableArtifactFixtures(layout);
@ -304,7 +334,7 @@ describe('verifyArtifactManifest', () => {
it('rejects a manifest from the wrong source revision when one is required', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-revision-test-'));
const layout = packageArtifactLayout(root);
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
try {
await writeReleaseMetadataInputs(root);
await writeUploadableArtifactFixtures(layout);
@ -328,7 +358,7 @@ describe('verifyArtifactManifest', () => {
describe('copyRuntimeWheelAssets', () => {
it('copies the runtime wheel and checksum manifest into CLI assets', async () => {
const root = await mkdtemp(join(tmpdir(), 'ktx-runtime-assets-test-'));
const layout = packageArtifactLayout(root);
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
try {
await mkdir(layout.pythonDir, { recursive: true });
await writeFile(
@ -399,7 +429,7 @@ describe('standalone Python artifact cleanup', () => {
describe('verification snippets', () => {
it('pins the smoke project to the public package artifact', () => {
const layout = packageArtifactLayout('/repo/ktx');
const layout = packageArtifactLayout('/repo/ktx', PUBLIC_NPM_PACKAGE_VERSION);
const packageJson = npmSmokePackageJson(layout);
assert.deepEqual(packageJson.dependencies, {

Some files were not shown because too many files have changed in this diff Show more