mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat: add claude-code llm backend with runtime port (#115)
* docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
This commit is contained in:
parent
e6d578c03f
commit
b565e44a22
109 changed files with 10218 additions and 1093 deletions
45
.github/workflows/release.yml
vendored
45
.github/workflows/release.yml
vendored
|
|
@ -3,14 +3,27 @@ name: KTX Release
|
|||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
release_kind:
|
||||
description: "Release kind: rc publishes to next, stable publishes to latest"
|
||||
required: true
|
||||
type: choice
|
||||
default: "rc"
|
||||
options:
|
||||
- rc
|
||||
- stable
|
||||
force_release:
|
||||
description: "Force a patch release even if semantic-release finds no releasable commits"
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
publish_live:
|
||||
description: "Publish @kaelio/ktx to npm instead of running a dry-run"
|
||||
description: "Create the release and publish @kaelio/ktx to npm instead of running a dry-run"
|
||||
required: true
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
contents: write
|
||||
|
||||
concurrency:
|
||||
group: ktx-release-${{ github.ref }}
|
||||
|
|
@ -22,6 +35,8 @@ jobs:
|
|||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093 # v6.0.8
|
||||
|
|
@ -34,6 +49,7 @@ jobs:
|
|||
node-version: "24"
|
||||
cache: "pnpm"
|
||||
cache-dependency-path: "pnpm-lock.yaml"
|
||||
registry-url: "https://registry.npmjs.org"
|
||||
|
||||
- name: Install TypeScript dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
|
@ -52,18 +68,19 @@ jobs:
|
|||
- name: Install Python dependencies
|
||||
run: uv sync --all-packages
|
||||
|
||||
- name: Build and verify artifacts
|
||||
run: pnpm run artifacts:check
|
||||
|
||||
- name: Check release readiness
|
||||
run: pnpm run release:readiness
|
||||
|
||||
- name: Dry-run npm publish
|
||||
- name: Dry-run semantic release
|
||||
if: ${{ !inputs.publish_live }}
|
||||
run: pnpm run release:npm-publish
|
||||
|
||||
- name: Publish npm package
|
||||
if: ${{ inputs.publish_live }}
|
||||
run: pnpm run release:npm-publish -- --publish
|
||||
run: pnpm run semantic-release:dry-run
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
KTX_RELEASE_KIND: ${{ inputs.release_kind }}
|
||||
FORCE_RELEASE: ${{ inputs.force_release }}
|
||||
|
||||
- name: Create semantic release
|
||||
if: ${{ inputs.publish_live }}
|
||||
run: pnpm run semantic-release
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
KTX_RELEASE_KIND: ${{ inputs.release_kind }}
|
||||
FORCE_RELEASE: ${{ inputs.force_release }}
|
||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
|
|
|
|||
3
.releaserc.cjs
Normal file
3
.releaserc.cjs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
const { createReleaseConfig } = require('./scripts/semantic-release-config.cjs');
|
||||
|
||||
module.exports = createReleaseConfig(process.env);
|
||||
|
|
@ -34,7 +34,7 @@ SQLite.
|
|||
## Quick Start
|
||||
|
||||
```bash
|
||||
npm install -g @kaelio/ktx
|
||||
pnpm add --global @kaelio/ktx
|
||||
ktx setup
|
||||
ktx status
|
||||
```
|
||||
|
|
|
|||
|
|
@ -51,17 +51,21 @@ scripted project creation. They are not shown in `ktx setup --help`.
|
|||
|
||||
| Flag | Description |
|
||||
|------|-------------|
|
||||
| `--llm-backend <backend>` | LLM backend: `anthropic` or `vertex` |
|
||||
| `--llm-backend <backend>` | LLM backend: `anthropic`, `vertex`, or `claude-code` |
|
||||
| `--llm-backend claude-code` | Use the local Claude Code session for KTX LLM calls |
|
||||
| `--llm-model <model>` | LLM model ID or backend model alias to validate and save |
|
||||
| `--anthropic-api-key-env <name>` | Environment variable containing the Anthropic API key |
|
||||
| `--anthropic-api-key-file <path>` | File containing the Anthropic API key |
|
||||
| `--anthropic-model <model>` | Anthropic model ID to validate and save |
|
||||
| `--anthropic-model <model>` | Legacy alias for `--llm-model` |
|
||||
| `--vertex-project <project>` | Vertex AI project ID, `env:NAME`, or `file:/path` reference |
|
||||
| `--vertex-location <location>` | Vertex AI location, `env:NAME`, or `file:/path` reference |
|
||||
| `--skip-llm` | Leave LLM setup incomplete |
|
||||
|
||||
Choose only one Anthropic credential source. Anthropic credential flags are only
|
||||
valid with the Anthropic backend; Vertex flags are only valid with the Vertex
|
||||
backend.
|
||||
backend. The `claude-code` backend uses local Claude Code authentication instead
|
||||
of Anthropic API key or Vertex flags. For Claude Code, `--llm-model` accepts
|
||||
`sonnet`, `opus`, `haiku`, or a full Claude model ID.
|
||||
|
||||
### Embeddings
|
||||
|
||||
|
|
@ -142,6 +146,12 @@ ktx setup
|
|||
# Run setup for a specific project directory
|
||||
ktx setup --project-dir ./analytics
|
||||
|
||||
# Use Claude Code with Opus for KTX LLM calls
|
||||
ktx setup \
|
||||
--project-dir ./analytics \
|
||||
--llm-backend claude-code \
|
||||
--llm-model opus
|
||||
|
||||
# Script a Postgres connection that reads its URL from the environment
|
||||
ktx setup \
|
||||
--project-dir ./analytics \
|
||||
|
|
|
|||
|
|
@ -47,6 +47,10 @@ ktx status --project-dir ./analytics
|
|||
`ktx status` prints grouped doctor checks. Agents should use
|
||||
`ktx status --json --no-input` when they need to branch on readiness state.
|
||||
|
||||
For `llm.provider.backend: claude-code`, `ktx status` checks that the local
|
||||
Claude Code session is usable. If auth fails, run the Claude Code CLI login
|
||||
flow, then rerun `ktx status`.
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "KTX project doctor",
|
||||
|
|
|
|||
|
|
@ -59,12 +59,13 @@ setup progress under `.ktx/setup/` and resumes from the remaining work.
|
|||
KTX uses a Claude model for ingest agents that turn schemas, SQL, BI metadata,
|
||||
and documents into semantic-layer sources and wiki context.
|
||||
|
||||
Setup supports two LLM provider paths:
|
||||
Setup supports three LLM provider paths:
|
||||
|
||||
| Provider | Use when | Credential model |
|
||||
|----------|----------|------------------|
|
||||
| Anthropic API | You have an Anthropic API key | `ANTHROPIC_API_KEY` or a local `file:` secret |
|
||||
| Google Vertex AI for Anthropic Claude | Your organization runs Claude through Google Cloud | Application Default Credentials plus Vertex project and location |
|
||||
| Claude Code | You want KTX to use your local Claude Code session | Claude Code local authentication |
|
||||
|
||||
For Anthropic API, setup can read the key from the environment or save a pasted
|
||||
key to `.ktx/secrets/anthropic-api-key`. `ktx.yaml` stores an `env:` or `file:`
|
||||
|
|
@ -74,6 +75,27 @@ For Vertex AI, setup uses Google Application Default Credentials. It can read
|
|||
your active `gcloud` project, list visible projects, or accept explicit
|
||||
`--vertex-project` and `--vertex-location` values.
|
||||
|
||||
To use your local Claude Code session instead of an API key, set:
|
||||
|
||||
```yaml
|
||||
llm:
|
||||
provider:
|
||||
backend: claude-code
|
||||
models:
|
||||
default: sonnet
|
||||
triage: haiku
|
||||
candidateExtraction: sonnet
|
||||
curator: sonnet
|
||||
reconcile: sonnet
|
||||
repair: sonnet
|
||||
```
|
||||
|
||||
`claude-code` uses the Claude Code authentication already configured on your
|
||||
machine. It doesn't use `ANTHROPIC_API_KEY`, Vertex credentials, AI Gateway
|
||||
tokens, or Bedrock credentials. In non-interactive setup, pass
|
||||
`--llm-model opus`, `--llm-model sonnet`, `--llm-model haiku`, or a full Claude
|
||||
model ID to select the Claude Code model.
|
||||
|
||||
Setup checks the selected model before saving. Anthropic API setup fetches live
|
||||
Claude model choices when possible and falls back to bundled defaults if model
|
||||
discovery is unavailable.
|
||||
|
|
|
|||
|
|
@ -58,6 +58,11 @@ ktx ingest --all --deep
|
|||
Deep ingest needs LLM and embedding readiness. If those providers are not
|
||||
configured, run `ktx setup` or use `--fast`.
|
||||
|
||||
When you use `claude-code`, KTX still controls the tool surface for ingest and
|
||||
memory capture. Claude Code built-in tools, discovered MCP servers, plugins,
|
||||
skills, agents, and slash commands are not invokable by KTX agent loops unless
|
||||
they are exact KTX MCP tools for the current run.
|
||||
|
||||
## Query history
|
||||
|
||||
PostgreSQL, BigQuery, and Snowflake can add query-history context. This helps
|
||||
|
|
|
|||
61
docs-site/content/docs/guides/llm-configuration.mdx
Normal file
61
docs-site/content/docs/guides/llm-configuration.mdx
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
---
|
||||
title: LLM configuration
|
||||
description: Configure KTX LLM providers, model roles, and prompt caching.
|
||||
---
|
||||
|
||||
KTX uses the top-level `llm` block in `ktx.yaml` for text generation,
|
||||
structured extraction, and ingest or memory agent loops.
|
||||
|
||||
## Backends
|
||||
|
||||
Set `llm.provider.backend` to one of these values:
|
||||
|
||||
- `anthropic`: Use the Anthropic API through `ANTHROPIC_API_KEY` or the
|
||||
configured `api_key` reference.
|
||||
- `vertex`: Use Vertex AI Anthropic models through Google Cloud credentials.
|
||||
- `gateway`: Use AI Gateway-compatible Anthropic model IDs.
|
||||
- `claude-code`: Use your local Claude Code session through the Claude Agent
|
||||
SDK. KTX removes provider-routing environment variables from Claude Code
|
||||
child processes, so this backend doesn't silently fall back to
|
||||
`ANTHROPIC_API_KEY`, Vertex, Gateway, or Bedrock credentials.
|
||||
|
||||
## Claude Code
|
||||
|
||||
Use aliases or full Claude model IDs in `llm.models`:
|
||||
|
||||
```yaml
|
||||
llm:
|
||||
provider:
|
||||
backend: claude-code
|
||||
models:
|
||||
default: sonnet
|
||||
triage: haiku
|
||||
candidateExtraction: sonnet
|
||||
curator: sonnet
|
||||
reconcile: sonnet
|
||||
repair: sonnet
|
||||
```
|
||||
|
||||
During setup, choose the Claude Code backend interactively or pass the model in
|
||||
automation:
|
||||
|
||||
```bash
|
||||
ktx setup --llm-backend claude-code --llm-model opus --no-input
|
||||
```
|
||||
|
||||
For Claude Code, the aliases `sonnet`, `opus`, and `haiku` each map to the current KTX default for that alias.
|
||||
You can also pass a full Claude model ID, such as `claude-opus-4-7`.
|
||||
|
||||
`claude-code` keeps KTX tool boundaries intact. KTX exposes only the MCP tools
|
||||
needed for the current KTX agent loop, disables Claude Code built-in tools,
|
||||
keeps plugins empty, and denies every non-KTX tool request through
|
||||
`canUseTool`. The Claude Agent SDK may still report host-discovered slash
|
||||
commands, skills, and subagent names in init metadata; that metadata is not an
|
||||
execution grant for KTX agent loops.
|
||||
|
||||
## Prompt caching
|
||||
|
||||
`llm.promptCaching` has partial parity on `claude-code`. KTX doesn't pass
|
||||
Anthropic cache-control markers to the Claude Agent SDK. Status and doctor warn
|
||||
when you configure prompt-cache TTL, tool, or history fields that the Claude
|
||||
Agent SDK backend ignores.
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"title": "Guides",
|
||||
"defaultOpen": true,
|
||||
"pages": ["building-context", "writing-context", "serving-agents"]
|
||||
"pages": ["building-context", "llm-configuration", "writing-context", "serving-agents"]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,12 @@ const config = {
|
|||
},
|
||||
async redirects() {
|
||||
return [
|
||||
{
|
||||
source: "/docs",
|
||||
destination: "/docs/getting-started/introduction",
|
||||
permanent: false,
|
||||
basePath: false,
|
||||
},
|
||||
{
|
||||
source: "/:path*",
|
||||
has: [{ type: "host", value: "docs.ktx.sh" }],
|
||||
|
|
|
|||
99
docs/release.md
Normal file
99
docs/release.md
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
# KTX release runbook
|
||||
|
||||
This runbook covers the maintainer workflow for publishing `@kaelio/ktx` to
|
||||
npm through GitHub Actions. The workflow uses semantic-release to choose the
|
||||
next version, update release metadata, publish the package, create the GitHub
|
||||
release, and commit the release files back to the repository.
|
||||
|
||||
## Release channels
|
||||
|
||||
KTX has two npm release channels:
|
||||
|
||||
- `rc` publishes prereleases such as `0.1.0-rc.2` to the npm `next` tag.
|
||||
- `stable` publishes normal releases such as `0.1.0` to the npm `latest` tag.
|
||||
|
||||
Run stable releases only from `main`. The workflow rejects stable releases from
|
||||
other branches.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before you publish, confirm these requirements:
|
||||
|
||||
- The repository has an Actions secret named `NPM_TOKEN`.
|
||||
- `NPM_TOKEN` is a granular npm token that can publish `@kaelio/ktx`.
|
||||
- The token can publish non-interactively if the npm account or package uses
|
||||
two-factor authentication for writes.
|
||||
- The repository has a baseline semantic-release tag for the latest published
|
||||
package version, such as `v0.1.0-rc.1`.
|
||||
|
||||
If no baseline tag exists, semantic-release treats the run as the first release
|
||||
and may choose a version that doesn't match the currently published package.
|
||||
|
||||
## Dry-run a release
|
||||
|
||||
Use a dry-run to verify the next version and generated release notes without
|
||||
publishing to npm.
|
||||
|
||||
1. Open **Actions** in GitHub.
|
||||
2. Select **KTX Release**.
|
||||
3. Select the branch to release from.
|
||||
4. Set **release_kind** to `rc` or `stable`.
|
||||
5. Leave **publish_live** set to `false`.
|
||||
6. Optional: Set **force_release** to `true` when you need a patch release even
|
||||
if semantic-release doesn't find a releasable commit.
|
||||
7. Run the workflow.
|
||||
|
||||
The dry-run uses the same semantic-release configuration as a live release. It
|
||||
doesn't publish to npm and doesn't commit release files.
|
||||
|
||||
## Publish an rc release
|
||||
|
||||
Publish an rc release when you need a prerelease package for validation before
|
||||
promoting to `latest`.
|
||||
|
||||
1. Open **Actions** in GitHub.
|
||||
2. Select **KTX Release**.
|
||||
3. Select the branch to release from.
|
||||
4. Set **release_kind** to `rc`.
|
||||
5. Set **publish_live** to `true`.
|
||||
6. Optional: Set **force_release** to `true`.
|
||||
7. Run the workflow.
|
||||
|
||||
The workflow publishes `@kaelio/ktx` with `--access public --tag next`, runs the
|
||||
published package smoke test, creates a GitHub release, and commits
|
||||
`CHANGELOG.md`, `package.json`, and `release-policy.json`.
|
||||
|
||||
## Publish a stable release
|
||||
|
||||
Publish a stable release from `main` after you have validated an rc package.
|
||||
|
||||
1. Open **Actions** in GitHub.
|
||||
2. Select **KTX Release**.
|
||||
3. Select `main`.
|
||||
4. Set **release_kind** to `stable`.
|
||||
5. Set **publish_live** to `true`.
|
||||
6. Optional: Set **force_release** to `true`.
|
||||
7. Run the workflow.
|
||||
|
||||
The workflow publishes `@kaelio/ktx` with `--access public --tag latest`, runs
|
||||
the published package smoke test, creates a GitHub release, and commits the
|
||||
release metadata.
|
||||
|
||||
## Release metadata
|
||||
|
||||
semantic-release calls `scripts/update-public-release-version.mjs` during the
|
||||
prepare step. That script updates:
|
||||
|
||||
- `package.json` with the semantic-release version.
|
||||
- `release-policy.json` with `publicNpmPackageVersion`, npm publish settings,
|
||||
and the published package smoke-test version.
|
||||
|
||||
The artifact packaging and readiness scripts read `publicNpmPackageVersion`
|
||||
from `release-policy.json`, so manual version edits in build scripts aren't
|
||||
needed for rc releases.
|
||||
|
||||
## Trusted Publishing follow-up
|
||||
|
||||
This workflow uses `NPM_TOKEN` today. Move to npm Trusted Publishing after the
|
||||
final publish command path is verified for the package manager and workflow
|
||||
filename configured in npm package settings.
|
||||
|
|
@ -0,0 +1,678 @@
|
|||
# Claude Code Auth Probe Isolation Fix Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Make the `claude-code` auth probe and runtime tolerate host-discovered
|
||||
Claude Code init metadata while preserving KTX-owned tool, MCP, and plugin
|
||||
restrictions.
|
||||
|
||||
**Architecture:** Keep the existing Claude Code runtime and SDK option tuple.
|
||||
Change the init-message assertion from "no host discovery appears" to "only the
|
||||
KTX-controlled execution surface is active." Align the design spec and user docs
|
||||
with the pinned SDK behavior: `settingSources: []` disables filesystem settings,
|
||||
`skills: []` is a context filter, and deny-by-default `canUseTool` is the
|
||||
runtime enforcement boundary.
|
||||
|
||||
**Tech Stack:** TypeScript, pnpm, Vitest, Markdown, Fumadocs MDX,
|
||||
`@anthropic-ai/claude-agent-sdk@0.3.142`.
|
||||
|
||||
---
|
||||
|
||||
## Audit result
|
||||
|
||||
The current strict isolation assertion is a v1-blocking bug. A real authenticated
|
||||
Claude Code host can report non-empty `slash_commands`, `skills`, and `agents`
|
||||
in the SDK init message even when KTX passes `settingSources: []`, `skills: []`,
|
||||
`plugins: []`, `tools: []`, exact KTX MCP `allowedTools`, `disallowedTools`, and
|
||||
deny-by-default `canUseTool`.
|
||||
|
||||
Spec findings:
|
||||
|
||||
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md:45-47`
|
||||
requires host-discovered capabilities not to expand the KTX agent-loop tool
|
||||
surface. That requirement is about invocation, not necessarily about zero
|
||||
diagnostic metadata in the init message.
|
||||
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md:254-265`
|
||||
overreaches by asking the implementation to assert that unexpected
|
||||
settings-derived commands, skills, agents, plugins, or MCP servers are
|
||||
inactive from the SDK init message. In `@anthropic-ai/claude-agent-sdk@0.3.142`,
|
||||
the available SDK controls cannot make `message.slash_commands`,
|
||||
`message.skills`, or `message.agents` reliably empty on an authenticated host.
|
||||
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md:266-267`
|
||||
says skills are disabled with `skills: []`. The pinned SDK type definitions
|
||||
document `skills` as a context filter, not a sandbox.
|
||||
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md:543-545`
|
||||
correctly requires the auth probe to pass the isolation option tuple and no
|
||||
MCP servers. It does not require failing when host discovery metadata is
|
||||
present.
|
||||
|
||||
SDK evidence from
|
||||
`node_modules/.pnpm/@anthropic-ai+claude-agent-sdk@0.3.142_zod@4.4.3/node_modules/@anthropic-ai/claude-agent-sdk/sdk.d.ts`:
|
||||
|
||||
- Lines `1686-1695`: `settingSources: []` disables filesystem settings only.
|
||||
- Lines `1697-1718`: `skills: []` is a context filter; unlisted skills are
|
||||
hidden from listing and rejected by the Skill tool, but files remain on disk.
|
||||
- Lines `1202-1213`: `allowedTools` is auto-approval, while `canUseTool` is the
|
||||
permission handler for controlling tool execution.
|
||||
- Lines `1224-1228`: `disallowedTools` removes listed tools from context and
|
||||
prevents use.
|
||||
- Lines `1255-1264`: `tools: []` disables built-in tools.
|
||||
- Lines `1545-1558`: `plugins` loads plugins when supplied; KTX supplies `[]`.
|
||||
- Lines `3465-3489`: the init message reports `agents`, `tools`,
|
||||
`mcp_servers`, `slash_commands`, `skills`, and `plugins`.
|
||||
|
||||
Implemented plan audit:
|
||||
|
||||
- `2026-05-15-claude-code-backend-v1-runtime.md` is implemented for config,
|
||||
runtime port, SDK dependency, model aliases, environment scrubbing, Claude Code
|
||||
text/object/agent execution, setup/status/doctor support, docs, and LLM
|
||||
call-site migration.
|
||||
- `2026-05-15-claude-code-backend-v1-isolation-closure.md` is implemented, but
|
||||
it converted the spec's ambiguous "assert inactive" line into an impossible
|
||||
assertion against non-empty `slash_commands`, `skills`, and `agents`.
|
||||
- `2026-05-15-claude-code-backend-v1-ingest-guidance-closure.md` is implemented
|
||||
for the ingest missing-LLM guidance and associated CLI/context tests.
|
||||
|
||||
Remaining v1-blocking gaps:
|
||||
|
||||
- `packages/context/src/llm/claude-code-runtime.ts:94-101` throws on
|
||||
host-discovered slash commands, skills, and agents.
|
||||
- `packages/context/src/llm/claude-code-runtime.test.ts:158-178` encodes the
|
||||
wrong behavior by requiring the runtime to reject any init message with
|
||||
discovered agents.
|
||||
- The auth probe has no regression coverage for an authenticated host whose init
|
||||
message reports non-empty `slash_commands`, `skills`, and `agents`.
|
||||
- User docs under `docs-site/content/docs/guides/` say KTX "disables" skills,
|
||||
agents, hooks, and slash commands. That wording is stronger than the SDK
|
||||
contract and must be changed to "not invokable by KTX agent loops."
|
||||
|
||||
Non-blocking gaps:
|
||||
|
||||
- Same-step AI SDK tool-call repair parity remains out of scope for v1.
|
||||
- OTEL telemetry parity remains out of scope for v1.
|
||||
- Embedding parity remains out of scope because embeddings are configured
|
||||
separately.
|
||||
- Full prompt-caching parity remains out of scope. V1 keeps warning on ignored
|
||||
prompt-cache fields and avoids AI SDK cache markers on the Claude Code path.
|
||||
|
||||
Decision:
|
||||
|
||||
- Choose option (a): relax the assertion in code and align the spec text. Do not
|
||||
rely on an invented SDK mechanism. The pinned type definitions expose
|
||||
`settingSources`, `skills`, `plugins`, `tools`, `allowedTools`,
|
||||
`disallowedTools`, and `canUseTool`, but they do not expose a query option that
|
||||
disables all host-discovered slash commands or user-level subagent names in the
|
||||
init message.
|
||||
|
||||
## File structure
|
||||
|
||||
Modify these files:
|
||||
|
||||
- `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md` aligns the
|
||||
design with the real SDK contract.
|
||||
- `packages/context/src/llm/claude-code-runtime.test.ts` adds the failing
|
||||
regression tests for auth probe and runtime init metadata.
|
||||
- `packages/context/src/llm/claude-code-runtime.ts` relaxes init metadata checks
|
||||
while tightening exact tool equality.
|
||||
- `docs-site/content/docs/guides/llm-configuration.mdx` changes user docs from
|
||||
"disabled" to "not invokable."
|
||||
- `docs-site/content/docs/guides/building-context.mdx` applies the same
|
||||
user-facing wording at the ingest guide boundary.
|
||||
|
||||
### Task 1: Align the design spec with SDK reality
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md`
|
||||
|
||||
- [ ] **Step 1: Update the tool-boundary goal**
|
||||
|
||||
Replace the goal bullet at lines `45-47` with:
|
||||
|
||||
```markdown
|
||||
- Preserve KTX's curated tool boundaries. Claude Code built-ins,
|
||||
filesystem-discovered MCP servers, hooks, skills, plugins, agents, and slash
|
||||
commands must not become invokable in KTX agent loops. The Agent SDK init
|
||||
message may still report host-discovered slash commands, skills, and agents;
|
||||
KTX treats that metadata as diagnostic only and restricts execution through
|
||||
`tools: []`, exact KTX MCP `allowedTools`, `disallowedTools`, and
|
||||
deny-by-default `canUseTool`.
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Replace the over-broad init assertion requirement**
|
||||
|
||||
Replace the bullet at lines `254-265` with:
|
||||
|
||||
```markdown
|
||||
- Filesystem settings are not loaded. The SDK's documented default for an
|
||||
omitted `settingSources` is `["user", "project", "local"]`
|
||||
(`@anthropic-ai/claude-agent-sdk@0.3.142` `sdk.d.ts:1686-1695`),
|
||||
which would inherit the user's Claude Code filesystem settings. Every KTX
|
||||
`query()` call site - agent loops, text generation, object generation, and
|
||||
the auth probe - MUST pass `settingSources: []` explicitly, along with
|
||||
`skills: []`, `plugins: []`, `tools: []`, `persistSession: false`, and no
|
||||
`mcpServers` entries other than the KTX MCP server (omitted entirely when
|
||||
the call site does not expose tools). The implementation MUST assert from
|
||||
the SDK init message that the controlled execution surface matches KTX's
|
||||
expectations:
|
||||
|
||||
- `message.tools` equals the exact generated KTX MCP tool ids for the current
|
||||
call.
|
||||
- `message.mcp_servers` equals the expected KTX MCP server set: `[]` when the
|
||||
call exposes no tools, or `["ktx"]` when it does.
|
||||
- `message.plugins` is empty.
|
||||
|
||||
The implementation MUST NOT reject a run solely because
|
||||
`message.slash_commands`, `message.skills`, or `message.agents` contain
|
||||
host-discovered names. In `@anthropic-ai/claude-agent-sdk@0.3.142`, those
|
||||
fields can report host discovery even when KTX passes the isolation options.
|
||||
They are not part of the KTX execution surface when `tools: []`,
|
||||
`allowedTools`, `disallowedTools`, and deny-by-default `canUseTool` are set.
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Replace the skills/plugin wording**
|
||||
|
||||
Replace the bullets at lines `266-289` with:
|
||||
|
||||
```markdown
|
||||
- `skills: []` is a context filter in the pinned SDK
|
||||
(`sdk.d.ts:1697-1718`): unlisted skills are hidden from the model's skill
|
||||
listing and rejected by the Skill tool, but discovered skill names may still
|
||||
appear in init metadata. KTX must still pass `skills: []`.
|
||||
- Plugins are disabled with `plugins: []`, and the runtime asserts that
|
||||
`message.plugins` is empty in the init message.
|
||||
- Built-in tools are disabled by setting `tools: []`. The pinned SDK type
|
||||
(`@anthropic-ai/claude-agent-sdk@0.3.142`, `sdk.d.ts:1255-1264`) documents
|
||||
`tools` as the base set of built-in tools, with `[]` meaning "disable all
|
||||
built-ins"; `tools` does not accept MCP tool ids and cannot be used to
|
||||
restrict MCP availability.
|
||||
- MCP tool availability is granted by registering the KTX MCP server through
|
||||
`mcpServers`. The SDK does not document a wildcard like `mcp__ktx__*` for
|
||||
any tool field; KTX must enumerate exact generated MCP tool ids of the form
|
||||
`mcp__ktx__<toolName>` (derived from the tool map handed to
|
||||
`createSdkMcpServer`) wherever a list of tool ids is required.
|
||||
- Pre-approval under `permissionMode: "dontAsk"` is configured by listing those
|
||||
same exact `mcp__ktx__<toolName>` ids in `allowedTools` (documented as
|
||||
auto-allow without prompting). Treat `allowedTools` as auto-approval, not
|
||||
restriction.
|
||||
- Defense-in-depth restriction uses `canUseTool`. The KTX runtime supplies a
|
||||
`canUseTool` handler that allows only tool names in the current KTX MCP tool
|
||||
map and denies everything else, so host-discovered slash commands, skills,
|
||||
agents, future SDK defaults, or a misconfigured MCP server cannot expand the
|
||||
execution surface.
|
||||
- `disallowedTools` MUST additionally list the current built-in tool names
|
||||
(`Agent`, `Task`, `AskUserQuestion`, `Bash`, `Read`, `Edit`, `Write`, `Glob`,
|
||||
`Grep`, `WebFetch`, `WebSearch`, `TodoWrite`) as redundant insurance.
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Update auth probe acceptance text**
|
||||
|
||||
After the auth probe option list at lines `543-545`, add:
|
||||
|
||||
```markdown
|
||||
The auth probe MUST tolerate init messages with non-empty
|
||||
`slash_commands`, `skills`, and `agents` when `message.tools` is empty,
|
||||
`message.mcp_servers` is empty, `message.plugins` is empty, and the query
|
||||
options contain the KTX isolation tuple. Host discovery metadata is not an
|
||||
auth failure.
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Update verified evidence and open items**
|
||||
|
||||
Replace lines `621-623` with:
|
||||
|
||||
```markdown
|
||||
- The Agent SDK skills docs say the `skills` option is a context filter rather
|
||||
than a sandbox. KTX must pass `skills: []`, but must not assert that
|
||||
`message.skills` is empty in the SDK init message.
|
||||
```
|
||||
|
||||
Replace open item `8` at lines `648-649` with:
|
||||
|
||||
```markdown
|
||||
8. Write tests proving a raw built-in Claude Code tool request is denied,
|
||||
host-discovered Skill/Agent/SlashCommand requests are denied by `canUseTool`,
|
||||
and only exact `mcp__ktx__*` tools are allowed during KTX agent loops.
|
||||
```
|
||||
|
||||
Replace open item `9` at lines `650-654` with:
|
||||
|
||||
```markdown
|
||||
9. Write a test that asserts every KTX-originated `query()` invocation
|
||||
(agent loop, text generation, object generation, auth probe) is called
|
||||
with `settingSources: []`, `skills: []`, `plugins: []`, `tools: []`, and
|
||||
`persistSession: false`, by spying on the SDK entry point. The test must
|
||||
fail if any path falls back to SDK defaults for those fields. The test must
|
||||
also prove that non-empty host-discovered `slash_commands`, `skills`, and
|
||||
`agents` in the init message do not fail the auth probe or runtime when the
|
||||
controlled tool, MCP server, and plugin surfaces match KTX expectations.
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Commit the spec alignment**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add docs/superpowers/specs/2026-05-15-claude-code-backend-design.md
|
||||
git commit -m "docs: align claude-code isolation spec with sdk metadata"
|
||||
```
|
||||
|
||||
Expected: the design spec no longer requires zero host-discovery metadata in
|
||||
the SDK init message.
|
||||
|
||||
### Task 2: Add regression tests for host-discovered init metadata
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/context/src/llm/claude-code-runtime.test.ts`
|
||||
|
||||
- [ ] **Step 1: Replace the invalid agent rejection test**
|
||||
|
||||
In `packages/context/src/llm/claude-code-runtime.test.ts`, replace the test named
|
||||
`rejects settings-derived agents and non-KTX MCP servers from init messages`
|
||||
with these tests:
|
||||
|
||||
```ts
|
||||
it('treats host-discovered commands skills and agents as non-fatal init metadata for text and auth probe', async () => {
|
||||
const hostDiscoveredInit = initMessage({
|
||||
slash_commands: ['/help', '/compact', '/clear', '/user-command'],
|
||||
skills: ['pdf', 'docx'],
|
||||
agents: ['claude', 'Explore', 'general-purpose'],
|
||||
});
|
||||
const textQuery = vi.fn((_input: any) =>
|
||||
stream([hostDiscoveredInit, resultMessage({ result: 'hello' })]),
|
||||
);
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query: textQuery,
|
||||
env: { ANTHROPIC_API_KEY: 'sk-ant-test', PATH: '/usr/bin' }, // pragma: allowlist secret
|
||||
});
|
||||
|
||||
await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).resolves.toBe('hello');
|
||||
const textOptions = textQuery.mock.calls[0][0].options;
|
||||
expect(textOptions).toMatchObject({
|
||||
settingSources: [],
|
||||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
allowedTools: [],
|
||||
permissionMode: 'dontAsk',
|
||||
persistSession: false,
|
||||
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
|
||||
});
|
||||
expect(textOptions.disallowedTools).toEqual(expect.arrayContaining(['Agent', 'Task', 'Bash']));
|
||||
expect(await textOptions.canUseTool('Agent', {}, { signal: new AbortController().signal, toolUseID: 'agent' })).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: 'agent',
|
||||
});
|
||||
expect(await textOptions.canUseTool('Skill', {}, { signal: new AbortController().signal, toolUseID: 'skill' })).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: 'skill',
|
||||
});
|
||||
expect(
|
||||
await textOptions.canUseTool('SlashCommand', {}, { signal: new AbortController().signal, toolUseID: 'slash' }),
|
||||
).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: 'slash',
|
||||
});
|
||||
|
||||
const probeQuery = vi.fn((_input: any) =>
|
||||
stream([hostDiscoveredInit, resultMessage({ result: 'ok' })]),
|
||||
);
|
||||
await expect(
|
||||
runClaudeCodeAuthProbe({
|
||||
projectDir: '/tmp/project',
|
||||
model: 'sonnet',
|
||||
query: probeQuery,
|
||||
env: { ANTHROPIC_AUTH_TOKEN: 'token', HOME: '/Users/test' },
|
||||
}),
|
||||
).resolves.toEqual({ ok: true });
|
||||
expect(probeQuery.mock.calls[0][0].options).toMatchObject({
|
||||
settingSources: [],
|
||||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
allowedTools: [],
|
||||
permissionMode: 'dontAsk',
|
||||
persistSession: false,
|
||||
env: expect.objectContaining({ HOME: '/Users/test' }),
|
||||
});
|
||||
expect(probeQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('allows host-discovered context during agent loops while requiring exact KTX MCP tools and servers', async () => {
|
||||
const query = vi.fn((_input: any) =>
|
||||
stream([
|
||||
initMessage({
|
||||
tools: ['mcp__ktx__load_skill'],
|
||||
mcp_servers: [{ name: 'ktx', status: 'connected' }],
|
||||
slash_commands: ['/help', '/compact', '/clear'],
|
||||
skills: ['memory-agent', 'doc-reader'],
|
||||
agents: ['claude', 'Plan', 'Explore'],
|
||||
}),
|
||||
{
|
||||
type: 'assistant',
|
||||
message: { role: 'assistant', content: [] },
|
||||
parent_tool_use_id: null,
|
||||
uuid: '00000000-0000-4000-8000-000000000006',
|
||||
session_id: 'session-id',
|
||||
} as unknown as SDKMessage,
|
||||
resultMessage({ subtype: 'error_max_turns', is_error: true }),
|
||||
]),
|
||||
);
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query,
|
||||
env: {},
|
||||
});
|
||||
|
||||
await expect(
|
||||
runtime.runAgentLoop({
|
||||
modelRole: 'default',
|
||||
systemPrompt: 'system',
|
||||
userPrompt: 'user',
|
||||
toolSet: {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description: 'Load skill.',
|
||||
inputSchema: z.object({ name: z.string() }),
|
||||
execute: async () => ({ markdown: 'loaded' }),
|
||||
},
|
||||
},
|
||||
stepBudget: 1,
|
||||
telemetryTags: { operationName: 'test' },
|
||||
}),
|
||||
).resolves.toEqual({ stopReason: 'budget' });
|
||||
|
||||
const options = query.mock.calls[0][0].options;
|
||||
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
|
||||
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
|
||||
behavior: 'allow',
|
||||
toolUseID: '1',
|
||||
});
|
||||
expect(await options.canUseTool('Task', {}, { signal: new AbortController().signal, toolUseID: '2' })).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: '2',
|
||||
});
|
||||
expect(await options.canUseTool('Skill', {}, { signal: new AbortController().signal, toolUseID: '3' })).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: '3',
|
||||
});
|
||||
});
|
||||
|
||||
it('still rejects unexpected tools, missing KTX tools, plugins, and non-KTX MCP servers from init messages', async () => {
|
||||
const query = vi.fn((_input: any) =>
|
||||
stream([
|
||||
initMessage({
|
||||
tools: ['Bash'],
|
||||
mcp_servers: [{ name: 'filesystem', status: 'connected' }],
|
||||
plugins: [{ name: 'host-plugin', path: '/tmp/plugin' }],
|
||||
}),
|
||||
resultMessage({ result: 'hello' }),
|
||||
]),
|
||||
);
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query,
|
||||
env: {},
|
||||
});
|
||||
|
||||
await expect(
|
||||
runtime.generateText({
|
||||
role: 'default',
|
||||
prompt: 'say hello',
|
||||
tools: {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description: 'Load skill.',
|
||||
inputSchema: z.object({ name: z.string() }),
|
||||
execute: async () => ({ markdown: 'loaded' }),
|
||||
},
|
||||
},
|
||||
}),
|
||||
).rejects.toThrow(
|
||||
/Claude Code runtime isolation failed: .*tools=Bash.*missing_tools=mcp__ktx__load_skill.*mcp_servers=filesystem.*plugins=host-plugin/,
|
||||
);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the runtime test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts
|
||||
```
|
||||
|
||||
Expected: FAIL. The first new test fails because `runClaudeCodeAuthProbe(...)`
|
||||
returns `{ ok: false, ... }` and `generateText(...)` rejects when init metadata
|
||||
contains non-empty `slash_commands`, `skills`, or `agents`. The second new test
|
||||
fails because `runAgentLoop(...)` returns `{ stopReason: 'error', ... }` for the
|
||||
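same reason. The third new test should also fail until Task 3 if the current error message lacks the `missing_tools=` field or the plugin names that its regex requires.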
|
||||
|
||||
- [ ] **Step 3: Commit the failing regression test**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/llm/claude-code-runtime.test.ts
|
||||
git commit -m "test: cover claude-code host discovery metadata"
|
||||
```
|
||||
|
||||
Expected: the commit contains tests that fail before the runtime assertion is
|
||||
fixed.
|
||||
|
||||
### Task 3: Relax init metadata assertions to the controlled execution surface
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/context/src/llm/claude-code-runtime.ts`
|
||||
|
||||
- [ ] **Step 1: Replace `assertInitIsolation`**
|
||||
|
||||
In `packages/context/src/llm/claude-code-runtime.ts`, replace the full
|
||||
`assertInitIsolation(...)` function with:
|
||||
|
||||
```ts
|
||||
function assertInitIsolation(
|
||||
message: SDKMessage,
|
||||
allowedToolIds: Set<string>,
|
||||
expectedMcpServerNames: Set<string>,
|
||||
): void {
|
||||
if (message.type !== 'system' || message.subtype !== 'init') {
|
||||
return;
|
||||
}
|
||||
const activeToolIds = new Set(message.tools);
|
||||
const unexpectedTools = message.tools.filter((toolName) => !allowedToolIds.has(toolName));
|
||||
const missingTools = [...allowedToolIds].filter((toolName) => !activeToolIds.has(toolName));
|
||||
const activeMcpServerNames = message.mcp_servers.map((server) => server.name);
|
||||
const unexpectedMcpServers = activeMcpServerNames.filter((name) => !expectedMcpServerNames.has(name));
|
||||
const missingMcpServers = [...expectedMcpServerNames].filter((name) => !activeMcpServerNames.includes(name));
|
||||
const unexpectedPlugins = message.plugins.map((plugin) => plugin.name);
|
||||
if (
|
||||
unexpectedTools.length > 0 ||
|
||||
missingTools.length > 0 ||
|
||||
unexpectedMcpServers.length > 0 ||
|
||||
missingMcpServers.length > 0 ||
|
||||
unexpectedPlugins.length > 0
|
||||
) {
|
||||
throw new Error(
|
||||
`Claude Code runtime isolation failed: tools=${unexpectedTools.join(',') || '(none)'} missing_tools=${
|
||||
missingTools.join(',') || '(none)'
|
||||
} mcp_servers=${unexpectedMcpServers.join(',') || '(none)'} missing_mcp_servers=${
|
||||
missingMcpServers.join(',') || '(none)'
|
||||
} plugins=${unexpectedPlugins.join(',') || '(none)'} host_slash_commands=${
|
||||
message.slash_commands.length
|
||||
} host_skills=${message.skills.length} host_agents=${message.agents?.join(',') || '(none)'}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This preserves strict checks for the KTX-controlled execution surface:
|
||||
|
||||
- `message.tools` must exactly equal the generated KTX MCP tool ids for the
|
||||
current call.
|
||||
- `message.mcp_servers` must exactly equal the expected KTX MCP server names.
|
||||
- `message.plugins` must be empty.
|
||||
|
||||
It deliberately stops treating `message.slash_commands`, `message.skills`, and
|
||||
`message.agents` as fatal because those fields can contain host-discovered
|
||||
metadata that KTX cannot disable through the pinned SDK options.
|
||||
|
||||
- [ ] **Step 2: Run the runtime test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Commit the runtime fix**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/llm/claude-code-runtime.ts packages/context/src/llm/claude-code-runtime.test.ts
|
||||
git commit -m "fix: tolerate claude-code host discovery metadata"
|
||||
```
|
||||
|
||||
Expected: the auth probe and runtime no longer fail solely because the SDK init
|
||||
message reports host-discovered slash commands, skills, or agents.
|
||||
|
||||
### Task 4: Correct user-facing docs wording
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `docs-site/content/docs/guides/llm-configuration.mdx`
|
||||
- Modify: `docs-site/content/docs/guides/building-context.mdx`
|
||||
|
||||
- [ ] **Step 1: Update the LLM configuration guide wording**
|
||||
|
||||
In `docs-site/content/docs/guides/llm-configuration.mdx`, replace lines `39-41`
|
||||
with:
|
||||
|
||||
```mdx
|
||||
`claude-code` keeps KTX tool boundaries intact. KTX exposes only the MCP tools
|
||||
needed for the current KTX agent loop, disables Claude Code built-in tools,
|
||||
keeps plugins empty, and denies every non-KTX tool request through
|
||||
`canUseTool`. The Claude Agent SDK may still report host-discovered slash
|
||||
commands, skills, and subagent names in init metadata; that metadata is not an
|
||||
execution grant for KTX agent loops.
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Update the building context guide wording**
|
||||
|
||||
In `docs-site/content/docs/guides/building-context.mdx`, replace lines `61-63`
|
||||
with:
|
||||
|
||||
```mdx
|
||||
When you use `claude-code`, KTX still controls the tool surface for ingest and
|
||||
memory capture. KTX agent loops can invoke only the exact KTX MCP tools for the
|
||||
current run; Claude Code built-in tools, discovered MCP servers, plugins,
|
||||
skills, agents, and slash commands are not invokable.
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run docs tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter ktx-docs run test
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 4: Commit docs wording**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add docs-site/content/docs/guides/llm-configuration.mdx docs-site/content/docs/guides/building-context.mdx
|
||||
git commit -m "docs: clarify claude-code host discovery metadata"
|
||||
```
|
||||
|
||||
Expected: user docs describe invocation control rather than promising zero
|
||||
host-discovery metadata.
|
||||
|
||||
### Task 5: Final verification
|
||||
|
||||
**Files:**
|
||||
|
||||
- Verify: `docs/superpowers/specs/2026-05-15-claude-code-backend-design.md`
|
||||
- Verify: `packages/context/src/llm/claude-code-runtime.ts`
|
||||
- Verify: `packages/context/src/llm/claude-code-runtime.test.ts`
|
||||
- Verify: `docs-site/content/docs/guides/llm-configuration.mdx`
|
||||
- Verify: `docs-site/content/docs/guides/building-context.mdx`
|
||||
|
||||
- [ ] **Step 1: Run targeted runtime tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts src/llm/runtime-tools.test.ts src/llm/claude-code-env.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 2: Run package type-check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Run docs verification**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter ktx-docs run test
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 4: Run dead-code checks**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm run dead-code
|
||||
```
|
||||
|
||||
Expected: PASS or only pre-existing unrelated findings. Investigate and fix any
|
||||
finding caused by the runtime assertion or test changes.
|
||||
|
||||
- [ ] **Step 5: Inspect git status**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git status --short
|
||||
```
|
||||
|
||||
Expected: only files from this plan are modified, or the working tree is clean
|
||||
if each task was committed.
|
||||
|
||||
## Self-review
|
||||
|
||||
- Spec coverage: This plan addresses the v1-blocking auth probe failure,
|
||||
aligns the spec with the SDK contract, preserves the real KTX execution
|
||||
boundary, and adds regression coverage for non-empty host-discovered
|
||||
`slash_commands`, `skills`, and `agents` in both auth probe and runtime paths.
|
||||
- Placeholder scan: No placeholder markers remain. Every code-changing step
|
||||
includes exact file paths, code blocks, commands, and expected results.
|
||||
- Type consistency: The plan uses existing names from the codebase:
|
||||
`ClaudeCodeKtxLlmRuntime`, `runClaudeCodeAuthProbe`, `initMessage`,
|
||||
`resultMessage`, `assertInitIsolation`, `mcpToolIds`, `KtxRuntimeToolSet`, and
|
||||
`canUseTool`.
|
||||
|
|
@ -0,0 +1,160 @@
|
|||
# Claude Code Backend V1 Ingest Guidance Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Make the `ktx ingest` missing-LLM guidance treat `claude-code` as a first-class setup path and restore the CLI ingest test suite.
|
||||
|
||||
**Architecture:** Keep the existing Claude Code runtime implementation unchanged. Update the single local-ingest guard message so users see both the local Claude Code setup path and the Anthropic API setup path, then align the context and CLI tests with that user-facing copy.
|
||||
|
||||
**Tech Stack:** TypeScript, pnpm, Vitest.
|
||||
|
||||
---
|
||||
|
||||
## Audit summary
|
||||
|
||||
The May 15 Claude Code backend runtime and isolation plans are implemented for
|
||||
the core runtime path: config accepts `claude-code`, runtime calls use
|
||||
`KtxLlmRuntimePort`, Claude SDK calls pass isolation options and scrubbed env,
|
||||
setup/status/doctor validate Claude Code auth, and docs describe the backend.
|
||||
|
||||
One v1-blocking issue remains: `packages/context/src/ingest/local-bundle-runtime.ts`
|
||||
lists `claude-code` in the missing-LLM guard line but still tells users only to
|
||||
"Configure an Anthropic provider, then rerun ingest:". The full CLI ingest test suite currently
|
||||
fails because `packages/cli/src/ingest.test.ts` still expects the old provider
|
||||
list without `claude-code`. This is v1-blocking because CI is red and the
|
||||
fallback guidance is not first-class for the new backend.
|
||||
|
||||
Non-blocking gaps from the original spec remain unchanged:
|
||||
|
||||
- Same-step AI SDK tool-call repair parity is out of scope for the Claude Code
|
||||
runtime.
|
||||
- OTEL telemetry parity is out of scope for the Claude Code runtime.
|
||||
- Embedding parity is out of scope because embeddings stay independently
|
||||
configured.
|
||||
- Full prompt-caching parity for tools, history, and per-section TTLs is out of
|
||||
scope; v1 only needs no AI SDK cache markers on `claude-code` and explicit
|
||||
warnings for ignored fields.
|
||||
|
||||
## File structure
|
||||
|
||||
Modify these files:
|
||||
|
||||
- `packages/context/src/ingest/local-bundle-runtime.ts` owns the missing-LLM
|
||||
guard message used by local ingest and MCP-triggered ingest.
|
||||
- `packages/context/src/ingest/local-bundle-runtime.test.ts` verifies the guard
|
||||
message at the context boundary.
|
||||
- `packages/cli/src/ingest.test.ts` verifies the user-facing CLI output.
|
||||
|
||||
No `docs-site/` update is required because the existing public docs already
|
||||
document `claude-code` setup and ingest behavior; this plan only fixes an
|
||||
inline runtime error message.
|
||||
|
||||
### Task 1: Update ingest LLM setup guidance
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
|
||||
- Modify: `packages/cli/src/ingest.test.ts`
|
||||
- Modify: `packages/context/src/ingest/local-bundle-runtime.ts`
|
||||
|
||||
- [ ] **Step 1: Update the context guard-message test**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace the
|
||||
expected message in `requires an agent runner or configured local ingest LLM`
|
||||
with this exact array:
|
||||
|
||||
```ts
|
||||
[
|
||||
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
|
||||
'Configure a local Claude Code session or API-backed LLM, then rerun ingest:',
|
||||
` ktx setup --project-dir ${project.projectDir} --llm-backend claude-code --no-input`,
|
||||
` ktx setup --project-dir ${project.projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
|
||||
].join('\n')
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Update the CLI ingest test**
|
||||
|
||||
In `packages/cli/src/ingest.test.ts`, replace the stale provider-list
|
||||
assertion in `prints provider setup guidance when a skip-llm setup project runs
|
||||
ingest` with:
|
||||
|
||||
```ts
|
||||
expect(runIo.stderr()).toContain(
|
||||
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
|
||||
);
|
||||
expect(runIo.stderr()).toContain('Configure a local Claude Code session or API-backed LLM, then rerun ingest:');
|
||||
expect(runIo.stderr()).toContain(`ktx setup --project-dir ${projectDir} --llm-backend claude-code --no-input`);
|
||||
expect(runIo.stderr()).toContain(
|
||||
`ktx setup --project-dir ${projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
|
||||
);
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run tests to verify the new expectations fail**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts
|
||||
pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts
|
||||
```
|
||||
|
||||
Expected: both suites fail because the source message still says
|
||||
`Configure an Anthropic provider, then rerun ingest:` and does not include the
|
||||
Claude Code setup command.
|
||||
|
||||
- [ ] **Step 4: Update the ingest guard message**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.ts`, replace
|
||||
`localIngestLlmProviderGuardMessage` with:
|
||||
|
||||
```ts
|
||||
function localIngestLlmProviderGuardMessage(projectDir: string): string {
|
||||
return [
|
||||
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
|
||||
'Configure a local Claude Code session or API-backed LLM, then rerun ingest:',
|
||||
` ktx setup --project-dir ${projectDir} --llm-backend claude-code --no-input`,
|
||||
` ktx setup --project-dir ${projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
|
||||
].join('\n');
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Run the targeted tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts
|
||||
pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts
|
||||
```
|
||||
|
||||
Expected: both suites pass.
|
||||
|
||||
- [ ] **Step 6: Run package type-checks**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
pnpm --filter @ktx/cli run type-check
|
||||
```
|
||||
|
||||
Expected: both commands pass.
|
||||
|
||||
- [ ] **Step 7: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/ingest/local-bundle-runtime.ts packages/context/src/ingest/local-bundle-runtime.test.ts packages/cli/src/ingest.test.ts
|
||||
git commit -m "fix: update claude-code ingest setup guidance"
|
||||
```
|
||||
|
||||
## Self-review
|
||||
|
||||
- Spec coverage: This plan closes the only remaining v1-blocking audit finding:
|
||||
ingest setup guidance and CLI test expectations now include `claude-code` as
|
||||
a first-class backend.
|
||||
- Placeholder scan: No placeholders remain; every step includes exact paths,
|
||||
code, commands, and expected output.
|
||||
- Type consistency: The exact guard string is identical across the source and
|
||||
both test updates.
|
||||
|
|
@ -0,0 +1,575 @@
|
|||
# Claude Code Backend V1 Isolation Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Close the remaining v1-blocking Claude Code backend gaps around SDK
|
||||
init isolation assertions and setup-time prompt-caching warnings.
|
||||
|
||||
**Architecture:** Keep the existing runtime port and Claude Code runtime. Add
|
||||
the missing init-message checks inside the Claude runtime, then share the
|
||||
prompt-caching warning formatter between status/doctor and setup so all
|
||||
user-facing readiness flows report ignored Claude Code cache knobs consistently.
|
||||
|
||||
**Tech Stack:** TypeScript, pnpm, Vitest, Zod, `@anthropic-ai/claude-agent-sdk@0.3.142`.
|
||||
|
||||
---
|
||||
|
||||
## Audit Summary
|
||||
|
||||
The May 15 Claude Code backend v1 plan is mostly implemented. Remaining
|
||||
v1-blocking gaps from the original spec are:
|
||||
|
||||
- `packages/context/src/llm/claude-code-runtime.ts` asserts init-message tools,
|
||||
slash commands, skills, and plugins, but does not assert `agents` or
|
||||
unexpected `mcp_servers`. The spec requires asserting that settings-derived
|
||||
commands, skills, agents, plugins, and MCP servers are inactive.
|
||||
- `packages/cli/src/setup-models.ts` validates Claude Code auth but does not
|
||||
surface ignored `llm.promptCaching` fields during setup. The spec requires
|
||||
setup, status, and doctor to surface ignored prompt-caching fields for the
|
||||
`claude-code` backend. Status and doctor already warn.
|
||||
|
||||
Non-blocking gaps:
|
||||
|
||||
- Same-step tool-call repair parity remains out of scope for v1.
|
||||
- OTEL telemetry parity remains out of scope for v1.
|
||||
- Embedding parity remains out of scope because embeddings are configured
|
||||
independently.
|
||||
- Full prompt-caching parity for tools, history, and per-section TTLs remains
|
||||
out of scope; v1 only needs explicit warnings and no AI SDK cache markers on
|
||||
the Claude Code path.
|
||||
|
||||
## File Structure
|
||||
|
||||
Modify these files:
|
||||
|
||||
- `packages/context/src/llm/claude-code-runtime.ts` adds complete init-message
|
||||
isolation checks for agents and MCP servers.
|
||||
- `packages/context/src/llm/claude-code-runtime.test.ts` adds regression tests
|
||||
for rejected agents/MCP servers, object/agent env scrubbing, and callback
|
||||
error handling.
|
||||
- `packages/cli/src/claude-code-prompt-caching.ts` is created as the shared
|
||||
formatter for ignored prompt-caching fields.
|
||||
- `packages/cli/src/status-project.ts` imports the shared formatter instead of
|
||||
keeping a local helper.
|
||||
- `packages/cli/src/setup-models.ts` emits the shared warning when setup saves
|
||||
`llm.provider.backend: claude-code` and existing prompt-caching fields are
|
||||
present.
|
||||
- `packages/cli/src/setup-models.test.ts` covers setup warning output.
|
||||
- `packages/cli/src/doctor.test.ts` keeps coverage for doctor output using the
|
||||
shared formatter.
|
||||
|
||||
### Task 1: Complete Claude Code init isolation checks
|
||||
|
||||
**Files:**
|
||||
|
||||
- Modify: `packages/context/src/llm/claude-code-runtime.test.ts`
|
||||
- Modify: `packages/context/src/llm/claude-code-runtime.ts`
|
||||
|
||||
- [ ] **Step 1: Add failing isolation and runtime behavior tests**
|
||||
|
||||
Add these tests inside `describe('ClaudeCodeKtxLlmRuntime', ...)` in
|
||||
`packages/context/src/llm/claude-code-runtime.test.ts`:
|
||||
|
||||
```ts
|
||||
it('rejects settings-derived agents and non-KTX MCP servers from init messages', async () => {
|
||||
const query = vi.fn((_input: any) =>
|
||||
stream([
|
||||
initMessage({
|
||||
agents: ['project-agent'],
|
||||
mcp_servers: [{ name: 'filesystem', status: 'connected' }],
|
||||
}),
|
||||
resultMessage({ result: 'hello' }),
|
||||
]),
|
||||
);
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query,
|
||||
env: {},
|
||||
});
|
||||
|
||||
await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).rejects.toThrow(
|
||||
/Claude Code runtime isolation failed: .*mcp_servers=filesystem.*agents=project-agent/,
|
||||
);
|
||||
});
|
||||
|
||||
it('passes scrubbed env to object generation and agent loops', async () => {
|
||||
const schema = z.object({ answer: z.string() });
|
||||
const objectQuery = vi.fn((_input: any) =>
|
||||
stream([initMessage(), resultMessage({ structured_output: { answer: 'yes' } })]),
|
||||
);
|
||||
const objectRuntime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query: objectQuery,
|
||||
env: { ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod', PATH: '/usr/bin' }, // pragma: allowlist secret
|
||||
});
|
||||
|
||||
await expect(objectRuntime.generateObject({ role: 'default', prompt: 'json', schema })).resolves.toEqual({
|
||||
answer: 'yes',
|
||||
});
|
||||
expect(objectQuery.mock.calls[0][0].options.env).toEqual(
|
||||
expect.objectContaining({ PATH: '/usr/bin' }),
|
||||
);
|
||||
expect(objectQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }), // pragma: allowlist secret
|
||||
);
|
||||
|
||||
const agentQuery = vi.fn((_input: any) =>
|
||||
stream([
|
||||
initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
|
||||
{
|
||||
type: 'assistant',
|
||||
message: { role: 'assistant', content: [] },
|
||||
parent_tool_use_id: null,
|
||||
uuid: '00000000-0000-4000-8000-000000000004',
|
||||
session_id: 'session-id',
|
||||
} as unknown as SDKMessage,
|
||||
resultMessage({ subtype: 'error_max_turns', is_error: true }),
|
||||
]),
|
||||
);
|
||||
const agentRuntime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query: agentQuery,
|
||||
env: { ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1', HOME: '/Users/test' },
|
||||
});
|
||||
|
||||
await agentRuntime.runAgentLoop({
|
||||
modelRole: 'default',
|
||||
systemPrompt: 'system',
|
||||
userPrompt: 'user',
|
||||
toolSet: {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description: 'Load skill.',
|
||||
inputSchema: z.object({ name: z.string() }),
|
||||
execute: async () => ({ markdown: 'loaded' }),
|
||||
},
|
||||
},
|
||||
stepBudget: 1,
|
||||
telemetryTags: { operationName: 'test' },
|
||||
});
|
||||
expect(agentQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ HOME: '/Users/test' }));
|
||||
expect(agentQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('logs and ignores onStepFinish callback errors', async () => {
|
||||
const query = vi.fn((_input: any) =>
|
||||
stream([
|
||||
initMessage(),
|
||||
{
|
||||
type: 'assistant',
|
||||
message: { role: 'assistant', content: [] },
|
||||
parent_tool_use_id: null,
|
||||
uuid: '00000000-0000-4000-8000-000000000005',
|
||||
session_id: 'session-id',
|
||||
} as unknown as SDKMessage,
|
||||
resultMessage({ subtype: 'success', terminal_reason: 'completed' }),
|
||||
]),
|
||||
);
|
||||
const logger = {
|
||||
debug: vi.fn(),
|
||||
log: vi.fn(),
|
||||
warn: vi.fn(),
|
||||
error: vi.fn(),
|
||||
};
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query,
|
||||
env: {},
|
||||
logger,
|
||||
});
|
||||
|
||||
await expect(
|
||||
runtime.runAgentLoop({
|
||||
modelRole: 'default',
|
||||
systemPrompt: 'system',
|
||||
userPrompt: 'user',
|
||||
toolSet: {},
|
||||
stepBudget: 1,
|
||||
telemetryTags: { operationName: 'test' },
|
||||
onStepFinish: async () => {
|
||||
throw new Error('callback exploded');
|
||||
},
|
||||
}),
|
||||
).resolves.toEqual({ stopReason: 'natural' });
|
||||
expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('callback exploded'));
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the Claude runtime test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts
|
||||
```
|
||||
|
||||
Expected: FAIL because the new agents/MCP-server isolation test resolves
|
||||
successfully instead of throwing. The env-scrubbing and `onStepFinish` callback tests are regression coverage and may already pass.
|
||||
|
||||
- [ ] **Step 3: Add expected MCP server metadata and complete init assertions**
|
||||
|
||||
In `packages/context/src/llm/claude-code-runtime.ts`, replace
|
||||
`assertInitIsolation` and add the helper below it:
|
||||
|
||||
```ts
|
||||
function assertInitIsolation(
|
||||
message: SDKMessage,
|
||||
allowedToolIds: Set<string>,
|
||||
expectedMcpServerNames: Set<string>,
|
||||
): void {
|
||||
if (message.type !== 'system' || message.subtype !== 'init') {
|
||||
return;
|
||||
}
|
||||
const unexpectedTools = message.tools.filter((toolName) => !allowedToolIds.has(toolName));
|
||||
const activeMcpServerNames = message.mcp_servers.map((server) => server.name);
|
||||
const unexpectedMcpServers = activeMcpServerNames.filter((name) => !expectedMcpServerNames.has(name));
|
||||
const missingMcpServers = [...expectedMcpServerNames].filter((name) => !activeMcpServerNames.includes(name));
|
||||
const unexpectedAgents = message.agents ?? [];
|
||||
if (
|
||||
unexpectedTools.length > 0 ||
|
||||
unexpectedMcpServers.length > 0 ||
|
||||
missingMcpServers.length > 0 ||
|
||||
message.slash_commands.length > 0 ||
|
||||
message.skills.length > 0 ||
|
||||
message.plugins.length > 0 ||
|
||||
unexpectedAgents.length > 0
|
||||
) {
|
||||
throw new Error(
|
||||
`Claude Code runtime isolation failed: tools=${unexpectedTools.join(',') || '(none)'} mcp_servers=${
|
||||
unexpectedMcpServers.join(',') || '(none)'
|
||||
} missing_mcp_servers=${missingMcpServers.join(',') || '(none)'} slash_commands=${
|
||||
message.slash_commands.length
|
||||
} skills=${message.skills.length} plugins=${message.plugins.length} agents=${
|
||||
unexpectedAgents.join(',') || '(none)'
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set<string> {
|
||||
return tools && Object.keys(tools).length > 0 ? new Set(['ktx']) : new Set();
|
||||
}
|
||||
```
|
||||
|
||||
Update `collectResult` parameters:
|
||||
|
||||
```ts
|
||||
async function collectResult(params: {
|
||||
query: QueryFn;
|
||||
prompt: string;
|
||||
options: Options;
|
||||
allowedToolIds: Set<string>;
|
||||
expectedMcpServerNames: Set<string>;
|
||||
onAssistantTurn?: () => Promise<void>;
|
||||
}): Promise<SDKResultMessage> {
|
||||
let result: SDKResultMessage | undefined;
|
||||
for await (const message of params.query({ prompt: params.prompt, options: params.options })) {
|
||||
assertInitIsolation(message, params.allowedToolIds, params.expectedMcpServerNames);
|
||||
```
|
||||
|
||||
Update the four `collectResult(...)` calls:
|
||||
|
||||
```ts
|
||||
const tools = input.tools ?? {};
|
||||
const result = await collectResult({
|
||||
query: this.runQuery,
|
||||
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
|
||||
options,
|
||||
allowedToolIds: new Set(mcpToolIds(tools)),
|
||||
expectedMcpServerNames: expectedMcpServerNames(input.tools),
|
||||
});
|
||||
```
|
||||
|
||||
For `runAgentLoop(...)`, use:
|
||||
|
||||
```ts
|
||||
const result = await collectResult({
|
||||
query: this.runQuery,
|
||||
prompt: params.userPrompt,
|
||||
options: { ...options, systemPrompt: params.systemPrompt },
|
||||
allowedToolIds: new Set(mcpToolIds(params.toolSet)),
|
||||
expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
|
||||
onAssistantTurn: async () => {
|
||||
```
|
||||
|
||||
For `runClaudeCodeAuthProbe(...)`, use:
|
||||
|
||||
```ts
|
||||
const result = await collectResult({
|
||||
query: input.query ?? defaultQuery,
|
||||
prompt: 'Reply with exactly: ok',
|
||||
options,
|
||||
allowedToolIds: new Set(),
|
||||
expectedMcpServerNames: new Set(),
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run the Claude runtime test to verify it passes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/llm/claude-code-runtime.ts packages/context/src/llm/claude-code-runtime.test.ts
|
||||
git commit -m "fix: close claude-code runtime isolation checks"
|
||||
```
|
||||
|
||||
### Task 2: Surface Claude Code prompt-caching warnings during setup
|
||||
|
||||
**Files:**
|
||||
|
||||
- Create: `packages/cli/src/claude-code-prompt-caching.ts`
|
||||
- Modify: `packages/cli/src/status-project.ts`
|
||||
- Modify: `packages/cli/src/setup-models.ts`
|
||||
- Modify: `packages/cli/src/setup-models.test.ts`
|
||||
- Modify: `packages/cli/src/doctor.test.ts`
|
||||
|
||||
- [ ] **Step 1: Add failing setup warning test**
|
||||
|
||||
Add this test to `packages/cli/src/setup-models.test.ts`:
|
||||
|
||||
```ts
|
||||
it('warns during Claude Code setup when existing prompt-caching fields will be ignored', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
[
|
||||
'llm:',
|
||||
' provider:',
|
||||
' backend: anthropic',
|
||||
' models:',
|
||||
' default: claude-sonnet-4-6',
|
||||
' promptCaching:',
|
||||
' enabled: true',
|
||||
' systemTtl: 1h',
|
||||
' toolsTtl: 1h',
|
||||
' historyTtl: 5m',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const io = makeIo();
|
||||
|
||||
const result = await runKtxSetupAnthropicModelStep(
|
||||
{
|
||||
projectDir: tempDir,
|
||||
inputMode: 'disabled',
|
||||
llmBackend: 'claude-code',
|
||||
skipLlm: false,
|
||||
},
|
||||
io.io,
|
||||
{
|
||||
claudeCodeAuthProbe: async () => ({ ok: true as const }),
|
||||
},
|
||||
);
|
||||
|
||||
expect(result.status).toBe('ready');
|
||||
expect(io.stderr()).toContain('claude-code ignores llm.promptCaching.systemTtl');
|
||||
expect(io.stderr()).toContain('Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers');
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run setup tests to verify the new test fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/cli exec vitest run src/setup-models.test.ts
|
||||
```
|
||||
|
||||
Expected: FAIL because setup does not emit the ignored prompt-caching warning.
|
||||
|
||||
- [ ] **Step 3: Create the shared prompt-caching warning helper**
|
||||
|
||||
Create `packages/cli/src/claude-code-prompt-caching.ts`:
|
||||
|
||||
```ts
|
||||
import type { KtxProjectLlmConfig } from '@ktx/context/project';
|
||||
|
||||
const CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS = [
|
||||
'systemTtl',
|
||||
'toolsTtl',
|
||||
'historyTtl',
|
||||
'vertexFallbackTo5m',
|
||||
] as const;
|
||||
|
||||
export function ignoredClaudeCodePromptCachingFields(config: KtxProjectLlmConfig): string[] {
|
||||
if (config.provider.backend !== 'claude-code' || !config.promptCaching) {
|
||||
return [];
|
||||
}
|
||||
return CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS.filter((key) => key in config.promptCaching).map(
|
||||
(key) => `llm.promptCaching.${key}`,
|
||||
);
|
||||
}
|
||||
|
||||
export function formatClaudeCodePromptCachingWarning(fields: string[]): string | null {
|
||||
if (fields.length === 0) {
|
||||
return null;
|
||||
}
|
||||
return `claude-code ignores ${fields.join(', ')} because the Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers.`;
|
||||
}
|
||||
|
||||
export function formatClaudeCodePromptCachingFix(): string {
|
||||
return 'Remove those promptCaching fields or use anthropic, vertex, or gateway when those cache knobs are required.';
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Update status/doctor to use the shared helper**
|
||||
|
||||
In `packages/cli/src/status-project.ts`, add:
|
||||
|
||||
```ts
|
||||
import {
|
||||
formatClaudeCodePromptCachingFix,
|
||||
formatClaudeCodePromptCachingWarning,
|
||||
ignoredClaudeCodePromptCachingFields,
|
||||
} from './claude-code-prompt-caching.js';
|
||||
```
|
||||
|
||||
Delete the local `ignoredClaudeCodePromptCachingFields(...)` function.
|
||||
|
||||
Replace the warning block in `buildWarnings(...)` with:
|
||||
|
||||
```ts
|
||||
const warning = formatClaudeCodePromptCachingWarning(ignoredClaudeCodePromptCachingFields(config.llm));
|
||||
if (warning) {
|
||||
warnings.push({
|
||||
message: warning,
|
||||
fix: formatClaudeCodePromptCachingFix(),
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Emit the setup warning before persisting Claude Code config**
|
||||
|
||||
In `packages/cli/src/setup-models.ts`, add:
|
||||
|
||||
```ts
|
||||
import {
|
||||
formatClaudeCodePromptCachingWarning,
|
||||
ignoredClaudeCodePromptCachingFields,
|
||||
} from './claude-code-prompt-caching.js';
|
||||
```
|
||||
|
||||
Inside the `backendChoice.backend === 'claude-code'` branch, immediately before
|
||||
`await persistLlmConfig(...)`, add:
|
||||
|
||||
```ts
|
||||
const warning = formatClaudeCodePromptCachingWarning(
|
||||
ignoredClaudeCodePromptCachingFields(buildProjectLlmConfig(project.config.llm, { backend: 'claude-code' }, model)),
|
||||
);
|
||||
if (warning) {
|
||||
io.stderr.write(`${warning}\n`);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Run CLI tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/cli exec vitest run src/setup-models.test.ts src/doctor.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 7: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/cli/src/claude-code-prompt-caching.ts packages/cli/src/status-project.ts packages/cli/src/setup-models.ts packages/cli/src/setup-models.test.ts packages/cli/src/doctor.test.ts
|
||||
git commit -m "fix: warn on claude-code prompt caching during setup"
|
||||
```
|
||||
|
||||
### Task 3: Final verification
|
||||
|
||||
**Files:**
|
||||
|
||||
- Verify: `packages/context/src/llm/claude-code-runtime.ts`
|
||||
- Verify: `packages/cli/src/setup-models.ts`
|
||||
- Verify: `packages/cli/src/status-project.ts`
|
||||
|
||||
- [ ] **Step 1: Run targeted tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/llm/claude-code-runtime.test.ts src/llm/runtime-tools.test.ts src/llm/claude-code-env.test.ts src/llm/claude-code-models.test.ts src/llm/runtime-local-config.test.ts
|
||||
pnpm --filter @ktx/cli exec vitest run src/setup-models.test.ts src/doctor.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 2: Run package type-checks**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
pnpm --filter @ktx/cli run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Run the LLM boundary audit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
rg -n "generateKtxText\\(|generateKtxObject\\(|new AgentRunnerService\\(|AgentRunnerService\\b|llmProvider\\b|getModel\\(|getModelByName\\(" packages/context/src packages/cli/src packages/llm/src --glob '!**/*.test.ts'
|
||||
```
|
||||
|
||||
Expected: remaining matches are limited to:
|
||||
|
||||
- `packages/llm/src/**`
|
||||
- `packages/context/src/llm/ai-sdk-runtime.ts`
|
||||
- `packages/context/src/llm/local-config.ts`
|
||||
- `packages/context/src/agent/agent-runner.service.ts`
|
||||
- type/export declarations that intentionally preserve the AI SDK adapter
|
||||
boundary.
|
||||
|
||||
- [ ] **Step 4: Run dead-code check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm run dead-code
|
||||
```
|
||||
|
||||
Expected: PASS or only pre-existing unrelated findings. Investigate and fix
|
||||
any finding caused by the new helper file.
|
||||
|
||||
- [ ] **Step 5: Commit verification cleanup if needed**
|
||||
|
||||
If verification required small cleanup, run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/llm/claude-code-runtime.ts packages/context/src/llm/claude-code-runtime.test.ts packages/cli/src/claude-code-prompt-caching.ts packages/cli/src/status-project.ts packages/cli/src/setup-models.ts packages/cli/src/setup-models.test.ts packages/cli/src/doctor.test.ts
|
||||
git commit -m "chore: verify claude-code v1 closure"
|
||||
```
|
||||
|
||||
If no files changed after verification, skip this commit.
|
||||
|
||||
## Self-Review
|
||||
|
||||
- Spec coverage: The plan closes the remaining v1-blocking isolation assertion
|
||||
and setup-warning requirements from the original spec.
|
||||
- Placeholder scan: No placeholders remain; every task includes file paths,
|
||||
code, commands, and expected output.
|
||||
- Type consistency: The helper names and runtime function signatures are used
|
||||
consistently across tasks.
|
||||
2483
docs/superpowers/plans/2026-05-15-claude-code-backend-v1-runtime.md
Normal file
2483
docs/superpowers/plans/2026-05-15-claude-code-backend-v1-runtime.md
Normal file
File diff suppressed because it is too large
Load diff
698
docs/superpowers/specs/2026-05-15-claude-code-backend-design.md
Normal file
698
docs/superpowers/specs/2026-05-15-claude-code-backend-design.md
Normal file
|
|
@ -0,0 +1,698 @@
|
|||
# Brainstorm: `claude-code` backend with full KTX LLM parity
|
||||
|
||||
Adds a `claude-code` backend that gives KTX full parity with the existing
|
||||
`ANTHROPIC_API_KEY`-based `anthropic` backend for **all KTX LLM calls**. The
|
||||
backend uses `@anthropic-ai/claude-agent-sdk` and reuses the user's existing
|
||||
local Claude Code authentication. Users select it in `ktx.yaml`.
|
||||
|
||||
This is not an implementation plan. It is the revised design after expanding
|
||||
the requirement from "`ktx ingest` works with Claude Code" to "every KTX LLM
|
||||
call works with Claude Code." The follow-up implementation plan should be
|
||||
written separately.
|
||||
|
||||
## Core decision
|
||||
|
||||
`claude-code` is a first-class global LLM backend. Any code path that currently
|
||||
works with `llm.provider.backend: anthropic` must work with
|
||||
`llm.provider.backend: claude-code`, unless it is not an LLM call at all.
|
||||
|
||||
This includes:
|
||||
|
||||
- Agent loops implemented through `AgentRunnerService.runLoop(...)`.
|
||||
- Text generation through `generateKtxText(...)`.
|
||||
- Structured object generation through `generateKtxObject(...)`.
|
||||
- Local ingest and MCP-triggered local ingest flows.
|
||||
- Page triage and light extraction.
|
||||
- Context-candidate curation and reconciliation.
|
||||
- Memory capture.
|
||||
- Scan/enrichment internals and relationship LLM proposals.
|
||||
- Future KTX LLM call sites that use the shared runtime boundary.
|
||||
|
||||
Commands that do not use LLMs do not need special Claude Code behavior. There
|
||||
must be no silent fallback from `claude-code` to gateway, Anthropic API-key
|
||||
execution, or deterministic output.
|
||||
|
||||
## Goals
|
||||
|
||||
- Let a KTX user run all KTX LLM-backed behavior through their existing local
|
||||
Claude Code session without provisioning `ANTHROPIC_API_KEY`, Vertex
|
||||
credentials, or an AI Gateway key.
|
||||
- Preserve the existing user-facing CLI and MCP behavior. `claude-code` changes
|
||||
how LLM calls execute, not which KTX workflows exist.
|
||||
- Preserve role-based model selection. `llm.models.default`, `triage`,
|
||||
`candidateExtraction`, `curator`, `reconcile`, and `repair` remain the source
|
||||
of model selection for every LLM call.
|
||||
- Preserve KTX's curated tool boundaries. Claude Code built-ins,
|
||||
filesystem-discovered MCP servers, hooks, skills, plugins, agents, and slash
|
||||
commands must not become invokable in KTX agent loops. The Agent SDK init
|
||||
message may still report host-discovered slash commands, skills, and agents;
|
||||
KTX treats that metadata as diagnostic only and restricts execution through
|
||||
`tools: []`, exact KTX MCP `allowedTools`, `disallowedTools`, and
|
||||
deny-by-default `canUseTool`.
|
||||
- Keep embeddings independent. Claude does not provide embeddings; users keep
|
||||
configuring `ingest.embeddings` and scan/enrichment embeddings as they do
|
||||
today.
|
||||
- Fail fast with a clear message if local Claude Code authentication is not
|
||||
usable.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- **Embedding parity.** Embeddings remain separate from LLM execution.
|
||||
- **Tool-call repair parity in the first pass.** The AI SDK runner uses
|
||||
`experimental_repairToolCall` (`packages/llm/src/repair.ts:35-88`). The Claude
|
||||
Agent SDK has no transparent same-step repair hook. MVP behavior is next-turn
|
||||
self-correction from schema errors or a normal tool-failure count.
|
||||
- **OTEL telemetry parity in the first pass.** The AI SDK runner uses
|
||||
`experimental_telemetry`. The Agent SDK exposes hooks such as
|
||||
`PostToolUseFailure` and `SessionEnd`, but no drop-in OTEL switch. MVP ships
|
||||
without telemetry parity on this backend.
|
||||
- **Productizing Claude subscription limits.** Documentation must frame this as
|
||||
"use your own local Claude Code session," not as a third-party Claude Max or
|
||||
Claude.ai product feature.
|
||||
|
||||
## Approaches considered
|
||||
|
||||
### Recommended: global LLM runtime port
|
||||
|
||||
Introduce a backend-neutral KTX LLM runtime port for operations, not just model
|
||||
construction:
|
||||
|
||||
```ts
|
||||
interface KtxLlmRuntimePort {
|
||||
generateText(input: KtxGenerateTextInput): Promise<string>;
|
||||
generateObject<T>(input: KtxGenerateObjectInput<T>): Promise<T>;
|
||||
runAgentLoop(params: RunLoopParams): Promise<RunLoopResult>;
|
||||
}
|
||||
```
|
||||
|
||||
The existing `anthropic`, `vertex`, and `gateway` backends implement the runtime
|
||||
through the AI SDK and existing `KtxLlmProvider`. The new `claude-code` backend
|
||||
implements the same runtime through `@anthropic-ai/claude-agent-sdk`.
|
||||
|
||||
This is the recommended approach because KTX call sites need operations:
|
||||
"generate text," "generate a structured object," and "run an agent loop." They
|
||||
do not inherently need direct access to an AI SDK `LanguageModel`. The Agent SDK
|
||||
is a session/agent API, not an AI SDK model factory, so the runtime port avoids
|
||||
pretending those APIs are the same.
|
||||
|
||||
### Rejected: fake AI SDK `LanguageModel` for Claude Code
|
||||
|
||||
Trying to make Claude Code look like an AI SDK `LanguageModel` would be brittle.
|
||||
The Agent SDK owns session execution, permissions, MCP tools, structured output,
|
||||
and result messages. Those semantics do not map cleanly onto a normal
|
||||
`getModel(...)` return value.
|
||||
|
||||
### Rejected: branch at every call site
|
||||
|
||||
Adding `if backend === "claude-code"` around each LLM call would work in the short term
|
||||
but would duplicate prompt wrapping, structured output handling, debug logging,
|
||||
tool conversion, auth checks, and error mapping. It would also make future LLM
|
||||
call sites easy to miss.
|
||||
|
||||
## Architecture
|
||||
|
||||
```text
|
||||
ktx.yaml
|
||||
llm.provider.backend: anthropic | vertex | gateway | claude-code
|
||||
llm.models.<role>: model alias or model ID
|
||||
|
||||
createLocalKtxLlmRuntimeFromConfig(project.config.llm)
|
||||
-> AiSdkKtxLlmRuntime
|
||||
- wraps existing KtxLlmProvider
|
||||
- generateText / Output.object / AgentRunnerService
|
||||
-> ClaudeCodeKtxLlmRuntime
|
||||
- uses @anthropic-ai/claude-agent-sdk query()
|
||||
- implements text, object, and agent-loop operations
|
||||
|
||||
All KTX LLM call sites
|
||||
-> KtxLlmRuntimePort
|
||||
```
|
||||
|
||||
The runtime is selected at the same boundaries that currently construct an
|
||||
`llmProvider` or `AgentRunnerService`:
|
||||
|
||||
- `packages/context/src/llm/local-config.ts`
|
||||
- `packages/context/src/ingest/local-bundle-runtime.ts`
|
||||
- `packages/context/src/memory/local-memory.ts`
|
||||
- `packages/context/src/scan/local-scan.ts`
|
||||
- `packages/context/src/mcp/local-project-ports.ts`
|
||||
- Any CLI setup/status/doctor code that validates LLM readiness
|
||||
|
||||
After the change, services should not need to know whether the configured
|
||||
backend is AI SDK based or Claude Code based. They call the runtime operation
|
||||
they need.
|
||||
|
||||
## LLM call-site migration
|
||||
|
||||
The implementation plan must migrate every current KTX LLM call site to the
|
||||
runtime port:
|
||||
|
||||
- `packages/context/src/llm/generation.ts`: `generateKtxText` and
|
||||
`generateKtxObject` become runtime-backed helpers or are folded into the
|
||||
runtime.
|
||||
- `packages/context/src/agent/agent-runner.service.ts`: the AI SDK agent loop
|
||||
becomes the AI SDK implementation of `runAgentLoop`.
|
||||
- `packages/context/src/ingest/page-triage/page-triage.service.ts`: page triage
|
||||
and light extraction depend on `KtxLlmRuntimePort`, not raw `KtxLlmProvider`.
|
||||
- `packages/context/src/scan/description-generation.ts`: AI descriptions use
|
||||
the runtime text-generation operation.
|
||||
- `packages/context/src/scan/relationship-llm-proposal.ts`: relationship
|
||||
proposals use the runtime object-generation operation.
|
||||
- `packages/context/src/ingest/stages/stage-3-work-units.ts`,
|
||||
`packages/context/src/ingest/stages/stage-4-reconciliation.ts`,
|
||||
`packages/context/src/ingest/context-candidates/curator-pagination.service.ts`,
|
||||
and `packages/context/src/memory/memory-agent.service.ts`: agent loops use the
|
||||
runtime agent-loop operation or a thin `AgentRunnerPort` backed by it.
|
||||
- Test helpers and MCP local project ports that inject `llmProvider` or
|
||||
`agentRunner` must either inject the runtime port or use compatibility test
|
||||
adapters during the migration.
|
||||
|
||||
The plan must include a grep-based audit so new or overlooked `getModel(...)`,
|
||||
`generateKtxText(...)`, `generateKtxObject(...)`, `AgentRunnerService`, and
|
||||
`llmProvider` usages are either migrated or explicitly proven non-runtime.
|
||||
|
||||
## Config design
|
||||
|
||||
The config should make `claude-code` a first-class backend:
|
||||
|
||||
```yaml
|
||||
llm:
|
||||
provider:
|
||||
backend: claude-code
|
||||
models:
|
||||
default: sonnet
|
||||
triage: haiku
|
||||
candidateExtraction: sonnet
|
||||
curator: sonnet
|
||||
reconcile: sonnet
|
||||
repair: sonnet
|
||||
```
|
||||
|
||||
Implementation implications:
|
||||
|
||||
- Extend `KTX_LLM_BACKENDS` in `packages/context/src/project/config.ts` and
|
||||
`KtxLlmBackend` in `packages/llm/src/types.ts`.
|
||||
- Update setup, status, doctor, schema generation, examples, and docs so
|
||||
`claude-code` is understood everywhere `anthropic` is understood.
|
||||
- Update `createKtxLlmProvider` / `createModelFactory` so unsupported backend
|
||||
values throw instead of falling through to gateway.
|
||||
- Keep `llm.models` as the per-role binding source. The Claude Code runtime maps
|
||||
each KTX role to the configured model string for the current call.
|
||||
- Define accepted model aliases, such as `sonnet`, `opus`, and `haiku`, and full
|
||||
model IDs supported by the pinned SDK version.
|
||||
|
||||
## Claude Agent SDK runtime behavior
|
||||
|
||||
Every Agent SDK call must be isolated enough for KTX execution. Use explicit
|
||||
options even when SDK defaults currently match the desired value.
|
||||
|
||||
For agent loops with tools:
|
||||
|
||||
```ts
|
||||
query({
|
||||
prompt,
|
||||
options: {
|
||||
cwd: project.projectDir,
|
||||
systemPrompt,
|
||||
model: resolveModel(modelRole),
|
||||
maxTurns: stepBudget,
|
||||
settingSources: [],
|
||||
skills: [],
|
||||
plugins: [],
|
||||
mcpServers: { ktx: createSdkMcpServer({ name: "ktx", tools }) },
|
||||
tools: [],
|
||||
allowedTools: [/* exact mcp__ktx__<toolName> ids generated from the tool map */],
|
||||
canUseTool: ktxCanUseTool,
|
||||
permissionMode: "dontAsk",
|
||||
persistSession: false,
|
||||
env: ktxClaudeCodeEnv
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
`ktxClaudeCodeEnv` is the controlled environment described in
|
||||
"Agent SDK environment and auth boundary" below; it must be passed on every
|
||||
KTX `query()` call.
|
||||
|
||||
For plain text generation:
|
||||
|
||||
- Use the same `query()` runtime with `maxTurns: 1`.
|
||||
- Pass `settingSources: []`, `skills: []`, `plugins: []`, `tools: []`,
|
||||
`permissionMode: "dontAsk"`, `persistSession: false`, and
|
||||
`env: ktxClaudeCodeEnv`.
|
||||
- Do not expose MCP tools unless the KTX call explicitly passed tools.
|
||||
- Return the final result message text.
|
||||
|
||||
For structured object generation:
|
||||
|
||||
- Use the same `query()` runtime with the Agent SDK structured output option
|
||||
for JSON schema output, plus the same isolation tuple including
|
||||
`env: ktxClaudeCodeEnv`.
|
||||
- Convert KTX Zod schemas at the runtime boundary.
|
||||
- Parse and validate the returned object with the original KTX schema before
|
||||
returning it to the caller.
|
||||
|
||||
The plan must confirm the exact option names against the pinned SDK version, but
|
||||
the required outcome is fixed:
|
||||
|
||||
- Filesystem settings are not loaded. The SDK's documented default for an
|
||||
omitted `settingSources` is `["user", "project", "local"]`
|
||||
(`@anthropic-ai/claude-agent-sdk@0.3.142` `sdk.d.ts:1686-1695`),
|
||||
which would inherit the user's Claude Code filesystem settings. Every KTX
|
||||
`query()` call site - agent loops, text generation, object generation, and
|
||||
the auth probe - MUST pass `settingSources: []` explicitly, along with
|
||||
`skills: []`, `plugins: []`, `tools: []`, `persistSession: false`, and no
|
||||
`mcpServers` entries other than the KTX MCP server (omitted entirely when
|
||||
the call site does not expose tools). The implementation MUST assert from
|
||||
the SDK init message that the controlled execution surface matches KTX's
|
||||
expectations:
|
||||
|
||||
- `message.tools` equals the exact generated KTX MCP tool ids for the current
|
||||
call.
|
||||
- `message.mcp_servers` equals the expected KTX MCP server set: `[]` when the
|
||||
call exposes no tools, or `["ktx"]` when it does.
|
||||
- `message.plugins` is empty.
|
||||
|
||||
The implementation MUST NOT reject a run solely because
|
||||
`message.slash_commands`, `message.skills`, or `message.agents` contain
|
||||
host-discovered names. In `@anthropic-ai/claude-agent-sdk@0.3.142`, those
|
||||
fields can report host discovery even when KTX passes the isolation options.
|
||||
They are not part of the KTX execution surface when `tools: []`,
|
||||
`allowedTools`, `disallowedTools`, and deny-by-default `canUseTool` are set.
|
||||
- `skills: []` is a context filter in the pinned SDK
|
||||
(`sdk.d.ts:1697-1718`): unlisted skills are hidden from the model's skill
|
||||
listing and rejected by the Skill tool, but discovered skill names may still
|
||||
appear in init metadata. KTX must still pass `skills: []`.
|
||||
- Plugins are disabled with `plugins: []`, and the runtime asserts that
|
||||
`message.plugins` is empty in the init message.
|
||||
- Built-in tools are disabled by setting `tools: []`. The pinned SDK type
|
||||
(`@anthropic-ai/claude-agent-sdk@0.3.142`, `sdk.d.ts`) documents `tools` as
|
||||
the base set of built-in tools, with `[]` meaning "disable all built-ins";
|
||||
`tools` does not accept MCP tool ids and cannot be used to restrict MCP
|
||||
availability.
|
||||
- MCP tool availability is granted by registering the KTX MCP server through
|
||||
`mcpServers`. The SDK does not document a wildcard like `mcp__ktx__*` for
|
||||
any tool field; KTX must enumerate exact generated MCP tool ids of the form
|
||||
`mcp__ktx__<toolName>` (derived from the tool map handed to
|
||||
`createSdkMcpServer`) wherever a list of tool ids is required.
|
||||
- Pre-approval under `permissionMode: "dontAsk"` is configured by listing those
|
||||
same exact `mcp__ktx__<toolName>` ids in `allowedTools` (documented as
|
||||
auto-allow without prompting). Treat `allowedTools` as auto-approval, not
|
||||
restriction.
|
||||
- Defense-in-depth restriction uses `canUseTool`. The KTX runtime supplies a
|
||||
`canUseTool` handler that allows only tool names in the current KTX MCP tool
|
||||
map and denies everything else, so host-discovered slash commands, skills,
|
||||
agents, future SDK defaults, or a misconfigured MCP server cannot expand the
|
||||
execution surface.
|
||||
- `disallowedTools` MUST additionally list the current built-in tool names
|
||||
(`Agent`, `Task`, `AskUserQuestion`, `Bash`, `Read`, `Edit`, `Write`, `Glob`,
|
||||
`Grep`, `WebFetch`, `WebSearch`, `TodoWrite`) as redundant insurance.
|
||||
- `cwd` is `project.projectDir`, resolved at startup via `resolveKtxProjectDir`,
|
||||
not `process.cwd()`.
|
||||
- Sessions are not persisted unless the plan identifies a concrete debugging
|
||||
feature that needs persistence.
|
||||
|
||||
## Agent SDK environment and auth boundary
|
||||
|
||||
The Agent SDK's `query()` option `env` (`@anthropic-ai/claude-agent-sdk@0.3.142`
|
||||
`sdk.d.ts:1265-1279`) is the environment passed to the Claude Code child
|
||||
process and defaults to `process.env`. Without an explicit `env`, the SDK
|
||||
inherits the parent's environment, including any `ANTHROPIC_API_KEY`,
|
||||
`ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_BASE_URL`, gateway/AI-Gateway tokens,
|
||||
`GOOGLE_APPLICATION_CREDENTIALS` / `CLOUD_ML_REGION` (Vertex), and
|
||||
`AWS_*` (Bedrock) credentials — any of which can switch the Claude Code CLI's
|
||||
authentication source to API-key or another provider, bypassing the user's
|
||||
local Claude Code session. That would silently violate the core requirement
|
||||
that `claude-code` runs through the user's existing local Claude Code session
|
||||
and that there is no silent fallback to gateway, Anthropic API-key, or other
|
||||
provider execution.
|
||||
|
||||
Every `claude-code` `query()` call site - agent loops, text generation,
|
||||
object generation, and the auth probe - MUST pass an explicit `env`
|
||||
(`ktxClaudeCodeEnv`) constructed from `process.env` with the following
|
||||
denylisted variables removed:
|
||||
|
||||
- `ANTHROPIC_API_KEY`
|
||||
- `ANTHROPIC_AUTH_TOKEN`
|
||||
- `ANTHROPIC_BASE_URL`
|
||||
- `ANTHROPIC_MODEL` (provider-routing override)
|
||||
- `ANTHROPIC_VERTEX_PROJECT_ID`, `CLOUD_ML_REGION`,
|
||||
`GOOGLE_APPLICATION_CREDENTIALS`, `GOOGLE_CLOUD_PROJECT`
|
||||
- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`,
|
||||
`AWS_REGION`, `AWS_PROFILE`
|
||||
- `CLAUDE_CODE_USE_BEDROCK`, `CLAUDE_CODE_USE_VERTEX`
|
||||
- Any future provider-routing variables the pinned SDK version documents
|
||||
|
||||
The denylist is the source of truth and lives next to the runtime constructor
|
||||
so adding a variable is a single-file change.
|
||||
|
||||
Acceptance criteria:
|
||||
|
||||
- The constructed `ktxClaudeCodeEnv` does not contain any denylisted key, and
|
||||
this is verified by a unit test that seeds each denylisted key in a fake
|
||||
`process.env`.
|
||||
- The auth probe fails with the same "authenticate Claude Code locally"
|
||||
message even when `ANTHROPIC_API_KEY` (or any other denylisted credential)
|
||||
is present in `process.env` and no valid local Claude Code session exists.
|
||||
- Every KTX-originated `query()` invocation is spied on to assert that `env`
|
||||
was passed and that it does not contain any denylisted key; the test fails
|
||||
if any code path falls back to the SDK default `process.env`.
|
||||
- The "no silent fallback" rule is preserved end-to-end: a machine with
|
||||
`ANTHROPIC_API_KEY` set but no local Claude Code authentication still fails
|
||||
setup/status/doctor on `claude-code`.
|
||||
|
||||
## Tool boundary
|
||||
|
||||
Agent-loop tools cannot remain only raw AI SDK `Record<string, Tool>` values if
|
||||
two backends must consume them. The plan must define a backend-neutral tool
|
||||
descriptor for the final tool map handed to an agent loop:
|
||||
|
||||
```ts
|
||||
interface KtxRuntimeToolDescriptor<TInput, TOutput> {
|
||||
name: string;
|
||||
description: string;
|
||||
inputSchema: z.ZodObject<z.ZodRawShape>;
|
||||
execute(input: TInput): Promise<KtxRuntimeToolOutput<TOutput>>;
|
||||
}
|
||||
|
||||
interface KtxRuntimeToolOutput<TOutput> {
|
||||
// What the model sees as the tool_result content. Always a markdown string;
|
||||
// never a raw JS object. This matches BaseTool's existing
|
||||
// `toModelOutput` contract (`packages/context/src/tools/base-tool.ts:154-162`)
|
||||
// which sends only markdown to the LLM.
|
||||
markdown: string;
|
||||
// Out-of-band payload preserved for tool callers (transcripts, debug,
|
||||
// verification ledger, downstream KTX consumers). Not sent to the model.
|
||||
structured?: TOutput;
|
||||
}
|
||||
```
|
||||
|
||||
Every composed tool entry must produce this descriptor shape, including:
|
||||
|
||||
- `BaseTool` outputs from factory toolsets, which already return
|
||||
`{ markdown, structured }`.
|
||||
- Source-specific raw tools such as `emit_historic_sql_evidence` in
|
||||
`packages/context/src/ingest/local-bundle-runtime.ts`.
|
||||
- Stage-local tools in `buildWuToolSet` and `buildReconcileToolSet`.
|
||||
- Inline `load_skill`, read/raw/span, stage/diff, eviction, and emit tools in
|
||||
`packages/context/src/ingest/ingest-bundle.runner.ts`.
|
||||
- Memory-agent `load_skill` in
|
||||
`packages/context/src/memory/memory-agent.service.ts`.
|
||||
- The `withVerificationLedger` wrapping layer, whose markdown/structured
|
||||
guard outputs (`packages/context/src/ingest/tools/verification-ledger.tool.ts:40-97`)
|
||||
already match the contract.
|
||||
|
||||
### Tool output contract
|
||||
|
||||
The runtime defines a single output contract for both backends so the model
|
||||
sees the same content regardless of provider:
|
||||
|
||||
- **Model-visible content**: the `markdown` field, mapped to the Agent SDK
|
||||
tool handler return as `{ content: [{ type: "text", text: markdown }] }` for
|
||||
`claude-code`, and surfaced through the existing `toModelOutput` markdown
|
||||
path for AI SDK backends. The model never sees raw JS objects.
|
||||
- **Structured payload**: the optional `structured` field, preserved on the
|
||||
in-process tool-result envelope for transcript/debug capture, the
|
||||
verification ledger, and any KTX caller that introspects results. The
|
||||
Claude adapter does not put structured JSON into model-visible content
|
||||
unless an individual call site explicitly opts in.
|
||||
- **Normalization of existing raw tools**: tools that today return a bare
|
||||
string (e.g. `load_skill` "Skill not available" responses in
|
||||
`packages/context/src/ingest/ingest-bundle.runner.ts:697-721` and
|
||||
`:924-936`, and `packages/context/src/memory/memory-agent.service.ts:128-152`)
|
||||
must be wrapped at the descriptor boundary so `markdown` is the string and
|
||||
`structured` is omitted. Tools that today return a plain object (e.g.
|
||||
skill payload `{ name, content, skillDirectory }`) must be wrapped so
|
||||
`markdown` is a deterministic human-readable rendering (e.g. the skill
|
||||
body with a header) and the original object is preserved on `structured`.
|
||||
No KTX tool may return a raw object as the model-visible payload on the
|
||||
Claude Code backend, because the Agent SDK MCP handler will otherwise
|
||||
stringify it and drop the structured fields.
|
||||
- **AI SDK parity**: the AI SDK adapter MUST preserve BaseTool's existing
|
||||
`toModelOutput` markdown-only behavior. Migrating BaseTool-derived tools
|
||||
to the descriptor must not start sending structured JSON to the model.
|
||||
|
||||
The AI SDK adapter converts descriptors to `tool(...)` with a `toModelOutput`
|
||||
that emits `markdown` only. The Claude Code adapter converts descriptors to
|
||||
Agent SDK `tool(name, description, schema.shape, handler)` entries inside
|
||||
`createSdkMcpServer(...)` and returns `{ content: [{ type: "text", text:
|
||||
markdown }] }`.
|
||||
|
||||
Non-object schemas are unsupported for `claude-code` and must be rejected at
|
||||
startup with a clear error. In practice KTX tool inputs are already `z.object`.
|
||||
|
||||
## Stop reasons and failures
|
||||
|
||||
The Claude runner maps the SDK's typed `SDKResultMessage` (union of
|
||||
`SDKResultSuccess` and `SDKResultError` in
|
||||
`@anthropic-ai/claude-agent-sdk@0.3.142`, `sdk.d.ts`) to
|
||||
`RunLoopStopReason = "budget" | "natural" | "error"`. The mapping must consider
|
||||
three typed signals in this precedence order, because each successive signal
|
||||
may be present where the previous one is absent:
|
||||
|
||||
1. `subtype`: `"error_max_turns"` -> `"budget"`; `"success"` -> `"natural"`;
|
||||
other error subtypes (`"error_during_execution"`,
|
||||
`"error_max_budget_usd"`, `"error_max_structured_output_retries"`) ->
|
||||
`"error"`.
|
||||
2. `terminal_reason` (optional `TerminalReason` field on both success and
|
||||
error results): `"max_turns"` -> `"budget"`; `"completed"` -> `"natural"`;
|
||||
any other terminal reason such as `"blocking_limit"`,
|
||||
`"rapid_refill_breaker"`, `"prompt_too_long"`, `"image_error"`,
|
||||
`"model_error"`, `"aborted_streaming"`, `"aborted_tools"`,
|
||||
`"stop_hook_prevented"`, `"hook_stopped"`, or `"tool_deferred"` ->
|
||||
`"error"`.
|
||||
3. The assistant message `stop_reason`: `"max_turns"` -> `"budget"`; any
|
||||
other non-null unsuccessful stop reason -> `"error"`.
|
||||
|
||||
A `max_turns` signal arriving through any of the three sources must map to
|
||||
`"budget"`, overriding whatever a higher-precedence signal reports; the runner MUST NOT classify a max-turn termination as
|
||||
`"natural"` or as a generic `"error"` because it was reported via
|
||||
`terminal_reason` instead of `subtype`.
|
||||
|
||||
`Stop` hooks are not the authoritative stop-reason source because they do not
|
||||
carry the terminal reason. They remain useful for lifecycle logging. Tool failure
|
||||
counting should use `PostToolUseFailure` and feed the same mechanism that
|
||||
`stage-3-work-units.ts` checks through `toolFailureCount?(wu.unitKey)`.
|
||||
|
||||
For text and object generation, SDK authentication, billing, rate-limit,
|
||||
permission, max-turn, structured-output, and execution errors must map to the
|
||||
same error surfaces that KTX uses for the Anthropic API-key backend.
|
||||
|
||||
## Agent-loop progress callbacks
|
||||
|
||||
`RunLoopParams.onStepFinish`
|
||||
(`packages/context/src/agent/agent-runner.service.ts:20`) is part of the
|
||||
current agent-loop contract. The AI SDK runner increments `stepIndex` on each
|
||||
`generateText` step and invokes the callback
|
||||
(`agent-runner.service.ts:83-97`). KTX consumers depend on this:
|
||||
`packages/context/src/ingest/ingest-bundle.runner.ts:782` emits
|
||||
`work_unit_step` events from it, and `:1036` / `:1089` update reconciliation
|
||||
progress for the user-visible "Reconciling results · step N" status.
|
||||
|
||||
The `claude-code` runner MUST preserve `onStepFinish` semantics:
|
||||
|
||||
- It MUST invoke `onStepFinish` exactly once per assistant turn (i.e. once per
|
||||
step the SDK reports), incrementing `stepIndex` starting at 1.
|
||||
- The plan MUST name the concrete SDK stream event used as the step boundary
|
||||
(the implementation plan picks one of the documented assistant/result
|
||||
message events from the pinned SDK version and justifies it). The chosen
|
||||
event must produce the same `stepIndex` count as the AI SDK runner for an
|
||||
equivalent run: N tool-using turns yield N callbacks.
|
||||
- Callback errors MUST be caught and logged at `warn` level without aborting
|
||||
the loop, matching `agent-runner.service.ts:90-96`.
|
||||
- `stepBudget` passed to the callback MUST equal the `maxTurns` configured on
|
||||
the SDK `query()` call.
|
||||
|
||||
Acceptance criteria:
|
||||
|
||||
- A `claude-code` agent loop run with `stepBudget: N` produces N
|
||||
`work_unit_step` events when the loop runs to budget.
|
||||
- A reconciliation run under `claude-code` produces the same
|
||||
`updateProgress` calls (count and `stepIndex / stepBudget` ratio) as the
|
||||
Anthropic API-key backend for an equivalent fixture.
|
||||
- An `onStepFinish` callback that throws does not surface the error as the
|
||||
loop result.
|
||||
|
||||
## Prompt caching parity
|
||||
|
||||
`packages/llm/src/types.ts:44, :61` exposes `llm.promptCaching` as a config
|
||||
field, and the AI SDK message builder
|
||||
(`packages/llm/src/message-builder.ts:62-114, :141-218`) applies
|
||||
`anthropic.cacheControl: { type: "ephemeral", ttl }` markers to the system
|
||||
message, the last history message, and sorted tools, with TTLs split into
|
||||
`systemTtl`, `toolsTtl`, and `historyTtl`. `model-provider.test.ts:276`
|
||||
verifies caching is enabled by default with those three TTLs.
|
||||
|
||||
The Agent SDK does not expose KTX's marker-based contract. The closest
|
||||
mechanism is `systemPrompt: string[]` with
|
||||
`SYSTEM_PROMPT_DYNAMIC_BOUNDARY` (`sdk.d.ts:1746-1799`), which marks a static
|
||||
prefix as cacheable but provides no per-tool, per-history, or per-TTL knobs.
|
||||
|
||||
For the `claude-code` backend, the spec treats `llm.promptCaching` as
|
||||
**partial parity**:
|
||||
|
||||
- The Claude runtime MAY map a non-empty static system prefix to a cacheable
|
||||
`systemPrompt` array using `SYSTEM_PROMPT_DYNAMIC_BOUNDARY` when
|
||||
`cacheSystem` is enabled in the resolved `KtxPromptCachingConfig`. The
|
||||
implementation plan decides whether to ship this mapping in the first pass
|
||||
or defer it.
|
||||
- `cacheTools`, `cacheHistory`, and the `systemTtl` / `toolsTtl` /
|
||||
`historyTtl` fields have no Agent SDK equivalent. The runtime MUST NOT
|
||||
silently drop them: when a user sets non-default values under
|
||||
`llm.promptCaching` and the backend is `claude-code`, status/doctor and the
|
||||
setup wizard MUST surface that these fields are ignored on this backend.
|
||||
- Docs under `docs-site/content/docs/` MUST document this divergence in the
|
||||
same pages that describe `claude-code` setup, so users do not assume the
|
||||
TTL/tool/history knobs apply.
|
||||
|
||||
Acceptance criteria:
|
||||
|
||||
- A `claude-code` runtime constructed from a config with default
|
||||
`promptCaching` does not throw and does not pass KTX `cacheControl`
|
||||
markers to the Agent SDK (the AI-SDK-only markers stay on the AI SDK
|
||||
path).
|
||||
- A `claude-code` runtime constructed from a config with non-default
|
||||
`promptCaching` values yields a warning surfaced through doctor/status
|
||||
output identifying the ignored fields.
|
||||
|
||||
## Auth and setup
|
||||
|
||||
`ktx setup`, status, and doctor flows must validate that Claude Code SDK auth is
|
||||
usable, not just that `~/.claude/` exists. Acceptable validation strategies:
|
||||
|
||||
- A minimal SDK probe call with `settingSources: []`, `skills: []`,
|
||||
`plugins: []`, `tools: []`, `persistSession: false`, no `mcpServers`,
|
||||
`env: ktxClaudeCodeEnv`, and `maxTurns: 1`. The probe MUST NOT rely on
|
||||
the SDK's documented default for any of these fields, because the default
|
||||
for `settingSources` is `["user", "project", "local"]` (loads filesystem
|
||||
settings) and the default for `env` is `process.env` (can route auth
|
||||
through `ANTHROPIC_API_KEY` or other provider credentials and hide a
|
||||
missing local Claude Code session). See "Agent SDK environment and auth
|
||||
boundary" above for the `env` denylist.
|
||||
The auth probe MUST tolerate init messages with non-empty `slash_commands`,
|
||||
`skills`, and `agents` when `message.tools` is empty, `message.mcp_servers`
|
||||
is empty, `message.plugins` is empty, and the query options contain the KTX
|
||||
isolation tuple. Host discovery metadata is not an auth failure.
|
||||
- An SDK-provided account/auth status method if the pinned version exposes one.
|
||||
- A docs-endorsed file-presence check only if the official SDK docs explicitly
|
||||
state that it proves auth usability.
|
||||
|
||||
Failure copy should tell the user to authenticate Claude Code locally with the
|
||||
Claude Code CLI, then rerun setup or the command they attempted.
|
||||
|
||||
## Documentation impact
|
||||
|
||||
Docs updates are required because this changes user-visible setup and LLM
|
||||
provider behavior:
|
||||
|
||||
- `docs-site/content/docs/getting-started/quickstart.mdx`
|
||||
- `docs-site/content/docs/cli-reference/ktx-setup.mdx`
|
||||
- `docs-site/content/docs/guides/building-context.mdx`
|
||||
- Any config reference page that documents `llm.provider.backend`
|
||||
- Any status or doctor docs that describe LLM readiness
|
||||
|
||||
The docs must say that `claude-code` uses the user's own local Claude Code
|
||||
session. Do not describe it as a way for KTX to resell, pool, or productize
|
||||
Claude subscription limits.
|
||||
|
||||
## Verified evidence
|
||||
|
||||
- Current `KtxLlmProvider` returns AI SDK `LanguageModel` instances and only
|
||||
supports `anthropic`, `vertex`, and `gateway`
|
||||
(`packages/llm/src/types.ts`, `packages/llm/src/model-provider.ts`).
|
||||
- Project config currently accepts `llm.provider.backend: none | anthropic |
|
||||
vertex | gateway` (`packages/context/src/project/config.ts`).
|
||||
- `generateKtxText` and `generateKtxObject` are shared non-agent generation
|
||||
helpers (`packages/context/src/llm/generation.ts`).
|
||||
- `AgentRunnerService` is the shared AI SDK agent-loop implementation
|
||||
(`packages/context/src/agent/agent-runner.service.ts`).
|
||||
- Page triage and light extraction currently use raw `KtxLlmProvider`
|
||||
(`packages/context/src/ingest/page-triage/page-triage.service.ts`).
|
||||
- Scan/enrichment internals currently use `createLocalKtxLlmProviderFromConfig`,
|
||||
`generateKtxText`, and `generateKtxObject`
|
||||
(`packages/context/src/scan/local-scan.ts`,
|
||||
`packages/context/src/scan/description-generation.ts`,
|
||||
`packages/context/src/scan/relationship-llm-proposal.ts`).
|
||||
- Local ingest and MCP local project ports inject `llmProvider` and
|
||||
`agentRunner` today (`packages/context/src/ingest/local-bundle-runtime.ts`,
|
||||
`packages/context/src/mcp/local-project-ports.ts`).
|
||||
- The Agent SDK TypeScript reference (`@anthropic-ai/claude-agent-sdk@0.3.142`,
|
||||
`sdk.d.ts:1690-1697` and the `sdk.mjs` runtime default
|
||||
`["user","project","local"]`) documents `settingSources` **defaulting to
|
||||
loading user, project, and local filesystem settings** when omitted; passing
|
||||
`[]` is the explicit opt-out ("SDK isolation mode"). The same reference
|
||||
documents `allowedTools` as auto-approval rather than restriction,
|
||||
`canUseTool` as the programmatic permission handler,
|
||||
`permissionMode: "dontAsk"`, `tools` as the base built-in set with `[]`
|
||||
meaning "disable all built-ins" and no MCP-id support, `disallowedTools`,
|
||||
`maxTurns`, `mcpServers`, `cwd`, `persistSession`, and SDK result/hook
|
||||
message shapes.
|
||||
- `SDKResultMessage = SDKResultSuccess | SDKResultError` in
|
||||
`@anthropic-ai/claude-agent-sdk@0.3.142` (`sdk.d.ts`); both variants expose
|
||||
an optional `terminal_reason: TerminalReason`, where `TerminalReason`
|
||||
includes `'max_turns' | 'completed'` alongside other terminal reasons.
|
||||
- The Agent SDK MCP docs and SDK examples (e.g. Context7
|
||||
`/nothflare/claude-agent-sdk-docs` custom-tools guide) show registering MCP
|
||||
servers in `query()` options and listing exact `mcp__<server>__<tool>` ids
|
||||
in `allowedTools`; no SDK doc or type currently documents a wildcard form.
|
||||
- BaseTool's `toModelOutput` already sends only `markdown` to the model while
|
||||
preserving structured output for callers
|
||||
(`packages/context/src/tools/base-tool.ts:154-162`); some raw AI SDK tools
|
||||
in `packages/context/src/ingest/ingest-bundle.runner.ts:697-721, :924-936`
|
||||
and `packages/context/src/memory/memory-agent.service.ts:128-152` currently
|
||||
return bare strings or plain objects and must be normalized at the
|
||||
descriptor boundary so both backends preserve the contract.
|
||||
- The Agent SDK skills docs say the `skills` option is a context filter rather
|
||||
than a sandbox. KTX must pass `skills: []`, but must not assert that
|
||||
`message.skills` is empty in the SDK init message.
|
||||
- `Options.env` in `@anthropic-ai/claude-agent-sdk@0.3.142`
|
||||
(`sdk.d.ts:1265-1279`) is the environment passed to the Claude Code
|
||||
process and defaults to `process.env`. Without an explicit `env`, the SDK
|
||||
inherits the parent environment, including any provider-routing variables
|
||||
(`ANTHROPIC_API_KEY`, Vertex/Bedrock credentials, gateway tokens) that
|
||||
could change the active authentication source of the Claude Code CLI and
|
||||
hide a missing local Claude Code session.
|
||||
|
||||
## Open items for the implementation plan
|
||||
|
||||
1. Confirm exact TypeScript option names and result-message discriminants
|
||||
against the pinned `@anthropic-ai/claude-agent-sdk` version.
|
||||
2. Define the final `KtxLlmRuntimePort` file location and package exports.
|
||||
3. Define model alias validation for `sonnet`, `opus`, `haiku`, and full model
|
||||
IDs.
|
||||
4. Define the auth probe and make setup/status/doctor report actionable
|
||||
messages.
|
||||
5. Run a repo-wide audit for all LLM call sites and migrate each one to the
|
||||
runtime boundary.
|
||||
6. Write tests proving `claude-code` works for text generation, structured
|
||||
object generation, and agent-loop execution.
|
||||
7. Write tests proving page triage, scan/enrichment internals, memory capture,
|
||||
MCP-triggered local ingest, and normal local ingest all use the
|
||||
`claude-code` runtime when configured.
|
||||
8. Write tests proving a raw built-in Claude Code tool request is denied,
|
||||
host-discovered Skill/Agent/SlashCommand requests are denied by `canUseTool`,
|
||||
and only the exact, individually listed `mcp__ktx__<tool>` ids are allowed during KTX agent loops.
|
||||
9. Write a test that asserts every KTX-originated `query()` invocation
|
||||
(agent loop, text generation, object generation, auth probe) is called
|
||||
with `settingSources: []`, `skills: []`, `plugins: []`, `tools: []`, and
|
||||
`persistSession: false`, by spying on the SDK entry point. The test must
|
||||
fail if any path falls back to SDK defaults for those fields. The test must
|
||||
also prove that non-empty host-discovered `slash_commands`, `skills`, and
|
||||
`agents` in the init message do not fail the auth probe or runtime when the
|
||||
controlled tool, MCP server, and plugin surfaces match KTX expectations.
|
||||
10. Write a test that asserts `onStepFinish` is invoked the expected number
|
||||
of times for a fixed-budget `claude-code` agent loop, including the
|
||||
work-unit and reconciliation progress paths.
|
||||
11. Write a test that asserts every KTX-originated `query()` invocation
|
||||
(agent loop, text generation, object generation, auth probe) is called
|
||||
with an explicit `env` and that none of the denylisted provider-routing
|
||||
variables (`ANTHROPIC_API_KEY`, `ANTHROPIC_AUTH_TOKEN`,
|
||||
`ANTHROPIC_BASE_URL`, `ANTHROPIC_MODEL`, `ANTHROPIC_VERTEX_PROJECT_ID`,
|
||||
`CLOUD_ML_REGION`, `GOOGLE_APPLICATION_CREDENTIALS`,
|
||||
`GOOGLE_CLOUD_PROJECT`, `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`,
|
||||
`AWS_SESSION_TOKEN`, `AWS_REGION`, `AWS_PROFILE`,
|
||||
`CLAUDE_CODE_USE_BEDROCK`, `CLAUDE_CODE_USE_VERTEX`) are present in
|
||||
that env, by seeding each variable in a fake `process.env`. The test
|
||||
must also assert that the auth probe still fails when
|
||||
`ANTHROPIC_API_KEY` is set in `process.env` but no local Claude Code
|
||||
session exists.
|
||||
|
|
@ -3,7 +3,13 @@
|
|||
"workspaces": {
|
||||
".": {
|
||||
"entry": ["scripts/**/*.mjs"],
|
||||
"project": ["scripts/**/*.mjs"]
|
||||
"project": ["scripts/**/*.mjs"],
|
||||
"ignoreDependencies": [
|
||||
"@semantic-release/commit-analyzer",
|
||||
"@semantic-release/github",
|
||||
"@semantic-release/release-notes-generator",
|
||||
"conventional-changelog-conventionalcommits"
|
||||
]
|
||||
},
|
||||
"packages/cli": {
|
||||
"entry": [
|
||||
|
|
|
|||
14
package.json
14
package.json
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "ktx-workspace",
|
||||
"version": "0.0.0-private",
|
||||
"version": "0.1.0-rc.1",
|
||||
"description": "Workspace root for ktx packages",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
|
|
@ -30,10 +30,14 @@
|
|||
"release:local-embeddings-smoke": "node scripts/local-embeddings-runtime-smoke.mjs --require-opt-in",
|
||||
"release:npm-publish": "node scripts/publish-public-npm-package.mjs",
|
||||
"release:readiness": "node scripts/release-readiness.mjs",
|
||||
"release:update-version": "node scripts/update-public-release-version.mjs",
|
||||
"relationships:acquire-public-fixtures": "node scripts/acquire-public-benchmark-fixtures.mjs",
|
||||
"relationships:rebuild-public-snapshots": "node scripts/build-benchmark-snapshot.mjs --rebuild-all",
|
||||
"relationships:build-adventureworks-oltp": "node scripts/build-adventureworks-oltp-fixture.mjs",
|
||||
"relationships:verify-orbit": "node scripts/relationship-orbit-verification.mjs",
|
||||
"semantic-release": "semantic-release",
|
||||
"semantic-release:debug": "semantic-release --dry-run --debug",
|
||||
"semantic-release:dry-run": "semantic-release --dry-run --no-ci",
|
||||
"smoke": "pnpm run build && pnpm --filter @ktx/cli run smoke",
|
||||
"test": "node --test scripts/*.test.mjs && pnpm --filter './packages/*' run test",
|
||||
"test:coverage": "pnpm run test:coverage:ts && pnpm run test:coverage:py",
|
||||
|
|
@ -44,9 +48,17 @@
|
|||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "^2.4.15",
|
||||
"@semantic-release/changelog": "^6.0.3",
|
||||
"@semantic-release/commit-analyzer": "^13.0.1",
|
||||
"@semantic-release/exec": "^7.1.0",
|
||||
"@semantic-release/git": "^10.0.1",
|
||||
"@semantic-release/github": "^12.0.8",
|
||||
"@semantic-release/release-notes-generator": "^14.1.1",
|
||||
"@types/node": "^25.7.0",
|
||||
"better-sqlite3": "^12.10.0",
|
||||
"conventional-changelog-conventionalcommits": "^9.3.1",
|
||||
"knip": "^6.12.2",
|
||||
"semantic-release": "^25.0.3",
|
||||
"typescript": "^6.0.3",
|
||||
"yaml": "^2.9.0"
|
||||
},
|
||||
|
|
|
|||
29
packages/cli/src/claude-code-prompt-caching.ts
Normal file
29
packages/cli/src/claude-code-prompt-caching.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import type { KtxProjectLlmConfig } from '@ktx/context/project';
|
||||
|
||||
// Keys under `llm.promptCaching` that are reported as ignored when the LLM
// backend is `claude-code`. `as const` keeps the entries as literal types.
const CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS = [
|
||||
'systemTtl',
|
||||
'toolsTtl',
|
||||
'historyTtl',
|
||||
'vertexFallbackTo5m',
|
||||
] as const;
|
||||
|
||||
/**
 * Lists the prompt-caching settings present in `config` that are ignored on the
 * `claude-code` backend.
 *
 * @param config - Project LLM config; `provider.backend` and `promptCaching` are read.
 * @returns Dotted paths such as `llm.promptCaching.systemTtl`, one per ignored
 *   key found on `config.promptCaching`. Empty when the backend is not
 *   `claude-code` or when `promptCaching` is falsy.
 */
export function ignoredClaudeCodePromptCachingFields(config: KtxProjectLlmConfig): string[] {
|
||||
if (config.provider.backend !== 'claude-code' || !config.promptCaching) {
|
||||
return [];
|
||||
}
|
||||
const promptCaching = config.promptCaching;
|
||||
// `in` tests key presence only, so a field is reported whenever it is set,
|
// regardless of its value.
return CLAUDE_CODE_IGNORED_PROMPT_CACHING_FIELDS.filter((key) => key in promptCaching).map(
|
||||
(key) => `llm.promptCaching.${key}`,
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Builds the one-line warning naming the prompt-caching fields that
 * `claude-code` ignores.
 *
 * @param fields - Field paths to name in the warning; joined with `, `.
 * @returns The warning text, or `null` when `fields` is empty.
 */
export function formatClaudeCodePromptCachingWarning(fields: string[]): string | null {
|
||||
if (fields.length === 0) {
|
||||
return null;
|
||||
}
|
||||
return `claude-code ignores ${fields.join(', ')} because the Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers.`;
|
||||
}
|
||||
|
||||
/**
 * Returns the fixed remediation hint for ignored prompt-caching fields.
 * The text says "those promptCaching fields" without naming them.
 *
 * @returns A constant, user-facing suggestion string.
 */
export function formatClaudeCodePromptCachingFix(): string {
|
||||
return 'Remove those promptCaching fields or use anthropic, vertex, or gateway when those cache knobs are required.';
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ function embeddingBackend(value: string): 'openai' | 'sentence-transformers' {
|
|||
}
|
||||
|
||||
function llmBackend(value: string): KtxSetupLlmBackend {
|
||||
if (value === 'anthropic' || value === 'vertex') {
|
||||
if (value === 'anthropic' || value === 'vertex' || value === 'claude-code') {
|
||||
return value;
|
||||
}
|
||||
throw new InvalidArgumentError(`invalid choice '${value}'`);
|
||||
|
|
@ -97,6 +97,7 @@ function shouldShowSetupEntryMenu(
|
|||
llmBackend?: KtxSetupLlmBackend;
|
||||
anthropicApiKeyEnv?: string;
|
||||
anthropicApiKeyFile?: string;
|
||||
llmModel?: string;
|
||||
anthropicModel?: string;
|
||||
vertexProject?: string;
|
||||
vertexLocation?: string;
|
||||
|
|
@ -171,6 +172,7 @@ function shouldShowSetupEntryMenu(
|
|||
'llmBackend',
|
||||
'anthropicApiKeyEnv',
|
||||
'anthropicApiKeyFile',
|
||||
'llmModel',
|
||||
'anthropicModel',
|
||||
'vertexProject',
|
||||
'vertexLocation',
|
||||
|
|
@ -237,6 +239,7 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
.addOption(
|
||||
new Option('--anthropic-api-key-file <path>', 'File containing the Anthropic API key').hideHelp(),
|
||||
)
|
||||
.addOption(new Option('--llm-model <model>', 'LLM model ID or backend model alias').hideHelp())
|
||||
.addOption(new Option('--anthropic-model <model>', 'Anthropic model ID to validate and save').hideHelp())
|
||||
.addOption(new Option('--vertex-project <project>', 'Google Vertex AI project ID, env:NAME, or file:/path').hideHelp())
|
||||
.addOption(new Option('--vertex-location <location>', 'Google Vertex AI location, env:NAME, or file:/path').hideHelp())
|
||||
|
|
@ -362,12 +365,21 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
context.setExitCode(1);
|
||||
return;
|
||||
}
|
||||
if (options.llmBackend === 'vertex' && (options.anthropicApiKeyEnv || options.anthropicApiKeyFile)) {
|
||||
if (options.llmModel && options.anthropicModel) {
|
||||
context.io.stderr.write('Choose only one LLM model flag: --llm-model or --anthropic-model.\n');
|
||||
context.setExitCode(1);
|
||||
return;
|
||||
}
|
||||
if (
|
||||
options.llmBackend &&
|
||||
options.llmBackend !== 'anthropic' &&
|
||||
(options.anthropicApiKeyEnv || options.anthropicApiKeyFile)
|
||||
) {
|
||||
context.io.stderr.write('Anthropic API key flags are only valid with --llm-backend anthropic.\n');
|
||||
context.setExitCode(1);
|
||||
return;
|
||||
}
|
||||
if (options.llmBackend === 'anthropic' && (options.vertexProject || options.vertexLocation)) {
|
||||
if (options.llmBackend && options.llmBackend !== 'vertex' && (options.vertexProject || options.vertexLocation)) {
|
||||
context.io.stderr.write('Vertex AI flags are only valid with --llm-backend vertex.\n');
|
||||
context.setExitCode(1);
|
||||
return;
|
||||
|
|
@ -423,6 +435,7 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo
|
|||
...(options.llmBackend ? { llmBackend: options.llmBackend } : {}),
|
||||
...(options.anthropicApiKeyEnv ? { anthropicApiKeyEnv: options.anthropicApiKeyEnv } : {}),
|
||||
...(options.anthropicApiKeyFile ? { anthropicApiKeyFile: options.anthropicApiKeyFile } : {}),
|
||||
...(options.llmModel ? { llmModel: options.llmModel } : {}),
|
||||
...(options.anthropicModel ? { anthropicModel: options.anthropicModel } : {}),
|
||||
...(options.vertexProject ? { vertexProject: options.vertexProject } : {}),
|
||||
...(options.vertexLocation ? { vertexLocation: options.vertexLocation } : {}),
|
||||
|
|
|
|||
|
|
@ -464,6 +464,44 @@ describe('runKtxDoctor', () => {
|
|||
delete process.env.OPENAI_API_KEY;
|
||||
});
|
||||
|
||||
it('reports Claude Code auth failures and ignored prompt-caching fields in project doctor output', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
[
|
||||
'llm:',
|
||||
' provider:',
|
||||
' backend: claude-code',
|
||||
' models:',
|
||||
' default: sonnet',
|
||||
' promptCaching:',
|
||||
' enabled: true',
|
||||
' systemTtl: 1h',
|
||||
' toolsTtl: 1h',
|
||||
' historyTtl: 5m',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const testIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{
|
||||
claudeCodeAuthProbe: async () => ({
|
||||
ok: false as const,
|
||||
message: 'Authenticate Claude Code locally.',
|
||||
}),
|
||||
},
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(testIo.stdout()).toContain('claude-code');
|
||||
expect(testIo.stdout()).toContain('Authenticate Claude Code locally');
|
||||
expect(testIo.stdout()).toContain('claude-code ignores llm.promptCaching');
|
||||
});
|
||||
|
||||
it('includes Postgres query-history readiness in project doctor output', async () => {
|
||||
process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret
|
||||
process.env.OPENAI_API_KEY = 'test-key'; // pragma: allowlist secret
|
||||
|
|
|
|||
|
|
@ -1074,6 +1074,41 @@ describe('runKtxCli', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('dispatches the provider-neutral LLM model setup flag to the setup runner', async () => {
|
||||
const setup = vi.fn(async () => 0);
|
||||
const setupIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxCli(
|
||||
[
|
||||
'--project-dir',
|
||||
tempDir,
|
||||
'setup',
|
||||
'--no-input',
|
||||
'--llm-backend',
|
||||
'claude-code',
|
||||
'--llm-model',
|
||||
'opus',
|
||||
],
|
||||
setupIo.io,
|
||||
{ setup },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(setup).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
command: 'run',
|
||||
projectDir: tempDir,
|
||||
inputMode: 'disabled',
|
||||
cliVersion: '0.0.0-private',
|
||||
llmBackend: 'claude-code',
|
||||
llmModel: 'opus',
|
||||
skipLlm: false,
|
||||
}),
|
||||
setupIo.io,
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects conflicting Anthropic credential setup flags', async () => {
|
||||
const setup = vi.fn(async () => 0);
|
||||
const setupIo = makeIo();
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { EventEmitter } from 'node:events';
|
||||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent';
|
||||
import type { AgentRunnerPort, RunLoopParams } from '@ktx/context';
|
||||
import {
|
||||
KtxYamlMetabaseSourceStateReader,
|
||||
LocalMetabaseDiscoveryCache,
|
||||
|
|
@ -255,8 +255,8 @@ export function failedLocalBundleRun(input: RunLocalIngestOptions, jobId: string
|
|||
};
|
||||
}
|
||||
|
||||
export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: RunLoopParams) => {
|
||||
export class CliLookerSlWritingAgentRunner implements AgentRunnerPort {
|
||||
runLoop = vi.fn(async (params: RunLoopParams) => {
|
||||
if (
|
||||
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
|
||||
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
|
||||
|
|
@ -265,53 +265,39 @@ export class CliLookerSlWritingAgentRunner extends AgentRunnerService {
|
|||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
|
||||
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'cli-looker-verification-ledger', messages: [] },
|
||||
);
|
||||
await ledger.execute({
|
||||
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
|
||||
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
|
||||
unverifiedIdentifiers: [],
|
||||
});
|
||||
const slWrite = params.toolSet.sl_write_source;
|
||||
if (!slWrite?.execute) {
|
||||
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
|
||||
}
|
||||
const result = await slWrite.execute(
|
||||
{
|
||||
connectionId: 'prod-warehouse',
|
||||
sourceName: 'looker__ecommerce__orders',
|
||||
source: {
|
||||
name: 'looker__ecommerce__orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [
|
||||
{ name: 'id', type: 'number' },
|
||||
{ name: 'revenue', type: 'number' },
|
||||
],
|
||||
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
|
||||
},
|
||||
const result = await slWrite.execute({
|
||||
connectionId: 'prod-warehouse',
|
||||
sourceName: 'looker__ecommerce__orders',
|
||||
source: {
|
||||
name: 'looker__ecommerce__orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [
|
||||
{ name: 'id', type: 'number' },
|
||||
{ name: 'revenue', type: 'number' },
|
||||
],
|
||||
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
|
||||
},
|
||||
{ toolCallId: 'cli-looker-sl-write', messages: [] },
|
||||
);
|
||||
if (!result.structured.success) {
|
||||
});
|
||||
if (!(result.structured as { success?: boolean } | undefined)?.success) {
|
||||
throw new Error(result.markdown);
|
||||
}
|
||||
}
|
||||
return { stopReason: 'natural' as const };
|
||||
});
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
}
|
||||
|
||||
export class CliMetabaseAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async () => ({ stopReason: 'natural' as const }));
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
export class CliMetabaseAgentRunner implements AgentRunnerPort {
|
||||
runLoop = vi.fn(async () => ({ stopReason: 'natural' as const }));
|
||||
}
|
||||
|
||||
export class CliMetabaseSourceAdapter implements SourceAdapter {
|
||||
|
|
|
|||
|
|
@ -311,10 +311,12 @@ describe('runKtxIngest', () => {
|
|||
|
||||
expect(runIo.stdout()).toBe('');
|
||||
expect(runIo.stderr()).toContain(
|
||||
'ktx ingest requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.',
|
||||
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
|
||||
);
|
||||
expect(runIo.stderr()).toContain('Configure a local Claude Code session or API-backed LLM, then rerun ingest:');
|
||||
expect(runIo.stderr()).toContain(`ktx setup --project-dir ${projectDir} --llm-backend claude-code --no-input`);
|
||||
expect(runIo.stderr()).toContain(
|
||||
`ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
|
||||
`ktx setup --project-dir ${projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
|
||||
);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -86,11 +86,11 @@ export interface KtxIngestDeps {
|
|||
renderStoredMemoryFlow?: typeof renderMemoryFlowTui;
|
||||
startLiveMemoryFlow?: typeof startLiveMemoryFlowTui;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
localIngestOptions?: Pick<
|
||||
RunLocalIngestOptions,
|
||||
| 'agentRunner'
|
||||
| 'llmProvider'
|
||||
| 'memoryModel'
|
||||
localIngestOptions?: Pick<
|
||||
RunLocalIngestOptions,
|
||||
| 'agentRunner'
|
||||
| 'llmRuntime'
|
||||
| 'memoryModel'
|
||||
| 'semanticLayerCompute'
|
||||
| 'queryExecutor'
|
||||
| 'logger'
|
||||
|
|
|
|||
|
|
@ -61,7 +61,12 @@ function makePromptAdapter(options: {
|
|||
if (message.includes('LLM provider')) {
|
||||
providerPromptCount += 1;
|
||||
const nextProviderChoice = selectValues[0];
|
||||
if (nextProviderChoice === 'anthropic' || nextProviderChoice === 'vertex' || nextProviderChoice === 'back') {
|
||||
if (
|
||||
nextProviderChoice === 'anthropic' ||
|
||||
nextProviderChoice === 'vertex' ||
|
||||
nextProviderChoice === 'claude-code' ||
|
||||
nextProviderChoice === 'back'
|
||||
) {
|
||||
return selectValues.shift() ?? nextProviderChoice;
|
||||
}
|
||||
if (options.credentialChoice === 'back' && providerPromptCount > 1) {
|
||||
|
|
@ -180,6 +185,100 @@ describe('setup Anthropic model step', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('configures Claude Code backend and validates local auth', async () => {
|
||||
const io = makeIo();
|
||||
const authProbe = vi.fn(async () => ({ ok: true as const }));
|
||||
|
||||
const result = await runKtxSetupAnthropicModelStep(
|
||||
{
|
||||
projectDir: tempDir,
|
||||
inputMode: 'disabled',
|
||||
llmBackend: 'claude-code',
|
||||
skipLlm: false,
|
||||
},
|
||||
io.io,
|
||||
{ claudeCodeAuthProbe: authProbe },
|
||||
);
|
||||
|
||||
expect(result.status).toBe('ready');
|
||||
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
||||
expect(config.llm).toMatchObject({
|
||||
provider: { backend: 'claude-code' },
|
||||
models: { default: 'sonnet' },
|
||||
});
|
||||
expect(authProbe).toHaveBeenCalledWith(expect.objectContaining({ projectDir: tempDir, model: 'sonnet' }));
|
||||
});
|
||||
|
||||
it('prompts for the Claude Code model during interactive setup', async () => {
|
||||
const io = makeIo();
|
||||
const prompts = makePromptAdapter({ selectValues: ['claude-code', 'opus'] });
|
||||
const authProbe = vi.fn(async () => ({ ok: true as const }));
|
||||
|
||||
const result = await runKtxSetupAnthropicModelStep(
|
||||
{ projectDir: tempDir, inputMode: 'auto', skipLlm: false },
|
||||
io.io,
|
||||
{ prompts, claudeCodeAuthProbe: authProbe },
|
||||
);
|
||||
|
||||
expect(result.status).toBe('ready');
|
||||
expect(prompts.select).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
message: expect.stringContaining('Which Claude Code model should KTX use?'),
|
||||
options: [
|
||||
{ value: 'sonnet', label: 'Claude Sonnet', hint: 'recommended' },
|
||||
{ value: 'opus', label: 'Claude Opus' },
|
||||
{ value: 'haiku', label: 'Claude Haiku' },
|
||||
{ value: 'manual', label: 'Enter a Claude Code model ID manually' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
}),
|
||||
);
|
||||
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
||||
expect(config.llm).toMatchObject({
|
||||
provider: { backend: 'claude-code' },
|
||||
models: { default: 'opus' },
|
||||
});
|
||||
expect(authProbe).toHaveBeenCalledWith(expect.objectContaining({ projectDir: tempDir, model: 'opus' }));
|
||||
});
|
||||
|
||||
it('warns during Claude Code setup when existing prompt-caching fields will be ignored', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
[
|
||||
'llm:',
|
||||
' provider:',
|
||||
' backend: anthropic',
|
||||
' models:',
|
||||
' default: claude-sonnet-4-6',
|
||||
' promptCaching:',
|
||||
' enabled: true',
|
||||
' systemTtl: 1h',
|
||||
' toolsTtl: 1h',
|
||||
' historyTtl: 5m',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const io = makeIo();
|
||||
|
||||
const result = await runKtxSetupAnthropicModelStep(
|
||||
{
|
||||
projectDir: tempDir,
|
||||
inputMode: 'disabled',
|
||||
llmBackend: 'claude-code',
|
||||
skipLlm: false,
|
||||
},
|
||||
io.io,
|
||||
{
|
||||
claudeCodeAuthProbe: async () => ({ ok: true as const }),
|
||||
},
|
||||
);
|
||||
|
||||
expect(result.status).toBe('ready');
|
||||
expect(io.stderr()).toContain('claude-code ignores llm.promptCaching.systemTtl');
|
||||
expect(io.stderr()).toContain('Claude Agent SDK does not expose KTX prompt-cache TTL, tool, or history markers');
|
||||
});
|
||||
|
||||
it('returns from Anthropic credential Back to provider selection', async () => {
|
||||
const prompts = makePromptAdapter({ selectValues: ['anthropic', 'back', 'back'] });
|
||||
|
||||
|
|
@ -649,7 +748,7 @@ describe('setup Anthropic model step', () => {
|
|||
expect(io.stderr()).not.toContain('--skip-llm');
|
||||
});
|
||||
|
||||
it('does not recommend skipping when non-interactive setup is missing an Anthropic model', async () => {
|
||||
it('does not recommend skipping when non-interactive setup is missing an LLM model', async () => {
|
||||
const io = makeIo();
|
||||
const healthCheck = vi.fn(async () => ({ ok: true as const }));
|
||||
|
||||
|
|
@ -666,7 +765,7 @@ describe('setup Anthropic model step', () => {
|
|||
|
||||
expect(result.status).toBe('missing-input');
|
||||
expect(healthCheck).not.toHaveBeenCalled();
|
||||
expect(io.stderr()).toContain('Missing Anthropic model: pass --anthropic-model.');
|
||||
expect(io.stderr()).toContain('Missing LLM model: pass --llm-model.');
|
||||
expect(io.stderr()).not.toContain('--skip-llm');
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { execFile } from 'node:child_process';
|
||||
import { writeFile } from 'node:fs/promises';
|
||||
import { promisify } from 'node:util';
|
||||
import { resolveLocalKtxLlmConfig } from '@ktx/context';
|
||||
import { resolveLocalKtxLlmConfig, runClaudeCodeAuthProbe } from '@ktx/context';
|
||||
import { resolveKtxConfigReference } from '@ktx/context/core';
|
||||
import {
|
||||
type KtxProjectConfig,
|
||||
|
|
@ -11,6 +11,10 @@ import {
|
|||
serializeKtxProjectConfig,
|
||||
} from '@ktx/context/project';
|
||||
import { type KtxLlmConfig, type KtxLlmHealthCheckResult, runKtxLlmHealthCheck } from '@ktx/llm';
|
||||
import {
|
||||
formatClaudeCodePromptCachingWarning,
|
||||
ignoredClaudeCodePromptCachingFields,
|
||||
} from './claude-code-prompt-caching.js';
|
||||
import { createClackSpinner, type KtxCliSpinner } from './clack.js';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import { withTextInputNavigation } from './prompt-navigation.js';
|
||||
|
|
@ -32,6 +36,7 @@ export interface KtxSetupModelArgs {
|
|||
llmBackend?: KtxSetupLlmBackend;
|
||||
anthropicApiKeyEnv?: string;
|
||||
anthropicApiKeyFile?: string;
|
||||
llmModel?: string;
|
||||
anthropicModel?: string;
|
||||
vertexProject?: string;
|
||||
vertexLocation?: string;
|
||||
|
|
@ -53,7 +58,7 @@ export interface AnthropicModelChoice {
|
|||
recommended: boolean;
|
||||
}
|
||||
|
||||
export type KtxSetupLlmBackend = 'anthropic' | 'vertex';
|
||||
export type KtxSetupLlmBackend = 'anthropic' | 'vertex' | 'claude-code';
|
||||
|
||||
export interface KtxSetupModelPromptAdapter {
|
||||
select(options: { message: string; options: KtxSetupPromptOption[] }): Promise<string>;
|
||||
|
|
@ -68,6 +73,11 @@ export interface KtxSetupModelDeps {
|
|||
prompts?: KtxSetupModelPromptAdapter;
|
||||
listModels?: (apiKey: string) => Promise<AnthropicModelChoice[]>;
|
||||
healthCheck?: (config: KtxLlmConfig) => Promise<KtxLlmHealthCheckResult>;
|
||||
claudeCodeAuthProbe?: (input: {
|
||||
projectDir: string;
|
||||
model: string;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
}) => Promise<{ ok: true } | { ok: false; message: string }>;
|
||||
readGcloudProject?: () => Promise<string | undefined>;
|
||||
listGcloudProjects?: () => Promise<GcloudProjectChoice[]>;
|
||||
spinner?: () => KtxCliSpinner;
|
||||
|
|
@ -91,6 +101,12 @@ const VERTEX_ANTHROPIC_MODELS: AnthropicModelChoice[] = [
|
|||
{ id: 'claude-opus-4-1', label: 'Claude Opus 4.1', recommended: false },
|
||||
];
|
||||
|
||||
const CLAUDE_CODE_MODELS: AnthropicModelChoice[] = [
|
||||
{ id: 'sonnet', label: 'Claude Sonnet', recommended: true },
|
||||
{ id: 'opus', label: 'Claude Opus', recommended: false },
|
||||
{ id: 'haiku', label: 'Claude Haiku', recommended: false },
|
||||
];
|
||||
|
||||
const HIDDEN_ANTHROPIC_MODEL_PATTERNS = [
|
||||
/^claude-sonnet-4$/i,
|
||||
/^claude-opus-4$/i,
|
||||
|
|
@ -252,7 +268,7 @@ export function isKtxSetupLlmConfigReady(config: KtxProjectLlmConfig): boolean {
|
|||
return typeof resolved.vertex?.location === 'string' && resolved.vertex.location.trim().length > 0;
|
||||
}
|
||||
|
||||
return resolved.backend === 'anthropic' || resolved.backend === 'gateway';
|
||||
return resolved.backend === 'anthropic' || resolved.backend === 'gateway' || resolved.backend === 'claude-code';
|
||||
}
|
||||
|
||||
function hasUsableConfiguredLlm(config: KtxProjectConfig): boolean {
|
||||
|
|
@ -263,9 +279,18 @@ function buildProjectLlmConfig(
|
|||
existing: KtxProjectLlmConfig,
|
||||
provider:
|
||||
| { backend: 'anthropic'; credentialRef: string }
|
||||
| { backend: 'vertex'; vertex: { project?: string; location: string } },
|
||||
| { backend: 'vertex'; vertex: { project?: string; location: string } }
|
||||
| { backend: 'claude-code' },
|
||||
model: string,
|
||||
): KtxProjectLlmConfig {
|
||||
if (provider.backend === 'claude-code') {
|
||||
return {
|
||||
provider: { backend: 'claude-code' },
|
||||
models: { ...existing.models, default: model },
|
||||
promptCaching: existing.promptCaching,
|
||||
};
|
||||
}
|
||||
|
||||
if (provider.backend === 'vertex') {
|
||||
return {
|
||||
provider: {
|
||||
|
|
@ -453,12 +478,16 @@ function requestedBackend(args: KtxSetupModelArgs): KtxSetupLlmBackend | undefin
|
|||
if (args.vertexProject || args.vertexLocation) {
|
||||
return 'vertex';
|
||||
}
|
||||
if (args.anthropicApiKeyEnv || args.anthropicApiKeyFile || args.anthropicModel) {
|
||||
if (args.anthropicApiKeyEnv || args.anthropicApiKeyFile || args.llmModel || args.anthropicModel) {
|
||||
return 'anthropic';
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function requestedModel(args: KtxSetupModelArgs): string | undefined {
|
||||
return args.llmModel ?? args.anthropicModel;
|
||||
}
|
||||
|
||||
async function chooseBackend(
|
||||
args: KtxSetupModelArgs,
|
||||
io: KtxCliIo,
|
||||
|
|
@ -480,16 +509,21 @@ async function chooseBackend(
|
|||
}
|
||||
const choice = await prompts.select({
|
||||
message: 'Which LLM provider should KTX use?',
|
||||
options: [
|
||||
{ value: 'anthropic', label: 'Anthropic API' },
|
||||
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
options: [
|
||||
{ value: 'anthropic', label: 'Anthropic API' },
|
||||
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
|
||||
{ value: 'claude-code', label: 'Local Claude Code session' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
if (choice === 'back') {
|
||||
return { status: 'back' };
|
||||
}
|
||||
return { status: 'ready', backend: choice === 'vertex' ? 'vertex' : 'anthropic', prompted: true };
|
||||
return {
|
||||
status: 'ready',
|
||||
backend: choice === 'vertex' || choice === 'claude-code' ? choice : 'anthropic',
|
||||
prompted: true,
|
||||
};
|
||||
}
|
||||
|
||||
function resolveProvidedVertexRef(
|
||||
|
|
@ -708,11 +742,12 @@ async function chooseModel(
|
|||
io: KtxCliIo,
|
||||
deps: KtxSetupModelDeps,
|
||||
): Promise<ChooseModelResult> {
|
||||
if (args.anthropicModel) {
|
||||
return { status: 'ready', model: args.anthropicModel };
|
||||
const providedModel = requestedModel(args);
|
||||
if (providedModel) {
|
||||
return { status: 'ready', model: providedModel };
|
||||
}
|
||||
if (args.inputMode === 'disabled') {
|
||||
io.stderr.write('Missing Anthropic model: pass --anthropic-model.\n');
|
||||
io.stderr.write('Missing LLM model: pass --llm-model.\n');
|
||||
return { status: 'missing-input' };
|
||||
}
|
||||
|
||||
|
|
@ -765,11 +800,12 @@ async function chooseModel(
|
|||
}
|
||||
|
||||
async function chooseVertexModel(args: KtxSetupModelArgs, io: KtxCliIo, deps: KtxSetupModelDeps): Promise<ChooseModelResult> {
|
||||
if (args.anthropicModel) {
|
||||
return { status: 'ready', model: args.anthropicModel };
|
||||
const providedModel = requestedModel(args);
|
||||
if (providedModel) {
|
||||
return { status: 'ready', model: providedModel };
|
||||
}
|
||||
if (args.inputMode === 'disabled') {
|
||||
io.stderr.write('Missing Anthropic model: pass --anthropic-model.\n');
|
||||
io.stderr.write('Missing LLM model: pass --llm-model.\n');
|
||||
return { status: 'missing-input' };
|
||||
}
|
||||
|
||||
|
|
@ -803,11 +839,50 @@ async function chooseVertexModel(args: KtxSetupModelArgs, io: KtxCliIo, deps: Kt
|
|||
return { status: 'ready', model: choice };
|
||||
}
|
||||
|
||||
async function chooseClaudeCodeModel(args: KtxSetupModelArgs, deps: KtxSetupModelDeps): Promise<ChooseModelResult> {
|
||||
const providedModel = requestedModel(args);
|
||||
if (providedModel) {
|
||||
return { status: 'ready', model: providedModel };
|
||||
}
|
||||
if (args.inputMode === 'disabled') {
|
||||
return { status: 'ready', model: 'sonnet' };
|
||||
}
|
||||
|
||||
const prompts = deps.prompts ?? createPromptAdapter();
|
||||
const choice = await prompts.select({
|
||||
message: `Which Claude Code model should KTX use?\n\n${ANTHROPIC_MODEL_PROMPT_CONTEXT}`,
|
||||
options: [
|
||||
...CLAUDE_CODE_MODELS.map((model) => ({
|
||||
value: model.id,
|
||||
label: model.label,
|
||||
...(model.recommended ? { hint: 'recommended' } : {}),
|
||||
})),
|
||||
{ value: 'manual', label: 'Enter a Claude Code model ID manually' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
if (choice === 'back') {
|
||||
return { status: 'back' };
|
||||
}
|
||||
if (choice === 'manual') {
|
||||
const manual = await prompts.text({
|
||||
message: withTextInputNavigation('Claude Code model ID'),
|
||||
placeholder: CLAUDE_CODE_MODELS.find((model) => model.recommended)?.id ?? CLAUDE_CODE_MODELS[0]?.id,
|
||||
});
|
||||
if (manual === undefined) {
|
||||
return { status: 'back' };
|
||||
}
|
||||
return manual.trim() ? { status: 'ready', model: manual.trim() } : { status: 'missing-input' };
|
||||
}
|
||||
return { status: 'ready', model: choice };
|
||||
}
|
||||
|
||||
async function persistLlmConfig(
|
||||
projectDir: string,
|
||||
provider:
|
||||
| { backend: 'anthropic'; credentialRef: string }
|
||||
| { backend: 'vertex'; vertex: { project?: string; location: string } },
|
||||
| { backend: 'vertex'; vertex: { project?: string; location: string } }
|
||||
| { backend: 'claude-code' },
|
||||
model: string,
|
||||
): Promise<void> {
|
||||
const project = await loadKtxProject({ projectDir });
|
||||
|
|
@ -853,6 +928,7 @@ export async function runKtxSetupAnthropicModelStep(
|
|||
!args.llmBackend &&
|
||||
!args.anthropicApiKeyEnv &&
|
||||
!args.anthropicApiKeyFile &&
|
||||
!args.llmModel &&
|
||||
!args.anthropicModel &&
|
||||
!args.vertexProject &&
|
||||
!args.vertexLocation
|
||||
|
|
@ -918,6 +994,37 @@ export async function runKtxSetupAnthropicModelStep(
|
|||
continue;
|
||||
}
|
||||
|
||||
if (backendChoice.backend === 'claude-code') {
|
||||
const model = await chooseClaudeCodeModel(backendArgs, deps);
|
||||
if (model.status === 'back' && backendChoice.prompted) {
|
||||
attemptArgs = buildInteractiveRetryArgs(args);
|
||||
continue;
|
||||
}
|
||||
if (model.status === 'invalid-credential') {
|
||||
return { status: 'failed', projectDir: args.projectDir };
|
||||
}
|
||||
if (model.status !== 'ready') {
|
||||
return { status: model.status, projectDir: args.projectDir };
|
||||
}
|
||||
const probe = deps.claudeCodeAuthProbe ?? runClaudeCodeAuthProbe;
|
||||
const health = await probe({ projectDir: args.projectDir, model: model.model, env: deps.env ?? process.env });
|
||||
if (!health.ok) {
|
||||
io.stderr.write(`${health.message}\n`);
|
||||
return { status: 'failed', projectDir: args.projectDir };
|
||||
}
|
||||
const warning = formatClaudeCodePromptCachingWarning(
|
||||
ignoredClaudeCodePromptCachingFields(
|
||||
buildProjectLlmConfig(project.config.llm, { backend: 'claude-code' }, model.model),
|
||||
),
|
||||
);
|
||||
if (warning) {
|
||||
io.stderr.write(`${warning}\n`);
|
||||
}
|
||||
await persistLlmConfig(args.projectDir, { backend: 'claude-code' }, model.model);
|
||||
io.stdout.write(`│ LLM ready: yes (${model.model})\n`);
|
||||
return { status: 'ready', projectDir: args.projectDir };
|
||||
}
|
||||
|
||||
const credential = await chooseCredentialRef(backendArgs, io, deps);
|
||||
if (credential.status === 'back' && backendChoice.prompted) {
|
||||
attemptArgs = buildInteractiveRetryArgs(args);
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ export type KtxSetupArgs =
|
|||
llmBackend?: KtxSetupLlmBackend;
|
||||
anthropicApiKeyEnv?: string;
|
||||
anthropicApiKeyFile?: string;
|
||||
llmModel?: string;
|
||||
anthropicModel?: string;
|
||||
vertexProject?: string;
|
||||
vertexLocation?: string;
|
||||
|
|
@ -547,6 +548,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
|||
...(args.llmBackend ? { llmBackend: args.llmBackend } : {}),
|
||||
...(args.anthropicApiKeyEnv ? { anthropicApiKeyEnv: args.anthropicApiKeyEnv } : {}),
|
||||
...(args.anthropicApiKeyFile ? { anthropicApiKeyFile: args.anthropicApiKeyFile } : {}),
|
||||
...(args.llmModel ? { llmModel: args.llmModel } : {}),
|
||||
...(args.anthropicModel ? { anthropicModel: args.anthropicModel } : {}),
|
||||
...(args.vertexProject ? { vertexProject: args.vertexProject } : {}),
|
||||
...(args.vertexLocation ? { vertexLocation: args.vertexLocation } : {}),
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import { basename } from 'node:path';
|
||||
import { runClaudeCodeAuthProbe } from '@ktx/context';
|
||||
import type {
|
||||
KtxConfigIssue,
|
||||
KtxLocalProject,
|
||||
|
|
@ -8,6 +9,11 @@ import type {
|
|||
KtxProjectLlmConfig,
|
||||
} from '@ktx/context/project';
|
||||
import type { PostgresPgssProbeResult } from '@ktx/context/ingest';
|
||||
import {
|
||||
formatClaudeCodePromptCachingFix,
|
||||
formatClaudeCodePromptCachingWarning,
|
||||
ignoredClaudeCodePromptCachingFields,
|
||||
} from './claude-code-prompt-caching.js';
|
||||
import type { DoctorCheck } from './doctor.js';
|
||||
import {
|
||||
bold as _bold,
|
||||
|
|
@ -77,6 +83,12 @@ interface WarningItem {
|
|||
fix?: string;
|
||||
}
|
||||
|
||||
type ClaudeCodeAuthProbe = (input: {
|
||||
projectDir: string;
|
||||
model: string;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
}) => Promise<{ ok: true } | { ok: false; message: string }>;
|
||||
|
||||
const PROJECT_READY_COMMANDS = KTX_NEXT_STEP_DIRECT_COMMANDS.map((step) => step.command);
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
|
|
@ -134,7 +146,15 @@ function envHint(value: unknown): string | undefined {
|
|||
return undefined;
|
||||
}
|
||||
|
||||
function buildLlmStatus(config: KtxProjectLlmConfig, env: NodeJS.ProcessEnv): LlmStatus {
|
||||
async function buildLlmStatus(
|
||||
config: KtxProjectLlmConfig,
|
||||
options: {
|
||||
projectDir: string;
|
||||
env: NodeJS.ProcessEnv;
|
||||
claudeCodeAuthProbe?: ClaudeCodeAuthProbe;
|
||||
},
|
||||
): Promise<LlmStatus> {
|
||||
const env = options.env;
|
||||
const backend = config.provider.backend;
|
||||
const model = config.models?.default;
|
||||
if (backend === 'none') {
|
||||
|
|
@ -186,6 +206,26 @@ function buildLlmStatus(config: KtxProjectLlmConfig, env: NodeJS.ProcessEnv): Ll
|
|||
fix: hint ? `Set ${hint}` : 'Set the gateway api_key or rerun `ktx setup`',
|
||||
};
|
||||
}
|
||||
if (backend === 'claude-code') {
|
||||
const modelName = model ?? 'sonnet';
|
||||
const probe = options.claudeCodeAuthProbe ?? runClaudeCodeAuthProbe;
|
||||
const auth = await probe({ projectDir: options.projectDir, model: modelName, env });
|
||||
if (auth.ok) {
|
||||
return {
|
||||
backend,
|
||||
model: modelName,
|
||||
status: 'ok',
|
||||
detail: 'local Claude Code session authenticated',
|
||||
};
|
||||
}
|
||||
return {
|
||||
backend,
|
||||
model: modelName,
|
||||
status: 'fail',
|
||||
detail: auth.message,
|
||||
fix: 'Authenticate Claude Code locally with the Claude Code CLI, then rerun `ktx status`.',
|
||||
};
|
||||
}
|
||||
return { backend, model, status: 'warn', detail: 'unknown LLM backend' };
|
||||
}
|
||||
|
||||
|
|
@ -568,6 +608,14 @@ function buildWarnings(
|
|||
});
|
||||
}
|
||||
|
||||
const warning = formatClaudeCodePromptCachingWarning(ignoredClaudeCodePromptCachingFields(config.llm));
|
||||
if (warning) {
|
||||
warnings.push({
|
||||
message: warning,
|
||||
fix: formatClaudeCodePromptCachingFix(),
|
||||
});
|
||||
}
|
||||
|
||||
return warnings;
|
||||
}
|
||||
|
||||
|
|
@ -629,6 +677,7 @@ function buildVerdict(
|
|||
export interface BuildProjectStatusOptions {
|
||||
env?: NodeJS.ProcessEnv;
|
||||
postgresQueryHistoryProbe?: PostgresQueryHistoryProbe;
|
||||
claudeCodeAuthProbe?: ClaudeCodeAuthProbe;
|
||||
configIssues?: KtxConfigIssue[];
|
||||
}
|
||||
|
||||
|
|
@ -649,7 +698,11 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil
|
|||
const config = project.config;
|
||||
|
||||
const configStatus = buildConfigStatus(options.configIssues);
|
||||
const llm = buildLlmStatus(config.llm, env);
|
||||
const llm = await buildLlmStatus(config.llm, {
|
||||
projectDir: project.projectDir,
|
||||
env,
|
||||
claudeCodeAuthProbe: options.claudeCodeAuthProbe,
|
||||
});
|
||||
const embeddings = buildEmbeddingsStatus(config.ingest.embeddings, env);
|
||||
const storage = buildStorageStatus(config);
|
||||
const connections = Object.entries(config.connections).map(([name, conn]) =>
|
||||
|
|
|
|||
|
|
@ -129,6 +129,7 @@
|
|||
"type-check": "tsc -p tsconfig.json --noEmit"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "0.3.142",
|
||||
"@ktx/llm": "workspace:*",
|
||||
"@looker/sdk": "^26.8.0",
|
||||
"@looker/sdk-node": "^26.8.0",
|
||||
|
|
|
|||
|
|
@ -55,7 +55,14 @@ describe('AgentRunnerService.runLoop', () => {
|
|||
expect(call.system).toEqual({ role: 'system', content: 'SYS' });
|
||||
expect(call.messages).toEqual([{ role: 'user', content: 'USR' }]);
|
||||
expect(call.prompt).toBeUndefined();
|
||||
expect(call.tools).toEqual(tools);
|
||||
expect(call.tools.noop).toEqual(
|
||||
expect.objectContaining({
|
||||
description: 'noop',
|
||||
inputSchema: {},
|
||||
execute: expect.any(Function),
|
||||
toModelOutput: expect.any(Function),
|
||||
}),
|
||||
);
|
||||
expect(call.stopWhen).toBe(17);
|
||||
expect(call.temperature).toBe(0);
|
||||
expect(call.experimental_repairToolCall).toBe(repairHandler);
|
||||
|
|
|
|||
|
|
@ -1,33 +1,15 @@
|
|||
import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider, type KtxModelRole } from '@ktx/llm';
|
||||
import { generateText, stepCountIs, type TelemetrySettings, type Tool } from 'ai';
|
||||
import { noopLogger, type KtxLogger } from '../core/index.js';
|
||||
import { summarizeKtxLlmDebugRequest, type KtxLlmDebugRequestRecorder } from '../llm/index.js';
|
||||
|
||||
export type RunLoopStopReason = 'budget' | 'natural' | 'error';
|
||||
|
||||
export interface RunLoopStepInfo {
|
||||
stepIndex: number;
|
||||
stepBudget: number;
|
||||
}
|
||||
|
||||
export interface RunLoopParams {
|
||||
modelRole: KtxModelRole;
|
||||
systemPrompt: string;
|
||||
userPrompt: string;
|
||||
toolSet: Record<string, Tool>;
|
||||
stepBudget: number;
|
||||
telemetryTags: Record<string, string>;
|
||||
onStepFinish?: (info: RunLoopStepInfo) => void | Promise<void>;
|
||||
}
|
||||
|
||||
export interface RunLoopResult {
|
||||
stopReason: RunLoopStopReason;
|
||||
error?: Error;
|
||||
}
|
||||
|
||||
export interface AgentTelemetryPort {
|
||||
createTelemetry(tags: Record<string, string>): TelemetrySettings;
|
||||
}
|
||||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import type { KtxLogger } from '../core/index.js';
|
||||
import { AiSdkKtxLlmRuntime, type AgentTelemetryPort } from '../llm/ai-sdk-runtime.js';
|
||||
import type { KtxLlmDebugRequestRecorder } from '../llm/debug-request-recorder.js';
|
||||
import type { AgentRunnerPort, RunLoopParams, RunLoopResult } from '../llm/runtime-port.js';
|
||||
export type {
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
RunLoopStepInfo,
|
||||
RunLoopStopReason,
|
||||
} from '../llm/runtime-port.js';
|
||||
export type { AgentTelemetryPort } from '../llm/ai-sdk-runtime.js';
|
||||
|
||||
export interface AgentRunnerServiceDeps {
|
||||
llmProvider: KtxLlmProvider;
|
||||
|
|
@ -36,71 +18,14 @@ export interface AgentRunnerServiceDeps {
|
|||
logger?: KtxLogger;
|
||||
}
|
||||
|
||||
export class AgentRunnerService {
|
||||
private readonly logger: KtxLogger;
|
||||
export class AgentRunnerService implements AgentRunnerPort {
|
||||
private readonly runtime: AiSdkKtxLlmRuntime;
|
||||
|
||||
constructor(private readonly deps: AgentRunnerServiceDeps) {
|
||||
this.logger = deps.logger ?? noopLogger;
|
||||
constructor(deps: AgentRunnerServiceDeps) {
|
||||
this.runtime = new AiSdkKtxLlmRuntime(deps);
|
||||
}
|
||||
|
||||
async runLoop(params: RunLoopParams): Promise<RunLoopResult> {
|
||||
let stepIndex = 0;
|
||||
try {
|
||||
const model = this.deps.llmProvider.getModel(params.modelRole);
|
||||
const builder = new KtxMessageBuilder(this.deps.llmProvider);
|
||||
const built = builder.wrapSimple({
|
||||
system: params.systemPrompt,
|
||||
messages: [{ role: 'user', content: params.userPrompt }],
|
||||
tools: params.toolSet,
|
||||
model,
|
||||
});
|
||||
const promptMessages = splitKtxSystemMessages(built.messages);
|
||||
|
||||
await this.deps.debugRequestRecorder?.record(
|
||||
summarizeKtxLlmDebugRequest({
|
||||
operationName: params.telemetryTags.operationName ?? 'ktx-agent-runner',
|
||||
source: params.telemetryTags.source,
|
||||
jobId: params.telemetryTags.jobId,
|
||||
unitKey: params.telemetryTags.unitKey,
|
||||
modelRole: params.modelRole,
|
||||
modelId: (model as { modelId?: string }).modelId ?? params.modelRole,
|
||||
messages: built.messages,
|
||||
tools: built.tools as Record<string, { providerOptions?: unknown }>,
|
||||
}),
|
||||
);
|
||||
|
||||
await generateText({
|
||||
model,
|
||||
temperature: 0,
|
||||
stopWhen: stepCountIs(params.stepBudget),
|
||||
experimental_telemetry: this.deps.telemetry?.createTelemetry(params.telemetryTags),
|
||||
experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({
|
||||
source: params.telemetryTags.operationName ?? 'ktx-agent-runner',
|
||||
}),
|
||||
...(promptMessages.system ? { system: promptMessages.system } : {}),
|
||||
messages: promptMessages.messages,
|
||||
tools: built.tools as Record<string, Tool>,
|
||||
onStepFinish: async () => {
|
||||
stepIndex += 1;
|
||||
if (!params.onStepFinish) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
|
||||
} catch (err) {
|
||||
this.logger.warn(
|
||||
`[agent-runner] onStepFinish callback threw; ignoring: ${
|
||||
err instanceof Error ? err.message : String(err)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
},
|
||||
});
|
||||
return { stopReason: 'natural' };
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error(String(error));
|
||||
this.logger.warn(`[agent-runner] loop failed: ${err.message}`);
|
||||
return { stopReason: 'error', error: err };
|
||||
}
|
||||
runLoop(params: RunLoopParams): Promise<RunLoopResult> {
|
||||
return this.runtime.runAgentLoop(params);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { mkdtemp, realpath, rm, writeFile } from 'node:fs/promises';
|
||||
import { mkdtemp, readFile, realpath, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
|
|
@ -52,6 +52,13 @@ describe('GitService', () => {
|
|||
const after = await service.revParseHead();
|
||||
expect(after).toBe(before);
|
||||
});
|
||||
|
||||
it('keeps git auto-maintenance attached for deterministic cleanup', async () => {
|
||||
const config = await readFile(join(tempDir, '.git', 'config'), 'utf-8');
|
||||
|
||||
expect(config).toMatch(/\[gc]\n\s+autoDetach = false/);
|
||||
expect(config).toMatch(/\[maintenance]\n\s+autoDetach = false/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('commitFile `created` flag', () => {
|
||||
|
|
|
|||
|
|
@ -105,6 +105,12 @@ export class GitService {
|
|||
this.logger.log('Initialized git repository');
|
||||
}
|
||||
|
||||
// Keep any auto-maintenance triggered by writes in-process. Detached maintenance can
|
||||
// keep object-pack directories alive briefly after awaited git commands complete,
|
||||
// which makes temp-project cleanup flaky in CI.
|
||||
await this.git.addConfig('gc.autoDetach', 'false');
|
||||
await this.git.addConfig('maintenance.autoDetach', 'false');
|
||||
|
||||
// Ensure HEAD always resolves to a commit so callers (e.g., the memory-agent squash flow)
|
||||
// can rely on `revParseHead()` returning a SHA. Idempotent: skip if HEAD already exists.
|
||||
const head = await this.revParseHead();
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
|||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import YAML from 'yaml';
|
||||
import { AgentRunnerService } from '../../../agent/index.js';
|
||||
import type { AgentRunnerPort, RunLoopParams } from '../../../llm/index.js';
|
||||
import { initKtxProject, loadKtxProject, type KtxLocalProject } from '../../../project/index.js';
|
||||
import {
|
||||
type SqlAnalysisBatchItem,
|
||||
|
|
@ -47,8 +47,8 @@ class AcceptanceHistoricSqlReader implements HistoricSqlReader {
|
|||
}
|
||||
}
|
||||
|
||||
class HistoricSqlAcceptanceAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
class HistoricSqlAcceptanceAgentRunner implements AgentRunnerPort {
|
||||
runLoop = vi.fn(async (params: RunLoopParams) => {
|
||||
if (params.telemetryTags?.operationName !== 'ingest-bundle-wu') {
|
||||
return { stopReason: 'natural' as const };
|
||||
}
|
||||
|
|
@ -59,78 +59,65 @@ class HistoricSqlAcceptanceAgentRunner extends AgentRunnerService {
|
|||
}
|
||||
|
||||
if (params.telemetryTags.unitKey === 'historic-sql-table-public-orders') {
|
||||
const result = await emitEvidence.execute(
|
||||
{
|
||||
kind: 'table_usage',
|
||||
table: 'public.orders',
|
||||
rawPath: 'tables/public.orders.json',
|
||||
usage: {
|
||||
narrative: 'Analysts repeatedly inspect paid order lifecycle by customer segment.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status'],
|
||||
commonGroupBys: ['status', 'segment'],
|
||||
commonJoins: [{ table: 'public.customers', on: ['customer_id', 'id'] }],
|
||||
staleSince: null,
|
||||
},
|
||||
const result = await emitEvidence.execute({
|
||||
kind: 'table_usage',
|
||||
table: 'public.orders',
|
||||
rawPath: 'tables/public.orders.json',
|
||||
usage: {
|
||||
narrative: 'Analysts repeatedly inspect paid order lifecycle by customer segment.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status'],
|
||||
commonGroupBys: ['status', 'segment'],
|
||||
commonJoins: [{ table: 'public.customers', on: ['customer_id', 'id'] }],
|
||||
staleSince: null,
|
||||
},
|
||||
{ toolCallId: 'historic-sql-orders-usage' },
|
||||
);
|
||||
if (!String(result).includes('Recorded historic-SQL table_usage evidence')) {
|
||||
throw new Error(`Unexpected orders evidence result: ${String(result)}`);
|
||||
});
|
||||
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
|
||||
throw new Error(`Unexpected orders evidence result: ${result.markdown}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (params.telemetryTags.unitKey === 'historic-sql-table-public-customers') {
|
||||
const result = await emitEvidence.execute(
|
||||
{
|
||||
kind: 'table_usage',
|
||||
table: 'public.customers',
|
||||
rawPath: 'tables/public.customers.json',
|
||||
usage: {
|
||||
narrative: 'Customers provide segment context for paid order lifecycle analysis.',
|
||||
frequencyTier: 'mid',
|
||||
commonFilters: [],
|
||||
commonGroupBys: ['segment'],
|
||||
commonJoins: [{ table: 'public.orders', on: ['id', 'customer_id'] }],
|
||||
staleSince: null,
|
||||
},
|
||||
const result = await emitEvidence.execute({
|
||||
kind: 'table_usage',
|
||||
table: 'public.customers',
|
||||
rawPath: 'tables/public.customers.json',
|
||||
usage: {
|
||||
narrative: 'Customers provide segment context for paid order lifecycle analysis.',
|
||||
frequencyTier: 'mid',
|
||||
commonFilters: [],
|
||||
commonGroupBys: ['segment'],
|
||||
commonJoins: [{ table: 'public.orders', on: ['id', 'customer_id'] }],
|
||||
staleSince: null,
|
||||
},
|
||||
{ toolCallId: 'historic-sql-customers-usage' },
|
||||
);
|
||||
if (!String(result).includes('Recorded historic-SQL table_usage evidence')) {
|
||||
throw new Error(`Unexpected customers evidence result: ${String(result)}`);
|
||||
});
|
||||
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
|
||||
throw new Error(`Unexpected customers evidence result: ${result.markdown}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (params.telemetryTags.unitKey === 'historic-sql-patterns-part-0001') {
|
||||
const result = await emitEvidence.execute(
|
||||
{
|
||||
kind: 'pattern',
|
||||
rawPath: 'patterns-input/part-0001.json',
|
||||
pattern: {
|
||||
slug: 'paid-order-lifecycle',
|
||||
title: 'Paid Order Lifecycle',
|
||||
narrative: 'Analysts join orders and customers to compare paid order lifecycle by segment.',
|
||||
definitionSql:
|
||||
'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status, c.segment',
|
||||
tablesInvolved: ['public.orders', 'public.customers'],
|
||||
slRefs: ['orders', 'customers'],
|
||||
constituentTemplateIds: ['pg:orders-lifecycle'],
|
||||
},
|
||||
const result = await emitEvidence.execute({
|
||||
kind: 'pattern',
|
||||
rawPath: 'patterns-input/part-0001.json',
|
||||
pattern: {
|
||||
slug: 'paid-order-lifecycle',
|
||||
title: 'Paid Order Lifecycle',
|
||||
narrative: 'Analysts join orders and customers to compare paid order lifecycle by segment.',
|
||||
definitionSql:
|
||||
'select o.status, c.segment, count(*) from public.orders o join public.customers c on c.id = o.customer_id group by o.status, c.segment',
|
||||
tablesInvolved: ['public.orders', 'public.customers'],
|
||||
slRefs: ['orders', 'customers'],
|
||||
constituentTemplateIds: ['pg:orders-lifecycle'],
|
||||
},
|
||||
{ toolCallId: 'historic-sql-pattern' },
|
||||
);
|
||||
if (!String(result).includes('Recorded historic-SQL pattern evidence')) {
|
||||
throw new Error(`Unexpected pattern evidence result: ${String(result)}`);
|
||||
});
|
||||
if (!result.markdown.includes('Recorded historic-SQL pattern evidence')) {
|
||||
throw new Error(`Unexpected pattern evidence result: ${result.markdown}`);
|
||||
}
|
||||
}
|
||||
|
||||
return { stopReason: 'natural' as const };
|
||||
});
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
}
|
||||
|
||||
function acceptanceSqlAnalysis(): SqlAnalysisPort {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import type { KtxModelRole } from '@ktx/llm';
|
||||
import type { ToolSet } from 'ai';
|
||||
import type { AgentRunnerService } from '../../agent/index.js';
|
||||
import { type KtxLogger, noopLogger } from '../../core/index.js';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../llm/index.js';
|
||||
import type { MemoryAction } from '../../memory/index.js';
|
||||
import type { ContextCandidateForDedup, CuratorPaginationPort, CuratorPaginationReport } from '../ports.js';
|
||||
import type {
|
||||
|
|
@ -38,7 +37,7 @@ export interface CuratorPaginationInput {
|
|||
modelRole: KtxModelRole;
|
||||
buildSystemPrompt: () => string;
|
||||
buildUserPrompt: (input: CuratorPaginationPromptInput) => string;
|
||||
buildToolSet: (passNumber: number) => ToolSet;
|
||||
buildToolSet: (passNumber: number) => KtxRuntimeToolSet;
|
||||
getReconciliationActions: () => MemoryAction[];
|
||||
onStepFinish?: (info: { passNumber: number; stepIndex: number; stepBudget: number }) => void;
|
||||
}
|
||||
|
|
@ -50,7 +49,7 @@ interface CuratorPaginationResult extends ReconciliationOutcome {
|
|||
|
||||
export interface CuratorPaginationServiceDeps {
|
||||
store: ContextCandidateStorePort;
|
||||
agentRunner: AgentRunnerService;
|
||||
agentRunner: AgentRunnerPort;
|
||||
settings: CuratorPaginationSettings;
|
||||
logger?: KtxLogger;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -200,7 +200,7 @@ const makeDeps = () => {
|
|||
const slValidator = { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) };
|
||||
const toolsetFactory = {
|
||||
createIngestWuToolset: vi.fn().mockReturnValue({
|
||||
toAiSdkTools: vi.fn().mockReturnValue({}),
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
}),
|
||||
|
|
@ -419,7 +419,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
sessions.push(toolSession);
|
||||
return {
|
||||
toAiSdkTools: vi.fn().mockReturnValue({}),
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
|
|
@ -591,7 +591,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
currentToolSession = toolSession;
|
||||
return {
|
||||
toAiSdkTools: vi.fn().mockReturnValue({}),
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
|
|
@ -663,7 +663,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
currentToolSession = toolSession;
|
||||
return {
|
||||
toAiSdkTools: vi.fn().mockReturnValue({}),
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
|
|
@ -834,7 +834,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
it('stores memory-flow provenance and transcript summaries in the ingest report body', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.toolsetFactory.createIngestWuToolset.mockReturnValue({
|
||||
toAiSdkTools: vi.fn().mockReturnValue({
|
||||
toRuntimeTools: vi.fn().mockReturnValue({
|
||||
read_raw_span: {
|
||||
description: 'read a raw span',
|
||||
inputSchema: {},
|
||||
|
|
@ -1376,7 +1376,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
currentToolSession = toolSession;
|
||||
return {
|
||||
toAiSdkTools: vi.fn().mockReturnValue({}),
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
|
|
@ -1933,7 +1933,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
currentToolSession = toolSession;
|
||||
return {
|
||||
toAiSdkTools: vi.fn().mockReturnValue({}),
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { type Tool, tool } from 'ai';
|
||||
import pLimit from 'p-limit';
|
||||
import { z } from 'zod';
|
||||
import { type KtxLogger, noopLogger } from '../core/index.js';
|
||||
import { createRuntimeToolDescriptorFromAiTool, type KtxRuntimeToolSet } from '../llm/index.js';
|
||||
import type { CaptureSession, MemoryAction } from '../memory/index.js';
|
||||
import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js';
|
||||
import { createTouchedSlSources, type ToolContext, type ToolSession } from '../tools/index.js';
|
||||
|
|
@ -694,8 +694,9 @@ export class IngestBundleRunner {
|
|||
};
|
||||
|
||||
const skillsLoadedPerWu: string[] = [];
|
||||
const loadSkillTool: Record<string, Tool> = {
|
||||
load_skill: tool({
|
||||
const loadSkillTool: KtxRuntimeToolSet = {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description:
|
||||
'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.',
|
||||
inputSchema: z.object({ name: z.string() }),
|
||||
|
|
@ -705,19 +706,23 @@ export class IngestBundleRunner {
|
|||
const available =
|
||||
(await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') ||
|
||||
'(none)';
|
||||
return `Skill "${name}" not available. Available: ${available}`;
|
||||
return { markdown: `Skill "${name}" not available. Available: ${available}` };
|
||||
}
|
||||
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
|
||||
if (!skillsLoadedPerWu.includes(skill.name)) {
|
||||
skillsLoadedPerWu.push(skill.name);
|
||||
}
|
||||
return {
|
||||
const structured = {
|
||||
name: skill.name,
|
||||
skillDirectory: skill.path,
|
||||
content: this.deps.skillsRegistry.stripFrontmatter(body),
|
||||
};
|
||||
return {
|
||||
markdown: `# ${structured.name}\n\n${structured.content}`,
|
||||
structured,
|
||||
};
|
||||
},
|
||||
}),
|
||||
},
|
||||
};
|
||||
|
||||
const priorProvenance = await this.deps.provenance.findLatestArtifactsForRawPaths(
|
||||
|
|
@ -726,12 +731,15 @@ export class IngestBundleRunner {
|
|||
wu.rawFiles,
|
||||
);
|
||||
const wuEmitUnmappedFallbackTool = {
|
||||
emit_unmapped_fallback: createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: new Set(wu.rawFiles),
|
||||
tableRefExists: (tableRef) =>
|
||||
this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef),
|
||||
}),
|
||||
emit_unmapped_fallback: createRuntimeToolDescriptorFromAiTool(
|
||||
'emit_unmapped_fallback',
|
||||
createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: new Set(wu.rawFiles),
|
||||
tableRefExists: (tableRef) =>
|
||||
this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef),
|
||||
}),
|
||||
),
|
||||
};
|
||||
|
||||
const systemPrompt = buildWuSystemPrompt({
|
||||
|
|
@ -765,7 +773,7 @@ export class IngestBundleRunner {
|
|||
wu: wuInner,
|
||||
loadSkillTool,
|
||||
emitUnmappedFallbackTool: wuEmitUnmappedFallbackTool,
|
||||
toolsetTools: wuToolset.toAiSdkTools(wuToolContext),
|
||||
toolsetTools: wuToolset.toRuntimeTools(wuToolContext),
|
||||
}),
|
||||
join(transcriptDir, `${wuInner.unitKey}.jsonl`),
|
||||
wuInner.unitKey,
|
||||
|
|
@ -921,53 +929,79 @@ export class IngestBundleRunner {
|
|||
ingest: ingestToolMetadata,
|
||||
session: rcToolSession,
|
||||
};
|
||||
const rcLoadSkill: Record<string, Tool> = {
|
||||
load_skill: tool({
|
||||
const rcLoadSkill: KtxRuntimeToolSet = {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description: 'Load a skill.',
|
||||
inputSchema: z.object({ name: z.string() }),
|
||||
execute: async ({ name }) => {
|
||||
const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent');
|
||||
if (!skill) {
|
||||
return `Skill "${name}" not found`;
|
||||
return { markdown: `Skill "${name}" not found` };
|
||||
}
|
||||
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
|
||||
return { name: skill.name, content: this.deps.skillsRegistry.stripFrontmatter(body) };
|
||||
const structured = { name: skill.name, content: this.deps.skillsRegistry.stripFrontmatter(body) };
|
||||
return { markdown: `# ${structured.name}\n\n${structured.content}`, structured };
|
||||
},
|
||||
}),
|
||||
},
|
||||
};
|
||||
const allStagedPaths = new Set<string>([...currentHashes.keys()]);
|
||||
const rcRawSpanTool = { read_raw_span: createReadRawSpanTool({ stagedDir, allowedPaths: allStagedPaths }) };
|
||||
const rcStageListTool = { stage_list: createStageListTool({ stageIndex }) };
|
||||
const rcStageDiffTool = { stage_diff: createStageDiffTool({ stageIndex }) };
|
||||
const rcRawSpanTool = {
|
||||
read_raw_span: createRuntimeToolDescriptorFromAiTool(
|
||||
'read_raw_span',
|
||||
createReadRawSpanTool({ stagedDir, allowedPaths: allStagedPaths }),
|
||||
),
|
||||
};
|
||||
const rcStageListTool = {
|
||||
stage_list: createRuntimeToolDescriptorFromAiTool('stage_list', createStageListTool({ stageIndex })),
|
||||
};
|
||||
const rcStageDiffTool = {
|
||||
stage_diff: createRuntimeToolDescriptorFromAiTool('stage_diff', createStageDiffTool({ stageIndex })),
|
||||
};
|
||||
const rcEvictionListTool = {
|
||||
eviction_list: createEvictionListTool({
|
||||
provenance: this.deps.provenance,
|
||||
connectionId: job.connectionId,
|
||||
sourceKey: job.sourceKey,
|
||||
deletedRawPaths: eviction?.deletedRawPaths ?? [],
|
||||
}),
|
||||
eviction_list: createRuntimeToolDescriptorFromAiTool(
|
||||
'eviction_list',
|
||||
createEvictionListTool({
|
||||
provenance: this.deps.provenance,
|
||||
connectionId: job.connectionId,
|
||||
sourceKey: job.sourceKey,
|
||||
deletedRawPaths: eviction?.deletedRawPaths ?? [],
|
||||
}),
|
||||
),
|
||||
};
|
||||
const rcEmitConflictResolutionTool = {
|
||||
emit_conflict_resolution: createEmitConflictResolutionTool({ stageIndex }),
|
||||
emit_conflict_resolution: createRuntimeToolDescriptorFromAiTool(
|
||||
'emit_conflict_resolution',
|
||||
createEmitConflictResolutionTool({ stageIndex }),
|
||||
),
|
||||
};
|
||||
const rcEmitEvictionDecisionTool = {
|
||||
emit_eviction_decision: createEmitEvictionDecisionTool({
|
||||
stageIndex,
|
||||
deletedRawPaths: eviction?.deletedRawPaths ?? [],
|
||||
}),
|
||||
emit_eviction_decision: createRuntimeToolDescriptorFromAiTool(
|
||||
'emit_eviction_decision',
|
||||
createEmitEvictionDecisionTool({
|
||||
stageIndex,
|
||||
deletedRawPaths: eviction?.deletedRawPaths ?? [],
|
||||
}),
|
||||
),
|
||||
};
|
||||
const rcEmitArtifactResolutionTool = {
|
||||
emit_artifact_resolution: createEmitArtifactResolutionTool({
|
||||
stageIndex,
|
||||
allowedPaths: allStagedPaths,
|
||||
}),
|
||||
emit_artifact_resolution: createRuntimeToolDescriptorFromAiTool(
|
||||
'emit_artifact_resolution',
|
||||
createEmitArtifactResolutionTool({
|
||||
stageIndex,
|
||||
allowedPaths: allStagedPaths,
|
||||
}),
|
||||
),
|
||||
};
|
||||
const rcEmitUnmappedFallbackTool = {
|
||||
emit_unmapped_fallback: createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: allStagedPaths,
|
||||
tableRefExists: (tableRef) => this.tableRefExistsInSemanticLayer(rcScopedSl, slConnectionIds, tableRef),
|
||||
}),
|
||||
emit_unmapped_fallback: createRuntimeToolDescriptorFromAiTool(
|
||||
'emit_unmapped_fallback',
|
||||
createEmitUnmappedFallbackTool({
|
||||
stageIndex,
|
||||
allowedPaths: allStagedPaths,
|
||||
tableRefExists: (tableRef) => this.tableRefExistsInSemanticLayer(rcScopedSl, slConnectionIds, tableRef),
|
||||
}),
|
||||
),
|
||||
};
|
||||
|
||||
const reconcileBaseFraming = await this.deps.promptService.loadPrompt('memory_agent_bundle_ingest_reconcile');
|
||||
|
|
@ -1026,7 +1060,7 @@ export class IngestBundleRunner {
|
|||
emitArtifactResolutionTool: rcEmitArtifactResolutionTool,
|
||||
emitUnmappedFallbackTool: rcEmitUnmappedFallbackTool,
|
||||
readRawSpanTool: rcRawSpanTool,
|
||||
toolsetTools: rcToolset.toAiSdkTools(rcToolContext),
|
||||
toolsetTools: rcToolset.toRuntimeTools(rcToolContext),
|
||||
}),
|
||||
join(transcriptDir, 'reconcile.jsonl'),
|
||||
'reconcile',
|
||||
|
|
@ -1075,7 +1109,7 @@ export class IngestBundleRunner {
|
|||
emitArtifactResolutionTool: rcEmitArtifactResolutionTool,
|
||||
emitUnmappedFallbackTool: rcEmitUnmappedFallbackTool,
|
||||
readRawSpanTool: rcRawSpanTool,
|
||||
toolsetTools: rcToolset.toAiSdkTools(rcToolContext),
|
||||
toolsetTools: rcToolset.toRuntimeTools(rcToolContext),
|
||||
}),
|
||||
join(transcriptDir, 'reconcile.jsonl'),
|
||||
'reconcile',
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import { tmpdir } from 'node:os';
|
|||
import { join } from 'node:path';
|
||||
import Database from 'better-sqlite3';
|
||||
import YAML from 'yaml';
|
||||
import { AgentRunnerService } from '../agent/index.js';
|
||||
import type { AgentRunnerPort, RunLoopParams } from '../llm/index.js';
|
||||
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js';
|
||||
import { makeLocalGitRepo } from '../test/make-local-git-repo.js';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
|
@ -13,16 +13,12 @@ import { createDefaultLocalIngestAdapters, localPullConfigForAdapter } from './l
|
|||
import { getLocalIngestStatus, runLocalIngest } from './local-ingest.js';
|
||||
import type { ChunkResult, DiffSet, SourceAdapter } from './types.js';
|
||||
|
||||
class TestAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
class TestAgentRunner implements AgentRunnerPort {
|
||||
runLoop = vi.fn().mockResolvedValue({ stopReason: 'natural' as const });
|
||||
}
|
||||
|
||||
class LookerSlWritingAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
class LookerSlWritingAgentRunner implements AgentRunnerPort {
|
||||
runLoop = vi.fn(async (params: RunLoopParams) => {
|
||||
if (
|
||||
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
|
||||
params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders'
|
||||
|
|
@ -31,130 +27,100 @@ class LookerSlWritingAgentRunner extends AgentRunnerService {
|
|||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
|
||||
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'looker-verification-ledger', messages: [] },
|
||||
);
|
||||
await ledger.execute({
|
||||
summary: 'Test fixture verified Looker explore target identifiers before writing SL.',
|
||||
verifiedIdentifiers: ['prod-warehouse', 'public.orders'],
|
||||
unverifiedIdentifiers: [],
|
||||
});
|
||||
const slWrite = params.toolSet.sl_write_source;
|
||||
if (!slWrite?.execute) {
|
||||
throw new Error('sl_write_source tool was not available to the Looker WorkUnit');
|
||||
}
|
||||
const result = await slWrite.execute(
|
||||
{
|
||||
connectionId: 'prod-warehouse',
|
||||
sourceName: 'looker__ecommerce__orders',
|
||||
source: {
|
||||
name: 'looker__ecommerce__orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [
|
||||
{ name: 'id', type: 'number' },
|
||||
{ name: 'revenue', type: 'number' },
|
||||
],
|
||||
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
|
||||
},
|
||||
const result = await slWrite.execute({
|
||||
connectionId: 'prod-warehouse',
|
||||
sourceName: 'looker__ecommerce__orders',
|
||||
source: {
|
||||
name: 'looker__ecommerce__orders',
|
||||
table: 'public.orders',
|
||||
grain: ['id'],
|
||||
columns: [
|
||||
{ name: 'id', type: 'number' },
|
||||
{ name: 'revenue', type: 'number' },
|
||||
],
|
||||
measures: [{ name: 'total_revenue', expr: 'sum(revenue)' }],
|
||||
},
|
||||
{ toolCallId: 'looker-sl-write' },
|
||||
);
|
||||
if (!result.structured.success) {
|
||||
});
|
||||
if (!(result.structured as { success?: boolean } | undefined)?.success) {
|
||||
throw new Error(result.markdown);
|
||||
}
|
||||
}
|
||||
return { stopReason: 'natural' as const };
|
||||
});
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
}
|
||||
|
||||
class WikiWritingAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
class WikiWritingAgentRunner implements AgentRunnerPort {
|
||||
runLoop = vi.fn(async (params: RunLoopParams) => {
|
||||
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
|
||||
const ledger = params.toolSet.record_verification_ledger;
|
||||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture writes wiki-only context with no warehouse identifiers.',
|
||||
verifiedIdentifiers: [],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'wiki-verification-ledger', messages: [] },
|
||||
);
|
||||
await ledger.execute({
|
||||
summary: 'Test fixture writes wiki-only context with no warehouse identifiers.',
|
||||
verifiedIdentifiers: [],
|
||||
unverifiedIdentifiers: [],
|
||||
});
|
||||
const wikiWrite = params.toolSet.wiki_write;
|
||||
if (!wikiWrite?.execute) {
|
||||
throw new Error('wiki_write tool was not available to the WorkUnit');
|
||||
}
|
||||
const result = await wikiWrite.execute(
|
||||
{
|
||||
key: 'orders_context',
|
||||
summary: 'Orders source context',
|
||||
content: 'Orders are purchase records used for revenue analysis.',
|
||||
tags: ['orders'],
|
||||
},
|
||||
{ toolCallId: 'wiki-write' },
|
||||
);
|
||||
if (!result.structured.success) {
|
||||
const result = await wikiWrite.execute({
|
||||
key: 'orders_context',
|
||||
summary: 'Orders source context',
|
||||
content: 'Orders are purchase records used for revenue analysis.',
|
||||
tags: ['orders'],
|
||||
});
|
||||
if (!(result.structured as { success?: boolean } | undefined)?.success) {
|
||||
throw new Error(result.markdown);
|
||||
}
|
||||
}
|
||||
return { stopReason: 'natural' as const };
|
||||
});
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
}
|
||||
|
||||
class WikiWritingWithRawPathAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
class WikiWritingWithRawPathAgentRunner implements AgentRunnerPort {
|
||||
runLoop = vi.fn(async (params: RunLoopParams) => {
|
||||
if (params.telemetryTags?.operationName === 'ingest-bundle-wu') {
|
||||
const ledger = params.toolSet.record_verification_ledger;
|
||||
if (!ledger?.execute) {
|
||||
throw new Error('record_verification_ledger tool was not available to the WorkUnit');
|
||||
}
|
||||
await ledger.execute(
|
||||
{
|
||||
summary: 'Test fixture writes wiki-only context with explicit raw provenance and no warehouse identifiers.',
|
||||
verifiedIdentifiers: [],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 'wiki-raw-path-verification-ledger', messages: [] },
|
||||
);
|
||||
await ledger.execute({
|
||||
summary: 'Test fixture writes wiki-only context with explicit raw provenance and no warehouse identifiers.',
|
||||
verifiedIdentifiers: [],
|
||||
unverifiedIdentifiers: [],
|
||||
});
|
||||
const wikiWrite = params.toolSet.wiki_write;
|
||||
if (!wikiWrite?.execute) {
|
||||
throw new Error('wiki_write tool was not available to the WorkUnit');
|
||||
}
|
||||
const result = await wikiWrite.execute(
|
||||
{
|
||||
key: 'orders_context',
|
||||
summary: 'Orders source context',
|
||||
content: 'Orders are purchase records used for revenue analysis.',
|
||||
tags: ['orders'],
|
||||
rawPaths: ['orders/orders.json'],
|
||||
},
|
||||
{ toolCallId: 'wiki-write' },
|
||||
);
|
||||
if (!result.structured.success) {
|
||||
const result = await wikiWrite.execute({
|
||||
key: 'orders_context',
|
||||
summary: 'Orders source context',
|
||||
content: 'Orders are purchase records used for revenue analysis.',
|
||||
tags: ['orders'],
|
||||
rawPaths: ['orders/orders.json'],
|
||||
});
|
||||
if (!(result.structured as { success?: boolean } | undefined)?.success) {
|
||||
throw new Error(result.markdown);
|
||||
}
|
||||
}
|
||||
return { stopReason: 'natural' as const };
|
||||
});
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
}
|
||||
|
||||
class HistoricSqlEvidenceAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: any) => {
|
||||
class HistoricSqlEvidenceAgentRunner implements AgentRunnerPort {
|
||||
runLoop = vi.fn(async (params: RunLoopParams) => {
|
||||
if (
|
||||
params.telemetryTags?.operationName === 'ingest-bundle-wu' &&
|
||||
params.telemetryTags?.unitKey === 'historic-sql-table-public-orders'
|
||||
|
|
@ -163,31 +129,24 @@ class HistoricSqlEvidenceAgentRunner extends AgentRunnerService {
|
|||
if (!emitEvidence?.execute) {
|
||||
throw new Error('emit_historic_sql_evidence tool was not available to the historic-SQL WorkUnit');
|
||||
}
|
||||
const result = await emitEvidence.execute(
|
||||
{
|
||||
kind: 'table_usage',
|
||||
table: 'public.orders',
|
||||
rawPath: 'tables/public.orders.json',
|
||||
usage: {
|
||||
narrative: 'Orders are repeatedly queried by lifecycle status.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status'],
|
||||
commonJoins: [],
|
||||
staleSince: null,
|
||||
},
|
||||
const result = await emitEvidence.execute({
|
||||
kind: 'table_usage',
|
||||
table: 'public.orders',
|
||||
rawPath: 'tables/public.orders.json',
|
||||
usage: {
|
||||
narrative: 'Orders are repeatedly queried by lifecycle status.',
|
||||
frequencyTier: 'high',
|
||||
commonFilters: ['status'],
|
||||
commonJoins: [],
|
||||
staleSince: null,
|
||||
},
|
||||
{ toolCallId: 'historic-sql-evidence' },
|
||||
);
|
||||
if (!String(result).includes('Recorded historic-SQL table_usage evidence')) {
|
||||
throw new Error(`Unexpected historic-SQL evidence result: ${String(result)}`);
|
||||
});
|
||||
if (!result.markdown.includes('Recorded historic-SQL table_usage evidence')) {
|
||||
throw new Error(`Unexpected historic-SQL evidence result: ${result.markdown}`);
|
||||
}
|
||||
}
|
||||
return { stopReason: 'natural' as const };
|
||||
});
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
}
|
||||
|
||||
class HistoricSqlEvidenceTestAdapter implements SourceAdapter {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { AgentRunnerService } from '../agent/index.js';
|
||||
import type { AgentRunnerPort } from '../llm/index.js';
|
||||
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js';
|
||||
|
|
@ -17,6 +17,10 @@ type RuntimeWithConnectionDeps = {
|
|||
};
|
||||
};
|
||||
|
||||
function testAgentRunner(): AgentRunnerPort {
|
||||
return { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' as const }) };
|
||||
}
|
||||
|
||||
describe('createLocalBundleIngestRuntime', () => {
|
||||
let tempDir: string;
|
||||
let project: KtxLocalProject;
|
||||
|
|
@ -55,15 +59,42 @@ describe('createLocalBundleIngestRuntime', () => {
|
|||
}),
|
||||
).toThrow(
|
||||
[
|
||||
'ktx ingest requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.',
|
||||
`Configure an Anthropic provider, then rerun ingest:`,
|
||||
` ktx setup --project-dir ${project.projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
|
||||
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
|
||||
'Configure a local Claude Code session or API-backed LLM, then rerun ingest:',
|
||||
` ktx setup --project-dir ${project.projectDir} --llm-backend claude-code --no-input`,
|
||||
` ktx setup --project-dir ${project.projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
|
||||
].join('\n'),
|
||||
);
|
||||
});
|
||||
|
||||
it('uses a runtime-backed agent runner when claude-code is configured', () => {
|
||||
const runtime = {
|
||||
generateText: vi.fn(),
|
||||
generateObject: vi.fn(),
|
||||
runAgentLoop: vi.fn(async () => ({ stopReason: 'natural' as const })),
|
||||
};
|
||||
project.config.llm = {
|
||||
provider: { backend: 'claude-code' },
|
||||
models: { default: 'sonnet' },
|
||||
promptCaching: { enabled: false },
|
||||
};
|
||||
const createLlmRuntime = vi.fn(() => runtime);
|
||||
|
||||
const created = createLocalBundleIngestRuntime({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
createLlmRuntime,
|
||||
});
|
||||
|
||||
expect(created).toBeDefined();
|
||||
expect(createLlmRuntime).toHaveBeenCalledWith(
|
||||
project.config.llm,
|
||||
expect.objectContaining({ projectDir: project.projectDir }),
|
||||
);
|
||||
});
|
||||
|
||||
it('builds runner deps with local SQLite stores and context tools enabled', async () => {
|
||||
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
|
||||
const agentRunner = testAgentRunner();
|
||||
|
||||
const runtime = createLocalBundleIngestRuntime({
|
||||
project,
|
||||
|
|
@ -94,7 +125,7 @@ describe('createLocalBundleIngestRuntime', () => {
|
|||
project_id: 'acme',
|
||||
dataset_id: 'warehouse',
|
||||
};
|
||||
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
|
||||
const agentRunner = testAgentRunner();
|
||||
|
||||
const runtime = createLocalBundleIngestRuntime({
|
||||
project,
|
||||
|
|
@ -114,7 +145,7 @@ describe('createLocalBundleIngestRuntime', () => {
|
|||
});
|
||||
|
||||
it('passes project connection config to local ingest query executors', async () => {
|
||||
const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any });
|
||||
const agentRunner = testAgentRunner();
|
||||
const queryExecutor = {
|
||||
execute: vi.fn(async () => ({
|
||||
headers: ['answer'],
|
||||
|
|
|
|||
|
|
@ -1,20 +1,20 @@
|
|||
import { mkdirSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import type { Tool } from 'ai';
|
||||
import YAML from 'yaml';
|
||||
import type { AgentRunnerService } from '../agent/index.js';
|
||||
import { AgentRunnerService as DefaultAgentRunnerService } from '../agent/index.js';
|
||||
import { localConnectionInfoFromConfig, type KtxSqlQueryExecutorPort } from '../connections/index.js';
|
||||
import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js';
|
||||
import { noopLogger, SessionWorktreeService } from '../core/index.js';
|
||||
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
|
||||
import {
|
||||
createJsonlKtxLlmDebugRequestRecorder,
|
||||
createRuntimeToolDescriptorFromAiTool,
|
||||
createLocalKtxEmbeddingProviderFromConfig,
|
||||
createLocalKtxLlmProviderFromConfig,
|
||||
createLocalKtxLlmRuntimeFromConfig,
|
||||
KtxIngestEmbeddingPortAdapter,
|
||||
RuntimeAgentRunner,
|
||||
type AgentRunnerPort,
|
||||
type KtxLlmRuntimePort,
|
||||
type KtxRuntimeToolSet,
|
||||
} from '../llm/index.js';
|
||||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import { ktxLocalStateDbPath } from '../project/index.js';
|
||||
|
|
@ -100,8 +100,9 @@ const LOCAL_SHAPE_WARNING = 'Local ingest validates semantic-layer YAML shape on
|
|||
export interface CreateLocalBundleIngestRuntimeOptions {
|
||||
project: KtxLocalProject;
|
||||
adapters: SourceAdapter[];
|
||||
agentRunner?: AgentRunnerService;
|
||||
llmProvider?: KtxLlmProvider;
|
||||
agentRunner?: AgentRunnerPort;
|
||||
llmRuntime?: KtxLlmRuntimePort;
|
||||
createLlmRuntime?: typeof createLocalKtxLlmRuntimeFromConfig;
|
||||
llmDebugRequestFile?: string;
|
||||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
|
|
@ -456,12 +457,12 @@ class NoopKnowledgeEventPort implements KnowledgeEventPort {
|
|||
class LocalIngestToolSet implements IngestToolsetLike {
|
||||
constructor(
|
||||
private readonly tools: BaseTool[],
|
||||
private readonly sourceTools: Record<string, Tool> = {},
|
||||
private readonly sourceTools: KtxRuntimeToolSet = {},
|
||||
) {}
|
||||
|
||||
toAiSdkTools(context: ToolContext) {
|
||||
toRuntimeTools(context: ToolContext): KtxRuntimeToolSet {
|
||||
return {
|
||||
...Object.fromEntries(this.tools.map((tool) => [tool.name, tool.toAiSdkTool(context)])),
|
||||
...Object.fromEntries(this.tools.map((tool) => [tool.name, tool.toRuntimeTool(context)])),
|
||||
...this.sourceTools,
|
||||
};
|
||||
}
|
||||
|
|
@ -541,13 +542,16 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort {
|
|||
}
|
||||
|
||||
createIngestWuToolset(session: ToolSession, options?: { includeContextEvidenceTools?: boolean }): IngestToolsetLike {
|
||||
const sourceTools: Record<string, Tool> =
|
||||
const sourceTools: KtxRuntimeToolSet =
|
||||
session.ingest?.sourceKey === 'historic-sql'
|
||||
? {
|
||||
emit_historic_sql_evidence: createEmitHistoricSqlEvidenceTool({
|
||||
connectionId: session.connectionId,
|
||||
session,
|
||||
}),
|
||||
emit_historic_sql_evidence: createRuntimeToolDescriptorFromAiTool(
|
||||
'emit_historic_sql_evidence',
|
||||
createEmitHistoricSqlEvidenceTool({
|
||||
connectionId: session.connectionId,
|
||||
session,
|
||||
}),
|
||||
),
|
||||
}
|
||||
: {};
|
||||
return new LocalIngestToolSet(
|
||||
|
|
@ -571,36 +575,36 @@ function nextLocalJobId(): string {
|
|||
|
||||
function localIngestLlmProviderGuardMessage(projectDir: string): string {
|
||||
return [
|
||||
'ktx ingest requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.',
|
||||
'Configure an Anthropic provider, then rerun ingest:',
|
||||
` ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
|
||||
'ktx ingest requires llm.provider.backend: anthropic, vertex, gateway, or claude-code, or an injected agentRunner.',
|
||||
'Configure a local Claude Code session or API-backed LLM, then rerun ingest:',
|
||||
` ktx setup --project-dir ${projectDir} --llm-backend claude-code --no-input`,
|
||||
` ktx setup --project-dir ${projectDir} --llm-backend anthropic --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`,
|
||||
].join('\n');
|
||||
}
|
||||
|
||||
function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): {
|
||||
agentRunner: AgentRunnerService;
|
||||
llmProvider?: KtxLlmProvider;
|
||||
agentRunner: AgentRunnerPort;
|
||||
llmRuntime?: KtxLlmRuntimePort;
|
||||
} {
|
||||
const llmProvider =
|
||||
options.llmProvider ?? createLocalKtxLlmProviderFromConfig(options.project.config.llm) ?? undefined;
|
||||
const llmRuntime =
|
||||
options.llmRuntime ??
|
||||
(options.createLlmRuntime ?? createLocalKtxLlmRuntimeFromConfig)(options.project.config.llm, {
|
||||
projectDir: options.project.projectDir,
|
||||
env: process.env,
|
||||
}) ??
|
||||
undefined;
|
||||
|
||||
if (options.agentRunner) {
|
||||
return { agentRunner: options.agentRunner, ...(llmProvider ? { llmProvider } : {}) };
|
||||
return { agentRunner: options.agentRunner, ...(llmRuntime ? { llmRuntime } : {}) };
|
||||
}
|
||||
|
||||
if (!llmProvider) {
|
||||
if (!llmRuntime) {
|
||||
throw new Error(localIngestLlmProviderGuardMessage(options.project.projectDir));
|
||||
}
|
||||
|
||||
return {
|
||||
agentRunner: new DefaultAgentRunnerService({
|
||||
llmProvider,
|
||||
logger: options.logger ?? noopLogger,
|
||||
...(options.llmDebugRequestFile
|
||||
? { debugRequestRecorder: createJsonlKtxLlmDebugRequestRecorder(options.llmDebugRequestFile) }
|
||||
: {}),
|
||||
}),
|
||||
llmProvider,
|
||||
agentRunner: new RuntimeAgentRunner(llmRuntime),
|
||||
llmRuntime,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -627,7 +631,7 @@ export function createLocalBundleIngestRuntime(
|
|||
const knowledgeIndex = new LocalKnowledgeIndex(options.project, embedding);
|
||||
const knowledgeEvents = new NoopKnowledgeEventPort();
|
||||
const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, options.project.git, logger);
|
||||
const { agentRunner, llmProvider } = resolveAgentRunner(options);
|
||||
const { agentRunner, llmRuntime } = resolveAgentRunner(options);
|
||||
const promptService = new PromptService({ promptsDir, partials: [], logger });
|
||||
const storage = new LocalIngestStorage(options.project);
|
||||
const registry = registerAdapters(options.adapters);
|
||||
|
|
@ -681,10 +685,11 @@ export function createLocalBundleIngestRuntime(
|
|||
commitMessages: new LocalCommitMessagePort(),
|
||||
embedding,
|
||||
contextEvidenceIndex: new ContextEvidenceIndexService({ store: contextStore, embeddings: embedding, logger }),
|
||||
pageTriage: llmProvider
|
||||
llmRuntime,
|
||||
pageTriage: llmRuntime
|
||||
? new PageTriageService({
|
||||
store: contextStore,
|
||||
llmProvider,
|
||||
llmRuntime,
|
||||
settings: {
|
||||
enabled: true,
|
||||
maxConcurrency: 2,
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
import { randomUUID } from 'node:crypto';
|
||||
import { cp, mkdir, rm } from 'node:fs/promises';
|
||||
import { isAbsolute, resolve } from 'node:path';
|
||||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import type { AgentRunnerService } from '../agent/index.js';
|
||||
import type { KtxSqlQueryExecutorPort } from '../connections/index.js';
|
||||
import type { KtxLogger } from '../core/index.js';
|
||||
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
|
||||
import type { AgentRunnerPort, KtxLlmRuntimePort } from '../llm/index.js';
|
||||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import { ktxLocalStateDbPath } from '../project/index.js';
|
||||
import { planMetabaseFanoutChildren } from './adapters/metabase/fanout-planner.js';
|
||||
|
|
@ -28,8 +27,8 @@ export interface RunLocalIngestOptions {
|
|||
trigger?: IngestTrigger;
|
||||
jobId?: string;
|
||||
memoryFlow?: MemoryFlowEventSink;
|
||||
agentRunner?: AgentRunnerService;
|
||||
llmProvider?: KtxLlmProvider;
|
||||
agentRunner?: AgentRunnerPort;
|
||||
llmRuntime?: KtxLlmRuntimePort;
|
||||
llmDebugRequestFile?: string;
|
||||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
|
|
@ -41,7 +40,7 @@ export interface LocalIngestMcpOptions
|
|||
extends Pick<
|
||||
RunLocalIngestOptions,
|
||||
| 'agentRunner'
|
||||
| 'llmProvider'
|
||||
| 'llmRuntime'
|
||||
| 'memoryModel'
|
||||
| 'semanticLayerCompute'
|
||||
| 'queryExecutor'
|
||||
|
|
@ -167,8 +166,8 @@ async function runScheduledPullJob(options: {
|
|||
trigger?: IngestTrigger;
|
||||
jobId?: string;
|
||||
memoryFlow?: MemoryFlowEventSink;
|
||||
agentRunner?: AgentRunnerService;
|
||||
llmProvider?: KtxLlmProvider;
|
||||
agentRunner?: AgentRunnerPort;
|
||||
llmRuntime?: KtxLlmRuntimePort;
|
||||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: KtxSqlQueryExecutorPort;
|
||||
|
|
@ -221,7 +220,7 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise<Lo
|
|||
jobId,
|
||||
memoryFlow: options.memoryFlow,
|
||||
agentRunner: options.agentRunner,
|
||||
llmProvider: options.llmProvider,
|
||||
llmRuntime: options.llmRuntime,
|
||||
memoryModel: options.memoryModel,
|
||||
semanticLayerCompute: options.semanticLayerCompute,
|
||||
queryExecutor: options.queryExecutor,
|
||||
|
|
@ -406,7 +405,7 @@ export async function runLocalMetabaseIngest(
|
|||
jobId: childJobId,
|
||||
memoryFlow: options.memoryFlow,
|
||||
agentRunner: options.agentRunner,
|
||||
llmProvider: options.llmProvider,
|
||||
llmRuntime: options.llmRuntime,
|
||||
memoryModel: options.memoryModel,
|
||||
semanticLayerCompute: options.semanticLayerCompute,
|
||||
queryExecutor: options.queryExecutor,
|
||||
|
|
|
|||
|
|
@ -1,24 +1,20 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { AgentRunnerService } from '../agent/index.js';
|
||||
import type { AgentRunnerPort, RunLoopParams } from '../llm/index.js';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { initKtxProject, type KtxLocalProject } from '../project/index.js';
|
||||
import { LocalMetabaseDiscoveryCache } from './adapters/metabase/local-source-state-store.js';
|
||||
import { getLocalIngestStatus, runLocalMetabaseIngest } from './local-ingest.js';
|
||||
import type { ChunkResult, FetchContext, SourceAdapter } from './types.js';
|
||||
|
||||
class TestAgentRunner extends AgentRunnerService {
|
||||
override runLoop = vi.fn(async (params: Parameters<AgentRunnerService['runLoop']>[0]) => {
|
||||
class TestAgentRunner implements AgentRunnerPort {
|
||||
runLoop = vi.fn(async (params: RunLoopParams) => {
|
||||
if (params.userPrompt.includes('metabase-db-2')) {
|
||||
return { stopReason: 'error' as const, error: new Error('database 2 failed') };
|
||||
}
|
||||
return { stopReason: 'natural' as const };
|
||||
});
|
||||
|
||||
constructor() {
|
||||
super({ llmProvider: { getModel: () => ({}) as never } as never });
|
||||
}
|
||||
}
|
||||
|
||||
class FakeMetabaseSourceAdapter implements SourceAdapter {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,11 @@ describe('PageTriageService', () => {
|
|||
};
|
||||
let promptService: { loadPrompt: ReturnType<typeof vi.fn<(name: string) => Promise<string>>> };
|
||||
let adapter: { triageSupported: true; getTriageSignals: ReturnType<typeof vi.fn> };
|
||||
let generateTextMock: ReturnType<typeof vi.fn>;
|
||||
let llmRuntime: {
|
||||
generateText: ReturnType<typeof vi.fn>;
|
||||
generateObject: ReturnType<typeof vi.fn>;
|
||||
runAgentLoop: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
|
||||
beforeEach(async () => {
|
||||
stagedDir = await mkdtemp(join(tmpdir(), 'page-triage-'));
|
||||
|
|
@ -88,31 +92,16 @@ describe('PageTriageService', () => {
|
|||
.fn<(name: string) => Promise<string>>()
|
||||
.mockImplementation((name) => Promise.resolve(`prompt:${name}`)),
|
||||
};
|
||||
generateTextMock = vi.fn();
|
||||
llmRuntime = {
|
||||
generateText: vi.fn(),
|
||||
generateObject: vi.fn(),
|
||||
runAgentLoop: vi.fn(),
|
||||
};
|
||||
service = new PageTriageService({
|
||||
store: repository as any,
|
||||
llmProvider: {
|
||||
getModel: vi.fn().mockReturnValue('model'),
|
||||
getModelByName: vi.fn(),
|
||||
cacheMarker: vi.fn(),
|
||||
repairToolCallHandler: vi.fn(),
|
||||
thinkingProviderOptions: vi.fn(),
|
||||
telemetryConfig: vi.fn(),
|
||||
promptCachingConfig: vi.fn(() => ({
|
||||
enabled: false,
|
||||
systemTtl: '1h',
|
||||
toolsTtl: '1h',
|
||||
historyTtl: '5m',
|
||||
cacheSystem: true,
|
||||
cacheTools: true,
|
||||
cacheHistory: true,
|
||||
vertexFallbackTo5m: false,
|
||||
})),
|
||||
activeBackend: vi.fn(() => 'anthropic'),
|
||||
} as any,
|
||||
llmRuntime: llmRuntime as any,
|
||||
settings: triageSettings,
|
||||
promptService: promptService as any,
|
||||
generateText: generateTextMock as any,
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -121,10 +110,10 @@ describe('PageTriageService', () => {
|
|||
});
|
||||
|
||||
it('writes light-lane candidates and keeps the page out of full WorkUnits', async () => {
|
||||
generateTextMock
|
||||
.mockResolvedValueOnce({ text: JSON.stringify({ lane: 'light', reason: 'short durable policy' }) } as any)
|
||||
.mockResolvedValueOnce({
|
||||
text: JSON.stringify({
|
||||
llmRuntime.generateText
|
||||
.mockResolvedValueOnce(JSON.stringify({ lane: 'light', reason: 'short durable policy' }))
|
||||
.mockResolvedValueOnce(
|
||||
JSON.stringify({
|
||||
candidates: [
|
||||
{
|
||||
candidateKey: 'support-handoff-owner',
|
||||
|
|
@ -142,7 +131,7 @@ describe('PageTriageService', () => {
|
|||
},
|
||||
],
|
||||
}),
|
||||
} as any);
|
||||
);
|
||||
|
||||
const result = await service.triageRun({
|
||||
stagedDir,
|
||||
|
|
@ -171,6 +160,7 @@ describe('PageTriageService', () => {
|
|||
});
|
||||
expect(result.fullRawPaths.has('pages/page-1/page.md')).toBe(false);
|
||||
expect(adapter.getTriageSignals).toHaveBeenCalledWith(stagedDir, 'page-1');
|
||||
expect(llmRuntime.generateText).toHaveBeenCalledWith(expect.objectContaining({ role: 'triage' }));
|
||||
expect(repository.setDocumentTriageLane).toHaveBeenCalledWith('run-1', 'pages/page-1/page.md', 'light');
|
||||
expect(repository.insertCandidate).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
|
|
@ -225,23 +215,20 @@ describe('PageTriageService', () => {
|
|||
}
|
||||
return Promise.resolve(`prompt:${name}`);
|
||||
});
|
||||
generateTextMock
|
||||
llmRuntime.generateText
|
||||
.mockImplementationOnce((args: any) => {
|
||||
const systemMessage = args.system ?? args.messages.find((m: { role: string }) => m.role === 'system');
|
||||
const userMessage = args.messages.find((m: { role: string }) => m.role === 'user');
|
||||
const systemText =
|
||||
typeof systemMessage === 'string' ? systemMessage : (systemMessage.content as string);
|
||||
const userText = userMessage.content as string;
|
||||
const systemText = args.system as string;
|
||||
const userText = args.prompt as string;
|
||||
expect(systemText).toContain(
|
||||
'Reusable templates and scripts are durable knowledge regardless of subject matter.',
|
||||
);
|
||||
expect(systemText).toContain('Date-titled standups are still skip; named templates and scripts are not.');
|
||||
expect(userText).toContain('Cold Call Script');
|
||||
expect(userText).not.toContain('Reusable templates and scripts are durable knowledge');
|
||||
return { text: JSON.stringify({ lane: 'light', reason: 'reusable sales script' }) } as any;
|
||||
return JSON.stringify({ lane: 'light', reason: 'reusable sales script' });
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
text: JSON.stringify({
|
||||
.mockResolvedValueOnce(
|
||||
JSON.stringify({
|
||||
candidates: [
|
||||
{
|
||||
candidateKey: 'cold-call-script',
|
||||
|
|
@ -259,7 +246,7 @@ describe('PageTriageService', () => {
|
|||
},
|
||||
],
|
||||
}),
|
||||
} as any);
|
||||
);
|
||||
|
||||
const result = await service.triageRun({
|
||||
stagedDir,
|
||||
|
|
@ -312,9 +299,7 @@ describe('PageTriageService', () => {
|
|||
'utf-8',
|
||||
);
|
||||
|
||||
generateTextMock.mockResolvedValue({
|
||||
text: JSON.stringify({ lane: 'full', reason: 'durable policy page' }),
|
||||
} as any);
|
||||
llmRuntime.generateText.mockResolvedValue(JSON.stringify({ lane: 'full', reason: 'durable policy page' }));
|
||||
|
||||
const result = await service.triageRun({
|
||||
stagedDir,
|
||||
|
|
@ -351,7 +336,7 @@ describe('PageTriageService', () => {
|
|||
});
|
||||
|
||||
it('falls back to full when classifier output is malformed', async () => {
|
||||
generateTextMock.mockResolvedValueOnce({ text: 'not-json' } as any);
|
||||
llmRuntime.generateText.mockResolvedValueOnce('not-json');
|
||||
|
||||
const result = await service.triageRun({
|
||||
stagedDir,
|
||||
|
|
@ -370,8 +355,8 @@ describe('PageTriageService', () => {
|
|||
});
|
||||
|
||||
it('promotes a light page to full when light extraction fails', async () => {
|
||||
generateTextMock
|
||||
.mockResolvedValueOnce({ text: JSON.stringify({ lane: 'light', reason: 'short durable policy' }) } as any)
|
||||
llmRuntime.generateText
|
||||
.mockResolvedValueOnce(JSON.stringify({ lane: 'light', reason: 'short durable policy' }))
|
||||
.mockRejectedValueOnce(new Error('provider unavailable'));
|
||||
|
||||
const result = await service.triageRun({
|
||||
|
|
@ -405,7 +390,7 @@ describe('PageTriageService', () => {
|
|||
});
|
||||
|
||||
expect(result).toEqual({ enabled: false, report: undefined, fullRawPaths: new Set<string>(), warnings: [] });
|
||||
expect(generateTextMock).not.toHaveBeenCalled();
|
||||
expect(llmRuntime.generateText).not.toHaveBeenCalled();
|
||||
expect(repository.setDocumentTriageLane).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { readdir, readFile } from 'node:fs/promises';
|
||||
import { dirname, join, relative } from 'node:path';
|
||||
import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider } from '@ktx/llm';
|
||||
import { generateText, type ToolSet } from 'ai';
|
||||
import pLimit from 'p-limit';
|
||||
import { z } from 'zod';
|
||||
import { type KtxLogger, noopLogger } from '../../core/index.js';
|
||||
import type { KtxLlmRuntimePort } from '../../llm/index.js';
|
||||
import type { PromptService } from '../../prompts/index.js';
|
||||
import type { InsertContextCandidateInput } from '../context-candidates/index.js';
|
||||
import type { JsonValue } from '../ports.js';
|
||||
|
|
@ -100,20 +99,17 @@ export interface PageTriageSettings {
|
|||
|
||||
export interface PageTriageServiceDeps {
|
||||
store: PageTriageStorePort;
|
||||
llmProvider: KtxLlmProvider;
|
||||
llmRuntime: KtxLlmRuntimePort;
|
||||
settings: PageTriageSettings;
|
||||
promptService: PromptService;
|
||||
logger?: KtxLogger;
|
||||
generateText?: typeof generateText;
|
||||
}
|
||||
|
||||
export class PageTriageService {
|
||||
private readonly logger: KtxLogger;
|
||||
private readonly runGenerateText: typeof generateText;
|
||||
|
||||
constructor(private readonly deps: PageTriageServiceDeps) {
|
||||
this.logger = deps.logger ?? noopLogger;
|
||||
this.runGenerateText = deps.generateText ?? generateText;
|
||||
}
|
||||
|
||||
async triageRun(args: PageTriageRunArgs): Promise<PageTriageRunResult> {
|
||||
|
|
@ -339,22 +335,12 @@ export class PageTriageService {
|
|||
jobId: string;
|
||||
unitKey: string;
|
||||
}): Promise<string> {
|
||||
const model = this.deps.llmProvider.getModel('triage');
|
||||
const built = new KtxMessageBuilder(this.deps.llmProvider).wrapSimple({
|
||||
return this.deps.llmRuntime.generateText({
|
||||
role: 'triage',
|
||||
system: params.system,
|
||||
messages: [{ role: 'user', content: params.prompt }],
|
||||
tools: {},
|
||||
model,
|
||||
});
|
||||
const split = splitKtxSystemMessages(built.messages);
|
||||
const result = await this.runGenerateText({
|
||||
model,
|
||||
prompt: params.prompt,
|
||||
temperature: 0,
|
||||
...(split.system ? { system: split.system } : {}),
|
||||
messages: split.messages,
|
||||
tools: built.tools as ToolSet,
|
||||
});
|
||||
return result.text;
|
||||
}
|
||||
|
||||
private async buildClassifierSystem(): Promise<string> {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
import type { ToolSet } from 'ai';
|
||||
import type { KtxModelRole } from '@ktx/llm';
|
||||
import type { AgentRunnerService } from '../agent/index.js';
|
||||
import type { KtxEmbeddingPort } from '../core/embedding.js';
|
||||
import type { GitService, KtxFileStorePort, KtxLogger, SessionOutcome } from '../core/index.js';
|
||||
import type { AgentRunnerPort, KtxLlmRuntimePort, KtxRuntimeToolSet } from '../llm/index.js';
|
||||
import type { CaptureSession, MemoryAction, MemoryKnowledgeSlRefsPort } from '../memory/index.js';
|
||||
import type { PromptService } from '../prompts/index.js';
|
||||
import type { SkillsRegistryService } from '../skills/index.js';
|
||||
|
|
@ -163,7 +162,7 @@ export interface IngestCommitMessagePort {
|
|||
}
|
||||
|
||||
export interface IngestToolsetLike {
|
||||
toAiSdkTools(context: ToolContext): ToolSet;
|
||||
toRuntimeTools(context: ToolContext): KtxRuntimeToolSet;
|
||||
}
|
||||
|
||||
export interface IngestToolsetFactoryPort {
|
||||
|
|
@ -315,7 +314,7 @@ export interface CuratorPaginationPort {
|
|||
items: ReconcileCandidateForPrompt[];
|
||||
runState: ReconcilePromptRunState;
|
||||
}) => string;
|
||||
buildToolSet: (passNumber: number) => ToolSet;
|
||||
buildToolSet: (passNumber: number) => KtxRuntimeToolSet;
|
||||
getReconciliationActions: () => MemoryAction[];
|
||||
onStepFinish?: (info: { passNumber: number; stepIndex: number; stepBudget: number }) => void;
|
||||
}): Promise<ReconciliationOutcome & { report: CuratorPaginationReport; warnings: string[] }>;
|
||||
|
|
@ -350,7 +349,8 @@ export interface IngestBundleRunnerDeps {
|
|||
registry: SourceAdapterRegistryPort;
|
||||
diffSetService: DiffSetComputerPort;
|
||||
sessionWorktreeService: IngestSessionWorktreePort;
|
||||
agentRunner: AgentRunnerService;
|
||||
agentRunner: AgentRunnerPort;
|
||||
llmRuntime?: KtxLlmRuntimePort;
|
||||
gitService: GitService;
|
||||
lockingService: IngestLockPort;
|
||||
storage: IngestStoragePort;
|
||||
|
|
|
|||
|
|
@ -141,26 +141,17 @@ describe('buildReconcileToolSet', () => {
|
|||
toolsetTools: { sl_write_source: { description: 'sl write', inputSchema: {} as any, execute: slWrite } as any },
|
||||
});
|
||||
|
||||
const correction = await toolSet.sl_write_source.execute?.(
|
||||
{ connectionId: 'warehouse', sourceName: 'accounts' },
|
||||
{ toolCallId: 't1' } as any,
|
||||
);
|
||||
const correction = await toolSet.sl_write_source.execute?.({ connectionId: 'warehouse', sourceName: 'accounts' });
|
||||
|
||||
expect(slWrite).not.toHaveBeenCalled();
|
||||
expect(correction).toMatchObject({ structured: { success: false, reason: 'verification_ledger_required' } });
|
||||
|
||||
await toolSet.record_verification_ledger.execute?.(
|
||||
{
|
||||
summary: 'Verified warehouse.accounts with entity_details.',
|
||||
verifiedIdentifiers: ['warehouse.accounts'],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 't2' } as any,
|
||||
);
|
||||
const written = await toolSet.sl_write_source.execute?.(
|
||||
{ connectionId: 'warehouse', sourceName: 'accounts' },
|
||||
{ toolCallId: 't3' } as any,
|
||||
);
|
||||
await toolSet.record_verification_ledger.execute?.({
|
||||
summary: 'Verified warehouse.accounts with entity_details.',
|
||||
verifiedIdentifiers: ['warehouse.accounts'],
|
||||
unverifiedIdentifiers: [],
|
||||
});
|
||||
const written = await toolSet.sl_write_source.execute?.({ connectionId: 'warehouse', sourceName: 'accounts' });
|
||||
|
||||
expect(slWrite).toHaveBeenCalledTimes(1);
|
||||
expect(written).toMatchObject({ structured: { success: true } });
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import type { Tool, ToolSet } from 'ai';
|
||||
import { buildCanonicalPinsPromptBlock, type CanonicalPin } from '../canonical-pins.js';
|
||||
import type { KtxRuntimeToolSet } from '../../llm/index.js';
|
||||
import {
|
||||
createVerificationLedgerState,
|
||||
VERIFICATION_LEDGER_PROMPT,
|
||||
|
|
@ -181,19 +181,19 @@ export function buildReconcileUserPrompt(
|
|||
}
|
||||
|
||||
export interface ReconcileToolSetInput {
|
||||
loadSkillTool: Record<string, Tool>;
|
||||
stageListTool: Record<string, Tool>;
|
||||
stageDiffTool: Record<string, Tool>;
|
||||
evictionListTool: Record<string, Tool>;
|
||||
emitConflictResolutionTool: Record<string, Tool>;
|
||||
emitEvictionDecisionTool: Record<string, Tool>;
|
||||
emitArtifactResolutionTool: Record<string, Tool>;
|
||||
emitUnmappedFallbackTool: Record<string, Tool>;
|
||||
readRawSpanTool: Record<string, Tool>;
|
||||
toolsetTools: ToolSet;
|
||||
loadSkillTool: KtxRuntimeToolSet;
|
||||
stageListTool: KtxRuntimeToolSet;
|
||||
stageDiffTool: KtxRuntimeToolSet;
|
||||
evictionListTool: KtxRuntimeToolSet;
|
||||
emitConflictResolutionTool: KtxRuntimeToolSet;
|
||||
emitEvictionDecisionTool: KtxRuntimeToolSet;
|
||||
emitArtifactResolutionTool: KtxRuntimeToolSet;
|
||||
emitUnmappedFallbackTool: KtxRuntimeToolSet;
|
||||
readRawSpanTool: KtxRuntimeToolSet;
|
||||
toolsetTools: KtxRuntimeToolSet;
|
||||
}
|
||||
|
||||
export function buildReconcileToolSet(input: ReconcileToolSetInput): ToolSet {
|
||||
export function buildReconcileToolSet(input: ReconcileToolSetInput): KtxRuntimeToolSet {
|
||||
const state = createVerificationLedgerState();
|
||||
return withVerificationLedger(
|
||||
{
|
||||
|
|
|
|||
|
|
@ -87,21 +87,18 @@ describe('buildWuToolSet', () => {
|
|||
toolsetTools: { wiki_write: { description: 'write', inputSchema: {} as any, execute: wikiWrite } as any },
|
||||
});
|
||||
|
||||
const correction = await toolSet.wiki_write.execute?.({ key: 'customer-rules' }, { toolCallId: 't1' } as any);
|
||||
const correction = await toolSet.wiki_write.execute?.({ key: 'customer-rules' });
|
||||
|
||||
expect(wikiWrite).not.toHaveBeenCalled();
|
||||
expect(correction).toMatchObject({ structured: { success: false, reason: 'verification_ledger_required' } });
|
||||
expect(String((correction as any).markdown)).toContain('record_verification_ledger');
|
||||
|
||||
await toolSet.record_verification_ledger.execute?.(
|
||||
{
|
||||
summary: 'No warehouse identifiers will be emitted in this wiki write.',
|
||||
verifiedIdentifiers: [],
|
||||
unverifiedIdentifiers: [],
|
||||
},
|
||||
{ toolCallId: 't2' } as any,
|
||||
);
|
||||
const written = await toolSet.wiki_write.execute?.({ key: 'customer-rules' }, { toolCallId: 't3' } as any);
|
||||
await toolSet.record_verification_ledger.execute?.({
|
||||
summary: 'No warehouse identifiers will be emitted in this wiki write.',
|
||||
verifiedIdentifiers: [],
|
||||
unverifiedIdentifiers: [],
|
||||
});
|
||||
const written = await toolSet.wiki_write.execute?.({ key: 'customer-rules' });
|
||||
|
||||
expect(wikiWrite).toHaveBeenCalledTimes(1);
|
||||
expect(written).toMatchObject({ structured: { success: true } });
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import type { Tool, ToolSet } from 'ai';
|
||||
import { buildCanonicalPinsPromptBlock, type CanonicalPin } from '../canonical-pins.js';
|
||||
import { createLookerQueryToSlTool } from '../adapters/looker/tools/looker-query-to-sl.tool.js';
|
||||
import { createRuntimeToolDescriptorFromAiTool, type KtxRuntimeToolSet } from '../../llm/index.js';
|
||||
import type { IngestProvenanceRow } from '../ports.js';
|
||||
import { createReadRawFileTool } from '../tools/read-raw-file.tool.js';
|
||||
import { createReadRawSpanTool } from '../tools/read-raw-span.tool.js';
|
||||
|
|
@ -88,12 +88,12 @@ export interface BuildWuToolSetInput {
|
|||
sourceKey?: string;
|
||||
stagedDir: string;
|
||||
wu: WorkUnit;
|
||||
loadSkillTool: Record<string, Tool>;
|
||||
emitUnmappedFallbackTool: Record<string, Tool>;
|
||||
toolsetTools: ToolSet;
|
||||
loadSkillTool: KtxRuntimeToolSet;
|
||||
emitUnmappedFallbackTool: KtxRuntimeToolSet;
|
||||
toolsetTools: KtxRuntimeToolSet;
|
||||
}
|
||||
|
||||
function withoutWriteSlTools(toolset: ToolSet, wu: WorkUnit): ToolSet {
|
||||
function withoutWriteSlTools(toolset: KtxRuntimeToolSet, wu: WorkUnit): KtxRuntimeToolSet {
|
||||
if (!wu.slDisallowed) {
|
||||
return toolset;
|
||||
}
|
||||
|
|
@ -103,9 +103,12 @@ function withoutWriteSlTools(toolset: ToolSet, wu: WorkUnit): ToolSet {
|
|||
return next;
|
||||
}
|
||||
|
||||
export function buildWuToolSet(input: BuildWuToolSetInput): ToolSet {
|
||||
export function buildWuToolSet(input: BuildWuToolSetInput): KtxRuntimeToolSet {
|
||||
const allowedPaths = new Set<string>([...input.wu.rawFiles, ...input.wu.dependencyPaths]);
|
||||
const lookerTools: ToolSet = input.sourceKey === 'looker' ? { looker_query_to_sl: createLookerQueryToSlTool() } : {};
|
||||
const lookerTools: KtxRuntimeToolSet =
|
||||
input.sourceKey === 'looker'
|
||||
? { looker_query_to_sl: createRuntimeToolDescriptorFromAiTool('looker_query_to_sl', createLookerQueryToSlTool()) }
|
||||
: {};
|
||||
const state = createVerificationLedgerState();
|
||||
return withVerificationLedger(
|
||||
withoutWriteSlTools(
|
||||
|
|
@ -114,8 +117,14 @@ export function buildWuToolSet(input: BuildWuToolSetInput): ToolSet {
|
|||
...lookerTools,
|
||||
...input.loadSkillTool,
|
||||
...input.emitUnmappedFallbackTool,
|
||||
read_raw_file: createReadRawFileTool({ stagedDir: input.stagedDir, allowedPaths }),
|
||||
read_raw_span: createReadRawSpanTool({ stagedDir: input.stagedDir, allowedPaths }),
|
||||
read_raw_file: createRuntimeToolDescriptorFromAiTool(
|
||||
'read_raw_file',
|
||||
createReadRawFileTool({ stagedDir: input.stagedDir, allowedPaths }),
|
||||
),
|
||||
read_raw_span: createRuntimeToolDescriptorFromAiTool(
|
||||
'read_raw_span',
|
||||
createReadRawSpanTool({ stagedDir: input.stagedDir, allowedPaths }),
|
||||
),
|
||||
},
|
||||
input.wu,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
import type { AgentRunnerService } from '@ktx/context/agent';
|
||||
import type { KtxModelRole } from '@ktx/llm';
|
||||
import type { Tool } from 'ai';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '@ktx/context';
|
||||
import type { CaptureSession, MemoryAction } from '../../memory/index.js';
|
||||
import { listTouchedSlSources, type TouchedSlSource } from '../../tools/index.js';
|
||||
import type { WorkUnit } from '../types.js';
|
||||
|
|
@ -14,12 +13,12 @@ export interface TouchedValidationResult {
|
|||
|
||||
export interface WorkUnitExecutionDeps {
|
||||
sessionWorktreeGit: { revParseHead(): Promise<string | null> };
|
||||
agentRunner: AgentRunnerService;
|
||||
agentRunner: AgentRunnerPort;
|
||||
validateTouchedSources: (touched: TouchedSlSource[]) => Promise<TouchedValidationResult>;
|
||||
resetHardTo: (targetSha: string) => Promise<void>;
|
||||
buildSystemPrompt: (wu: WorkUnit) => string;
|
||||
buildUserPrompt: (wu: WorkUnit) => string;
|
||||
buildToolSet: (wu: WorkUnit) => Record<string, Tool>;
|
||||
buildToolSet: (wu: WorkUnit) => KtxRuntimeToolSet;
|
||||
captureSession: CaptureSession;
|
||||
sessionActions: MemoryAction[];
|
||||
modelRole: KtxModelRole;
|
||||
|
|
|
|||
|
|
@ -1,16 +1,15 @@
|
|||
import type { AgentRunnerService } from '@ktx/context/agent';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '@ktx/context';
|
||||
import type { KtxModelRole } from '@ktx/llm';
|
||||
import type { ToolSet } from 'ai';
|
||||
import type { EvictionUnit } from '../types.js';
|
||||
import type { StageIndex } from './stage-index.types.js';
|
||||
|
||||
export interface ReconciliationContext {
|
||||
stageIndex: StageIndex;
|
||||
evictionUnit: EvictionUnit | undefined;
|
||||
agentRunner: AgentRunnerService;
|
||||
agentRunner: AgentRunnerPort;
|
||||
buildSystemPrompt: (idx: StageIndex, ev: EvictionUnit | undefined) => string;
|
||||
buildUserPrompt: (idx: StageIndex, ev: EvictionUnit | undefined) => string;
|
||||
buildToolSet: () => ToolSet;
|
||||
buildToolSet: () => KtxRuntimeToolSet;
|
||||
modelRole: KtxModelRole;
|
||||
stepBudget: number;
|
||||
sourceKey: string;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { appendFile, mkdir } from 'node:fs/promises';
|
||||
import { dirname } from 'node:path';
|
||||
import type { ToolExecuteFunction, ToolExecutionOptions, ToolSet } from 'ai';
|
||||
import type { KtxRuntimeToolSet } from '../../llm/index.js';
|
||||
|
||||
export interface ToolCallLogEntry {
|
||||
ts: string;
|
||||
|
|
@ -31,7 +31,7 @@ interface ToolCallLoggerOptions {
|
|||
* sequential (`generateText` awaits each tool result), so per-WU files are
|
||||
* effectively single-writer and lines land in call order.
|
||||
*/
|
||||
export function wrapToolsWithLogger<T extends ToolSet>(
|
||||
export function wrapToolsWithLogger<T extends KtxRuntimeToolSet>(
|
||||
tools: T,
|
||||
logFilePath: string,
|
||||
wuKey: string,
|
||||
|
|
@ -44,17 +44,13 @@ export function wrapToolsWithLogger<T extends ToolSet>(
|
|||
wrapped[name] = original;
|
||||
continue;
|
||||
}
|
||||
const wrappedExecute: ToolExecuteFunction<unknown, unknown> = async (
|
||||
input: unknown,
|
||||
opts: ToolExecutionOptions,
|
||||
) => {
|
||||
const wrappedExecute = async (input: unknown) => {
|
||||
const start = Date.now();
|
||||
try {
|
||||
const output = await (originalExecute as ToolExecuteFunction<unknown, unknown>)(input, opts);
|
||||
const output = await originalExecute(input);
|
||||
const entry: ToolCallLogEntry = {
|
||||
ts: new Date().toISOString(),
|
||||
wuKey,
|
||||
toolCallId: opts.toolCallId,
|
||||
toolName: name,
|
||||
durationMs: Date.now() - start,
|
||||
input,
|
||||
|
|
@ -67,7 +63,6 @@ export function wrapToolsWithLogger<T extends ToolSet>(
|
|||
const entry: ToolCallLogEntry = {
|
||||
ts: new Date().toISOString(),
|
||||
wuKey,
|
||||
toolCallId: opts.toolCallId,
|
||||
toolName: name,
|
||||
durationMs: Date.now() - start,
|
||||
input,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { tool, type ToolExecuteFunction, type ToolExecutionOptions, type ToolSet } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { KtxRuntimeToolDescriptor, KtxRuntimeToolSet } from '../../llm/index.js';
|
||||
|
||||
const verificationLedgerInputSchema = z.object({
|
||||
summary: z.string().min(1).max(2000),
|
||||
|
|
@ -37,22 +37,19 @@ export function createVerificationLedgerState(): VerificationLedgerState {
|
|||
return { entries: [] };
|
||||
}
|
||||
|
||||
export function withVerificationLedger(tools: ToolSet, state: VerificationLedgerState): ToolSet {
|
||||
const wrapped: ToolSet = {};
|
||||
export function withVerificationLedger(tools: KtxRuntimeToolSet, state: VerificationLedgerState): KtxRuntimeToolSet {
|
||||
const wrapped: KtxRuntimeToolSet = {};
|
||||
for (const [name, original] of Object.entries(tools)) {
|
||||
if (!WRITE_TOOL_NAMES.has(name) || typeof original.execute !== 'function') {
|
||||
wrapped[name] = original;
|
||||
continue;
|
||||
}
|
||||
const originalExecute = original.execute;
|
||||
const guardedExecute: ToolExecuteFunction<unknown, unknown> = async (
|
||||
input: unknown,
|
||||
opts: ToolExecutionOptions,
|
||||
) => {
|
||||
const guardedExecute = async (input: unknown) => {
|
||||
if (state.entries.length === 0) {
|
||||
return verificationRequiredOutput(name);
|
||||
}
|
||||
return (originalExecute as ToolExecuteFunction<unknown, unknown>)(input, opts);
|
||||
return originalExecute(input);
|
||||
};
|
||||
wrapped[name] = { ...original, execute: guardedExecute };
|
||||
}
|
||||
|
|
@ -60,8 +57,9 @@ export function withVerificationLedger(tools: ToolSet, state: VerificationLedger
|
|||
return wrapped;
|
||||
}
|
||||
|
||||
function createRecordVerificationLedgerTool(state: VerificationLedgerState) {
|
||||
return tool({
|
||||
function createRecordVerificationLedgerTool(state: VerificationLedgerState): KtxRuntimeToolDescriptor {
|
||||
return {
|
||||
name: 'record_verification_ledger',
|
||||
description:
|
||||
'Record the pre-write verification ledger required by loaded ingest skills. Call this before wiki/SL/fallback writes to state what was verified, which tool calls support it, and what remains intentionally unverified.',
|
||||
inputSchema: verificationLedgerInputSchema,
|
||||
|
|
@ -78,7 +76,7 @@ function createRecordVerificationLedgerTool(state: VerificationLedgerState) {
|
|||
structured: { success: true, entry },
|
||||
};
|
||||
},
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
function verificationRequiredOutput(toolName: string) {
|
||||
|
|
|
|||
164
packages/context/src/llm/ai-sdk-runtime.ts
Normal file
164
packages/context/src/llm/ai-sdk-runtime.ts
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider } from '@ktx/llm';
|
||||
import { generateText, Output, stepCountIs, type FlexibleSchema, type TelemetrySettings, type ToolSet } from 'ai';
|
||||
import type { z } from 'zod';
|
||||
import { noopLogger, type KtxLogger } from '../core/index.js';
|
||||
import { summarizeKtxLlmDebugRequest, type KtxLlmDebugRequestRecorder } from './debug-request-recorder.js';
|
||||
import { createAiSdkToolSet } from './runtime-tools.js';
|
||||
import type {
|
||||
KtxGenerateObjectInput,
|
||||
KtxGenerateTextInput,
|
||||
KtxLlmRuntimePort,
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
} from './runtime-port.js';
|
||||
|
||||
/**
 * Factory for the AI SDK telemetry settings attached to an agent-loop request.
 */
export interface AgentTelemetryPort {
  /** Builds the telemetry settings for one request from its string tags. */
  createTelemetry(tags: Record<string, string>): TelemetrySettings;
}
|
||||
|
||||
/**
 * Collaborators injected into `AiSdkKtxLlmRuntime`.
 */
export interface AiSdkKtxLlmRuntimeDeps {
  /** Resolves models per role and supplies message wrapping, tool-call repair and default telemetry. */
  llmProvider: KtxLlmProvider;
  /** Optional telemetry factory for agent loops; when absent the provider's `telemetryConfig()` is used. */
  telemetry?: AgentTelemetryPort;
  /** Optional logger; the runtime falls back to `noopLogger`. */
  logger?: KtxLogger;
  /** Optional recorder that receives a summary of each agent-loop request before it is sent. */
  debugRequestRecorder?: KtxLlmDebugRequestRecorder;
}
|
||||
|
||||
/**
 * Reports whether a tool map has at least one own enumerable entry.
 *
 * @param tools - Tool map keyed by tool name.
 * @returns `true` when the map is non-empty, `false` otherwise.
 */
function hasTools(tools: Record<string, unknown>): boolean {
  const toolNames = Object.keys(tools);
  return toolNames.length !== 0;
}
|
||||
|
||||
/**
 * AI SDK-backed implementation of the KTX LLM runtime port.
 *
 * Every request is assembled the same way: resolve the model for the requested role, convert the
 * runtime tool descriptors with `createAiSdkToolSet`, wrap prompt and tools through
 * `KtxMessageBuilder.wrapSimple`, split the system prompt out of the wrapped messages, and call the
 * AI SDK `generateText`.
 */
export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort {
  private readonly logger: KtxLogger;

  constructor(private readonly deps: AiSdkKtxLlmRuntimeDeps) {
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Generates plain text for a single user prompt.
   *
   * @param input - Role, prompt and optional system prompt, temperature and tools.
   * @returns The generated text.
   * @throws Error when the AI SDK result carries no string text.
   */
  async generateText(input: KtxGenerateTextInput): Promise<string> {
    const { llmProvider } = this.deps;
    const model = llmProvider.getModel(input.role);

    // Models tagged with the 'deterministic' provider never reach the AI SDK; they get a canned
    // description derived from the first 64 characters of the prompt.
    const isDeterministic = (model as { provider?: string }).provider === 'deterministic';
    if (isDeterministic) {
      const subject = input.prompt.slice(0, 64).trim() || 'data source';
      return `Deterministic description for ${subject}`;
    }

    const runtimeTools = createAiSdkToolSet(input.tools ?? {});
    const request = new KtxMessageBuilder(llmProvider).wrapSimple({
      system: input.system,
      messages: [{ role: 'user', content: input.prompt }],
      tools: runtimeTools,
      model,
    });
    const prompt = splitKtxSystemMessages(request.messages);

    const { text } = await generateText({
      model,
      temperature: input.temperature ?? 0,
      ...(prompt.system ? { system: prompt.system } : {}),
      messages: prompt.messages,
      tools: request.tools as ToolSet,
      // The repair handler is only attached when at least one tool is registered.
      ...(hasTools(runtimeTools)
        ? {
            experimental_repairToolCall: llmProvider.repairToolCallHandler({
              source: `ktx-${input.role}`,
            }),
          }
        : {}),
    });

    if (typeof text === 'string') {
      return text;
    }
    throw new Error('KTX LLM text generation returned no text');
  }

  /**
   * Generates a structured object constrained by the caller's schema.
   *
   * @param input - Role, prompt, schema and optional system prompt, temperature and tools.
   * @returns The structured output produced by the AI SDK.
   * @throws Error when the AI SDK result carries no output.
   */
  async generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
    input: KtxGenerateObjectInput<TOutput, TSchema>,
  ): Promise<TOutput> {
    const { llmProvider } = this.deps;
    const model = llmProvider.getModel(input.role);
    const runtimeTools = createAiSdkToolSet(input.tools ?? {});
    const request = new KtxMessageBuilder(llmProvider).wrapSimple({
      system: input.system,
      messages: [{ role: 'user', content: input.prompt }],
      tools: runtimeTools,
      model,
    });
    const prompt = splitKtxSystemMessages(request.messages);

    const result = await generateText({
      model,
      temperature: input.temperature ?? 0,
      ...(prompt.system ? { system: prompt.system } : {}),
      messages: prompt.messages,
      tools: request.tools as ToolSet,
      // The repair handler is only attached when at least one tool is registered.
      ...(hasTools(runtimeTools)
        ? {
            experimental_repairToolCall: llmProvider.repairToolCallHandler({
              source: `ktx-${input.role}`,
            }),
          }
        : {}),
      output: Output.object({ schema: input.schema as unknown as FlexibleSchema<TOutput> }),
    });

    const structured = result.output;
    if (structured == null) {
      throw new Error('KTX LLM object generation returned no output');
    }
    return structured as TOutput;
  }

  /**
   * Runs a tool-using agent loop bounded by `params.stepBudget` steps.
   *
   * Failures never propagate: any thrown value is logged as a warning and reported as
   * `{ stopReason: 'error', error }`. Errors thrown by the caller's `onStepFinish` callback are
   * logged and ignored so they cannot abort the loop.
   *
   * NOTE(review): a loop that resolves is always reported as 'natural', including when the step
   * budget is what ended it — confirm that callers do not need a distinct budget stop reason here.
   *
   * @param params - Prompts, tool set, model role, step budget, telemetry tags and optional step callback.
   * @returns `{ stopReason: 'natural' }` when the AI SDK call resolves, otherwise the error result.
   */
  async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
    // Number of finished steps, reported to the caller's callback as `stepIndex`.
    let completedSteps = 0;
    try {
      const { llmProvider } = this.deps;
      const model = llmProvider.getModel(params.modelRole);
      const runtimeTools = createAiSdkToolSet(params.toolSet);
      const request = new KtxMessageBuilder(llmProvider).wrapSimple({
        system: params.systemPrompt,
        messages: [{ role: 'user', content: params.userPrompt }],
        tools: runtimeTools,
        model,
      });
      const prompt = splitKtxSystemMessages(request.messages);

      const tags = params.telemetryTags;
      const operationName = tags.operationName ?? 'ktx-agent-runner';

      // Record the wrapped (pre-split) messages and tools before the request is sent.
      await this.deps.debugRequestRecorder?.record(
        summarizeKtxLlmDebugRequest({
          operationName,
          source: tags.source,
          jobId: tags.jobId,
          unitKey: tags.unitKey,
          modelRole: params.modelRole,
          modelId: (model as { modelId?: string }).modelId ?? params.modelRole,
          messages: request.messages,
          tools: request.tools as Record<string, { providerOptions?: unknown }>,
        }),
      );

      await generateText({
        model,
        temperature: 0,
        stopWhen: stepCountIs(params.stepBudget),
        experimental_telemetry: this.deps.telemetry?.createTelemetry(tags) ?? llmProvider.telemetryConfig(),
        experimental_repairToolCall: llmProvider.repairToolCallHandler({ source: operationName }),
        ...(prompt.system ? { system: prompt.system } : {}),
        messages: prompt.messages,
        tools: request.tools as ToolSet,
        onStepFinish: async () => {
          completedSteps += 1;
          if (params.onStepFinish) {
            try {
              await params.onStepFinish({ stepIndex: completedSteps, stepBudget: params.stepBudget });
            } catch (err) {
              const reason = err instanceof Error ? err.message : String(err);
              this.logger.warn(`[agent-runner] onStepFinish callback threw; ignoring: ${reason}`);
            }
          }
        },
      });

      return { stopReason: 'natural' };
    } catch (error) {
      // Normalise non-Error throwables so the result always carries an Error instance.
      const err = error instanceof Error ? error : new Error(String(error));
      this.logger.warn(`[agent-runner] loop failed: ${err.message}`);
      return { stopReason: 'error', error: err };
    }
  }
}
|
||||
19
packages/context/src/llm/claude-code-env.test.ts
Normal file
19
packages/context/src/llm/claude-code-env.test.ts
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { CLAUDE_CODE_PROVIDER_ENV_DENYLIST, createKtxClaudeCodeEnv } from './claude-code-env.js';
|
||||
|
||||
describe('createKtxClaudeCodeEnv', () => {
  it('strips provider-routing credentials from the Claude Code child environment', () => {
    // Give every denylisted variable a recognisable value and add two ordinary variables.
    const denylistedEntries = CLAUDE_CODE_PROVIDER_ENV_DENYLIST.map((key) => [key, `${key}-value`]);
    const seeded = Object.fromEntries(denylistedEntries);

    const env = createKtxClaudeCodeEnv({
      ...seeded,
      PATH: '/usr/bin',
      HOME: '/Users/test',
    });

    // None of the denylisted keys may survive, not even with an undefined value.
    CLAUDE_CODE_PROVIDER_ENV_DENYLIST.forEach((key) => {
      expect(env).not.toHaveProperty(key);
    });
    // Unrelated variables pass through unchanged.
    expect(env.PATH).toBe('/usr/bin');
    expect(env.HOME).toBe('/Users/test');
  });
});
|
||||
23
packages/context/src/llm/claude-code-env.ts
Normal file
23
packages/context/src/llm/claude-code-env.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
/**
 * Names of the environment variables that are removed before an environment is handed to
 * Claude Code: Anthropic credentials and routing overrides, Vertex/Google Cloud settings,
 * AWS credentials and settings, and the Bedrock/Vertex backend switches.
 */
export const CLAUDE_CODE_PROVIDER_ENV_DENYLIST = [
  'ANTHROPIC_API_KEY',
  'ANTHROPIC_AUTH_TOKEN',
  'ANTHROPIC_BASE_URL',
  'ANTHROPIC_MODEL',
  'ANTHROPIC_VERTEX_PROJECT_ID',
  'CLOUD_ML_REGION',
  'GOOGLE_APPLICATION_CREDENTIALS',
  'GOOGLE_CLOUD_PROJECT',
  'AWS_ACCESS_KEY_ID',
  'AWS_SECRET_ACCESS_KEY',
  'AWS_SESSION_TOKEN',
  'AWS_REGION',
  'AWS_PROFILE',
  'CLAUDE_CODE_USE_BEDROCK',
  'CLAUDE_CODE_USE_VERTEX',
] as const;

// Set form of the denylist for constant-time membership checks.
const DENYLIST = new Set<string>(CLAUDE_CODE_PROVIDER_ENV_DENYLIST);

/**
 * Returns a copy of `env` without the denylisted provider variables.
 *
 * Matching is by exact, case-sensitive key. The input object is not modified, and entries
 * whose value is `undefined` are kept as long as their key is not denylisted.
 *
 * @param env - Source environment; defaults to `process.env`.
 * @returns A new object holding every entry whose key is not on the denylist.
 */
export function createKtxClaudeCodeEnv(env: NodeJS.ProcessEnv = process.env): Record<string, string | undefined> {
  const kept: Array<[string, string | undefined]> = [];
  for (const [key, value] of Object.entries(env)) {
    if (DENYLIST.has(key)) {
      continue;
    }
    kept.push([key, value]);
  }
  return Object.fromEntries(kept);
}
|
||||
17
packages/context/src/llm/claude-code-models.test.ts
Normal file
17
packages/context/src/llm/claude-code-models.test.ts
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { resolveClaudeCodeModel } from './claude-code-models.js';
|
||||
|
||||
describe('resolveClaudeCodeModel', () => {
  // Each row is [input, expected model id]: the three short aliases plus a full id that passes through.
  const cases: Array<[string, string]> = [
    ['sonnet', 'claude-sonnet-4-6'],
    ['opus', 'claude-opus-4-7'],
    ['haiku', 'claude-haiku-4-5'],
    ['claude-sonnet-4-6', 'claude-sonnet-4-6'],
  ];

  it.each(cases)('maps %s to %s', (input, expected) => {
    const resolved = resolveClaudeCodeModel(input);
    expect(resolved).toBe(expected);
  });

  it('rejects unsupported aliases', () => {
    const resolveUnsupported = () => resolveClaudeCodeModel('gpt-5');
    expect(resolveUnsupported).toThrow('Unsupported Claude Code model');
  });
});
|
||||
19
packages/context/src/llm/claude-code-models.ts
Normal file
19
packages/context/src/llm/claude-code-models.ts
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
// Short aliases accepted in configuration, mapped to the full model id they stand for.
// Held in a Map rather than a plain object so that inherited Object.prototype members
// ("constructor", "toString", "__proto__", ...) can never be mistaken for an alias.
const CLAUDE_CODE_MODEL_ALIASES = new Map<string, string>([
  ['sonnet', 'claude-sonnet-4-6'],
  ['opus', 'claude-opus-4-7'],
  ['haiku', 'claude-haiku-4-5'],
]);

// Full ids of the form claude-<family>-<major>-<minor>, e.g. "claude-sonnet-4-6".
const FULL_MODEL_ID = /^claude-(sonnet|opus|haiku)-[0-9]+-[0-9]+$/;

/**
 * Resolves a configured Claude Code model name to a full model id.
 *
 * Surrounding whitespace is ignored. A known alias (`sonnet`, `opus`, `haiku`) is expanded to
 * its full id; a value that already has the full-id shape is returned as is.
 *
 * @param model - Alias or full model id as supplied by the caller.
 * @returns The full model id.
 * @throws Error when the value is neither a known alias nor a full model id.
 */
export function resolveClaudeCodeModel(model: string): string {
  const normalized = model.trim();
  const alias = CLAUDE_CODE_MODEL_ALIASES.get(normalized);
  if (alias !== undefined) {
    return alias;
  }
  if (FULL_MODEL_ID.test(normalized)) {
    return normalized;
  }
  throw new Error(`Unsupported Claude Code model "${model}". Use sonnet, opus, haiku, or a claude-* model id.`);
}
|
||||
464
packages/context/src/llm/claude-code-runtime.test.ts
Normal file
464
packages/context/src/llm/claude-code-runtime.test.ts
Normal file
|
|
@ -0,0 +1,464 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
import type { SDKMessage } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { ClaudeCodeKtxLlmRuntime, mapClaudeCodeStopReason, runClaudeCodeAuthProbe } from './claude-code-runtime.js';
|
||||
|
||||
/** Replays the given messages, in order, as an async stream for a mocked `query`. */
async function* stream(messages: SDKMessage[]): AsyncGenerator<SDKMessage, void> {
  yield* messages;
}
|
||||
|
||||
type InitMessage = Extract<SDKMessage, { type: 'system'; subtype: 'init' }>;

/**
 * Builds a `system`/`init` SDK message fixture with no tools, MCP servers, skills or plugins.
 *
 * @param overrides - Fields that replace the fixture defaults.
 * @returns The fixture with `overrides` applied last.
 */
function initMessage(overrides: Partial<InitMessage> = {}): InitMessage {
  return {
    type: 'system',
    subtype: 'init',
    apiKeySource: 'none' as never, // pragma: allowlist secret
    claude_code_version: '0.3.142',
    cwd: '/tmp/project',
    tools: [],
    mcp_servers: [],
    model: 'claude-sonnet-4-6',
    permissionMode: 'dontAsk',
    slash_commands: [],
    output_style: 'default',
    skills: [],
    plugins: [],
    uuid: '00000000-0000-4000-8000-000000000001',
    session_id: 'session-id',
    ...overrides,
  };
}
|
||||
|
||||
type ResultMessage = Extract<SDKMessage, { type: 'result' }>;

/**
 * Builds a `result` SDK message fixture: a successful single-turn result whose text is 'ok'.
 *
 * @param overrides - Fields that replace the fixture defaults.
 * @returns The fixture with `overrides` applied last.
 */
function resultMessage(overrides: Partial<ResultMessage> = {}): ResultMessage {
  return {
    type: 'result',
    subtype: 'success',
    duration_ms: 1,
    duration_api_ms: 1,
    is_error: false,
    num_turns: 1,
    result: 'ok',
    stop_reason: null,
    total_cost_usd: 0,
    usage: {} as never,
    modelUsage: {},
    permission_denials: [],
    errors: [],
    uuid: '00000000-0000-4000-8000-000000000002',
    session_id: 'session-id',
    ...overrides,
  } as ResultMessage;
}
|
||||
|
||||
describe('ClaudeCodeKtxLlmRuntime', () => {
|
||||
it('passes isolation options and scrubbed env to text generation', async () => {
|
||||
const query = vi.fn((_input: any) => stream([initMessage(), resultMessage({ result: 'hello' })]));
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query,
|
||||
env: { ANTHROPIC_API_KEY: 'sk-ant-test', PATH: '/usr/bin' }, // pragma: allowlist secret
|
||||
});
|
||||
|
||||
await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).resolves.toBe('hello');
|
||||
expect(query).toHaveBeenCalledWith({
|
||||
prompt: 'say hello',
|
||||
options: expect.objectContaining({
|
||||
cwd: '/tmp/project',
|
||||
model: 'claude-sonnet-4-6',
|
||||
maxTurns: 1,
|
||||
settingSources: [],
|
||||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
allowedTools: [],
|
||||
permissionMode: 'dontAsk',
|
||||
persistSession: false,
|
||||
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
it('validates structured output with the caller schema', async () => {
|
||||
const schema = z.object({ answer: z.string() });
|
||||
const query = vi.fn((_input: any) => stream([initMessage(), resultMessage({ structured_output: { answer: 'yes' } })]));
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query,
|
||||
env: {},
|
||||
});
|
||||
|
||||
await expect(runtime.generateObject({ role: 'default', prompt: 'json', schema })).resolves.toEqual({ answer: 'yes' });
|
||||
expect(query.mock.calls[0][0].options.outputFormat).toMatchObject({
|
||||
type: 'json_schema',
|
||||
schema: expect.objectContaining({ type: 'object' }),
|
||||
});
|
||||
});
|
||||
|
||||
it('registers only exact KTX MCP tool ids and denies non-KTX tools', async () => {
|
||||
const query = vi.fn((_input: any) =>
|
||||
stream([
|
||||
initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
|
||||
{
|
||||
type: 'assistant',
|
||||
message: { role: 'assistant', content: [] },
|
||||
parent_tool_use_id: null,
|
||||
uuid: '00000000-0000-4000-8000-000000000003',
|
||||
session_id: 'session-id',
|
||||
} as unknown as SDKMessage,
|
||||
resultMessage({ subtype: 'error_max_turns', is_error: true }),
|
||||
]),
|
||||
);
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query,
|
||||
env: {},
|
||||
});
|
||||
const onStepFinish = vi.fn();
|
||||
|
||||
await runtime.runAgentLoop({
|
||||
modelRole: 'default',
|
||||
systemPrompt: 'system',
|
||||
userPrompt: 'user',
|
||||
toolSet: {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description: 'Load skill.',
|
||||
inputSchema: z.object({ name: z.string() }),
|
||||
execute: async () => ({ markdown: 'loaded' }),
|
||||
},
|
||||
},
|
||||
stepBudget: 1,
|
||||
telemetryTags: { operationName: 'test' },
|
||||
onStepFinish,
|
||||
});
|
||||
|
||||
const options = query.mock.calls[0][0].options;
|
||||
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
|
||||
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
|
||||
behavior: 'allow',
|
||||
toolUseID: '1',
|
||||
});
|
||||
expect(await options.canUseTool('Bash', {}, { signal: new AbortController().signal, toolUseID: '2' })).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: '2',
|
||||
});
|
||||
expect(onStepFinish).toHaveBeenCalledWith({ stepIndex: 1, stepBudget: 1 });
|
||||
});
|
||||
|
||||
it('treats host-discovered commands skills and agents as non-fatal init metadata for text and auth probe', async () => {
|
||||
const hostDiscoveredInit = initMessage({
|
||||
slash_commands: ['/help', '/compact', '/clear', '/user-command'],
|
||||
skills: ['pdf', 'docx'],
|
||||
agents: ['claude', 'Explore', 'general-purpose'],
|
||||
});
|
||||
const textQuery = vi.fn((_input: any) => stream([hostDiscoveredInit, resultMessage({ result: 'hello' })]));
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query: textQuery,
|
||||
env: { ANTHROPIC_API_KEY: 'sk-ant-test', PATH: '/usr/bin' }, // pragma: allowlist secret
|
||||
});
|
||||
|
||||
await expect(runtime.generateText({ role: 'default', prompt: 'say hello' })).resolves.toBe('hello');
|
||||
const textOptions = textQuery.mock.calls[0][0].options;
|
||||
expect(textOptions).toMatchObject({
|
||||
settingSources: [],
|
||||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
allowedTools: [],
|
||||
permissionMode: 'dontAsk',
|
||||
persistSession: false,
|
||||
env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }),
|
||||
});
|
||||
expect(textOptions.disallowedTools).toEqual(expect.arrayContaining(['Agent', 'Task', 'Bash']));
|
||||
expect(await textOptions.canUseTool('Agent', {}, { signal: new AbortController().signal, toolUseID: 'agent' })).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: 'agent',
|
||||
});
|
||||
expect(await textOptions.canUseTool('Skill', {}, { signal: new AbortController().signal, toolUseID: 'skill' })).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: 'skill',
|
||||
});
|
||||
expect(
|
||||
await textOptions.canUseTool('SlashCommand', {}, { signal: new AbortController().signal, toolUseID: 'slash' }),
|
||||
).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: 'slash',
|
||||
});
|
||||
|
||||
const probeQuery = vi.fn((_input: any) => stream([hostDiscoveredInit, resultMessage({ result: 'ok' })]));
|
||||
await expect(
|
||||
runClaudeCodeAuthProbe({
|
||||
projectDir: '/tmp/project',
|
||||
model: 'sonnet',
|
||||
query: probeQuery,
|
||||
env: { ANTHROPIC_AUTH_TOKEN: 'token', HOME: '/Users/test' },
|
||||
}),
|
||||
).resolves.toEqual({ ok: true });
|
||||
expect(probeQuery.mock.calls[0][0].options).toMatchObject({
|
||||
settingSources: [],
|
||||
skills: [],
|
||||
plugins: [],
|
||||
tools: [],
|
||||
allowedTools: [],
|
||||
permissionMode: 'dontAsk',
|
||||
persistSession: false,
|
||||
env: expect.objectContaining({ HOME: '/Users/test' }),
|
||||
});
|
||||
expect(probeQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('allows host-discovered context during agent loops while requiring exact KTX MCP tools and servers', async () => {
|
||||
const query = vi.fn((_input: any) =>
|
||||
stream([
|
||||
initMessage({
|
||||
tools: ['mcp__ktx__load_skill'],
|
||||
mcp_servers: [{ name: 'ktx', status: 'connected' }],
|
||||
slash_commands: ['/help', '/compact', '/clear'],
|
||||
skills: ['memory-agent', 'doc-reader'],
|
||||
agents: ['claude', 'Plan', 'Explore'],
|
||||
}),
|
||||
{
|
||||
type: 'assistant',
|
||||
message: { role: 'assistant', content: [] },
|
||||
parent_tool_use_id: null,
|
||||
uuid: '00000000-0000-4000-8000-000000000006',
|
||||
session_id: 'session-id',
|
||||
} as unknown as SDKMessage,
|
||||
resultMessage({ subtype: 'error_max_turns', is_error: true }),
|
||||
]),
|
||||
);
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query,
|
||||
env: {},
|
||||
});
|
||||
|
||||
await expect(
|
||||
runtime.runAgentLoop({
|
||||
modelRole: 'default',
|
||||
systemPrompt: 'system',
|
||||
userPrompt: 'user',
|
||||
toolSet: {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description: 'Load skill.',
|
||||
inputSchema: z.object({ name: z.string() }),
|
||||
execute: async () => ({ markdown: 'loaded' }),
|
||||
},
|
||||
},
|
||||
stepBudget: 1,
|
||||
telemetryTags: { operationName: 'test' },
|
||||
}),
|
||||
).resolves.toEqual({ stopReason: 'budget' });
|
||||
|
||||
const options = query.mock.calls[0][0].options;
|
||||
expect(options.allowedTools).toEqual(['mcp__ktx__load_skill']);
|
||||
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({
|
||||
behavior: 'allow',
|
||||
toolUseID: '1',
|
||||
});
|
||||
expect(await options.canUseTool('Task', {}, { signal: new AbortController().signal, toolUseID: '2' })).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: '2',
|
||||
});
|
||||
expect(await options.canUseTool('Skill', {}, { signal: new AbortController().signal, toolUseID: '3' })).toMatchObject({
|
||||
behavior: 'deny',
|
||||
toolUseID: '3',
|
||||
});
|
||||
});
|
||||
|
||||
it('still rejects unexpected tools, missing KTX tools, plugins, and non-KTX MCP servers from init messages', async () => {
|
||||
const query = vi.fn((_input: any) =>
|
||||
stream([
|
||||
initMessage({
|
||||
tools: ['Bash'],
|
||||
mcp_servers: [{ name: 'filesystem', status: 'connected' }],
|
||||
plugins: [{ name: 'host-plugin', path: '/tmp/plugin' }],
|
||||
}),
|
||||
resultMessage({ result: 'hello' }),
|
||||
]),
|
||||
);
|
||||
const runtime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query,
|
||||
env: {},
|
||||
});
|
||||
|
||||
await expect(
|
||||
runtime.generateText({
|
||||
role: 'default',
|
||||
prompt: 'say hello',
|
||||
tools: {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description: 'Load skill.',
|
||||
inputSchema: z.object({ name: z.string() }),
|
||||
execute: async () => ({ markdown: 'loaded' }),
|
||||
},
|
||||
},
|
||||
}),
|
||||
).rejects.toThrow(
|
||||
/Claude Code runtime isolation failed: .*tools=Bash.*missing_tools=mcp__ktx__load_skill.*mcp_servers=filesystem.*plugins=host-plugin/,
|
||||
);
|
||||
});
|
||||
|
||||
it('passes scrubbed env to object generation and agent loops', async () => {
|
||||
const schema = z.object({ answer: z.string() });
|
||||
const objectQuery = vi.fn((_input: any) =>
|
||||
stream([initMessage(), resultMessage({ structured_output: { answer: 'yes' } })]),
|
||||
);
|
||||
const objectRuntime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query: objectQuery,
|
||||
env: { ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod', PATH: '/usr/bin' }, // pragma: allowlist secret
|
||||
});
|
||||
|
||||
await expect(objectRuntime.generateObject({ role: 'default', prompt: 'json', schema })).resolves.toEqual({
|
||||
answer: 'yes',
|
||||
});
|
||||
expect(objectQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ PATH: '/usr/bin' }));
|
||||
expect(objectQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }), // pragma: allowlist secret
|
||||
);
|
||||
|
||||
const agentQuery = vi.fn((_input: any) =>
|
||||
stream([
|
||||
initMessage({ tools: ['mcp__ktx__load_skill'], mcp_servers: [{ name: 'ktx', status: 'connected' }] }),
|
||||
{
|
||||
type: 'assistant',
|
||||
message: { role: 'assistant', content: [] },
|
||||
parent_tool_use_id: null,
|
||||
uuid: '00000000-0000-4000-8000-000000000004',
|
||||
session_id: 'session-id',
|
||||
} as unknown as SDKMessage,
|
||||
resultMessage({ subtype: 'error_max_turns', is_error: true }),
|
||||
]),
|
||||
);
|
||||
const agentRuntime = new ClaudeCodeKtxLlmRuntime({
|
||||
projectDir: '/tmp/project',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
query: agentQuery,
|
||||
env: { ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1', HOME: '/Users/test' },
|
||||
});
|
||||
|
||||
await agentRuntime.runAgentLoop({
|
||||
modelRole: 'default',
|
||||
systemPrompt: 'system',
|
||||
userPrompt: 'user',
|
||||
toolSet: {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description: 'Load skill.',
|
||||
inputSchema: z.object({ name: z.string() }),
|
||||
execute: async () => ({ markdown: 'loaded' }),
|
||||
},
|
||||
},
|
||||
stepBudget: 1,
|
||||
telemetryTags: { operationName: 'test' },
|
||||
});
|
||||
expect(agentQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ HOME: '/Users/test' }));
|
||||
expect(agentQuery.mock.calls[0][0].options.env).not.toEqual(
|
||||
expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('logs and ignores onStepFinish callback errors', async () => {
  // One top-level assistant turn so that onStepFinish is invoked exactly once.
  const assistantTurn = {
    type: 'assistant',
    message: { role: 'assistant', content: [] },
    parent_tool_use_id: null,
    uuid: '00000000-0000-4000-8000-000000000005',
    session_id: 'session-id',
  } as unknown as SDKMessage;
  const query = vi.fn((_input: any) =>
    stream([initMessage(), assistantTurn, resultMessage({ subtype: 'success', terminal_reason: 'completed' })]),
  );
  const logger = { debug: vi.fn(), log: vi.fn(), warn: vi.fn(), error: vi.fn() };
  const runtime = new ClaudeCodeKtxLlmRuntime({
    projectDir: '/tmp/project',
    modelSlots: { default: 'sonnet' },
    query,
    env: {},
    logger,
  });

  const outcome = await runtime.runAgentLoop({
    modelRole: 'default',
    systemPrompt: 'system',
    userPrompt: 'user',
    toolSet: {},
    stepBudget: 1,
    telemetryTags: { operationName: 'test' },
    onStepFinish: async () => {
      throw new Error('callback exploded');
    },
  });

  // The failing callback must not change the loop outcome; it is only reported as a warning.
  expect(outcome).toEqual({ stopReason: 'natural' });
  expect(logger.warn).toHaveBeenCalledWith(expect.stringContaining('callback exploded'));
});
|
||||
|
||||
it('maps max-turn terminal reasons to budget', () => {
  // Each of the three max-turn signals maps to "budget".
  const fromSubtype = mapClaudeCodeStopReason(resultMessage({ subtype: 'error_max_turns' }));
  const fromTerminalReason = mapClaudeCodeStopReason(resultMessage({ terminal_reason: 'max_turns' }));
  const fromStopReason = mapClaudeCodeStopReason(resultMessage({ stop_reason: 'max_turns' }));
  expect(fromSubtype).toBe('budget');
  expect(fromTerminalReason).toBe('budget');
  expect(fromStopReason).toBe('budget');

  // A completed success is "natural"; any other failure subtype is "error".
  const completed = mapClaudeCodeStopReason(resultMessage({ subtype: 'success', terminal_reason: 'completed' }));
  const failed = mapClaudeCodeStopReason(resultMessage({ subtype: 'error_during_execution' }));
  expect(completed).toBe('natural');
  expect(failed).toBe('error');
});
|
||||
|
||||
it('auth probe uses isolation options and a scrubbed env', async () => {
  const leakedEnv = { ANTHROPIC_API_KEY: 'sk-ant-test' }; // pragma: allowlist secret
  const query = vi.fn((_input: any) => stream([initMessage(), resultMessage({ result: 'ok' })]));

  const probe = await runClaudeCodeAuthProbe({
    projectDir: '/tmp/project',
    model: 'sonnet',
    query,
    env: leakedEnv,
  });

  expect(probe).toEqual({ ok: true });
  // The options handed to the SDK must be fully isolated and must not carry the API key.
  const { options } = query.mock.calls[0][0];
  expect(options).toMatchObject({
    settingSources: [],
    skills: [],
    plugins: [],
    tools: [],
    allowedTools: [],
    persistSession: false,
    env: expect.not.objectContaining(leakedEnv),
  });
});
|
||||
|
||||
it('reports unsupported Claude Code models without framing them as auth failures', async () => {
  // The query mock is never expected to run: model resolution fails first.
  const probe = await runClaudeCodeAuthProbe({
    projectDir: '/tmp/project',
    model: 'gpt-5',
    query: vi.fn(),
    env: {},
  });

  expect(probe).toEqual({
    ok: false,
    message: 'Unsupported Claude Code model "gpt-5". Use sonnet, opus, haiku, or a claude-* model id.',
  });
});
|
||||
});
|
||||
327
packages/context/src/llm/claude-code-runtime.ts
Normal file
327
packages/context/src/llm/claude-code-runtime.ts
Normal file
|
|
@ -0,0 +1,327 @@
|
|||
import {
|
||||
createSdkMcpServer,
|
||||
query as defaultQuery,
|
||||
type Options,
|
||||
type SDKMessage,
|
||||
type SDKResultMessage,
|
||||
} from '@anthropic-ai/claude-agent-sdk';
|
||||
import { z } from 'zod';
|
||||
import { noopLogger, type KtxLogger } from '../core/index.js';
|
||||
import { createKtxClaudeCodeEnv } from './claude-code-env.js';
|
||||
import { resolveClaudeCodeModel } from './claude-code-models.js';
|
||||
import { createClaudeSdkTools, mcpToolIds } from './runtime-tools.js';
|
||||
import type {
|
||||
KtxGenerateObjectInput,
|
||||
KtxGenerateTextInput,
|
||||
KtxLlmRuntimePort,
|
||||
KtxRuntimeToolSet,
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
RunLoopStopReason,
|
||||
} from './runtime-port.js';
|
||||
|
||||
type QueryFn = (params: Parameters<typeof defaultQuery>[0]) => AsyncIterable<SDKMessage>;
|
||||
|
||||
export interface ClaudeCodeKtxLlmRuntimeDeps {
|
||||
projectDir: string;
|
||||
modelSlots: { default: string } & Partial<Record<string, string>>;
|
||||
query?: QueryFn;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
logger?: KtxLogger;
|
||||
}
|
||||
|
||||
// Names of built-in Claude Code tools. The list is handed to the SDK as
// `disallowedTools` so that only KTX-provided MCP tools can run.
const BUILTIN_TOOLS = [
  // sub-agents and user interaction
  'Agent',
  'Task',
  'AskUserQuestion',
  // shell and file access
  'Bash',
  'Read',
  'Edit',
  'Write',
  'Glob',
  'Grep',
  // network and planning helpers
  'WebFetch',
  'WebSearch',
  'TodoWrite',
];
|
||||
|
||||
/** Type guard that recognises an SDK message whose `type` is `'result'`. */
function isResult(message: SDKMessage): message is SDKResultMessage {
  const { type } = message;
  return type === 'result';
}
|
||||
|
||||
/**
 * Converts a non-success result message into an Error.
 *
 * @param result - The result message produced by a Claude Code query.
 * @returns `undefined` when the subtype is `success`; otherwise an Error whose
 *   message names the subtype and, when present, the reported error strings.
 */
function resultError(result: SDKResultMessage): Error | undefined {
  if (result.subtype === 'success') {
    return undefined;
  }
  // Tolerate result messages that carry no `errors` array: reading `.length`
  // on a missing field would throw a TypeError and hide the real subtype
  // (for example turning a max-turns stop into a generic failure).
  const reported = Array.isArray(result.errors) ? result.errors : [];
  const details = reported.length > 0 ? `: ${reported.join('; ')}` : '';
  return new Error(`Claude Code query failed (${result.subtype})${details}`);
}
|
||||
|
||||
/**
 * Maps a Claude Code result message onto the runtime's stop reason.
 *
 * - `budget`: the subtype is `error_max_turns`, or `terminal_reason` / `stop_reason` is `max_turns`.
 * - `natural`: a `success` result whose `terminal_reason` is empty or `completed`.
 * - `error`: everything else, including a `success` with another terminal reason.
 */
export function mapClaudeCodeStopReason(result: SDKResultMessage): RunLoopStopReason {
  const { subtype, terminal_reason: terminalReason, stop_reason: stopReason } = result;

  const hitTurnLimit =
    subtype === 'error_max_turns' || terminalReason === 'max_turns' || stopReason === 'max_turns';
  if (hitTurnLimit) {
    return 'budget';
  }
  if (subtype !== 'success') {
    return 'error';
  }
  const endedAbnormally = Boolean(terminalReason) && terminalReason !== 'completed';
  return endedAbnormally ? 'error' : 'natural';
}
|
||||
|
||||
/** Renders a Zod schema as a draft-7 JSON Schema object via `z.toJSONSchema`. */
function jsonSchema(schema: z.ZodType): Record<string, unknown> {
  const draft7 = z.toJSONSchema(schema, { target: 'draft-7' });
  return draft7 as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Picks the model configured for `role`, falling back to the `default` slot when
 * the role has no entry, and passes it through `resolveClaudeCodeModel`.
 */
function modelForRole(modelSlots: ClaudeCodeKtxLlmRuntimeDeps['modelSlots'], role: string): string {
  const configured = modelSlots[role] ?? modelSlots.default;
  return resolveClaudeCodeModel(configured);
}
|
||||
|
||||
/**
 * Verifies that a `system`/`init` message reports exactly the expected tools and
 * MCP servers and no plugins. Messages of any other kind are ignored.
 *
 * @param message - Any SDK message; only `system` messages with subtype `init` are inspected.
 * @param allowedToolIds - Tool ids that must be active, and the only ones that may be.
 * @param expectedMcpServerNames - MCP server names that must be present, and the only ones that may be.
 * @throws Error listing unexpected and missing tools and servers, plugin names, and
 *   the reported slash-command / skill counts and agents, when the session deviates.
 */
function assertInitIsolation(
  message: SDKMessage,
  allowedToolIds: Set<string>,
  expectedMcpServerNames: Set<string>,
): void {
  if (!(message.type === 'system' && message.subtype === 'init')) {
    return;
  }
  const listOrNone = (names: string[]): string => names.join(',') || '(none)';

  const activeTools = new Set(message.tools);
  const unexpectedTools = message.tools.filter((id) => !allowedToolIds.has(id));
  const missingTools = Array.from(allowedToolIds).filter((id) => !activeTools.has(id));

  const serverNames = message.mcp_servers.map(({ name }) => name);
  const serverNameSet = new Set(serverNames);
  const unexpectedMcpServers = serverNames.filter((name) => !expectedMcpServerNames.has(name));
  const missingMcpServers = Array.from(expectedMcpServerNames).filter((name) => !serverNameSet.has(name));

  // Any reported plugin counts as a violation: the runtime requests none.
  const unexpectedPlugins = message.plugins.map(({ name }) => name);

  const violations = [unexpectedTools, missingTools, unexpectedMcpServers, missingMcpServers, unexpectedPlugins];
  if (violations.every((list) => list.length === 0)) {
    return;
  }

  // Slash commands, skills and agents are reported for diagnosis only; they do not trigger the failure.
  const report = [
    `tools=${listOrNone(unexpectedTools)}`,
    `missing_tools=${listOrNone(missingTools)}`,
    `mcp_servers=${listOrNone(unexpectedMcpServers)}`,
    `missing_mcp_servers=${listOrNone(missingMcpServers)}`,
    `plugins=${listOrNone(unexpectedPlugins)}`,
    `host_slash_commands=${message.slash_commands.length}`,
    `host_skills=${message.skills.length}`,
    `host_agents=${message.agents?.join(',') || '(none)'}`,
  ].join(' ');
  throw new Error(`Claude Code runtime isolation failed: ${report}`);
}
|
||||
|
||||
/**
 * Returns the MCP server names a query should report: `ktx` when at least one
 * runtime tool is supplied, otherwise none.
 */
function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set<string> {
  const names = new Set<string>();
  if (tools && Object.keys(tools).length > 0) {
    names.add('ktx');
  }
  return names;
}
|
||||
|
||||
/**
 * Builds the SDK query options shared by every runtime call.
 *
 * The options request no setting sources, skills, plugins or built-in tools,
 * allow only the `mcp__ktx__*` ids derived from `input.tools`, disable session
 * persistence and use the environment returned by `createKtxClaudeCodeEnv`.
 * A `ktx` MCP server is attached only when at least one tool is supplied.
 */
function baseOptions(input: {
  projectDir: string;
  model: string;
  env: NodeJS.ProcessEnv | undefined;
  maxTurns: number;
  tools?: KtxRuntimeToolSet;
}): Options {
  const toolSet = input.tools ?? {};
  const toolIds = mcpToolIds(toolSet);
  const allowedToolIds = new Set(toolIds);
  const hasTools = Object.keys(toolSet).length > 0;

  // Permission callback: approve the current KTX MCP tools, deny anything else.
  const canUseTool: Options['canUseTool'] = async (toolName, _toolInput, options) => {
    if (allowedToolIds.has(toolName)) {
      return { behavior: 'allow', toolUseID: options.toolUseID };
    }
    return {
      behavior: 'deny',
      message: `KTX claude-code runtime only permits current KTX MCP tools; denied ${toolName}.`,
      toolUseID: options.toolUseID,
    };
  };

  const options: Options = {
    cwd: input.projectDir,
    model: input.model,
    maxTurns: input.maxTurns,
    settingSources: [],
    skills: [],
    plugins: [],
    tools: [],
    allowedTools: toolIds,
    disallowedTools: BUILTIN_TOOLS,
    canUseTool,
    permissionMode: 'dontAsk',
    persistSession: false,
    env: createKtxClaudeCodeEnv(input.env),
  };
  if (hasTools) {
    options.mcpServers = { ktx: createSdkMcpServer({ name: 'ktx', tools: createClaudeSdkTools(toolSet) }) };
  }
  return options;
}
|
||||
|
||||
/**
 * Drains a query stream and returns its result message.
 *
 * Every message is passed to `assertInitIsolation`. `onAssistantTurn` is awaited
 * for each assistant message whose `parent_tool_use_id` is `null`. When several
 * result messages arrive, the last one is returned.
 *
 * @throws Error when the stream ends without any result message, or when the
 *   isolation check rejects the session.
 */
async function collectResult(params: {
  query: QueryFn;
  prompt: string;
  options: Options;
  allowedToolIds: Set<string>;
  expectedMcpServerNames: Set<string>;
  onAssistantTurn?: () => Promise<void>;
}): Promise<SDKResultMessage> {
  const { prompt, options, allowedToolIds } = params;
  let lastResult: SDKResultMessage | undefined;

  for await (const message of params.query({ prompt, options })) {
    assertInitIsolation(message, allowedToolIds, params.expectedMcpServerNames);

    if (message.type === 'assistant') {
      // Only top-level assistant messages count as a turn.
      if (message.parent_tool_use_id === null) {
        await params.onAssistantTurn?.();
      }
      continue;
    }
    if (isResult(message)) {
      lastResult = message;
    }
  }

  if (lastResult === undefined) {
    throw new Error('Claude Code query returned no result message');
  }
  return lastResult;
}
|
||||
|
||||
/**
 * `KtxLlmRuntimePort` implementation that runs every call through the Claude
 * Agent SDK `query` function (or the injected `deps.query`).
 *
 * Text and object generation are single-turn queries; `runAgentLoop` uses the
 * caller's step budget as the turn limit.
 */
export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
  private readonly runQuery: QueryFn;
  private readonly logger: KtxLogger;

  constructor(private readonly deps: ClaudeCodeKtxLlmRuntimeDeps) {
    this.runQuery = deps.query ?? defaultQuery;
    this.logger = deps.logger ?? noopLogger;
  }

  /**
   * Runs a single-turn query and returns its text result.
   * The optional system text is prepended to the prompt, separated by a blank line.
   * @throws Error when the query does not end in a `success` result.
   */
  async generateText(input: KtxGenerateTextInput): Promise<string> {
    const options = this.singleTurnOptions(input);
    const outcome = await this.runSingleTurn(input, options);
    return outcome.result;
  }

  /**
   * Runs a single-turn query with a JSON-schema output format derived from
   * `input.schema` and parses the structured output with that schema.
   * @throws Error when the query does not end in a `success` result, or when
   *   the schema rejects the structured output.
   */
  async generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
    input: KtxGenerateObjectInput<TOutput, TSchema>,
  ): Promise<TOutput> {
    const options = {
      ...this.singleTurnOptions(input),
      outputFormat: { type: 'json_schema' as const, schema: jsonSchema(input.schema as z.ZodType) },
    };
    const outcome = await this.runSingleTurn(input, options);
    return (input.schema as z.ZodType<TOutput>).parse(outcome.structured_output);
  }

  /**
   * Runs a multi-turn agent query limited to `params.stepBudget` turns.
   *
   * Never rejects: any thrown error is returned as `{ stopReason: 'error', error }`.
   * Errors thrown by `onStepFinish` are logged as warnings and otherwise ignored.
   */
  async runAgentLoop(params: RunLoopParams): Promise<RunLoopResult> {
    let stepIndex = 0;

    const notifyStep = async (): Promise<void> => {
      stepIndex += 1;
      if (!params.onStepFinish) {
        return;
      }
      try {
        await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        this.logger.warn(`[claude-code-runner] onStepFinish callback threw; ignoring: ${reason}`);
      }
    };

    try {
      const options = baseOptions({
        projectDir: this.deps.projectDir,
        model: modelForRole(this.deps.modelSlots, params.modelRole),
        env: this.deps.env,
        maxTurns: params.stepBudget,
        tools: params.toolSet,
      });
      const result = await collectResult({
        query: this.runQuery,
        prompt: params.userPrompt,
        options: { ...options, systemPrompt: params.systemPrompt },
        allowedToolIds: new Set(mcpToolIds(params.toolSet)),
        expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
        onAssistantTurn: notifyStep,
      });

      const stopReason = mapClaudeCodeStopReason(result);
      const failure = resultError(result);
      // The error is attached only for an `error` stop; a budget stop is not a failure.
      if (stopReason === 'error' && failure) {
        return { stopReason, error: failure };
      }
      return { stopReason };
    } catch (error) {
      return { stopReason: 'error', error: error instanceof Error ? error : new Error(String(error)) };
    }
  }

  /** Builds the base options for a one-turn query using the model configured for the input's role. */
  private singleTurnOptions(input: Pick<KtxGenerateTextInput, 'role' | 'tools'>): Options {
    return baseOptions({
      projectDir: this.deps.projectDir,
      model: modelForRole(this.deps.modelSlots, input.role),
      env: this.deps.env,
      maxTurns: 1,
      tools: input.tools,
    });
  }

  /** Executes a one-turn query and returns its result, throwing unless the subtype is `success`. */
  private async runSingleTurn(
    input: Pick<KtxGenerateTextInput, 'system' | 'prompt' | 'tools'>,
    options: Options,
  ): Promise<Extract<SDKResultMessage, { subtype: 'success' }>> {
    const result = await collectResult({
      query: this.runQuery,
      prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
      options,
      allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
      expectedMcpServerNames: expectedMcpServerNames(input.tools),
    });
    const failure = resultError(result);
    if (failure) {
      throw failure;
    }
    // Kept for type narrowing: `resultError` returning undefined does not narrow the union.
    if (result.subtype !== 'success') {
      throw new Error(`Claude Code query failed (${result.subtype})`);
    }
    return result;
  }
}
|
||||
|
||||
/**
 * Checks that Claude Code can be used by running a minimal one-turn, tool-less query.
 *
 * @returns `{ ok: true }` when the query ends in a success result. When the model
 *   name cannot be resolved, the resolver's own message is returned unchanged.
 *   Any other failure is returned with an authentication hint prepended.
 */
export async function runClaudeCodeAuthProbe(input: {
  projectDir: string;
  model: string;
  query?: QueryFn;
  env?: NodeJS.ProcessEnv;
}): Promise<{ ok: true } | { ok: false; message: string }> {
  const describe = (error: unknown): string => (error instanceof Error ? error.message : String(error));

  // Model resolution is handled separately so that a bad model is not reported as an auth problem.
  let model: string;
  try {
    model = resolveClaudeCodeModel(input.model);
  } catch (error) {
    return { ok: false, message: describe(error) };
  }

  try {
    const options = baseOptions({ projectDir: input.projectDir, model, env: input.env, maxTurns: 1 });
    const result = await collectResult({
      query: input.query ?? defaultQuery,
      prompt: 'Reply with exactly: ok',
      options,
      allowedToolIds: new Set(),
      expectedMcpServerNames: new Set(),
    });
    const failure = resultError(result);
    if (failure) {
      throw failure;
    }
    return { ok: true };
  } catch (error) {
    return {
      ok: false,
      message: `Claude Code authentication is not usable. Authenticate Claude Code locally with the Claude Code CLI, then rerun setup or the command. ${describe(error)}`,
    };
  }
}
|
||||
|
|
@ -1,85 +1,12 @@
|
|||
import { KtxMessageBuilder, splitKtxSystemMessages, type KtxLlmProvider, type KtxModelRole } from '@ktx/llm';
|
||||
import { generateText, Output, type FlexibleSchema, type ToolSet } from 'ai';
|
||||
import type { z } from 'zod';
|
||||
import type { KtxGenerateObjectInput, KtxGenerateTextInput, KtxLlmRuntimePort } from './runtime-port.js';
|
||||
|
||||
type GenerateTextInput = Parameters<typeof generateText>[0];
|
||||
type GenerateTextFn = (input: GenerateTextInput) => Promise<{ text?: string; output?: unknown }>;
|
||||
|
||||
function hasTools(tools: ToolSet): boolean {
|
||||
return Object.keys(tools).length > 0;
|
||||
export async function generateKtxText(input: KtxGenerateTextInput & { runtime: KtxLlmRuntimePort }): Promise<string> {
|
||||
return input.runtime.generateText(input);
|
||||
}
|
||||
|
||||
interface GenerateKtxTextInput {
|
||||
llmProvider: KtxLlmProvider;
|
||||
role: KtxModelRole;
|
||||
prompt: string;
|
||||
system?: string;
|
||||
tools?: ToolSet;
|
||||
temperature?: number;
|
||||
generateText?: GenerateTextFn;
|
||||
}
|
||||
|
||||
export async function generateKtxText(input: GenerateKtxTextInput): Promise<string> {
|
||||
const model = input.llmProvider.getModel(input.role);
|
||||
if ((model as { provider?: string }).provider === 'deterministic') {
|
||||
return `Deterministic description for ${input.prompt.slice(0, 64).trim() || 'data source'}`;
|
||||
}
|
||||
const built = new KtxMessageBuilder(input.llmProvider).wrapSimple({
|
||||
system: input.system,
|
||||
messages: [{ role: 'user', content: input.prompt }],
|
||||
tools: input.tools ?? {},
|
||||
model,
|
||||
});
|
||||
const split = splitKtxSystemMessages(built.messages);
|
||||
const result = await (input.generateText ?? generateText)({
|
||||
model,
|
||||
temperature: input.temperature ?? 0,
|
||||
...(split.system ? { system: split.system } : {}),
|
||||
messages: split.messages,
|
||||
tools: built.tools as ToolSet,
|
||||
...(hasTools(built.tools as ToolSet)
|
||||
? {
|
||||
experimental_repairToolCall: input.llmProvider.repairToolCallHandler({
|
||||
source: `ktx-${input.role}`,
|
||||
}),
|
||||
}
|
||||
: {}),
|
||||
});
|
||||
if (typeof result.text !== 'string') {
|
||||
throw new Error('KTX LLM text generation returned no text');
|
||||
}
|
||||
return result.text;
|
||||
}
|
||||
|
||||
export async function generateKtxObject<TOutput, TSchema>(
|
||||
input: GenerateKtxTextInput & { schema: TSchema },
|
||||
export async function generateKtxObject<TOutput, TSchema extends z.ZodType<TOutput>>(
|
||||
input: KtxGenerateObjectInput<TOutput, TSchema> & { runtime: KtxLlmRuntimePort },
|
||||
): Promise<TOutput> {
|
||||
const model = input.llmProvider.getModel(input.role);
|
||||
const built = new KtxMessageBuilder(input.llmProvider).wrapSimple({
|
||||
system: input.system,
|
||||
messages: [{ role: 'user', content: input.prompt }],
|
||||
tools: input.tools ?? {},
|
||||
model,
|
||||
});
|
||||
const split = splitKtxSystemMessages(built.messages);
|
||||
const result = await (input.generateText ?? generateText)({
|
||||
model,
|
||||
temperature: input.temperature ?? 0,
|
||||
...(split.system ? { system: split.system } : {}),
|
||||
messages: split.messages,
|
||||
tools: built.tools as ToolSet,
|
||||
...(hasTools(built.tools as ToolSet)
|
||||
? {
|
||||
experimental_repairToolCall: input.llmProvider.repairToolCallHandler({
|
||||
source: `ktx-${input.role}`,
|
||||
}),
|
||||
}
|
||||
: {}),
|
||||
output: Output.object({
|
||||
schema: input.schema as FlexibleSchema<TOutput>,
|
||||
}),
|
||||
});
|
||||
if (result.output == null) {
|
||||
throw new Error('KTX LLM object generation returned no output');
|
||||
}
|
||||
return result.output as TOutput;
|
||||
return input.runtime.generateObject(input);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,31 @@
|
|||
export { KtxIngestEmbeddingPortAdapter, KtxScanEmbeddingPortAdapter } from './embedding-port.js';
|
||||
export { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
|
||||
export type { AgentTelemetryPort, AiSdkKtxLlmRuntimeDeps } from './ai-sdk-runtime.js';
|
||||
export { createKtxClaudeCodeEnv, CLAUDE_CODE_PROVIDER_ENV_DENYLIST } from './claude-code-env.js';
|
||||
export { resolveClaudeCodeModel } from './claude-code-models.js';
|
||||
export { ClaudeCodeKtxLlmRuntime, mapClaudeCodeStopReason, runClaudeCodeAuthProbe } from './claude-code-runtime.js';
|
||||
export { generateKtxObject, generateKtxText } from './generation.js';
|
||||
export type {
|
||||
AgentRunnerPort,
|
||||
KtxGenerateObjectInput,
|
||||
KtxGenerateTextInput,
|
||||
KtxLlmRuntimePort,
|
||||
KtxRuntimeToolDescriptor,
|
||||
KtxRuntimeToolOutput,
|
||||
KtxRuntimeToolSet,
|
||||
RunLoopParams,
|
||||
RunLoopResult,
|
||||
RunLoopStepInfo,
|
||||
RunLoopStopReason,
|
||||
} from './runtime-port.js';
|
||||
export { RuntimeAgentRunner } from './runtime-port.js';
|
||||
export {
|
||||
createAiSdkToolSet,
|
||||
createClaudeSdkTools,
|
||||
createRuntimeToolDescriptorFromAiTool,
|
||||
createRuntimeToolSetFromAiSdkTools,
|
||||
normalizeKtxRuntimeToolOutput,
|
||||
} from './runtime-tools.js';
|
||||
export type {
|
||||
KtxLlmDebugProviderOptionsEntry,
|
||||
KtxLlmDebugRequest,
|
||||
|
|
@ -15,6 +41,7 @@ export {
|
|||
MANAGED_SENTENCE_TRANSFORMERS_BASE_URL_ENV,
|
||||
createLocalKtxEmbeddingProviderFromConfig,
|
||||
createLocalKtxLlmProviderFromConfig,
|
||||
createLocalKtxLlmRuntimeFromConfig,
|
||||
resolveLocalKtxEmbeddingConfig,
|
||||
resolveLocalKtxLlmConfig,
|
||||
} from './local-config.js';
|
||||
|
|
|
|||
|
|
@ -9,11 +9,17 @@ import {
|
|||
} from '@ktx/llm';
|
||||
import { resolveKtxConfigReference } from '../core/config-reference.js';
|
||||
import type { KtxProjectEmbeddingConfig, KtxProjectLlmConfig } from '../project/config.js';
|
||||
import { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
|
||||
import { ClaudeCodeKtxLlmRuntime } from './claude-code-runtime.js';
|
||||
import type { KtxLlmRuntimePort } from './runtime-port.js';
|
||||
|
||||
interface LocalConfigDeps {
|
||||
env?: NodeJS.ProcessEnv;
|
||||
projectDir?: string;
|
||||
createKtxLlmProvider?: typeof createKtxLlmProvider;
|
||||
createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider;
|
||||
createClaudeCodeRuntime?: (deps: ConstructorParameters<typeof ClaudeCodeKtxLlmRuntime>[0]) => KtxLlmRuntimePort;
|
||||
createAiSdkRuntime?: (deps: { llmProvider: KtxLlmProvider }) => KtxLlmRuntimePort;
|
||||
}
|
||||
|
||||
export const MANAGED_SENTENCE_TRANSFORMERS_BASE_URL = 'managed:local-embeddings';
|
||||
|
|
@ -106,7 +112,33 @@ export function createLocalKtxLlmProviderFromConfig(
|
|||
deps: LocalConfigDeps = {},
|
||||
): KtxLlmProvider | null {
|
||||
const resolved = resolveLocalKtxLlmConfig(config, deps.env ?? process.env);
|
||||
return resolved ? (deps.createKtxLlmProvider ?? createKtxLlmProvider)(resolved) : null;
|
||||
if (!resolved || resolved.backend === 'claude-code') {
|
||||
return null;
|
||||
}
|
||||
return (deps.createKtxLlmProvider ?? createKtxLlmProvider)(resolved);
|
||||
}
|
||||
|
||||
/**
 * Creates the LLM runtime for a project's LLM config.
 *
 * @returns `null` when the config does not resolve; a Claude Code runtime for the
 *   `claude-code` backend; otherwise an AI SDK runtime wrapping a newly created provider.
 * @throws Error when the backend is `claude-code` and `deps.projectDir` is missing.
 */
export function createLocalKtxLlmRuntimeFromConfig(
  config: KtxProjectLlmConfig,
  deps: LocalConfigDeps = {},
): KtxLlmRuntimePort | null {
  const resolved = resolveLocalKtxLlmConfig(config, deps.env ?? process.env);
  if (!resolved) {
    return null;
  }

  if (resolved.backend !== 'claude-code') {
    const buildProvider = deps.createKtxLlmProvider ?? createKtxLlmProvider;
    const llmProvider = buildProvider(resolved);
    const buildAiSdkRuntime = deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps));
    return buildAiSdkRuntime({ llmProvider });
  }

  const { projectDir } = deps;
  if (!projectDir) {
    throw new Error('projectDir is required when creating the claude-code LLM runtime');
  }
  const buildClaudeRuntime =
    deps.createClaudeCodeRuntime ?? ((runtimeDeps) => new ClaudeCodeKtxLlmRuntime(runtimeDeps));
  return buildClaudeRuntime({
    projectDir,
    modelSlots: resolved.modelSlots,
    env: deps.env,
  });
}
|
||||
|
||||
function resolveSentenceTransformersBaseUrl(
|
||||
|
|
|
|||
25
packages/context/src/llm/runtime-local-config.test.ts
Normal file
25
packages/context/src/llm/runtime-local-config.test.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { createLocalKtxLlmProviderFromConfig, createLocalKtxLlmRuntimeFromConfig } from './local-config.js';
|
||||
|
||||
describe('local KTX LLM runtime config', () => {
|
||||
it('creates a Claude Code runtime for claude-code backend without creating an AI SDK provider', () => {
|
||||
const runtime = createLocalKtxLlmRuntimeFromConfig(
|
||||
{
|
||||
provider: { backend: 'claude-code' },
|
||||
models: { default: 'sonnet', triage: 'haiku' },
|
||||
},
|
||||
{ env: {}, projectDir: '/tmp/project', createClaudeCodeRuntime: vi.fn((deps) => ({ deps }) as never) },
|
||||
);
|
||||
|
||||
expect(runtime).toMatchObject({ deps: expect.objectContaining({ projectDir: '/tmp/project' }) });
|
||||
});
|
||||
|
||||
it('returns null from the AI SDK provider factory for claude-code backend', () => {
|
||||
expect(
|
||||
createLocalKtxLlmProviderFromConfig({
|
||||
provider: { backend: 'claude-code' },
|
||||
models: { default: 'sonnet' },
|
||||
}),
|
||||
).toBeNull();
|
||||
});
|
||||
});
|
||||
75
packages/context/src/llm/runtime-port.ts
Normal file
75
packages/context/src/llm/runtime-port.ts
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
import type { KtxModelRole } from '@ktx/llm';
|
||||
import type { z } from 'zod';
|
||||
|
||||
export interface KtxRuntimeToolOutput<TOutput = unknown> {
|
||||
markdown: string;
|
||||
structured?: TOutput;
|
||||
}
|
||||
|
||||
export interface KtxRuntimeToolDescriptor<TInput = unknown, TOutput = unknown> {
|
||||
name: string;
|
||||
description: string;
|
||||
inputSchema: z.ZodObject<z.ZodRawShape>;
|
||||
execute(input: TInput): Promise<KtxRuntimeToolOutput<TOutput>>;
|
||||
}
|
||||
|
||||
export type KtxRuntimeToolSet = Record<string, KtxRuntimeToolDescriptor>;
|
||||
|
||||
export type RunLoopStopReason = 'budget' | 'natural' | 'error';
|
||||
|
||||
export interface RunLoopStepInfo {
|
||||
stepIndex: number;
|
||||
stepBudget: number;
|
||||
}
|
||||
|
||||
export interface RunLoopParams {
|
||||
modelRole: KtxModelRole;
|
||||
systemPrompt: string;
|
||||
userPrompt: string;
|
||||
toolSet: KtxRuntimeToolSet;
|
||||
stepBudget: number;
|
||||
telemetryTags: Record<string, string>;
|
||||
onStepFinish?: (info: RunLoopStepInfo) => void | Promise<void>;
|
||||
}
|
||||
|
||||
export interface RunLoopResult {
|
||||
stopReason: RunLoopStopReason;
|
||||
error?: Error;
|
||||
}
|
||||
|
||||
export interface KtxGenerateTextInput {
|
||||
role: KtxModelRole;
|
||||
prompt: string;
|
||||
system?: string;
|
||||
tools?: KtxRuntimeToolSet;
|
||||
temperature?: number;
|
||||
}
|
||||
|
||||
export interface KtxGenerateObjectInput<TOutput, TSchema extends z.ZodType<TOutput>> {
|
||||
role: KtxModelRole;
|
||||
prompt: string;
|
||||
system?: string;
|
||||
tools?: KtxRuntimeToolSet;
|
||||
temperature?: number;
|
||||
schema: TSchema;
|
||||
}
|
||||
|
||||
export interface KtxLlmRuntimePort {
|
||||
generateText(input: KtxGenerateTextInput): Promise<string>;
|
||||
generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(
|
||||
input: KtxGenerateObjectInput<TOutput, TSchema>,
|
||||
): Promise<TOutput>;
|
||||
runAgentLoop(params: RunLoopParams): Promise<RunLoopResult>;
|
||||
}
|
||||
|
||||
export interface AgentRunnerPort {
|
||||
runLoop(params: RunLoopParams): Promise<RunLoopResult>;
|
||||
}
|
||||
|
||||
/** `AgentRunnerPort` adapter that forwards `runLoop` to a runtime's `runAgentLoop`. */
export class RuntimeAgentRunner implements AgentRunnerPort {
  private readonly runtime: KtxLlmRuntimePort;

  constructor(runtime: KtxLlmRuntimePort) {
    this.runtime = runtime;
  }

  /** Delegates to the wrapped runtime and returns its promise unchanged. */
  runLoop(params: RunLoopParams): Promise<RunLoopResult> {
    const { runtime } = this;
    return runtime.runAgentLoop(params);
  }
}
|
||||
43
packages/context/src/llm/runtime-tools.test.ts
Normal file
43
packages/context/src/llm/runtime-tools.test.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
import { createAiSdkToolSet, createClaudeSdkTools, normalizeKtxRuntimeToolOutput } from './runtime-tools.js';
|
||||
import type { KtxRuntimeToolDescriptor } from './runtime-port.js';
|
||||
|
||||
describe('runtime tool descriptors', () => {
|
||||
const descriptor: KtxRuntimeToolDescriptor<{ id: string }, { ok: boolean }> = {
|
||||
name: 'read_thing',
|
||||
description: 'Read one thing.',
|
||||
inputSchema: z.object({ id: z.string() }),
|
||||
execute: vi.fn(async (input) => ({
|
||||
markdown: `Read ${input.id}`,
|
||||
structured: { ok: true },
|
||||
})),
|
||||
};
|
||||
|
||||
it('normalizes string and object tool outputs into markdown plus optional structured payload', () => {
|
||||
expect(normalizeKtxRuntimeToolOutput('plain text')).toEqual({ markdown: 'plain text' });
|
||||
expect(normalizeKtxRuntimeToolOutput({ markdown: 'shown', structured: { id: 1 } })).toEqual({
|
||||
markdown: 'shown',
|
||||
structured: { id: 1 },
|
||||
});
|
||||
expect(normalizeKtxRuntimeToolOutput({ name: 'skill', content: 'body' })).toEqual({
|
||||
markdown: '```json\n{\n "name": "skill",\n "content": "body"\n}\n```',
|
||||
structured: { name: 'skill', content: 'body' },
|
||||
});
|
||||
});
|
||||
|
||||
it('builds AI SDK tools that expose markdown to the model', async () => {
|
||||
const tools = createAiSdkToolSet({ read_thing: descriptor });
|
||||
const output = await tools.read_thing.execute?.({ id: 'a' }, { toolCallId: 'call-1', messages: [] } as never);
|
||||
const modelOutput = tools.read_thing.toModelOutput?.({ output } as never);
|
||||
|
||||
expect(modelOutput).toEqual({ type: 'text', value: 'Read a' });
|
||||
});
|
||||
|
||||
it('builds Claude SDK tools that return text content only', async () => {
|
||||
const tools = createClaudeSdkTools({ read_thing: descriptor });
|
||||
const result = await tools[0].handler({ id: 'b' } as never, {});
|
||||
|
||||
expect(result).toEqual({ content: [{ type: 'text', text: 'Read b' }] });
|
||||
});
|
||||
});
|
||||
91
packages/context/src/llm/runtime-tools.ts
Normal file
91
packages/context/src/llm/runtime-tools.ts
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
import { tool as aiTool, type Tool, type ToolSet } from 'ai';
|
||||
import { tool as claudeTool, type SdkMcpToolDefinition } from '@anthropic-ai/claude-agent-sdk';
|
||||
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
|
||||
import { z } from 'zod';
|
||||
import type { KtxRuntimeToolDescriptor, KtxRuntimeToolOutput, KtxRuntimeToolSet } from './runtime-port.js';
|
||||
|
||||
/** Type guard: true for a non-null object that has a string `markdown` property. */
function isRuntimeOutput(value: unknown): value is KtxRuntimeToolOutput {
  if (!value || typeof value !== 'object') {
    return false;
  }
  if (!('markdown' in value)) {
    return false;
  }
  return typeof (value as { markdown?: unknown }).markdown === 'string';
}

/**
 * Normalizes an arbitrary tool return value into `{ markdown, structured? }`.
 *
 * - A string becomes the markdown as-is.
 * - A value that already has a string `markdown` keeps it; `structured` is copied
 *   only when that key is present on the value.
 * - Anything else is rendered as a fenced, 2-space-indented JSON block, with the
 *   original value kept as `structured`.
 */
export function normalizeKtxRuntimeToolOutput(value: unknown): KtxRuntimeToolOutput {
  if (typeof value === 'string') {
    return { markdown: value };
  }
  if (isRuntimeOutput(value)) {
    const normalized: KtxRuntimeToolOutput = { markdown: value.markdown };
    if ('structured' in value) {
      normalized.structured = value.structured;
    }
    return normalized;
  }
  const fence = '```';
  const json = JSON.stringify(value, null, 2);
  return {
    markdown: `${fence}json\n${json}\n${fence}`,
    structured: value,
  };
}
|
||||
|
||||
/**
 * Asserts that a tool's input schema is a `z.object(...)` schema.
 * @throws Error naming the tool when the schema is any other Zod type.
 */
function assertObjectSchema(name: string, schema: z.ZodType): asserts schema is z.ZodObject<z.ZodRawShape> {
  const isObjectSchema = schema instanceof z.ZodObject;
  if (isObjectSchema) {
    return;
  }
  throw new Error(`KTX runtime tool "${name}" must use z.object input schema for claude-code`);
}
|
||||
|
||||
/**
 * Converts runtime tool descriptors into an AI SDK `ToolSet`, keyed by the same names.
 *
 * Each tool runs the descriptor's `execute`; the model is shown only the markdown
 * part of the normalized output.
 */
export function createAiSdkToolSet(tools: KtxRuntimeToolSet = {}): ToolSet {
  const toAiTool = (descriptor: KtxRuntimeToolDescriptor) =>
    aiTool({
      description: descriptor.description,
      inputSchema: descriptor.inputSchema,
      execute: async (input) => descriptor.execute(input),
      toModelOutput: ({ output }) => ({ type: 'text', value: normalizeKtxRuntimeToolOutput(output).markdown }),
    });

  return Object.fromEntries(Object.entries(tools).map(([name, descriptor]) => [name, toAiTool(descriptor)]));
}
|
||||
|
||||
/**
 * Converts runtime tool descriptors into Claude Agent SDK MCP tool definitions.
 *
 * Each tool is registered under its key in `tools`, the same name that
 * `mcpToolIds` and `createAiSdkToolSet` use. Registering under `descriptor.name`
 * instead would leave the tool unreachable whenever the two names differ,
 * because the `mcp__ktx__<key>` ids would not match the registered tool.
 *
 * @param tools - Runtime tool descriptors keyed by tool name.
 * @returns One SDK tool definition per entry; handlers return the normalized
 *   markdown as a single text content item.
 * @throws Error when a descriptor's input schema is not a `z.object(...)` schema.
 */
export function createClaudeSdkTools(tools: KtxRuntimeToolSet = {}): Array<SdkMcpToolDefinition<z.ZodRawShape>> {
  return Object.entries(tools).map(([name, descriptor]) => {
    assertObjectSchema(name, descriptor.inputSchema);
    return claudeTool(
      name,
      descriptor.description,
      descriptor.inputSchema.shape,
      async (input): Promise<CallToolResult> => {
        const normalized = normalizeKtxRuntimeToolOutput(await descriptor.execute(input));
        return { content: [{ type: 'text', text: normalized.markdown }] };
      },
    );
  });
}
|
||||
|
||||
/**
 * Lists the prefixed identifier for every tool in the set.
 *
 * @param tools Runtime tool descriptors keyed by tool name (defaults to none).
 * @returns One `mcp__ktx__<key>` string per key, in `Object.keys` order.
 */
export function mcpToolIds(tools: KtxRuntimeToolSet = {}): string[] {
  const ids: string[] = [];
  for (const toolName of Object.keys(tools)) {
    ids.push(`mcp__ktx__${toolName}`);
  }
  return ids;
}
|
||||
|
||||
/**
 * Adapts a single AI SDK tool into a runtime tool descriptor.
 *
 * The description falls back to an empty string when absent. The presence of
 * `aiSdkTool.execute` is checked each time the descriptor is executed, not
 * when the descriptor is created. Results are passed through
 * `normalizeKtxRuntimeToolOutput`.
 *
 * @param name Name to give the descriptor; also used to build the tool call id.
 * @param aiSdkTool Tool to adapt.
 * @returns A descriptor with `name`, `description`, `inputSchema` and `execute`.
 */
export function createRuntimeToolDescriptorFromAiTool(name: string, aiSdkTool: Tool): KtxRuntimeToolDescriptor {
  const description = aiSdkTool.description ?? '';
  const inputSchema = aiSdkTool.inputSchema as KtxRuntimeToolDescriptor['inputSchema'];

  const execute: KtxRuntimeToolDescriptor['execute'] = async (input) => {
    if (typeof aiSdkTool.execute === 'function') {
      // Called as a method on the tool so its `this` binding is preserved.
      const raw = await aiSdkTool.execute(input as never, { toolCallId: `runtime-${name}` } as never);
      return normalizeKtxRuntimeToolOutput(raw);
    }
    throw new Error(`KTX runtime tool "${name}" has no execute function`);
  };

  return { name, description, inputSchema, execute };
}
|
||||
|
||||
/**
 * Converts a whole AI SDK tool set into a runtime tool set, adapting each
 * entry with `createRuntimeToolDescriptorFromAiTool` under its original key.
 *
 * @param tools AI SDK tools keyed by tool name (defaults to none).
 * @returns Runtime descriptors keyed by the same names.
 */
export function createRuntimeToolSetFromAiSdkTools(tools: ToolSet = {}): KtxRuntimeToolSet {
  const descriptorEntries = Object.entries(tools).map(([name, aiSdkTool]) => {
    const descriptor = createRuntimeToolDescriptorFromAiTool(name, aiSdkTool as Tool);
    return [name, descriptor];
  });

  return Object.fromEntries(descriptorEntries);
}
|
||||
|
|
@ -1,13 +1,17 @@
|
|||
import { join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import YAML from 'yaml';
|
||||
import { AgentRunnerService } from '../agent/index.js';
|
||||
import { localConnectionInfoFromConfig } from '../connections/index.js';
|
||||
import type { KtxEmbeddingPort, KtxFileStorePort, KtxFileWriteResult } from '../core/index.js';
|
||||
import { type KtxLogger, noopLogger, SessionWorktreeService } from '../core/index.js';
|
||||
import type { KtxSemanticLayerComputePort } from '../daemon/index.js';
|
||||
import { createLocalKtxLlmProviderFromConfig } from '../llm/index.js';
|
||||
import {
|
||||
createLocalKtxLlmRuntimeFromConfig,
|
||||
RuntimeAgentRunner,
|
||||
type AgentRunnerPort,
|
||||
type KtxLlmRuntimePort,
|
||||
type KtxRuntimeToolSet,
|
||||
} from '../llm/index.js';
|
||||
import type { KtxLocalProject } from '../project/index.js';
|
||||
import { PromptService } from '../prompts/index.js';
|
||||
import { SkillsRegistryService } from '../skills/index.js';
|
||||
|
|
@ -63,8 +67,8 @@ const LOCAL_AUTHOR = { name: 'KTX Local', email: 'local@ktx.local' };
|
|||
const LOCAL_SHAPE_WARNING = 'Local memory ingest validates semantic-layer YAML shape only.';
|
||||
|
||||
export interface CreateLocalProjectMemoryIngestOptions {
|
||||
llmProvider?: KtxLlmProvider;
|
||||
agentRunner?: AgentRunnerService;
|
||||
llmRuntime?: KtxLlmRuntimePort;
|
||||
agentRunner?: AgentRunnerPort;
|
||||
memoryModel?: string;
|
||||
semanticLayerCompute?: KtxSemanticLayerComputePort;
|
||||
queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise<KtxQueryResult> };
|
||||
|
|
@ -89,7 +93,8 @@ export function createLocalProjectMemoryIngest(
|
|||
const slSearchService = new SlSearchService(embedding, slSourcesRepository, logger);
|
||||
const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, project.git, logger);
|
||||
const authorResolver = new LocalAuthorResolver();
|
||||
const llmProvider = options.llmProvider ?? createLocalKtxLlmProviderFromConfig(project.config.llm);
|
||||
const llmRuntime =
|
||||
options.llmRuntime ?? createLocalKtxLlmRuntimeFromConfig(project.config.llm, { projectDir: project.projectDir });
|
||||
const toolsetFactory = new LocalMemoryToolsetFactory({
|
||||
project,
|
||||
embedding,
|
||||
|
|
@ -104,10 +109,7 @@ export function createLocalProjectMemoryIngest(
|
|||
});
|
||||
const agentRunner =
|
||||
options.agentRunner ??
|
||||
new AgentRunnerService({
|
||||
llmProvider: requireLlmProvider(llmProvider),
|
||||
logger,
|
||||
});
|
||||
new RuntimeAgentRunner(requireLlmRuntime(llmRuntime));
|
||||
const memoryAgent = new MemoryAgentService({
|
||||
settings: {
|
||||
knowledge: { userScopedKnowledgeEnabled: false },
|
||||
|
|
@ -143,11 +145,11 @@ export function createLocalProjectMemoryIngest(
|
|||
});
|
||||
}
|
||||
|
||||
function requireLlmProvider(provider: KtxLlmProvider | null | undefined): KtxLlmProvider {
|
||||
if (!provider) {
|
||||
function requireLlmRuntime(runtime: KtxLlmRuntimePort | null | undefined): KtxLlmRuntimePort {
|
||||
if (!runtime) {
|
||||
throw new Error('createLocalProjectMemoryIngest requires llm.provider.backend or an injected agentRunner');
|
||||
}
|
||||
return provider;
|
||||
return runtime;
|
||||
}
|
||||
|
||||
class LocalMemoryFileStore implements MemoryFileStorePort {
|
||||
|
|
@ -386,8 +388,8 @@ class LocalShapeOnlySlValidator implements SlValidatorPort<SlValidationDeps> {
|
|||
class LocalMemoryToolSet implements MemoryToolSetLike {
|
||||
constructor(private readonly tools: BaseTool[]) {}
|
||||
|
||||
toAiSdkTools(context: ToolContext) {
|
||||
return Object.fromEntries(this.tools.map((tool) => [tool.name, tool.toAiSdkTool(context)]));
|
||||
toRuntimeTools(context: ToolContext): KtxRuntimeToolSet {
|
||||
return Object.fromEntries(this.tools.map((tool) => [tool.name, tool.toRuntimeTool(context)]));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// Module-level mock for 'ai' so generateText is a stub. This file is separate from
|
||||
|
|
@ -15,7 +18,6 @@ import { MemoryAgentService } from './memory-agent.service.js';
|
|||
|
||||
interface BuiltMocks {
|
||||
appSettings: any;
|
||||
llmProvider: any;
|
||||
prompt: any;
|
||||
eventTracker: any;
|
||||
telemetry: any;
|
||||
|
|
@ -63,7 +65,6 @@ const buildMocks = (overrides: Partial<BuiltMocks> = {}): BuiltMocks => {
|
|||
llm: { memoryIngestionModel: 'test-model' },
|
||||
},
|
||||
},
|
||||
llmProvider: { getModel: vi.fn().mockReturnValue({}) },
|
||||
prompt: { loadPrompt: vi.fn().mockResolvedValue('base framing') },
|
||||
eventTracker: { trackEvent: vi.fn(), createTelemetryIntegration: vi.fn().mockReturnValue(undefined) },
|
||||
telemetry: {
|
||||
|
|
@ -124,11 +125,11 @@ const buildMocks = (overrides: Partial<BuiltMocks> = {}): BuiltMocks => {
|
|||
slValidator: { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) },
|
||||
toolsetFactory: {
|
||||
createIngestWuToolset: vi.fn().mockReturnValue({
|
||||
toAiSdkTools: vi.fn().mockReturnValue({}),
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
}),
|
||||
createToolset: vi.fn().mockReturnValue({
|
||||
toAiSdkTools: vi.fn().mockReturnValue({}),
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
}),
|
||||
},
|
||||
|
|
@ -241,6 +242,39 @@ describe('MemoryAgentService.ingest — session-branch orchestration', () => {
|
|||
expect(result.commitHash).toBe('cafebabe');
|
||||
});
|
||||
|
||||
it('normalizes load_skill output to markdown while preserving structured payload', async () => {
|
||||
const tempDir = await mkdtemp(join(tmpdir(), 'ktx-memory-skill-'));
|
||||
const skillDir = join(tempDir, 'memory_agent');
|
||||
await mkdir(skillDir, { recursive: true });
|
||||
await writeFile(join(skillDir, 'SKILL.md'), '---\nname: memory_agent\n---\nSkill body', 'utf-8');
|
||||
try {
|
||||
const agentRunner = {
|
||||
runLoop: vi.fn(async (params: any) => {
|
||||
const result = await params.toolSet.load_skill.execute({ name: 'memory_agent' });
|
||||
expect(result.markdown).toContain('memory_agent');
|
||||
expect(result.structured).toMatchObject({ name: 'memory_agent' });
|
||||
return { stopReason: 'natural' as const };
|
||||
}),
|
||||
};
|
||||
const mocks = buildMocks({
|
||||
agentRunner,
|
||||
skillsRegistry: {
|
||||
listSkills: vi.fn().mockResolvedValue([{ name: 'memory_agent', path: skillDir }]),
|
||||
buildSkillsPrompt: vi.fn().mockReturnValue(''),
|
||||
getSkill: vi.fn().mockResolvedValue({ name: 'memory_agent', path: skillDir }),
|
||||
stripFrontmatter: vi.fn().mockReturnValue('Skill body'),
|
||||
},
|
||||
});
|
||||
const svc = buildService(mocks);
|
||||
|
||||
await svc.ingest(baseInput);
|
||||
|
||||
expect(agentRunner.runLoop).toHaveBeenCalled();
|
||||
} finally {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('logs prompt debug output when KTX_MEMORY_AGENT_DEBUG_PROMPTS is enabled', async () => {
|
||||
const previousDebugPrompts = process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS;
|
||||
const mocks = buildMocks();
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
import { createHash } from 'node:crypto';
|
||||
import { readFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { tool } from 'ai';
|
||||
import * as YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import { type KtxLogger, noopLogger } from '../core/index.js';
|
||||
import type { KtxRuntimeToolSet } from '../llm/index.js';
|
||||
import {
|
||||
revertSourceToPreHead,
|
||||
type SemanticLayerSource,
|
||||
|
|
@ -125,8 +125,9 @@ export class MemoryAgentService {
|
|||
session: toolSession,
|
||||
};
|
||||
|
||||
const loadSkillTool = {
|
||||
load_skill: tool({
|
||||
const loadSkillTool: KtxRuntimeToolSet = {
|
||||
load_skill: {
|
||||
name: 'load_skill',
|
||||
description:
|
||||
'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.',
|
||||
inputSchema: z.object({
|
||||
|
|
@ -137,23 +138,27 @@ export class MemoryAgentService {
|
|||
if (!skill) {
|
||||
const available =
|
||||
(await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') || '(none)';
|
||||
return `Skill "${name}" not available to the memory agent. Available: ${available}`;
|
||||
return { markdown: `Skill "${name}" not available to the memory agent. Available: ${available}` };
|
||||
}
|
||||
try {
|
||||
const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8');
|
||||
if (!skillsLoaded.includes(skill.name)) {
|
||||
skillsLoaded.push(skill.name);
|
||||
}
|
||||
return {
|
||||
const structured = {
|
||||
name: skill.name,
|
||||
skillDirectory: skill.path,
|
||||
content: this.deps.skillsRegistry.stripFrontmatter(body),
|
||||
};
|
||||
return {
|
||||
markdown: `# ${structured.name}\n\n${structured.content}`,
|
||||
structured,
|
||||
};
|
||||
} catch (e) {
|
||||
return `Error loading skill "${name}": ${e instanceof Error ? e.message : String(e)}`;
|
||||
return { markdown: `Error loading skill "${name}": ${e instanceof Error ? e.message : String(e)}` };
|
||||
}
|
||||
},
|
||||
}),
|
||||
},
|
||||
};
|
||||
|
||||
const skillNames: string[] = [...DEFAULT_SKILL_NAMES];
|
||||
|
|
@ -212,7 +217,7 @@ export class MemoryAgentService {
|
|||
modelRole: 'candidateExtraction',
|
||||
systemPrompt,
|
||||
userPrompt: prompt,
|
||||
toolSet: { ...toolset.toAiSdkTools(toolContext), ...loadSkillTool },
|
||||
toolSet: { ...toolset.toRuntimeTools(toolContext), ...loadSkillTool },
|
||||
stepBudget,
|
||||
telemetryTags: {
|
||||
operationName: 'memory-agent-ingest',
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import type { Tool } from 'ai';
|
||||
import type { AgentRunnerService } from '../agent/index.js';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../llm/index.js';
|
||||
import type { GitService, KtxFileStorePort, KtxLogger, SessionWorktreeService } from '../core/index.js';
|
||||
import type { PromptService } from '../prompts/index.js';
|
||||
import type { SkillsRegistryService } from '../skills/index.js';
|
||||
|
|
@ -118,7 +117,7 @@ export interface MemoryCommitMessagePort {
|
|||
export interface MemoryFileStorePort extends KtxFileStorePort<MemoryFileStorePort>, MemoryCommitMessagePort {}
|
||||
|
||||
export interface MemoryToolSetLike {
|
||||
toAiSdkTools(context: ToolContext): Record<string, Tool>;
|
||||
toRuntimeTools(context: ToolContext): KtxRuntimeToolSet;
|
||||
}
|
||||
|
||||
export interface MemoryToolsetFactoryPort {
|
||||
|
|
@ -150,7 +149,7 @@ export interface MemoryAgentServiceDeps {
|
|||
slSourcesRepository: SlSourcesIndexPort;
|
||||
sessionWorktreeService: SessionWorktreeService<MemoryFileStorePort>;
|
||||
semanticLayerSourceReconciler: MemorySlSourceReconcilerPort;
|
||||
agentRunner: AgentRunnerService;
|
||||
agentRunner: AgentRunnerPort;
|
||||
slValidator: SlValidatorPort<SlValidationDeps>;
|
||||
toolsetFactory: MemoryToolsetFactoryPort;
|
||||
telemetry?: MemoryTelemetryPort;
|
||||
|
|
|
|||
|
|
@ -180,6 +180,31 @@ llm:
|
|||
});
|
||||
});
|
||||
|
||||
it('parses Claude Code as a first-class LLM backend', () => {
|
||||
const config = parseKtxProjectConfig(`
|
||||
llm:
|
||||
provider:
|
||||
backend: claude-code
|
||||
models:
|
||||
default: sonnet
|
||||
triage: haiku
|
||||
candidateExtraction: sonnet
|
||||
curator: sonnet
|
||||
reconcile: sonnet
|
||||
repair: opus
|
||||
`);
|
||||
|
||||
expect(config.llm.provider.backend).toBe('claude-code');
|
||||
expect(config.llm.models).toEqual({
|
||||
default: 'sonnet',
|
||||
triage: 'haiku',
|
||||
candidateExtraction: 'sonnet',
|
||||
curator: 'sonnet',
|
||||
reconcile: 'sonnet',
|
||||
repair: 'opus',
|
||||
});
|
||||
});
|
||||
|
||||
it('parses gateway LLM, OpenAI scan embeddings, and sentence-transformers ingest embeddings', () => {
|
||||
const config = parseKtxProjectConfig(`
|
||||
llm:
|
||||
|
|
@ -497,7 +522,7 @@ describe('generateKtxProjectConfigJsonSchema', () => {
|
|||
const llm = (schema.properties as Record<string, { properties?: Record<string, unknown> }>).llm;
|
||||
const provider = llm?.properties?.provider as { properties?: Record<string, unknown> };
|
||||
const backend = provider?.properties?.backend as { enum?: readonly string[] };
|
||||
expect(backend?.enum).toEqual(['none', 'anthropic', 'vertex', 'gateway']);
|
||||
expect(backend?.enum).toEqual(['none', 'anthropic', 'vertex', 'gateway', 'claude-code']);
|
||||
|
||||
const storage = (schema.properties as Record<string, { properties?: Record<string, unknown> }>).storage;
|
||||
const state = storage?.properties?.state as { enum?: readonly string[] };
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import YAML from 'yaml';
|
|||
import * as z from 'zod';
|
||||
import { connectionConfigSchema } from './driver-schemas.js';
|
||||
|
||||
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway'] as const;
|
||||
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway', 'claude-code'] as const;
|
||||
const KTX_EMBEDDING_BACKENDS = ['none', 'deterministic', 'openai', 'sentence-transformers'] as const;
|
||||
const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
|
||||
const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
|
||||
|
|
@ -46,7 +46,9 @@ const llmProviderSchema = z
|
|||
backend: z
|
||||
.enum(KTX_LLM_BACKENDS)
|
||||
.default('none')
|
||||
.describe('LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block.'),
|
||||
.describe(
|
||||
'LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block; "claude-code" uses the local Claude Code session.',
|
||||
),
|
||||
vertex: vertexProviderSchema.optional().describe('Vertex AI credentials, used when backend is "vertex".'),
|
||||
anthropic: apiCredentialsSchema.optional().describe('Anthropic API credentials, used when backend is "anthropic".'),
|
||||
gateway: apiCredentialsSchema.optional().describe('AI Gateway credentials, used when backend is "gateway".'),
|
||||
|
|
|
|||
|
|
@ -31,46 +31,32 @@ function createCache(initial: Record<string, string> = {}): KtxDescriptionCacheP
|
|||
function createLlmProvider(text = 'generated description') {
|
||||
vi.mocked(generateText).mockResolvedValue({ text } as never);
|
||||
return {
|
||||
getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }),
|
||||
getModelByName: vi.fn(),
|
||||
cacheMarker: vi.fn(),
|
||||
repairToolCallHandler: vi.fn(),
|
||||
thinkingProviderOptions: vi.fn(),
|
||||
telemetryConfig: vi.fn(),
|
||||
promptCachingConfig: vi.fn(() => ({
|
||||
enabled: false,
|
||||
systemTtl: '1h',
|
||||
toolsTtl: '1h',
|
||||
historyTtl: '5m',
|
||||
cacheSystem: true,
|
||||
cacheTools: true,
|
||||
cacheHistory: true,
|
||||
vertexFallbackTo5m: false,
|
||||
})),
|
||||
activeBackend: vi.fn(() => 'anthropic'),
|
||||
generateText: vi.fn(async (input) => {
|
||||
const result = await generateText({
|
||||
system: input.system ? { role: 'system', content: input.system } : undefined,
|
||||
messages: [{ role: 'user', content: input.prompt }],
|
||||
temperature: input.temperature,
|
||||
} as never);
|
||||
return result.text;
|
||||
}),
|
||||
generateObject: vi.fn(),
|
||||
runAgentLoop: vi.fn(),
|
||||
} as any;
|
||||
}
|
||||
|
||||
function createFailingLlmProvider(message = 'timeout exceeded when trying to connect') {
|
||||
vi.mocked(generateText).mockRejectedValue(new Error(message) as never);
|
||||
return {
|
||||
getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }),
|
||||
getModelByName: vi.fn(),
|
||||
cacheMarker: vi.fn(),
|
||||
repairToolCallHandler: vi.fn(),
|
||||
thinkingProviderOptions: vi.fn(),
|
||||
telemetryConfig: vi.fn(),
|
||||
promptCachingConfig: vi.fn(() => ({
|
||||
enabled: false,
|
||||
systemTtl: '1h',
|
||||
toolsTtl: '1h',
|
||||
historyTtl: '5m',
|
||||
cacheSystem: true,
|
||||
cacheTools: true,
|
||||
cacheHistory: true,
|
||||
vertexFallbackTo5m: false,
|
||||
})),
|
||||
activeBackend: vi.fn(() => 'anthropic'),
|
||||
generateText: vi.fn(async (input) => {
|
||||
const result = await generateText({
|
||||
system: input.system ? { role: 'system', content: input.system } : undefined,
|
||||
messages: [{ role: 'user', content: input.prompt }],
|
||||
temperature: input.temperature,
|
||||
} as never);
|
||||
return result.text;
|
||||
}),
|
||||
generateObject: vi.fn(),
|
||||
runAgentLoop: vi.fn(),
|
||||
} as any;
|
||||
}
|
||||
|
||||
|
|
@ -158,10 +144,10 @@ describe('KTX description prompt builders', () => {
|
|||
describe('KtxDescriptionGenerator', () => {
|
||||
it('generates column descriptions with pre-fetched values, cache hits, and word-limit metadata', async () => {
|
||||
const cache = createCache({ 'warehouse.public.orders.cached_status': 'Cached status description' });
|
||||
const llmProvider = createLlmProvider('Payment state');
|
||||
const llmRuntime = createLlmProvider('Payment state');
|
||||
const connector = createConnector();
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider,
|
||||
llmRuntime,
|
||||
cache,
|
||||
settings: {
|
||||
columnMaxWords: 12,
|
||||
|
|
@ -222,7 +208,7 @@ describe('KtxDescriptionGenerator', () => {
|
|||
it('samples through the connector when column values are not pre-fetched', async () => {
|
||||
const connector = createConnector();
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('Current order state'),
|
||||
llmRuntime: createLlmProvider('Current order state'),
|
||||
settings: {
|
||||
columnMaxWords: 12,
|
||||
tableMaxWords: 18,
|
||||
|
|
@ -271,7 +257,7 @@ describe('KtxDescriptionGenerator', () => {
|
|||
})),
|
||||
};
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('Generated through sampler'),
|
||||
llmRuntime: createLlmProvider('Generated through sampler'),
|
||||
settings: {
|
||||
columnMaxWords: 12,
|
||||
tableMaxWords: 18,
|
||||
|
|
@ -310,7 +296,7 @@ describe('KtxDescriptionGenerator', () => {
|
|||
const cache = createCache();
|
||||
const connector = createConnector();
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createFailingLlmProvider(),
|
||||
llmRuntime: createFailingLlmProvider(),
|
||||
cache,
|
||||
settings: {
|
||||
columnMaxWords: 12,
|
||||
|
|
@ -355,7 +341,7 @@ describe('KtxDescriptionGenerator', () => {
|
|||
const cache = createCache();
|
||||
const connector = createConnector();
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('Commerce orders'),
|
||||
llmRuntime: createLlmProvider('Commerce orders'),
|
||||
cache,
|
||||
settings: {
|
||||
columnMaxWords: 12,
|
||||
|
|
@ -424,7 +410,7 @@ describe('KtxDescriptionGenerator resilience', () => {
|
|||
const logger = createLogger();
|
||||
const warnings: Array<{ code: string; table?: string }> = [];
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('Commerce orders'),
|
||||
llmRuntime: createLlmProvider('Commerce orders'),
|
||||
logger,
|
||||
onWarning: (warning) => warnings.push({ code: warning.code, ...(warning.table ? { table: warning.table } : {}) }),
|
||||
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24, concurrencyLimit: 2 },
|
||||
|
|
@ -455,7 +441,7 @@ describe('KtxDescriptionGenerator resilience', () => {
|
|||
const logger = createLogger();
|
||||
const warnings: Array<{ code: string; table?: string; metadata?: Record<string, unknown> }> = [];
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('Customer reference data'),
|
||||
llmRuntime: createLlmProvider('Customer reference data'),
|
||||
logger,
|
||||
onWarning: (warning) =>
|
||||
warnings.push({
|
||||
|
|
@ -503,7 +489,7 @@ describe('KtxDescriptionGenerator resilience', () => {
|
|||
};
|
||||
const warnings: string[] = [];
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createFailingLlmProvider(),
|
||||
llmRuntime: createFailingLlmProvider(),
|
||||
onWarning: (warning) => warnings.push(warning.code),
|
||||
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
|
||||
});
|
||||
|
|
@ -528,7 +514,7 @@ describe('KtxDescriptionGenerator resilience', () => {
|
|||
};
|
||||
const warnings: string[] = [];
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('Orders mart'),
|
||||
llmRuntime: createLlmProvider('Orders mart'),
|
||||
onWarning: (warning) => warnings.push(warning.code),
|
||||
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
|
||||
});
|
||||
|
|
@ -562,7 +548,7 @@ describe('KtxDescriptionGenerator resilience', () => {
|
|||
};
|
||||
const warnings: string[] = [];
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('should not be called'),
|
||||
llmRuntime: createLlmProvider('should not be called'),
|
||||
onWarning: (warning) => warnings.push(warning.code),
|
||||
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
|
||||
});
|
||||
|
|
@ -588,7 +574,7 @@ describe('KtxDescriptionGenerator resilience', () => {
|
|||
};
|
||||
const logger = createLogger();
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('Payment lifecycle state'),
|
||||
llmRuntime: createLlmProvider('Payment lifecycle state'),
|
||||
logger,
|
||||
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
|
||||
});
|
||||
|
|
@ -625,7 +611,7 @@ describe('KtxDescriptionGenerator resilience', () => {
|
|||
sampleColumn,
|
||||
};
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('Customer reference identifier'),
|
||||
llmRuntime: createLlmProvider('Customer reference identifier'),
|
||||
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
|
||||
});
|
||||
|
||||
|
|
@ -657,7 +643,7 @@ describe('KtxDescriptionGenerator resilience', () => {
|
|||
};
|
||||
vi.mocked(generateText).mockClear();
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: createLlmProvider('should not be called'),
|
||||
llmRuntime: createLlmProvider('should not be called'),
|
||||
settings: { columnMaxWords: 12, tableMaxWords: 18, dataSourceMaxWords: 24 },
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import { generateKtxText } from '../llm/index.js';
|
||||
import type { KtxLlmRuntimePort } from '../llm/index.js';
|
||||
import type {
|
||||
KtxColumnSampleInput,
|
||||
KtxColumnSampleResult,
|
||||
|
|
@ -120,7 +119,7 @@ export interface KtxGenerateDataSourceDescriptionInput {
|
|||
}
|
||||
|
||||
export interface KtxDescriptionGeneratorOptions {
|
||||
llmProvider: KtxLlmProvider;
|
||||
llmRuntime: KtxLlmRuntimePort;
|
||||
cache?: KtxDescriptionCachePort;
|
||||
logger?: KtxScanLoggerPort;
|
||||
onWarning?: (warning: KtxScanWarning) => void;
|
||||
|
|
@ -400,14 +399,14 @@ Data source type: ${input.dataSourceType}`;
|
|||
}
|
||||
|
||||
export class KtxDescriptionGenerator {
|
||||
private readonly llmProvider: KtxLlmProvider;
|
||||
private readonly llmRuntime: KtxLlmRuntimePort;
|
||||
private readonly cache?: KtxDescriptionCachePort;
|
||||
private readonly logger?: KtxScanLoggerPort;
|
||||
private readonly onWarning?: (warning: KtxScanWarning) => void;
|
||||
private readonly settings: ResolvedKtxDescriptionGenerationSettings;
|
||||
|
||||
constructor(options: KtxDescriptionGeneratorOptions) {
|
||||
this.llmProvider = options.llmProvider;
|
||||
this.llmRuntime = options.llmRuntime;
|
||||
this.cache = options.cache;
|
||||
this.logger = options.logger;
|
||||
this.onWarning = options.onWarning;
|
||||
|
|
@ -779,8 +778,7 @@ export class KtxDescriptionGenerator {
|
|||
|
||||
private async generateAiDescription(prompt: KtxDescriptionPrompt, _operationName: string): Promise<string | null> {
|
||||
try {
|
||||
const text = await generateKtxText({
|
||||
llmProvider: this.llmProvider,
|
||||
const text = await this.llmRuntime.generateText({
|
||||
role: 'candidateExtraction',
|
||||
system: prompt.system,
|
||||
prompt: prompt.user,
|
||||
|
|
|
|||
|
|
@ -264,7 +264,6 @@ export type {
|
|||
} from './relationship-graph-resolver.js';
|
||||
export { resolveKtxRelationshipGraph } from './relationship-graph-resolver.js';
|
||||
export type {
|
||||
KtxRelationshipLlmProposalGenerateText,
|
||||
KtxRelationshipLlmProposalResult,
|
||||
KtxRelationshipLlmProposalSettings,
|
||||
ProposeKtxRelationshipCandidatesWithLlmInput,
|
||||
|
|
|
|||
|
|
@ -356,7 +356,7 @@ describe('local scan enrichment', () => {
|
|||
|
||||
it('honors scan relationship config when LLM proposals are disabled', async () => {
|
||||
const providers = createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 3 });
|
||||
const getModel = vi.fn(() => ({ modelId: 'provider/language-model', provider: 'gateway' }));
|
||||
const generateObject = vi.fn();
|
||||
const result = await runLocalScanEnrichment({
|
||||
connectionId: 'warehouse',
|
||||
mode: 'relationships',
|
||||
|
|
@ -365,9 +365,9 @@ describe('local scan enrichment', () => {
|
|||
context: { runId: 'scan-run-llm-disabled' },
|
||||
providers: {
|
||||
...providers,
|
||||
llm: {
|
||||
...providers.llm,
|
||||
getModel: getModel as never,
|
||||
llmRuntime: {
|
||||
...providers.llmRuntime,
|
||||
generateObject: generateObject as never,
|
||||
},
|
||||
},
|
||||
relationshipSettings: {
|
||||
|
|
@ -378,7 +378,7 @@ describe('local scan enrichment', () => {
|
|||
});
|
||||
|
||||
expect(result.summary.llmRelationshipValidation).toBe('skipped');
|
||||
expect(getModel).not.toHaveBeenCalledWith('candidateExtraction');
|
||||
expect(generateObject).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('skips relationship detection when scan relationships are disabled', async () => {
|
||||
|
|
@ -628,7 +628,7 @@ describe('local scan enrichment', () => {
|
|||
connector: scanConnector,
|
||||
context: { runId: 'scan-run-batched-embeddings' },
|
||||
providers: {
|
||||
llm: deterministicProviders.llm,
|
||||
llmRuntime: deterministicProviders.llmRuntime,
|
||||
embedding: {
|
||||
dimensions: 3,
|
||||
maxBatchSize: 2,
|
||||
|
|
@ -658,7 +658,7 @@ describe('local scan enrichment', () => {
|
|||
providerIdentity: { provider: 'deterministic', embeddingDimensions: 6 },
|
||||
});
|
||||
|
||||
const getModel = vi.spyOn(providers.llm, 'getModel');
|
||||
const generateText = vi.spyOn(providers.llmRuntime, 'generateText');
|
||||
const embedBatch = vi.spyOn(providers.embedding, 'embedBatch');
|
||||
const second = await runLocalScanEnrichment({
|
||||
connectionId: 'warehouse',
|
||||
|
|
@ -676,7 +676,7 @@ describe('local scan enrichment', () => {
|
|||
expect(first.state.resumedStages).toEqual([]);
|
||||
expect(second.state.resumedStages).toEqual(['descriptions', 'embeddings', 'relationships']);
|
||||
expect(second.state.completedStages).toEqual(['descriptions', 'embeddings', 'relationships']);
|
||||
expect(getModel).not.toHaveBeenCalled();
|
||||
expect(generateText).not.toHaveBeenCalled();
|
||||
expect(embedBatch).not.toHaveBeenCalled();
|
||||
expect(second.descriptionUpdates).toEqual(first.descriptionUpdates);
|
||||
expect(second.embeddingUpdates).toEqual(first.embeddingUpdates);
|
||||
|
|
@ -711,7 +711,7 @@ describe('local scan enrichment', () => {
|
|||
tables: [{ ...firstTable, name: 'customers' }],
|
||||
})),
|
||||
};
|
||||
const getModel = vi.spyOn(providers.llm, 'getModel');
|
||||
const generateText = vi.spyOn(providers.llmRuntime, 'generateText');
|
||||
|
||||
const result = await runLocalScanEnrichment({
|
||||
connectionId: 'warehouse',
|
||||
|
|
@ -727,7 +727,7 @@ describe('local scan enrichment', () => {
|
|||
|
||||
expect(result.state.resumedStages).toEqual([]);
|
||||
expect(result.state.completedStages).toEqual(['descriptions', 'embeddings', 'relationships']);
|
||||
expect(getModel).toHaveBeenCalled();
|
||||
expect(generateText).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('runs providerless enriched scans as relationship-only discovery enrichment', async () => {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import pLimit from 'p-limit';
|
||||
import type { KtxLlmRuntimePort } from '../llm/index.js';
|
||||
import { buildDefaultKtxProjectConfig, type KtxScanRelationshipConfig } from '../project/config.js';
|
||||
import { type KtxDescriptionColumnTable, KtxDescriptionGenerator } from './description-generation.js';
|
||||
import { buildKtxColumnEmbeddingText } from './embedding-text.js';
|
||||
|
|
@ -49,7 +49,7 @@ export interface DeterministicLocalScanEnrichmentProviderOptions {
|
|||
}
|
||||
|
||||
export interface KtxLocalScanEnrichmentProviders {
|
||||
llm: KtxLlmProvider;
|
||||
llmRuntime: KtxLlmRuntimePort;
|
||||
embedding: KtxEmbeddingPort;
|
||||
}
|
||||
|
||||
|
|
@ -190,7 +190,7 @@ export function createDeterministicLocalScanEnrichmentProviders(
|
|||
const dimensions = options.embeddingDimensions ?? 8;
|
||||
const maxBatchSize = options.maxBatchSize ?? 64;
|
||||
return {
|
||||
llm: deterministicLlmProvider(),
|
||||
llmRuntime: deterministicLlmRuntime(),
|
||||
embedding: {
|
||||
dimensions,
|
||||
maxBatchSize,
|
||||
|
|
@ -201,41 +201,16 @@ export function createDeterministicLocalScanEnrichmentProviders(
|
|||
};
|
||||
}
|
||||
|
||||
function deterministicLlmProvider(): KtxLlmProvider {
|
||||
const model = { modelId: 'deterministic-scan', provider: 'deterministic' };
|
||||
function deterministicLlmRuntime(): KtxLlmRuntimePort {
|
||||
return {
|
||||
getModel() {
|
||||
return model as ReturnType<KtxLlmProvider['getModel']>;
|
||||
async generateText(input) {
|
||||
return `Deterministic description for ${input.prompt.slice(0, 64).trim() || 'data source'}`;
|
||||
},
|
||||
getModelByName() {
|
||||
return model as ReturnType<KtxLlmProvider['getModelByName']>;
|
||||
async generateObject() {
|
||||
return { pkCandidates: [], fkCandidates: [] } as never;
|
||||
},
|
||||
cacheMarker() {
|
||||
return undefined;
|
||||
},
|
||||
repairToolCallHandler() {
|
||||
throw new Error('deterministic scan provider does not support tool-call repair');
|
||||
},
|
||||
thinkingProviderOptions() {
|
||||
return {};
|
||||
},
|
||||
telemetryConfig() {
|
||||
return undefined;
|
||||
},
|
||||
promptCachingConfig() {
|
||||
return {
|
||||
enabled: false,
|
||||
systemTtl: '1h',
|
||||
toolsTtl: '1h',
|
||||
historyTtl: '5m',
|
||||
cacheSystem: true,
|
||||
cacheTools: true,
|
||||
cacheHistory: true,
|
||||
vertexFallbackTo5m: false,
|
||||
};
|
||||
},
|
||||
activeBackend() {
|
||||
return 'gateway';
|
||||
async runAgentLoop() {
|
||||
return { stopReason: 'natural' };
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -324,7 +299,7 @@ async function generateDescriptions(input: {
|
|||
}): Promise<KtxLocalScanEnrichmentResult['descriptionUpdates']> {
|
||||
const warningSink = input.warnings;
|
||||
const generator = new KtxDescriptionGenerator({
|
||||
llmProvider: input.providers.llm,
|
||||
llmRuntime: input.providers.llmRuntime,
|
||||
...(input.context.logger ? { logger: input.context.logger } : {}),
|
||||
...(warningSink
|
||||
? {
|
||||
|
|
@ -643,7 +618,7 @@ export async function runLocalScanEnrichment(
|
|||
schema,
|
||||
context: input.context,
|
||||
settings: relationshipSettings,
|
||||
llmProvider: input.providers?.llm ?? null,
|
||||
llmRuntime: input.providers?.llmRuntime ?? null,
|
||||
});
|
||||
|
||||
await relationshipProgress?.update(
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import YAML from 'yaml';
|
||||
import type { SourceAdapter } from '../ingest/index.js';
|
||||
import type { KtxLlmRuntimePort } from '../llm/index.js';
|
||||
import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js';
|
||||
import { filterSnapshotTables, getLocalScanReport, getLocalScanStatus, resolveEnabledTables, runLocalScan } from './local-scan.js';
|
||||
import type { KtxQueryResult, KtxReadOnlyQueryInput, KtxSchemaSnapshot, KtxSchemaTable } from './types.js';
|
||||
|
|
@ -79,25 +79,11 @@ function relationshipSqlResult(
|
|||
throw new Error(`Unexpected relationship SQL: ${input.sql}`);
|
||||
}
|
||||
|
||||
function deterministicLlmProvider(): KtxLlmProvider {
|
||||
function deterministicLlmRuntime(): KtxLlmRuntimePort {
|
||||
return {
|
||||
getModel: () => ({ provider: 'deterministic', modelId: 'deterministic' }) as never,
|
||||
getModelByName: () => ({ provider: 'deterministic', modelId: 'deterministic' }) as never,
|
||||
cacheMarker: () => undefined,
|
||||
repairToolCallHandler: (() => undefined) as never,
|
||||
thinkingProviderOptions: () => ({}),
|
||||
telemetryConfig: () => undefined,
|
||||
promptCachingConfig: () => ({
|
||||
enabled: false,
|
||||
systemTtl: '1h',
|
||||
toolsTtl: '1h',
|
||||
historyTtl: '5m',
|
||||
cacheSystem: true,
|
||||
cacheTools: true,
|
||||
cacheHistory: true,
|
||||
vertexFallbackTo5m: false,
|
||||
}),
|
||||
activeBackend: () => 'gateway',
|
||||
generateText: vi.fn(async (input) => `Deterministic description for ${input.prompt.slice(0, 64).trim() || 'data source'}`),
|
||||
generateObject: vi.fn(async () => ({ pkCandidates: [], fkCandidates: [] }) as never),
|
||||
runAgentLoop: vi.fn(),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -571,7 +557,7 @@ describe('local scan', () => {
|
|||
llmProposals: false,
|
||||
maxLlmTablesPerBatch: 7,
|
||||
};
|
||||
const getModel = vi.fn(() => ({ modelId: 'provider/language-model', provider: 'gateway' }));
|
||||
const generateObject = vi.fn(async () => ({ pkCandidates: [], fkCandidates: [] }));
|
||||
const connector = {
|
||||
id: 'test:warehouse',
|
||||
driver: 'postgres' as const,
|
||||
|
|
@ -650,9 +636,9 @@ describe('local scan', () => {
|
|||
detectRelationships: true,
|
||||
connector,
|
||||
enrichmentProviders: {
|
||||
llm: {
|
||||
...deterministicLlmProvider(),
|
||||
getModel: getModel as never,
|
||||
llmRuntime: {
|
||||
...deterministicLlmRuntime(),
|
||||
generateObject: generateObject as never,
|
||||
},
|
||||
embedding: {
|
||||
dimensions: 8,
|
||||
|
|
@ -668,7 +654,7 @@ describe('local scan', () => {
|
|||
|
||||
expect(result.report.relationships.accepted).toBe(1);
|
||||
expect(result.report.enrichment.llmRelationshipValidation).toBe('skipped');
|
||||
expect(getModel).not.toHaveBeenCalledWith('candidateExtraction');
|
||||
expect(generateObject).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('accepts no-declared-constraint relationships and writes relationship artifacts', async () => {
|
||||
|
|
@ -1206,7 +1192,7 @@ describe('local scan', () => {
|
|||
mode: 'enriched',
|
||||
connector,
|
||||
enrichmentProviders: {
|
||||
llm: deterministicLlmProvider(),
|
||||
llmRuntime: deterministicLlmRuntime(),
|
||||
embedding: {
|
||||
dimensions: 8,
|
||||
maxBatchSize: 64,
|
||||
|
|
@ -1314,7 +1300,7 @@ describe('local scan', () => {
|
|||
return { values: ['1'], nullCount: 0, distinctCount: 1 };
|
||||
},
|
||||
};
|
||||
const llm = deterministicLlmProvider();
|
||||
const llmRuntime = deterministicLlmRuntime();
|
||||
|
||||
const first = await runLocalScan({
|
||||
project,
|
||||
|
|
@ -1323,7 +1309,7 @@ describe('local scan', () => {
|
|||
mode: 'enriched',
|
||||
connector,
|
||||
enrichmentProviders: {
|
||||
llm,
|
||||
llmRuntime,
|
||||
embedding: {
|
||||
dimensions: 8,
|
||||
maxBatchSize: 64,
|
||||
|
|
@ -1344,7 +1330,7 @@ describe('local scan', () => {
|
|||
});
|
||||
expect(first.report.enrichment.embeddings).toBe('failed');
|
||||
|
||||
const getModel = vi.spyOn(llm, 'getModel');
|
||||
const generateObject = vi.spyOn(llmRuntime, 'generateObject');
|
||||
const retry = await runLocalScan({
|
||||
project,
|
||||
adapters: [fetchOnlyAdapter()],
|
||||
|
|
@ -1352,7 +1338,7 @@ describe('local scan', () => {
|
|||
mode: 'enriched',
|
||||
connector,
|
||||
enrichmentProviders: {
|
||||
llm,
|
||||
llmRuntime,
|
||||
embedding: {
|
||||
dimensions: 8,
|
||||
maxBatchSize: 64,
|
||||
|
|
@ -1373,8 +1359,8 @@ describe('local scan', () => {
|
|||
failedStages: [],
|
||||
});
|
||||
expect(retry.report.enrichment.embeddings).toBe('completed');
|
||||
expect(getModel).toHaveBeenCalledTimes(1);
|
||||
expect(getModel).toHaveBeenCalledWith('candidateExtraction');
|
||||
expect(generateObject).toHaveBeenCalledTimes(1);
|
||||
expect(generateObject).toHaveBeenCalledWith(expect.objectContaining({ role: 'candidateExtraction' }));
|
||||
expect(embeddingAttempts).toBe(2);
|
||||
|
||||
const reportPath = retry.report.artifactPaths.reportPath;
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import {
|
|||
} from '../ingest/index.js';
|
||||
import {
|
||||
createLocalKtxEmbeddingProviderFromConfig,
|
||||
createLocalKtxLlmProviderFromConfig,
|
||||
createLocalKtxLlmRuntimeFromConfig,
|
||||
KtxScanEmbeddingPortAdapter,
|
||||
} from '../llm/index.js';
|
||||
import type { KtxProjectLlmConfig, KtxScanEnrichmentConfig, KtxScanRelationshipConfig } from '../project/config.js';
|
||||
|
|
@ -150,6 +150,7 @@ interface LocalScanEnrichmentProviderDeps {
|
|||
createKtxLlmProvider?: typeof createKtxLlmProvider;
|
||||
createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
projectDir?: string;
|
||||
}
|
||||
|
||||
export function createLocalScanEnrichmentProvidersFromConfig(
|
||||
|
|
@ -165,14 +166,17 @@ export function createLocalScanEnrichmentProvidersFromConfig(
|
|||
return null;
|
||||
}
|
||||
|
||||
const llm = createLocalKtxLlmProviderFromConfig(llmConfig, deps);
|
||||
const llmRuntime = createLocalKtxLlmRuntimeFromConfig(llmConfig, {
|
||||
...deps,
|
||||
projectDir: deps.projectDir,
|
||||
});
|
||||
const embeddingProvider = createLocalKtxEmbeddingProviderFromConfig(config.embeddings, deps);
|
||||
if (!llm || !embeddingProvider) {
|
||||
if (!llmRuntime || !embeddingProvider) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
llm,
|
||||
llmRuntime,
|
||||
embedding: new KtxScanEmbeddingPortAdapter(embeddingProvider),
|
||||
};
|
||||
}
|
||||
|
|
@ -378,7 +382,9 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise<LocalS
|
|||
connector && (mode !== 'structural' || options.detectRelationships)
|
||||
? options.enrichmentProviders !== undefined
|
||||
? options.enrichmentProviders
|
||||
: createLocalScanEnrichmentProvidersFromConfig(options.project.config.scan.enrichment, options.project.config.llm)
|
||||
: createLocalScanEnrichmentProvidersFromConfig(options.project.config.scan.enrichment, options.project.config.llm, {
|
||||
projectDir: options.project.projectDir,
|
||||
})
|
||||
: null;
|
||||
|
||||
await options.progress?.update(0.15, 'Inspecting database schema');
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import { gunzipSync } from 'node:zlib';
|
|||
import Database from 'better-sqlite3';
|
||||
import YAML from 'yaml';
|
||||
import { z } from 'zod';
|
||||
import type { KtxLlmRuntimePort } from '../llm/index.js';
|
||||
import type { KtxEnrichedRelationship, KtxEnrichedSchema, KtxRelationshipType } from './enrichment-types.js';
|
||||
import { snapshotToKtxEnrichedSchema } from './local-enrichment.js';
|
||||
import type { KtxRelationshipDiscoveryCandidate } from './relationship-candidates.js';
|
||||
|
|
@ -13,7 +14,6 @@ import {
|
|||
generateKtxRelationshipDiscoveryCandidates,
|
||||
mergeKtxRelationshipDiscoveryCandidates,
|
||||
} from './relationship-candidates.js';
|
||||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import { proposeKtxRelationshipCandidatesWithLlm } from './relationship-llm-proposal.js';
|
||||
import {
|
||||
discoverKtxCompositeRelationships,
|
||||
|
|
@ -527,7 +527,7 @@ export function isKtxRelationshipBenchmarkTuningEligible(input: {
|
|||
}
|
||||
|
||||
export function ktxRelationshipBenchmarkDetectorWithLlm(
|
||||
llmProvider: KtxLlmProvider,
|
||||
llmRuntime: KtxLlmRuntimePort,
|
||||
): KtxRelationshipBenchmarkDetector {
|
||||
return {
|
||||
async detect(input) {
|
||||
|
|
@ -566,7 +566,7 @@ export function ktxRelationshipBenchmarkDetectorWithLlm(
|
|||
connectionId: input.snapshot.connectionId,
|
||||
schema: input.schema,
|
||||
profile: profiles,
|
||||
llmProvider,
|
||||
llmRuntime,
|
||||
});
|
||||
const candidates = mergeKtxRelationshipDiscoveryCandidates([
|
||||
...broadRelationshipCandidates,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import Database from 'better-sqlite3';
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { KtxLlmRuntimePort } from '../llm/index.js';
|
||||
import { buildDefaultKtxProjectConfig } from '../project/config.js';
|
||||
import { snapshotToKtxEnrichedSchema } from './local-enrichment.js';
|
||||
import {
|
||||
|
|
@ -216,29 +216,11 @@ function connector(executor: InMemorySqliteExecutor | null): KtxScanConnector {
|
|||
};
|
||||
}
|
||||
|
||||
function llmProvider(): KtxLlmProvider {
|
||||
const model = { modelId: 'claude-sonnet-4-6', provider: 'anthropic' };
|
||||
function llmRuntime(output: unknown): KtxLlmRuntimePort {
|
||||
return {
|
||||
getModel: vi.fn(() => model as ReturnType<KtxLlmProvider['getModel']>),
|
||||
getModelByName: vi.fn(() => model as ReturnType<KtxLlmProvider['getModelByName']>),
|
||||
cacheMarker: vi.fn(),
|
||||
repairToolCallHandler: vi.fn(),
|
||||
thinkingProviderOptions: vi.fn(() => ({})),
|
||||
telemetryConfig: vi.fn(() => undefined),
|
||||
promptCachingConfig: vi.fn(
|
||||
() =>
|
||||
({
|
||||
enabled: false,
|
||||
systemTtl: '1h',
|
||||
toolsTtl: '1h',
|
||||
historyTtl: '5m',
|
||||
cacheSystem: true,
|
||||
cacheTools: true,
|
||||
cacheHistory: true,
|
||||
vertexFallbackTo5m: false,
|
||||
}) as ReturnType<KtxLlmProvider['promptCachingConfig']>,
|
||||
),
|
||||
activeBackend: vi.fn(() => 'anthropic' as ReturnType<KtxLlmProvider['activeBackend']>),
|
||||
generateText: vi.fn(),
|
||||
generateObject: vi.fn(async () => output) as KtxLlmRuntimePort['generateObject'],
|
||||
runAgentLoop: vi.fn(),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -505,21 +487,19 @@ describe('production relationship discovery', () => {
|
|||
INSERT INTO customers (id) VALUES (1), (2);
|
||||
INSERT INTO orders (id, buyer_ref) VALUES (10, 1), (11, 2);
|
||||
`);
|
||||
const generateText = vi.fn(async () => ({
|
||||
output: {
|
||||
pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.91, rationale: 'Unique customer key.' }],
|
||||
fkCandidates: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'buyer_ref',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
confidence: 0.89,
|
||||
rationale: 'Buyer reference values align with customer identifiers.',
|
||||
},
|
||||
],
|
||||
},
|
||||
}));
|
||||
const llmOutput = {
|
||||
pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.91, rationale: 'Unique customer key.' }],
|
||||
fkCandidates: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'buyer_ref',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
confidence: 0.89,
|
||||
rationale: 'Buyer reference values align with customer identifiers.',
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const result = await discoverKtxRelationships({
|
||||
connectionId: 'warehouse',
|
||||
|
|
@ -528,8 +508,7 @@ describe('production relationship discovery', () => {
|
|||
schema: snapshotToKtxEnrichedSchema(llmOnlyRelationshipSnapshot()),
|
||||
context: { runId: 'llm-relationship-orchestrator' },
|
||||
settings: relationshipSettings(),
|
||||
llmProvider: llmProvider(),
|
||||
generateText,
|
||||
llmRuntime: llmRuntime(llmOutput),
|
||||
});
|
||||
|
||||
expect(result.llmRelationshipValidation).toBe('completed');
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import type { KtxLlmRuntimePort } from '../llm/index.js';
|
||||
import type { KtxScanRelationshipConfig } from '../project/config.js';
|
||||
import type { KtxEnrichedRelationship, KtxEnrichedSchema, KtxRelationshipUpdate } from './enrichment-types.js';
|
||||
import {
|
||||
|
|
@ -15,10 +15,7 @@ import {
|
|||
type KtxResolvedRelationshipDiscoveryCandidate,
|
||||
resolveKtxRelationshipGraph,
|
||||
} from './relationship-graph-resolver.js';
|
||||
import {
|
||||
type KtxRelationshipLlmProposalGenerateText,
|
||||
proposeKtxRelationshipCandidatesWithLlm,
|
||||
} from './relationship-llm-proposal.js';
|
||||
import { proposeKtxRelationshipCandidatesWithLlm } from './relationship-llm-proposal.js';
|
||||
import {
|
||||
createKtxRelationshipProfileCache,
|
||||
type KtxRelationshipProfileArtifact,
|
||||
|
|
@ -42,8 +39,7 @@ export interface DiscoverKtxRelationshipsInput {
|
|||
schema: KtxEnrichedSchema;
|
||||
context: KtxScanContext;
|
||||
settings: KtxScanRelationshipConfig;
|
||||
llmProvider?: KtxLlmProvider | null;
|
||||
generateText?: KtxRelationshipLlmProposalGenerateText;
|
||||
llmRuntime?: KtxLlmRuntimePort | null;
|
||||
}
|
||||
|
||||
export interface DiscoverKtxRelationshipsResult {
|
||||
|
|
@ -246,11 +242,10 @@ export async function discoverKtxRelationships(
|
|||
connectionId: input.connectionId,
|
||||
schema: input.schema,
|
||||
profile,
|
||||
llmProvider: input.llmProvider ?? null,
|
||||
llmRuntime: input.llmRuntime ?? null,
|
||||
settings: {
|
||||
maxTablesPerBatch: input.settings.maxLlmTablesPerBatch,
|
||||
},
|
||||
generateText: input.generateText,
|
||||
})
|
||||
: { candidates: [], warnings: [], llmCalls: 0, summary: 'skipped' as const };
|
||||
const candidates = mergeKtxRelationshipDiscoveryCandidates([
|
||||
|
|
|
|||
|
|
@ -1,32 +1,14 @@
|
|||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { KtxLlmRuntimePort } from '../llm/index.js';
|
||||
import type { KtxEnrichedColumn, KtxEnrichedSchema, KtxEnrichedTable } from './enrichment-types.js';
|
||||
import type { KtxRelationshipProfileArtifact } from './relationship-profiling.js';
|
||||
import { proposeKtxRelationshipCandidatesWithLlm } from './relationship-llm-proposal.js';
|
||||
|
||||
function llmProvider(provider = 'anthropic'): KtxLlmProvider {
|
||||
const model = { modelId: 'claude-sonnet-4-6', provider };
|
||||
function llmRuntime(output?: unknown): KtxLlmRuntimePort {
|
||||
return {
|
||||
getModel: vi.fn(() => model as ReturnType<KtxLlmProvider['getModel']>),
|
||||
getModelByName: vi.fn(() => model as ReturnType<KtxLlmProvider['getModelByName']>),
|
||||
cacheMarker: vi.fn(),
|
||||
repairToolCallHandler: vi.fn(),
|
||||
thinkingProviderOptions: vi.fn(() => ({})),
|
||||
telemetryConfig: vi.fn(() => undefined),
|
||||
promptCachingConfig: vi.fn(
|
||||
() =>
|
||||
({
|
||||
enabled: false,
|
||||
systemTtl: '1h',
|
||||
toolsTtl: '1h',
|
||||
historyTtl: '5m',
|
||||
cacheSystem: true,
|
||||
cacheTools: true,
|
||||
cacheHistory: true,
|
||||
vertexFallbackTo5m: false,
|
||||
}) as ReturnType<KtxLlmProvider['promptCachingConfig']>,
|
||||
),
|
||||
activeBackend: vi.fn(() => provider as ReturnType<KtxLlmProvider['activeBackend']>),
|
||||
generateText: vi.fn(),
|
||||
generateObject: vi.fn(async () => output) as KtxLlmRuntimePort['generateObject'],
|
||||
runAgentLoop: vi.fn(),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -125,28 +107,25 @@ function profile(): KtxRelationshipProfileArtifact {
|
|||
|
||||
describe('relationship LLM proposals', () => {
|
||||
it('maps valid structured FK proposals into review candidates with rationale evidence', async () => {
|
||||
const generateText = vi.fn(async () => ({
|
||||
output: {
|
||||
pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.94, rationale: 'Unique customer identifier.' }],
|
||||
fkCandidates: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'buyer_ref',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
confidence: 0.88,
|
||||
rationale: 'Buyer reference values match customer identifiers.',
|
||||
},
|
||||
],
|
||||
},
|
||||
}));
|
||||
const runtime = llmRuntime({
|
||||
pkCandidates: [{ table: 'customers', column: 'id', confidence: 0.94, rationale: 'Unique customer identifier.' }],
|
||||
fkCandidates: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'buyer_ref',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
confidence: 0.88,
|
||||
rationale: 'Buyer reference values match customer identifiers.',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const result = await proposeKtxRelationshipCandidatesWithLlm({
|
||||
connectionId: 'warehouse',
|
||||
schema: schema(),
|
||||
profile: profile(),
|
||||
llmProvider: llmProvider(),
|
||||
generateText,
|
||||
llmRuntime: runtime,
|
||||
});
|
||||
|
||||
expect(result.summary).toBe('completed');
|
||||
|
|
@ -164,42 +143,27 @@ describe('relationship LLM proposals', () => {
|
|||
reasons: ['llm_proposal', 'llm_pk_proposal'],
|
||||
},
|
||||
});
|
||||
expect(generateText).toHaveBeenCalledWith(
|
||||
expect(runtime.generateObject).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
system: expect.objectContaining({
|
||||
role: 'system',
|
||||
content: expect.stringContaining('You are helping KTX review possible SQL relationships'),
|
||||
}),
|
||||
messages: expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
role: 'user',
|
||||
content: expect.stringContaining('"tables"'),
|
||||
}),
|
||||
]),
|
||||
role: 'candidateExtraction',
|
||||
system: expect.stringContaining('You are helping KTX review possible SQL relationships'),
|
||||
prompt: expect.stringContaining('"tables"'),
|
||||
}),
|
||||
);
|
||||
const call = (
|
||||
generateText.mock.calls as unknown as Array<[{ messages: Array<{ role: string; content: string }> }]>
|
||||
)[0]?.[0];
|
||||
const userMessage = call?.messages.find((m) => m.role === 'user');
|
||||
expect(userMessage?.content).not.toContain('You are helping KTX review possible SQL relationships');
|
||||
expect(call?.messages.some((m) => m.role === 'system')).toBe(false);
|
||||
const call = vi.mocked(runtime.generateObject).mock.calls[0]?.[0];
|
||||
expect(call?.prompt).not.toContain('You are helping KTX review possible SQL relationships');
|
||||
});
|
||||
|
||||
it('skips deterministic providers without calling generateText', async () => {
|
||||
const generateText = vi.fn();
|
||||
|
||||
it('skips when no runtime is configured', async () => {
|
||||
const result = await proposeKtxRelationshipCandidatesWithLlm({
|
||||
connectionId: 'warehouse',
|
||||
schema: schema(),
|
||||
profile: profile(),
|
||||
llmProvider: llmProvider('deterministic'),
|
||||
generateText,
|
||||
llmRuntime: null,
|
||||
});
|
||||
|
||||
expect(result).toMatchObject({ candidates: [], llmCalls: 0, summary: 'skipped' });
|
||||
expect(result.warnings).toEqual([]);
|
||||
expect(generateText).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('returns recoverable warnings for invalid references and generation failures', async () => {
|
||||
|
|
@ -207,22 +171,19 @@ describe('relationship LLM proposals', () => {
|
|||
connectionId: 'warehouse',
|
||||
schema: schema(),
|
||||
profile: profile(),
|
||||
llmProvider: llmProvider(),
|
||||
generateText: vi.fn(async () => ({
|
||||
output: {
|
||||
pkCandidates: [],
|
||||
fkCandidates: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'missing_column',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
confidence: 0.7,
|
||||
rationale: 'Invalid source column.',
|
||||
},
|
||||
],
|
||||
},
|
||||
})),
|
||||
llmRuntime: llmRuntime({
|
||||
pkCandidates: [],
|
||||
fkCandidates: [
|
||||
{
|
||||
fromTable: 'orders',
|
||||
fromColumn: 'missing_column',
|
||||
toTable: 'customers',
|
||||
toColumn: 'id',
|
||||
confidence: 0.7,
|
||||
rationale: 'Invalid source column.',
|
||||
},
|
||||
],
|
||||
}),
|
||||
});
|
||||
expect(invalidReference.candidates).toEqual([]);
|
||||
expect(invalidReference.summary).toBe('completed');
|
||||
|
|
@ -235,10 +196,13 @@ describe('relationship LLM proposals', () => {
|
|||
connectionId: 'warehouse',
|
||||
schema: schema(),
|
||||
profile: profile(),
|
||||
llmProvider: llmProvider(),
|
||||
generateText: vi.fn(async () => {
|
||||
throw new Error('model unavailable');
|
||||
}),
|
||||
llmRuntime: {
|
||||
generateText: vi.fn(),
|
||||
generateObject: vi.fn(async () => {
|
||||
throw new Error('model unavailable');
|
||||
}),
|
||||
runAgentLoop: vi.fn(),
|
||||
},
|
||||
});
|
||||
expect(failed).toMatchObject({ candidates: [], llmCalls: 1, summary: 'failed' });
|
||||
expect(failed.warnings[0]).toMatchObject({
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
import type { KtxLlmProvider } from '@ktx/llm';
|
||||
import type { generateText } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import { generateKtxObject } from '../llm/index.js';
|
||||
import { generateKtxObject, type KtxLlmRuntimePort } from '../llm/index.js';
|
||||
import type { KtxEnrichedColumn, KtxEnrichedSchema, KtxEnrichedTable } from './enrichment-types.js';
|
||||
import {
|
||||
normalizeKtxRelationshipName,
|
||||
|
|
@ -32,10 +30,6 @@ const relationshipLlmProposalSchema = z.object({
|
|||
});
|
||||
|
||||
type KtxRelationshipLlmProposalOutput = z.infer<typeof relationshipLlmProposalSchema>;
|
||||
type GenerateTextInput = Parameters<typeof generateText>[0];
|
||||
export type KtxRelationshipLlmProposalGenerateText = (
|
||||
input: GenerateTextInput,
|
||||
) => Promise<{ text?: string; output?: unknown }>;
|
||||
|
||||
export interface KtxRelationshipLlmProposalSettings {
|
||||
maxTablesPerBatch: number;
|
||||
|
|
@ -48,9 +42,8 @@ export interface ProposeKtxRelationshipCandidatesWithLlmInput {
|
|||
connectionId: string;
|
||||
schema: KtxEnrichedSchema;
|
||||
profile: KtxRelationshipProfileArtifact;
|
||||
llmProvider: KtxLlmProvider | null;
|
||||
llmRuntime: KtxLlmRuntimePort | null;
|
||||
settings?: Partial<KtxRelationshipLlmProposalSettings>;
|
||||
generateText?: KtxRelationshipLlmProposalGenerateText;
|
||||
}
|
||||
|
||||
export interface KtxRelationshipLlmProposalResult {
|
||||
|
|
@ -77,11 +70,6 @@ function clampConfidence(value: number): number {
|
|||
return Number(Math.max(0, Math.min(1, value)).toFixed(3));
|
||||
}
|
||||
|
||||
function modelIsDeterministic(llmProvider: KtxLlmProvider): boolean {
|
||||
const model = llmProvider.getModel('candidateExtraction');
|
||||
return (model as { provider?: string }).provider === 'deterministic';
|
||||
}
|
||||
|
||||
function findTable(schema: KtxEnrichedSchema, name: string): KtxEnrichedTable | null {
|
||||
const normalized = name.toLowerCase();
|
||||
return schema.tables.find((table) => table.ref.name.toLowerCase() === normalized) ?? null;
|
||||
|
|
@ -238,7 +226,7 @@ function generationFailureWarning(error: unknown): KtxScanWarning {
|
|||
export async function proposeKtxRelationshipCandidatesWithLlm(
|
||||
input: ProposeKtxRelationshipCandidatesWithLlmInput,
|
||||
): Promise<KtxRelationshipLlmProposalResult> {
|
||||
if (!input.llmProvider || modelIsDeterministic(input.llmProvider)) {
|
||||
if (!input.llmRuntime) {
|
||||
return { candidates: [], warnings: [], llmCalls: 0, summary: 'skipped' };
|
||||
}
|
||||
|
||||
|
|
@ -256,12 +244,11 @@ export async function proposeKtxRelationshipCandidatesWithLlm(
|
|||
KtxRelationshipLlmProposalOutput,
|
||||
typeof relationshipLlmProposalSchema
|
||||
>({
|
||||
llmProvider: input.llmProvider,
|
||||
runtime: input.llmRuntime,
|
||||
role: 'candidateExtraction',
|
||||
system,
|
||||
prompt,
|
||||
schema: relationshipLlmProposalSchema,
|
||||
generateText: input.generateText,
|
||||
});
|
||||
const output = relationshipLlmProposalSchema.parse(generated);
|
||||
const mapped = mapValidProposals(input.schema, output, settings);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
import { tool } from 'ai';
|
||||
import { z, type ZodType } from 'zod';
|
||||
import { noopLogger, type KtxLogger } from '../core/index.js';
|
||||
import type { KtxRuntimeToolDescriptor } from '../llm/runtime-port.js';
|
||||
import { normalizeKtxRuntimeToolOutput } from '../llm/runtime-tools.js';
|
||||
import type { IngestToolMetadata, ToolSession } from './tool-session.js';
|
||||
|
||||
export interface ToolOutput<T = unknown> {
|
||||
|
|
@ -164,6 +166,23 @@ export abstract class BaseTool<TInput extends ZodType = ZodType> {
|
|||
});
|
||||
}
|
||||
|
||||
toRuntimeTool(context: ToolContext): KtxRuntimeToolDescriptor {
|
||||
const toolName = this.name;
|
||||
return {
|
||||
name: toolName,
|
||||
description: this.description,
|
||||
inputSchema: this.inputSchema as unknown as KtxRuntimeToolDescriptor['inputSchema'],
|
||||
execute: async (params) => {
|
||||
const callContext = { ...context };
|
||||
if (!callContext.userId) {
|
||||
throw new Error('Authentication required: userId must be provided in ToolContext');
|
||||
}
|
||||
const parsedInput = this.parseInput(params as Record<string, any>);
|
||||
return normalizeKtxRuntimeToolOutput(await this.call(parsedInput, callContext));
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
parseInput(input: Record<string, any>): z.infer<TInput> {
|
||||
return this.inputSchema.parse(input);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,4 +61,17 @@ describe('KTX LLM health check', () => {
|
|||
message: '401 invalid x-api-key [redacted]',
|
||||
});
|
||||
});
|
||||
|
||||
it('reports claude-code as unsupported by the AI SDK health check', async () => {
|
||||
const result = await runKtxLlmHealthCheck({
|
||||
backend: 'claude-code',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
promptCaching: { enabled: false },
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
ok: false,
|
||||
message: expect.stringContaining('claude-code is not an AI SDK LanguageModel backend'),
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -302,4 +302,14 @@ describe('createKtxLlmProvider', () => {
|
|||
expect(provider.promptCachingConfig().enabled).toBe(false);
|
||||
expect(provider.cacheMarker('1h', 'claude-sonnet-4-6')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('throws instead of falling through when an unsupported LLM backend is passed to the AI SDK provider factory', () => {
|
||||
expect(() =>
|
||||
createKtxLlmProvider({
|
||||
backend: 'claude-code',
|
||||
modelSlots: { default: 'sonnet' },
|
||||
promptCaching: { enabled: false },
|
||||
}),
|
||||
).toThrow('claude-code is not an AI SDK LanguageModel backend');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -175,14 +175,18 @@ class DefaultKtxLlmProvider implements KtxLlmProvider {
|
|||
return (modelId) => vertex(modelId);
|
||||
}
|
||||
|
||||
const gateway = (deps.createGateway ?? createGateway)({
|
||||
...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}),
|
||||
...(config.gateway?.baseURL ? { baseURL: config.gateway.baseURL } : {}),
|
||||
headers: {
|
||||
'anthropic-beta': ANTHROPIC_BETA_HEADER,
|
||||
},
|
||||
});
|
||||
return (modelId) => gateway(modelId);
|
||||
if (config.backend === 'gateway') {
|
||||
const gateway = (deps.createGateway ?? createGateway)({
|
||||
...(config.gateway?.apiKey ? { apiKey: config.gateway.apiKey } : {}),
|
||||
...(config.gateway?.baseURL ? { baseURL: config.gateway.baseURL } : {}),
|
||||
headers: {
|
||||
'anthropic-beta': ANTHROPIC_BETA_HEADER,
|
||||
},
|
||||
});
|
||||
return (modelId) => gateway(modelId);
|
||||
}
|
||||
|
||||
throw new Error(`${config.backend} is not an AI SDK LanguageModel backend; use KtxLlmRuntimePort`);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import type { LanguageModel, TelemetrySettings, ToolCallRepairFunction, ToolSet
|
|||
export const KTX_MODEL_ROLES = ['default', 'triage', 'candidateExtraction', 'curator', 'reconcile', 'repair'] as const;
|
||||
|
||||
export type KtxModelRole = (typeof KTX_MODEL_ROLES)[number];
|
||||
export type KtxLlmBackend = 'anthropic' | 'vertex' | 'gateway';
|
||||
export type KtxLlmBackend = 'anthropic' | 'vertex' | 'gateway' | 'claude-code';
|
||||
export type KtxPromptCacheTtl = '5m' | '1h';
|
||||
|
||||
export type KtxJsonValue =
|
||||
|
|
|
|||
2245
pnpm-lock.yaml
generated
2245
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,21 +1,26 @@
|
|||
{
|
||||
"schemaVersion": 1,
|
||||
"publicNpmPackageVersion": "0.1.0-rc.1",
|
||||
"releaseMode": "npm-public-release-ready",
|
||||
"npm": {
|
||||
"publish": true,
|
||||
"registry": null,
|
||||
"access": "public",
|
||||
"tag": "next",
|
||||
"packages": ["@kaelio/ktx"]
|
||||
"packages": [
|
||||
"@kaelio/ktx"
|
||||
]
|
||||
},
|
||||
"python": {
|
||||
"publish": false,
|
||||
"repository": null,
|
||||
"packages": ["kaelio-ktx"]
|
||||
"packages": [
|
||||
"kaelio-ktx"
|
||||
]
|
||||
},
|
||||
"publishedPackageSmoke": {
|
||||
"packageName": "@kaelio/ktx",
|
||||
"version": "0.1.0-rc.0",
|
||||
"version": "0.1.0-rc.1",
|
||||
"registry": null
|
||||
},
|
||||
"runtimeInstaller": {
|
||||
|
|
|
|||
|
|
@ -6,10 +6,15 @@ import { dirname, join, resolve } from 'node:path';
|
|||
import { fileURLToPath, pathToFileURL } from 'node:url';
|
||||
import { promisify } from 'node:util';
|
||||
|
||||
import {
|
||||
PUBLIC_NPM_PACKAGE_NAME,
|
||||
publicNpmPackageVersion,
|
||||
} from './public-npm-release-metadata.mjs';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
export const PUBLIC_NPM_PACKAGE_NAME = '@kaelio/ktx';
|
||||
export const PUBLIC_NPM_PACKAGE_VERSION = '0.1.0-rc.0';
|
||||
export const PUBLIC_NPM_PACKAGE_VERSION = publicNpmPackageVersion();
|
||||
export { PUBLIC_NPM_PACKAGE_NAME };
|
||||
|
||||
export function publicNpmPackageTarballName(version = PUBLIC_NPM_PACKAGE_VERSION) {
|
||||
return `kaelio-ktx-${version}.tgz`;
|
||||
|
|
|
|||
|
|
@ -142,9 +142,9 @@ describe('publicNpmPackageLayout', () => {
|
|||
it('uses the first public npm release version for the tarball name', () => {
|
||||
const layout = publicNpmPackageLayout('/repo/ktx');
|
||||
|
||||
assert.equal(PUBLIC_NPM_PACKAGE_VERSION, '0.1.0-rc.0');
|
||||
assert.equal(publicNpmPackageTarballName(), 'kaelio-ktx-0.1.0-rc.0.tgz');
|
||||
assert.equal(layout.tarballPath, '/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.0.tgz');
|
||||
assert.equal(PUBLIC_NPM_PACKAGE_VERSION, '0.1.0-rc.1');
|
||||
assert.equal(publicNpmPackageTarballName(), 'kaelio-ktx-0.1.0-rc.1.tgz');
|
||||
assert.equal(layout.tarballPath, '/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.1.tgz');
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -211,7 +211,7 @@ describe('publicNpmPackageJson', () => {
|
|||
);
|
||||
|
||||
assert.equal(packageJson.name, PUBLIC_NPM_PACKAGE_NAME);
|
||||
assert.equal(packageJson.version, '0.1.0-rc.0');
|
||||
assert.equal(packageJson.version, '0.1.0-rc.1');
|
||||
assert.equal(packageJson.private, false);
|
||||
assert.deepEqual(packageJson.bin, { ktx: './dist/bin.js' });
|
||||
assert.deepEqual(packageJson.dependencies, { commander: '14.0.3' });
|
||||
|
|
@ -267,7 +267,7 @@ describe('publicNpmPackCommand', () => {
|
|||
'--config.node-linker=hoisted',
|
||||
'pack',
|
||||
'--out',
|
||||
'/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.0.tgz',
|
||||
'/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.1.tgz',
|
||||
],
|
||||
cwd: '/repo/ktx/dist/public-npm-package',
|
||||
});
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ const runtimeAssetPatterns = [/^packages\/[^/]+\/prompts\/.+\.md$/, /^packages\/
|
|||
const identifierSkipPrefixes = ['docs/', 'docs-site/', 'examples/', 'python/ktx-sl/plans/', 'python/ktx-sl/openspec/'];
|
||||
const identifierAllowPatterns = [
|
||||
/^packages\/cli\/src\/(?:index|managed-local-embeddings|managed-python-command|managed-python-daemon|managed-python-runtime|runtime)(?:\.test)?\.ts$/,
|
||||
/^scripts\/(?:build-public-npm-package|build-python-runtime-wheel|local-embeddings-runtime-smoke|package-artifacts|publish-public-npm-package|published-package-smoke|release-readiness)(?:\.test)?\.mjs$/,
|
||||
/^scripts\/(?:build-public-npm-package|build-python-runtime-wheel|local-embeddings-runtime-smoke|package-artifacts|public-npm-release-metadata|publish-public-npm-package|published-package-smoke|release-readiness)(?:\.test)?\.mjs$/,
|
||||
];
|
||||
const forbiddenIdentifierTerms = ['kae' + 'lio', 'Kae' + 'lio', 'KAE' + 'LIO_'];
|
||||
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ describe('scanFileContent', () => {
|
|||
|
||||
assert.equal(scanFileContent('scripts/local-embeddings-runtime-smoke.mjs', `@${name}/ktx`).length, 0);
|
||||
assert.equal(scanFileContent('scripts/package-artifacts.test.mjs', `${name}-ktx`).length, 0);
|
||||
assert.equal(scanFileContent('scripts/public-npm-release-metadata.mjs', `@${name}/ktx`).length, 0);
|
||||
assert.equal(scanFileContent('scripts/publish-public-npm-package.test.mjs', `@${name}/ktx`).length, 0);
|
||||
assert.equal(scanFileContent('packages/cli/src/managed-python-runtime.ts', `${name}_ktx`).length, 0);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -36,8 +36,8 @@ describe('localEmbeddingsSmokeOptIn', () => {
|
|||
describe('publicKtxTarballName', () => {
|
||||
it('selects the public @kaelio/ktx tarball name', () => {
|
||||
assert.equal(
|
||||
publicKtxTarballName(['kaelio-ktx-0.1.0-rc.0.tgz', 'ignore-me.tgz']),
|
||||
'kaelio-ktx-0.1.0-rc.0.tgz',
|
||||
publicKtxTarballName(['kaelio-ktx-0.1.0-rc.1.tgz', 'ignore-me.tgz']),
|
||||
'kaelio-ktx-0.1.0-rc.1.tgz',
|
||||
);
|
||||
});
|
||||
|
||||
|
|
@ -50,7 +50,7 @@ describe('publicKtxTarballName', () => {
|
|||
|
||||
it('fails when multiple public package tarballs are present', () => {
|
||||
assert.throws(
|
||||
() => publicKtxTarballName(['kaelio-ktx-0.1.0-rc.0.tgz', 'kaelio-ktx-0.2.0.tgz']),
|
||||
() => publicKtxTarballName(['kaelio-ktx-0.1.0-rc.1.tgz', 'kaelio-ktx-0.2.0.tgz']),
|
||||
/Expected exactly one @kaelio\/ktx tarball/,
|
||||
);
|
||||
});
|
||||
|
|
@ -60,7 +60,7 @@ describe('expectedPublicKtxVersionPattern', () => {
|
|||
it('matches the public package version and rejects the private workspace version', () => {
|
||||
const pattern = expectedPublicKtxVersionPattern();
|
||||
|
||||
assert.match('@kaelio/ktx 0.1.0-rc.0\n', pattern);
|
||||
assert.match('@kaelio/ktx 0.1.0-rc.1\n', pattern);
|
||||
assert.doesNotMatch('@kaelio/ktx 0.0.0-private\n', pattern);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -14,9 +14,9 @@ import {
|
|||
} from './build-python-runtime-wheel.mjs';
|
||||
import {
|
||||
PUBLIC_NPM_PACKAGE_NAME,
|
||||
PUBLIC_NPM_PACKAGE_VERSION,
|
||||
publicNpmPackageTarballName,
|
||||
} from './build-public-npm-package.mjs';
|
||||
import { publicNpmPackageVersion } from './public-npm-release-metadata.mjs';
|
||||
|
||||
export {
|
||||
RUNTIME_WHEEL_DISTRIBUTION_NAME,
|
||||
|
|
@ -45,24 +45,27 @@ function scriptRootDir() {
|
|||
return resolve(dirname(fileURLToPath(import.meta.url)), '..');
|
||||
}
|
||||
|
||||
function npmPackageTarballName(packageName) {
|
||||
function npmPackageTarballName(packageName, version) {
|
||||
if (packageName !== PUBLIC_NPM_PACKAGE_NAME) {
|
||||
throw new Error(`Unsupported npm artifact package: ${packageName}`);
|
||||
}
|
||||
return publicNpmPackageTarballName(PUBLIC_NPM_PACKAGE_VERSION);
|
||||
return publicNpmPackageTarballName(version);
|
||||
}
|
||||
|
||||
function npmPackageTarballs(npmDir) {
|
||||
function npmPackageTarballs(npmDir, version) {
|
||||
return Object.fromEntries(
|
||||
NPM_ARTIFACT_PACKAGES.map((packageInfo) => [packageInfo.name, join(npmDir, npmPackageTarballName(packageInfo.name))]),
|
||||
NPM_ARTIFACT_PACKAGES.map((packageInfo) => [
|
||||
packageInfo.name,
|
||||
join(npmDir, npmPackageTarballName(packageInfo.name, version)),
|
||||
]),
|
||||
);
|
||||
}
|
||||
|
||||
export function packageArtifactLayout(rootDir = scriptRootDir()) {
|
||||
export function packageArtifactLayout(rootDir = scriptRootDir(), version = publicNpmPackageVersion(rootDir)) {
|
||||
const artifactDir = join(rootDir, 'dist', 'artifacts');
|
||||
const npmDir = join(artifactDir, 'npm');
|
||||
const pythonDir = join(artifactDir, 'python');
|
||||
const npmTarballs = npmPackageTarballs(npmDir);
|
||||
const npmTarballs = npmPackageTarballs(npmDir, version);
|
||||
|
||||
return {
|
||||
rootDir,
|
||||
|
|
@ -170,7 +173,7 @@ function releaseMetadataEntry({ ecosystem, packageName, packageRoot, packageVers
|
|||
};
|
||||
}
|
||||
|
||||
async function readNpmPackageMetadata(rootDir, packageInfo) {
|
||||
async function readNpmPackageMetadata(rootDir, packageInfo, version) {
|
||||
const packageJson = await readJson(join(rootDir, packageInfo.packageRoot, 'package.json'));
|
||||
const expectedSourceName = packageInfo.name === PUBLIC_NPM_PACKAGE_NAME ? '@ktx/cli' : packageInfo.name;
|
||||
if (packageJson.name !== expectedSourceName) {
|
||||
|
|
@ -183,14 +186,14 @@ async function readNpmPackageMetadata(rootDir, packageInfo) {
|
|||
ecosystem: 'npm',
|
||||
packageName: packageInfo.name,
|
||||
packageRoot: packageInfo.packageRoot,
|
||||
packageVersion: isPublicKtxPackage ? PUBLIC_NPM_PACKAGE_VERSION : packageJson.version,
|
||||
packageVersion: isPublicKtxPackage ? version : packageJson.version,
|
||||
privatePackage: isPublicKtxPackage ? false : packageJson.private === true,
|
||||
});
|
||||
}
|
||||
|
||||
export async function packageReleaseMetadata(rootDir = scriptRootDir()) {
|
||||
export async function packageReleaseMetadata(rootDir = scriptRootDir(), version = publicNpmPackageVersion(rootDir)) {
|
||||
const npmPackages = await Promise.all(
|
||||
NPM_ARTIFACT_PACKAGES.map((packageInfo) => readNpmPackageMetadata(rootDir, packageInfo)),
|
||||
NPM_ARTIFACT_PACKAGES.map((packageInfo) => readNpmPackageMetadata(rootDir, packageInfo, version)),
|
||||
);
|
||||
|
||||
return [
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import { tmpdir } from 'node:os';
|
|||
import { join } from 'node:path';
|
||||
import { describe, it } from 'node:test';
|
||||
|
||||
import { PUBLIC_NPM_PACKAGE_VERSION } from './build-public-npm-package.mjs';
|
||||
import {
|
||||
CLI_PYTHON_ASSET_MANIFEST,
|
||||
INTERNAL_NPM_WORKSPACE_PACKAGES,
|
||||
|
|
@ -32,6 +33,35 @@ async function writeJson(path, value) {
|
|||
}
|
||||
|
||||
async function writeReleaseMetadataInputs(root) {
|
||||
await writeJson(join(root, 'release-policy.json'), {
|
||||
schemaVersion: 1,
|
||||
publicNpmPackageVersion: PUBLIC_NPM_PACKAGE_VERSION,
|
||||
releaseMode: 'ci-artifact-only',
|
||||
npm: {
|
||||
publish: false,
|
||||
registry: null,
|
||||
access: 'public',
|
||||
tag: 'next',
|
||||
packages: ['@kaelio/ktx'],
|
||||
},
|
||||
python: {
|
||||
publish: false,
|
||||
repository: null,
|
||||
packages: ['kaelio-ktx'],
|
||||
},
|
||||
publishedPackageSmoke: {
|
||||
packageName: '@kaelio/ktx',
|
||||
version: PUBLIC_NPM_PACKAGE_VERSION,
|
||||
registry: null,
|
||||
},
|
||||
runtimeInstaller: {
|
||||
uvStrategy: 'path-prerequisite',
|
||||
bootstrapUv: false,
|
||||
missingUvBehavior: 'focused-error',
|
||||
},
|
||||
requiredBeforePublishing: ['Choose public release version.'],
|
||||
});
|
||||
|
||||
for (const packageInfo of INTERNAL_NPM_WORKSPACE_PACKAGES) {
|
||||
await mkdir(join(root, packageInfo.packageRoot), { recursive: true });
|
||||
await writeJson(join(root, packageInfo.packageRoot, 'package.json'), {
|
||||
|
|
@ -64,19 +94,19 @@ async function writeUploadableArtifactFixtures(layout) {
|
|||
|
||||
describe('packageArtifactLayout', () => {
|
||||
it('uses stable artifact paths under ktx/dist/artifacts', () => {
|
||||
const layout = packageArtifactLayout('/repo/ktx');
|
||||
const layout = packageArtifactLayout('/repo/ktx', PUBLIC_NPM_PACKAGE_VERSION);
|
||||
|
||||
assert.equal(layout.artifactDir, '/repo/ktx/dist/artifacts');
|
||||
assert.equal(layout.npmDir, '/repo/ktx/dist/artifacts/npm');
|
||||
assert.equal(layout.pythonDir, '/repo/ktx/dist/artifacts/python');
|
||||
assert.equal(layout.cliTarball, '/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.0.tgz');
|
||||
assert.equal(layout.cliTarball, '/repo/ktx/dist/artifacts/npm/kaelio-ktx-0.1.0-rc.1.tgz');
|
||||
assert.deepEqual(Object.keys(layout.npmTarballs), ['@kaelio/ktx']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildArtifactCommands', () => {
|
||||
it('builds TypeScript packages in parallel topology, then the runtime wheel, then packs npm artifacts', () => {
|
||||
const layout = packageArtifactLayout('/repo/ktx');
|
||||
const layout = packageArtifactLayout('/repo/ktx', PUBLIC_NPM_PACKAGE_VERSION);
|
||||
const commands = buildArtifactCommands(layout);
|
||||
|
||||
assert.deepEqual(
|
||||
|
|
@ -101,7 +131,7 @@ describe('packageReleaseMetadata', () => {
|
|||
ecosystem: 'npm',
|
||||
packageName: '@kaelio/ktx',
|
||||
packageRoot: 'packages/cli',
|
||||
packageVersion: '0.1.0-rc.0',
|
||||
packageVersion: '0.1.0-rc.1',
|
||||
private: false,
|
||||
releaseMode: 'ci-artifact-only',
|
||||
},
|
||||
|
|
@ -147,7 +177,7 @@ describe('findPythonArtifacts', () => {
|
|||
describe('artifact manifest', () => {
|
||||
it('writes release metadata, source revision, checksums, and byte counts for every uploadable artifact', async () => {
|
||||
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-manifest-test-'));
|
||||
const layout = packageArtifactLayout(root);
|
||||
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
|
||||
try {
|
||||
await writeReleaseMetadataInputs(root);
|
||||
await writeUploadableArtifactFixtures(layout);
|
||||
|
|
@ -167,7 +197,7 @@ describe('artifact manifest', () => {
|
|||
ecosystem: 'npm',
|
||||
packageName: '@kaelio/ktx',
|
||||
packageRoot: 'packages/cli',
|
||||
packageVersion: '0.1.0-rc.0',
|
||||
packageVersion: '0.1.0-rc.1',
|
||||
private: false,
|
||||
releaseMode: 'ci-artifact-only',
|
||||
},
|
||||
|
|
@ -202,8 +232,8 @@ describe('artifact manifest', () => {
|
|||
artifactKind: 'tarball',
|
||||
ecosystem: 'npm',
|
||||
packageName: '@kaelio/ktx',
|
||||
packageVersion: '0.1.0-rc.0',
|
||||
path: 'npm/kaelio-ktx-0.1.0-rc.0.tgz',
|
||||
packageVersion: '0.1.0-rc.1',
|
||||
path: 'npm/kaelio-ktx-0.1.0-rc.1.tgz',
|
||||
},
|
||||
],
|
||||
);
|
||||
|
|
@ -228,7 +258,7 @@ describe('artifact manifest', () => {
|
|||
],
|
||||
);
|
||||
|
||||
const npmEntry = manifest.files.find((file) => file.path === 'npm/kaelio-ktx-0.1.0-rc.0.tgz');
|
||||
const npmEntry = manifest.files.find((file) => file.path === 'npm/kaelio-ktx-0.1.0-rc.1.tgz');
|
||||
assert.ok(npmEntry);
|
||||
assert.equal(npmEntry.bytes, Buffer.byteLength('@kaelio/ktx-tarball'));
|
||||
assert.equal(npmEntry.sha256, createHash('sha256').update('@kaelio/ktx-tarball').digest('hex'));
|
||||
|
|
@ -244,7 +274,7 @@ describe('artifact manifest', () => {
|
|||
describe('verifyArtifactManifest', () => {
|
||||
it('accepts a schema version 2 manifest that matches the artifact directory', async () => {
|
||||
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-verify-manifest-test-'));
|
||||
const layout = packageArtifactLayout(root);
|
||||
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
|
||||
try {
|
||||
await writeReleaseMetadataInputs(root);
|
||||
await writeUploadableArtifactFixtures(layout);
|
||||
|
|
@ -266,7 +296,7 @@ describe('verifyArtifactManifest', () => {
|
|||
|
||||
it('rejects a manifest when a file checksum has drifted', async () => {
|
||||
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-checksum-drift-test-'));
|
||||
const layout = packageArtifactLayout(root);
|
||||
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
|
||||
try {
|
||||
await writeReleaseMetadataInputs(root);
|
||||
await writeUploadableArtifactFixtures(layout);
|
||||
|
|
@ -286,7 +316,7 @@ describe('verifyArtifactManifest', () => {
|
|||
|
||||
it('rejects a manifest with an unsafe artifact path', async () => {
|
||||
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-path-test-'));
|
||||
const layout = packageArtifactLayout(root);
|
||||
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
|
||||
try {
|
||||
await writeReleaseMetadataInputs(root);
|
||||
await writeUploadableArtifactFixtures(layout);
|
||||
|
|
@ -304,7 +334,7 @@ describe('verifyArtifactManifest', () => {
|
|||
|
||||
it('rejects a manifest from the wrong source revision when one is required', async () => {
|
||||
const root = await mkdtemp(join(tmpdir(), 'ktx-artifacts-revision-test-'));
|
||||
const layout = packageArtifactLayout(root);
|
||||
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
|
||||
try {
|
||||
await writeReleaseMetadataInputs(root);
|
||||
await writeUploadableArtifactFixtures(layout);
|
||||
|
|
@ -328,7 +358,7 @@ describe('verifyArtifactManifest', () => {
|
|||
describe('copyRuntimeWheelAssets', () => {
|
||||
it('copies the runtime wheel and checksum manifest into CLI assets', async () => {
|
||||
const root = await mkdtemp(join(tmpdir(), 'ktx-runtime-assets-test-'));
|
||||
const layout = packageArtifactLayout(root);
|
||||
const layout = packageArtifactLayout(root, PUBLIC_NPM_PACKAGE_VERSION);
|
||||
try {
|
||||
await mkdir(layout.pythonDir, { recursive: true });
|
||||
await writeFile(
|
||||
|
|
@ -399,7 +429,7 @@ describe('standalone Python artifact cleanup', () => {
|
|||
|
||||
describe('verification snippets', () => {
|
||||
it('pins the smoke project to the public package artifact', () => {
|
||||
const layout = packageArtifactLayout('/repo/ktx');
|
||||
const layout = packageArtifactLayout('/repo/ktx', PUBLIC_NPM_PACKAGE_VERSION);
|
||||
|
||||
const packageJson = npmSmokePackageJson(layout);
|
||||
assert.deepEqual(packageJson.dependencies, {
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue