mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
Compare commits
15 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
440ee1e1ef | ||
|
|
ecf864df25 | ||
|
|
2e38f7fa09 | ||
|
|
7906e5d455 | ||
|
|
374966c06e | ||
|
|
dbe6632d5f | ||
|
|
fb794ae7fe | ||
|
|
1d869641ff | ||
|
|
b5ebb1beea | ||
|
|
f3d6ea41ad | ||
|
|
554a3d1f6a | ||
|
|
241a181d3a | ||
|
|
5a4487fc6e | ||
|
|
b71a555f19 | ||
|
|
938f9c4bdf |
45 changed files with 1110 additions and 971 deletions
54
.github/workflows/ci.yml
vendored
54
.github/workflows/ci.yml
vendored
|
|
@ -107,6 +107,56 @@ jobs:
|
||||||
if: always()
|
if: always()
|
||||||
run: planoai down || true
|
run: planoai down || true
|
||||||
|
|
||||||
|
# ── Zero-config path: `planoai up` with no args, no plano.yaml in cwd.
|
||||||
|
# Exercises the synthesize_default_config branch in cli/planoai/main.py
|
||||||
|
# which is otherwise never hit by the smoke test above.
|
||||||
|
#
|
||||||
|
# Pre-seed ~/.plano/ from the freshly-built artifacts so the CLI's
|
||||||
|
# cached-download path hits in step (2) of ensure_wasm_plugins /
|
||||||
|
# ensure_brightstaff_binary. Without this, running from outside the
|
||||||
|
# repo means find_repo_root() returns None, the local-build short-
|
||||||
|
# circuit is skipped, and the CLI tries to download from a GitHub
|
||||||
|
# release that does not yet exist for the in-flight version on
|
||||||
|
# release-bump PRs (e.g. 0.4.25 before publish-binaries has run).
|
||||||
|
- name: Seed ~/.plano cache for zero-config test
|
||||||
|
run: |
|
||||||
|
VERSION=$(sed -nE 's/^__version__ = "(.*)"$/\1/p' cli/planoai/__init__.py)
|
||||||
|
mkdir -p ~/.plano/plugins ~/.plano/bin
|
||||||
|
cp crates/target/wasm32-wasip1/release/prompt_gateway.wasm ~/.plano/plugins/
|
||||||
|
cp crates/target/wasm32-wasip1/release/llm_gateway.wasm ~/.plano/plugins/
|
||||||
|
cp crates/target/release/brightstaff ~/.plano/bin/
|
||||||
|
chmod +x ~/.plano/bin/brightstaff
|
||||||
|
echo "$VERSION" > ~/.plano/plugins/wasm.version
|
||||||
|
echo "$VERSION" > ~/.plano/bin/brightstaff.version
|
||||||
|
|
||||||
|
- name: Zero-config smoke test
|
||||||
|
env:
|
||||||
|
OPENAI_API_KEY: test-key-not-used
|
||||||
|
run: |
|
||||||
|
empty_dir="$(mktemp -d)"
|
||||||
|
cd "$empty_dir"
|
||||||
|
test ! -f plano.yaml
|
||||||
|
planoai up
|
||||||
|
test -f "$HOME/.plano/default_config.yaml"
|
||||||
|
|
||||||
|
- name: Zero-config health check
|
||||||
|
run: |
|
||||||
|
for i in $(seq 1 30); do
|
||||||
|
if curl -sf http://localhost:12000/healthz > /dev/null 2>&1; then
|
||||||
|
echo "Zero-config health check passed"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
echo "Zero-config health check failed after 30s"
|
||||||
|
cat ~/.plano/run/logs/envoy.log || true
|
||||||
|
cat ~/.plano/run/logs/brightstaff.log || true
|
||||||
|
exit 1
|
||||||
|
|
||||||
|
- name: Stop plano (zero-config)
|
||||||
|
if: always()
|
||||||
|
run: planoai down || true
|
||||||
|
|
||||||
# ──────────────────────────────────────────────
|
# ──────────────────────────────────────────────
|
||||||
# Single Docker build — shared by all downstream jobs
|
# Single Docker build — shared by all downstream jobs
|
||||||
# ──────────────────────────────────────────────
|
# ──────────────────────────────────────────────
|
||||||
|
|
@ -133,13 +183,13 @@ jobs:
|
||||||
load: true
|
load: true
|
||||||
tags: |
|
tags: |
|
||||||
${{ env.PLANO_DOCKER_IMAGE }}
|
${{ env.PLANO_DOCKER_IMAGE }}
|
||||||
${{ env.DOCKER_IMAGE }}:0.4.22
|
${{ env.DOCKER_IMAGE }}:0.4.25
|
||||||
${{ env.DOCKER_IMAGE }}:latest
|
${{ env.DOCKER_IMAGE }}:latest
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
- name: Save image as artifact
|
- name: Save image as artifact
|
||||||
run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.22 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
|
run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.25 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
|
||||||
|
|
||||||
- name: Upload image artifact
|
- name: Upload image artifact
|
||||||
uses: actions/upload-artifact@v6
|
uses: actions/upload-artifact@v6
|
||||||
|
|
|
||||||
124
.github/workflows/update-providers.yml
vendored
Normal file
124
.github/workflows/update-providers.yml
vendored
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
name: Update provider_models.yaml
|
||||||
|
|
||||||
|
on:
|
||||||
|
repository_dispatch:
|
||||||
|
types: [update-providers]
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
pull-requests: write
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
update-providers:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
RESPONSE_URL: ${{ github.event.client_payload.response_url }}
|
||||||
|
SLACK_USER_ID: ${{ github.event.client_payload.user_id }}
|
||||||
|
SLACK_USER_NAME: ${{ github.event.client_payload.user_name }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout main
|
||||||
|
uses: actions/checkout@v6
|
||||||
|
with:
|
||||||
|
ref: main
|
||||||
|
|
||||||
|
- name: Install Rust toolchain
|
||||||
|
uses: dtolnay/rust-toolchain@stable
|
||||||
|
|
||||||
|
- name: Configure AWS credentials
|
||||||
|
uses: aws-actions/configure-aws-credentials@v4
|
||||||
|
with:
|
||||||
|
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||||
|
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||||
|
aws-region: ${{ secrets.AWS_REGION }}
|
||||||
|
|
||||||
|
- name: Cache cargo build
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/.cargo/registry
|
||||||
|
~/.cargo/git
|
||||||
|
crates/target
|
||||||
|
key: cargo-fetch-models-${{ hashFiles('crates/**/Cargo.lock', 'crates/**/Cargo.toml') }}
|
||||||
|
restore-keys: cargo-fetch-models-
|
||||||
|
|
||||||
|
- name: Run fetch_models
|
||||||
|
working-directory: crates/hermesllm
|
||||||
|
env:
|
||||||
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
|
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||||
|
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
|
||||||
|
DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
|
||||||
|
GROK_API_KEY: ${{ secrets.GROK_API_KEY }}
|
||||||
|
DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }}
|
||||||
|
MOONSHOT_API_KEY: ${{ secrets.MOONSHOT_API_KEY }}
|
||||||
|
ZHIPU_API_KEY: ${{ secrets.ZHIPU_API_KEY }}
|
||||||
|
MIMO_API_KEY: ${{ secrets.MIMO_API_KEY }}
|
||||||
|
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||||
|
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
||||||
|
AI_GATEWAY_API_KEY: ${{ secrets.AI_GATEWAY_API_KEY }}
|
||||||
|
run: cargo run --bin fetch_models --features model-fetch
|
||||||
|
|
||||||
|
- name: Create pull request
|
||||||
|
id: cpr
|
||||||
|
uses: peter-evans/create-pull-request@v7
|
||||||
|
with:
|
||||||
|
branch: bot/update-providers-${{ github.run_id }}
|
||||||
|
base: main
|
||||||
|
commit-message: "chore: refresh provider_models.yaml"
|
||||||
|
title: "chore: refresh provider_models.yaml"
|
||||||
|
body: |
|
||||||
|
Automated refresh of `crates/hermesllm/src/bin/provider_models.yaml`
|
||||||
|
via `fetch_models`.
|
||||||
|
|
||||||
|
Requested by ${{ env.SLACK_USER_NAME && format('@{0}', env.SLACK_USER_NAME) || 'workflow_dispatch' }}${{ env.SLACK_USER_ID && format(' (Slack `{0}`)', env.SLACK_USER_ID) || '' }}.
|
||||||
|
|
||||||
|
Workflow run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||||
|
labels: automated, provider-models
|
||||||
|
add-paths: crates/hermesllm/src/bin/provider_models.yaml
|
||||||
|
|
||||||
|
- name: Notify Slack (success)
|
||||||
|
if: success() && env.RESPONSE_URL != ''
|
||||||
|
env:
|
||||||
|
PR_URL: ${{ steps.cpr.outputs.pull-request-url }}
|
||||||
|
PR_NUMBER: ${{ steps.cpr.outputs.pull-request-number }}
|
||||||
|
PR_OP: ${{ steps.cpr.outputs.pull-request-operation }}
|
||||||
|
run: |
|
||||||
|
if [ -z "$PR_URL" ]; then
|
||||||
|
TEXT=":information_source: No provider model changes detected \u2014 nothing to PR."
|
||||||
|
BLOCKS=$(jq -nc --arg text "$TEXT" '{response_type:"ephemeral", replace_original:true, text:$text, blocks:[{type:"section", text:{type:"mrkdwn", text:$text}}]}')
|
||||||
|
else
|
||||||
|
TEXT=":white_check_mark: provider_models.yaml PR ready: $PR_URL"
|
||||||
|
BLOCKS=$(jq -nc \
|
||||||
|
--arg pr "$PR_URL" \
|
||||||
|
--arg num "$PR_NUMBER" \
|
||||||
|
--arg op "$PR_OP" \
|
||||||
|
'{
|
||||||
|
response_type:"ephemeral",
|
||||||
|
replace_original:true,
|
||||||
|
text:(":white_check_mark: provider_models.yaml PR #" + $num + " " + $op + ": " + $pr),
|
||||||
|
blocks:[
|
||||||
|
{type:"section", text:{type:"mrkdwn", text:(":white_check_mark: *provider_models.yaml* PR <" + $pr + "|#" + $num + "> " + $op + ".")}},
|
||||||
|
{type:"actions", elements:[{type:"button", text:{type:"plain_text", text:"Open PR"}, url:$pr}]}
|
||||||
|
]
|
||||||
|
}')
|
||||||
|
fi
|
||||||
|
curl -sS -X POST -H 'Content-Type: application/json' -d "$BLOCKS" "$RESPONSE_URL"
|
||||||
|
|
||||||
|
- name: Notify Slack (failure)
|
||||||
|
if: failure() && env.RESPONSE_URL != ''
|
||||||
|
run: |
|
||||||
|
RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||||
|
TEXT=":x: provider_models.yaml update failed. Logs: $RUN_URL"
|
||||||
|
jq -nc \
|
||||||
|
--arg text "$TEXT" \
|
||||||
|
--arg run "$RUN_URL" \
|
||||||
|
'{
|
||||||
|
response_type:"ephemeral",
|
||||||
|
replace_original:true,
|
||||||
|
text:$text,
|
||||||
|
blocks:[
|
||||||
|
{type:"section", text:{type:"mrkdwn", text:(":x: *provider_models.yaml update failed.*")}},
|
||||||
|
{type:"actions", elements:[{type:"button", text:{type:"plain_text", text:"View logs"}, url:$run}]}
|
||||||
|
]
|
||||||
|
}' | curl -sS -X POST -H 'Content-Type: application/json' -d @- "$RESPONSE_URL"
|
||||||
|
|
@ -49,7 +49,7 @@ Client → Envoy (prompt_gateway.wasm → llm_gateway.wasm) → Agents/LLM Provi
|
||||||
|
|
||||||
### Python CLI (cli/planoai/)
|
### Python CLI (cli/planoai/)
|
||||||
|
|
||||||
Entry point: `main.py`. Built with `rich-click`. Commands: `up`, `down`, `build`, `logs`, `trace`, `init`, `cli_agent`, `generate_prompt_targets`.
|
Entry point: `main.py`. Built with `rich-click`. Commands: `up`, `down`, `build`, `logs`, `trace`, `init`, `cli_agent`.
|
||||||
|
|
||||||
### Config (config/)
|
### Config (config/)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ Plano solves this by moving core delivery concerns into a unified, out-of-proces
|
||||||
Plano pulls rote plumbing out of your framework so you can stay focused on what matters most: the core product logic of your agentic applications. Plano is backed by [industry-leading LLM research](https://planoai.dev/research) and built on [Envoy](https://envoyproxy.io) by its core contributors, who built critical infrastructure at scale for modern workloads.
|
Plano pulls rote plumbing out of your framework so you can stay focused on what matters most: the core product logic of your agentic applications. Plano is backed by [industry-leading LLM research](https://planoai.dev/research) and built on [Envoy](https://envoyproxy.io) by its core contributors, who built critical infrastructure at scale for modern workloads.
|
||||||
|
|
||||||
**High-Level Network Sequence Diagram**:
|
**High-Level Network Sequence Diagram**:
|
||||||

|

|
||||||
|
|
||||||
**Jump to our [docs](https://docs.planoai.dev)** to learn how you can use Plano to improve the speed, safety and observability of your agentic applications.
|
**Jump to our [docs](https://docs.planoai.dev)** to learn how you can use Plano to improve the speed, safety and observability of your agentic applications.
|
||||||
|
|
||||||
|
|
@ -156,7 +156,7 @@ curl http://localhost:8001/v1/chat/completions \
|
||||||
|
|
||||||
Every request is traced end-to-end with OpenTelemetry - no instrumentation code needed.
|
Every request is traced end-to-end with OpenTelemetry - no instrumentation code needed.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
### What You Didn't Have to Build
|
### What You Didn't Have to Build
|
||||||
|
|
||||||
|
|
@ -183,7 +183,6 @@ Ready to try Plano? Check out our comprehensive documentation:
|
||||||
- **[LLM Routing](https://docs.planoai.dev/guides/llm_router.html)** - Route by model name, alias, or intelligent preferences
|
- **[LLM Routing](https://docs.planoai.dev/guides/llm_router.html)** - Route by model name, alias, or intelligent preferences
|
||||||
- **[Agent Orchestration](https://docs.planoai.dev/guides/orchestration.html)** - Build multi-agent workflows
|
- **[Agent Orchestration](https://docs.planoai.dev/guides/orchestration.html)** - Build multi-agent workflows
|
||||||
- **[Filter Chains](https://docs.planoai.dev/concepts/filter_chain.html)** - Add guardrails, moderation, and memory hooks
|
- **[Filter Chains](https://docs.planoai.dev/concepts/filter_chain.html)** - Add guardrails, moderation, and memory hooks
|
||||||
- **[Prompt Targets](https://docs.planoai.dev/concepts/prompt_target.html)** - Turn prompts into deterministic API calls
|
|
||||||
- **[Observability](https://docs.planoai.dev/guides/observability/observability.html)** - Traces, metrics, and logs
|
- **[Observability](https://docs.planoai.dev/guides/observability/observability.html)** - Traces, metrics, and logs
|
||||||
|
|
||||||
## Contribution
|
## Contribution
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ export function Hero() {
|
||||||
>
|
>
|
||||||
<div className="inline-flex flex-wrap items-center gap-1.5 sm:gap-2 px-3 sm:px-4 py-1 rounded-full bg-[rgba(185,191,255,0.4)] border border-[var(--secondary)] shadow backdrop-blur hover:bg-[rgba(185,191,255,0.6)] transition-colors cursor-pointer">
|
<div className="inline-flex flex-wrap items-center gap-1.5 sm:gap-2 px-3 sm:px-4 py-1 rounded-full bg-[rgba(185,191,255,0.4)] border border-[var(--secondary)] shadow backdrop-blur hover:bg-[rgba(185,191,255,0.6)] transition-colors cursor-pointer">
|
||||||
<span className="text-xs sm:text-sm font-medium text-black/65">
|
<span className="text-xs sm:text-sm font-medium text-black/65">
|
||||||
v0.4.22
|
v0.4.25
|
||||||
</span>
|
</span>
|
||||||
<span className="text-xs sm:text-sm font-medium text-black ">
|
<span className="text-xs sm:text-sm font-medium text-black ">
|
||||||
—
|
—
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.22
|
docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.25
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,3 @@
|
||||||
"""Plano CLI - Intelligent Prompt Gateway."""
|
"""Plano CLI - Intelligent Prompt Gateway."""
|
||||||
|
|
||||||
__version__ = "0.4.22"
|
__version__ = "0.4.25"
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,42 @@ CHATGPT_API_BASE = "https://chatgpt.com/backend-api/codex"
|
||||||
CHATGPT_DEFAULT_ORIGINATOR = "codex_cli_rs"
|
CHATGPT_DEFAULT_ORIGINATOR = "codex_cli_rs"
|
||||||
CHATGPT_DEFAULT_USER_AGENT = "codex_cli_rs/0.0.0 (Unknown 0; unknown) unknown"
|
CHATGPT_DEFAULT_USER_AGENT = "codex_cli_rs/0.0.0 (Unknown 0; unknown) unknown"
|
||||||
|
|
||||||
|
KIMI_CODE_API_HOST = "api.kimi.com"
|
||||||
|
KIMI_CODE_DEFAULT_USER_AGENT = "KimiCLI/1.3"
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_kimi_code_base_url(base_url: str) -> str:
|
||||||
|
"""Ensure Kimi Code API base URLs include the /v1 suffix."""
|
||||||
|
parsed = urlparse(base_url)
|
||||||
|
if parsed.hostname != KIMI_CODE_API_HOST:
|
||||||
|
return base_url
|
||||||
|
path = parsed.path.rstrip("/")
|
||||||
|
if path.endswith("/coding"):
|
||||||
|
return f"{parsed.scheme}://{parsed.netloc}{path}/v1"
|
||||||
|
return base_url
|
||||||
|
|
||||||
|
|
||||||
|
def apply_kimi_code_provider_defaults(model_provider: dict) -> None:
|
||||||
|
"""Inject Kimi Code API defaults (User-Agent, normalized base URL)."""
|
||||||
|
base_url = model_provider.get("base_url")
|
||||||
|
if not base_url:
|
||||||
|
return
|
||||||
|
parsed = urlparse(base_url)
|
||||||
|
model_id = model_provider.get("model", "")
|
||||||
|
is_kimi_code = (
|
||||||
|
parsed.hostname == KIMI_CODE_API_HOST or model_id == "kimi-for-coding"
|
||||||
|
)
|
||||||
|
if not is_kimi_code:
|
||||||
|
return
|
||||||
|
|
||||||
|
normalized = normalize_kimi_code_base_url(base_url)
|
||||||
|
if normalized != base_url:
|
||||||
|
model_provider["base_url"] = normalized
|
||||||
|
|
||||||
|
headers = model_provider.setdefault("headers", {})
|
||||||
|
headers.setdefault("User-Agent", KIMI_CODE_DEFAULT_USER_AGENT)
|
||||||
|
|
||||||
|
|
||||||
SUPPORTED_PROVIDERS = (
|
SUPPORTED_PROVIDERS = (
|
||||||
SUPPORTED_PROVIDERS_WITHOUT_BASE_URL + SUPPORTED_PROVIDERS_WITH_BASE_URL
|
SUPPORTED_PROVIDERS_WITHOUT_BASE_URL + SUPPORTED_PROVIDERS_WITH_BASE_URL
|
||||||
)
|
)
|
||||||
|
|
@ -463,6 +499,8 @@ def validate_and_render_schema():
|
||||||
headers.setdefault("session_id", str(uuid.uuid4()))
|
headers.setdefault("session_id", str(uuid.uuid4()))
|
||||||
model_provider["headers"] = headers
|
model_provider["headers"] = headers
|
||||||
|
|
||||||
|
apply_kimi_code_provider_defaults(model_provider)
|
||||||
|
|
||||||
updated_model_providers.append(model_provider)
|
updated_model_providers.append(model_provider)
|
||||||
|
|
||||||
if model_provider.get("base_url", None):
|
if model_provider.get("base_url", None):
|
||||||
|
|
@ -562,15 +600,15 @@ def validate_and_render_schema():
|
||||||
"Please provide model_providers either under listeners or at root level, not both. Currently we don't support multiple listeners with model_providers"
|
"Please provide model_providers either under listeners or at root level, not both. Currently we don't support multiple listeners with model_providers"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Validate input_filters IDs on listeners reference valid agent/filter IDs
|
# Validate listener-level filter IDs reference valid agent/filter IDs.
|
||||||
for listener in listeners:
|
for listener in listeners:
|
||||||
listener_input_filters = listener.get("input_filters", [])
|
for filter_field in ("input_filters", "output_filters"):
|
||||||
for fc_id in listener_input_filters:
|
for fc_id in listener.get(filter_field, []):
|
||||||
if fc_id not in agent_id_keys:
|
if fc_id not in agent_id_keys:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"Listener '{listener.get('name', 'unknown')}' references input_filters id '{fc_id}' "
|
f"Listener '{listener.get('name', 'unknown')}' references {filter_field} id '{fc_id}' "
|
||||||
f"which is not defined in agents or filters. Available ids: {', '.join(sorted(agent_id_keys))}"
|
f"which is not defined in agents or filters. Available ids: {', '.join(sorted(agent_id_keys))}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Validate model aliases if present
|
# Validate model aliases if present
|
||||||
if "model_aliases" in config_yaml:
|
if "model_aliases" in config_yaml:
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ PLANO_COLOR = "#969FF4"
|
||||||
|
|
||||||
SERVICE_NAME_ARCHGW = "plano"
|
SERVICE_NAME_ARCHGW = "plano"
|
||||||
PLANO_DOCKER_NAME = "plano"
|
PLANO_DOCKER_NAME = "plano"
|
||||||
PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.22")
|
PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.25")
|
||||||
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317"
|
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317"
|
||||||
|
|
||||||
# Native mode constants
|
# Native mode constants
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,6 @@ import contextlib
|
||||||
import logging
|
import logging
|
||||||
import rich_click as click
|
import rich_click as click
|
||||||
import yaml
|
import yaml
|
||||||
from planoai import targets
|
|
||||||
from planoai.defaults import (
|
from planoai.defaults import (
|
||||||
DEFAULT_LLM_LISTENER_PORT,
|
DEFAULT_LLM_LISTENER_PORT,
|
||||||
detect_providers,
|
detect_providers,
|
||||||
|
|
@ -622,28 +621,6 @@ def down(docker, verbose):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@click.command()
|
|
||||||
@click.option(
|
|
||||||
"--f",
|
|
||||||
"--file",
|
|
||||||
type=click.Path(exists=True),
|
|
||||||
required=True,
|
|
||||||
help="Path to the Python file",
|
|
||||||
)
|
|
||||||
def generate_prompt_targets(file):
|
|
||||||
"""Generats prompt_targets from python methods.
|
|
||||||
Note: This works for simple data types like ['int', 'float', 'bool', 'str', 'list', 'tuple', 'set', 'dict']:
|
|
||||||
If you have a complex pydantic data type, you will have to flatten those manually until we add support for it.
|
|
||||||
"""
|
|
||||||
|
|
||||||
print(f"Processing file: {file}")
|
|
||||||
if not file.endswith(".py"):
|
|
||||||
print("Error: Input file must be a .py file")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
targets.generate_prompt_targets(file)
|
|
||||||
|
|
||||||
|
|
||||||
@click.command()
|
@click.command()
|
||||||
@click.option(
|
@click.option(
|
||||||
"--debug",
|
"--debug",
|
||||||
|
|
@ -741,7 +718,6 @@ main.add_command(down)
|
||||||
main.add_command(build)
|
main.add_command(build)
|
||||||
main.add_command(logs)
|
main.add_command(logs)
|
||||||
main.add_command(cli_agent)
|
main.add_command(cli_agent)
|
||||||
main.add_command(generate_prompt_targets)
|
|
||||||
main.add_command(init_cmd, name="init")
|
main.add_command(init_cmd, name="init")
|
||||||
main.add_command(trace_cmd, name="trace")
|
main.add_command(trace_cmd, name="trace")
|
||||||
main.add_command(chatgpt_cmd, name="chatgpt")
|
main.add_command(chatgpt_cmd, name="chatgpt")
|
||||||
|
|
|
||||||
|
|
@ -63,9 +63,5 @@ def configure_rich_click(plano_color: str) -> None:
|
||||||
"name": "Observability",
|
"name": "Observability",
|
||||||
"commands": ["trace", "obs"],
|
"commands": ["trace", "obs"],
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "Utilities",
|
|
||||||
"commands": ["generate-prompt-targets"],
|
|
||||||
},
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,365 +0,0 @@
|
||||||
import ast
|
|
||||||
import sys
|
|
||||||
import yaml
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
FLASK_ROUTE_DECORATORS = ["route", "get", "post", "put", "delete", "patch"]
|
|
||||||
FASTAPI_ROUTE_DECORATORS = ["get", "post", "put", "delete", "patch"]
|
|
||||||
|
|
||||||
|
|
||||||
def detect_framework(tree: Any) -> str:
|
|
||||||
"""Detect whether the file is using Flask or FastAPI based on imports."""
|
|
||||||
for node in ast.walk(tree):
|
|
||||||
if isinstance(node, ast.ImportFrom):
|
|
||||||
if node.module == "flask":
|
|
||||||
return "flask"
|
|
||||||
elif node.module == "fastapi":
|
|
||||||
return "fastapi"
|
|
||||||
return "unknown"
|
|
||||||
|
|
||||||
|
|
||||||
def get_route_decorators(node: Any, framework: str) -> list:
|
|
||||||
"""Extract route decorators based on the framework."""
|
|
||||||
decorators = []
|
|
||||||
for decorator in node.decorator_list:
|
|
||||||
if isinstance(decorator, ast.Call) and isinstance(
|
|
||||||
decorator.func, ast.Attribute
|
|
||||||
):
|
|
||||||
if framework == "flask" and decorator.func.attr in FLASK_ROUTE_DECORATORS:
|
|
||||||
decorators.append(decorator.func.attr)
|
|
||||||
elif (
|
|
||||||
framework == "fastapi"
|
|
||||||
and decorator.func.attr in FASTAPI_ROUTE_DECORATORS
|
|
||||||
):
|
|
||||||
decorators.append(decorator.func.attr)
|
|
||||||
return decorators
|
|
||||||
|
|
||||||
|
|
||||||
def get_route_path(node: Any, framework: str) -> str:
|
|
||||||
"""Extract route path based on the framework."""
|
|
||||||
for decorator in node.decorator_list:
|
|
||||||
if isinstance(decorator, ast.Call) and decorator.args:
|
|
||||||
return decorator.args[0].s # Assuming it's a string literal
|
|
||||||
|
|
||||||
|
|
||||||
def is_pydantic_model(annotation: ast.expr, tree: ast.AST) -> bool:
|
|
||||||
"""Check if a given type annotation is a Pydantic model."""
|
|
||||||
# We walk through the AST to find class definitions and check if they inherit from Pydantic's BaseModel
|
|
||||||
if isinstance(annotation, ast.Name):
|
|
||||||
for node in ast.walk(tree):
|
|
||||||
if isinstance(node, ast.ClassDef) and node.name == annotation.id:
|
|
||||||
for base in node.bases:
|
|
||||||
if isinstance(base, ast.Name) and base.id == "BaseModel":
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def get_pydantic_model_fields(model_name: str, tree: ast.AST) -> list:
|
|
||||||
"""Extract fields from a Pydantic model, handling list, tuple, set, dict types, and direct default values."""
|
|
||||||
fields = []
|
|
||||||
|
|
||||||
for node in ast.walk(tree):
|
|
||||||
if isinstance(node, ast.ClassDef) and node.name == model_name:
|
|
||||||
for stmt in node.body:
|
|
||||||
if isinstance(stmt, ast.AnnAssign):
|
|
||||||
# Initialize the default field description
|
|
||||||
field_type = "Unknown: Please Fix This!"
|
|
||||||
description = "Field, description not present. Please fix."
|
|
||||||
default_value = None
|
|
||||||
required = True # Assume the field is required initially
|
|
||||||
|
|
||||||
# Check if the field uses Field() with required status and description
|
|
||||||
if (
|
|
||||||
stmt.value
|
|
||||||
and isinstance(stmt.value, ast.Call)
|
|
||||||
and isinstance(stmt.value.func, ast.Name)
|
|
||||||
and stmt.value.func.id == "Field"
|
|
||||||
):
|
|
||||||
# Extract the description argument inside the Field call
|
|
||||||
for keyword in stmt.value.keywords:
|
|
||||||
if keyword.arg == "description" and isinstance(
|
|
||||||
keyword.value, ast.Str
|
|
||||||
):
|
|
||||||
description = keyword.value.s
|
|
||||||
if keyword.arg == "default":
|
|
||||||
default_value = keyword.value
|
|
||||||
# If Ellipsis (...) is used, it means the field is required
|
|
||||||
if (
|
|
||||||
stmt.value.args
|
|
||||||
and isinstance(stmt.value.args[0], ast.Constant)
|
|
||||||
and stmt.value.args[0].value is Ellipsis
|
|
||||||
):
|
|
||||||
required = True
|
|
||||||
else:
|
|
||||||
required = False
|
|
||||||
|
|
||||||
# Handle direct default values (e.g., name: str = "John Doe")
|
|
||||||
elif stmt.value is not None:
|
|
||||||
if isinstance(stmt.value, ast.Constant):
|
|
||||||
# Set the default value from the assignment (e.g., name: str = "John Doe")
|
|
||||||
default_value = stmt.value.value
|
|
||||||
required = (
|
|
||||||
False # Not required since it has a default value
|
|
||||||
)
|
|
||||||
|
|
||||||
# Always extract the field type, even if there's a default value
|
|
||||||
if isinstance(stmt.annotation, ast.Subscript):
|
|
||||||
# Get the base type (list, tuple, set, dict)
|
|
||||||
base_type = (
|
|
||||||
stmt.annotation.value.id
|
|
||||||
if isinstance(stmt.annotation.value, ast.Name)
|
|
||||||
else "Unknown"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Handle only list, tuple, set, dict and ignore the inner types
|
|
||||||
if base_type.lower() in ["list", "tuple", "set", "dict"]:
|
|
||||||
field_type = base_type.lower()
|
|
||||||
|
|
||||||
# Handle the ellipsis '...' for required fields if no Field() call
|
|
||||||
elif (
|
|
||||||
isinstance(stmt.value, ast.Constant)
|
|
||||||
and stmt.value.value is Ellipsis
|
|
||||||
):
|
|
||||||
required = True
|
|
||||||
|
|
||||||
# Handle simple types like str, int, etc.
|
|
||||||
if isinstance(stmt.annotation, ast.Name):
|
|
||||||
field_type = stmt.annotation.id
|
|
||||||
|
|
||||||
field_info = {
|
|
||||||
"name": stmt.target.id,
|
|
||||||
"type": field_type, # Always set the field type
|
|
||||||
"description": description,
|
|
||||||
"default": default_value, # Handle direct default values
|
|
||||||
"required": required,
|
|
||||||
}
|
|
||||||
fields.append(field_info)
|
|
||||||
|
|
||||||
return fields
|
|
||||||
|
|
||||||
|
|
||||||
def get_function_parameters(node: ast.FunctionDef, tree: ast.AST) -> list:
|
|
||||||
"""Extract the parameters and their types from the function definition."""
|
|
||||||
parameters = []
|
|
||||||
|
|
||||||
# Extract docstring to find descriptions
|
|
||||||
docstring = ast.get_docstring(node)
|
|
||||||
arg_descriptions = extract_arg_descriptions_from_docstring(docstring)
|
|
||||||
|
|
||||||
# Extract default values
|
|
||||||
defaults = [None] * (
|
|
||||||
len(node.args.args) - len(node.args.defaults)
|
|
||||||
) + node.args.defaults # Align defaults with args
|
|
||||||
for arg, default in zip(node.args.args, defaults):
|
|
||||||
if arg.arg != "self": # Skip 'self' or 'cls' in class methods
|
|
||||||
param_info = {
|
|
||||||
"name": arg.arg,
|
|
||||||
"description": arg_descriptions.get(arg.arg, "[ADD DESCRIPTION]"),
|
|
||||||
}
|
|
||||||
|
|
||||||
# Handle Pydantic model types
|
|
||||||
if hasattr(arg, "annotation") and is_pydantic_model(arg.annotation, tree):
|
|
||||||
# Extract and flatten Pydantic model fields
|
|
||||||
pydantic_fields = get_pydantic_model_fields(arg.annotation.id, tree)
|
|
||||||
parameters.extend(
|
|
||||||
pydantic_fields
|
|
||||||
) # Flatten the model fields into the parameters list
|
|
||||||
continue # Skip adding the current param_info for the model since we expand the fields
|
|
||||||
|
|
||||||
# Handle standard Python types (int, float, str, etc.)
|
|
||||||
elif hasattr(arg, "annotation") and isinstance(arg.annotation, ast.Name):
|
|
||||||
if arg.annotation.id in [
|
|
||||||
"int",
|
|
||||||
"float",
|
|
||||||
"bool",
|
|
||||||
"str",
|
|
||||||
"list",
|
|
||||||
"tuple",
|
|
||||||
"set",
|
|
||||||
"dict",
|
|
||||||
]:
|
|
||||||
param_info["type"] = arg.annotation.id
|
|
||||||
else:
|
|
||||||
param_info["type"] = "[UNKNOWN - PLEASE FIX]"
|
|
||||||
|
|
||||||
# Handle generic subscript types (e.g., Optional, List[Type], etc.)
|
|
||||||
elif hasattr(arg, "annotation") and isinstance(
|
|
||||||
arg.annotation, ast.Subscript
|
|
||||||
):
|
|
||||||
if isinstance(
|
|
||||||
arg.annotation.value, ast.Name
|
|
||||||
) and arg.annotation.value.id in ["list", "tuple", "set", "dict"]:
|
|
||||||
param_info["type"] = (
|
|
||||||
f"{arg.annotation.value.id}" # e.g., "List", "Tuple", etc.
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
param_info["type"] = "[UNKNOWN - PLEASE FIX]"
|
|
||||||
|
|
||||||
# Default for unknown types
|
|
||||||
else:
|
|
||||||
param_info["type"] = (
|
|
||||||
"[UNKNOWN - PLEASE FIX]" # If unable to detect type
|
|
||||||
)
|
|
||||||
|
|
||||||
# Handle default values
|
|
||||||
if default is not None:
|
|
||||||
if isinstance(default, ast.Constant) or isinstance(
|
|
||||||
default, ast.NameConstant
|
|
||||||
):
|
|
||||||
param_info["default"] = (
|
|
||||||
default.value
|
|
||||||
) # Use the default value directly
|
|
||||||
else:
|
|
||||||
param_info["default"] = "[UNKNOWN DEFAULT]" # Unknown default type
|
|
||||||
param_info["required"] = False # Optional since it has a default value
|
|
||||||
else:
|
|
||||||
param_info["default"] = None
|
|
||||||
param_info["required"] = True # Required if no default value
|
|
||||||
|
|
||||||
parameters.append(param_info)
|
|
||||||
|
|
||||||
return parameters
|
|
||||||
|
|
||||||
|
|
||||||
def get_function_docstring(node: Any) -> str:
|
|
||||||
"""Extract the function's docstring description if present."""
|
|
||||||
# Check if the first node is a docstring
|
|
||||||
if isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Str):
|
|
||||||
# Get the entire docstring
|
|
||||||
full_docstring = node.body[0].value.s.strip()
|
|
||||||
|
|
||||||
# Split the docstring by double newlines (to separate description from fields like Args)
|
|
||||||
description = full_docstring.split("\n\n")[0].strip()
|
|
||||||
|
|
||||||
return description
|
|
||||||
|
|
||||||
return "No description provided."
|
|
||||||
|
|
||||||
|
|
||||||
def extract_arg_descriptions_from_docstring(docstring: str) -> dict:
    """Extract parameter descriptions from the ``Args:`` section of a docstring.

    Each ``name: description`` line inside the section becomes one entry.
    Lines indented deeper than the parameter lines are treated as
    continuations and appended to the preceding description, separated by a
    single space.

    Args:
        docstring: The docstring text to scan. May be empty or ``None``.

    Returns:
        A dict mapping each parameter name to its description. Empty when
        there is no docstring or no ``Args:`` section.
    """
    descriptions = {}
    if not docstring:
        return descriptions

    in_args_section = False
    current_param = None
    param_indent = None  # Indentation of the parameter lines; set by the first entry.

    for raw_line in docstring.splitlines():
        stripped = raw_line.strip()

        # Detect the start of the 'Args' section
        if stripped.startswith("Args:"):
            in_args_section = True
            current_param = None
            param_indent = None
            continue

        if not in_args_section:
            continue

        # A blank line terminates the section.
        if not stripped:
            break

        # Indentation is measured on the raw line; measuring it after
        # strip() would always yield zero and hide continuation lines.
        indent = len(raw_line) - len(raw_line.lstrip())
        if param_indent is None:
            param_indent = indent

        # A dedent below the parameter level starts a new section
        # (e.g. "Returns:"), which must not be recorded as a parameter.
        if indent < param_indent:
            break

        if indent > param_indent and current_param is not None:
            # Wrapped description line belonging to the previous parameter.
            descriptions[current_param] += f" {stripped}"
        elif ":" in stripped:
            param_name, description = stripped.split(":", 1)
            current_param = param_name.strip()
            descriptions[current_param] = description.strip()
        else:
            # Parameter-level text without a colon: not an Args entry.
            break

    return descriptions
|
|
||||||
|
|
||||||
|
|
||||||
def generate_prompt_targets(
    input_file_path: str, output_file_path: "str | None" = None
) -> None:
    """Introspect routes in a Flask or FastAPI source file and emit prompt-target YAML.

    Args:
        input_file_path: Path of the Python source file to parse.
        output_file_path: Optional path to write the YAML to. When omitted,
            the YAML is printed to stdout instead.

    Returns:
        None. If neither framework is detected a message is printed and
        nothing is generated.
    """
    # Python source files are UTF-8 by default, so read them as such.
    with open(input_file_path, "r", encoding="utf-8") as source:
        tree = ast.parse(source.read())

    # Detect the framework (Flask or FastAPI)
    framework = detect_framework(tree)
    if framework == "unknown":
        print("Could not detect Flask or FastAPI in the file.")
        return

    # Collect every function that carries at least one route decorator.
    routes = []
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            route_decorators = get_route_decorators(node, framework)
            if route_decorators:
                routes.append(
                    {
                        "name": node.name,
                        "path": get_route_path(node, framework),
                        "methods": route_decorators,
                        "parameters": get_function_parameters(node, tree),
                        "description": get_function_docstring(node),
                    }
                )

    # Generate YAML structure
    output_structure = {"prompt_targets": []}

    for route in routes:
        target = {
            "name": route["name"],
            "endpoint": [
                {
                    "name": "app_server",
                    "path": route["path"],
                }
            ],
            "description": route["description"],  # Use extracted docstring
            "parameters": [
                {
                    "name": param["name"],
                    "type": param["type"],
                    "description": f"{param['description']}",
                    **(
                        {"default": param["default"]}
                        if "default" in param and param["default"] is not None
                        else {}
                    ),  # Only add default if it's set
                    "required": param["required"],
                }
                for param in route["parameters"]
            ],
        }

        if route["name"] == "default":
            # A route handler named `default` is emitted as the default
            # prompt target with LLM dispatch on its response enabled.
            target["type"] = "default"
            target["auto-llm-dispatch-on-response"] = True

        output_structure["prompt_targets"].append(target)

    rendered = yaml.dump(
        output_structure, sort_keys=False, default_flow_style=False, indent=3
    )

    if output_file_path is None:
        print(rendered)
    else:
        with open(output_file_path, "w", encoding="utf-8") as out:
            out.write(rendered)
        print(f"Wrote prompt targets to {output_file_path}")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python targets.py <input_file>")
        sys.exit(1)

    input_file = sys.argv[1]

    if not input_file.endswith(".py"):
        print("Error: Input file must be a .py file")
        sys.exit(1)

    # Derive the output name by swapping only the trailing ".py";
    # str.replace would rewrite every ".py" occurrence in the path.
    output_file = input_file[: -len(".py")] + "_prompt_targets.yml"

    generate_prompt_targets(input_file, output_file)

# Example usage:
# python targets.py api.py
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[project]
|
[project]
|
||||||
name = "planoai"
|
name = "planoai"
|
||||||
version = "0.4.22"
|
version = "0.4.25"
|
||||||
description = "Python-based CLI tool to manage Plano."
|
description = "Python-based CLI tool to manage Plano."
|
||||||
authors = [{name = "Katanemo Labs, Inc."}]
|
authors = [{name = "Katanemo Labs, Inc."}]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,10 @@ import pytest
|
||||||
import yaml
|
import yaml
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
from planoai.config_generator import (
|
from planoai.config_generator import (
|
||||||
validate_and_render_schema,
|
apply_kimi_code_provider_defaults,
|
||||||
migrate_inline_routing_preferences,
|
migrate_inline_routing_preferences,
|
||||||
|
normalize_kimi_code_base_url,
|
||||||
|
validate_and_render_schema,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -327,6 +329,63 @@ routing_preferences:
|
||||||
tracing:
|
tracing:
|
||||||
random_sampling: 100
|
random_sampling: 100
|
||||||
|
|
||||||
|
""",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "unknown_listener_output_filter",
|
||||||
|
"expected_error": "references output_filters id 'missing_output_guard'",
|
||||||
|
"plano_config": """
|
||||||
|
version: v0.4.0
|
||||||
|
|
||||||
|
filters:
|
||||||
|
- id: input_guard
|
||||||
|
url: http://localhost:10500
|
||||||
|
type: http
|
||||||
|
|
||||||
|
listeners:
|
||||||
|
- name: llm
|
||||||
|
type: model
|
||||||
|
port: 12000
|
||||||
|
input_filters:
|
||||||
|
- input_guard
|
||||||
|
output_filters:
|
||||||
|
- missing_output_guard
|
||||||
|
|
||||||
|
model_providers:
|
||||||
|
- model: openai/gpt-4o-mini
|
||||||
|
access_key: $OPENAI_API_KEY
|
||||||
|
default: true
|
||||||
|
|
||||||
|
""",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "valid_listener_output_filter",
|
||||||
|
"expected_error": None,
|
||||||
|
"plano_config": """
|
||||||
|
version: v0.4.0
|
||||||
|
|
||||||
|
filters:
|
||||||
|
- id: input_guard
|
||||||
|
url: http://localhost:10500
|
||||||
|
type: http
|
||||||
|
- id: output_guard
|
||||||
|
url: http://localhost:10501
|
||||||
|
type: http
|
||||||
|
|
||||||
|
listeners:
|
||||||
|
- name: llm
|
||||||
|
type: model
|
||||||
|
port: 12000
|
||||||
|
input_filters:
|
||||||
|
- input_guard
|
||||||
|
output_filters:
|
||||||
|
- output_guard
|
||||||
|
|
||||||
|
model_providers:
|
||||||
|
- model: openai/gpt-4o-mini
|
||||||
|
access_key: $OPENAI_API_KEY
|
||||||
|
default: true
|
||||||
|
|
||||||
""",
|
""",
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
@ -738,3 +797,29 @@ model_providers:
|
||||||
migrate_inline_routing_preferences(config_yaml)
|
migrate_inline_routing_preferences(config_yaml)
|
||||||
|
|
||||||
assert config_yaml["version"] == "v0.5.0"
|
assert config_yaml["version"] == "v0.5.0"
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_kimi_code_base_url_appends_v1_suffix():
    """Bare, trailing-slash and already-suffixed URLs all normalize to the /v1 form."""
    expected_url = "https://api.kimi.com/coding/v1"
    candidate_urls = (
        "https://api.kimi.com/coding",
        "https://api.kimi.com/coding/",
        "https://api.kimi.com/coding/v1",
    )
    for raw_url in candidate_urls:
        assert normalize_kimi_code_base_url(raw_url) == expected_url
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_kimi_code_provider_defaults_injects_user_agent():
    """Applying the defaults in place rewrites base_url and adds a User-Agent header."""
    kimi_provider = dict(
        model="kimi-for-coding",
        base_url="https://api.kimi.com/coding",
        access_key="$MOONSHOTAI_API_KEY",
    )

    apply_kimi_code_provider_defaults(kimi_provider)

    injected_headers = kimi_provider["headers"]
    assert kimi_provider["base_url"] == "https://api.kimi.com/coding/v1"
    assert injected_headers["User-Agent"] == "KimiCLI/1.3"
|
||||||
|
|
|
||||||
2
cli/uv.lock
generated
2
cli/uv.lock
generated
|
|
@ -337,7 +337,7 @@ wheels = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "planoai"
|
name = "planoai"
|
||||||
version = "0.4.22"
|
version = "0.4.25"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "click" },
|
{ name = "click" },
|
||||||
|
|
|
||||||
|
|
@ -194,6 +194,7 @@ properties:
|
||||||
- digitalocean
|
- digitalocean
|
||||||
- vercel
|
- vercel
|
||||||
- openrouter
|
- openrouter
|
||||||
|
- moonshotai
|
||||||
headers:
|
headers:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
|
|
@ -252,6 +253,7 @@ properties:
|
||||||
- digitalocean
|
- digitalocean
|
||||||
- vercel
|
- vercel
|
||||||
- openrouter
|
- openrouter
|
||||||
|
- moonshotai
|
||||||
headers:
|
headers:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
|
|
|
||||||
45
crates/Cargo.lock
generated
45
crates/Cargo.lock
generated
|
|
@ -2552,9 +2552,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proxy-wasm"
|
name = "proxy-wasm"
|
||||||
version = "0.2.4"
|
version = "0.2.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f8d35d9e2bc5104e2e954b149aa1d5f9fa3bb27f73b45b2706020fed101db685"
|
checksum = "de8f6564bd52c2f4ff79fa5d1bd3bc10d8f822162af8d527e121e46703496aa0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hashbrown 0.16.1",
|
"hashbrown 0.16.1",
|
||||||
"log",
|
"log",
|
||||||
|
|
@ -2752,12 +2752,18 @@ dependencies = [
|
||||||
"num-bigint",
|
"num-bigint",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
|
"rustls 0.23.38",
|
||||||
|
"rustls-native-certs 0.7.3",
|
||||||
|
"rustls-pemfile 2.2.0",
|
||||||
|
"rustls-pki-types",
|
||||||
"ryu",
|
"ryu",
|
||||||
"sha1_smol",
|
"sha1_smol",
|
||||||
"socket2 0.5.10",
|
"socket2 0.5.10",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
"tokio-rustls 0.26.4",
|
||||||
"tokio-util",
|
"tokio-util",
|
||||||
"url",
|
"url",
|
||||||
|
"webpki-roots 0.26.11",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -2965,7 +2971,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00"
|
checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"openssl-probe 0.1.6",
|
"openssl-probe 0.1.6",
|
||||||
"rustls-pemfile",
|
"rustls-pemfile 1.0.4",
|
||||||
|
"schannel",
|
||||||
|
"security-framework 2.11.1",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustls-native-certs"
|
||||||
|
version = "0.7.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5"
|
||||||
|
dependencies = [
|
||||||
|
"openssl-probe 0.1.6",
|
||||||
|
"rustls-pemfile 2.2.0",
|
||||||
|
"rustls-pki-types",
|
||||||
"schannel",
|
"schannel",
|
||||||
"security-framework 2.11.1",
|
"security-framework 2.11.1",
|
||||||
]
|
]
|
||||||
|
|
@ -2991,6 +3010,15 @@ dependencies = [
|
||||||
"base64 0.21.7",
|
"base64 0.21.7",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustls-pemfile"
|
||||||
|
version = "2.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
|
||||||
|
dependencies = [
|
||||||
|
"rustls-pki-types",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustls-pki-types"
|
name = "rustls-pki-types"
|
||||||
version = "1.14.0"
|
version = "1.14.0"
|
||||||
|
|
@ -4024,7 +4052,7 @@ dependencies = [
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"ureq-proto",
|
"ureq-proto",
|
||||||
"utf8-zero",
|
"utf8-zero",
|
||||||
"webpki-roots",
|
"webpki-roots 1.0.6",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -4278,6 +4306,15 @@ dependencies = [
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "webpki-roots"
|
||||||
|
version = "0.26.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
|
||||||
|
dependencies = [
|
||||||
|
"webpki-roots 1.0.6",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "webpki-roots"
|
name = "webpki-roots"
|
||||||
version = "1.0.6"
|
version = "1.0.6"
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ lru = "0.12"
|
||||||
metrics = "0.23"
|
metrics = "0.23"
|
||||||
metrics-exporter-prometheus = { version = "0.15", default-features = false, features = ["http-listener"] }
|
metrics-exporter-prometheus = { version = "0.15", default-features = false, features = ["http-listener"] }
|
||||||
metrics-process = "2.1"
|
metrics-process = "2.1"
|
||||||
redis = { version = "0.27", features = ["tokio-comp"] }
|
redis = { version = "0.27", features = ["tokio-comp", "tokio-rustls-comp", "tls-rustls-webpki-roots"] }
|
||||||
reqwest = { version = "0.12.15", features = ["stream"] }
|
reqwest = { version = "0.12.15", features = ["stream"] }
|
||||||
serde = { version = "1.0.219", features = ["derive"] }
|
serde = { version = "1.0.219", features = ["derive"] }
|
||||||
serde_json = "1.0.140"
|
serde_json = "1.0.140"
|
||||||
|
|
|
||||||
|
|
@ -142,25 +142,19 @@ async fn init_app_state(
|
||||||
.listeners
|
.listeners
|
||||||
.iter()
|
.iter()
|
||||||
.find(|l| l.listener_type == ListenerType::Model);
|
.find(|l| l.listener_type == ListenerType::Model);
|
||||||
let resolve_chain = |filter_ids: Option<Vec<String>>| -> Option<ResolvedFilterChain> {
|
|
||||||
filter_ids.map(|ids| {
|
|
||||||
let agents = ids
|
|
||||||
.iter()
|
|
||||||
.filter_map(|id| {
|
|
||||||
global_agent_map
|
|
||||||
.get(id)
|
|
||||||
.map(|a: &Agent| (id.clone(), a.clone()))
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
ResolvedFilterChain {
|
|
||||||
filter_ids: ids,
|
|
||||||
agents,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
};
|
|
||||||
let filter_pipeline = Arc::new(FilterPipeline {
|
let filter_pipeline = Arc::new(FilterPipeline {
|
||||||
input: resolve_chain(model_listener.and_then(|l| l.input_filters.clone())),
|
input: resolve_filter_chain(
|
||||||
output: resolve_chain(model_listener.and_then(|l| l.output_filters.clone())),
|
"input_filters",
|
||||||
|
model_listener.and_then(|l| l.input_filters.clone()),
|
||||||
|
&global_agent_map,
|
||||||
|
)
|
||||||
|
.map_err(|e| format!("failed to resolve model listener input filters: {e}"))?,
|
||||||
|
output: resolve_filter_chain(
|
||||||
|
"output_filters",
|
||||||
|
model_listener.and_then(|l| l.output_filters.clone()),
|
||||||
|
&global_agent_map,
|
||||||
|
)
|
||||||
|
.map_err(|e| format!("failed to resolve model listener output filters: {e}"))?,
|
||||||
});
|
});
|
||||||
|
|
||||||
let overrides = config.overrides.clone().unwrap_or_default();
|
let overrides = config.overrides.clone().unwrap_or_default();
|
||||||
|
|
@ -350,6 +344,29 @@ async fn init_app_state(
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn resolve_filter_chain(
|
||||||
|
field_name: &str,
|
||||||
|
filter_ids: Option<Vec<String>>,
|
||||||
|
global_agent_map: &HashMap<String, Agent>,
|
||||||
|
) -> Result<Option<ResolvedFilterChain>, String> {
|
||||||
|
let Some(ids) = filter_ids else {
|
||||||
|
return Ok(None);
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut agents = HashMap::new();
|
||||||
|
for id in &ids {
|
||||||
|
let agent = global_agent_map
|
||||||
|
.get(id)
|
||||||
|
.ok_or_else(|| format!("{field_name} id '{id}' is not defined in agents or filters"))?;
|
||||||
|
agents.insert(id.clone(), agent.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Some(ResolvedFilterChain {
|
||||||
|
filter_ids: ids,
|
||||||
|
agents,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
/// Initialize the conversation state storage backend (if configured).
|
/// Initialize the conversation state storage backend (if configured).
|
||||||
async fn init_state_storage(
|
async fn init_state_storage(
|
||||||
config: &Configuration,
|
config: &Configuration,
|
||||||
|
|
@ -588,3 +605,63 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
let state = Arc::new(init_app_state(&config).await?);
|
let state = Arc::new(init_app_state(&config).await?);
|
||||||
run_server(state).await
|
run_server(state).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal HTTP agent with the given id.
    fn test_agent(id: &str) -> Agent {
        Agent {
            id: id.to_string(),
            transport: None,
            tool: None,
            url: "http://localhost:10500".to_string(),
            agent_type: Some("http".to_string()),
        }
    }

    /// A known id resolves into a chain that carries both the id and its agent.
    #[test]
    fn resolve_filter_chain_keeps_valid_filter_references() {
        let mut known_agents = HashMap::new();
        let guard = test_agent("output_guard");
        known_agents.insert(guard.id.clone(), guard);

        let outcome = resolve_filter_chain(
            "output_filters",
            Some(vec!["output_guard".to_string()]),
            &known_agents,
        );
        let chain = outcome
            .expect("filter chain should resolve")
            .expect("filter chain should be present");

        assert_eq!(chain.filter_ids, vec!["output_guard".to_string()]);
        assert!(chain.agents.contains_key("output_guard"));
    }

    /// An unknown output filter id is reported as an error.
    #[test]
    fn resolve_filter_chain_errors_on_missing_output_filter_reference() {
        let no_agents: HashMap<String, Agent> = HashMap::new();
        let requested = Some(vec!["missing_output_guard".to_string()]);

        let message = resolve_filter_chain("output_filters", requested, &no_agents)
            .expect_err("missing output filter should fail closed");

        assert!(message.contains("output_filters id 'missing_output_guard'"));
    }

    /// An unknown input filter id is reported as an error.
    #[test]
    fn resolve_filter_chain_errors_on_missing_input_filter_reference() {
        let no_agents: HashMap<String, Agent> = HashMap::new();
        let requested = Some(vec!["missing_input_guard".to_string()]);

        let message = resolve_filter_chain("input_filters", requested, &no_agents)
            .expect_err("missing input filter should fail closed");

        assert!(message.contains("input_filters id 'missing_input_guard'"));
    }
}
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ impl Serialize for FunctionParameters {
|
||||||
where
|
where
|
||||||
S: serde::Serializer,
|
S: serde::Serializer,
|
||||||
{
|
{
|
||||||
// select all requried parameters
|
// select all required parameters
|
||||||
let required: Vec<&String> = self
|
let required: Vec<&String> = self
|
||||||
.properties
|
.properties
|
||||||
.iter()
|
.iter()
|
||||||
|
|
|
||||||
|
|
@ -400,6 +400,10 @@ pub enum LlmProviderType {
|
||||||
Vercel,
|
Vercel,
|
||||||
#[serde(rename = "openrouter")]
|
#[serde(rename = "openrouter")]
|
||||||
OpenRouter,
|
OpenRouter,
|
||||||
|
#[serde(rename = "astraflow")]
|
||||||
|
Astraflow,
|
||||||
|
#[serde(rename = "astraflow_cn")]
|
||||||
|
AstraflowCN,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for LlmProviderType {
|
impl Display for LlmProviderType {
|
||||||
|
|
@ -425,6 +429,8 @@ impl Display for LlmProviderType {
|
||||||
LlmProviderType::DigitalOcean => write!(f, "digitalocean"),
|
LlmProviderType::DigitalOcean => write!(f, "digitalocean"),
|
||||||
LlmProviderType::Vercel => write!(f, "vercel"),
|
LlmProviderType::Vercel => write!(f, "vercel"),
|
||||||
LlmProviderType::OpenRouter => write!(f, "openrouter"),
|
LlmProviderType::OpenRouter => write!(f, "openrouter"),
|
||||||
|
LlmProviderType::Astraflow => write!(f, "astraflow"),
|
||||||
|
LlmProviderType::AstraflowCN => write!(f, "astraflow_cn"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -128,6 +128,7 @@ pub struct MessagesRequest {
|
||||||
pub enum MessagesRole {
|
pub enum MessagesRole {
|
||||||
User,
|
User,
|
||||||
Assistant,
|
Assistant,
|
||||||
|
System,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Cache control types for content blocks
|
/// Cache control types for content blocks
|
||||||
|
|
@ -632,6 +633,7 @@ impl MessagesRole {
|
||||||
match self {
|
match self {
|
||||||
MessagesRole::User => "user",
|
MessagesRole::User => "user",
|
||||||
MessagesRole::Assistant => "assistant",
|
MessagesRole::Assistant => "assistant",
|
||||||
|
MessagesRole::System => "system",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
use log::warn;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use serde_with::skip_serializing_none;
|
use serde_with::skip_serializing_none;
|
||||||
|
|
@ -136,6 +137,37 @@ impl ChatCompletionsRequest {
|
||||||
self.temperature = Some(1.0);
|
self.temperature = Some(1.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Strip request fields that Kimi Code API (`kimi-for-coding`) rejects or mishandles.
|
||||||
|
pub fn normalize_for_kimi_code_api(&mut self) {
|
||||||
|
if self.stream_options.is_some() {
|
||||||
|
warn!("kimi-for-coding: stripping unsupported stream_options from upstream request");
|
||||||
|
self.stream_options = None;
|
||||||
|
}
|
||||||
|
if self.reasoning_effort.is_some() {
|
||||||
|
warn!("kimi-for-coding: stripping unsupported reasoning_effort from upstream request");
|
||||||
|
self.reasoning_effort = None;
|
||||||
|
}
|
||||||
|
if self.web_search_options.is_some() {
|
||||||
|
warn!(
|
||||||
|
"kimi-for-coding: stripping unsupported web_search_options from upstream request"
|
||||||
|
);
|
||||||
|
self.web_search_options = None;
|
||||||
|
}
|
||||||
|
if self.service_tier.is_some() {
|
||||||
|
warn!("kimi-for-coding: stripping unsupported service_tier from upstream request");
|
||||||
|
self.service_tier = None;
|
||||||
|
}
|
||||||
|
if self.store.is_some() {
|
||||||
|
warn!("kimi-for-coding: stripping unsupported store from upstream request");
|
||||||
|
self.store = None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Report whether `model` is exactly the Kimi Code model id, `kimi-for-coding`.
pub fn is_kimi_code_model(model: &str) -> bool {
    matches!(model, "kimi-for-coding")
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,21 @@
|
||||||
// Fetch latest provider models from canonical provider APIs and update provider_models.yaml
|
// Fetch latest provider models from canonical provider APIs and merge into
|
||||||
|
// provider_models.yaml.
|
||||||
|
//
|
||||||
|
// Behavior is non-destructive: only providers we successfully fetch this run
|
||||||
|
// are replaced. Providers whose API key is missing, or whose fetch fails, are
|
||||||
|
// left untouched in the existing file. This means partial runs (e.g. without
|
||||||
|
// AWS or Google creds) can't accidentally wipe out provider entries you don't
|
||||||
|
// have keys for locally.
|
||||||
|
//
|
||||||
// Usage:
|
// Usage:
|
||||||
// Optional: OPENAI_API_KEY, ANTHROPIC_API_KEY, DEEPSEEK_API_KEY, GROK_API_KEY,
|
// Optional: OPENAI_API_KEY, ANTHROPIC_API_KEY, MISTRAL_API_KEY,
|
||||||
// DASHSCOPE_API_KEY, MOONSHOT_API_KEY, ZHIPU_API_KEY, GOOGLE_API_KEY
|
// DEEPSEEK_API_KEY, GROK_API_KEY, DASHSCOPE_API_KEY,
|
||||||
// Required: AWS CLI configured for Amazon Bedrock models
|
// MOONSHOT_API_KEY, ZHIPU_API_KEY, MIMO_API_KEY, GOOGLE_API_KEY
|
||||||
// cargo run --bin fetch_models
|
// Optional: AWS CLI configured for Amazon Bedrock models
|
||||||
|
// cargo run --bin fetch_models --features model-fetch
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::HashMap;
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
// Default to writing in the same directory as this source file
|
// Default to writing in the same directory as this source file
|
||||||
|
|
@ -19,16 +28,33 @@ fn main() {
|
||||||
.nth(1)
|
.nth(1)
|
||||||
.unwrap_or_else(|| default_path.to_string_lossy().to_string());
|
.unwrap_or_else(|| default_path.to_string_lossy().to_string());
|
||||||
|
|
||||||
println!("Fetching latest models from provider APIs...");
|
println!("Loading existing {}...", output_path);
|
||||||
|
let existing = match load_existing_models(&output_path) {
|
||||||
|
Ok(map) => {
|
||||||
|
if map.is_empty() {
|
||||||
|
println!(" (none — starting fresh)");
|
||||||
|
} else {
|
||||||
|
println!(" loaded {} existing providers", map.len());
|
||||||
|
}
|
||||||
|
map
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Error loading existing {}: {}", output_path, e);
|
||||||
|
eprintln!("Refusing to overwrite a file we can't parse. Fix or delete it and re-run.");
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
match fetch_all_models() {
|
println!("\nFetching latest models from provider APIs...");
|
||||||
|
|
||||||
|
match fetch_all_models(existing) {
|
||||||
Ok(models) => {
|
Ok(models) => {
|
||||||
let yaml = serde_yaml::to_string(&models).expect("Failed to serialize models");
|
let yaml = serde_yaml::to_string(&models).expect("Failed to serialize models");
|
||||||
|
|
||||||
std::fs::write(&output_path, yaml).expect("Failed to write provider_models.yaml");
|
std::fs::write(&output_path, yaml).expect("Failed to write provider_models.yaml");
|
||||||
|
|
||||||
println!(
|
println!(
|
||||||
"✓ Successfully updated {} providers ({} models) to {}",
|
"✓ Wrote {} providers ({} models) to {}",
|
||||||
models.metadata.total_providers, models.metadata.total_models, output_path
|
models.metadata.total_providers, models.metadata.total_models, output_path
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@ -44,6 +70,18 @@ fn main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn load_existing_models(
|
||||||
|
path: &str,
|
||||||
|
) -> Result<BTreeMap<String, Vec<String>>, Box<dyn std::error::Error>> {
|
||||||
|
let content = match std::fs::read_to_string(path) {
|
||||||
|
Ok(c) => c,
|
||||||
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(BTreeMap::new()),
|
||||||
|
Err(e) => return Err(Box::new(e)),
|
||||||
|
};
|
||||||
|
let parsed: ProviderModels = serde_yaml::from_str(&content)?;
|
||||||
|
Ok(parsed.providers)
|
||||||
|
}
|
||||||
|
|
||||||
// OpenAI-compatible API response (used by most providers)
|
// OpenAI-compatible API response (used by most providers)
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
struct OpenAICompatibleModel {
|
struct OpenAICompatibleModel {
|
||||||
|
|
@ -68,21 +106,36 @@ struct GoogleResponse {
|
||||||
models: Vec<GoogleModel>,
|
models: Vec<GoogleModel>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
struct ProviderModels {
|
struct ProviderModels {
|
||||||
|
#[serde(default = "default_version")]
|
||||||
version: String,
|
version: String,
|
||||||
|
#[serde(default = "default_source")]
|
||||||
source: String,
|
source: String,
|
||||||
providers: HashMap<String, Vec<String>>,
|
#[serde(default)]
|
||||||
|
providers: BTreeMap<String, Vec<String>>,
|
||||||
|
#[serde(default)]
|
||||||
metadata: Metadata,
|
metadata: Metadata,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||||
struct Metadata {
|
struct Metadata {
|
||||||
|
#[serde(default)]
|
||||||
total_providers: usize,
|
total_providers: usize,
|
||||||
|
#[serde(default)]
|
||||||
total_models: usize,
|
total_models: usize,
|
||||||
|
#[serde(default)]
|
||||||
last_updated: String,
|
last_updated: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fallback value for the `version` field: `"1.0"`.
fn default_version() -> String {
    String::from("1.0")
}
|
||||||
|
|
||||||
|
/// Fallback value for the `source` field: `"canonical-apis"`.
fn default_source() -> String {
    String::from("canonical-apis")
}
|
||||||
|
|
||||||
fn is_text_model(model_id: &str) -> bool {
|
fn is_text_model(model_id: &str) -> bool {
|
||||||
let id_lower = model_id.to_lowercase();
|
let id_lower = model_id.to_lowercase();
|
||||||
|
|
||||||
|
|
@ -273,8 +326,13 @@ fn fetch_bedrock_amazon_models() -> Result<Vec<String>, Box<dyn std::error::Erro
|
||||||
Ok(amazon_models)
|
Ok(amazon_models)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fetch_all_models() -> Result<ProviderModels, Box<dyn std::error::Error>> {
|
fn fetch_all_models(
|
||||||
let mut providers: HashMap<String, Vec<String>> = HashMap::new();
|
existing: BTreeMap<String, Vec<String>>,
|
||||||
|
) -> Result<ProviderModels, Box<dyn std::error::Error>> {
|
||||||
|
let mut providers = existing;
|
||||||
|
let mut updated: Vec<String> = Vec::new();
|
||||||
|
let mut skipped: Vec<String> = Vec::new();
|
||||||
|
let mut failed: Vec<String> = Vec::new();
|
||||||
let mut errors: Vec<String> = Vec::new();
|
let mut errors: Vec<String> = Vec::new();
|
||||||
|
|
||||||
// Configuration: provider name, env var, API URL, prefix for model IDs
|
// Configuration: provider name, env var, API URL, prefix for model IDs
|
||||||
|
|
@ -324,90 +382,131 @@ fn fetch_all_models() -> Result<ProviderModels, Box<dyn std::error::Error>> {
|
||||||
),
|
),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// Helper that records the outcome of a fetch attempt and only mutates
|
||||||
|
// `providers` on success, so missing/failed providers keep their existing
|
||||||
|
// entries (or stay absent if there were none).
|
||||||
|
let mut record =
|
||||||
|
|name: &str,
|
||||||
|
env_var: Option<&str>,
|
||||||
|
result: Option<Result<Vec<String>, Box<dyn std::error::Error>>>,
|
||||||
|
providers: &mut BTreeMap<String, Vec<String>>| match result {
|
||||||
|
Some(Ok(models)) => {
|
||||||
|
println!(" ✓ {}: {} models", name, models.len());
|
||||||
|
providers.insert(name.to_string(), models);
|
||||||
|
updated.push(name.to_string());
|
||||||
|
}
|
||||||
|
Some(Err(e)) => {
|
||||||
|
let kept = providers
|
||||||
|
.get(name)
|
||||||
|
.map(|v| format!(" (keeping existing {} models)", v.len()))
|
||||||
|
.unwrap_or_default();
|
||||||
|
let err_msg = format!(" ✗ {}: {}{}", name, e, kept);
|
||||||
|
eprintln!("{}", err_msg);
|
||||||
|
errors.push(err_msg);
|
||||||
|
failed.push(name.to_string());
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let kept = providers
|
||||||
|
.get(name)
|
||||||
|
.map(|v| format!(" (keeping existing {} models)", v.len()))
|
||||||
|
.unwrap_or_else(|| " (no existing entry)".to_string());
|
||||||
|
let label = env_var
|
||||||
|
.map(|v| format!("{} not set", v))
|
||||||
|
.unwrap_or_else(|| "no credentials".to_string());
|
||||||
|
println!(" ⊘ {}: {}{}", name, label, kept);
|
||||||
|
skipped.push(name.to_string());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Fetch from OpenAI-compatible providers
|
// Fetch from OpenAI-compatible providers
|
||||||
for (provider_name, env_var, api_url, prefix) in provider_configs {
|
for (provider_name, env_var, api_url, prefix) in provider_configs {
|
||||||
if let Ok(api_key) = std::env::var(env_var) {
|
let result = std::env::var(env_var)
|
||||||
match fetch_openai_compatible_models(api_url, &api_key, prefix) {
|
.ok()
|
||||||
Ok(models) => {
|
.map(|api_key| fetch_openai_compatible_models(api_url, &api_key, prefix));
|
||||||
println!(" ✓ {}: {} models", provider_name, models.len());
|
record(provider_name, Some(env_var), result, &mut providers);
|
||||||
providers.insert(provider_name.to_string(), models);
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
let err_msg = format!(" ✗ {}: {}", provider_name, e);
|
|
||||||
eprintln!("{}", err_msg);
|
|
||||||
errors.push(err_msg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
println!(" ⊘ {}: {} not set (skipped)", provider_name, env_var);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch Anthropic models (different authentication)
|
// Fetch Anthropic models (different authentication)
|
||||||
if let Ok(api_key) = std::env::var("ANTHROPIC_API_KEY") {
|
let anthropic_result = std::env::var("ANTHROPIC_API_KEY")
|
||||||
match fetch_anthropic_models(&api_key) {
|
.ok()
|
||||||
Ok(models) => {
|
.map(|key| fetch_anthropic_models(&key));
|
||||||
println!(" ✓ anthropic: {} models", models.len());
|
record(
|
||||||
providers.insert("anthropic".to_string(), models);
|
"anthropic",
|
||||||
}
|
Some("ANTHROPIC_API_KEY"),
|
||||||
Err(e) => {
|
anthropic_result,
|
||||||
let err_msg = format!(" ✗ anthropic: {}", e);
|
&mut providers,
|
||||||
eprintln!("{}", err_msg);
|
);
|
||||||
errors.push(err_msg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
println!(" ⊘ anthropic: ANTHROPIC_API_KEY not set (skipped)");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch Google models (different API format)
|
// Fetch Google models (different API format)
|
||||||
if let Ok(api_key) = std::env::var("GOOGLE_API_KEY") {
|
let google_result = std::env::var("GOOGLE_API_KEY")
|
||||||
match fetch_google_models(&api_key) {
|
.ok()
|
||||||
Ok(models) => {
|
.map(|key| fetch_google_models(&key));
|
||||||
println!(" ✓ google: {} models", models.len());
|
record(
|
||||||
providers.insert("google".to_string(), models);
|
"google",
|
||||||
}
|
Some("GOOGLE_API_KEY"),
|
||||||
Err(e) => {
|
google_result,
|
||||||
let err_msg = format!(" ✗ google: {}", e);
|
&mut providers,
|
||||||
eprintln!("{}", err_msg);
|
);
|
||||||
errors.push(err_msg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
println!(" ⊘ google: GOOGLE_API_KEY not set (skipped)");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch Amazon models from AWS Bedrock
|
// Fetch Amazon models from AWS Bedrock. Only attempt if the AWS CLI is on
|
||||||
match fetch_bedrock_amazon_models() {
|
// PATH and any AWS credential is configured — otherwise treat as skipped
|
||||||
Ok(models) => {
|
// so we don't drop the existing amazon entry on machines / CI runs without
|
||||||
println!(" ✓ amazon: {} models (via AWS Bedrock)", models.len());
|
// Bedrock access.
|
||||||
providers.insert("amazon".to_string(), models);
|
let amazon_result = if aws_credentials_available() {
|
||||||
}
|
Some(fetch_bedrock_amazon_models())
|
||||||
Err(e) => {
|
} else {
|
||||||
let err_msg = format!(" ✗ amazon: {} (AWS Bedrock required)", e);
|
None
|
||||||
eprintln!("{}", err_msg);
|
};
|
||||||
errors.push(err_msg);
|
record(
|
||||||
}
|
"amazon",
|
||||||
}
|
Some("AWS credentials"),
|
||||||
|
amazon_result,
|
||||||
|
&mut providers,
|
||||||
|
);
|
||||||
|
|
||||||
if providers.is_empty() {
|
if providers.is_empty() {
|
||||||
return Err("No models fetched from any provider. Check API keys.".into());
|
return Err(
|
||||||
|
"No existing data and no models fetched. Set at least one API key and re-run.".into(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let total_providers = providers.len();
|
let total_providers = providers.len();
|
||||||
let total_models: usize = providers.values().map(|v| v.len()).sum();
|
let total_models: usize = providers.values().map(|v| v.len()).sum();
|
||||||
|
|
||||||
|
println!("\nSummary:");
|
||||||
println!(
|
println!(
|
||||||
"\n✅ Successfully fetched models from {} providers",
|
" updated: {} ({})",
|
||||||
total_providers
|
updated.len(),
|
||||||
|
if updated.is_empty() {
|
||||||
|
"none".to_string()
|
||||||
|
} else {
|
||||||
|
updated.join(", ")
|
||||||
|
}
|
||||||
);
|
);
|
||||||
if !errors.is_empty() {
|
println!(
|
||||||
println!("⚠️ {} providers failed", errors.len());
|
" skipped (kept existing): {} ({})",
|
||||||
|
skipped.len(),
|
||||||
|
if skipped.is_empty() {
|
||||||
|
"none".to_string()
|
||||||
|
} else {
|
||||||
|
skipped.join(", ")
|
||||||
|
}
|
||||||
|
);
|
||||||
|
if !failed.is_empty() {
|
||||||
|
println!(
|
||||||
|
" failed (kept existing): {} ({})",
|
||||||
|
failed.len(),
|
||||||
|
failed.join(", ")
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
println!(
|
||||||
|
"✅ Final state: {} providers, {} models",
|
||||||
|
total_providers, total_models
|
||||||
|
);
|
||||||
|
|
||||||
Ok(ProviderModels {
|
Ok(ProviderModels {
|
||||||
version: "1.0".to_string(),
|
version: default_version(),
|
||||||
source: "canonical-apis".to_string(),
|
source: default_source(),
|
||||||
providers,
|
providers,
|
||||||
metadata: Metadata {
|
metadata: Metadata {
|
||||||
total_providers,
|
total_providers,
|
||||||
|
|
@ -416,3 +515,10 @@ fn fetch_all_models() -> Result<ProviderModels, Box<dyn std::error::Error>> {
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn aws_credentials_available() -> bool {
|
||||||
|
std::env::var("AWS_ACCESS_KEY_ID").is_ok()
|
||||||
|
|| std::env::var("AWS_PROFILE").is_ok()
|
||||||
|
|| std::env::var("AWS_SESSION_TOKEN").is_ok()
|
||||||
|
|| std::env::var("AWS_WEB_IDENTITY_TOKEN_FILE").is_ok()
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,77 @@ providers:
|
||||||
- amazon/amazon.nova-premier-v1:0
|
- amazon/amazon.nova-premier-v1:0
|
||||||
- amazon/amazon.nova-lite-v1:0
|
- amazon/amazon.nova-lite-v1:0
|
||||||
- amazon/amazon.nova-micro-v1:0
|
- amazon/amazon.nova-micro-v1:0
|
||||||
|
anthropic:
|
||||||
|
- anthropic/claude-fable-5
|
||||||
|
- anthropic/claude-opus-4-8
|
||||||
|
- anthropic/claude-opus-4-7
|
||||||
|
- anthropic/claude-sonnet-4-6
|
||||||
|
- anthropic/claude-opus-4-6
|
||||||
|
- anthropic/claude-opus-4-5-20251101
|
||||||
|
- anthropic/claude-opus-4-5
|
||||||
|
- anthropic/claude-haiku-4-5-20251001
|
||||||
|
- anthropic/claude-haiku-4-5
|
||||||
|
- anthropic/claude-sonnet-4-5-20250929
|
||||||
|
- anthropic/claude-sonnet-4-5
|
||||||
|
- anthropic/claude-opus-4-1-20250805
|
||||||
|
- anthropic/claude-opus-4-1
|
||||||
|
- anthropic/claude-opus-4-20250514
|
||||||
|
- anthropic/claude-opus-4
|
||||||
|
- anthropic/claude-sonnet-4-20250514
|
||||||
|
- anthropic/claude-sonnet-4
|
||||||
|
chatgpt:
|
||||||
|
- chatgpt/gpt-5.4
|
||||||
|
- chatgpt/gpt-5.3-codex
|
||||||
|
- chatgpt/gpt-5.2
|
||||||
|
deepseek:
|
||||||
|
- deepseek/deepseek-v4-flash
|
||||||
|
- deepseek/deepseek-v4-pro
|
||||||
|
digitalocean:
|
||||||
|
- digitalocean/openai-gpt-4.1
|
||||||
|
- digitalocean/openai-gpt-4o
|
||||||
|
- digitalocean/openai-gpt-4o-mini
|
||||||
|
- digitalocean/openai-gpt-5
|
||||||
|
- digitalocean/openai-gpt-5-mini
|
||||||
|
- digitalocean/openai-gpt-5-nano
|
||||||
|
- digitalocean/openai-gpt-5.1-codex-max
|
||||||
|
- digitalocean/openai-gpt-5.2
|
||||||
|
- digitalocean/openai-gpt-5.2-pro
|
||||||
|
- digitalocean/openai-gpt-5.3-codex
|
||||||
|
- digitalocean/openai-gpt-5.4
|
||||||
|
- digitalocean/openai-gpt-5.4-mini
|
||||||
|
- digitalocean/openai-gpt-5.4-nano
|
||||||
|
- digitalocean/openai-gpt-5.4-pro
|
||||||
|
- digitalocean/openai-gpt-oss-120b
|
||||||
|
- digitalocean/openai-gpt-oss-20b
|
||||||
|
- digitalocean/openai-o1
|
||||||
|
- digitalocean/openai-o3
|
||||||
|
- digitalocean/openai-o3-mini
|
||||||
|
- digitalocean/anthropic-claude-4.1-opus
|
||||||
|
- digitalocean/anthropic-claude-4.5-sonnet
|
||||||
|
- digitalocean/anthropic-claude-4.6-sonnet
|
||||||
|
- digitalocean/anthropic-claude-haiku-4.5
|
||||||
|
- digitalocean/anthropic-claude-opus-4
|
||||||
|
- digitalocean/anthropic-claude-opus-4.5
|
||||||
|
- digitalocean/anthropic-claude-opus-4.6
|
||||||
|
- digitalocean/anthropic-claude-opus-4.7
|
||||||
|
- digitalocean/anthropic-claude-sonnet-4
|
||||||
|
- digitalocean/alibaba-qwen3-32b
|
||||||
|
- digitalocean/arcee-trinity-large-thinking
|
||||||
|
- digitalocean/deepseek-3.2
|
||||||
|
- digitalocean/deepseek-r1-distill-llama-70b
|
||||||
|
- digitalocean/gemma-4-31B-it
|
||||||
|
- digitalocean/glm-5
|
||||||
|
- digitalocean/kimi-k2.5
|
||||||
|
- digitalocean/llama3.3-70b-instruct
|
||||||
|
- digitalocean/minimax-m2.5
|
||||||
|
- digitalocean/nvidia-nemotron-3-super-120b
|
||||||
|
- digitalocean/qwen3-coder-flash
|
||||||
|
- digitalocean/qwen3.5-397b-a17b
|
||||||
|
- digitalocean/all-mini-lm-l6-v2
|
||||||
|
- digitalocean/gte-large-en-v1.5
|
||||||
|
- digitalocean/multi-qa-mpnet-base-dot-v1
|
||||||
|
- digitalocean/qwen3-embedding-0.6b
|
||||||
|
- digitalocean/router:software-engineering
|
||||||
google:
|
google:
|
||||||
- google/gemini-2.5-flash
|
- google/gemini-2.5-flash
|
||||||
- google/gemini-2.5-pro
|
- google/gemini-2.5-pro
|
||||||
|
|
@ -22,12 +93,6 @@ providers:
|
||||||
- google/gemini-2.0-flash-lite
|
- google/gemini-2.0-flash-lite
|
||||||
- google/gemini-2.5-flash-preview-tts
|
- google/gemini-2.5-flash-preview-tts
|
||||||
- google/gemini-2.5-pro-preview-tts
|
- google/gemini-2.5-pro-preview-tts
|
||||||
- google/gemma-3-1b-it
|
|
||||||
- google/gemma-3-4b-it
|
|
||||||
- google/gemma-3-12b-it
|
|
||||||
- google/gemma-3-27b-it
|
|
||||||
- google/gemma-3n-e4b-it
|
|
||||||
- google/gemma-3n-e2b-it
|
|
||||||
- google/gemma-4-26b-a4b-it
|
- google/gemma-4-26b-a4b-it
|
||||||
- google/gemma-4-31b-it
|
- google/gemma-4-31b-it
|
||||||
- google/gemini-flash-latest
|
- google/gemini-flash-latest
|
||||||
|
|
@ -40,13 +105,22 @@ providers:
|
||||||
- google/gemini-3.1-pro-preview
|
- google/gemini-3.1-pro-preview
|
||||||
- google/gemini-3.1-pro-preview-customtools
|
- google/gemini-3.1-pro-preview-customtools
|
||||||
- google/gemini-3.1-flash-lite-preview
|
- google/gemini-3.1-flash-lite-preview
|
||||||
|
- google/gemini-3.1-flash-lite
|
||||||
- google/gemini-3-pro-image-preview
|
- google/gemini-3-pro-image-preview
|
||||||
|
- google/gemini-3-pro-image
|
||||||
- google/nano-banana-pro-preview
|
- google/nano-banana-pro-preview
|
||||||
- google/gemini-3.1-flash-image-preview
|
- google/gemini-3.1-flash-image-preview
|
||||||
|
- google/gemini-3.1-flash-image
|
||||||
|
- google/gemini-3.5-flash
|
||||||
- google/lyria-3-clip-preview
|
- google/lyria-3-clip-preview
|
||||||
- google/lyria-3-pro-preview
|
- google/lyria-3-pro-preview
|
||||||
|
- google/gemini-3.1-flash-tts-preview
|
||||||
- google/gemini-robotics-er-1.5-preview
|
- google/gemini-robotics-er-1.5-preview
|
||||||
|
- google/gemini-robotics-er-1.6-preview
|
||||||
- google/gemini-2.5-computer-use-preview-10-2025
|
- google/gemini-2.5-computer-use-preview-10-2025
|
||||||
|
- google/antigravity-preview-05-2026
|
||||||
|
- google/deep-research-max-preview-04-2026
|
||||||
|
- google/deep-research-preview-04-2026
|
||||||
- google/deep-research-pro-preview-12-2025
|
- google/deep-research-pro-preview-12-2025
|
||||||
mistralai:
|
mistralai:
|
||||||
- mistralai/mistral-medium-2505
|
- mistralai/mistral-medium-2505
|
||||||
|
|
@ -60,183 +134,62 @@ providers:
|
||||||
- mistralai/mistral-tiny-latest
|
- mistralai/mistral-tiny-latest
|
||||||
- mistralai/codestral-2508
|
- mistralai/codestral-2508
|
||||||
- mistralai/codestral-latest
|
- mistralai/codestral-latest
|
||||||
|
- mistralai/mistral-code-latest
|
||||||
|
- mistralai/mistral-code-fim-latest
|
||||||
- mistralai/devstral-2512
|
- mistralai/devstral-2512
|
||||||
- mistralai/mistral-vibe-cli-latest
|
|
||||||
- mistralai/devstral-medium-latest
|
- mistralai/devstral-medium-latest
|
||||||
- mistralai/devstral-latest
|
- mistralai/devstral-latest
|
||||||
|
- mistralai/mistral-code-agent-latest
|
||||||
- mistralai/mistral-small-2603
|
- mistralai/mistral-small-2603
|
||||||
- mistralai/mistral-small-latest
|
- mistralai/mistral-small-latest
|
||||||
- mistralai/mistral-vibe-cli-fast
|
- mistralai/mistral-vibe-cli-fast
|
||||||
- mistralai/mistral-small-2506
|
- mistralai/magistral-small-latest
|
||||||
- mistralai/magistral-medium-2509
|
- mistralai/magistral-medium-2509
|
||||||
- mistralai/magistral-medium-latest
|
- mistralai/magistral-medium-latest
|
||||||
- mistralai/magistral-small-2509
|
|
||||||
- mistralai/magistral-small-latest
|
|
||||||
- mistralai/labs-leanstral-2603
|
- mistralai/labs-leanstral-2603
|
||||||
- mistralai/mistral-large-2512
|
- mistralai/mistral-large-2512
|
||||||
- mistralai/mistral-large-latest
|
- mistralai/mistral-large-latest
|
||||||
|
- mistralai/mistral-large-2512
|
||||||
|
- mistralai/mistral-large-latest
|
||||||
- mistralai/ministral-3b-2512
|
- mistralai/ministral-3b-2512
|
||||||
- mistralai/ministral-3b-latest
|
- mistralai/ministral-3b-latest
|
||||||
- mistralai/ministral-8b-2512
|
- mistralai/ministral-8b-2512
|
||||||
- mistralai/ministral-8b-latest
|
- mistralai/ministral-8b-latest
|
||||||
- mistralai/ministral-14b-2512
|
- mistralai/ministral-14b-2512
|
||||||
- mistralai/ministral-14b-latest
|
- mistralai/ministral-14b-latest
|
||||||
- mistralai/mistral-large-2411
|
- mistralai/mistral-medium-3-5
|
||||||
- mistralai/pixtral-large-2411
|
- mistralai/mistral-medium-3.5
|
||||||
- mistralai/pixtral-large-latest
|
- mistralai/mistral-medium-3
|
||||||
- mistralai/mistral-large-pixtral-2411
|
- mistralai/mistral-medium-2604
|
||||||
- mistralai/devstral-small-2507
|
- mistralai/mistral-medium-c21211-r0-75
|
||||||
- mistralai/devstral-medium-2507
|
- mistralai/mistral-vibe-cli-latest
|
||||||
- mistralai/labs-mistral-small-creative
|
- mistralai/mistral-medium-3-5
|
||||||
|
- mistralai/mistral-medium-3.5
|
||||||
|
- mistralai/mistral-medium-3
|
||||||
|
- mistralai/mistral-medium-2604
|
||||||
|
- mistralai/mistral-medium-c21211-r0-75
|
||||||
|
- mistralai/mistral-vibe-cli-latest
|
||||||
|
- mistralai/magistral-small-2509
|
||||||
|
- mistralai/mistral-small-2506
|
||||||
- mistralai/mistral-embed-2312
|
- mistralai/mistral-embed-2312
|
||||||
- mistralai/mistral-embed
|
- mistralai/mistral-embed
|
||||||
- mistralai/codestral-embed
|
- mistralai/codestral-embed
|
||||||
- mistralai/codestral-embed-2505
|
- mistralai/codestral-embed-2505
|
||||||
anthropic:
|
moonshotai:
|
||||||
- anthropic/claude-sonnet-4-6
|
- moonshotai/kimi-k2.5
|
||||||
- anthropic/claude-opus-4-6
|
- moonshotai/kimi-k2.6
|
||||||
- anthropic/claude-opus-4-7
|
- moonshotai/moonshot-v1-32k
|
||||||
- anthropic/claude-opus-4-5-20251101
|
- moonshotai/moonshot-v1-8k
|
||||||
- anthropic/claude-opus-4-5
|
- moonshotai/moonshot-v1-128k-vision-preview
|
||||||
- anthropic/claude-haiku-4-5-20251001
|
- moonshotai/moonshot-v1-auto
|
||||||
- anthropic/claude-haiku-4-5
|
- moonshotai/moonshot-v1-8k-vision-preview
|
||||||
- anthropic/claude-sonnet-4-5-20250929
|
- moonshotai/moonshot-v1-128k
|
||||||
- anthropic/claude-sonnet-4-5
|
- moonshotai/moonshot-v1-32k-vision-preview
|
||||||
- anthropic/claude-opus-4-1-20250805
|
|
||||||
- anthropic/claude-opus-4-1
|
|
||||||
- anthropic/claude-opus-4-20250514
|
|
||||||
- anthropic/claude-opus-4
|
|
||||||
- anthropic/claude-sonnet-4-20250514
|
|
||||||
- anthropic/claude-sonnet-4
|
|
||||||
- anthropic/claude-3-haiku-20240307
|
|
||||||
- anthropic/claude-3-haiku
|
|
||||||
qwen:
|
|
||||||
- qwen/qwen3.6-plus-2026-04-02
|
|
||||||
- qwen/qwen3.6-plus
|
|
||||||
- qwen/wan2.7-image
|
|
||||||
- qwen/deepseek-v3.2
|
|
||||||
- qwen/qwen3-asr-flash-2026-02-10
|
|
||||||
- qwen/qwen3.5-flash-2026-02-23
|
|
||||||
- qwen/qwen3.5-flash
|
|
||||||
- qwen/qwen3.5-122b-a10b
|
|
||||||
- qwen/qwen3.5-35b-a3b
|
|
||||||
- qwen/qwen3.5-27b
|
|
||||||
- qwen/qwen3-coder-next
|
|
||||||
- qwen/qwen3.5-397b-a17b
|
|
||||||
- qwen/qwen3.5-plus-2026-02-15
|
|
||||||
- qwen/qwen3.5-plus
|
|
||||||
- qwen/qwen3-vl-flash-2026-01-22
|
|
||||||
- qwen/qwen3-max-2026-01-23
|
|
||||||
- qwen/qwen-plus-character
|
|
||||||
- qwen/qwen-flash-character
|
|
||||||
- qwen/qwen-flash
|
|
||||||
- qwen/qwen3-vl-plus-2025-12-19
|
|
||||||
- qwen/qwen3-omni-flash-2025-12-01
|
|
||||||
- qwen/qwen3-livetranslate-flash-2025-12-01
|
|
||||||
- qwen/qwen3-livetranslate-flash
|
|
||||||
- qwen/qwen-mt-lite
|
|
||||||
- qwen/qwen-plus-2025-12-01
|
|
||||||
- qwen/qwen-mt-flash
|
|
||||||
- qwen/ccai-pro
|
|
||||||
- qwen/tongyi-tingwu-slp
|
|
||||||
- qwen/qwen3-vl-flash
|
|
||||||
- qwen/qwen3-vl-flash-2025-10-15
|
|
||||||
- qwen/qwen3-omni-flash
|
|
||||||
- qwen/qwen3-omni-flash-2025-09-15
|
|
||||||
- qwen/qwen3-omni-30b-a3b-captioner
|
|
||||||
- qwen/qwen2.5-7b-instruct
|
|
||||||
- qwen/qwen2.5-14b-instruct
|
|
||||||
- qwen/qwen2.5-32b-instruct
|
|
||||||
- qwen/qwen2.5-72b-instruct
|
|
||||||
- qwen/qwen2.5-14b-instruct-1m
|
|
||||||
- qwen/qwen2.5-7b-instruct-1m
|
|
||||||
- qwen/qwen-max-2025-01-25
|
|
||||||
- qwen/qwen-max-latest
|
|
||||||
- qwen/qwen-turbo-2024-11-01
|
|
||||||
- qwen/qwen-turbo-latest
|
|
||||||
- qwen/qwen-plus-latest
|
|
||||||
- qwen/qwen-plus-2025-01-25
|
|
||||||
- qwen/qwq-plus-2025-03-05
|
|
||||||
- qwen/qwen-mt-turbo
|
|
||||||
- qwen/qwen-mt-plus
|
|
||||||
- qwen/qwen-coder-plus
|
|
||||||
- qwen/qwq-plus
|
|
||||||
- qwen/qwen2.5-vl-32b-instruct
|
|
||||||
- qwen/qvq-max
|
|
||||||
- qwen/qwen-omni-turbo
|
|
||||||
- qwen/qwen3-8b
|
|
||||||
- qwen/qwen3-30b-a3b
|
|
||||||
- qwen/qwen3-235b-a22b
|
|
||||||
- qwen/qwen-turbo-2025-04-28
|
|
||||||
- qwen/qwen-plus-2025-04-28
|
|
||||||
- qwen/qwen-vl-max-2025-04-08
|
|
||||||
- qwen/qwen-vl-plus-2025-01-25
|
|
||||||
- qwen/qwen-vl-plus-latest
|
|
||||||
- qwen/qwen-vl-max-latest
|
|
||||||
- qwen/qwen-vl-plus-2025-05-07
|
|
||||||
- qwen/qwen3-coder-plus
|
|
||||||
- qwen/qwen3-coder-480b-a35b-instruct
|
|
||||||
- qwen/qwen3-235b-a22b-instruct-2507
|
|
||||||
- qwen/qwen-plus-2025-07-14
|
|
||||||
- qwen/qwen3-coder-plus-2025-07-22
|
|
||||||
- qwen/qwen3-235b-a22b-thinking-2507
|
|
||||||
- qwen/qwen3-coder-flash
|
|
||||||
- qwen/qwen-vl-max
|
|
||||||
- qwen/qwen-vl-max-2025-08-13
|
|
||||||
- qwen/qwen3-max
|
|
||||||
- qwen/qwen3-max-2025-09-23
|
|
||||||
- qwen/qwen3-vl-plus
|
|
||||||
- qwen/qwen3-vl-235b-a22b-instruct
|
|
||||||
- qwen/qwen3-vl-235b-a22b-thinking
|
|
||||||
- qwen/qwen3-30b-a3b-thinking-2507
|
|
||||||
- qwen/qwen3-30b-a3b-instruct-2507
|
|
||||||
- qwen/qwen3-14b
|
|
||||||
- qwen/qwen3-32b
|
|
||||||
- qwen/qwen3-0.6b
|
|
||||||
- qwen/qwen3-4b
|
|
||||||
- qwen/qwen3-1.7b
|
|
||||||
- qwen/qwen-vl-plus
|
|
||||||
- qwen/qwen3-coder-plus-2025-09-23
|
|
||||||
- qwen/qwen3-vl-plus-2025-09-23
|
|
||||||
- qwen/qwen-plus-2025-09-11
|
|
||||||
- qwen/qwen3-next-80b-a3b-thinking
|
|
||||||
- qwen/qwen3-next-80b-a3b-instruct
|
|
||||||
- qwen/qwen3-max-preview
|
|
||||||
- qwen/qwen2-7b-instruct
|
|
||||||
- qwen/qwen-max
|
|
||||||
- qwen/qwen-plus
|
|
||||||
- qwen/qwen-turbo
|
|
||||||
z-ai:
|
|
||||||
- z-ai/glm-4.5
|
|
||||||
- z-ai/glm-4.5-air
|
|
||||||
- z-ai/glm-4.6
|
|
||||||
- z-ai/glm-4.7
|
|
||||||
- z-ai/glm-5
|
|
||||||
- z-ai/glm-5-turbo
|
|
||||||
- z-ai/glm-5.1
|
|
||||||
x-ai:
|
|
||||||
- x-ai/grok-3
|
|
||||||
- x-ai/grok-3-mini
|
|
||||||
- x-ai/grok-4-0709
|
|
||||||
- x-ai/grok-4-1-fast-non-reasoning
|
|
||||||
- x-ai/grok-4-1-fast-reasoning
|
|
||||||
- x-ai/grok-4-fast-non-reasoning
|
|
||||||
- x-ai/grok-4-fast-reasoning
|
|
||||||
- x-ai/grok-4.20-0309-non-reasoning
|
|
||||||
- x-ai/grok-4.20-0309-reasoning
|
|
||||||
- x-ai/grok-4.20-multi-agent-0309
|
|
||||||
- x-ai/grok-code-fast-1
|
|
||||||
- x-ai/grok-imagine-image
|
|
||||||
- x-ai/grok-imagine-video
|
|
||||||
openai:
|
openai:
|
||||||
|
- openai/gpt-3.5-turbo
|
||||||
|
- openai/gpt-3.5-turbo-16k
|
||||||
- openai/gpt-4-0613
|
- openai/gpt-4-0613
|
||||||
- openai/gpt-4
|
- openai/gpt-4
|
||||||
- openai/gpt-3.5-turbo
|
|
||||||
- openai/gpt-5.4-mini
|
|
||||||
- openai/gpt-5.4
|
|
||||||
- openai/gpt-5.4-nano-2026-03-17
|
|
||||||
- openai/gpt-5.4-nano
|
|
||||||
- openai/gpt-5.4-mini-2026-03-17
|
|
||||||
- openai/gpt-3.5-turbo-instruct
|
- openai/gpt-3.5-turbo-instruct
|
||||||
- openai/gpt-3.5-turbo-instruct-0914
|
- openai/gpt-3.5-turbo-instruct-0914
|
||||||
- openai/gpt-3.5-turbo-1106
|
- openai/gpt-3.5-turbo-1106
|
||||||
|
|
@ -306,80 +259,137 @@ providers:
|
||||||
- openai/gpt-5.4-2026-03-05
|
- openai/gpt-5.4-2026-03-05
|
||||||
- openai/gpt-5.4-pro
|
- openai/gpt-5.4-pro
|
||||||
- openai/gpt-5.4-pro-2026-03-05
|
- openai/gpt-5.4-pro-2026-03-05
|
||||||
- openai/gpt-3.5-turbo-16k
|
- openai/gpt-5.4
|
||||||
|
- openai/gpt-5.4-nano-2026-03-17
|
||||||
|
- openai/gpt-5.4-nano
|
||||||
|
- openai/gpt-5.4-mini-2026-03-17
|
||||||
|
- openai/gpt-5.4-mini
|
||||||
|
- openai/gpt-5.5
|
||||||
|
- openai/gpt-5.5-2026-04-23
|
||||||
|
- openai/gpt-5.5-pro
|
||||||
|
- openai/gpt-5.5-pro-2026-04-23
|
||||||
|
- openai/chat-latest
|
||||||
- openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P
|
- openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P
|
||||||
deepseek:
|
qwen:
|
||||||
- deepseek/deepseek-chat
|
- qwen/qwen3.7-plus-2026-05-26
|
||||||
- deepseek/deepseek-reasoner
|
- qwen/qwen3.7-plus
|
||||||
moonshotai:
|
- qwen/kimi-k2.6
|
||||||
- moonshotai/kimi-k2-thinking
|
- qwen/glm-5.1
|
||||||
- moonshotai/moonshot-v1-auto
|
- qwen/qwen3.7-max-2026-05-17
|
||||||
- moonshotai/moonshot-v1-32k-vision-preview
|
- qwen/qwen3.7-max-preview
|
||||||
- moonshotai/moonshot-v1-128k
|
- qwen/qwen3.7-max-2026-05-20
|
||||||
- moonshotai/kimi-k2-turbo-preview
|
- qwen/qwen3.7-max
|
||||||
- moonshotai/kimi-k2-0905-preview
|
- qwen/deepseek-v4-flash
|
||||||
- moonshotai/moonshot-v1-128k-vision-preview
|
- qwen/deepseek-v4-pro
|
||||||
- moonshotai/moonshot-v1-32k
|
- qwen/qwen3.6-27b
|
||||||
- moonshotai/moonshot-v1-8k-vision-preview
|
- qwen/qwen3.5-plus-2026-04-20
|
||||||
- moonshotai/kimi-k2.5
|
- qwen/qwen3.6-max-preview
|
||||||
- moonshotai/moonshot-v1-8k
|
- qwen/qwen3.6-35b-a3b
|
||||||
- moonshotai/kimi-k2-thinking-turbo
|
- qwen/qwen3.6-flash
|
||||||
- moonshotai/kimi-k2-0711-preview
|
- qwen/qwen3.6-flash-2026-04-16
|
||||||
|
- qwen/qwen3.5-omni-plus-2026-03-15
|
||||||
|
- qwen/qwen3.5-omni-plus
|
||||||
|
- qwen/qwen3.5-omni-flash-2026-03-15
|
||||||
|
- qwen/qwen3.5-omni-flash
|
||||||
|
- qwen/qwen3.6-plus-2026-04-02
|
||||||
|
- qwen/qwen3.6-plus
|
||||||
|
- qwen/wan2.7-image
|
||||||
|
- qwen/deepseek-v3.2
|
||||||
|
- qwen/qwen3-asr-flash-2026-02-10
|
||||||
|
- qwen/qwen3.5-flash-2026-02-23
|
||||||
|
- qwen/qwen3.5-flash
|
||||||
|
- qwen/qwen3.5-122b-a10b
|
||||||
|
- qwen/qwen3.5-35b-a3b
|
||||||
|
- qwen/qwen3.5-27b
|
||||||
|
- qwen/qwen3-coder-next
|
||||||
|
- qwen/qwen3.5-397b-a17b
|
||||||
|
- qwen/qwen3.5-plus-2026-02-15
|
||||||
|
- qwen/qwen3.5-plus
|
||||||
|
- qwen/qwen3-vl-flash-2026-01-22
|
||||||
|
- qwen/qwen3-max-2026-01-23
|
||||||
|
- qwen/qwen-plus-character
|
||||||
|
- qwen/qwen-flash-character
|
||||||
|
- qwen/qwen-flash
|
||||||
|
- qwen/qwen3-vl-plus-2025-12-19
|
||||||
|
- qwen/qwen3-omni-flash-2025-12-01
|
||||||
|
- qwen/qwen3-livetranslate-flash-2025-12-01
|
||||||
|
- qwen/qwen3-livetranslate-flash
|
||||||
|
- qwen/qwen-mt-lite
|
||||||
|
- qwen/qwen-plus-2025-12-01
|
||||||
|
- qwen/qwen-mt-flash
|
||||||
|
- qwen/ccai-pro
|
||||||
|
- qwen/tongyi-tingwu-slp
|
||||||
|
- qwen/qwen3-vl-flash
|
||||||
|
- qwen/qwen3-vl-flash-2025-10-15
|
||||||
|
- qwen/qwen3-omni-flash
|
||||||
|
- qwen/qwen3-omni-flash-2025-09-15
|
||||||
|
- qwen/qwen3-omni-30b-a3b-captioner
|
||||||
|
- qwen/qwen-plus-latest
|
||||||
|
- qwen/qwen-plus-2025-01-25
|
||||||
|
- qwen/qwq-plus-2025-03-05
|
||||||
|
- qwen/qwen-mt-turbo
|
||||||
|
- qwen/qwen-mt-plus
|
||||||
|
- qwen/qwen-coder-plus
|
||||||
|
- qwen/qwq-plus
|
||||||
|
- qwen/qvq-max
|
||||||
|
- qwen/qwen-omni-turbo
|
||||||
|
- qwen/qwen3-8b
|
||||||
|
- qwen/qwen3-30b-a3b
|
||||||
|
- qwen/qwen3-235b-a22b
|
||||||
|
- qwen/qwen-plus-2025-04-28
|
||||||
|
- qwen/qwen3-coder-plus
|
||||||
|
- qwen/qwen3-coder-480b-a35b-instruct
|
||||||
|
- qwen/qwen3-235b-a22b-instruct-2507
|
||||||
|
- qwen/qwen-plus-2025-07-14
|
||||||
|
- qwen/qwen3-coder-plus-2025-07-22
|
||||||
|
- qwen/qwen3-235b-a22b-thinking-2507
|
||||||
|
- qwen/qwen3-coder-flash
|
||||||
|
- qwen/qwen-vl-max
|
||||||
|
- qwen/qwen3-max
|
||||||
|
- qwen/qwen3-max-2025-09-23
|
||||||
|
- qwen/qwen3-vl-plus
|
||||||
|
- qwen/qwen3-vl-235b-a22b-instruct
|
||||||
|
- qwen/qwen3-vl-235b-a22b-thinking
|
||||||
|
- qwen/qwen3-30b-a3b-thinking-2507
|
||||||
|
- qwen/qwen3-30b-a3b-instruct-2507
|
||||||
|
- qwen/qwen3-14b
|
||||||
|
- qwen/qwen3-32b
|
||||||
|
- qwen/qwen-vl-plus
|
||||||
|
- qwen/qwen3-coder-plus-2025-09-23
|
||||||
|
- qwen/qwen3-vl-plus-2025-09-23
|
||||||
|
- qwen/qwen-plus-2025-09-11
|
||||||
|
- qwen/qwen3-next-80b-a3b-thinking
|
||||||
|
- qwen/qwen3-next-80b-a3b-instruct
|
||||||
|
- qwen/qwen3-max-preview
|
||||||
|
- qwen/qwen2-7b-instruct
|
||||||
|
- qwen/qwen-max
|
||||||
|
- qwen/qwen-plus
|
||||||
|
- qwen/qwen-turbo
|
||||||
|
x-ai:
|
||||||
|
- x-ai/grok-4.20-0309-non-reasoning
|
||||||
|
- x-ai/grok-4.20-0309-reasoning
|
||||||
|
- x-ai/grok-4.20-multi-agent-0309
|
||||||
|
- x-ai/grok-4.3
|
||||||
|
- x-ai/grok-build-0.1
|
||||||
|
- x-ai/grok-imagine-image
|
||||||
|
- x-ai/grok-imagine-video
|
||||||
|
- x-ai/grok-imagine-video-1.5-preview
|
||||||
xiaomi:
|
xiaomi:
|
||||||
- xiaomi/mimo-v2-flash
|
- xiaomi/mimo-v2-flash
|
||||||
- xiaomi/mimo-v2-omni
|
- xiaomi/mimo-v2-omni
|
||||||
- xiaomi/mimo-v2-pro
|
- xiaomi/mimo-v2-pro
|
||||||
chatgpt:
|
- xiaomi/mimo-v2.5
|
||||||
- chatgpt/gpt-5.4
|
- xiaomi/mimo-v2.5-asr
|
||||||
- chatgpt/gpt-5.3-codex
|
- xiaomi/mimo-v2.5-pro
|
||||||
- chatgpt/gpt-5.2
|
z-ai:
|
||||||
digitalocean:
|
- z-ai/glm-4.5
|
||||||
- digitalocean/openai-gpt-4.1
|
- z-ai/glm-4.5-air
|
||||||
- digitalocean/openai-gpt-4o
|
- z-ai/glm-4.6
|
||||||
- digitalocean/openai-gpt-4o-mini
|
- z-ai/glm-4.7
|
||||||
- digitalocean/openai-gpt-5
|
- z-ai/glm-5
|
||||||
- digitalocean/openai-gpt-5-mini
|
- z-ai/glm-5-turbo
|
||||||
- digitalocean/openai-gpt-5-nano
|
- z-ai/glm-5.1
|
||||||
- digitalocean/openai-gpt-5.1-codex-max
|
|
||||||
- digitalocean/openai-gpt-5.2
|
|
||||||
- digitalocean/openai-gpt-5.2-pro
|
|
||||||
- digitalocean/openai-gpt-5.3-codex
|
|
||||||
- digitalocean/openai-gpt-5.4
|
|
||||||
- digitalocean/openai-gpt-5.4-mini
|
|
||||||
- digitalocean/openai-gpt-5.4-nano
|
|
||||||
- digitalocean/openai-gpt-5.4-pro
|
|
||||||
- digitalocean/openai-gpt-oss-120b
|
|
||||||
- digitalocean/openai-gpt-oss-20b
|
|
||||||
- digitalocean/openai-o1
|
|
||||||
- digitalocean/openai-o3
|
|
||||||
- digitalocean/openai-o3-mini
|
|
||||||
- digitalocean/anthropic-claude-4.1-opus
|
|
||||||
- digitalocean/anthropic-claude-4.5-sonnet
|
|
||||||
- digitalocean/anthropic-claude-4.6-sonnet
|
|
||||||
- digitalocean/anthropic-claude-haiku-4.5
|
|
||||||
- digitalocean/anthropic-claude-opus-4
|
|
||||||
- digitalocean/anthropic-claude-opus-4.5
|
|
||||||
- digitalocean/anthropic-claude-opus-4.6
|
|
||||||
- digitalocean/anthropic-claude-opus-4.7
|
|
||||||
- digitalocean/anthropic-claude-sonnet-4
|
|
||||||
- digitalocean/alibaba-qwen3-32b
|
|
||||||
- digitalocean/arcee-trinity-large-thinking
|
|
||||||
- digitalocean/deepseek-3.2
|
|
||||||
- digitalocean/deepseek-r1-distill-llama-70b
|
|
||||||
- digitalocean/gemma-4-31B-it
|
|
||||||
- digitalocean/glm-5
|
|
||||||
- digitalocean/kimi-k2.5
|
|
||||||
- digitalocean/llama3.3-70b-instruct
|
|
||||||
- digitalocean/minimax-m2.5
|
|
||||||
- digitalocean/nvidia-nemotron-3-super-120b
|
|
||||||
- digitalocean/qwen3-coder-flash
|
|
||||||
- digitalocean/qwen3.5-397b-a17b
|
|
||||||
- digitalocean/all-mini-lm-l6-v2
|
|
||||||
- digitalocean/gte-large-en-v1.5
|
|
||||||
- digitalocean/multi-qa-mpnet-base-dot-v1
|
|
||||||
- digitalocean/qwen3-embedding-0.6b
|
|
||||||
- digitalocean/router:software-engineering
|
|
||||||
metadata:
|
metadata:
|
||||||
total_providers: 13
|
total_providers: 13
|
||||||
total_models: 364
|
total_models: 375
|
||||||
last_updated: 2026-04-20T00:00:00.000000+00:00
|
last_updated: 2026-06-09T22:50:12.186709+00:00
|
||||||
|
|
|
||||||
|
|
@ -500,6 +500,19 @@ mod tests {
|
||||||
"/custom/api/v2/chat/completions"
|
"/custom/api/v2/chat/completions"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Kimi Code API: base_url path prefix already includes /coding/v1
|
||||||
|
assert_eq!(
|
||||||
|
api.target_endpoint_for_provider(
|
||||||
|
&ProviderId::Moonshotai,
|
||||||
|
"/v1/messages",
|
||||||
|
"kimi-for-coding",
|
||||||
|
false,
|
||||||
|
Some("/coding/v1"),
|
||||||
|
false
|
||||||
|
),
|
||||||
|
"/coding/v1/chat/completions"
|
||||||
|
);
|
||||||
|
|
||||||
// Test Groq with custom prefix
|
// Test Groq with custom prefix
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
api.target_endpoint_for_provider(
|
api.target_endpoint_for_provider(
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,8 @@ pub enum ProviderId {
|
||||||
DigitalOcean,
|
DigitalOcean,
|
||||||
Vercel,
|
Vercel,
|
||||||
OpenRouter,
|
OpenRouter,
|
||||||
|
Astraflow,
|
||||||
|
AstraflowCN,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TryFrom<&str> for ProviderId {
|
impl TryFrom<&str> for ProviderId {
|
||||||
|
|
@ -81,6 +83,8 @@ impl TryFrom<&str> for ProviderId {
|
||||||
"do_ai" => Ok(ProviderId::DigitalOcean), // alias
|
"do_ai" => Ok(ProviderId::DigitalOcean), // alias
|
||||||
"vercel" => Ok(ProviderId::Vercel),
|
"vercel" => Ok(ProviderId::Vercel),
|
||||||
"openrouter" => Ok(ProviderId::OpenRouter),
|
"openrouter" => Ok(ProviderId::OpenRouter),
|
||||||
|
"astraflow" => Ok(ProviderId::Astraflow),
|
||||||
|
"astraflow_cn" => Ok(ProviderId::AstraflowCN),
|
||||||
_ => Err(format!("Unknown provider: {}", value)),
|
_ => Err(format!("Unknown provider: {}", value)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -107,6 +111,7 @@ impl ProviderId {
|
||||||
ProviderId::Qwen => "qwen",
|
ProviderId::Qwen => "qwen",
|
||||||
ProviderId::ChatGPT => "chatgpt",
|
ProviderId::ChatGPT => "chatgpt",
|
||||||
ProviderId::DigitalOcean => "digitalocean",
|
ProviderId::DigitalOcean => "digitalocean",
|
||||||
|
ProviderId::Astraflow | ProviderId::AstraflowCN => return Vec::new(),
|
||||||
_ => return Vec::new(),
|
_ => return Vec::new(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -174,7 +179,9 @@ impl ProviderId {
|
||||||
| ProviderId::Qwen
|
| ProviderId::Qwen
|
||||||
| ProviderId::DigitalOcean
|
| ProviderId::DigitalOcean
|
||||||
| ProviderId::OpenRouter
|
| ProviderId::OpenRouter
|
||||||
| ProviderId::ChatGPT,
|
| ProviderId::ChatGPT
|
||||||
|
| ProviderId::Astraflow
|
||||||
|
| ProviderId::AstraflowCN,
|
||||||
SupportedAPIsFromClient::AnthropicMessagesAPI(_),
|
SupportedAPIsFromClient::AnthropicMessagesAPI(_),
|
||||||
) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
||||||
|
|
||||||
|
|
@ -196,7 +203,9 @@ impl ProviderId {
|
||||||
| ProviderId::Qwen
|
| ProviderId::Qwen
|
||||||
| ProviderId::DigitalOcean
|
| ProviderId::DigitalOcean
|
||||||
| ProviderId::OpenRouter
|
| ProviderId::OpenRouter
|
||||||
| ProviderId::ChatGPT,
|
| ProviderId::ChatGPT
|
||||||
|
| ProviderId::Astraflow
|
||||||
|
| ProviderId::AstraflowCN,
|
||||||
SupportedAPIsFromClient::OpenAIChatCompletions(_),
|
SupportedAPIsFromClient::OpenAIChatCompletions(_),
|
||||||
) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
) => SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
||||||
|
|
||||||
|
|
@ -267,6 +276,8 @@ impl Display for ProviderId {
|
||||||
ProviderId::DigitalOcean => write!(f, "digitalocean"),
|
ProviderId::DigitalOcean => write!(f, "digitalocean"),
|
||||||
ProviderId::Vercel => write!(f, "vercel"),
|
ProviderId::Vercel => write!(f, "vercel"),
|
||||||
ProviderId::OpenRouter => write!(f, "openrouter"),
|
ProviderId::OpenRouter => write!(f, "openrouter"),
|
||||||
|
ProviderId::Astraflow => write!(f, "astraflow"),
|
||||||
|
ProviderId::AstraflowCN => write!(f, "astraflow_cn"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
use crate::apis::anthropic::MessagesRequest;
|
use crate::apis::anthropic::MessagesRequest;
|
||||||
use crate::apis::openai::ChatCompletionsRequest;
|
use crate::apis::openai::{is_kimi_code_model, ChatCompletionsRequest};
|
||||||
|
use log::warn;
|
||||||
|
|
||||||
use crate::apis::amazon_bedrock::{ConverseRequest, ConverseStreamRequest};
|
use crate::apis::amazon_bedrock::{ConverseRequest, ConverseStreamRequest};
|
||||||
use crate::apis::openai_responses::ResponsesAPIRequest;
|
use crate::apis::openai_responses::ResponsesAPIRequest;
|
||||||
|
|
@ -90,6 +91,24 @@ impl ProviderRequestType {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if matches!(
|
||||||
|
upstream_api,
|
||||||
|
SupportedUpstreamAPIs::OpenAIChatCompletions(_)
|
||||||
|
) {
|
||||||
|
if let Self::ChatCompletionsRequest(req) = self {
|
||||||
|
if is_kimi_code_model(req.model()) {
|
||||||
|
req.normalize_for_kimi_code_api();
|
||||||
|
}
|
||||||
|
} else if let Self::MessagesRequest(req) = self {
|
||||||
|
if is_kimi_code_model(req.model.as_str()) && req.thinking.is_some() {
|
||||||
|
warn!(
|
||||||
|
"kimi-for-coding: stripping unsupported thinking config from upstream request"
|
||||||
|
);
|
||||||
|
req.thinking = None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ChatGPT requires instructions, store=false, and input as a list
|
// ChatGPT requires instructions, store=false, and input as a list
|
||||||
if provider_id == ProviderId::ChatGPT {
|
if provider_id == ProviderId::ChatGPT {
|
||||||
if let Self::ResponsesAPIRequest(req) = self {
|
if let Self::ResponsesAPIRequest(req) = self {
|
||||||
|
|
@ -879,6 +898,42 @@ mod tests {
|
||||||
assert!(req.web_search_options.is_none());
|
assert!(req.web_search_options.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_normalize_for_upstream_kimi_code_strips_unsupported_chat_fields() {
|
||||||
|
use crate::apis::openai::{Message, MessageContent, OpenAIApi, Role, StreamOptions};
|
||||||
|
|
||||||
|
let mut request = ProviderRequestType::ChatCompletionsRequest(ChatCompletionsRequest {
|
||||||
|
model: "kimi-for-coding".to_string(),
|
||||||
|
messages: vec![Message {
|
||||||
|
role: Role::User,
|
||||||
|
content: Some(MessageContent::Text("hello".to_string())),
|
||||||
|
name: None,
|
||||||
|
tool_calls: None,
|
||||||
|
tool_call_id: None,
|
||||||
|
}],
|
||||||
|
stream_options: Some(StreamOptions {
|
||||||
|
include_usage: Some(true),
|
||||||
|
}),
|
||||||
|
reasoning_effort: Some("high".to_string()),
|
||||||
|
web_search_options: Some(serde_json::json!({"search_context_size":"medium"})),
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
|
||||||
|
request
|
||||||
|
.normalize_for_upstream(
|
||||||
|
ProviderId::Moonshotai,
|
||||||
|
&SupportedUpstreamAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let ProviderRequestType::ChatCompletionsRequest(req) = request else {
|
||||||
|
panic!("expected chat request");
|
||||||
|
};
|
||||||
|
assert!(req.stream_options.is_none());
|
||||||
|
assert!(req.reasoning_effort.is_none());
|
||||||
|
assert!(req.web_search_options.is_none());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_normalize_for_upstream_non_xai_keeps_chat_web_search_options() {
|
fn test_normalize_for_upstream_non_xai_keeps_chat_web_search_options() {
|
||||||
use crate::apis::openai::{Message, MessageContent, OpenAIApi, Role};
|
use crate::apis::openai::{Message, MessageContent, OpenAIApi, Role};
|
||||||
|
|
|
||||||
|
|
@ -223,6 +223,7 @@ impl From<MessagesRole> for Role {
|
||||||
match val {
|
match val {
|
||||||
MessagesRole::User => Role::User,
|
MessagesRole::User => Role::User,
|
||||||
MessagesRole::Assistant => Role::Assistant,
|
MessagesRole::Assistant => Role::Assistant,
|
||||||
|
MessagesRole::System => Role::System,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -340,6 +341,11 @@ impl TryFrom<MessagesMessage> for BedrockMessage {
|
||||||
let role = match message.role {
|
let role = match message.role {
|
||||||
MessagesRole::User => ConversationRole::User,
|
MessagesRole::User => ConversationRole::User,
|
||||||
MessagesRole::Assistant => ConversationRole::Assistant,
|
MessagesRole::Assistant => ConversationRole::Assistant,
|
||||||
|
MessagesRole::System => {
|
||||||
|
return Err(TransformError::UnsupportedConversion(
|
||||||
|
"System messages must be set via the system prompt, not messages".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut content_blocks = Vec::new();
|
let mut content_blocks = Vec::new();
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ This demo shows how you can use user preferences to route user prompts to approp
|
||||||
|
|
||||||
## How to start the demo
|
## How to start the demo
|
||||||
|
|
||||||
Make sure you have Plano CLI installed (`pip install planoai==0.4.22` or `uv tool install planoai==0.4.22`).
|
Make sure you have Plano CLI installed (`pip install planoai==0.4.25` or `uv tool install planoai==0.4.25`).
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd demos/llm_routing/preference_based_routing
|
cd demos/llm_routing/preference_based_routing
|
||||||
|
|
|
||||||
|
|
@ -432,6 +432,9 @@ Moonshot AI
|
||||||
* - Model Name
|
* - Model Name
|
||||||
- Model ID for Config
|
- Model ID for Config
|
||||||
- Description
|
- Description
|
||||||
|
* - Kimi for Coding
|
||||||
|
- ``moonshotai/kimi-for-coding``
|
||||||
|
- Kimi Code API model for agentic coding (use with ``base_url: https://api.kimi.com/coding/v1``)
|
||||||
* - Kimi K2 Preview
|
* - Kimi K2 Preview
|
||||||
- ``moonshotai/kimi-k2-0905-preview``
|
- ``moonshotai/kimi-k2-0905-preview``
|
||||||
- Foundation model optimized for agentic tasks with 32B activated parameters
|
- Foundation model optimized for agentic tasks with 32B activated parameters
|
||||||
|
|
@ -447,6 +450,13 @@ Moonshot AI
|
||||||
.. code-block:: yaml
|
.. code-block:: yaml
|
||||||
|
|
||||||
llm_providers:
|
llm_providers:
|
||||||
|
# Kimi Code API (Claude Code / agentic clients via Plano translation)
|
||||||
|
- model: moonshotai/kimi-for-coding
|
||||||
|
access_key: $MOONSHOTAI_API_KEY
|
||||||
|
base_url: https://api.kimi.com/coding/v1
|
||||||
|
headers:
|
||||||
|
User-Agent: "KimiCLI/1.3"
|
||||||
|
|
||||||
# Latest K2 models for agentic tasks
|
# Latest K2 models for agentic tasks
|
||||||
- model: moonshotai/kimi-k2-0905-preview
|
- model: moonshotai/kimi-k2-0905-preview
|
||||||
access_key: $MOONSHOTAI_API_KEY
|
access_key: $MOONSHOTAI_API_KEY
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,15 @@
|
||||||
|
|
||||||
Prompt Target
|
Prompt Target
|
||||||
=============
|
=============
|
||||||
|
|
||||||
|
.. deprecated:: v0.4.22
|
||||||
|
**Prompt Targets are deprecated and no longer actively maintained.** This concept is
|
||||||
|
retained for existing users on older Plano configurations, but new applications should
|
||||||
|
not adopt it. For deterministic, task-specific workloads, use :ref:`Agents <agents>`
|
||||||
|
together with :ref:`Function Calling <function_calling>` instead. The
|
||||||
|
``prompt_targets`` configuration block and related CLI commands will continue to
|
||||||
|
function for now, but may be removed in a future release.
|
||||||
|
|
||||||
A Prompt Target is a deterministic, task-specific backend function or API endpoint that your application calls via Plano.
|
A Prompt Target is a deterministic, task-specific backend function or API endpoint that your application calls via Plano.
|
||||||
Unlike agents (which handle wide-ranging, open-ended tasks), prompt targets are designed for focused, specific workloads where Plano can add value through input clarification and validation.
|
Unlike agents (which handle wide-ranging, open-ended tasks), prompt targets are designed for focused, specific workloads where Plano can add value through input clarification and validation.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ from sphinxawesome_theme.postprocess import Icons
|
||||||
project = "Plano Docs"
|
project = "Plano Docs"
|
||||||
copyright = "2026, Katanemo Labs, a DigitalOcean Company"
|
copyright = "2026, Katanemo Labs, a DigitalOcean Company"
|
||||||
author = "Katanemo Labs, Inc"
|
author = "Katanemo Labs, Inc"
|
||||||
release = " v0.4.22"
|
release = " v0.4.25"
|
||||||
|
|
||||||
# -- General configuration ---------------------------------------------------
|
# -- General configuration ---------------------------------------------------
|
||||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||||
|
|
|
||||||
|
|
@ -57,10 +57,10 @@ Deep dive into essential ideas and mechanisms behind Plano:
|
||||||
|
|
||||||
Explore Plano's LLM integration options
|
Explore Plano's LLM integration options
|
||||||
|
|
||||||
.. grid-item-card:: :octicon:`workflow` Prompt Target
|
.. grid-item-card:: :octicon:`workflow` Prompt Target (Deprecated)
|
||||||
:link: ../concepts/prompt_target.html
|
:link: ../concepts/prompt_target.html
|
||||||
|
|
||||||
Understand how Plano handles prompts
|
Deprecated — kept for existing users. New apps should use Agents.
|
||||||
|
|
||||||
|
|
||||||
Guides
|
Guides
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
$ uv tool install planoai==0.4.22
|
$ uv tool install planoai==0.4.25
|
||||||
|
|
||||||
**Option 2: Install with pip (Traditional)**
|
**Option 2: Install with pip (Traditional)**
|
||||||
|
|
||||||
|
|
@ -51,7 +51,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
|
||||||
|
|
||||||
$ python -m venv venv
|
$ python -m venv venv
|
||||||
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
|
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
|
||||||
$ pip install planoai==0.4.22
|
$ pip install planoai==0.4.25
|
||||||
|
|
||||||
|
|
||||||
.. _llm_routing_quickstart:
|
.. _llm_routing_quickstart:
|
||||||
|
|
@ -247,6 +247,11 @@ You can then ask a follow-up like "Also book me a hotel near JFK" and Plano-Orch
|
||||||
Deterministic API calls with prompt targets
|
Deterministic API calls with prompt targets
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. deprecated:: v0.4.22
|
||||||
|
:ref:`Prompt Targets <prompt_target>` are deprecated and no longer actively
|
||||||
|
maintained. The walkthrough below is preserved for users on existing configs;
|
||||||
|
new applications should use :ref:`Agents <agents>` instead.
|
||||||
|
|
||||||
Next, we'll show Plano's deterministic API calling using a single prompt target. We'll build a currency exchange backend powered by `https://api.frankfurter.dev/`, assuming USD as the base currency.
|
Next, we'll show Plano's deterministic API calling using a single prompt target. We'll build a currency exchange backend powered by `https://api.frankfurter.dev/`, assuming USD as the base currency.
|
||||||
|
|
||||||
Step 1. Create plano config file
|
Step 1. Create plano config file
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,12 @@ Function Calling
|
||||||
**Function Calling** is a powerful feature in Plano that allows your application to dynamically execute backend functions or services based on user prompts.
|
**Function Calling** is a powerful feature in Plano that allows your application to dynamically execute backend functions or services based on user prompts.
|
||||||
This enables seamless integration between natural language interactions and backend operations, turning user inputs into actionable results.
|
This enables seamless integration between natural language interactions and backend operations, turning user inputs into actionable results.
|
||||||
|
|
||||||
|
.. deprecated:: v0.4.22
|
||||||
|
The prompt-target based workflow shown below (see :ref:`Step 2 <function_calling>`)
|
||||||
|
is deprecated. :ref:`Prompt Targets <prompt_target>` are no longer actively
|
||||||
|
maintained and may be removed in a future release. For new function-calling
|
||||||
|
workloads, prefer :ref:`Agents <agents>` with tool definitions.
|
||||||
|
|
||||||
|
|
||||||
What is Function Calling?
|
What is Function Calling?
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,6 @@ Quick Navigation
|
||||||
- :ref:`cli_reference_logs`
|
- :ref:`cli_reference_logs`
|
||||||
- :ref:`cli_reference_init`
|
- :ref:`cli_reference_init`
|
||||||
- :ref:`cli_reference_trace`
|
- :ref:`cli_reference_trace`
|
||||||
- :ref:`cli_reference_prompt_targets`
|
|
||||||
- :ref:`cli_reference_cli_agent`
|
- :ref:`cli_reference_cli_agent`
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -260,24 +259,6 @@ Inspect request traces from the local OTLP listener.
|
||||||
- ``--list`` cannot be combined with a specific trace-id target.
|
- ``--list`` cannot be combined with a specific trace-id target.
|
||||||
|
|
||||||
|
|
||||||
.. _cli_reference_prompt_targets:
|
|
||||||
|
|
||||||
planoai prompt_targets
|
|
||||||
----------------------
|
|
||||||
|
|
||||||
Generate prompt-target metadata from Python methods.
|
|
||||||
|
|
||||||
**Synopsis**
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
$ planoai prompt_targets --file <python-file>
|
|
||||||
|
|
||||||
**Options**
|
|
||||||
|
|
||||||
- ``--file, --f <python-file>``: required path to a ``.py`` source file.
|
|
||||||
|
|
||||||
|
|
||||||
.. _cli_reference_cli_agent:
|
.. _cli_reference_cli_agent:
|
||||||
|
|
||||||
planoai cli_agent
|
planoai cli_agent
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,29 @@ The following is a complete reference of the ``plano_config.yml`` that controls
|
||||||
the Plano gateway. This is where you enable capabilities like routing to upstream LLM providers, defining prompt_targets
|
the Plano gateway. This is where you enable capabilities like routing to upstream LLM providers, defining prompt_targets
|
||||||
where prompts get routed to, apply guardrails, and enable critical agent observability features.
|
where prompts get routed to, apply guardrails, and enable critical agent observability features.
|
||||||
|
|
||||||
|
Model provider headers
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
Each entry under ``model_providers`` (or the legacy ``llm_providers`` alias) may include a ``headers`` map of extra
|
||||||
|
HTTP headers that Plano adds to upstream LLM requests. Plano applies these headers after it sets authentication from
|
||||||
|
``access_key`` or ``passthrough_auth``, so you can supply provider-specific metadata without replacing the configured
|
||||||
|
credentials.
|
||||||
|
|
||||||
|
- **Type:** map of strings (header name → value)
|
||||||
|
- **Optional:** yes
|
||||||
|
- **Common uses:** required ``User-Agent`` values, organization or account identifiers, or other headers some APIs expect
|
||||||
|
|
||||||
|
.. code-block:: yaml
|
||||||
|
|
||||||
|
model_providers:
|
||||||
|
- model: moonshotai/kimi-for-coding
|
||||||
|
access_key: $MOONSHOTAI_API_KEY
|
||||||
|
base_url: https://api.kimi.com/coding/v1
|
||||||
|
headers:
|
||||||
|
User-Agent: "KimiCLI/1.3"
|
||||||
|
|
||||||
|
The example below includes this and other provider options in context.
|
||||||
|
|
||||||
.. literalinclude:: includes/plano_config_full_reference.yaml
|
.. literalinclude:: includes/plano_config_full_reference.yaml
|
||||||
:language: yaml
|
:language: yaml
|
||||||
:linenos:
|
:linenos:
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,7 @@ Create a ``docker-compose.yml`` file with the following configuration:
|
||||||
# docker-compose.yml
|
# docker-compose.yml
|
||||||
services:
|
services:
|
||||||
plano:
|
plano:
|
||||||
image: katanemo/plano:0.4.22
|
image: katanemo/plano:0.4.25
|
||||||
container_name: plano
|
container_name: plano
|
||||||
ports:
|
ports:
|
||||||
- "10000:10000" # ingress (client -> plano)
|
- "10000:10000" # ingress (client -> plano)
|
||||||
|
|
@ -153,7 +153,7 @@ Create a ``plano-deployment.yaml``:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: plano
|
- name: plano
|
||||||
image: katanemo/plano:0.4.22
|
image: katanemo/plano:0.4.25
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 12000 # LLM gateway (chat completions, model routing)
|
- containerPort: 12000 # LLM gateway (chat completions, model routing)
|
||||||
name: llm-gateway
|
name: llm-gateway
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,14 @@ model_providers:
|
||||||
http_host: api.custom-provider.com
|
http_host: api.custom-provider.com
|
||||||
access_key: $CUSTOM_API_KEY
|
access_key: $CUSTOM_API_KEY
|
||||||
|
|
||||||
|
# headers: optional map of extra HTTP headers sent on upstream requests (after auth).
|
||||||
|
# Use for provider-specific requirements such as User-Agent, org IDs, or account headers.
|
||||||
|
- model: moonshotai/kimi-for-coding
|
||||||
|
access_key: $MOONSHOTAI_API_KEY
|
||||||
|
base_url: https://api.kimi.com/coding/v1
|
||||||
|
headers:
|
||||||
|
User-Agent: "KimiCLI/1.3"
|
||||||
|
|
||||||
# Model aliases - use friendly names instead of full provider model names
|
# Model aliases - use friendly names instead of full provider model names
|
||||||
model_aliases:
|
model_aliases:
|
||||||
fast-llm:
|
fast-llm:
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,18 @@ listeners:
|
||||||
port: 443
|
port: 443
|
||||||
protocol: https
|
protocol: https
|
||||||
provider_interface: openai
|
provider_interface: openai
|
||||||
|
- access_key: $MOONSHOTAI_API_KEY
|
||||||
|
base_url: https://api.kimi.com/coding/v1
|
||||||
|
base_url_path_prefix: /coding/v1
|
||||||
|
cluster_name: moonshotai_api.kimi.com
|
||||||
|
endpoint: api.kimi.com
|
||||||
|
headers:
|
||||||
|
User-Agent: KimiCLI/1.3
|
||||||
|
model: kimi-for-coding
|
||||||
|
name: moonshotai/kimi-for-coding
|
||||||
|
port: 443
|
||||||
|
protocol: https
|
||||||
|
provider_interface: moonshotai
|
||||||
name: model_1
|
name: model_1
|
||||||
output_filters:
|
output_filters:
|
||||||
- input_guards
|
- input_guards
|
||||||
|
|
@ -144,6 +156,18 @@ model_providers:
|
||||||
port: 443
|
port: 443
|
||||||
protocol: https
|
protocol: https
|
||||||
provider_interface: openai
|
provider_interface: openai
|
||||||
|
- access_key: $MOONSHOTAI_API_KEY
|
||||||
|
base_url: https://api.kimi.com/coding/v1
|
||||||
|
base_url_path_prefix: /coding/v1
|
||||||
|
cluster_name: moonshotai_api.kimi.com
|
||||||
|
endpoint: api.kimi.com
|
||||||
|
headers:
|
||||||
|
User-Agent: KimiCLI/1.3
|
||||||
|
model: kimi-for-coding
|
||||||
|
name: moonshotai/kimi-for-coding
|
||||||
|
port: 443
|
||||||
|
protocol: https
|
||||||
|
provider_interface: moonshotai
|
||||||
- internal: true
|
- internal: true
|
||||||
model: Plano-Orchestrator
|
model: Plano-Orchestrator
|
||||||
name: plano-orchestrator
|
name: plano-orchestrator
|
||||||
|
|
|
||||||
101
skills/AGENTS.md
101
skills/AGENTS.md
|
|
@ -31,9 +31,8 @@
|
||||||
- [5.3 Use `planoai trace` to Inspect Routing Decisions](#use-planoai-trace-to-inspect-routing-decisions)
|
- [5.3 Use `planoai trace` to Inspect Routing Decisions](#use-planoai-trace-to-inspect-routing-decisions)
|
||||||
- [Section 6: CLI Operations](#section-6)
|
- [Section 6: CLI Operations](#section-6)
|
||||||
- [6.1 Follow the `planoai up` Validation Workflow Before Debugging Runtime Issues](#follow-the-planoai-up-validation-workflow-before-debugging-runtime-issues)
|
- [6.1 Follow the `planoai up` Validation Workflow Before Debugging Runtime Issues](#follow-the-planoai-up-validation-workflow-before-debugging-runtime-issues)
|
||||||
- [6.2 Generate Prompt Targets from Python Functions with `planoai generate_prompt_targets`](#generate-prompt-targets-from-python-functions-with-planoai-generateprompttargets)
|
- [6.2 Use `planoai cli_agent` to Connect Claude Code Through Plano](#use-planoai-cliagent-to-connect-claude-code-through-plano)
|
||||||
- [6.3 Use `planoai cli_agent` to Connect Claude Code Through Plano](#use-planoai-cliagent-to-connect-claude-code-through-plano)
|
- [6.3 Use `planoai init` Templates to Bootstrap New Projects Correctly](#use-planoai-init-templates-to-bootstrap-new-projects-correctly)
|
||||||
- [6.4 Use `planoai init` Templates to Bootstrap New Projects Correctly](#use-planoai-init-templates-to-bootstrap-new-projects-correctly)
|
|
||||||
- [Section 7: Deployment & Security](#section-7)
|
- [Section 7: Deployment & Security](#section-7)
|
||||||
- [7.1 Understand Plano's Docker Network Topology for Agent URL Configuration](#understand-planos-docker-network-topology-for-agent-url-configuration)
|
- [7.1 Understand Plano's Docker Network Topology for Agent URL Configuration](#understand-planos-docker-network-topology-for-agent-url-configuration)
|
||||||
- [7.2 Use PostgreSQL State Storage for Multi-Turn Conversations in Production](#use-postgresql-state-storage-for-multi-turn-conversations-in-production)
|
- [7.2 Use PostgreSQL State Storage for Multi-Turn Conversations in Production](#use-postgresql-state-storage-for-multi-turn-conversations-in-production)
|
||||||
|
|
@ -1377,99 +1376,7 @@ Reference: https://github.com/katanemo/archgw
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### 6.2 Generate Prompt Targets from Python Functions with `planoai generate_prompt_targets`
|
### 6.2 Use `planoai cli_agent` to Connect Claude Code Through Plano
|
||||||
|
|
||||||
**Impact:** `MEDIUM` — Manually writing prompt_targets YAML for existing Python APIs is error-prone — the generator introspects function signatures and produces correct YAML automatically
|
|
||||||
**Tags:** `cli`, `generate`, `prompt-targets`, `python`, `code-generation`
|
|
||||||
|
|
||||||
## Generate Prompt Targets from Python Functions with `planoai generate_prompt_targets`
|
|
||||||
|
|
||||||
`planoai generate_prompt_targets` introspects Python function signatures and docstrings to generate `prompt_targets` YAML for your Plano config. This is the fastest way to expose existing Python APIs as LLM-callable functions without manually writing the YAML schema.
|
|
||||||
|
|
||||||
**Python function requirements for generation:**
|
|
||||||
- Use simple type annotations: `int`, `float`, `bool`, `str`, `list`, `tuple`, `set`, `dict`
|
|
||||||
- Include a docstring describing what the function does (becomes the `description`)
|
|
||||||
- Complex Pydantic models must be flattened into primitive typed parameters first
|
|
||||||
|
|
||||||
**Example Python file:**
|
|
||||||
|
|
||||||
```python
|
|
||||||
# api.py
|
|
||||||
|
|
||||||
def get_stock_quote(symbol: str, exchange: str = "NYSE") -> dict:
|
|
||||||
"""Get the current stock price and trading data for a given stock symbol.
|
|
||||||
|
|
||||||
Returns price, volume, market cap, and 24h change percentage.
|
|
||||||
"""
|
|
||||||
# Implementation calls stock API
|
|
||||||
pass
|
|
||||||
|
|
||||||
def get_weather_forecast(city: str, days: int = 3, units: str = "celsius") -> dict:
|
|
||||||
"""Get the weather forecast for a city.
|
|
||||||
|
|
||||||
Returns temperature, precipitation, and conditions for the specified number of days.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def search_flights(origin: str, destination: str, date: str, passengers: int = 1) -> list:
|
|
||||||
"""Search for available flights between two airports on a given date.
|
|
||||||
|
|
||||||
Date format: YYYY-MM-DD. Returns list of flight options with prices.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
```
|
|
||||||
|
|
||||||
**Running the generator:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
planoai generate_prompt_targets --file api.py
|
|
||||||
```
|
|
||||||
|
|
||||||
**Generated output (add to your config.yaml):**
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
prompt_targets:
|
|
||||||
- name: get_stock_quote
|
|
||||||
description: Get the current stock price and trading data for a given stock symbol.
|
|
||||||
parameters:
|
|
||||||
- name: symbol
|
|
||||||
type: str
|
|
||||||
required: true
|
|
||||||
- name: exchange
|
|
||||||
type: str
|
|
||||||
required: false
|
|
||||||
default: NYSE
|
|
||||||
# Add endpoint manually:
|
|
||||||
endpoint:
|
|
||||||
name: stock_api
|
|
||||||
path: /quote?symbol={symbol}&exchange={exchange}
|
|
||||||
|
|
||||||
- name: get_weather_forecast
|
|
||||||
description: Get the weather forecast for a city.
|
|
||||||
parameters:
|
|
||||||
- name: city
|
|
||||||
type: str
|
|
||||||
required: true
|
|
||||||
- name: days
|
|
||||||
type: int
|
|
||||||
required: false
|
|
||||||
default: 3
|
|
||||||
- name: units
|
|
||||||
type: str
|
|
||||||
required: false
|
|
||||||
default: celsius
|
|
||||||
endpoint:
|
|
||||||
name: weather_api
|
|
||||||
path: /forecast?city={city}&days={days}&units={units}
|
|
||||||
```
|
|
||||||
|
|
||||||
After generation, manually add the `endpoint` blocks pointing to your actual API. The generator produces the schema; you wire in the connectivity.
|
|
||||||
|
|
||||||
Reference: https://github.com/katanemo/archgw
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### 6.3 Use `planoai cli_agent` to Connect Claude Code Through Plano
|
|
||||||
|
|
||||||
**Impact:** `MEDIUM-HIGH` — Running Claude Code directly against provider APIs bypasses Plano's routing, observability, and guardrails — cli_agent routes all Claude Code traffic through your configured Plano instance
|
**Impact:** `MEDIUM-HIGH` — Running Claude Code directly against provider APIs bypasses Plano's routing, observability, and guardrails — cli_agent routes all Claude Code traffic through your configured Plano instance
|
||||||
**Tags:** `cli`, `cli-agent`, `claude`, `coding-agent`, `integration`
|
**Tags:** `cli`, `cli-agent`, `claude`, `coding-agent`, `integration`
|
||||||
|
|
@ -1562,7 +1469,7 @@ Reference: [https://github.com/katanemo/archgw](https://github.com/katanemo/arch
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### 6.4 Use `planoai init` Templates to Bootstrap New Projects Correctly
|
### 6.3 Use `planoai init` Templates to Bootstrap New Projects Correctly
|
||||||
|
|
||||||
**Impact:** `MEDIUM` — Starting from a blank config.yaml leads to missing required fields and common structural mistakes — templates provide validated, idiomatic starting points
|
**Impact:** `MEDIUM` — Starting from a blank config.yaml leads to missing required fields and common structural mistakes — templates provide validated, idiomatic starting points
|
||||||
**Tags:** `cli`, `init`, `templates`, `getting-started`, `project-setup`
|
**Tags:** `cli`, `init`, `templates`, `getting-started`, `project-setup`
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ After installation, these skills are available to your coding agent and can be i
|
||||||
- `plano-agent-orchestration` - Agent registration and routing descriptions
|
- `plano-agent-orchestration` - Agent registration and routing descriptions
|
||||||
- `plano-filter-guardrails` - MCP filters, guardrail messaging, filter ordering
|
- `plano-filter-guardrails` - MCP filters, guardrail messaging, filter ordering
|
||||||
- `plano-observability-debugging` - Tracing setup, span attributes, trace analysis
|
- `plano-observability-debugging` - Tracing setup, span attributes, trace analysis
|
||||||
- `plano-cli-operations` - `planoai up`, `cli_agent`, init, prompt target generation
|
- `plano-cli-operations` - `planoai up`, `cli_agent`, init
|
||||||
- `plano-deployment-security` - Docker networking, health checks, state storage
|
- `plano-deployment-security` - Docker networking, health checks, state storage
|
||||||
- `plano-advanced-patterns` - Multi-listener architecture and prompt target schema design
|
- `plano-advanced-patterns` - Multi-listener architecture and prompt target schema design
|
||||||
|
|
||||||
|
|
@ -110,7 +110,7 @@ skills/
|
||||||
| 3 | `agent-` | Agent Orchestration | Descriptions, agent registration |
|
| 3 | `agent-` | Agent Orchestration | Descriptions, agent registration |
|
||||||
| 4 | `filter-` | Filter Chains & Guardrails | Ordering, MCP integration, guardrails |
|
| 4 | `filter-` | Filter Chains & Guardrails | Ordering, MCP integration, guardrails |
|
||||||
| 5 | `observe-` | Observability & Debugging | Tracing, trace inspection, span attributes |
|
| 5 | `observe-` | Observability & Debugging | Tracing, trace inspection, span attributes |
|
||||||
| 6 | `cli-` | CLI Operations | Startup, CLI agent, init, code generation |
|
| 6 | `cli-` | CLI Operations | Startup, CLI agent, init |
|
||||||
| 7 | `deploy-` | Deployment & Security | Docker networking, state storage, health checks |
|
| 7 | `deploy-` | Deployment & Security | Docker networking, state storage, health checks |
|
||||||
| 8 | `advanced-` | Advanced Patterns | Prompt targets, rate limits, multi-listener |
|
| 8 | `advanced-` | Advanced Patterns | Prompt targets, rate limits, multi-listener |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
---
|
---
|
||||||
name: plano-cli-operations
|
name: plano-cli-operations
|
||||||
description: Apply Plano CLI best practices. Use for startup troubleshooting, cli_agent workflows, prompt target generation, and template-based project bootstrapping.
|
description: Apply Plano CLI best practices. Use for startup troubleshooting, cli_agent workflows, and template-based project bootstrapping.
|
||||||
license: Apache-2.0
|
license: Apache-2.0
|
||||||
metadata:
|
metadata:
|
||||||
author: katanemo
|
author: katanemo
|
||||||
|
|
@ -15,20 +15,17 @@ Use this skill when the task is primarily operational and CLI-driven.
|
||||||
|
|
||||||
- "Fix `planoai up` failures"
|
- "Fix `planoai up` failures"
|
||||||
- "Use `planoai cli_agent` with coding agents"
|
- "Use `planoai cli_agent` with coding agents"
|
||||||
- "Generate prompt targets from Python functions"
|
|
||||||
- "Bootstrap a project with `planoai init` templates"
|
- "Bootstrap a project with `planoai init` templates"
|
||||||
|
|
||||||
## Apply These Rules
|
## Apply These Rules
|
||||||
|
|
||||||
- `cli-startup`
|
- `cli-startup`
|
||||||
- `cli-agent`
|
- `cli-agent`
|
||||||
- `cli-generate`
|
|
||||||
- `cli-init`
|
- `cli-init`
|
||||||
|
|
||||||
## Execution Checklist
|
## Execution Checklist
|
||||||
|
|
||||||
1. Follow startup validation order before deep debugging.
|
1. Follow startup validation order before deep debugging.
|
||||||
2. Use `cli_agent` to route coding-agent traffic through Plano.
|
2. Use `cli_agent` to route coding-agent traffic through Plano.
|
||||||
3. Generate prompt target schema, then wire endpoint details explicitly.
|
3. Start from templates for reliable first-time setup.
|
||||||
4. Start from templates for reliable first-time setup.
|
4. Provide a compact runbook with exact CLI commands.
|
||||||
5. Provide a compact runbook with exact CLI commands.
|
|
||||||
|
|
|
||||||
|
|
@ -1,91 +0,0 @@
|
||||||
---
|
|
||||||
title: Generate Prompt Targets from Python Functions with `planoai generate_prompt_targets`
|
|
||||||
impact: MEDIUM
|
|
||||||
impactDescription: Manually writing prompt_targets YAML for existing Python APIs is error-prone — the generator introspects function signatures and produces correct YAML automatically
|
|
||||||
tags: cli, generate, prompt-targets, python, code-generation
|
|
||||||
---
|
|
||||||
|
|
||||||
## Generate Prompt Targets from Python Functions with `planoai generate_prompt_targets`
|
|
||||||
|
|
||||||
`planoai generate_prompt_targets` introspects Python function signatures and docstrings to generate `prompt_targets` YAML for your Plano config. This is the fastest way to expose existing Python APIs as LLM-callable functions without manually writing the YAML schema.
|
|
||||||
|
|
||||||
**Python function requirements for generation:**
|
|
||||||
- Use simple type annotations: `int`, `float`, `bool`, `str`, `list`, `tuple`, `set`, `dict`
|
|
||||||
- Include a docstring describing what the function does (becomes the `description`)
|
|
||||||
- Complex Pydantic models must be flattened into primitive typed parameters first
|
|
||||||
|
|
||||||
**Example Python file:**
|
|
||||||
|
|
||||||
```python
|
|
||||||
# api.py
|
|
||||||
|
|
||||||
def get_stock_quote(symbol: str, exchange: str = "NYSE") -> dict:
|
|
||||||
"""Get the current stock price and trading data for a given stock symbol.
|
|
||||||
|
|
||||||
Returns price, volume, market cap, and 24h change percentage.
|
|
||||||
"""
|
|
||||||
# Implementation calls stock API
|
|
||||||
pass
|
|
||||||
|
|
||||||
def get_weather_forecast(city: str, days: int = 3, units: str = "celsius") -> dict:
|
|
||||||
"""Get the weather forecast for a city.
|
|
||||||
|
|
||||||
Returns temperature, precipitation, and conditions for the specified number of days.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def search_flights(origin: str, destination: str, date: str, passengers: int = 1) -> list:
|
|
||||||
"""Search for available flights between two airports on a given date.
|
|
||||||
|
|
||||||
Date format: YYYY-MM-DD. Returns a list of flight options with prices.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
```
|
|
||||||
|
|
||||||
**Running the generator:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
planoai generate_prompt_targets --file api.py
|
|
||||||
```
|
|
||||||
|
|
||||||
**Generated output (add to your config.yaml; the `endpoint` blocks shown are added by hand, not generated):**
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
prompt_targets:
|
|
||||||
- name: get_stock_quote
|
|
||||||
description: Get the current stock price and trading data for a given stock symbol.
|
|
||||||
parameters:
|
|
||||||
- name: symbol
|
|
||||||
type: str
|
|
||||||
required: true
|
|
||||||
- name: exchange
|
|
||||||
type: str
|
|
||||||
required: false
|
|
||||||
default: NYSE
|
|
||||||
# Add endpoint manually:
|
|
||||||
endpoint:
|
|
||||||
name: stock_api
|
|
||||||
path: /quote?symbol={symbol}&exchange={exchange}
|
|
||||||
|
|
||||||
- name: get_weather_forecast
|
|
||||||
description: Get the weather forecast for a city.
|
|
||||||
parameters:
|
|
||||||
- name: city
|
|
||||||
type: str
|
|
||||||
required: true
|
|
||||||
- name: days
|
|
||||||
type: int
|
|
||||||
required: false
|
|
||||||
default: 3
|
|
||||||
- name: units
|
|
||||||
type: str
|
|
||||||
required: false
|
|
||||||
default: celsius
|
|
||||||
endpoint:
|
|
||||||
name: weather_api
|
|
||||||
path: /forecast?city={city}&days={days}&units={units}
|
|
||||||
```
|
|
||||||
|
|
||||||
After generation, manually add the `endpoint` blocks pointing to your actual API. The generator produces the schema; you wire in the connectivity.
|
|
||||||
|
|
||||||
Reference: https://github.com/katanemo/archgw
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue