diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bfe57eab..ebda64a6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -117,7 +117,7 @@ jobs:
# repo means find_repo_root() returns None, the local-build short-
# circuit is skipped, and the CLI tries to download from a GitHub
# release that does not yet exist for the in-flight version on
- # release-bump PRs (e.g. 0.4.23 before publish-binaries has run).
+ # release-bump PRs (e.g. 0.4.25 before publish-binaries has run).
- name: Seed ~/.plano cache for zero-config test
run: |
VERSION=$(sed -nE 's/^__version__ = "(.*)"$/\1/p' cli/planoai/__init__.py)
@@ -183,13 +183,13 @@ jobs:
load: true
tags: |
${{ env.PLANO_DOCKER_IMAGE }}
- ${{ env.DOCKER_IMAGE }}:0.4.23
+ ${{ env.DOCKER_IMAGE }}:0.4.25
${{ env.DOCKER_IMAGE }}:latest
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Save image as artifact
- run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.23 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
+ run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.25 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
- name: Upload image artifact
uses: actions/upload-artifact@v6
diff --git a/apps/www/src/components/Hero.tsx b/apps/www/src/components/Hero.tsx
index b9d5b170..566bee49 100644
--- a/apps/www/src/components/Hero.tsx
+++ b/apps/www/src/components/Hero.tsx
@@ -24,7 +24,7 @@ export function Hero() {
>
- v0.4.23
+ v0.4.25
—
diff --git a/build_filter_image.sh b/build_filter_image.sh
index 624955c2..c60d8d0b 100644
--- a/build_filter_image.sh
+++ b/build_filter_image.sh
@@ -1 +1 @@
-docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.23
+docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.25
diff --git a/cli/planoai/__init__.py b/cli/planoai/__init__.py
index dc0c543a..689f32df 100644
--- a/cli/planoai/__init__.py
+++ b/cli/planoai/__init__.py
@@ -1,3 +1,3 @@
"""Plano CLI - Intelligent Prompt Gateway."""
-__version__ = "0.4.23"
+__version__ = "0.4.25"
diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py
index 8f13ba50..1b7f4cd3 100644
--- a/cli/planoai/consts.py
+++ b/cli/planoai/consts.py
@@ -5,7 +5,7 @@ PLANO_COLOR = "#969FF4"
SERVICE_NAME_ARCHGW = "plano"
PLANO_DOCKER_NAME = "plano"
-PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.23")
+PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.25")
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317"
# Native mode constants
diff --git a/cli/pyproject.toml b/cli/pyproject.toml
index e0a90e11..9ee00403 100644
--- a/cli/pyproject.toml
+++ b/cli/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "planoai"
-version = "0.4.23"
+version = "0.4.25"
description = "Python-based CLI tool to manage Plano."
authors = [{name = "Katanemo Labs, Inc."}]
readme = "README.md"
diff --git a/cli/uv.lock b/cli/uv.lock
index 98d50481..727d3a2a 100644
--- a/cli/uv.lock
+++ b/cli/uv.lock
@@ -337,7 +337,7 @@ wheels = [
[[package]]
name = "planoai"
-version = "0.4.23"
+version = "0.4.25"
source = { editable = "." }
dependencies = [
{ name = "click" },
diff --git a/crates/hermesllm/src/apis/anthropic.rs b/crates/hermesllm/src/apis/anthropic.rs
index ee572268..cfde591d 100644
--- a/crates/hermesllm/src/apis/anthropic.rs
+++ b/crates/hermesllm/src/apis/anthropic.rs
@@ -128,6 +128,7 @@ pub struct MessagesRequest {
pub enum MessagesRole {
User,
Assistant,
+ System,
}
/// Cache control types for content blocks
@@ -632,6 +633,7 @@ impl MessagesRole {
match self {
MessagesRole::User => "user",
MessagesRole::Assistant => "assistant",
+ MessagesRole::System => "system",
}
}
}
diff --git a/crates/hermesllm/src/bin/provider_models.yaml b/crates/hermesllm/src/bin/provider_models.yaml
index ccc4416f..7d9b9e5b 100644
--- a/crates/hermesllm/src/bin/provider_models.yaml
+++ b/crates/hermesllm/src/bin/provider_models.yaml
@@ -13,6 +13,77 @@ providers:
- amazon/amazon.nova-premier-v1:0
- amazon/amazon.nova-lite-v1:0
- amazon/amazon.nova-micro-v1:0
+ anthropic:
+ - anthropic/claude-fable-5
+ - anthropic/claude-opus-4-8
+ - anthropic/claude-opus-4-7
+ - anthropic/claude-sonnet-4-6
+ - anthropic/claude-opus-4-6
+ - anthropic/claude-opus-4-5-20251101
+ - anthropic/claude-opus-4-5
+ - anthropic/claude-haiku-4-5-20251001
+ - anthropic/claude-haiku-4-5
+ - anthropic/claude-sonnet-4-5-20250929
+ - anthropic/claude-sonnet-4-5
+ - anthropic/claude-opus-4-1-20250805
+ - anthropic/claude-opus-4-1
+ - anthropic/claude-opus-4-20250514
+ - anthropic/claude-opus-4
+ - anthropic/claude-sonnet-4-20250514
+ - anthropic/claude-sonnet-4
+ chatgpt:
+ - chatgpt/gpt-5.4
+ - chatgpt/gpt-5.3-codex
+ - chatgpt/gpt-5.2
+ deepseek:
+ - deepseek/deepseek-v4-flash
+ - deepseek/deepseek-v4-pro
+ digitalocean:
+ - digitalocean/openai-gpt-4.1
+ - digitalocean/openai-gpt-4o
+ - digitalocean/openai-gpt-4o-mini
+ - digitalocean/openai-gpt-5
+ - digitalocean/openai-gpt-5-mini
+ - digitalocean/openai-gpt-5-nano
+ - digitalocean/openai-gpt-5.1-codex-max
+ - digitalocean/openai-gpt-5.2
+ - digitalocean/openai-gpt-5.2-pro
+ - digitalocean/openai-gpt-5.3-codex
+ - digitalocean/openai-gpt-5.4
+ - digitalocean/openai-gpt-5.4-mini
+ - digitalocean/openai-gpt-5.4-nano
+ - digitalocean/openai-gpt-5.4-pro
+ - digitalocean/openai-gpt-oss-120b
+ - digitalocean/openai-gpt-oss-20b
+ - digitalocean/openai-o1
+ - digitalocean/openai-o3
+ - digitalocean/openai-o3-mini
+ - digitalocean/anthropic-claude-4.1-opus
+ - digitalocean/anthropic-claude-4.5-sonnet
+ - digitalocean/anthropic-claude-4.6-sonnet
+ - digitalocean/anthropic-claude-haiku-4.5
+ - digitalocean/anthropic-claude-opus-4
+ - digitalocean/anthropic-claude-opus-4.5
+ - digitalocean/anthropic-claude-opus-4.6
+ - digitalocean/anthropic-claude-opus-4.7
+ - digitalocean/anthropic-claude-sonnet-4
+ - digitalocean/alibaba-qwen3-32b
+ - digitalocean/arcee-trinity-large-thinking
+ - digitalocean/deepseek-3.2
+ - digitalocean/deepseek-r1-distill-llama-70b
+ - digitalocean/gemma-4-31B-it
+ - digitalocean/glm-5
+ - digitalocean/kimi-k2.5
+ - digitalocean/llama3.3-70b-instruct
+ - digitalocean/minimax-m2.5
+ - digitalocean/nvidia-nemotron-3-super-120b
+ - digitalocean/qwen3-coder-flash
+ - digitalocean/qwen3.5-397b-a17b
+ - digitalocean/all-mini-lm-l6-v2
+ - digitalocean/gte-large-en-v1.5
+ - digitalocean/multi-qa-mpnet-base-dot-v1
+ - digitalocean/qwen3-embedding-0.6b
+ - digitalocean/router:software-engineering
google:
- google/gemini-2.5-flash
- google/gemini-2.5-pro
@@ -22,12 +93,6 @@ providers:
- google/gemini-2.0-flash-lite
- google/gemini-2.5-flash-preview-tts
- google/gemini-2.5-pro-preview-tts
- - google/gemma-3-1b-it
- - google/gemma-3-4b-it
- - google/gemma-3-12b-it
- - google/gemma-3-27b-it
- - google/gemma-3n-e4b-it
- - google/gemma-3n-e2b-it
- google/gemma-4-26b-a4b-it
- google/gemma-4-31b-it
- google/gemini-flash-latest
@@ -40,13 +105,22 @@ providers:
- google/gemini-3.1-pro-preview
- google/gemini-3.1-pro-preview-customtools
- google/gemini-3.1-flash-lite-preview
+ - google/gemini-3.1-flash-lite
- google/gemini-3-pro-image-preview
+ - google/gemini-3-pro-image
- google/nano-banana-pro-preview
- google/gemini-3.1-flash-image-preview
+ - google/gemini-3.1-flash-image
+ - google/gemini-3.5-flash
- google/lyria-3-clip-preview
- google/lyria-3-pro-preview
+ - google/gemini-3.1-flash-tts-preview
- google/gemini-robotics-er-1.5-preview
+ - google/gemini-robotics-er-1.6-preview
- google/gemini-2.5-computer-use-preview-10-2025
+ - google/antigravity-preview-05-2026
+ - google/deep-research-max-preview-04-2026
+ - google/deep-research-preview-04-2026
- google/deep-research-pro-preview-12-2025
mistralai:
- mistralai/mistral-medium-2505
@@ -60,183 +134,62 @@ providers:
- mistralai/mistral-tiny-latest
- mistralai/codestral-2508
- mistralai/codestral-latest
+ - mistralai/mistral-code-latest
+ - mistralai/mistral-code-fim-latest
- mistralai/devstral-2512
- - mistralai/mistral-vibe-cli-latest
- mistralai/devstral-medium-latest
- mistralai/devstral-latest
+ - mistralai/mistral-code-agent-latest
- mistralai/mistral-small-2603
- mistralai/mistral-small-latest
- mistralai/mistral-vibe-cli-fast
- - mistralai/mistral-small-2506
+ - mistralai/magistral-small-latest
- mistralai/magistral-medium-2509
- mistralai/magistral-medium-latest
- - mistralai/magistral-small-2509
- - mistralai/magistral-small-latest
- mistralai/labs-leanstral-2603
- mistralai/mistral-large-2512
- mistralai/mistral-large-latest
+ - mistralai/mistral-large-2512
+ - mistralai/mistral-large-latest
- mistralai/ministral-3b-2512
- mistralai/ministral-3b-latest
- mistralai/ministral-8b-2512
- mistralai/ministral-8b-latest
- mistralai/ministral-14b-2512
- mistralai/ministral-14b-latest
- - mistralai/mistral-large-2411
- - mistralai/pixtral-large-2411
- - mistralai/pixtral-large-latest
- - mistralai/mistral-large-pixtral-2411
- - mistralai/devstral-small-2507
- - mistralai/devstral-medium-2507
- - mistralai/labs-mistral-small-creative
+ - mistralai/mistral-medium-3-5
+ - mistralai/mistral-medium-3.5
+ - mistralai/mistral-medium-3
+ - mistralai/mistral-medium-2604
+ - mistralai/mistral-medium-c21211-r0-75
+ - mistralai/mistral-vibe-cli-latest
+ - mistralai/mistral-medium-3-5
+ - mistralai/mistral-medium-3.5
+ - mistralai/mistral-medium-3
+ - mistralai/mistral-medium-2604
+ - mistralai/mistral-medium-c21211-r0-75
+ - mistralai/mistral-vibe-cli-latest
+ - mistralai/magistral-small-2509
+ - mistralai/mistral-small-2506
- mistralai/mistral-embed-2312
- mistralai/mistral-embed
- mistralai/codestral-embed
- mistralai/codestral-embed-2505
- anthropic:
- - anthropic/claude-sonnet-4-6
- - anthropic/claude-opus-4-6
- - anthropic/claude-opus-4-7
- - anthropic/claude-opus-4-5-20251101
- - anthropic/claude-opus-4-5
- - anthropic/claude-haiku-4-5-20251001
- - anthropic/claude-haiku-4-5
- - anthropic/claude-sonnet-4-5-20250929
- - anthropic/claude-sonnet-4-5
- - anthropic/claude-opus-4-1-20250805
- - anthropic/claude-opus-4-1
- - anthropic/claude-opus-4-20250514
- - anthropic/claude-opus-4
- - anthropic/claude-sonnet-4-20250514
- - anthropic/claude-sonnet-4
- - anthropic/claude-3-haiku-20240307
- - anthropic/claude-3-haiku
- qwen:
- - qwen/qwen3.6-plus-2026-04-02
- - qwen/qwen3.6-plus
- - qwen/wan2.7-image
- - qwen/deepseek-v3.2
- - qwen/qwen3-asr-flash-2026-02-10
- - qwen/qwen3.5-flash-2026-02-23
- - qwen/qwen3.5-flash
- - qwen/qwen3.5-122b-a10b
- - qwen/qwen3.5-35b-a3b
- - qwen/qwen3.5-27b
- - qwen/qwen3-coder-next
- - qwen/qwen3.5-397b-a17b
- - qwen/qwen3.5-plus-2026-02-15
- - qwen/qwen3.5-plus
- - qwen/qwen3-vl-flash-2026-01-22
- - qwen/qwen3-max-2026-01-23
- - qwen/qwen-plus-character
- - qwen/qwen-flash-character
- - qwen/qwen-flash
- - qwen/qwen3-vl-plus-2025-12-19
- - qwen/qwen3-omni-flash-2025-12-01
- - qwen/qwen3-livetranslate-flash-2025-12-01
- - qwen/qwen3-livetranslate-flash
- - qwen/qwen-mt-lite
- - qwen/qwen-plus-2025-12-01
- - qwen/qwen-mt-flash
- - qwen/ccai-pro
- - qwen/tongyi-tingwu-slp
- - qwen/qwen3-vl-flash
- - qwen/qwen3-vl-flash-2025-10-15
- - qwen/qwen3-omni-flash
- - qwen/qwen3-omni-flash-2025-09-15
- - qwen/qwen3-omni-30b-a3b-captioner
- - qwen/qwen2.5-7b-instruct
- - qwen/qwen2.5-14b-instruct
- - qwen/qwen2.5-32b-instruct
- - qwen/qwen2.5-72b-instruct
- - qwen/qwen2.5-14b-instruct-1m
- - qwen/qwen2.5-7b-instruct-1m
- - qwen/qwen-max-2025-01-25
- - qwen/qwen-max-latest
- - qwen/qwen-turbo-2024-11-01
- - qwen/qwen-turbo-latest
- - qwen/qwen-plus-latest
- - qwen/qwen-plus-2025-01-25
- - qwen/qwq-plus-2025-03-05
- - qwen/qwen-mt-turbo
- - qwen/qwen-mt-plus
- - qwen/qwen-coder-plus
- - qwen/qwq-plus
- - qwen/qwen2.5-vl-32b-instruct
- - qwen/qvq-max
- - qwen/qwen-omni-turbo
- - qwen/qwen3-8b
- - qwen/qwen3-30b-a3b
- - qwen/qwen3-235b-a22b
- - qwen/qwen-turbo-2025-04-28
- - qwen/qwen-plus-2025-04-28
- - qwen/qwen-vl-max-2025-04-08
- - qwen/qwen-vl-plus-2025-01-25
- - qwen/qwen-vl-plus-latest
- - qwen/qwen-vl-max-latest
- - qwen/qwen-vl-plus-2025-05-07
- - qwen/qwen3-coder-plus
- - qwen/qwen3-coder-480b-a35b-instruct
- - qwen/qwen3-235b-a22b-instruct-2507
- - qwen/qwen-plus-2025-07-14
- - qwen/qwen3-coder-plus-2025-07-22
- - qwen/qwen3-235b-a22b-thinking-2507
- - qwen/qwen3-coder-flash
- - qwen/qwen-vl-max
- - qwen/qwen-vl-max-2025-08-13
- - qwen/qwen3-max
- - qwen/qwen3-max-2025-09-23
- - qwen/qwen3-vl-plus
- - qwen/qwen3-vl-235b-a22b-instruct
- - qwen/qwen3-vl-235b-a22b-thinking
- - qwen/qwen3-30b-a3b-thinking-2507
- - qwen/qwen3-30b-a3b-instruct-2507
- - qwen/qwen3-14b
- - qwen/qwen3-32b
- - qwen/qwen3-0.6b
- - qwen/qwen3-4b
- - qwen/qwen3-1.7b
- - qwen/qwen-vl-plus
- - qwen/qwen3-coder-plus-2025-09-23
- - qwen/qwen3-vl-plus-2025-09-23
- - qwen/qwen-plus-2025-09-11
- - qwen/qwen3-next-80b-a3b-thinking
- - qwen/qwen3-next-80b-a3b-instruct
- - qwen/qwen3-max-preview
- - qwen/qwen2-7b-instruct
- - qwen/qwen-max
- - qwen/qwen-plus
- - qwen/qwen-turbo
- z-ai:
- - z-ai/glm-4.5
- - z-ai/glm-4.5-air
- - z-ai/glm-4.6
- - z-ai/glm-4.7
- - z-ai/glm-5
- - z-ai/glm-5-turbo
- - z-ai/glm-5.1
- x-ai:
- - x-ai/grok-3
- - x-ai/grok-3-mini
- - x-ai/grok-4-0709
- - x-ai/grok-4-1-fast-non-reasoning
- - x-ai/grok-4-1-fast-reasoning
- - x-ai/grok-4-fast-non-reasoning
- - x-ai/grok-4-fast-reasoning
- - x-ai/grok-4.20-0309-non-reasoning
- - x-ai/grok-4.20-0309-reasoning
- - x-ai/grok-4.20-multi-agent-0309
- - x-ai/grok-code-fast-1
- - x-ai/grok-imagine-image
- - x-ai/grok-imagine-video
+ moonshotai:
+ - moonshotai/kimi-k2.5
+ - moonshotai/kimi-k2.6
+ - moonshotai/moonshot-v1-32k
+ - moonshotai/moonshot-v1-8k
+ - moonshotai/moonshot-v1-128k-vision-preview
+ - moonshotai/moonshot-v1-auto
+ - moonshotai/moonshot-v1-8k-vision-preview
+ - moonshotai/moonshot-v1-128k
+ - moonshotai/moonshot-v1-32k-vision-preview
openai:
+ - openai/gpt-3.5-turbo
+ - openai/gpt-3.5-turbo-16k
- openai/gpt-4-0613
- openai/gpt-4
- - openai/gpt-3.5-turbo
- - openai/gpt-5.4-mini
- - openai/gpt-5.4
- - openai/gpt-5.4-nano-2026-03-17
- - openai/gpt-5.4-nano
- - openai/gpt-5.4-mini-2026-03-17
- openai/gpt-3.5-turbo-instruct
- openai/gpt-3.5-turbo-instruct-0914
- openai/gpt-3.5-turbo-1106
@@ -306,81 +259,137 @@ providers:
- openai/gpt-5.4-2026-03-05
- openai/gpt-5.4-pro
- openai/gpt-5.4-pro-2026-03-05
- - openai/gpt-3.5-turbo-16k
+ - openai/gpt-5.4
+ - openai/gpt-5.4-nano-2026-03-17
+ - openai/gpt-5.4-nano
+ - openai/gpt-5.4-mini-2026-03-17
+ - openai/gpt-5.4-mini
+ - openai/gpt-5.5
+ - openai/gpt-5.5-2026-04-23
+ - openai/gpt-5.5-pro
+ - openai/gpt-5.5-pro-2026-04-23
+ - openai/chat-latest
- openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P
- deepseek:
- - deepseek/deepseek-chat
- - deepseek/deepseek-reasoner
- moonshotai:
- - moonshotai/kimi-for-coding
- - moonshotai/kimi-k2-thinking
- - moonshotai/moonshot-v1-auto
- - moonshotai/moonshot-v1-32k-vision-preview
- - moonshotai/moonshot-v1-128k
- - moonshotai/kimi-k2-turbo-preview
- - moonshotai/kimi-k2-0905-preview
- - moonshotai/moonshot-v1-128k-vision-preview
- - moonshotai/moonshot-v1-32k
- - moonshotai/moonshot-v1-8k-vision-preview
- - moonshotai/kimi-k2.5
- - moonshotai/moonshot-v1-8k
- - moonshotai/kimi-k2-thinking-turbo
- - moonshotai/kimi-k2-0711-preview
+ qwen:
+ - qwen/qwen3.7-plus-2026-05-26
+ - qwen/qwen3.7-plus
+ - qwen/kimi-k2.6
+ - qwen/glm-5.1
+ - qwen/qwen3.7-max-2026-05-17
+ - qwen/qwen3.7-max-preview
+ - qwen/qwen3.7-max-2026-05-20
+ - qwen/qwen3.7-max
+ - qwen/deepseek-v4-flash
+ - qwen/deepseek-v4-pro
+ - qwen/qwen3.6-27b
+ - qwen/qwen3.5-plus-2026-04-20
+ - qwen/qwen3.6-max-preview
+ - qwen/qwen3.6-35b-a3b
+ - qwen/qwen3.6-flash
+ - qwen/qwen3.6-flash-2026-04-16
+ - qwen/qwen3.5-omni-plus-2026-03-15
+ - qwen/qwen3.5-omni-plus
+ - qwen/qwen3.5-omni-flash-2026-03-15
+ - qwen/qwen3.5-omni-flash
+ - qwen/qwen3.6-plus-2026-04-02
+ - qwen/qwen3.6-plus
+ - qwen/wan2.7-image
+ - qwen/deepseek-v3.2
+ - qwen/qwen3-asr-flash-2026-02-10
+ - qwen/qwen3.5-flash-2026-02-23
+ - qwen/qwen3.5-flash
+ - qwen/qwen3.5-122b-a10b
+ - qwen/qwen3.5-35b-a3b
+ - qwen/qwen3.5-27b
+ - qwen/qwen3-coder-next
+ - qwen/qwen3.5-397b-a17b
+ - qwen/qwen3.5-plus-2026-02-15
+ - qwen/qwen3.5-plus
+ - qwen/qwen3-vl-flash-2026-01-22
+ - qwen/qwen3-max-2026-01-23
+ - qwen/qwen-plus-character
+ - qwen/qwen-flash-character
+ - qwen/qwen-flash
+ - qwen/qwen3-vl-plus-2025-12-19
+ - qwen/qwen3-omni-flash-2025-12-01
+ - qwen/qwen3-livetranslate-flash-2025-12-01
+ - qwen/qwen3-livetranslate-flash
+ - qwen/qwen-mt-lite
+ - qwen/qwen-plus-2025-12-01
+ - qwen/qwen-mt-flash
+ - qwen/ccai-pro
+ - qwen/tongyi-tingwu-slp
+ - qwen/qwen3-vl-flash
+ - qwen/qwen3-vl-flash-2025-10-15
+ - qwen/qwen3-omni-flash
+ - qwen/qwen3-omni-flash-2025-09-15
+ - qwen/qwen3-omni-30b-a3b-captioner
+ - qwen/qwen-plus-latest
+ - qwen/qwen-plus-2025-01-25
+ - qwen/qwq-plus-2025-03-05
+ - qwen/qwen-mt-turbo
+ - qwen/qwen-mt-plus
+ - qwen/qwen-coder-plus
+ - qwen/qwq-plus
+ - qwen/qvq-max
+ - qwen/qwen-omni-turbo
+ - qwen/qwen3-8b
+ - qwen/qwen3-30b-a3b
+ - qwen/qwen3-235b-a22b
+ - qwen/qwen-plus-2025-04-28
+ - qwen/qwen3-coder-plus
+ - qwen/qwen3-coder-480b-a35b-instruct
+ - qwen/qwen3-235b-a22b-instruct-2507
+ - qwen/qwen-plus-2025-07-14
+ - qwen/qwen3-coder-plus-2025-07-22
+ - qwen/qwen3-235b-a22b-thinking-2507
+ - qwen/qwen3-coder-flash
+ - qwen/qwen-vl-max
+ - qwen/qwen3-max
+ - qwen/qwen3-max-2025-09-23
+ - qwen/qwen3-vl-plus
+ - qwen/qwen3-vl-235b-a22b-instruct
+ - qwen/qwen3-vl-235b-a22b-thinking
+ - qwen/qwen3-30b-a3b-thinking-2507
+ - qwen/qwen3-30b-a3b-instruct-2507
+ - qwen/qwen3-14b
+ - qwen/qwen3-32b
+ - qwen/qwen-vl-plus
+ - qwen/qwen3-coder-plus-2025-09-23
+ - qwen/qwen3-vl-plus-2025-09-23
+ - qwen/qwen-plus-2025-09-11
+ - qwen/qwen3-next-80b-a3b-thinking
+ - qwen/qwen3-next-80b-a3b-instruct
+ - qwen/qwen3-max-preview
+ - qwen/qwen2-7b-instruct
+ - qwen/qwen-max
+ - qwen/qwen-plus
+ - qwen/qwen-turbo
+ x-ai:
+ - x-ai/grok-4.20-0309-non-reasoning
+ - x-ai/grok-4.20-0309-reasoning
+ - x-ai/grok-4.20-multi-agent-0309
+ - x-ai/grok-4.3
+ - x-ai/grok-build-0.1
+ - x-ai/grok-imagine-image
+ - x-ai/grok-imagine-video
+ - x-ai/grok-imagine-video-1.5-preview
xiaomi:
- xiaomi/mimo-v2-flash
- xiaomi/mimo-v2-omni
- xiaomi/mimo-v2-pro
- chatgpt:
- - chatgpt/gpt-5.4
- - chatgpt/gpt-5.3-codex
- - chatgpt/gpt-5.2
- digitalocean:
- - digitalocean/openai-gpt-4.1
- - digitalocean/openai-gpt-4o
- - digitalocean/openai-gpt-4o-mini
- - digitalocean/openai-gpt-5
- - digitalocean/openai-gpt-5-mini
- - digitalocean/openai-gpt-5-nano
- - digitalocean/openai-gpt-5.1-codex-max
- - digitalocean/openai-gpt-5.2
- - digitalocean/openai-gpt-5.2-pro
- - digitalocean/openai-gpt-5.3-codex
- - digitalocean/openai-gpt-5.4
- - digitalocean/openai-gpt-5.4-mini
- - digitalocean/openai-gpt-5.4-nano
- - digitalocean/openai-gpt-5.4-pro
- - digitalocean/openai-gpt-oss-120b
- - digitalocean/openai-gpt-oss-20b
- - digitalocean/openai-o1
- - digitalocean/openai-o3
- - digitalocean/openai-o3-mini
- - digitalocean/anthropic-claude-4.1-opus
- - digitalocean/anthropic-claude-4.5-sonnet
- - digitalocean/anthropic-claude-4.6-sonnet
- - digitalocean/anthropic-claude-haiku-4.5
- - digitalocean/anthropic-claude-opus-4
- - digitalocean/anthropic-claude-opus-4.5
- - digitalocean/anthropic-claude-opus-4.6
- - digitalocean/anthropic-claude-opus-4.7
- - digitalocean/anthropic-claude-sonnet-4
- - digitalocean/alibaba-qwen3-32b
- - digitalocean/arcee-trinity-large-thinking
- - digitalocean/deepseek-3.2
- - digitalocean/deepseek-r1-distill-llama-70b
- - digitalocean/gemma-4-31B-it
- - digitalocean/glm-5
- - digitalocean/kimi-k2.5
- - digitalocean/llama3.3-70b-instruct
- - digitalocean/minimax-m2.5
- - digitalocean/nvidia-nemotron-3-super-120b
- - digitalocean/qwen3-coder-flash
- - digitalocean/qwen3.5-397b-a17b
- - digitalocean/all-mini-lm-l6-v2
- - digitalocean/gte-large-en-v1.5
- - digitalocean/multi-qa-mpnet-base-dot-v1
- - digitalocean/qwen3-embedding-0.6b
- - digitalocean/router:software-engineering
+ - xiaomi/mimo-v2.5
+ - xiaomi/mimo-v2.5-asr
+ - xiaomi/mimo-v2.5-pro
+ z-ai:
+ - z-ai/glm-4.5
+ - z-ai/glm-4.5-air
+ - z-ai/glm-4.6
+ - z-ai/glm-4.7
+ - z-ai/glm-5
+ - z-ai/glm-5-turbo
+ - z-ai/glm-5.1
metadata:
total_providers: 13
- total_models: 364
- last_updated: 2026-04-20T00:00:00.000000+00:00
+ total_models: 375
+ last_updated: 2026-06-09T22:50:12.186709+00:00
diff --git a/crates/hermesllm/src/transforms/request/from_anthropic.rs b/crates/hermesllm/src/transforms/request/from_anthropic.rs
index dba17dde..20442c59 100644
--- a/crates/hermesllm/src/transforms/request/from_anthropic.rs
+++ b/crates/hermesllm/src/transforms/request/from_anthropic.rs
@@ -223,6 +223,7 @@ impl From for Role {
match val {
MessagesRole::User => Role::User,
MessagesRole::Assistant => Role::Assistant,
+ MessagesRole::System => Role::System,
}
}
}
@@ -340,6 +341,11 @@ impl TryFrom for BedrockMessage {
let role = match message.role {
MessagesRole::User => ConversationRole::User,
MessagesRole::Assistant => ConversationRole::Assistant,
+ MessagesRole::System => {
+ return Err(TransformError::UnsupportedConversion(
+ "System messages must be set via the system prompt, not messages".to_string(),
+ ));
+ }
};
let mut content_blocks = Vec::new();
diff --git a/demos/llm_routing/preference_based_routing/README.md b/demos/llm_routing/preference_based_routing/README.md
index 3401dcf6..b36d739c 100644
--- a/demos/llm_routing/preference_based_routing/README.md
+++ b/demos/llm_routing/preference_based_routing/README.md
@@ -3,7 +3,7 @@ This demo shows how you can use user preferences to route user prompts to approp
## How to start the demo
-Make sure you have Plano CLI installed (`pip install planoai==0.4.23` or `uv tool install planoai==0.4.23`).
+Make sure you have Plano CLI installed (`pip install planoai==0.4.25` or `uv tool install planoai==0.4.25`).
```bash
cd demos/llm_routing/preference_based_routing
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 8d006444..b734f071 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -17,7 +17,7 @@ from sphinxawesome_theme.postprocess import Icons
project = "Plano Docs"
copyright = "2026, Katanemo Labs, a DigitalOcean Company"
author = "Katanemo Labs, Inc"
-release = " v0.4.23"
+release = " v0.4.25"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst
index 0b49f104..40d20c2b 100644
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@@ -43,7 +43,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
.. code-block:: console
- $ uv tool install planoai==0.4.23
+ $ uv tool install planoai==0.4.25
**Option 2: Install with pip (Traditional)**
@@ -51,7 +51,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
$ python -m venv venv
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
- $ pip install planoai==0.4.23
+ $ pip install planoai==0.4.25
.. _llm_routing_quickstart:
diff --git a/docs/source/resources/deployment.rst b/docs/source/resources/deployment.rst
index 6858269f..d9bd5722 100644
--- a/docs/source/resources/deployment.rst
+++ b/docs/source/resources/deployment.rst
@@ -65,7 +65,7 @@ Create a ``docker-compose.yml`` file with the following configuration:
# docker-compose.yml
services:
plano:
- image: katanemo/plano:0.4.23
+ image: katanemo/plano:0.4.25
container_name: plano
ports:
- "10000:10000" # ingress (client -> plano)
@@ -153,7 +153,7 @@ Create a ``plano-deployment.yaml``:
spec:
containers:
- name: plano
- image: katanemo/plano:0.4.23
+ image: katanemo/plano:0.4.25
ports:
- containerPort: 12000 # LLM gateway (chat completions, model routing)
name: llm-gateway