From 7906e5d455f0a81c99c3974b5c0b0ec308547688 Mon Sep 17 00:00:00 2001 From: Musa Date: Tue, 9 Jun 2026 16:05:43 -0700 Subject: [PATCH 1/4] chore(models): update provider models (#965) --- crates/hermesllm/src/bin/provider_models.yaml | 469 +++++++++--------- 1 file changed, 239 insertions(+), 230 deletions(-) diff --git a/crates/hermesllm/src/bin/provider_models.yaml b/crates/hermesllm/src/bin/provider_models.yaml index ccc4416f..7d9b9e5b 100644 --- a/crates/hermesllm/src/bin/provider_models.yaml +++ b/crates/hermesllm/src/bin/provider_models.yaml @@ -13,6 +13,77 @@ providers: - amazon/amazon.nova-premier-v1:0 - amazon/amazon.nova-lite-v1:0 - amazon/amazon.nova-micro-v1:0 + anthropic: + - anthropic/claude-fable-5 + - anthropic/claude-opus-4-8 + - anthropic/claude-opus-4-7 + - anthropic/claude-sonnet-4-6 + - anthropic/claude-opus-4-6 + - anthropic/claude-opus-4-5-20251101 + - anthropic/claude-opus-4-5 + - anthropic/claude-haiku-4-5-20251001 + - anthropic/claude-haiku-4-5 + - anthropic/claude-sonnet-4-5-20250929 + - anthropic/claude-sonnet-4-5 + - anthropic/claude-opus-4-1-20250805 + - anthropic/claude-opus-4-1 + - anthropic/claude-opus-4-20250514 + - anthropic/claude-opus-4 + - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4 + chatgpt: + - chatgpt/gpt-5.4 + - chatgpt/gpt-5.3-codex + - chatgpt/gpt-5.2 + deepseek: + - deepseek/deepseek-v4-flash + - deepseek/deepseek-v4-pro + digitalocean: + - digitalocean/openai-gpt-4.1 + - digitalocean/openai-gpt-4o + - digitalocean/openai-gpt-4o-mini + - digitalocean/openai-gpt-5 + - digitalocean/openai-gpt-5-mini + - digitalocean/openai-gpt-5-nano + - digitalocean/openai-gpt-5.1-codex-max + - digitalocean/openai-gpt-5.2 + - digitalocean/openai-gpt-5.2-pro + - digitalocean/openai-gpt-5.3-codex + - digitalocean/openai-gpt-5.4 + - digitalocean/openai-gpt-5.4-mini + - digitalocean/openai-gpt-5.4-nano + - digitalocean/openai-gpt-5.4-pro + - digitalocean/openai-gpt-oss-120b + - digitalocean/openai-gpt-oss-20b + - digitalocean/openai-o1 + - digitalocean/openai-o3 + - digitalocean/openai-o3-mini + - digitalocean/anthropic-claude-4.1-opus + - digitalocean/anthropic-claude-4.5-sonnet + - digitalocean/anthropic-claude-4.6-sonnet + - digitalocean/anthropic-claude-haiku-4.5 + - digitalocean/anthropic-claude-opus-4 + - digitalocean/anthropic-claude-opus-4.5 + - digitalocean/anthropic-claude-opus-4.6 + - digitalocean/anthropic-claude-opus-4.7 + - digitalocean/anthropic-claude-sonnet-4 + - digitalocean/alibaba-qwen3-32b + - digitalocean/arcee-trinity-large-thinking + - digitalocean/deepseek-3.2 + - digitalocean/deepseek-r1-distill-llama-70b + - digitalocean/gemma-4-31B-it + - digitalocean/glm-5 + - digitalocean/kimi-k2.5 + - digitalocean/llama3.3-70b-instruct + - digitalocean/minimax-m2.5 + - digitalocean/nvidia-nemotron-3-super-120b + - digitalocean/qwen3-coder-flash + - digitalocean/qwen3.5-397b-a17b + - digitalocean/all-mini-lm-l6-v2 + - digitalocean/gte-large-en-v1.5 + - digitalocean/multi-qa-mpnet-base-dot-v1 + - digitalocean/qwen3-embedding-0.6b + - digitalocean/router:software-engineering google: - google/gemini-2.5-flash - google/gemini-2.5-pro @@ -22,12 +93,6 @@ providers: - google/gemini-2.0-flash-lite - google/gemini-2.5-flash-preview-tts - google/gemini-2.5-pro-preview-tts - - google/gemma-3-1b-it - - google/gemma-3-4b-it - - google/gemma-3-12b-it - - google/gemma-3-27b-it - - google/gemma-3n-e4b-it - - google/gemma-3n-e2b-it - google/gemma-4-26b-a4b-it - google/gemma-4-31b-it - google/gemini-flash-latest @@ -40,13 +105,22 @@ providers: - google/gemini-3.1-pro-preview - google/gemini-3.1-pro-preview-customtools - google/gemini-3.1-flash-lite-preview + - google/gemini-3.1-flash-lite - google/gemini-3-pro-image-preview + - google/gemini-3-pro-image - google/nano-banana-pro-preview - google/gemini-3.1-flash-image-preview + - google/gemini-3.1-flash-image + - google/gemini-3.5-flash - google/lyria-3-clip-preview - google/lyria-3-pro-preview + - google/gemini-3.1-flash-tts-preview - google/gemini-robotics-er-1.5-preview + - google/gemini-robotics-er-1.6-preview - google/gemini-2.5-computer-use-preview-10-2025 + - google/antigravity-preview-05-2026 + - google/deep-research-max-preview-04-2026 + - google/deep-research-preview-04-2026 - google/deep-research-pro-preview-12-2025 mistralai: - mistralai/mistral-medium-2505 @@ -60,183 +134,62 @@ providers: - mistralai/mistral-tiny-latest - mistralai/codestral-2508 - mistralai/codestral-latest + - mistralai/mistral-code-latest + - mistralai/mistral-code-fim-latest - mistralai/devstral-2512 - - mistralai/mistral-vibe-cli-latest - mistralai/devstral-medium-latest - mistralai/devstral-latest + - mistralai/mistral-code-agent-latest - mistralai/mistral-small-2603 - mistralai/mistral-small-latest - mistralai/mistral-vibe-cli-fast - - mistralai/mistral-small-2506 + - mistralai/magistral-small-latest - mistralai/magistral-medium-2509 - mistralai/magistral-medium-latest - - mistralai/magistral-small-2509 - - mistralai/magistral-small-latest - mistralai/labs-leanstral-2603 - mistralai/mistral-large-2512 - mistralai/mistral-large-latest + - mistralai/mistral-large-2512 + - mistralai/mistral-large-latest - mistralai/ministral-3b-2512 - mistralai/ministral-3b-latest - mistralai/ministral-8b-2512 - mistralai/ministral-8b-latest - mistralai/ministral-14b-2512 - mistralai/ministral-14b-latest - - mistralai/mistral-large-2411 - - mistralai/pixtral-large-2411 - - mistralai/pixtral-large-latest - - mistralai/mistral-large-pixtral-2411 - - mistralai/devstral-small-2507 - - mistralai/devstral-medium-2507 - - mistralai/labs-mistral-small-creative + - mistralai/mistral-medium-3-5 + - mistralai/mistral-medium-3.5 + - mistralai/mistral-medium-3 + - mistralai/mistral-medium-2604 + - mistralai/mistral-medium-c21211-r0-75 + - mistralai/mistral-vibe-cli-latest + - mistralai/mistral-medium-3-5 + - mistralai/mistral-medium-3.5 + - mistralai/mistral-medium-3 + - mistralai/mistral-medium-2604 + - mistralai/mistral-medium-c21211-r0-75 + - mistralai/mistral-vibe-cli-latest + - mistralai/magistral-small-2509 + - mistralai/mistral-small-2506 - mistralai/mistral-embed-2312 - mistralai/mistral-embed - mistralai/codestral-embed - mistralai/codestral-embed-2505 - anthropic: - - anthropic/claude-sonnet-4-6 - - anthropic/claude-opus-4-6 - - anthropic/claude-opus-4-7 - - anthropic/claude-opus-4-5-20251101 - - anthropic/claude-opus-4-5 - - anthropic/claude-haiku-4-5-20251001 - - anthropic/claude-haiku-4-5 - - anthropic/claude-sonnet-4-5-20250929 - - anthropic/claude-sonnet-4-5 - - anthropic/claude-opus-4-1-20250805 - - anthropic/claude-opus-4-1 - - anthropic/claude-opus-4-20250514 - - anthropic/claude-opus-4 - - anthropic/claude-sonnet-4-20250514 - - anthropic/claude-sonnet-4 - - anthropic/claude-3-haiku-20240307 - - anthropic/claude-3-haiku - qwen: - - qwen/qwen3.6-plus-2026-04-02 - - qwen/qwen3.6-plus - - qwen/wan2.7-image - - qwen/deepseek-v3.2 - - qwen/qwen3-asr-flash-2026-02-10 - - qwen/qwen3.5-flash-2026-02-23 - - qwen/qwen3.5-flash - - qwen/qwen3.5-122b-a10b - - qwen/qwen3.5-35b-a3b - - qwen/qwen3.5-27b - - qwen/qwen3-coder-next - - qwen/qwen3.5-397b-a17b - - qwen/qwen3.5-plus-2026-02-15 - - qwen/qwen3.5-plus - - qwen/qwen3-vl-flash-2026-01-22 - - qwen/qwen3-max-2026-01-23 - - qwen/qwen-plus-character - - qwen/qwen-flash-character - - qwen/qwen-flash - - qwen/qwen3-vl-plus-2025-12-19 - - qwen/qwen3-omni-flash-2025-12-01 - - qwen/qwen3-livetranslate-flash-2025-12-01 - - qwen/qwen3-livetranslate-flash - - qwen/qwen-mt-lite - - qwen/qwen-plus-2025-12-01 - - qwen/qwen-mt-flash - - qwen/ccai-pro - - qwen/tongyi-tingwu-slp - - qwen/qwen3-vl-flash - - qwen/qwen3-vl-flash-2025-10-15 - - qwen/qwen3-omni-flash - - qwen/qwen3-omni-flash-2025-09-15 - - qwen/qwen3-omni-30b-a3b-captioner - - qwen/qwen2.5-7b-instruct - - qwen/qwen2.5-14b-instruct - - qwen/qwen2.5-32b-instruct - - qwen/qwen2.5-72b-instruct - - qwen/qwen2.5-14b-instruct-1m - - qwen/qwen2.5-7b-instruct-1m - - qwen/qwen-max-2025-01-25 - - qwen/qwen-max-latest - - qwen/qwen-turbo-2024-11-01 - - qwen/qwen-turbo-latest - - qwen/qwen-plus-latest - - qwen/qwen-plus-2025-01-25 - - qwen/qwq-plus-2025-03-05 - - qwen/qwen-mt-turbo - - qwen/qwen-mt-plus - - qwen/qwen-coder-plus - - qwen/qwq-plus - - qwen/qwen2.5-vl-32b-instruct - - qwen/qvq-max - - qwen/qwen-omni-turbo - - qwen/qwen3-8b - - qwen/qwen3-30b-a3b - - qwen/qwen3-235b-a22b - - qwen/qwen-turbo-2025-04-28 - - qwen/qwen-plus-2025-04-28 - - qwen/qwen-vl-max-2025-04-08 - - qwen/qwen-vl-plus-2025-01-25 - - qwen/qwen-vl-plus-latest - - qwen/qwen-vl-max-latest - - qwen/qwen-vl-plus-2025-05-07 - - qwen/qwen3-coder-plus - - qwen/qwen3-coder-480b-a35b-instruct - - qwen/qwen3-235b-a22b-instruct-2507 - - qwen/qwen-plus-2025-07-14 - - qwen/qwen3-coder-plus-2025-07-22 - - qwen/qwen3-235b-a22b-thinking-2507 - - qwen/qwen3-coder-flash - - qwen/qwen-vl-max - - qwen/qwen-vl-max-2025-08-13 - - qwen/qwen3-max - - qwen/qwen3-max-2025-09-23 - - qwen/qwen3-vl-plus - - qwen/qwen3-vl-235b-a22b-instruct - - qwen/qwen3-vl-235b-a22b-thinking - - qwen/qwen3-30b-a3b-thinking-2507 - - qwen/qwen3-30b-a3b-instruct-2507 - - qwen/qwen3-14b - - qwen/qwen3-32b - - qwen/qwen3-0.6b - - qwen/qwen3-4b - - qwen/qwen3-1.7b - - qwen/qwen-vl-plus - - qwen/qwen3-coder-plus-2025-09-23 - - qwen/qwen3-vl-plus-2025-09-23 - - qwen/qwen-plus-2025-09-11 - - qwen/qwen3-next-80b-a3b-thinking - - qwen/qwen3-next-80b-a3b-instruct - - qwen/qwen3-max-preview - - qwen/qwen2-7b-instruct - - qwen/qwen-max - - qwen/qwen-plus - - qwen/qwen-turbo - z-ai: - - z-ai/glm-4.5 - - z-ai/glm-4.5-air - - z-ai/glm-4.6 - - z-ai/glm-4.7 - - z-ai/glm-5 - - z-ai/glm-5-turbo - - z-ai/glm-5.1 - x-ai: - - x-ai/grok-3 - - x-ai/grok-3-mini - - x-ai/grok-4-0709 - - x-ai/grok-4-1-fast-non-reasoning - - x-ai/grok-4-1-fast-reasoning - - x-ai/grok-4-fast-non-reasoning - - x-ai/grok-4-fast-reasoning - - x-ai/grok-4.20-0309-non-reasoning - - x-ai/grok-4.20-0309-reasoning - - x-ai/grok-4.20-multi-agent-0309 - - x-ai/grok-code-fast-1 - - x-ai/grok-imagine-image - - x-ai/grok-imagine-video + moonshotai: + - moonshotai/kimi-k2.5 + - moonshotai/kimi-k2.6 + - moonshotai/moonshot-v1-32k + - moonshotai/moonshot-v1-8k + - moonshotai/moonshot-v1-128k-vision-preview + - moonshotai/moonshot-v1-auto + - moonshotai/moonshot-v1-8k-vision-preview + - moonshotai/moonshot-v1-128k + - moonshotai/moonshot-v1-32k-vision-preview openai: + - openai/gpt-3.5-turbo + - openai/gpt-3.5-turbo-16k - openai/gpt-4-0613 - openai/gpt-4 - - openai/gpt-3.5-turbo - - openai/gpt-5.4-mini - - openai/gpt-5.4 - - openai/gpt-5.4-nano-2026-03-17 - - openai/gpt-5.4-nano - - openai/gpt-5.4-mini-2026-03-17 - openai/gpt-3.5-turbo-instruct - openai/gpt-3.5-turbo-instruct-0914 - openai/gpt-3.5-turbo-1106 @@ -306,81 +259,137 @@ providers: - openai/gpt-5.4-2026-03-05 - openai/gpt-5.4-pro - openai/gpt-5.4-pro-2026-03-05 - - openai/gpt-3.5-turbo-16k + - openai/gpt-5.4 + - openai/gpt-5.4-nano-2026-03-17 + - openai/gpt-5.4-nano + - openai/gpt-5.4-mini-2026-03-17 + - openai/gpt-5.4-mini + - openai/gpt-5.5 + - openai/gpt-5.5-2026-04-23 + - openai/gpt-5.5-pro + - openai/gpt-5.5-pro-2026-04-23 + - openai/chat-latest - openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P - deepseek: - - deepseek/deepseek-chat - - deepseek/deepseek-reasoner - moonshotai: - - moonshotai/kimi-for-coding - - moonshotai/kimi-k2-thinking - - moonshotai/moonshot-v1-auto - - moonshotai/moonshot-v1-32k-vision-preview - - moonshotai/moonshot-v1-128k - - moonshotai/kimi-k2-turbo-preview - - moonshotai/kimi-k2-0905-preview - - moonshotai/moonshot-v1-128k-vision-preview - - moonshotai/moonshot-v1-32k - - moonshotai/moonshot-v1-8k-vision-preview - - moonshotai/kimi-k2.5 - - moonshotai/moonshot-v1-8k - - moonshotai/kimi-k2-thinking-turbo - - moonshotai/kimi-k2-0711-preview + qwen: + - qwen/qwen3.7-plus-2026-05-26 + - qwen/qwen3.7-plus + - qwen/kimi-k2.6 + - qwen/glm-5.1 + - qwen/qwen3.7-max-2026-05-17 + - qwen/qwen3.7-max-preview + - qwen/qwen3.7-max-2026-05-20 + - qwen/qwen3.7-max + - qwen/deepseek-v4-flash + - qwen/deepseek-v4-pro + - qwen/qwen3.6-27b + - qwen/qwen3.5-plus-2026-04-20 + - qwen/qwen3.6-max-preview + - qwen/qwen3.6-35b-a3b + - qwen/qwen3.6-flash + - qwen/qwen3.6-flash-2026-04-16 + - qwen/qwen3.5-omni-plus-2026-03-15 + - qwen/qwen3.5-omni-plus + - qwen/qwen3.5-omni-flash-2026-03-15 + - qwen/qwen3.5-omni-flash + - qwen/qwen3.6-plus-2026-04-02 + - qwen/qwen3.6-plus + - qwen/wan2.7-image + - qwen/deepseek-v3.2 + - qwen/qwen3-asr-flash-2026-02-10 + - qwen/qwen3.5-flash-2026-02-23 + - qwen/qwen3.5-flash + - qwen/qwen3.5-122b-a10b + - qwen/qwen3.5-35b-a3b + - qwen/qwen3.5-27b + - qwen/qwen3-coder-next + - qwen/qwen3.5-397b-a17b + - qwen/qwen3.5-plus-2026-02-15 + - qwen/qwen3.5-plus + - qwen/qwen3-vl-flash-2026-01-22 + - qwen/qwen3-max-2026-01-23 + - qwen/qwen-plus-character + - qwen/qwen-flash-character + - qwen/qwen-flash + - qwen/qwen3-vl-plus-2025-12-19 + - qwen/qwen3-omni-flash-2025-12-01 + - qwen/qwen3-livetranslate-flash-2025-12-01 + - qwen/qwen3-livetranslate-flash + - qwen/qwen-mt-lite + - qwen/qwen-plus-2025-12-01 + - qwen/qwen-mt-flash + - qwen/ccai-pro + - qwen/tongyi-tingwu-slp + - qwen/qwen3-vl-flash + - qwen/qwen3-vl-flash-2025-10-15 + - qwen/qwen3-omni-flash + - qwen/qwen3-omni-flash-2025-09-15 + - qwen/qwen3-omni-30b-a3b-captioner + - qwen/qwen-plus-latest + - qwen/qwen-plus-2025-01-25 + - qwen/qwq-plus-2025-03-05 + - qwen/qwen-mt-turbo + - qwen/qwen-mt-plus + - qwen/qwen-coder-plus + - qwen/qwq-plus + - qwen/qvq-max + - qwen/qwen-omni-turbo + - qwen/qwen3-8b + - qwen/qwen3-30b-a3b + - qwen/qwen3-235b-a22b + - qwen/qwen-plus-2025-04-28 + - qwen/qwen3-coder-plus + - qwen/qwen3-coder-480b-a35b-instruct + - qwen/qwen3-235b-a22b-instruct-2507 + - qwen/qwen-plus-2025-07-14 + - qwen/qwen3-coder-plus-2025-07-22 + - qwen/qwen3-235b-a22b-thinking-2507 + - qwen/qwen3-coder-flash + - qwen/qwen-vl-max + - qwen/qwen3-max + - qwen/qwen3-max-2025-09-23 + - qwen/qwen3-vl-plus + - qwen/qwen3-vl-235b-a22b-instruct + - qwen/qwen3-vl-235b-a22b-thinking + - qwen/qwen3-30b-a3b-thinking-2507 + - qwen/qwen3-30b-a3b-instruct-2507 + - qwen/qwen3-14b + - qwen/qwen3-32b + - qwen/qwen-vl-plus + - qwen/qwen3-coder-plus-2025-09-23 + - qwen/qwen3-vl-plus-2025-09-23 + - qwen/qwen-plus-2025-09-11 + - qwen/qwen3-next-80b-a3b-thinking + - qwen/qwen3-next-80b-a3b-instruct + - qwen/qwen3-max-preview + - qwen/qwen2-7b-instruct + - qwen/qwen-max + - qwen/qwen-plus + - qwen/qwen-turbo + x-ai: + - x-ai/grok-4.20-0309-non-reasoning + - x-ai/grok-4.20-0309-reasoning + - x-ai/grok-4.20-multi-agent-0309 + - x-ai/grok-4.3 + - x-ai/grok-build-0.1 + - x-ai/grok-imagine-image + - x-ai/grok-imagine-video + - x-ai/grok-imagine-video-1.5-preview xiaomi: - xiaomi/mimo-v2-flash - xiaomi/mimo-v2-omni - xiaomi/mimo-v2-pro - chatgpt: - - chatgpt/gpt-5.4 - - chatgpt/gpt-5.3-codex - - chatgpt/gpt-5.2 - digitalocean: - - digitalocean/openai-gpt-4.1 - - digitalocean/openai-gpt-4o - - digitalocean/openai-gpt-4o-mini - - digitalocean/openai-gpt-5 - - digitalocean/openai-gpt-5-mini - - digitalocean/openai-gpt-5-nano - - digitalocean/openai-gpt-5.1-codex-max - - digitalocean/openai-gpt-5.2 - - digitalocean/openai-gpt-5.2-pro - - digitalocean/openai-gpt-5.3-codex - - digitalocean/openai-gpt-5.4 - - digitalocean/openai-gpt-5.4-mini - - digitalocean/openai-gpt-5.4-nano - - digitalocean/openai-gpt-5.4-pro - - digitalocean/openai-gpt-oss-120b - - digitalocean/openai-gpt-oss-20b - - digitalocean/openai-o1 - - digitalocean/openai-o3 - - digitalocean/openai-o3-mini - - digitalocean/anthropic-claude-4.1-opus - - digitalocean/anthropic-claude-4.5-sonnet - - digitalocean/anthropic-claude-4.6-sonnet - - digitalocean/anthropic-claude-haiku-4.5 - - digitalocean/anthropic-claude-opus-4 - - digitalocean/anthropic-claude-opus-4.5 - - digitalocean/anthropic-claude-opus-4.6 - - digitalocean/anthropic-claude-opus-4.7 - - digitalocean/anthropic-claude-sonnet-4 - - digitalocean/alibaba-qwen3-32b - - digitalocean/arcee-trinity-large-thinking - - digitalocean/deepseek-3.2 - - digitalocean/deepseek-r1-distill-llama-70b - - digitalocean/gemma-4-31B-it - - digitalocean/glm-5 - - digitalocean/kimi-k2.5 - - digitalocean/llama3.3-70b-instruct - - digitalocean/minimax-m2.5 - - digitalocean/nvidia-nemotron-3-super-120b - - digitalocean/qwen3-coder-flash - - digitalocean/qwen3.5-397b-a17b - - digitalocean/all-mini-lm-l6-v2 - - digitalocean/gte-large-en-v1.5 - - digitalocean/multi-qa-mpnet-base-dot-v1 - - digitalocean/qwen3-embedding-0.6b - - digitalocean/router:software-engineering + - xiaomi/mimo-v2.5 + - xiaomi/mimo-v2.5-asr + - xiaomi/mimo-v2.5-pro + z-ai: + - z-ai/glm-4.5 + - z-ai/glm-4.5-air + - z-ai/glm-4.6 + - z-ai/glm-4.7 + - z-ai/glm-5 + - z-ai/glm-5-turbo + - z-ai/glm-5.1 metadata: total_providers: 13 - total_models: 364 - last_updated: 2026-04-20T00:00:00.000000+00:00 + total_models: 375 + last_updated: 2026-06-09T22:50:12.186709+00:00 From 2e38f7fa09c1c2c4af7dee07c0c93e8885a2a162 Mon Sep 17 00:00:00 2001 From: Musa Date: Tue, 9 Jun 2026 16:22:10 -0700 Subject: [PATCH 2/4] release 0.4.24 (#966) --- .github/workflows/ci.yml | 6 +++--- apps/www/src/components/Hero.tsx | 2 +- build_filter_image.sh | 2 +- cli/planoai/__init__.py | 2 +- cli/planoai/consts.py | 2 +- cli/pyproject.toml | 2 +- cli/uv.lock | 2 +- demos/llm_routing/preference_based_routing/README.md | 2 +- docs/source/conf.py | 2 +- docs/source/get_started/quickstart.rst | 4 ++-- docs/source/resources/deployment.rst | 4 ++-- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bfe57eab..6178cfe5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -117,7 +117,7 @@ jobs: # repo means find_repo_root() returns None, the local-build short- # circuit is skipped, and the CLI tries to download from a GitHub # release that does not yet exist for the in-flight version on - # release-bump PRs (e.g. 0.4.23 before publish-binaries has run). + # release-bump PRs (e.g. 0.4.24 before publish-binaries has run). - name: Seed ~/.plano cache for zero-config test run: | VERSION=$(sed -nE 's/^__version__ = "(.*)"$/\1/p' cli/planoai/__init__.py) @@ -183,13 +183,13 @@ jobs: load: true tags: | ${{ env.PLANO_DOCKER_IMAGE }} - ${{ env.DOCKER_IMAGE }}:0.4.23 + ${{ env.DOCKER_IMAGE }}:0.4.24 ${{ env.DOCKER_IMAGE }}:latest cache-from: type=gha cache-to: type=gha,mode=max - name: Save image as artifact - run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.23 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar + run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.24 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar - name: Upload image artifact uses: actions/upload-artifact@v6 diff --git a/apps/www/src/components/Hero.tsx b/apps/www/src/components/Hero.tsx index b9d5b170..cbfbee04 100644 --- a/apps/www/src/components/Hero.tsx +++ b/apps/www/src/components/Hero.tsx @@ -24,7 +24,7 @@ export function Hero() { >
- v0.4.23 + v0.4.24 — diff --git a/build_filter_image.sh b/build_filter_image.sh index 624955c2..68b9c305 100644 --- a/build_filter_image.sh +++ b/build_filter_image.sh @@ -1 +1 @@ -docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.23 +docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.24 diff --git a/cli/planoai/__init__.py b/cli/planoai/__init__.py index dc0c543a..edcaee24 100644 --- a/cli/planoai/__init__.py +++ b/cli/planoai/__init__.py @@ -1,3 +1,3 @@ """Plano CLI - Intelligent Prompt Gateway.""" -__version__ = "0.4.23" +__version__ = "0.4.24" diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py index 8f13ba50..ba2567e8 100644 --- a/cli/planoai/consts.py +++ b/cli/planoai/consts.py @@ -5,7 +5,7 @@ PLANO_COLOR = "#969FF4" SERVICE_NAME_ARCHGW = "plano" PLANO_DOCKER_NAME = "plano" -PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.23") +PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.24") DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317" # Native mode constants diff --git a/cli/pyproject.toml b/cli/pyproject.toml index e0a90e11..28627b18 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "planoai" -version = "0.4.23" +version = "0.4.24" description = "Python-based CLI tool to manage Plano." authors = [{name = "Katanemo Labs, Inc."}] readme = "README.md" diff --git a/cli/uv.lock b/cli/uv.lock index 98d50481..f11f776a 100644 --- a/cli/uv.lock +++ b/cli/uv.lock @@ -337,7 +337,7 @@ wheels = [ [[package]] name = "planoai" -version = "0.4.23" +version = "0.4.24" source = { editable = "." } dependencies = [ { name = "click" }, diff --git a/demos/llm_routing/preference_based_routing/README.md b/demos/llm_routing/preference_based_routing/README.md index 3401dcf6..ae005a9b 100644 --- a/demos/llm_routing/preference_based_routing/README.md +++ b/demos/llm_routing/preference_based_routing/README.md @@ -3,7 +3,7 @@ This demo shows how you can use user preferences to route user prompts to approp ## How to start the demo -Make sure you have Plano CLI installed (`pip install planoai==0.4.23` or `uv tool install planoai==0.4.23`). +Make sure you have Plano CLI installed (`pip install planoai==0.4.24` or `uv tool install planoai==0.4.24`). ```bash cd demos/llm_routing/preference_based_routing diff --git a/docs/source/conf.py b/docs/source/conf.py index 8d006444..29b5d6d2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,7 @@ from sphinxawesome_theme.postprocess import Icons project = "Plano Docs" copyright = "2026, Katanemo Labs, a DigitalOcean Company" author = "Katanemo Labs, Inc" -release = " v0.4.23" +release = " v0.4.24" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index 0b49f104..47aa0314 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -43,7 +43,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins .. code-block:: console - $ uv tool install planoai==0.4.23 + $ uv tool install planoai==0.4.24 **Option 2: Install with pip (Traditional)** @@ -51,7 +51,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins $ python -m venv venv $ source venv/bin/activate # On Windows, use: venv\Scripts\activate - $ pip install planoai==0.4.23 + $ pip install planoai==0.4.24 .. _llm_routing_quickstart: diff --git a/docs/source/resources/deployment.rst b/docs/source/resources/deployment.rst index 6858269f..04859296 100644 --- a/docs/source/resources/deployment.rst +++ b/docs/source/resources/deployment.rst @@ -65,7 +65,7 @@ Create a ``docker-compose.yml`` file with the following configuration: # docker-compose.yml services: plano: - image: katanemo/plano:0.4.23 + image: katanemo/plano:0.4.24 container_name: plano ports: - "10000:10000" # ingress (client -> plano) @@ -153,7 +153,7 @@ Create a ``plano-deployment.yaml``: spec: containers: - name: plano - image: katanemo/plano:0.4.23 + image: katanemo/plano:0.4.24 ports: - containerPort: 12000 # LLM gateway (chat completions, model routing) name: llm-gateway From ecf864df25c62d99cebbeb00754b4631fbd8e577 Mon Sep 17 00:00:00 2001 From: shivani kumar Date: Fri, 12 Jun 2026 17:25:22 -0400 Subject: [PATCH 3/4] Add the system role into messages array (#967) * add teh system role into messages array * ci: trigger workflows * dont normalize for anthropic --------- Co-authored-by: Spherrrical --- crates/hermesllm/src/apis/anthropic.rs | 2 ++ crates/hermesllm/src/transforms/request/from_anthropic.rs | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/crates/hermesllm/src/apis/anthropic.rs b/crates/hermesllm/src/apis/anthropic.rs index ee572268..cfde591d 100644 --- a/crates/hermesllm/src/apis/anthropic.rs +++ b/crates/hermesllm/src/apis/anthropic.rs @@ -128,6 +128,7 @@ pub struct MessagesRequest { pub enum MessagesRole { User, Assistant, + System, } /// Cache control types for content blocks @@ -632,6 +633,7 @@ impl MessagesRole { match self { MessagesRole::User => "user", MessagesRole::Assistant => "assistant", + MessagesRole::System => "system", } } } diff --git a/crates/hermesllm/src/transforms/request/from_anthropic.rs b/crates/hermesllm/src/transforms/request/from_anthropic.rs index dba17dde..20442c59 100644 --- a/crates/hermesllm/src/transforms/request/from_anthropic.rs +++ b/crates/hermesllm/src/transforms/request/from_anthropic.rs @@ -223,6 +223,7 @@ impl From for Role { match val { MessagesRole::User => Role::User, MessagesRole::Assistant => Role::Assistant, + MessagesRole::System => Role::System, } } } @@ -340,6 +341,11 @@ impl TryFrom for BedrockMessage { let role = match message.role { MessagesRole::User => ConversationRole::User, MessagesRole::Assistant => ConversationRole::Assistant, + MessagesRole::System => { + return Err(TransformError::UnsupportedConversion( + "System messages must be set via the system prompt, not messages".to_string(), + )); + } }; let mut content_blocks = Vec::new(); From 440ee1e1ef4f462a558a904b76817f13aad3533b Mon Sep 17 00:00:00 2001 From: Musa Date: Mon, 15 Jun 2026 14:25:19 -0700 Subject: [PATCH 4/4] release 0.4.25 (#969) --- .github/workflows/ci.yml | 6 +++--- apps/www/src/components/Hero.tsx | 2 +- build_filter_image.sh | 2 +- cli/planoai/__init__.py | 2 +- cli/planoai/consts.py | 2 +- cli/pyproject.toml | 2 +- cli/uv.lock | 2 +- demos/llm_routing/preference_based_routing/README.md | 2 +- docs/source/conf.py | 2 +- docs/source/get_started/quickstart.rst | 4 ++-- docs/source/resources/deployment.rst | 4 ++-- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6178cfe5..ebda64a6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -117,7 +117,7 @@ jobs: # repo means find_repo_root() returns None, the local-build short- # circuit is skipped, and the CLI tries to download from a GitHub # release that does not yet exist for the in-flight version on - # release-bump PRs (e.g. 0.4.24 before publish-binaries has run). + # release-bump PRs (e.g. 0.4.25 before publish-binaries has run). - name: Seed ~/.plano cache for zero-config test run: | VERSION=$(sed -nE 's/^__version__ = "(.*)"$/\1/p' cli/planoai/__init__.py) @@ -183,13 +183,13 @@ jobs: load: true tags: | ${{ env.PLANO_DOCKER_IMAGE }} - ${{ env.DOCKER_IMAGE }}:0.4.24 + ${{ env.DOCKER_IMAGE }}:0.4.25 ${{ env.DOCKER_IMAGE }}:latest cache-from: type=gha cache-to: type=gha,mode=max - name: Save image as artifact - run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.24 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar + run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.25 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar - name: Upload image artifact uses: actions/upload-artifact@v6 diff --git a/apps/www/src/components/Hero.tsx b/apps/www/src/components/Hero.tsx index cbfbee04..566bee49 100644 --- a/apps/www/src/components/Hero.tsx +++ b/apps/www/src/components/Hero.tsx @@ -24,7 +24,7 @@ export function Hero() { >
- v0.4.24 + v0.4.25 — diff --git a/build_filter_image.sh b/build_filter_image.sh index 68b9c305..c60d8d0b 100644 --- a/build_filter_image.sh +++ b/build_filter_image.sh @@ -1 +1 @@ -docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.24 +docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.25 diff --git a/cli/planoai/__init__.py b/cli/planoai/__init__.py index edcaee24..689f32df 100644 --- a/cli/planoai/__init__.py +++ b/cli/planoai/__init__.py @@ -1,3 +1,3 @@ """Plano CLI - Intelligent Prompt Gateway.""" -__version__ = "0.4.24" +__version__ = "0.4.25" diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py index ba2567e8..1b7f4cd3 100644 --- a/cli/planoai/consts.py +++ b/cli/planoai/consts.py @@ -5,7 +5,7 @@ PLANO_COLOR = "#969FF4" SERVICE_NAME_ARCHGW = "plano" PLANO_DOCKER_NAME = "plano" -PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.24") +PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.25") DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317" # Native mode constants diff --git a/cli/pyproject.toml b/cli/pyproject.toml index 28627b18..9ee00403 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "planoai" -version = "0.4.24" +version = "0.4.25" description = "Python-based CLI tool to manage Plano." authors = [{name = "Katanemo Labs, Inc."}] readme = "README.md" diff --git a/cli/uv.lock b/cli/uv.lock index f11f776a..727d3a2a 100644 --- a/cli/uv.lock +++ b/cli/uv.lock @@ -337,7 +337,7 @@ wheels = [ [[package]] name = "planoai" -version = "0.4.24" +version = "0.4.25" source = { editable = "." } dependencies = [ { name = "click" }, diff --git a/demos/llm_routing/preference_based_routing/README.md b/demos/llm_routing/preference_based_routing/README.md index ae005a9b..b36d739c 100644 --- a/demos/llm_routing/preference_based_routing/README.md +++ b/demos/llm_routing/preference_based_routing/README.md @@ -3,7 +3,7 @@ This demo shows how you can use user preferences to route user prompts to approp ## How to start the demo -Make sure you have Plano CLI installed (`pip install planoai==0.4.24` or `uv tool install planoai==0.4.24`). +Make sure you have Plano CLI installed (`pip install planoai==0.4.25` or `uv tool install planoai==0.4.25`). ```bash cd demos/llm_routing/preference_based_routing diff --git a/docs/source/conf.py b/docs/source/conf.py index 29b5d6d2..b734f071 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,7 @@ from sphinxawesome_theme.postprocess import Icons project = "Plano Docs" copyright = "2026, Katanemo Labs, a DigitalOcean Company" author = "Katanemo Labs, Inc" -release = " v0.4.24" +release = " v0.4.25" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index 47aa0314..40d20c2b 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -43,7 +43,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins .. code-block:: console - $ uv tool install planoai==0.4.24 + $ uv tool install planoai==0.4.25 **Option 2: Install with pip (Traditional)** @@ -51,7 +51,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins $ python -m venv venv $ source venv/bin/activate # On Windows, use: venv\Scripts\activate - $ pip install planoai==0.4.24 + $ pip install planoai==0.4.25 .. _llm_routing_quickstart: diff --git a/docs/source/resources/deployment.rst b/docs/source/resources/deployment.rst index 04859296..d9bd5722 100644 --- a/docs/source/resources/deployment.rst +++ b/docs/source/resources/deployment.rst @@ -65,7 +65,7 @@ Create a ``docker-compose.yml`` file with the following configuration: # docker-compose.yml services: plano: - image: katanemo/plano:0.4.24 + image: katanemo/plano:0.4.25 container_name: plano ports: - "10000:10000" # ingress (client -> plano) @@ -153,7 +153,7 @@ Create a ``plano-deployment.yaml``: spec: containers: - name: plano - image: katanemo/plano:0.4.24 + image: katanemo/plano:0.4.25 ports: - containerPort: 12000 # LLM gateway (chat completions, model routing) name: llm-gateway