diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index c56a4fdb..2157b714 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -210,7 +210,7 @@ def validate_and_render_schema(): model_name_tokens = model_name.split("/") if len(model_name_tokens) < 2: raise Exception( - f"Invalid model name {model_name}. Please provide model name in the format /." + f"Invalid model name {model_name}. Please provide model name in the format / or /* for wildcards." ) provider = model_name_tokens[0].strip() diff --git a/config/supervisord.conf b/config/supervisord.conf index 812cbc0b..35923974 100644 --- a/config/supervisord.conf +++ b/config/supervisord.conf @@ -19,7 +19,7 @@ command=/bin/sh -c "\ uv run python -m planoai.config_generator && \ envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && \ envoy -c /etc/envoy.env_sub.yaml \ - --component-log-level wasm:debug \ + --component-log-level wasm:info \ --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | \ tee /var/log/envoy.log | \ while IFS= read -r line; do echo '[plano_logs]' \"$line\"; done" diff --git a/crates/Cargo.lock b/crates/Cargo.lock index f182439a..1dab5467 100644 --- a/crates/Cargo.lock +++ b/crates/Cargo.lock @@ -1042,6 +1042,7 @@ dependencies = [ "serde", "serde_json", "serde_with", + "serde_yaml", "thiserror 2.0.12", "ureq", "uuid", diff --git a/crates/hermesllm/Cargo.toml b/crates/hermesllm/Cargo.toml index 7bce647f..2658b36b 100644 --- a/crates/hermesllm/Cargo.toml +++ b/crates/hermesllm/Cargo.toml @@ -11,6 +11,7 @@ required-features = ["model-fetch"] [dependencies] serde = {version = "1.0.219", features = ["derive"]} serde_json = "1.0.140" +serde_yaml = "0.9.34-deprecated" serde_with = {version = "3.12.0", features = ["base64"]} thiserror = "2.0.12" aws-smithy-eventstream = "0.60" diff --git a/crates/hermesllm/src/bin/fetch_models.rs b/crates/hermesllm/src/bin/fetch_models.rs index 5ddbd69c..0f473d4c 100644 --- a/crates/hermesllm/src/bin/fetch_models.rs +++ b/crates/hermesllm/src/bin/fetch_models.rs @@ -1,4 +1,4 @@ -// Fetch latest provider models from canonical provider APIs and update provider_models.json +// Fetch latest provider models from canonical provider APIs and update provider_models.yaml // Usage: // Optional: OPENAI_API_KEY, ANTHROPIC_API_KEY, DEEPSEEK_API_KEY, GROK_API_KEY, // DASHSCOPE_API_KEY, MOONSHOT_API_KEY, ZHIPU_API_KEY, GOOGLE_API_KEY @@ -13,7 +13,7 @@ fn main() { let default_path = std::path::Path::new(file!()) .parent() .unwrap() - .join("provider_models.json"); + .join("provider_models.yaml"); let output_path = std::env::args() .nth(1) @@ -23,9 +23,9 @@ fn main() { match fetch_all_models() { Ok(models) => { - let json = serde_json::to_string_pretty(&models).expect("Failed to serialize models"); + let yaml = serde_yaml::to_string(&models).expect("Failed to serialize models"); - std::fs::write(&output_path, json).expect("Failed to write provider_models.json"); + std::fs::write(&output_path, yaml).expect("Failed to write provider_models.yaml"); println!( "✓ Successfully updated {} providers ({} models) to {}", diff --git a/crates/hermesllm/src/bin/provider_models.json b/crates/hermesllm/src/bin/provider_models.json deleted file mode 100644 index f0f03ca4..00000000 --- a/crates/hermesllm/src/bin/provider_models.json +++ /dev/null @@ -1,327 +0,0 @@ -{ - "version": "1.0", - "source": "canonical-apis", - "providers": { - "google": [ - "google/gemini-2.5-flash", - "google/gemini-2.5-pro", - "google/gemini-2.0-flash-exp", - "google/gemini-2.0-flash", - "google/gemini-2.0-flash-001", - "google/gemini-2.0-flash-exp-image-generation", - "google/gemini-2.0-flash-lite-001", - "google/gemini-2.0-flash-lite", - "google/gemini-2.0-flash-lite-preview-02-05", - "google/gemini-2.0-flash-lite-preview", - "google/gemini-exp-1206", - "google/gemini-2.5-flash-preview-tts", - "google/gemini-2.5-pro-preview-tts", - "google/gemma-3-1b-it", - "google/gemma-3-4b-it", - "google/gemma-3-12b-it", - "google/gemma-3-27b-it", - "google/gemma-3n-e4b-it", - "google/gemma-3n-e2b-it", - "google/gemini-flash-latest", - "google/gemini-flash-lite-latest", - "google/gemini-pro-latest", - "google/gemini-2.5-flash-lite", - "google/gemini-2.5-flash-image", - "google/gemini-2.5-flash-preview-09-2025", - "google/gemini-2.5-flash-lite-preview-09-2025", - "google/gemini-3-pro-preview", - "google/gemini-3-flash-preview", - "google/gemini-3-pro-image-preview", - "google/nano-banana-pro-preview", - "google/gemini-robotics-er-1.5-preview", - "google/gemini-2.5-computer-use-preview-10-2025", - "google/deep-research-pro-preview-12-2025" - ], - "qwen": [ - "qwen/qwen-plus-character", - "qwen/qwen-flash-character", - "qwen/qwen-flash", - "qwen/qwen3-vl-plus-2025-12-19", - "qwen/qwen3-omni-flash-2025-12-01", - "qwen/qwen3-livetranslate-flash-2025-12-01", - "qwen/qwen3-livetranslate-flash", - "qwen/qwen-mt-lite", - "qwen/qwen-plus-2025-12-01", - "qwen/qwen-mt-flash", - "qwen/ccai-pro", - "qwen/tongyi-tingwu-slp", - "qwen/qwen3-vl-flash", - "qwen/qwen3-vl-flash-2025-10-15", - "qwen/qwen3-omni-flash", - "qwen/qwen3-omni-flash-2025-09-15", - "qwen/qwen3-omni-30b-a3b-captioner", - "qwen/qwen2.5-7b-instruct", - "qwen/qwen2.5-14b-instruct", - "qwen/qwen2.5-32b-instruct", - "qwen/qwen2.5-72b-instruct", - "qwen/qwen2.5-14b-instruct-1m", - "qwen/qwen2.5-7b-instruct-1m", - "qwen/qwen-max-2025-01-25", - "qwen/qwen-max-latest", - "qwen/qwen-turbo-2024-11-01", - "qwen/qwen-turbo-latest", - "qwen/qwen-plus-latest", - "qwen/qwen-plus-2025-01-25", - "qwen/qwq-plus-2025-03-05", - "qwen/qwen-mt-turbo", - "qwen/qwen-mt-plus", - "qwen/qwen-coder-plus", - "qwen/qwq-plus", - "qwen/qwen2.5-vl-32b-instruct", - "qwen/qvq-max", - "qwen/qwen-omni-turbo", - "qwen/qwen3-8b", - "qwen/qwen3-30b-a3b", - "qwen/qwen3-235b-a22b", - "qwen/qwen-turbo-2025-04-28", - "qwen/qwen-plus-2025-04-28", - "qwen/qwen-vl-max-2025-04-08", - "qwen/qwen-vl-plus-2025-01-25", - "qwen/qwen-vl-plus-latest", - "qwen/qwen-vl-max-latest", - "qwen/qwen-vl-plus-2025-05-07", - "qwen/qwen3-coder-plus", - "qwen/qwen3-coder-480b-a35b-instruct", - "qwen/qwen3-235b-a22b-instruct-2507", - "qwen/qwen-plus-2025-07-14", - "qwen/qwen3-coder-plus-2025-07-22", - "qwen/qwen3-235b-a22b-thinking-2507", - "qwen/qwen3-coder-flash", - "qwen/qwen-vl-max", - "qwen/qwen-vl-max-2025-08-13", - "qwen/qwen3-max", - "qwen/qwen3-max-2025-09-23", - "qwen/qwen3-vl-plus", - "qwen/qwen3-vl-235b-a22b-instruct", - "qwen/qwen3-vl-235b-a22b-thinking", - "qwen/qwen3-30b-a3b-thinking-2507", - "qwen/qwen3-30b-a3b-instruct-2507", - "qwen/qwen3-14b", - "qwen/qwen3-32b", - "qwen/qwen3-0.6b", - "qwen/qwen3-4b", - "qwen/qwen3-1.7b", - "qwen/qwen-vl-plus", - "qwen/qwen3-coder-plus-2025-09-23", - "qwen/qwen3-vl-plus-2025-09-23", - "qwen/qwen-plus-2025-09-11", - "qwen/qwen3-next-80b-a3b-thinking", - "qwen/qwen3-next-80b-a3b-instruct", - "qwen/qwen3-max-preview", - "qwen/qwen2-7b-instruct", - "qwen/qwen-max", - "qwen/qwen-plus", - "qwen/qwen-turbo" - ], - "mistralai": [ - "mistralai/mistral-medium-2505", - "mistralai/mistral-medium-2508", - "mistralai/mistral-medium-latest", - "mistralai/mistral-medium", - "mistralai/open-mistral-nemo", - "mistralai/open-mistral-nemo-2407", - "mistralai/mistral-tiny-2407", - "mistralai/mistral-tiny-latest", - "mistralai/mistral-large-2411", - "mistralai/pixtral-large-2411", - "mistralai/pixtral-large-latest", - "mistralai/mistral-large-pixtral-2411", - "mistralai/codestral-2508", - "mistralai/codestral-latest", - "mistralai/devstral-small-2507", - "mistralai/devstral-medium-2507", - "mistralai/devstral-2512", - "mistralai/mistral-vibe-cli-latest", - "mistralai/devstral-medium-latest", - "mistralai/devstral-latest", - "mistralai/labs-devstral-small-2512", - "mistralai/devstral-small-latest", - "mistralai/mistral-small-2506", - "mistralai/mistral-small-latest", - "mistralai/labs-mistral-small-creative", - "mistralai/magistral-medium-2509", - "mistralai/magistral-medium-latest", - "mistralai/magistral-small-2509", - "mistralai/magistral-small-latest", - "mistralai/mistral-large-2512", - "mistralai/mistral-large-latest", - "mistralai/ministral-3b-2512", - "mistralai/ministral-3b-latest", - "mistralai/ministral-8b-2512", - "mistralai/ministral-8b-latest", - "mistralai/ministral-14b-2512", - "mistralai/ministral-14b-latest", - "mistralai/open-mistral-7b", - "mistralai/mistral-tiny", - "mistralai/mistral-tiny-2312", - "mistralai/pixtral-12b-2409", - "mistralai/pixtral-12b", - "mistralai/pixtral-12b-latest", - "mistralai/ministral-3b-2410", - "mistralai/ministral-8b-2410", - "mistralai/codestral-2501", - "mistralai/codestral-2412", - "mistralai/codestral-2411-rc5", - "mistralai/mistral-small-2501", - "mistralai/mistral-embed-2312", - "mistralai/mistral-embed", - "mistralai/codestral-embed", - "mistralai/codestral-embed-2505" - ], - "amazon": [ - "amazon/amazon.nova-pro-v1:0", - "amazon/amazon.nova-2-lite-v1:0", - "amazon/amazon.nova-2-sonic-v1:0", - "amazon/amazon.titan-tg1-large", - "amazon/amazon.nova-premier-v1:0:8k", - "amazon/amazon.nova-premier-v1:0:20k", - "amazon/amazon.nova-premier-v1:0:1000k", - "amazon/amazon.nova-premier-v1:0:mm", - "amazon/amazon.nova-premier-v1:0", - "amazon/amazon.nova-lite-v1:0", - "amazon/amazon.nova-micro-v1:0" - ], - "deepseek": [ - "deepseek/deepseek-chat", - "deepseek/deepseek-reasoner" - ], - "openai": [ - "openai/gpt-4-0613", - "openai/gpt-4", - "openai/gpt-3.5-turbo", - "openai/gpt-5.2-codex", - "openai/gpt-3.5-turbo-instruct", - "openai/gpt-3.5-turbo-instruct-0914", - "openai/gpt-4-1106-preview", - "openai/gpt-3.5-turbo-1106", - "openai/gpt-4-0125-preview", - "openai/gpt-4-turbo-preview", - "openai/gpt-3.5-turbo-0125", - "openai/gpt-4-turbo", - "openai/gpt-4-turbo-2024-04-09", - "openai/gpt-4o", - "openai/gpt-4o-2024-05-13", - "openai/gpt-4o-mini-2024-07-18", - "openai/gpt-4o-mini", - "openai/gpt-4o-2024-08-06", - "openai/chatgpt-4o-latest", - "openai/o1-2024-12-17", - "openai/o1", - "openai/computer-use-preview", - "openai/o3-mini", - "openai/o3-mini-2025-01-31", - "openai/gpt-4o-2024-11-20", - "openai/computer-use-preview-2025-03-11", - "openai/gpt-4o-search-preview-2025-03-11", - "openai/gpt-4o-search-preview", - "openai/gpt-4o-mini-search-preview-2025-03-11", - "openai/gpt-4o-mini-search-preview", - "openai/o1-pro-2025-03-19", - "openai/o1-pro", - "openai/o3-2025-04-16", - "openai/o4-mini-2025-04-16", - "openai/o3", - "openai/o4-mini", - "openai/gpt-4.1-2025-04-14", - "openai/gpt-4.1", - "openai/gpt-4.1-mini-2025-04-14", - "openai/gpt-4.1-mini", - "openai/gpt-4.1-nano-2025-04-14", - "openai/gpt-4.1-nano", - "openai/codex-mini-latest", - "openai/o3-pro", - "openai/o3-pro-2025-06-10", - "openai/o4-mini-deep-research", - "openai/o3-deep-research", - "openai/o3-deep-research-2025-06-26", - "openai/o4-mini-deep-research-2025-06-26", - "openai/gpt-5-chat-latest", - "openai/gpt-5-2025-08-07", - "openai/gpt-5", - "openai/gpt-5-mini-2025-08-07", - "openai/gpt-5-mini", - "openai/gpt-5-nano-2025-08-07", - "openai/gpt-5-nano", - "openai/gpt-5-codex", - "openai/gpt-5-pro-2025-10-06", - "openai/gpt-5-pro", - "openai/gpt-5-search-api", - "openai/gpt-5-search-api-2025-10-14", - "openai/gpt-5.1-chat-latest", - "openai/gpt-5.1-2025-11-13", - "openai/gpt-5.1", - "openai/gpt-5.1-codex", - "openai/gpt-5.1-codex-mini", - "openai/gpt-5.1-codex-max", - "openai/gpt-5.2-2025-12-11", - "openai/gpt-5.2", - "openai/gpt-5.2-pro-2025-12-11", - "openai/gpt-5.2-pro", - "openai/gpt-5.2-chat-latest", - "openai/gpt-3.5-turbo-16k", - "openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P" - ], - "x-ai": [ - "x-ai/grok-2-vision-1212", - "x-ai/grok-3", - "x-ai/grok-3-mini", - "x-ai/grok-4-0709", - "x-ai/grok-4-1-fast-non-reasoning", - "x-ai/grok-4-1-fast-reasoning", - "x-ai/grok-4-fast-non-reasoning", - "x-ai/grok-4-fast-reasoning", - "x-ai/grok-code-fast-1" - ], - "z-ai": [ - "z-ai/glm-4.5", - "z-ai/glm-4.5-air", - "z-ai/glm-4.6", - "z-ai/glm-4.7" - ], - "anthropic": [ - "anthropic/claude-opus-4-5-20251101", - "anthropic/claude-opus-4-5", - "anthropic/claude-haiku-4-5-20251001", - "anthropic/claude-haiku-4-5", - "anthropic/claude-sonnet-4-5-20250929", - "anthropic/claude-sonnet-4-5", - "anthropic/claude-opus-4-1-20250805", - "anthropic/claude-opus-4-1", - "anthropic/claude-opus-4-20250514", - "anthropic/claude-opus-4", - "anthropic/claude-sonnet-4-20250514", - "anthropic/claude-sonnet-4", - "anthropic/claude-3-7-sonnet-20250219", - "anthropic/claude-3-7-sonnet", - "anthropic/claude-3-5-haiku-20241022", - "anthropic/claude-3-5-haiku", - "anthropic/claude-3-haiku-20240307", - "anthropic/claude-3-haiku" - ], - "moonshotai": [ - "moonshotai/moonshot-v1-32k-vision-preview", - "moonshotai/moonshot-v1-32k", - "moonshotai/moonshot-v1-8k-vision-preview", - "moonshotai/moonshot-v1-128k", - "moonshotai/kimi-k2-turbo-preview", - "moonshotai/kimi-latest", - "moonshotai/kimi-k2-thinking-turbo", - "moonshotai/moonshot-v1-auto", - "moonshotai/moonshot-v1-128k-vision-preview", - "moonshotai/kimi-k2-0905-preview", - "moonshotai/kimi-k2-0711-preview", - "moonshotai/kimi-k2-thinking", - "moonshotai/moonshot-v1-8k" - ] - }, - "metadata": { - "total_providers": 10, - "total_models": 296, - "last_updated": "2026-01-24T21:56:34.611335+00:00" - } -} diff --git a/crates/hermesllm/src/bin/provider_models.yaml b/crates/hermesllm/src/bin/provider_models.yaml new file mode 100644 index 00000000..c9d1ab86 --- /dev/null +++ b/crates/hermesllm/src/bin/provider_models.yaml @@ -0,0 +1,315 @@ +version: '1.0' +source: canonical-apis +providers: + qwen: + - qwen/qwen3-max-2026-01-23 + - qwen/qwen-plus-character + - qwen/qwen-flash-character + - qwen/qwen-flash + - qwen/qwen3-vl-plus-2025-12-19 + - qwen/qwen3-omni-flash-2025-12-01 + - qwen/qwen3-livetranslate-flash-2025-12-01 + - qwen/qwen3-livetranslate-flash + - qwen/qwen-mt-lite + - qwen/qwen-plus-2025-12-01 + - qwen/qwen-mt-flash + - qwen/ccai-pro + - qwen/tongyi-tingwu-slp + - qwen/qwen3-vl-flash + - qwen/qwen3-vl-flash-2025-10-15 + - qwen/qwen3-omni-flash + - qwen/qwen3-omni-flash-2025-09-15 + - qwen/qwen3-omni-30b-a3b-captioner + - qwen/qwen2.5-7b-instruct + - qwen/qwen2.5-14b-instruct + - qwen/qwen2.5-32b-instruct + - qwen/qwen2.5-72b-instruct + - qwen/qwen2.5-14b-instruct-1m + - qwen/qwen2.5-7b-instruct-1m + - qwen/qwen-max-2025-01-25 + - qwen/qwen-max-latest + - qwen/qwen-turbo-2024-11-01 + - qwen/qwen-turbo-latest + - qwen/qwen-plus-latest + - qwen/qwen-plus-2025-01-25 + - qwen/qwq-plus-2025-03-05 + - qwen/qwen-mt-turbo + - qwen/qwen-mt-plus + - qwen/qwen-coder-plus + - qwen/qwq-plus + - qwen/qwen2.5-vl-32b-instruct + - qwen/qvq-max + - qwen/qwen-omni-turbo + - qwen/qwen3-8b + - qwen/qwen3-30b-a3b + - qwen/qwen3-235b-a22b + - qwen/qwen-turbo-2025-04-28 + - qwen/qwen-plus-2025-04-28 + - qwen/qwen-vl-max-2025-04-08 + - qwen/qwen-vl-plus-2025-01-25 + - qwen/qwen-vl-plus-latest + - qwen/qwen-vl-max-latest + - qwen/qwen-vl-plus-2025-05-07 + - qwen/qwen3-coder-plus + - qwen/qwen3-coder-480b-a35b-instruct + - qwen/qwen3-235b-a22b-instruct-2507 + - qwen/qwen-plus-2025-07-14 + - qwen/qwen3-coder-plus-2025-07-22 + - qwen/qwen3-235b-a22b-thinking-2507 + - qwen/qwen3-coder-flash + - qwen/qwen-vl-max + - qwen/qwen-vl-max-2025-08-13 + - qwen/qwen3-max + - qwen/qwen3-max-2025-09-23 + - qwen/qwen3-vl-plus + - qwen/qwen3-vl-235b-a22b-instruct + - qwen/qwen3-vl-235b-a22b-thinking + - qwen/qwen3-30b-a3b-thinking-2507 + - qwen/qwen3-30b-a3b-instruct-2507 + - qwen/qwen3-14b + - qwen/qwen3-32b + - qwen/qwen3-0.6b + - qwen/qwen3-4b + - qwen/qwen3-1.7b + - qwen/qwen-vl-plus + - qwen/qwen3-coder-plus-2025-09-23 + - qwen/qwen3-vl-plus-2025-09-23 + - qwen/qwen-plus-2025-09-11 + - qwen/qwen3-next-80b-a3b-thinking + - qwen/qwen3-next-80b-a3b-instruct + - qwen/qwen3-max-preview + - qwen/qwen2-7b-instruct + - qwen/qwen-max + - qwen/qwen-plus + - qwen/qwen-turbo + openai: + - openai/gpt-4-0613 + - openai/gpt-4 + - openai/gpt-3.5-turbo + - openai/gpt-5.2-codex + - openai/gpt-3.5-turbo-instruct + - openai/gpt-3.5-turbo-instruct-0914 + - openai/gpt-4-1106-preview + - openai/gpt-3.5-turbo-1106 + - openai/gpt-4-0125-preview + - openai/gpt-4-turbo-preview + - openai/gpt-3.5-turbo-0125 + - openai/gpt-4-turbo + - openai/gpt-4-turbo-2024-04-09 + - openai/gpt-4o + - openai/gpt-4o-2024-05-13 + - openai/gpt-4o-mini-2024-07-18 + - openai/gpt-4o-mini + - openai/gpt-4o-2024-08-06 + - openai/chatgpt-4o-latest + - openai/o1-2024-12-17 + - openai/o1 + - openai/computer-use-preview + - openai/o3-mini + - openai/o3-mini-2025-01-31 + - openai/gpt-4o-2024-11-20 + - openai/computer-use-preview-2025-03-11 + - openai/gpt-4o-search-preview-2025-03-11 + - openai/gpt-4o-search-preview + - openai/gpt-4o-mini-search-preview-2025-03-11 + - openai/gpt-4o-mini-search-preview + - openai/o1-pro-2025-03-19 + - openai/o1-pro + - openai/o3-2025-04-16 + - openai/o4-mini-2025-04-16 + - openai/o3 + - openai/o4-mini + - openai/gpt-4.1-2025-04-14 + - openai/gpt-4.1 + - openai/gpt-4.1-mini-2025-04-14 + - openai/gpt-4.1-mini + - openai/gpt-4.1-nano-2025-04-14 + - openai/gpt-4.1-nano + - openai/codex-mini-latest + - openai/o3-pro + - openai/o3-pro-2025-06-10 + - openai/o4-mini-deep-research + - openai/o3-deep-research + - openai/o3-deep-research-2025-06-26 + - openai/o4-mini-deep-research-2025-06-26 + - openai/gpt-5-chat-latest + - openai/gpt-5-2025-08-07 + - openai/gpt-5 + - openai/gpt-5-mini-2025-08-07 + - openai/gpt-5-mini + - openai/gpt-5-nano-2025-08-07 + - openai/gpt-5-nano + - openai/gpt-5-codex + - openai/gpt-5-pro-2025-10-06 + - openai/gpt-5-pro + - openai/gpt-5-search-api + - openai/gpt-5-search-api-2025-10-14 + - openai/gpt-5.1-chat-latest + - openai/gpt-5.1-2025-11-13 + - openai/gpt-5.1 + - openai/gpt-5.1-codex + - openai/gpt-5.1-codex-mini + - openai/gpt-5.1-codex-max + - openai/gpt-5.2-2025-12-11 + - openai/gpt-5.2 + - openai/gpt-5.2-pro-2025-12-11 + - openai/gpt-5.2-pro + - openai/gpt-5.2-chat-latest + - openai/gpt-3.5-turbo-16k + - openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P + google: + - google/gemini-2.5-flash + - google/gemini-2.5-pro + - google/gemini-2.0-flash-exp + - google/gemini-2.0-flash + - google/gemini-2.0-flash-001 + - google/gemini-2.0-flash-exp-image-generation + - google/gemini-2.0-flash-lite-001 + - google/gemini-2.0-flash-lite + - google/gemini-2.0-flash-lite-preview-02-05 + - google/gemini-2.0-flash-lite-preview + - google/gemini-exp-1206 + - google/gemini-2.5-flash-preview-tts + - google/gemini-2.5-pro-preview-tts + - google/gemma-3-1b-it + - google/gemma-3-4b-it + - google/gemma-3-12b-it + - google/gemma-3-27b-it + - google/gemma-3n-e4b-it + - google/gemma-3n-e2b-it + - google/gemini-flash-latest + - google/gemini-flash-lite-latest + - google/gemini-pro-latest + - google/gemini-2.5-flash-lite + - google/gemini-2.5-flash-image + - google/gemini-2.5-flash-preview-09-2025 + - google/gemini-2.5-flash-lite-preview-09-2025 + - google/gemini-3-pro-preview + - google/gemini-3-flash-preview + - google/gemini-3-pro-image-preview + - google/nano-banana-pro-preview + - google/gemini-robotics-er-1.5-preview + - google/gemini-2.5-computer-use-preview-10-2025 + - google/deep-research-pro-preview-12-2025 + mistralai: + - mistralai/mistral-medium-2505 + - mistralai/mistral-medium-2508 + - mistralai/mistral-medium-latest + - mistralai/mistral-medium + - mistralai/open-mistral-nemo + - mistralai/open-mistral-nemo-2407 + - mistralai/mistral-tiny-2407 + - mistralai/mistral-tiny-latest + - mistralai/mistral-large-2411 + - mistralai/pixtral-large-2411 + - mistralai/pixtral-large-latest + - mistralai/mistral-large-pixtral-2411 + - mistralai/codestral-2508 + - mistralai/codestral-latest + - mistralai/devstral-small-2507 + - mistralai/devstral-medium-2507 + - mistralai/devstral-2512 + - mistralai/mistral-vibe-cli-latest + - mistralai/devstral-medium-latest + - mistralai/devstral-latest + - mistralai/labs-devstral-small-2512 + - mistralai/devstral-small-latest + - mistralai/mistral-small-2506 + - mistralai/mistral-small-latest + - mistralai/labs-mistral-small-creative + - mistralai/magistral-medium-2509 + - mistralai/magistral-medium-latest + - mistralai/magistral-small-2509 + - mistralai/magistral-small-latest + - mistralai/mistral-large-2512 + - mistralai/mistral-large-latest + - mistralai/ministral-3b-2512 + - mistralai/ministral-3b-latest + - mistralai/ministral-8b-2512 + - mistralai/ministral-8b-latest + - mistralai/ministral-14b-2512 + - mistralai/ministral-14b-latest + - mistralai/open-mistral-7b + - mistralai/mistral-tiny + - mistralai/mistral-tiny-2312 + - mistralai/pixtral-12b-2409 + - mistralai/pixtral-12b + - mistralai/pixtral-12b-latest + - mistralai/ministral-3b-2410 + - mistralai/ministral-8b-2410 + - mistralai/codestral-2501 + - mistralai/codestral-2412 + - mistralai/codestral-2411-rc5 + - mistralai/mistral-small-2501 + - mistralai/mistral-embed-2312 + - mistralai/mistral-embed + - mistralai/codestral-embed + - mistralai/codestral-embed-2505 + z-ai: + - z-ai/glm-4.5 + - z-ai/glm-4.5-air + - z-ai/glm-4.6 + - z-ai/glm-4.7 + amazon: + - amazon/amazon.nova-pro-v1:0 + - amazon/amazon.nova-2-lite-v1:0 + - amazon/amazon.nova-2-sonic-v1:0 + - amazon/amazon.titan-tg1-large + - amazon/amazon.nova-premier-v1:0:8k + - amazon/amazon.nova-premier-v1:0:20k + - amazon/amazon.nova-premier-v1:0:1000k + - amazon/amazon.nova-premier-v1:0:mm + - amazon/amazon.nova-premier-v1:0 + - amazon/amazon.nova-lite-v1:0 + - amazon/amazon.nova-micro-v1:0 + deepseek: + - deepseek/deepseek-chat + - deepseek/deepseek-reasoner + x-ai: + - x-ai/grok-2-vision-1212 + - x-ai/grok-3 + - x-ai/grok-3-mini + - x-ai/grok-4-0709 + - x-ai/grok-4-1-fast-non-reasoning + - x-ai/grok-4-1-fast-reasoning + - x-ai/grok-4-fast-non-reasoning + - x-ai/grok-4-fast-reasoning + - x-ai/grok-code-fast-1 + moonshotai: + - moonshotai/kimi-latest + - moonshotai/kimi-k2.5 + - moonshotai/moonshot-v1-8k-vision-preview + - moonshotai/kimi-k2-thinking + - moonshotai/moonshot-v1-auto + - moonshotai/kimi-k2-0711-preview + - moonshotai/moonshot-v1-32k + - moonshotai/kimi-k2-thinking-turbo + - moonshotai/kimi-k2-0905-preview + - moonshotai/moonshot-v1-128k + - moonshotai/moonshot-v1-32k-vision-preview + - moonshotai/moonshot-v1-128k-vision-preview + - moonshotai/kimi-k2-turbo-preview + - moonshotai/moonshot-v1-8k + anthropic: + - anthropic/claude-opus-4-5-20251101 + - anthropic/claude-opus-4-5 + - anthropic/claude-haiku-4-5-20251001 + - anthropic/claude-haiku-4-5 + - anthropic/claude-sonnet-4-5-20250929 + - anthropic/claude-sonnet-4-5 + - anthropic/claude-opus-4-1-20250805 + - anthropic/claude-opus-4-1 + - anthropic/claude-opus-4-20250514 + - anthropic/claude-opus-4 + - anthropic/claude-sonnet-4-20250514 + - anthropic/claude-sonnet-4 + - anthropic/claude-3-7-sonnet-20250219 + - anthropic/claude-3-7-sonnet + - anthropic/claude-3-5-haiku-20241022 + - anthropic/claude-3-5-haiku + - anthropic/claude-3-haiku-20240307 + - anthropic/claude-3-haiku +metadata: + total_providers: 10 + total_models: 298 + last_updated: 2026-01-27T22:40:53.653700+00:00 diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs index 90223963..fff73f15 100644 --- a/crates/hermesllm/src/providers/id.rs +++ b/crates/hermesllm/src/providers/id.rs @@ -1,34 +1,26 @@ use crate::apis::{AmazonBedrockApi, AnthropicApi, OpenAIApi}; use crate::clients::endpoints::{SupportedAPIsFromClient, SupportedUpstreamAPIs}; +use serde::Deserialize; use std::collections::HashMap; use std::fmt::Display; use std::sync::OnceLock; -static PROVIDER_MODELS_JSON: &str = include_str!("../bin/provider_models.json"); +static PROVIDER_MODELS_YAML: &str = include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/src/bin/provider_models.yaml" +)); + +#[derive(Deserialize)] +struct ProviderModelsFile { + providers: HashMap>, +} fn load_provider_models() -> &'static HashMap> { static MODELS: OnceLock>> = OnceLock::new(); MODELS.get_or_init(|| { - let data: serde_json::Value = serde_json::from_str(PROVIDER_MODELS_JSON) - .expect("Failed to parse provider_models.json"); - - let providers = data - .get("providers") - .expect("Missing 'providers' key") - .as_object() - .expect("'providers' must be an object"); - - let mut result = HashMap::new(); - for (provider, models) in providers { - let model_list: Vec = models - .as_array() - .expect("Models must be an array") - .iter() - .map(|m| m.as_str().expect("Model must be a string").to_string()) - .collect(); - result.insert(provider.clone(), model_list); - } - result + let ProviderModelsFile { providers } = serde_yaml::from_str(PROVIDER_MODELS_YAML) + .expect("Failed to parse provider_models.yaml"); + providers }) } @@ -244,7 +236,7 @@ mod tests { use super::*; #[test] - fn test_models_loaded_from_json() { + fn test_models_loaded_from_yaml() { // Test that we can load models for each supported provider let openai_models = ProviderId::OpenAI.models(); assert!(!openai_models.is_empty(), "OpenAI should have models"); diff --git a/docs/Dockerfile b/docs/Dockerfile index 1f0961c0..1ac171d8 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -1,6 +1,9 @@ FROM sphinxdoc/sphinx WORKDIR /docs -ADD requirements.txt /docs +ADD docs/requirements.txt /docs RUN python3 -m pip install -r requirements.txt RUN pip freeze + +# Copy provider_models.yaml from the repo for documentation +COPY crates/hermesllm/src/bin/provider_models.yaml /docs/provider_models.yaml diff --git a/docs/build_docs.sh b/docs/build_docs.sh index 7754ef0d..88d2802a 100644 --- a/docs/build_docs.sh +++ b/docs/build_docs.sh @@ -1,4 +1,19 @@ -docker build -f Dockerfile . -t sphinx -docker run --user $(id -u):$(id -g) --rm -v $(pwd):/docs sphinx make clean -docker run --user $(id -u):$(id -g) --rm -v $(pwd):/docs sphinx make html -chmod -R 777 build/html +docker build -f docs/Dockerfile . -t sphinx + +# Clean build output locally +rm -rf docs/build + +# Run make clean/html while keeping provider_models.yaml from the image +docker run --user $(id -u):$(id -g) --rm \ + -v $(pwd)/docs/source:/docs/source \ + -v $(pwd)/docs/Makefile:/docs/Makefile \ + -v $(pwd)/docs/build:/docs/build \ + sphinx make clean + +docker run --user $(id -u):$(id -g) --rm \ + -v $(pwd)/docs/source:/docs/source \ + -v $(pwd)/docs/Makefile:/docs/Makefile \ + -v $(pwd)/docs/build:/docs/build \ + sphinx make html + +chmod -R 777 docs/build/html diff --git a/docs/source/_ext/provider_models.py b/docs/source/_ext/provider_models.py new file mode 100644 index 00000000..9b7451c5 --- /dev/null +++ b/docs/source/_ext/provider_models.py @@ -0,0 +1,44 @@ +"""Sphinx extension to copy provider_models.yaml to build output.""" +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING +import shutil + +if TYPE_CHECKING: + from sphinx.application import Sphinx + + +def _on_build_finished(app: Sphinx, exception: Exception | None) -> None: + """Copy provider_models.yaml to the build output after build completes.""" + if exception is not None: + return + + # Only generate for HTML-like builders where app.outdir is a website root. + if getattr(app.builder, "format", None) != "html": + return + + # Source path: provider_models.yaml is copied into the Docker image at /docs/provider_models.yaml + # This follows the pattern used for config templates like envoy.template.yaml and arch_config_schema.yaml + docs_root = Path(app.srcdir).parent # Goes from source/ to docs/ + source_path = docs_root / "provider_models.yaml" + + if not source_path.exists(): + # Silently skip if source file doesn't exist + return + + # Per repo convention, place generated artifacts under an `includes/` folder. + out_path = Path(app.outdir) / "includes" / "provider_models.yaml" + out_path.parent.mkdir(parents=True, exist_ok=True) + + shutil.copy2(source_path, out_path) + + +def setup(app: Sphinx) -> dict[str, object]: + """Register the extension with Sphinx.""" + app.connect("build-finished", _on_build_finished) + return { + "version": "0.1.0", + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/docs/source/concepts/llm_providers/supported_providers.rst b/docs/source/concepts/llm_providers/supported_providers.rst index d8e9578c..f3f77b5f 100644 --- a/docs/source/concepts/llm_providers/supported_providers.rst +++ b/docs/source/concepts/llm_providers/supported_providers.rst @@ -738,6 +738,7 @@ Automatically configure all available models from a provider using wildcard patt - Expands at config load time to all models in Plano's provider registry - Creates entries for both canonical (``openai/gpt-4``) and short names (``gpt-4``) - Enables the ``/v1/models`` endpoint to list all available models + - **View complete model list**: `provider_models.yaml <../../includes/provider_models.yaml>`_ 2. **Unknown/Custom Providers** (e.g., ``custom-provider/*``): diff --git a/docs/source/conf.py b/docs/source/conf.py index c6e56b43..ff54d4fa 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -38,6 +38,7 @@ extensions = [ "sphinx_design", # Local extensions "llms_txt", + "provider_models", ] # Paths that contain templates, relative to this directory.