fixing bugs related to default model provider, provider hint and duplicates in the model provider list

2026-06-17 15:25:17 +02:00 · 2026-01-22 17:00:00 -08:00 · 2026-01-22 17:00:00 -08:00 · 541099474f
commit 541099474f
parent 2c084ef6f7
21 changed files with 819 additions and 385 deletions
--- a/config/supervisord.conf
+++ b/config/supervisord.conf
@ -4,7 +4,7 @@ nodaemon=true
 [program:brightstaff]
 command=sh -c "\
    envsubst < /app/arch_config_rendered.yaml > /app/arch_config_rendered.env_sub.yaml && \
-    RUST_LOG=info \
+    RUST_LOG=debug \
    ARCH_CONFIG_PATH_RENDERED=/app/arch_config_rendered.env_sub.yaml \
    /app/brightstaff 2>&1 | \
    tee /var/log/brightstaff.log | \
@ -19,7 +19,7 @@ command=/bin/sh -c "\
    uv run python -m planoai.config_generator && \
    envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && \
    envoy -c /etc/envoy.env_sub.yaml \
-          --component-log-level wasm:info \
+          --component-log-level wasm:debug \
          --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | \
    tee /var/log/envoy.log | \
    while IFS= read -r line; do echo '[plano_logs]' \"$line\"; done"
--- a/crates/brightstaff/src/handlers/llm.rs
+++ b/crates/brightstaff/src/handlers/llm.rs
@ -1,8 +1,9 @@
 use bytes::Bytes;
-use common::configuration::{LlmProvider, ModelAlias};
+use common::configuration::ModelAlias;
 use common::consts::{
    ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
 };
+use common::llm_providers::LlmProviders;
 use common::traces::TraceCollector;
 use hermesllm::apis::openai_responses::InputParam;
 use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs};
@ -38,7 +39,7 @@ pub async fn llm_chat(
    router_service: Arc<RouterService>,
    full_qualified_llm_provider_url: String,
    model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
-    llm_providers: Arc<RwLock<Vec<LlmProvider>>>,
+    llm_providers: Arc<RwLock<LlmProviders>>,
    trace_collector: Arc<TraceCollector>,
    state_storage: Option<Arc<dyn StateStorage>>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
@ -123,6 +124,19 @@ pub async fn llm_chat(
    let is_streaming_request = client_request.is_streaming();
    let resolved_model = resolve_model_alias(&model_from_request, &model_aliases);

+    // Validate that the requested model exists in configuration
+    // This matches the validation in llm_gateway routing.rs
+    if llm_providers.read().await.get(&resolved_model).is_none() {
+        let err_msg = format!(
+            "Model '{}' not found in configured providers",
+            resolved_model
+        );
+        warn!("[PLANO_REQ_ID:{}] | FAILURE | {}", request_id, err_msg);
+        let mut bad_request = Response::new(full(err_msg));
+        *bad_request.status_mut() = StatusCode::BAD_REQUEST;
+        return Ok(bad_request);
+    }
+
    // Handle provider/model slug format (e.g., "openai/gpt-4")
    // Extract just the model name for upstream (providers don't understand the slug)
    let model_name_only = if let Some((_, model)) = resolved_model.split_once('/') {
@ -250,22 +264,25 @@ pub async fn llm_chat(
        }
    };

-    // Use the resolved model (could be "gpt-4" or "openai/gpt-4") as the provider hint
-    // The routing layer will use llm_providers.get() which handles both formats:
-    // - "gpt-4" → looks up by model name
-    // - "openai/gpt-4" → looks up by provider/model slug
-    // If router doesn't find anything, it will use routing_result.model_name
-    let provider_hint_value = resolved_model.clone();
-    let model_name = routing_result.model_name;
+    // Determine final model to use
+    // Router returns "none" as a sentinel value when it doesn't select a specific model
+    let router_selected_model = routing_result.model_name;
+    let model_name = if router_selected_model != "none" {
+        // Router selected a specific model via routing preferences
+        router_selected_model
+    } else {
+        // Router returned "none" sentinel, use validated resolved_model from request
+        resolved_model.clone()
+    };

    debug!(
        "[PLANO_REQ_ID:{}] | ARCH_ROUTER URL | {}, Provider Hint: {}, Model for upstream: {}",
-        request_id, full_qualified_llm_provider_url, provider_hint_value, model_name_only
+        request_id, full_qualified_llm_provider_url, model_name, model_name_only
    );

    request_headers.insert(
        ARCH_PROVIDER_HINT_HEADER,
-        header::HeaderValue::from_str(&provider_hint_value).unwrap(),
+        header::HeaderValue::from_str(&model_name).unwrap(),
    );

    request_headers.insert(
@ -405,7 +422,7 @@ async fn build_llm_span(
    tool_names: Option<Vec<String>>,
    user_message_preview: Option<String>,
    temperature: Option<f32>,
-    llm_providers: &Arc<RwLock<Vec<LlmProvider>>>,
+    llm_providers: &Arc<RwLock<LlmProviders>>,
 ) -> common::traces::Span {
    use crate::tracing::{http, llm, OperationNameBuilder};
    use common::traces::{parse_traceparent, SpanBuilder, SpanKind};
@ -478,7 +495,7 @@ async fn build_llm_span(
 /// Looks up provider configuration, gets the ProviderId and base_url_path_prefix,
 /// then uses target_endpoint_for_provider to calculate the correct upstream path.
 async fn get_upstream_path(
-    llm_providers: &Arc<RwLock<Vec<LlmProvider>>>,
+    llm_providers: &Arc<RwLock<LlmProviders>>,
    model_name: &str,
    request_path: &str,
    resolved_model: &str,
@ -501,25 +518,21 @@ async fn get_upstream_path(

 /// Helper function to get provider info (ProviderId and base_url_path_prefix)
 async fn get_provider_info(
-    llm_providers: &Arc<RwLock<Vec<LlmProvider>>>,
+    llm_providers: &Arc<RwLock<LlmProviders>>,
    model_name: &str,
 ) -> (hermesllm::ProviderId, Option<String>) {
    let providers_lock = llm_providers.read().await;

-    // First, try to find by model name or provider name
-    let provider = providers_lock.iter().find(|p| {
-        p.model.as_ref().map(|m| m == model_name).unwrap_or(false) || p.name == model_name
-    });
-
-    if let Some(provider) = provider {
+    // Try to find by model name or provider name using LlmProviders::get
+    // This handles both "gpt-4" and "openai/gpt-4" formats
+    if let Some(provider) = providers_lock.get(model_name) {
        let provider_id = provider.provider_interface.to_provider_id();
        let prefix = provider.base_url_path_prefix.clone();
        return (provider_id, prefix);
    }

-    let default_provider = providers_lock.iter().find(|p| p.default.unwrap_or(false));
-
-    if let Some(provider) = default_provider {
+    // Fall back to default provider
+    if let Some(provider) = providers_lock.default() {
        let provider_id = provider.provider_interface.to_provider_id();
        let prefix = provider.base_url_path_prefix.clone();
        (provider_id, prefix)
--- a/crates/brightstaff/src/handlers/models.rs
+++ b/crates/brightstaff/src/handlers/models.rs
@ -1,19 +1,17 @@
 use bytes::Bytes;
-use common::configuration::{IntoModels, LlmProvider};
-use hermesllm::apis::openai::Models;
+use common::llm_providers::LlmProviders;
 use http_body_util::{combinators::BoxBody, BodyExt, Full};
 use hyper::{Response, StatusCode};
 use serde_json;
 use std::sync::Arc;

 pub async fn list_models(
-    llm_providers: Arc<tokio::sync::RwLock<Vec<LlmProvider>>>,
+    llm_providers: Arc<tokio::sync::RwLock<LlmProviders>>,
 ) -> Response<BoxBody<Bytes, hyper::Error>> {
    let prov = llm_providers.read().await;
-    let providers = prov.clone();
-    let openai_models: Models = providers.into_models();
+    let models = prov.to_models();

-    match serde_json::to_string(&openai_models) {
+    match serde_json::to_string(&models) {
        Ok(json) => {
            let body = Full::new(Bytes::from(json))
                .map_err(|never| match never {})
--- a/crates/brightstaff/src/handlers/router_chat.rs
+++ b/crates/brightstaff/src/handlers/router_chat.rs
@ -151,16 +151,15 @@ pub async fn router_chat_get_upstream_model(
                Ok(RoutingResult { model_name })
            }
            None => {
-                // No route determined, use default model from request
+                // No route determined, return sentinel value "none"
+                // This signals to llm.rs to use the original validated request model
                info!(
-                    "[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, using default model from request: {}",
-                    request_id,
-                    chat_request.model
+                    "[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, returning sentinel 'none'",
+                    request_id
                );

-                let default_model = chat_request.model.clone();
                let mut attrs = HashMap::new();
-                attrs.insert("route.selected_model".to_string(), default_model.clone());
+                attrs.insert("route.selected_model".to_string(), "none".to_string());
                record_routing_span(
                    trace_collector,
                    traceparent,
@ -171,7 +170,7 @@ pub async fn router_chat_get_upstream_model(
                .await;

                Ok(RoutingResult {
-                    model_name: default_model,
+                    model_name: "none".to_string(),
                })
            }
        },
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@ -13,6 +13,7 @@ use common::configuration::{Agent, Configuration};
 use common::consts::{
    CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME,
 };
+use common::llm_providers::LlmProviders;
 use common::traces::TraceCollector;
 use http_body_util::{combinators::BoxBody, BodyExt, Empty};
 use hyper::body::Incoming;
@ -76,7 +77,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
        .cloned()
        .collect();

-    let llm_providers = Arc::new(RwLock::new(arch_config.model_providers.clone()));
+    // Build the LlmProviders registry (wildcards expanded); used for /v1/models and model lookups
+    let llm_providers = LlmProviders::try_from(arch_config.model_providers.clone())
+        .expect("Failed to create LlmProviders");
+    let llm_providers = Arc::new(RwLock::new(llm_providers));
    let combined_agents_filters_list = Arc::new(RwLock::new(Some(all_agents)));
    let listeners = Arc::new(RwLock::new(arch_config.listeners.clone()));
    let llm_provider_url =
--- a/crates/common/src/llm_providers.rs
+++ b/crates/common/src/llm_providers.rs
@ -1,27 +1,54 @@
 use crate::configuration::LlmProvider;
 use hermesllm::providers::ProviderId;
 use std::collections::HashMap;
-use std::rc::Rc;
+use std::sync::Arc;

 #[derive(Debug)]
 pub struct LlmProviders {
-    providers: HashMap<String, Rc<LlmProvider>>,
-    default: Option<Rc<LlmProvider>>,
+    providers: HashMap<String, Arc<LlmProvider>>,
+    default: Option<Arc<LlmProvider>>,
    /// Wildcard providers: maps provider prefix to base provider config
    /// e.g., "openai" -> LlmProvider for "openai/*"
-    wildcard_providers: HashMap<String, Rc<LlmProvider>>,
+    wildcard_providers: HashMap<String, Arc<LlmProvider>>,
 }

 impl LlmProviders {
-    pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Rc<LlmProvider>> {
+    pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Arc<LlmProvider>> {
        self.providers.iter()
    }

-    pub fn default(&self) -> Option<Rc<LlmProvider>> {
+    pub fn default(&self) -> Option<Arc<LlmProvider>> {
        self.default.clone()
    }
+    /// Convert providers to OpenAI Models format for /v1/models endpoint
+    /// Filters out internal models and duplicate entries (backward compatibility aliases)
+    pub fn to_models(&self) -> hermesllm::apis::openai::Models {
+        use hermesllm::apis::openai::{ModelDetail, ModelObject, Models};

-    pub fn get(&self, name: &str) -> Option<Rc<LlmProvider>> {
+        let data: Vec<ModelDetail> = self
+            .providers
+            .iter()
+            .filter(|(key, provider)| {
+                // Exclude internal models
+                provider.internal != Some(true)
+                // Only include canonical entries (key matches provider name)
+                // This avoids duplicates from backward compatibility short names
+                && *key == &provider.name
+            })
+            .map(|(name, provider)| ModelDetail {
+                id: name.clone(),
+                object: Some("model".to_string()),
+                created: 0,
+                owned_by: provider.to_provider_id().to_string(),
+            })
+            .collect();
+
+        Models {
+            object: ModelObject::List,
+            data,
+        }
+    }
+    pub fn get(&self, name: &str) -> Option<Arc<LlmProvider>> {
        // First try exact match
        if let Some(provider) = self.providers.get(name).cloned() {
            return Some(provider);
@ -47,7 +74,7 @@ impl LlmProviders {
                // Create a new provider with the specific model from the slug
                let mut specific_provider = (**wildcard_provider).clone();
                specific_provider.model = Some(model_name.to_string());
-                return Some(Rc::new(specific_provider));
+                return Some(Arc::new(specific_provider));
            }
        }

@ -79,13 +106,40 @@ impl TryFrom<Vec<LlmProvider>> for LlmProviders {
            wildcard_providers: HashMap::new(),
        };

+        // Track specific (non-wildcard) provider names to detect true duplicates
+        let mut specific_provider_names = std::collections::HashSet::new();
+
+        // Track specific models that should be excluded from wildcard expansion
+        // Maps provider_prefix -> Set of model names (e.g., "anthropic" -> {"claude-sonnet-4-20250514"})
+        let mut specific_models_by_provider: HashMap<String, std::collections::HashSet<String>> =
+            HashMap::new();
+
+        // First pass: collect all specific model configurations
+        for llm_provider in &llm_providers_config {
+            let is_wildcard = llm_provider
+                .model
+                .as_ref()
+                .map(|m| m == "*" || m.ends_with("/*"))
+                .unwrap_or(false);
+
+            if !is_wildcard {
+                // Check if this is a provider/model format
+                if let Some((provider_prefix, model_name)) = llm_provider.name.split_once('/') {
+                    specific_models_by_provider
+                        .entry(provider_prefix.to_string())
+                        .or_default()
+                        .insert(model_name.to_string());
+                }
+            }
+        }
+
        for llm_provider in llm_providers_config {
-            let llm_provider: Rc<LlmProvider> = Rc::new(llm_provider);
+            let llm_provider: Arc<LlmProvider> = Arc::new(llm_provider);

            if llm_provider.default.unwrap_or_default() {
                match llm_providers.default {
                    Some(_) => return Err(LlmProvidersNewError::MoreThanOneDefault),
-                    None => llm_providers.default = Some(Rc::clone(&llm_provider)),
+                    None => llm_providers.default = Some(Arc::clone(&llm_provider)),
                }
            }

@ -109,20 +163,45 @@ impl TryFrom<Vec<LlmProvider>> for LlmProviders {

                llm_providers
                    .wildcard_providers
-                    .insert(provider_prefix.to_string(), Rc::clone(&llm_provider));
+                    .insert(provider_prefix.to_string(), Arc::clone(&llm_provider));

                // Try to expand wildcard using ProviderId models
                if let Ok(provider_id) = ProviderId::try_from(provider_prefix) {
                    let models = provider_id.models();
+
+                    // Get the set of specific models to exclude for this provider
+                    let models_to_exclude = specific_models_by_provider
+                        .get(provider_prefix)
+                        .cloned()
+                        .unwrap_or_default();
+
                    if !models.is_empty() {
+                        let excluded_count = models_to_exclude.len();
+                        let total_models = models.len();
+
                        log::info!(
-                            "Expanding wildcard provider '{}' to {} models",
+                            "Expanding wildcard provider '{}' to {} models{}",
                            provider_prefix,
-                            models.len()
+                            total_models - excluded_count,
+                            if excluded_count > 0 {
+                                format!(" (excluding {} specifically configured)", excluded_count)
+                            } else {
+                                String::new()
+                            }
                        );

-                        // Create a provider entry for each model
+                        // Create a provider entry for each model (except those specifically configured)
                        for model_name in models {
+                            // Skip this model if it has a specific configuration
+                            if models_to_exclude.contains(&model_name) {
+                                log::debug!(
+                                    "Skipping wildcard expansion for '{}/{}' - specific configuration exists",
+                                    provider_prefix,
+                                    model_name
+                                );
+                                continue;
+                            }
+
                            let full_model_id = format!("{}/{}", provider_prefix, model_name);

                            // Create a new provider with the specific model
@ -130,12 +209,12 @@ impl TryFrom<Vec<LlmProvider>> for LlmProviders {
                            expanded_provider.model = Some(model_name.clone());
                            expanded_provider.name = full_model_id.clone();

-                            let expanded_rc = Rc::new(expanded_provider);
+                            let expanded_rc = Arc::new(expanded_provider);

                            // Insert with full model ID as key
                            llm_providers
                                .providers
-                                .insert(full_model_id.clone(), Rc::clone(&expanded_rc));
+                                .insert(full_model_id.clone(), Arc::clone(&expanded_rc));

                            // Also insert with just model name for backward compatibility
                            llm_providers.providers.insert(model_name, expanded_rc);
@ -149,24 +228,26 @@ impl TryFrom<Vec<LlmProvider>> for LlmProviders {
                    );
                }
            } else {
-                // Non-wildcard provider - original behavior
-                if llm_providers
-                    .providers
-                    .insert(name.clone(), Rc::clone(&llm_provider))
-                    .is_some()
-                {
+                // Non-wildcard provider - specific configuration
+                // Check for duplicate specific entries (not allowed)
+                if specific_provider_names.contains(&name) {
                    return Err(LlmProvidersNewError::DuplicateName(name));
                }
+                specific_provider_names.insert(name.clone());

-                // also add model_id as key for provider lookup
+                // This specific configuration takes precedence over any wildcard expansion
+                // The wildcard expansion already excluded this model (see first pass above)
+
+                log::debug!("Processing specific provider configuration: {}", name);
+
+                // Insert with the provider name as key
+                llm_providers
+                    .providers
+                    .insert(name.clone(), Arc::clone(&llm_provider));
+
+                // Also add model_id as key for provider lookup
                if let Some(model) = llm_provider.model.clone() {
-                    if llm_providers
-                        .providers
-                        .insert(model, llm_provider)
-                        .is_some()
-                    {
-                        return Err(LlmProvidersNewError::DuplicateName(name));
-                    }
+                    llm_providers.providers.insert(model, llm_provider);
                }
            }
        }
--- a/crates/common/src/routing.rs
+++ b/crates/common/src/routing.rs
@ -1,4 +1,4 @@
-use std::rc::Rc;
+use std::sync::Arc;

 use crate::{configuration, llm_providers::LlmProviders};
 use configuration::LlmProvider;
@ -21,7 +21,7 @@ impl From<String> for ProviderHint {
 pub fn get_llm_provider(
    llm_providers: &LlmProviders,
    provider_hint: Option<ProviderHint>,
-) -> Result<Rc<LlmProvider>, String> {
+) -> Result<Arc<LlmProvider>, String> {
    match provider_hint {
        Some(ProviderHint::Default) => llm_providers
            .default()
@ -29,11 +29,6 @@ pub fn get_llm_provider(
        Some(ProviderHint::Name(name)) => llm_providers
            .get(&name)
            .ok_or_else(|| format!("Model '{}' not found in configured providers", name)),
-        None => {
-            // No hint provided - must have a default configured
-            llm_providers
-                .default()
-                .ok_or_else(|| "No model specified and no default provider configured".to_string())
-        }
+        None => Err("No model specified in request".to_string()),
    }
 }
--- a/crates/hermesllm/src/bin/fetch_models.rs
+++ b/crates/hermesllm/src/bin/fetch_models.rs
@ -1,5 +1,9 @@
-// Fetch latest provider models from OpenRouter and update provider_models.json
-// Usage: OPENROUTER_API_KEY=xxx cargo run --bin fetch_models
+// Fetch latest provider models from canonical provider APIs and update provider_models.json
+// Usage:
+//   Optional: OPENAI_API_KEY, ANTHROPIC_API_KEY, MISTRAL_API_KEY, DEEPSEEK_API_KEY,
+//             GROK_API_KEY, DASHSCOPE_API_KEY, MOONSHOT_API_KEY, ZHIPU_API_KEY, GOOGLE_API_KEY
+//   Required: AWS CLI configured for Amazon Bedrock models
+//   cargo run --bin fetch_models

 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
@ -15,9 +19,9 @@ fn main() {
        .nth(1)
        .unwrap_or_else(|| default_path.to_string_lossy().to_string());

-    println!("Fetching latest models from OpenRouter...");
+    println!("Fetching latest models from provider APIs...");

-    match fetch_openrouter_models() {
+    match fetch_all_models() {
        Ok(models) => {
            let json = serde_json::to_string_pretty(&models).expect("Failed to serialize models");

@ -30,28 +34,38 @@ fn main() {
        }
        Err(e) => {
            eprintln!("Error fetching models: {}", e);
-            eprintln!("\nMake sure OPENROUTER_API_KEY is set:");
-            eprintln!("  export OPENROUTER_API_KEY=your-key-here");
+            eprintln!("\nMake sure required tools are set up:");
+            eprintln!("  AWS CLI configured for Bedrock (for Amazon models)");
+            eprintln!("  export OPENAI_API_KEY=your-key-here      # Optional");
+            eprintln!("  export DEEPSEEK_API_KEY=your-key-here    # Optional");
            eprintln!("  cargo run --bin fetch_models");
            std::process::exit(1);
        }
    }
 }

+// OpenAI-compatible API response (used by most providers)
 #[derive(Debug, Deserialize)]
-struct OpenRouterModel {
+struct OpenAICompatibleModel {
    id: String,
-    architecture: Option<Architecture>,
 }

 #[derive(Debug, Deserialize)]
-struct Architecture {
-    modality: Option<String>,
+struct OpenAICompatibleResponse {
+    data: Vec<OpenAICompatibleModel>,
+}
+
+// Google Gemini API response
+#[derive(Debug, Deserialize)]
+struct GoogleModel {
+    name: String,
+    #[serde(rename = "supportedGenerationMethods")]
+    supported_generation_methods: Option<Vec<String>>,
 }

 #[derive(Debug, Deserialize)]
-struct OpenRouterResponse {
-    data: Vec<OpenRouterModel>,
+struct GoogleResponse {
+    models: Vec<GoogleModel>,
 }

 #[derive(Debug, Serialize)]
@ -69,94 +83,327 @@ struct Metadata {
    last_updated: String,
 }

-fn fetch_openrouter_models() -> Result<ProviderModels, Box<dyn std::error::Error>> {
-    let api_key = std::env::var("OPENROUTER_API_KEY")
-        .map_err(|_| "OPENROUTER_API_KEY environment variable not set")?;
+fn is_text_model(model_id: &str) -> bool {
+    let id_lower = model_id.to_lowercase();

-    let response_body = ureq::get("https://openrouter.ai/api/v1/models")
+    // Filter out known non-text models
+    let non_text_patterns = [
+        "embedding",   // Embedding models
+        "whisper",     // Audio transcription
+        "-tts",        // Text-to-speech (with dash to avoid matching in middle of words)
+        "tts-",        // Text-to-speech prefix
+        "dall-e",      // Image generation
+        "sora",        // Video generation
+        "moderation",  // Moderation models
+        "babbage",     // Legacy completion models
+        "davinci-002", // Legacy completion models
+        "transcribe",  // Audio transcription models
+        "realtime",    // Realtime audio models
+        "audio",       // Audio models (gpt-audio, gpt-audio-mini)
+        "-image-",     // Image generation models (grok-2-image-1212)
+        "-ocr-",       // OCR models
+        "ocr-",        // OCR models prefix
+        "voxtral",     // Audio/voice models
+    ];
+
+    // Additional pattern: models that are purely for image generation usually have "image" in the name
+    // but we need to be careful not to filter vision models that can process images
+    // Models like "gpt-image-1" or "chatgpt-image-latest" are image generators
+    // Models like "grok-2-vision" or "gemini-vision" are vision models (text+image->text)
+
+    if non_text_patterns
+        .iter()
+        .any(|pattern| id_lower.contains(pattern))
+    {
+        return false;
+    }
+
+    // Filter image generators given in slug form, e.g. "openai/gpt-image-1" (note the leading '/')
+    if id_lower.contains("/gpt-image") || id_lower.contains("/chatgpt-image") {
+        return false;
+    }
+
+    true
+}
+
+fn fetch_openai_compatible_models(
+    api_url: &str,
+    api_key: &str,
+    provider_prefix: &str,
+) -> Result<Vec<String>, Box<dyn std::error::Error>> {
+    let response_body = ureq::get(api_url)
        .header("Authorization", &format!("Bearer {}", api_key))
        .call()?
        .body_mut()
        .read_to_string()?;

-    let openrouter_response: OpenRouterResponse = serde_json::from_str(&response_body)?;
+    let response: OpenAICompatibleResponse = serde_json::from_str(&response_body)?;

-    // Supported providers to include
-    let supported_providers = [
-        "openai",
-        "anthropic",
-        "mistralai",
-        "deepseek",
-        "google",
-        "x-ai",
-        "moonshotai",
-        "qwen",
-        "amazon",
-        "z-ai",
+    Ok(response
+        .data
+        .into_iter()
+        .filter(|m| is_text_model(&m.id))
+        .map(|m| format!("{}/{}", provider_prefix, m.id))
+        .collect())
+}
+
+fn fetch_anthropic_models(api_key: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
+    let response_body = ureq::get("https://api.anthropic.com/v1/models")
+        .header("x-api-key", api_key)
+        .header("anthropic-version", "2023-06-01")
+        .call()?
+        .body_mut()
+        .read_to_string()?;
+
+    let response: OpenAICompatibleResponse = serde_json::from_str(&response_body)?;
+
+    let dated_models: Vec<String> = response
+        .data
+        .into_iter()
+        .filter(|m| is_text_model(&m.id))
+        .map(|m| m.id)
+        .collect();
+
+    let mut models: Vec<String> = Vec::new();
+
+    // Add both dated versions and their aliases (without the -YYYYMMDD suffix)
+    for model_id in dated_models {
+        // Add the full dated model ID
+        models.push(format!("anthropic/{}", model_id));
+
+        // Generate alias by removing trailing -YYYYMMDD pattern
+        // Pattern: ends with -YYYYMMDD where YYYY is year, MM is month, DD is day
+        if let Some(date_pos) = model_id.rfind('-') {
+            let potential_date = &model_id[date_pos + 1..];
+            // Check if it's an 8-digit date (YYYYMMDD)
+            if potential_date.len() == 8 && potential_date.chars().all(|c| c.is_ascii_digit()) {
+                let alias = &model_id[..date_pos];
+                let alias_full = format!("anthropic/{}", alias);
+                // Only add if not already present
+                if !models.contains(&alias_full) {
+                    models.push(alias_full);
+                }
+            }
+        }
+    }
+
+    Ok(models)
+}
+
+fn fetch_google_models(api_key: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
+    let api_url = format!(
+        "https://generativelanguage.googleapis.com/v1beta/models?key={}",
+        api_key
+    );
+
+    let response_body = ureq::get(&api_url).call()?.body_mut().read_to_string()?;
+
+    let response: GoogleResponse = serde_json::from_str(&response_body)?;
+
+    // Only include models that support generateContent
+    Ok(response
+        .models
+        .into_iter()
+        .filter(|m| {
+            m.supported_generation_methods
+                .as_ref()
+                .map_or(false, |methods| {
+                    methods.contains(&"generateContent".to_string())
+                })
+        })
+        .map(|m| {
+            // Convert "models/gemini-pro" to "google/gemini-pro"
+            let model_id = m.name.strip_prefix("models/").unwrap_or(&m.name);
+            format!("google/{}", model_id)
+        })
+        .collect())
+}
+
+fn fetch_bedrock_amazon_models() -> Result<Vec<String>, Box<dyn std::error::Error>> {
+    // Use AWS CLI to fetch Amazon models from Bedrock
+    let output = std::process::Command::new("aws")
+        .args([
+            "bedrock",
+            "list-foundation-models",
+            "--by-provider",
+            "amazon",
+            "--by-output-modality",
+            "TEXT",
+            "--no-cli-pager",
+            "--output",
+            "json",
+        ])
+        .output()?;
+
+    if !output.status.success() {
+        return Err(format!(
+            "AWS CLI command failed: {}",
+            String::from_utf8_lossy(&output.stderr)
+        )
+        .into());
+    }
+
+    let response_body = String::from_utf8(output.stdout)?;
+
+    #[derive(Debug, Deserialize)]
+    struct BedrockModelSummary {
+        #[serde(rename = "modelId")]
+        model_id: String,
+    }
+
+    #[derive(Debug, Deserialize)]
+    struct BedrockResponse {
+        #[serde(rename = "modelSummaries")]
+        model_summaries: Vec<BedrockModelSummary>,
+    }
+
+    let bedrock_response: BedrockResponse = serde_json::from_str(&response_body)?;
+
+    // Filter out embedding, image generation, and rerank models
+    let amazon_models: Vec<String> = bedrock_response
+        .model_summaries
+        .into_iter()
+        .filter(|model| {
+            let id_lower = model.model_id.to_lowercase();
+            !id_lower.contains("embed")
+                && !id_lower.contains("image")
+                && !id_lower.contains("rerank")
+        })
+        .map(|m| format!("amazon/{}", m.model_id))
+        .collect();
+
+    Ok(amazon_models)
+}
+
+fn fetch_all_models() -> Result<ProviderModels, Box<dyn std::error::Error>> {
+    let mut providers: HashMap<String, Vec<String>> = HashMap::new();
+    let mut errors: Vec<String> = Vec::new();
+
+    // Configuration: provider name, env var, API URL, prefix for model IDs
+    let provider_configs = vec![
+        (
+            "openai",
+            "OPENAI_API_KEY",
+            "https://api.openai.com/v1/models",
+            "openai",
+        ),
+        (
+            "mistralai",
+            "MISTRAL_API_KEY",
+            "https://api.mistral.ai/v1/models",
+            "mistralai",
+        ),
+        (
+            "deepseek",
+            "DEEPSEEK_API_KEY",
+            "https://api.deepseek.com/v1/models",
+            "deepseek",
+        ),
+        ("x-ai", "GROK_API_KEY", "https://api.x.ai/v1/models", "x-ai"),
+        (
+            "moonshotai",
+            "MOONSHOT_API_KEY",
+            "https://api.moonshot.ai/v1/models",
+            "moonshotai",
+        ),
+        (
+            "qwen",
+            "DASHSCOPE_API_KEY",
+            "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models",
+            "qwen",
+        ),
+        (
+            "z-ai",
+            "ZHIPU_API_KEY",
+            "https://open.bigmodel.cn/api/paas/v4/models",
+            "z-ai",
+        ),
    ];

-    let mut providers: HashMap<String, Vec<String>> = HashMap::new();
-    let mut total_models = 0;
-    let mut filtered_modality: Vec<(String, String)> = Vec::new();
-    let mut filtered_provider: Vec<(String, Option<String>)> = Vec::new();
+    // Fetch from OpenAI-compatible providers
+    for (provider_name, env_var, api_url, prefix) in provider_configs {
+        if let Ok(api_key) = std::env::var(env_var) {
+            match fetch_openai_compatible_models(api_url, &api_key, prefix) {
+                Ok(models) => {
+                    println!("  ✓ {}: {} models", provider_name, models.len());
+                    providers.insert(provider_name.to_string(), models);
+                }
+                Err(e) => {
+                    let err_msg = format!("  ✗ {}: {}", provider_name, e);
+                    eprintln!("{}", err_msg);
+                    errors.push(err_msg);
+                }
+            }
+        } else {
+            println!("  ⊘ {}: {} not set (skipped)", provider_name, env_var);
+        }
+    }

-    for model in openrouter_response.data {
-        let modality = model
-            .architecture
-            .as_ref()
-            .and_then(|arch| arch.modality.clone());
-
-        // Only include text->text and text+image->text models
-        if let Some(ref mod_str) = modality {
-            if mod_str != "text->text" && mod_str != "text" && mod_str != "text+image->text" {
-                filtered_modality.push((model.id.clone(), mod_str.clone()));
-                continue;
+    // Fetch Anthropic models (different authentication)
+    if let Ok(api_key) = std::env::var("ANTHROPIC_API_KEY") {
+        match fetch_anthropic_models(&api_key) {
+            Ok(models) => {
+                println!("  ✓ anthropic: {} models", models.len());
+                providers.insert("anthropic".to_string(), models);
+            }
+            Err(e) => {
+                let err_msg = format!("  ✗ anthropic: {}", e);
+                eprintln!("{}", err_msg);
+                errors.push(err_msg);
            }
        }
+    } else {
+        println!("  ⊘ anthropic: ANTHROPIC_API_KEY not set (skipped)");
+    }

-        // Extract provider from model ID (e.g., "openai/gpt-4" -> "openai")
-        if let Some(provider_name) = model.id.split('/').next() {
-            if supported_providers.contains(&provider_name) {
-                providers
-                    .entry(provider_name.to_string())
-                    .or_default()
-                    .push(model.id.clone());
-                total_models += 1;
-            } else {
-                filtered_provider.push((model.id.clone(), modality));
+    // Fetch Google models (different API format)
+    if let Ok(api_key) = std::env::var("GOOGLE_API_KEY") {
+        match fetch_google_models(&api_key) {
+            Ok(models) => {
+                println!("  ✓ google: {} models", models.len());
+                providers.insert("google".to_string(), models);
+            }
+            Err(e) => {
+                let err_msg = format!("  ✗ google: {}", e);
+                eprintln!("{}", err_msg);
+                errors.push(err_msg);
            }
        }
+    } else {
+        println!("  ⊘ google: GOOGLE_API_KEY not set (skipped)");
    }

-    println!("✅ Loaded models from {} providers:", providers.len());
-    let mut sorted_providers: Vec<_> = providers.iter().collect();
-    sorted_providers.sort_by_key(|(name, _)| *name);
-    for (provider, models) in sorted_providers {
-        println!("  • {}: {} models", provider, models.len());
-    }
-
-    // Group filtered providers to get counts
-    let mut filtered_by_provider: HashMap<String, usize> = HashMap::new();
-    for (model_id, _modality) in &filtered_provider {
-        if let Some(provider_name) = model_id.split('/').next() {
-            *filtered_by_provider
-                .entry(provider_name.to_string())
-                .or_insert(0) += 1;
+    // Fetch Amazon models from AWS Bedrock
+    match fetch_bedrock_amazon_models() {
+        Ok(models) => {
+            println!("  ✓ amazon: {} models (via AWS Bedrock)", models.len());
+            providers.insert("amazon".to_string(), models);
+        }
+        Err(e) => {
+            let err_msg = format!("  ✗ amazon: {} (AWS Bedrock required)", e);
+            eprintln!("{}", err_msg);
+            errors.push(err_msg);
        }
    }

-    println!(
-        "\n⏭️  Skipped {} providers ({} models total)",
-        filtered_by_provider.len(),
-        filtered_provider.len()
-    );
-    println!();
+    if providers.is_empty() {
+        return Err("No models fetched from any provider. Check API keys.".into());
+    }

    let total_providers = providers.len();
+    let total_models: usize = providers.values().map(|v| v.len()).sum();
+
+    println!(
+        "\n✅ Successfully fetched models from {} providers",
+        total_providers
+    );
+    if !errors.is_empty() {
+        println!("⚠️  {} providers failed", errors.len());
+    }

    Ok(ProviderModels {
        version: "1.0".to_string(),
-        source: "openrouter".to_string(),
+        source: "canonical-apis".to_string(),
        providers,
        metadata: Metadata {
            total_providers,
--- a/crates/hermesllm/src/bin/provider_models.json
+++ b/crates/hermesllm/src/bin/provider_models.json
@ -1,236 +1,327 @@
 {
  "version": "1.0",
-  "source": "openrouter",
+  "source": "canonical-apis",
  "providers": {
-    "openai": [
-      "openai/gpt-5.2-codex",
-      "openai/gpt-5.2-chat",
-      "openai/gpt-5.2-pro",
-      "openai/gpt-5.2",
-      "openai/gpt-5.1-codex-max",
-      "openai/gpt-5.1",
-      "openai/gpt-5.1-chat",
-      "openai/gpt-5.1-codex",
-      "openai/gpt-5.1-codex-mini",
-      "openai/gpt-oss-safeguard-20b",
-      "openai/o3-deep-research",
-      "openai/o4-mini-deep-research",
-      "openai/gpt-5-pro",
-      "openai/gpt-5-codex",
-      "openai/gpt-4o-audio-preview",
-      "openai/gpt-5-chat",
-      "openai/gpt-5",
-      "openai/gpt-5-mini",
-      "openai/gpt-5-nano",
-      "openai/gpt-oss-120b:free",
-      "openai/gpt-oss-120b",
-      "openai/gpt-oss-120b:exacto",
-      "openai/gpt-oss-20b:free",
-      "openai/gpt-oss-20b",
-      "openai/o3-pro",
-      "openai/o4-mini-high",
-      "openai/o3",
-      "openai/o4-mini",
-      "openai/gpt-4.1",
-      "openai/gpt-4.1-mini",
-      "openai/gpt-4.1-nano",
-      "openai/o1-pro",
-      "openai/gpt-4o-mini-search-preview",
-      "openai/gpt-4o-search-preview",
-      "openai/o3-mini-high",
-      "openai/o3-mini",
-      "openai/o1",
-      "openai/gpt-4o-2024-11-20",
-      "openai/chatgpt-4o-latest",
-      "openai/gpt-4o-2024-08-06",
-      "openai/gpt-4o-mini-2024-07-18",
-      "openai/gpt-4o-mini",
-      "openai/gpt-4o-2024-05-13",
-      "openai/gpt-4o",
-      "openai/gpt-4o:extended",
-      "openai/gpt-4-turbo",
-      "openai/gpt-3.5-turbo-0613",
-      "openai/gpt-4-turbo-preview",
-      "openai/gpt-4-1106-preview",
-      "openai/gpt-3.5-turbo-instruct",
-      "openai/gpt-3.5-turbo-16k",
-      "openai/gpt-4-0314",
-      "openai/gpt-4",
-      "openai/gpt-3.5-turbo"
-    ],
-    "mistralai": [
-      "mistralai/mistral-small-creative",
-      "mistralai/devstral-2512:free",
-      "mistralai/devstral-2512",
-      "mistralai/ministral-14b-2512",
-      "mistralai/ministral-8b-2512",
-      "mistralai/ministral-3b-2512",
-      "mistralai/mistral-large-2512",
-      "mistralai/voxtral-small-24b-2507",
-      "mistralai/mistral-medium-3.1",
-      "mistralai/codestral-2508",
-      "mistralai/devstral-medium",
-      "mistralai/devstral-small",
-      "mistralai/mistral-small-3.2-24b-instruct",
-      "mistralai/mistral-medium-3",
-      "mistralai/mistral-small-3.1-24b-instruct:free",
-      "mistralai/mistral-small-3.1-24b-instruct",
-      "mistralai/mistral-saba",
-      "mistralai/mistral-small-24b-instruct-2501",
-      "mistralai/mistral-large-2411",
-      "mistralai/mistral-large-2407",
-      "mistralai/pixtral-large-2411",
-      "mistralai/ministral-8b",
-      "mistralai/ministral-3b",
-      "mistralai/pixtral-12b",
-      "mistralai/mistral-nemo",
-      "mistralai/mistral-7b-instruct",
-      "mistralai/mistral-7b-instruct-v0.3",
-      "mistralai/mixtral-8x22b-instruct",
-      "mistralai/mistral-large",
-      "mistralai/mistral-tiny",
-      "mistralai/mistral-7b-instruct-v0.2",
-      "mistralai/mixtral-8x7b-instruct",
-      "mistralai/mistral-7b-instruct-v0.1"
-    ],
-    "qwen": [
-      "qwen/qwen3-vl-32b-instruct",
-      "qwen/qwen3-vl-8b-thinking",
-      "qwen/qwen3-vl-8b-instruct",
-      "qwen/qwen3-vl-30b-a3b-thinking",
-      "qwen/qwen3-vl-30b-a3b-instruct",
-      "qwen/qwen3-vl-235b-a22b-thinking",
-      "qwen/qwen3-vl-235b-a22b-instruct",
-      "qwen/qwen3-max",
-      "qwen/qwen3-coder-plus",
-      "qwen/qwen3-coder-flash",
-      "qwen/qwen3-next-80b-a3b-thinking",
-      "qwen/qwen3-next-80b-a3b-instruct:free",
-      "qwen/qwen3-next-80b-a3b-instruct",
-      "qwen/qwen-plus-2025-07-28",
-      "qwen/qwen-plus-2025-07-28:thinking",
-      "qwen/qwen3-30b-a3b-thinking-2507",
-      "qwen/qwen3-coder-30b-a3b-instruct",
-      "qwen/qwen3-30b-a3b-instruct-2507",
-      "qwen/qwen3-235b-a22b-thinking-2507",
-      "qwen/qwen3-coder:free",
-      "qwen/qwen3-coder",
-      "qwen/qwen3-coder:exacto",
-      "qwen/qwen3-235b-a22b-2507",
-      "qwen/qwen3-4b:free",
-      "qwen/qwen3-30b-a3b",
-      "qwen/qwen3-8b",
-      "qwen/qwen3-14b",
-      "qwen/qwen3-32b",
-      "qwen/qwen3-235b-a22b",
-      "qwen/qwen2.5-coder-7b-instruct",
-      "qwen/qwen2.5-vl-32b-instruct",
-      "qwen/qwq-32b",
-      "qwen/qwen-vl-plus",
-      "qwen/qwen-vl-max",
-      "qwen/qwen-turbo",
-      "qwen/qwen2.5-vl-72b-instruct",
-      "qwen/qwen-plus",
-      "qwen/qwen-max",
-      "qwen/qwen-2.5-coder-32b-instruct",
-      "qwen/qwen-2.5-7b-instruct",
-      "qwen/qwen-2.5-72b-instruct",
-      "qwen/qwen-2.5-vl-7b-instruct:free",
-      "qwen/qwen-2.5-vl-7b-instruct"
-    ],
-    "z-ai": [
-      "z-ai/glm-4.7",
-      "z-ai/glm-4.6v",
-      "z-ai/glm-4.6",
-      "z-ai/glm-4.6:exacto",
-      "z-ai/glm-4.5v",
-      "z-ai/glm-4.5",
-      "z-ai/glm-4.5-air:free",
-      "z-ai/glm-4.5-air",
-      "z-ai/glm-4-32b"
-    ],
-    "moonshotai": [
-      "moonshotai/kimi-k2-thinking",
-      "moonshotai/kimi-k2-0905",
-      "moonshotai/kimi-k2-0905:exacto",
-      "moonshotai/kimi-k2:free",
-      "moonshotai/kimi-k2",
-      "moonshotai/kimi-dev-72b"
-    ],
    "anthropic": [
-      "anthropic/claude-opus-4.5",
-      "anthropic/claude-haiku-4.5",
-      "anthropic/claude-sonnet-4.5",
-      "anthropic/claude-opus-4.1",
+      "anthropic/claude-opus-4-5-20251101",
+      "anthropic/claude-opus-4-5",
+      "anthropic/claude-haiku-4-5-20251001",
+      "anthropic/claude-haiku-4-5",
+      "anthropic/claude-sonnet-4-5-20250929",
+      "anthropic/claude-sonnet-4-5",
+      "anthropic/claude-opus-4-1-20250805",
+      "anthropic/claude-opus-4-1",
+      "anthropic/claude-opus-4-20250514",
      "anthropic/claude-opus-4",
+      "anthropic/claude-sonnet-4-20250514",
      "anthropic/claude-sonnet-4",
-      "anthropic/claude-3.7-sonnet:thinking",
-      "anthropic/claude-3.7-sonnet",
-      "anthropic/claude-3.5-haiku",
-      "anthropic/claude-3.5-sonnet",
+      "anthropic/claude-3-7-sonnet-20250219",
+      "anthropic/claude-3-7-sonnet",
+      "anthropic/claude-3-5-haiku-20241022",
+      "anthropic/claude-3-5-haiku",
+      "anthropic/claude-3-haiku-20240307",
      "anthropic/claude-3-haiku"
    ],
-    "google": [
-      "google/gemini-3-flash-preview",
-      "google/gemini-3-pro-preview",
-      "google/gemini-2.5-flash-preview-09-2025",
-      "google/gemini-2.5-flash-lite-preview-09-2025",
-      "google/gemini-2.5-flash-lite",
-      "google/gemma-3n-e2b-it:free",
-      "google/gemini-2.5-flash",
-      "google/gemini-2.5-pro",
-      "google/gemini-2.5-pro-preview",
-      "google/gemma-3n-e4b-it:free",
-      "google/gemma-3n-e4b-it",
-      "google/gemini-2.5-pro-preview-05-06",
-      "google/gemma-3-4b-it:free",
-      "google/gemma-3-4b-it",
-      "google/gemma-3-12b-it:free",
-      "google/gemma-3-12b-it",
-      "google/gemma-3-27b-it:free",
-      "google/gemma-3-27b-it",
-      "google/gemini-2.0-flash-lite-001",
-      "google/gemini-2.0-flash-001",
-      "google/gemini-2.0-flash-exp:free",
-      "google/gemma-2-27b-it",
-      "google/gemma-2-9b-it"
+    "qwen": [
+      "qwen/qwen-plus-character",
+      "qwen/qwen-flash-character",
+      "qwen/qwen-flash",
+      "qwen/qwen3-vl-plus-2025-12-19",
+      "qwen/qwen3-omni-flash-2025-12-01",
+      "qwen/qwen3-livetranslate-flash-2025-12-01",
+      "qwen/qwen3-livetranslate-flash",
+      "qwen/qwen-mt-lite",
+      "qwen/qwen-plus-2025-12-01",
+      "qwen/qwen-mt-flash",
+      "qwen/ccai-pro",
+      "qwen/tongyi-tingwu-slp",
+      "qwen/qwen3-vl-flash",
+      "qwen/qwen3-vl-flash-2025-10-15",
+      "qwen/qwen3-omni-flash",
+      "qwen/qwen3-omni-flash-2025-09-15",
+      "qwen/qwen3-omni-30b-a3b-captioner",
+      "qwen/qwen2.5-7b-instruct",
+      "qwen/qwen2.5-14b-instruct",
+      "qwen/qwen2.5-32b-instruct",
+      "qwen/qwen2.5-72b-instruct",
+      "qwen/qwen2.5-14b-instruct-1m",
+      "qwen/qwen2.5-7b-instruct-1m",
+      "qwen/qwen-max-2025-01-25",
+      "qwen/qwen-max-latest",
+      "qwen/qwen-turbo-2024-11-01",
+      "qwen/qwen-turbo-latest",
+      "qwen/qwen-plus-latest",
+      "qwen/qwen-plus-2025-01-25",
+      "qwen/qwq-plus-2025-03-05",
+      "qwen/qwen-mt-turbo",
+      "qwen/qwen-mt-plus",
+      "qwen/qwen-coder-plus",
+      "qwen/qwq-plus",
+      "qwen/qwen2.5-vl-32b-instruct",
+      "qwen/qvq-max",
+      "qwen/qwen-omni-turbo",
+      "qwen/qwen3-8b",
+      "qwen/qwen3-30b-a3b",
+      "qwen/qwen3-235b-a22b",
+      "qwen/qwen-turbo-2025-04-28",
+      "qwen/qwen-plus-2025-04-28",
+      "qwen/qwen-vl-max-2025-04-08",
+      "qwen/qwen-vl-plus-2025-01-25",
+      "qwen/qwen-vl-plus-latest",
+      "qwen/qwen-vl-max-latest",
+      "qwen/qwen-vl-plus-2025-05-07",
+      "qwen/qwen3-coder-plus",
+      "qwen/qwen3-coder-480b-a35b-instruct",
+      "qwen/qwen3-235b-a22b-instruct-2507",
+      "qwen/qwen-plus-2025-07-14",
+      "qwen/qwen3-coder-plus-2025-07-22",
+      "qwen/qwen3-235b-a22b-thinking-2507",
+      "qwen/qwen3-coder-flash",
+      "qwen/qwen-vl-max",
+      "qwen/qwen-vl-max-2025-08-13",
+      "qwen/qwen3-max",
+      "qwen/qwen3-max-2025-09-23",
+      "qwen/qwen3-vl-plus",
+      "qwen/qwen3-vl-235b-a22b-instruct",
+      "qwen/qwen3-vl-235b-a22b-thinking",
+      "qwen/qwen3-30b-a3b-thinking-2507",
+      "qwen/qwen3-30b-a3b-instruct-2507",
+      "qwen/qwen3-14b",
+      "qwen/qwen3-32b",
+      "qwen/qwen3-0.6b",
+      "qwen/qwen3-4b",
+      "qwen/qwen3-1.7b",
+      "qwen/qwen-vl-plus",
+      "qwen/qwen3-coder-plus-2025-09-23",
+      "qwen/qwen3-vl-plus-2025-09-23",
+      "qwen/qwen-plus-2025-09-11",
+      "qwen/qwen3-next-80b-a3b-thinking",
+      "qwen/qwen3-next-80b-a3b-instruct",
+      "qwen/qwen3-max-preview",
+      "qwen/qwen2-7b-instruct",
+      "qwen/qwen-max",
+      "qwen/qwen-plus",
+      "qwen/qwen-turbo"
    ],
-    "amazon": [
-      "amazon/nova-2-lite-v1",
-      "amazon/nova-premier-v1",
-      "amazon/nova-lite-v1",
-      "amazon/nova-micro-v1",
-      "amazon/nova-pro-v1"
+    "moonshotai": [
+      "moonshotai/kimi-latest",
+      "moonshotai/moonshot-v1-auto",
+      "moonshotai/kimi-k2-turbo-preview",
+      "moonshotai/moonshot-v1-32k-vision-preview",
+      "moonshotai/moonshot-v1-8k",
+      "moonshotai/kimi-k2-thinking",
+      "moonshotai/moonshot-v1-32k",
+      "moonshotai/moonshot-v1-128k",
+      "moonshotai/kimi-k2-thinking-turbo",
+      "moonshotai/moonshot-v1-128k-vision-preview",
+      "moonshotai/moonshot-v1-8k-vision-preview",
+      "moonshotai/kimi-k2-0711-preview",
+      "moonshotai/kimi-k2-0905-preview"
    ],
    "deepseek": [
-      "deepseek/deepseek-v3.2-speciale",
-      "deepseek/deepseek-v3.2",
-      "deepseek/deepseek-v3.2-exp",
-      "deepseek/deepseek-v3.1-terminus:exacto",
-      "deepseek/deepseek-v3.1-terminus",
-      "deepseek/deepseek-chat-v3.1",
-      "deepseek/deepseek-r1-0528:free",
-      "deepseek/deepseek-r1-0528",
-      "deepseek/deepseek-chat-v3-0324",
-      "deepseek/deepseek-r1-distill-qwen-32b",
-      "deepseek/deepseek-r1-distill-llama-70b",
-      "deepseek/deepseek-r1",
-      "deepseek/deepseek-chat"
+      "deepseek/deepseek-chat",
+      "deepseek/deepseek-reasoner"
    ],
    "x-ai": [
-      "x-ai/grok-4.1-fast",
-      "x-ai/grok-4-fast",
-      "x-ai/grok-code-fast-1",
-      "x-ai/grok-4",
-      "x-ai/grok-3-mini",
+      "x-ai/grok-2-vision-1212",
      "x-ai/grok-3",
-      "x-ai/grok-3-mini-beta",
-      "x-ai/grok-3-beta"
+      "x-ai/grok-3-mini",
+      "x-ai/grok-4-0709",
+      "x-ai/grok-4-1-fast-non-reasoning",
+      "x-ai/grok-4-1-fast-reasoning",
+      "x-ai/grok-4-fast-non-reasoning",
+      "x-ai/grok-4-fast-reasoning",
+      "x-ai/grok-code-fast-1"
+    ],
+    "z-ai": [
+      "z-ai/glm-4.5",
+      "z-ai/glm-4.5-air",
+      "z-ai/glm-4.6",
+      "z-ai/glm-4.7"
+    ],
+    "google": [
+      "google/gemini-2.5-flash",
+      "google/gemini-2.5-pro",
+      "google/gemini-2.0-flash-exp",
+      "google/gemini-2.0-flash",
+      "google/gemini-2.0-flash-001",
+      "google/gemini-2.0-flash-exp-image-generation",
+      "google/gemini-2.0-flash-lite-001",
+      "google/gemini-2.0-flash-lite",
+      "google/gemini-2.0-flash-lite-preview-02-05",
+      "google/gemini-2.0-flash-lite-preview",
+      "google/gemini-exp-1206",
+      "google/gemini-2.5-flash-preview-tts",
+      "google/gemini-2.5-pro-preview-tts",
+      "google/gemma-3-1b-it",
+      "google/gemma-3-4b-it",
+      "google/gemma-3-12b-it",
+      "google/gemma-3-27b-it",
+      "google/gemma-3n-e4b-it",
+      "google/gemma-3n-e2b-it",
+      "google/gemini-flash-latest",
+      "google/gemini-flash-lite-latest",
+      "google/gemini-pro-latest",
+      "google/gemini-2.5-flash-lite",
+      "google/gemini-2.5-flash-image",
+      "google/gemini-2.5-flash-preview-09-2025",
+      "google/gemini-2.5-flash-lite-preview-09-2025",
+      "google/gemini-3-pro-preview",
+      "google/gemini-3-flash-preview",
+      "google/gemini-3-pro-image-preview",
+      "google/nano-banana-pro-preview",
+      "google/gemini-robotics-er-1.5-preview",
+      "google/gemini-2.5-computer-use-preview-10-2025",
+      "google/deep-research-pro-preview-12-2025"
+    ],
+    "mistralai": [
+      "mistralai/mistral-medium-2505",
+      "mistralai/mistral-medium-2508",
+      "mistralai/mistral-medium-latest",
+      "mistralai/mistral-medium",
+      "mistralai/open-mistral-nemo",
+      "mistralai/open-mistral-nemo-2407",
+      "mistralai/mistral-tiny-2407",
+      "mistralai/mistral-tiny-latest",
+      "mistralai/mistral-large-2411",
+      "mistralai/pixtral-large-2411",
+      "mistralai/pixtral-large-latest",
+      "mistralai/mistral-large-pixtral-2411",
+      "mistralai/codestral-2508",
+      "mistralai/codestral-latest",
+      "mistralai/devstral-small-2507",
+      "mistralai/devstral-medium-2507",
+      "mistralai/devstral-2512",
+      "mistralai/mistral-vibe-cli-latest",
+      "mistralai/devstral-medium-latest",
+      "mistralai/devstral-latest",
+      "mistralai/labs-devstral-small-2512",
+      "mistralai/devstral-small-latest",
+      "mistralai/mistral-small-2506",
+      "mistralai/mistral-small-latest",
+      "mistralai/labs-mistral-small-creative",
+      "mistralai/magistral-medium-2509",
+      "mistralai/magistral-medium-latest",
+      "mistralai/magistral-small-2509",
+      "mistralai/magistral-small-latest",
+      "mistralai/mistral-large-2512",
+      "mistralai/mistral-large-latest",
+      "mistralai/ministral-3b-2512",
+      "mistralai/ministral-3b-latest",
+      "mistralai/ministral-8b-2512",
+      "mistralai/ministral-8b-latest",
+      "mistralai/ministral-14b-2512",
+      "mistralai/ministral-14b-latest",
+      "mistralai/open-mistral-7b",
+      "mistralai/mistral-tiny",
+      "mistralai/mistral-tiny-2312",
+      "mistralai/pixtral-12b-2409",
+      "mistralai/pixtral-12b",
+      "mistralai/pixtral-12b-latest",
+      "mistralai/ministral-3b-2410",
+      "mistralai/ministral-8b-2410",
+      "mistralai/codestral-2501",
+      "mistralai/codestral-2412",
+      "mistralai/codestral-2411-rc5",
+      "mistralai/mistral-small-2501",
+      "mistralai/mistral-embed-2312",
+      "mistralai/mistral-embed",
+      "mistralai/codestral-embed",
+      "mistralai/codestral-embed-2505"
+    ],
+    "amazon": [
+      "amazon/amazon.nova-pro-v1:0",
+      "amazon/amazon.nova-2-lite-v1:0",
+      "amazon/amazon.nova-2-sonic-v1:0",
+      "amazon/amazon.titan-tg1-large",
+      "amazon/amazon.nova-premier-v1:0:8k",
+      "amazon/amazon.nova-premier-v1:0:20k",
+      "amazon/amazon.nova-premier-v1:0:1000k",
+      "amazon/amazon.nova-premier-v1:0:mm",
+      "amazon/amazon.nova-premier-v1:0",
+      "amazon/amazon.nova-lite-v1:0",
+      "amazon/amazon.nova-micro-v1:0"
+    ],
+    "openai": [
+      "openai/gpt-4-0613",
+      "openai/gpt-4",
+      "openai/gpt-3.5-turbo",
+      "openai/gpt-5.2-codex",
+      "openai/gpt-3.5-turbo-instruct",
+      "openai/gpt-3.5-turbo-instruct-0914",
+      "openai/gpt-4-1106-preview",
+      "openai/gpt-3.5-turbo-1106",
+      "openai/gpt-4-0125-preview",
+      "openai/gpt-4-turbo-preview",
+      "openai/gpt-3.5-turbo-0125",
+      "openai/gpt-4-turbo",
+      "openai/gpt-4-turbo-2024-04-09",
+      "openai/gpt-4o",
+      "openai/gpt-4o-2024-05-13",
+      "openai/gpt-4o-mini-2024-07-18",
+      "openai/gpt-4o-mini",
+      "openai/gpt-4o-2024-08-06",
+      "openai/chatgpt-4o-latest",
+      "openai/o1-2024-12-17",
+      "openai/o1",
+      "openai/computer-use-preview",
+      "openai/o3-mini",
+      "openai/o3-mini-2025-01-31",
+      "openai/gpt-4o-2024-11-20",
+      "openai/computer-use-preview-2025-03-11",
+      "openai/gpt-4o-search-preview-2025-03-11",
+      "openai/gpt-4o-search-preview",
+      "openai/gpt-4o-mini-search-preview-2025-03-11",
+      "openai/gpt-4o-mini-search-preview",
+      "openai/o1-pro-2025-03-19",
+      "openai/o1-pro",
+      "openai/o3-2025-04-16",
+      "openai/o4-mini-2025-04-16",
+      "openai/o3",
+      "openai/o4-mini",
+      "openai/gpt-4.1-2025-04-14",
+      "openai/gpt-4.1",
+      "openai/gpt-4.1-mini-2025-04-14",
+      "openai/gpt-4.1-mini",
+      "openai/gpt-4.1-nano-2025-04-14",
+      "openai/gpt-4.1-nano",
+      "openai/codex-mini-latest",
+      "openai/o3-pro",
+      "openai/o3-pro-2025-06-10",
+      "openai/o4-mini-deep-research",
+      "openai/o3-deep-research",
+      "openai/o3-deep-research-2025-06-26",
+      "openai/o4-mini-deep-research-2025-06-26",
+      "openai/gpt-5-chat-latest",
+      "openai/gpt-5-2025-08-07",
+      "openai/gpt-5",
+      "openai/gpt-5-mini-2025-08-07",
+      "openai/gpt-5-mini",
+      "openai/gpt-5-nano-2025-08-07",
+      "openai/gpt-5-nano",
+      "openai/gpt-5-codex",
+      "openai/gpt-5-pro-2025-10-06",
+      "openai/gpt-5-pro",
+      "openai/gpt-5-search-api",
+      "openai/gpt-5-search-api-2025-10-14",
+      "openai/gpt-5.1-chat-latest",
+      "openai/gpt-5.1-2025-11-13",
+      "openai/gpt-5.1",
+      "openai/gpt-5.1-codex",
+      "openai/gpt-5.1-codex-mini",
+      "openai/gpt-5.1-codex-max",
+      "openai/gpt-5.2-2025-12-11",
+      "openai/gpt-5.2",
+      "openai/gpt-5.2-pro-2025-12-11",
+      "openai/gpt-5.2-pro",
+      "openai/gpt-5.2-chat-latest",
+      "openai/gpt-3.5-turbo-16k",
+      "openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P"
    ]
  },
  "metadata": {
    "total_providers": 10,
-    "total_models": 205,
-    "last_updated": "2026-01-16T20:30:00.806165+00:00"
+    "total_models": 296,
+    "last_updated": "2026-01-22T01:36:41.296455+00:00"
  }
 }
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -6,6 +6,7 @@ use proxy_wasm::traits::*;
 use proxy_wasm::types::*;
 use std::num::NonZero;
 use std::rc::Rc;
+use std::sync::Arc;
 use std::time::{Duration, SystemTime, UNIX_EPOCH};

 use crate::metrics::Metrics;
@ -40,7 +41,7 @@ pub struct StreamContext {
    /// The API that should be used for the upstream provider (after compatibility mapping)
    resolved_api: Option<SupportedUpstreamAPIs>,
    llm_providers: Rc<LlmProviders>,
-    llm_provider: Option<Rc<LlmProvider>>,
+    llm_provider: Option<Arc<LlmProvider>>,
    request_id: Option<String>,
    start_time: SystemTime,
    ttft_duration: Option<Duration>,
--- a/demos/samples_python/currency_exchange/hurl_tests/simple.hurl
+++ b/demos/samples_python/currency_exchange/hurl_tests/simple.hurl
@ -8,7 +8,7 @@ Content-Type: application/json
      "content": "convert 100 eur"
    }
  ],
-  "model": "none"
+  "model": "gpt-4o"
 }
 HTTP 200
 [Asserts]
--- a/demos/samples_python/currency_exchange/hurl_tests/simple_stream.hurl
+++ b/demos/samples_python/currency_exchange/hurl_tests/simple_stream.hurl
@ -9,7 +9,7 @@ Content-Type: application/json
    }
  ],
  "stream": true,
-  "model": "none"
+  "model": "gpt-4o"
 }
 HTTP 200
 [Asserts]
--- a/demos/use_cases/README.md
+++ b/demos/use_cases/README.md
@ -67,7 +67,7 @@ print("OpenAI Response:", response.choices[0].message.content)
 #### Step 3.2: Using curl command
 ```
 $ curl --header 'Content-Type: application/json' \
-  --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "none"}' \
+  --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "gpt-4o"}' \
  http://localhost:12000/v1/chat/completions

 {
@ -92,7 +92,7 @@ You can override model selection using `x-arch-llm-provider-hint` header. For ex
 ```
 $ curl --header 'Content-Type: application/json' \
  --header 'x-arch-llm-provider-hint: ministral-3b' \
-  --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "none"}' \
+  --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "gpt-4o"}' \
  http://localhost:12000/v1/chat/completions
 {
  ...
--- a/demos/use_cases/llm_routing/README.md
+++ b/demos/use_cases/llm_routing/README.md
@ -19,7 +19,7 @@ You can also pass in a header to override model when sending prompt. Following e

 $ curl --header 'Content-Type: application/json' \
  --header 'x-arch-llm-provider-hint: mistral/ministral-3b' \
-  --data '{"messages": [{"role": "user","content": "hello"}], "model": "none"}' \
+  --data '{"messages": [{"role": "user","content": "hello"}], "model": "gpt-4o"}' \
  http://localhost:12000/v1/chat/completions 2> /dev/null | jq .
 {
  "id": "xxx",
--- a/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl
+++ b/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl
@ -5,10 +5,10 @@ Content-Type: application/json
  "messages": [
    {
      "role": "user",
-      "content": "hi"
+      "content": "Can you explain what this Python function does?\n\ndef fibonacci(n):\n    if n <= 1:\n        return n\n    return fibonacci(n-1) + fibonacci(n-2)"
    }
  ],
-  "model": "none",
+  "model": "openai/gpt-4o-mini",
  "stream": true
 }
 HTTP 200
--- a/demos/use_cases/preference_based_routing/test_router_endpoint.rest
+++ b/demos/use_cases/preference_based_routing/test_router_endpoint.rest
@ -34,7 +34,7 @@ POST http://localhost:12000/v1/chat/completions HTTP/1.1
 Content-Type: application/json

 {
-  "model": "none",
+  "model": "gpt-4o",
  "messages": [
    {
      "role": "user",
@ -49,7 +49,7 @@ POST http://localhost:12000/v1/chat/completions HTTP/1.1
 Content-Type: application/json

 {
-  "model": "none",
+  "model": "gpt-4o",
  "messages": [
    {
      "role": "user",
--- a/demos/use_cases/wildcard_providers/config.yaml
+++ b/demos/use_cases/wildcard_providers/config.yaml
@ -16,10 +16,15 @@ model_providers:
  - model: anthropic/*
    access_key: $ANTHROPIC_API_KEY

+  - model: anthropic/claude-sonnet-4-20250514
+    access_key: $ANTHROPIC_API_KEY
+    routing_preferences:
+      - name: code generation
+        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
+
  - model: xai/*
    access_key: $GROK_API_KEY

-
  # Custom internal LLM provider
  # Note: Requires base_url and provider_interface for unknown providers
  - model: ollama/*
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@ -105,7 +105,7 @@ Step 3.1: Using curl command
 .. code-block:: bash

   $ curl --header 'Content-Type: application/json' \
-     --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "none"}' \
+     --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "gpt-4o"}' \
     http://localhost:12000/v1/chat/completions

   {
@ -315,7 +315,7 @@ Here is a sample curl command you can use to interact:
 .. code-block:: bash

   $ curl --header 'Content-Type: application/json' \
-     --data '{"messages": [{"role": "user","content": "what is exchange rate for gbp"}], "model": "none"}' \
+     --data '{"messages": [{"role": "user","content": "what is exchange rate for gbp"}], "model": "gpt-4o"}' \
     http://localhost:10000/v1/chat/completions | jq ".choices[0].message.content"

   "As of the date provided in your context, December 5, 2024, the exchange rate for GBP (British Pound) from USD (United States Dollar) is 0.78558. This means that 1 USD is equivalent to 0.78558 GBP."
@ -325,7 +325,7 @@ And to get the list of supported currencies:
 .. code-block:: bash

   $ curl --header 'Content-Type: application/json' \
-     --data '{"messages": [{"role": "user","content": "show me list of currencies that are supported for conversion"}], "model": "none"}' \
+     --data '{"messages": [{"role": "user","content": "show me list of currencies that are supported for conversion"}], "model": "gpt-4o"}' \
     http://localhost:10000/v1/chat/completions | jq ".choices[0].message.content"

   "Here is a list of the currencies that are supported for conversion from USD, along with their symbols:\n\n1. AUD - Australian Dollar\n2. BGN - Bulgarian Lev\n3. BRL - Brazilian Real\n4. CAD - Canadian Dollar\n5. CHF - Swiss Franc\n6. CNY - Chinese Renminbi Yuan\n7. CZK - Czech Koruna\n8. DKK - Danish Krone\n9. EUR - Euro\n10. GBP - British Pound\n11. HKD - Hong Kong Dollar\n12. HUF - Hungarian Forint\n13. IDR - Indonesian Rupiah\n14. ILS - Israeli New Sheqel\n15. INR - Indian Rupee\n16. ISK - Icelandic Króna\n17. JPY - Japanese Yen\n18. KRW - South Korean Won\n19. MXN - Mexican Peso\n20. MYR - Malaysian Ringgit\n21. NOK - Norwegian Krone\n22. NZD - New Zealand Dollar\n23. PHP - Philippine Peso\n24. PLN - Polish Złoty\n25. RON - Romanian Leu\n26. SEK - Swedish Krona\n27. SGD - Singapore Dollar\n28. THB - Thai Baht\n29. TRY - Turkish Lira\n30. USD - United States Dollar\n31. ZAR - South African Rand\n\nIf you want to convert USD to any of these currencies, you can select the one you are interested in."
--- a/tests/rest/api_llm_gateway.rest
+++ b/tests/rest/api_llm_gateway.rest
@ -107,7 +107,7 @@ Content-Type: application/json

 {
  "stream": true,
-  "model": "None",
+  "model": "gpt-4o",
  "messages": [
    {
      "role": "user",
--- a/tests/rest/api_model_server.rest
+++ b/tests/rest/api_model_server.rest
@ -238,7 +238,7 @@ POST {{model_server_endpoint}}/function_calling HTTP/1.1
 Content-Type: application/json

 {
-  "model": "None",
+  "model": "gpt-4o",
  "messages": [
    {
      "role": "user",
--- a/tests/rest/api_prompt_gateway.rest
+++ b/tests/rest/api_prompt_gateway.rest
@ -82,7 +82,7 @@ POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1
 Content-Type: application/json

 {
-  "model": "None",
+  "model": "gpt-4o",
  "messages": [
    {
      "role": "user",