diff --git a/config/supervisord.conf b/config/supervisord.conf index 35923974..999e761e 100644 --- a/config/supervisord.conf +++ b/config/supervisord.conf @@ -4,7 +4,7 @@ nodaemon=true [program:brightstaff] command=sh -c "\ envsubst < /app/arch_config_rendered.yaml > /app/arch_config_rendered.env_sub.yaml && \ - RUST_LOG=info \ + RUST_LOG=debug \ ARCH_CONFIG_PATH_RENDERED=/app/arch_config_rendered.env_sub.yaml \ /app/brightstaff 2>&1 | \ tee /var/log/brightstaff.log | \ @@ -19,7 +19,7 @@ command=/bin/sh -c "\ uv run python -m planoai.config_generator && \ envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && \ envoy -c /etc/envoy.env_sub.yaml \ - --component-log-level wasm:info \ + --component-log-level wasm:debug \ --log-format '[%%Y-%%m-%%d %%T.%%e][%%l] %%v' 2>&1 | \ tee /var/log/envoy.log | \ while IFS= read -r line; do echo '[plano_logs]' \"$line\"; done" diff --git a/crates/brightstaff/src/handlers/llm.rs b/crates/brightstaff/src/handlers/llm.rs index bd77f948..e1fe5a93 100644 --- a/crates/brightstaff/src/handlers/llm.rs +++ b/crates/brightstaff/src/handlers/llm.rs @@ -1,8 +1,9 @@ use bytes::Bytes; -use common::configuration::{LlmProvider, ModelAlias}; +use common::configuration::ModelAlias; use common::consts::{ ARCH_IS_STREAMING_HEADER, ARCH_PROVIDER_HINT_HEADER, REQUEST_ID_HEADER, TRACE_PARENT_HEADER, }; +use common::llm_providers::LlmProviders; use common::traces::TraceCollector; use hermesllm::apis::openai_responses::InputParam; use hermesllm::clients::{SupportedAPIsFromClient, SupportedUpstreamAPIs}; @@ -38,7 +39,7 @@ pub async fn llm_chat( router_service: Arc, full_qualified_llm_provider_url: String, model_aliases: Arc>>, - llm_providers: Arc>>, + llm_providers: Arc>, trace_collector: Arc, state_storage: Option>, ) -> Result>, hyper::Error> { @@ -123,6 +124,19 @@ pub async fn llm_chat( let is_streaming_request = client_request.is_streaming(); let resolved_model = resolve_model_alias(&model_from_request, &model_aliases); + // Validate that the requested model exists in configuration + // This matches the validation in llm_gateway routing.rs + if llm_providers.read().await.get(&resolved_model).is_none() { + let err_msg = format!( + "Model '{}' not found in configured providers", + resolved_model + ); + warn!("[PLANO_REQ_ID:{}] | FAILURE | {}", request_id, err_msg); + let mut bad_request = Response::new(full(err_msg)); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } + // Handle provider/model slug format (e.g., "openai/gpt-4") // Extract just the model name for upstream (providers don't understand the slug) let model_name_only = if let Some((_, model)) = resolved_model.split_once('/') { @@ -250,22 +264,25 @@ pub async fn llm_chat( } }; - // Use the resolved model (could be "gpt-4" or "openai/gpt-4") as the provider hint - // The routing layer will use llm_providers.get() which handles both formats: - // - "gpt-4" → looks up by model name - // - "openai/gpt-4" → looks up by provider/model slug - // If router doesn't find anything, it will use routing_result.model_name - let provider_hint_value = resolved_model.clone(); - let model_name = routing_result.model_name; + // Determine final model to use + // Router returns "none" as a sentinel value when it doesn't select a specific model + let router_selected_model = routing_result.model_name; + let model_name = if router_selected_model != "none" { + // Router selected a specific model via routing preferences + router_selected_model + } else { + // Router returned "none" sentinel, use validated resolved_model from request + resolved_model.clone() + }; debug!( "[PLANO_REQ_ID:{}] | ARCH_ROUTER URL | {}, Provider Hint: {}, Model for upstream: {}", - request_id, full_qualified_llm_provider_url, provider_hint_value, model_name_only + request_id, full_qualified_llm_provider_url, model_name, model_name_only ); request_headers.insert( ARCH_PROVIDER_HINT_HEADER, - header::HeaderValue::from_str(&provider_hint_value).unwrap(), + header::HeaderValue::from_str(&model_name).unwrap(), ); request_headers.insert( @@ -405,7 +422,7 @@ async fn build_llm_span( tool_names: Option>, user_message_preview: Option, temperature: Option, - llm_providers: &Arc>>, + llm_providers: &Arc>, ) -> common::traces::Span { use crate::tracing::{http, llm, OperationNameBuilder}; use common::traces::{parse_traceparent, SpanBuilder, SpanKind}; @@ -478,7 +495,7 @@ async fn build_llm_span( /// Looks up provider configuration, gets the ProviderId and base_url_path_prefix, /// then uses target_endpoint_for_provider to calculate the correct upstream path. async fn get_upstream_path( - llm_providers: &Arc>>, + llm_providers: &Arc>, model_name: &str, request_path: &str, resolved_model: &str, @@ -501,25 +518,21 @@ async fn get_upstream_path( /// Helper function to get provider info (ProviderId and base_url_path_prefix) async fn get_provider_info( - llm_providers: &Arc>>, + llm_providers: &Arc>, model_name: &str, ) -> (hermesllm::ProviderId, Option) { let providers_lock = llm_providers.read().await; - // First, try to find by model name or provider name - let provider = providers_lock.iter().find(|p| { - p.model.as_ref().map(|m| m == model_name).unwrap_or(false) || p.name == model_name - }); - - if let Some(provider) = provider { + // Try to find by model name or provider name using LlmProviders::get + // This handles both "gpt-4" and "openai/gpt-4" formats + if let Some(provider) = providers_lock.get(model_name) { let provider_id = provider.provider_interface.to_provider_id(); let prefix = provider.base_url_path_prefix.clone(); return (provider_id, prefix); } - let default_provider = providers_lock.iter().find(|p| p.default.unwrap_or(false)); - - if let Some(provider) = default_provider { + // Fall back to default provider + if let Some(provider) = providers_lock.default() { let provider_id = provider.provider_interface.to_provider_id(); let prefix = provider.base_url_path_prefix.clone(); (provider_id, prefix) diff --git a/crates/brightstaff/src/handlers/models.rs b/crates/brightstaff/src/handlers/models.rs index ac1bbebe..a29d5e90 100644 --- a/crates/brightstaff/src/handlers/models.rs +++ b/crates/brightstaff/src/handlers/models.rs @@ -1,19 +1,17 @@ use bytes::Bytes; -use common::configuration::{IntoModels, LlmProvider}; -use hermesllm::apis::openai::Models; +use common::llm_providers::LlmProviders; use http_body_util::{combinators::BoxBody, BodyExt, Full}; use hyper::{Response, StatusCode}; use serde_json; use std::sync::Arc; pub async fn list_models( - llm_providers: Arc>>, + llm_providers: Arc>, ) -> Response> { let prov = llm_providers.read().await; - let providers = prov.clone(); - let openai_models: Models = providers.into_models(); + let models = prov.to_models(); - match serde_json::to_string(&openai_models) { + match serde_json::to_string(&models) { Ok(json) => { let body = Full::new(Bytes::from(json)) .map_err(|never| match never {}) diff --git a/crates/brightstaff/src/handlers/router_chat.rs b/crates/brightstaff/src/handlers/router_chat.rs index 701e8e51..c3a517e0 100644 --- a/crates/brightstaff/src/handlers/router_chat.rs +++ b/crates/brightstaff/src/handlers/router_chat.rs @@ -151,16 +151,15 @@ pub async fn router_chat_get_upstream_model( Ok(RoutingResult { model_name }) } None => { - // No route determined, use default model from request + // No route determined, return sentinel value "none" + // This signals to llm.rs to use the original validated request model info!( - "[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, using default model from request: {}", - request_id, - chat_request.model + "[PLANO_REQ_ID: {}] | ROUTER_REQ | No route determined, returning sentinel 'none'", + request_id ); - let default_model = chat_request.model.clone(); let mut attrs = HashMap::new(); - attrs.insert("route.selected_model".to_string(), default_model.clone()); + attrs.insert("route.selected_model".to_string(), "none".to_string()); record_routing_span( trace_collector, traceparent, @@ -171,7 +170,7 @@ pub async fn router_chat_get_upstream_model( .await; Ok(RoutingResult { - model_name: default_model, + model_name: "none".to_string(), }) } }, diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index e5933676..b8fa8832 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -13,6 +13,7 @@ use common::configuration::{Agent, Configuration}; use common::consts::{ CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH, PLANO_ORCHESTRATOR_MODEL_NAME, }; +use common::llm_providers::LlmProviders; use common::traces::TraceCollector; use http_body_util::{combinators::BoxBody, BodyExt, Empty}; use hyper::body::Incoming; @@ -76,7 +77,10 @@ async fn main() -> Result<(), Box> { .cloned() .collect(); - let llm_providers = Arc::new(RwLock::new(arch_config.model_providers.clone())); + // Create expanded provider list for /v1/models endpoint + let llm_providers = LlmProviders::try_from(arch_config.model_providers.clone()) + .expect("Failed to create LlmProviders"); + let llm_providers = Arc::new(RwLock::new(llm_providers)); let combined_agents_filters_list = Arc::new(RwLock::new(Some(all_agents))); let listeners = Arc::new(RwLock::new(arch_config.listeners.clone())); let llm_provider_url = diff --git a/crates/common/src/llm_providers.rs b/crates/common/src/llm_providers.rs index 8a26f377..3c9d1d68 100644 --- a/crates/common/src/llm_providers.rs +++ b/crates/common/src/llm_providers.rs @@ -1,27 +1,54 @@ use crate::configuration::LlmProvider; use hermesllm::providers::ProviderId; use std::collections::HashMap; -use std::rc::Rc; +use std::sync::Arc; #[derive(Debug)] pub struct LlmProviders { - providers: HashMap>, - default: Option>, + providers: HashMap>, + default: Option>, /// Wildcard providers: maps provider prefix to base provider config /// e.g., "openai" -> LlmProvider for "openai/*" - wildcard_providers: HashMap>, + wildcard_providers: HashMap>, } impl LlmProviders { - pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Rc> { + pub fn iter(&self) -> std::collections::hash_map::Iter<'_, String, Arc> { self.providers.iter() } - pub fn default(&self) -> Option> { + pub fn default(&self) -> Option> { self.default.clone() } + /// Convert providers to OpenAI Models format for /v1/models endpoint + /// Filters out internal models and duplicate entries (backward compatibility aliases) + pub fn to_models(&self) -> hermesllm::apis::openai::Models { + use hermesllm::apis::openai::{ModelDetail, ModelObject, Models}; - pub fn get(&self, name: &str) -> Option> { + let data: Vec = self + .providers + .iter() + .filter(|(key, provider)| { + // Exclude internal models + provider.internal != Some(true) + // Only include canonical entries (key matches provider name) + // This avoids duplicates from backward compatibility short names + && *key == &provider.name + }) + .map(|(name, provider)| ModelDetail { + id: name.clone(), + object: Some("model".to_string()), + created: 0, + owned_by: provider.to_provider_id().to_string(), + }) + .collect(); + + Models { + object: ModelObject::List, + data, + } + } + pub fn get(&self, name: &str) -> Option> { // First try exact match if let Some(provider) = self.providers.get(name).cloned() { return Some(provider); @@ -47,7 +74,7 @@ impl LlmProviders { // Create a new provider with the specific model from the slug let mut specific_provider = (**wildcard_provider).clone(); specific_provider.model = Some(model_name.to_string()); - return Some(Rc::new(specific_provider)); + return Some(Arc::new(specific_provider)); } } @@ -79,13 +106,40 @@ impl TryFrom> for LlmProviders { wildcard_providers: HashMap::new(), }; + // Track specific (non-wildcard) provider names to detect true duplicates + let mut specific_provider_names = std::collections::HashSet::new(); + + // Track specific models that should be excluded from wildcard expansion + // Maps provider_prefix -> Set of model names (e.g., "anthropic" -> {"claude-sonnet-4-20250514"}) + let mut specific_models_by_provider: HashMap> = + HashMap::new(); + + // First pass: collect all specific model configurations + for llm_provider in &llm_providers_config { + let is_wildcard = llm_provider + .model + .as_ref() + .map(|m| m == "*" || m.ends_with("/*")) + .unwrap_or(false); + + if !is_wildcard { + // Check if this is a provider/model format + if let Some((provider_prefix, model_name)) = llm_provider.name.split_once('/') { + specific_models_by_provider + .entry(provider_prefix.to_string()) + .or_default() + .insert(model_name.to_string()); + } + } + } + for llm_provider in llm_providers_config { - let llm_provider: Rc = Rc::new(llm_provider); + let llm_provider: Arc = Arc::new(llm_provider); if llm_provider.default.unwrap_or_default() { match llm_providers.default { Some(_) => return Err(LlmProvidersNewError::MoreThanOneDefault), - None => llm_providers.default = Some(Rc::clone(&llm_provider)), + None => llm_providers.default = Some(Arc::clone(&llm_provider)), } } @@ -109,20 +163,45 @@ impl TryFrom> for LlmProviders { llm_providers .wildcard_providers - .insert(provider_prefix.to_string(), Rc::clone(&llm_provider)); + .insert(provider_prefix.to_string(), Arc::clone(&llm_provider)); // Try to expand wildcard using ProviderId models if let Ok(provider_id) = ProviderId::try_from(provider_prefix) { let models = provider_id.models(); + + // Get the set of specific models to exclude for this provider + let models_to_exclude = specific_models_by_provider + .get(provider_prefix) + .cloned() + .unwrap_or_default(); + if !models.is_empty() { + let excluded_count = models_to_exclude.len(); + let total_models = models.len(); + log::info!( - "Expanding wildcard provider '{}' to {} models", + "Expanding wildcard provider '{}' to {} models{}", provider_prefix, - models.len() + total_models - excluded_count, + if excluded_count > 0 { + format!(" (excluding {} specifically configured)", excluded_count) + } else { + String::new() + } ); - // Create a provider entry for each model + // Create a provider entry for each model (except those specifically configured) for model_name in models { + // Skip this model if it has a specific configuration + if models_to_exclude.contains(&model_name) { + log::debug!( + "Skipping wildcard expansion for '{}/{}' - specific configuration exists", + provider_prefix, + model_name + ); + continue; + } + let full_model_id = format!("{}/{}", provider_prefix, model_name); // Create a new provider with the specific model @@ -130,12 +209,12 @@ impl TryFrom> for LlmProviders { expanded_provider.model = Some(model_name.clone()); expanded_provider.name = full_model_id.clone(); - let expanded_rc = Rc::new(expanded_provider); + let expanded_rc = Arc::new(expanded_provider); // Insert with full model ID as key llm_providers .providers - .insert(full_model_id.clone(), Rc::clone(&expanded_rc)); + .insert(full_model_id.clone(), Arc::clone(&expanded_rc)); // Also insert with just model name for backward compatibility llm_providers.providers.insert(model_name, expanded_rc); @@ -149,24 +228,26 @@ impl TryFrom> for LlmProviders { ); } } else { - // Non-wildcard provider - original behavior - if llm_providers - .providers - .insert(name.clone(), Rc::clone(&llm_provider)) - .is_some() - { + // Non-wildcard provider - specific configuration + // Check for duplicate specific entries (not allowed) + if specific_provider_names.contains(&name) { return Err(LlmProvidersNewError::DuplicateName(name)); } + specific_provider_names.insert(name.clone()); - // also add model_id as key for provider lookup + // This specific configuration takes precedence over any wildcard expansion + // The wildcard expansion already excluded this model (see first pass above) + + log::debug!("Processing specific provider configuration: {}", name); + + // Insert with the provider name as key + llm_providers + .providers + .insert(name.clone(), Arc::clone(&llm_provider)); + + // Also add model_id as key for provider lookup if let Some(model) = llm_provider.model.clone() { - if llm_providers - .providers - .insert(model, llm_provider) - .is_some() - { - return Err(LlmProvidersNewError::DuplicateName(name)); - } + llm_providers.providers.insert(model, llm_provider); } } } diff --git a/crates/common/src/routing.rs b/crates/common/src/routing.rs index 2007c579..a2b12c46 100644 --- a/crates/common/src/routing.rs +++ b/crates/common/src/routing.rs @@ -1,4 +1,4 @@ -use std::rc::Rc; +use std::sync::Arc; use crate::{configuration, llm_providers::LlmProviders}; use configuration::LlmProvider; @@ -21,7 +21,7 @@ impl From for ProviderHint { pub fn get_llm_provider( llm_providers: &LlmProviders, provider_hint: Option, -) -> Result, String> { +) -> Result, String> { match provider_hint { Some(ProviderHint::Default) => llm_providers .default() @@ -29,11 +29,6 @@ pub fn get_llm_provider( Some(ProviderHint::Name(name)) => llm_providers .get(&name) .ok_or_else(|| format!("Model '{}' not found in configured providers", name)), - None => { - // No hint provided - must have a default configured - llm_providers - .default() - .ok_or_else(|| "No model specified and no default provider configured".to_string()) - } + None => Err("No model specified in request".to_string()), } } diff --git a/crates/hermesllm/src/bin/fetch_models.rs b/crates/hermesllm/src/bin/fetch_models.rs index 73f5e0d9..22769a2a 100644 --- a/crates/hermesllm/src/bin/fetch_models.rs +++ b/crates/hermesllm/src/bin/fetch_models.rs @@ -1,5 +1,9 @@ -// Fetch latest provider models from OpenRouter and update provider_models.json -// Usage: OPENROUTER_API_KEY=xxx cargo run --bin fetch_models +// Fetch latest provider models from canonical provider APIs and update provider_models.json +// Usage: +// Optional: OPENAI_API_KEY, ANTHROPIC_API_KEY, DEEPSEEK_API_KEY, GROK_API_KEY, +// DASHSCOPE_API_KEY, MOONSHOT_API_KEY, ZHIPU_API_KEY, GOOGLE_API_KEY +// Required: AWS CLI configured for Amazon Bedrock models +// cargo run --bin fetch_models use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -15,9 +19,9 @@ fn main() { .nth(1) .unwrap_or_else(|| default_path.to_string_lossy().to_string()); - println!("Fetching latest models from OpenRouter..."); + println!("Fetching latest models from provider APIs..."); - match fetch_openrouter_models() { + match fetch_all_models() { Ok(models) => { let json = serde_json::to_string_pretty(&models).expect("Failed to serialize models"); @@ -30,28 +34,38 @@ fn main() { } Err(e) => { eprintln!("Error fetching models: {}", e); - eprintln!("\nMake sure OPENROUTER_API_KEY is set:"); - eprintln!(" export OPENROUTER_API_KEY=your-key-here"); + eprintln!("\nMake sure required tools are set up:"); + eprintln!(" AWS CLI configured for Bedrock (for Amazon models)"); + eprintln!(" export OPENAI_API_KEY=your-key-here # Optional"); + eprintln!(" export DEEPSEEK_API_KEY=your-key-here # Optional"); eprintln!(" cargo run --bin fetch_models"); std::process::exit(1); } } } +// OpenAI-compatible API response (used by most providers) #[derive(Debug, Deserialize)] -struct OpenRouterModel { +struct OpenAICompatibleModel { id: String, - architecture: Option, } #[derive(Debug, Deserialize)] -struct Architecture { - modality: Option, +struct OpenAICompatibleResponse { + data: Vec, +} + +// Google Gemini API response +#[derive(Debug, Deserialize)] +struct GoogleModel { + name: String, + #[serde(rename = "supportedGenerationMethods")] + supported_generation_methods: Option>, } #[derive(Debug, Deserialize)] -struct OpenRouterResponse { - data: Vec, +struct GoogleResponse { + models: Vec, } #[derive(Debug, Serialize)] @@ -69,94 +83,327 @@ struct Metadata { last_updated: String, } -fn fetch_openrouter_models() -> Result> { - let api_key = std::env::var("OPENROUTER_API_KEY") - .map_err(|_| "OPENROUTER_API_KEY environment variable not set")?; +fn is_text_model(model_id: &str) -> bool { + let id_lower = model_id.to_lowercase(); - let response_body = ureq::get("https://openrouter.ai/api/v1/models") + // Filter out known non-text models + let non_text_patterns = [ + "embedding", // Embedding models + "whisper", // Audio transcription + "-tts", // Text-to-speech (with dash to avoid matching in middle of words) + "tts-", // Text-to-speech prefix + "dall-e", // Image generation + "sora", // Video generation + "moderation", // Moderation models + "babbage", // Legacy completion models + "davinci-002", // Legacy completion models + "transcribe", // Audio transcription models + "realtime", // Realtime audio models + "audio", // Audio models (gpt-audio, gpt-audio-mini) + "-image-", // Image generation models (grok-2-image-1212) + "-ocr-", // OCR models + "ocr-", // OCR models prefix + "voxtral", // Audio/voice models + ]; + + // Additional pattern: models that are purely for image generation usually have "image" in the name + // but we need to be careful not to filter vision models that can process images + // Models like "gpt-image-1" or "chatgpt-image-latest" are image generators + // Models like "grok-2-vision" or "gemini-vision" are vision models (text+image->text) + + if non_text_patterns + .iter() + .any(|pattern| id_lower.contains(pattern)) + { + return false; + } + + // Filter models starting with "gpt-image" (image generators) + if id_lower.contains("/gpt-image") || id_lower.contains("/chatgpt-image") { + return false; + } + + true +} + +fn fetch_openai_compatible_models( + api_url: &str, + api_key: &str, + provider_prefix: &str, +) -> Result, Box> { + let response_body = ureq::get(api_url) .header("Authorization", &format!("Bearer {}", api_key)) .call()? .body_mut() .read_to_string()?; - let openrouter_response: OpenRouterResponse = serde_json::from_str(&response_body)?; + let response: OpenAICompatibleResponse = serde_json::from_str(&response_body)?; - // Supported providers to include - let supported_providers = [ - "openai", - "anthropic", - "mistralai", - "deepseek", - "google", - "x-ai", - "moonshotai", - "qwen", - "amazon", - "z-ai", + Ok(response + .data + .into_iter() + .filter(|m| is_text_model(&m.id)) + .map(|m| format!("{}/{}", provider_prefix, m.id)) + .collect()) +} + +fn fetch_anthropic_models(api_key: &str) -> Result, Box> { + let response_body = ureq::get("https://api.anthropic.com/v1/models") + .header("x-api-key", api_key) + .header("anthropic-version", "2023-06-01") + .call()? + .body_mut() + .read_to_string()?; + + let response: OpenAICompatibleResponse = serde_json::from_str(&response_body)?; + + let dated_models: Vec = response + .data + .into_iter() + .filter(|m| is_text_model(&m.id)) + .map(|m| m.id) + .collect(); + + let mut models: Vec = Vec::new(); + + // Add both dated versions and their aliases (without the -YYYYMMDD suffix) + for model_id in dated_models { + // Add the full dated model ID + models.push(format!("anthropic/{}", model_id)); + + // Generate alias by removing trailing -YYYYMMDD pattern + // Pattern: ends with -YYYYMMDD where YYYY is year, MM is month, DD is day + if let Some(date_pos) = model_id.rfind('-') { + let potential_date = &model_id[date_pos + 1..]; + // Check if it's an 8-digit date (YYYYMMDD) + if potential_date.len() == 8 && potential_date.chars().all(|c| c.is_ascii_digit()) { + let alias = &model_id[..date_pos]; + let alias_full = format!("anthropic/{}", alias); + // Only add if not already present + if !models.contains(&alias_full) { + models.push(alias_full); + } + } + } + } + + Ok(models) +} + +fn fetch_google_models(api_key: &str) -> Result, Box> { + let api_url = format!( + "https://generativelanguage.googleapis.com/v1beta/models?key={}", + api_key + ); + + let response_body = ureq::get(&api_url).call()?.body_mut().read_to_string()?; + + let response: GoogleResponse = serde_json::from_str(&response_body)?; + + // Only include models that support generateContent + Ok(response + .models + .into_iter() + .filter(|m| { + m.supported_generation_methods + .as_ref() + .map_or(false, |methods| { + methods.contains(&"generateContent".to_string()) + }) + }) + .map(|m| { + // Convert "models/gemini-pro" to "google/gemini-pro" + let model_id = m.name.strip_prefix("models/").unwrap_or(&m.name); + format!("google/{}", model_id) + }) + .collect()) +} + +fn fetch_bedrock_amazon_models() -> Result, Box> { + // Use AWS CLI to fetch Amazon models from Bedrock + let output = std::process::Command::new("aws") + .args([ + "bedrock", + "list-foundation-models", + "--by-provider", + "amazon", + "--by-output-modality", + "TEXT", + "--no-cli-pager", + "--output", + "json", + ]) + .output()?; + + if !output.status.success() { + return Err(format!( + "AWS CLI command failed: {}", + String::from_utf8_lossy(&output.stderr) + ) + .into()); + } + + let response_body = String::from_utf8(output.stdout)?; + + #[derive(Debug, Deserialize)] + struct BedrockModelSummary { + #[serde(rename = "modelId")] + model_id: String, + } + + #[derive(Debug, Deserialize)] + struct BedrockResponse { + #[serde(rename = "modelSummaries")] + model_summaries: Vec, + } + + let bedrock_response: BedrockResponse = serde_json::from_str(&response_body)?; + + // Filter out embedding, image generation, and rerank models + let amazon_models: Vec = bedrock_response + .model_summaries + .into_iter() + .filter(|model| { + let id_lower = model.model_id.to_lowercase(); + !id_lower.contains("embed") + && !id_lower.contains("image") + && !id_lower.contains("rerank") + }) + .map(|m| format!("amazon/{}", m.model_id)) + .collect(); + + Ok(amazon_models) +} + +fn fetch_all_models() -> Result> { + let mut providers: HashMap> = HashMap::new(); + let mut errors: Vec = Vec::new(); + + // Configuration: provider name, env var, API URL, prefix for model IDs + let provider_configs = vec![ + ( + "openai", + "OPENAI_API_KEY", + "https://api.openai.com/v1/models", + "openai", + ), + ( + "mistralai", + "MISTRAL_API_KEY", + "https://api.mistral.ai/v1/models", + "mistralai", + ), + ( + "deepseek", + "DEEPSEEK_API_KEY", + "https://api.deepseek.com/v1/models", + "deepseek", + ), + ("x-ai", "GROK_API_KEY", "https://api.x.ai/v1/models", "x-ai"), + ( + "moonshotai", + "MOONSHOT_API_KEY", + "https://api.moonshot.ai/v1/models", + "moonshotai", + ), + ( + "qwen", + "DASHSCOPE_API_KEY", + "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", + "qwen", + ), + ( + "z-ai", + "ZHIPU_API_KEY", + "https://open.bigmodel.cn/api/paas/v4/models", + "z-ai", + ), ]; - let mut providers: HashMap> = HashMap::new(); - let mut total_models = 0; - let mut filtered_modality: Vec<(String, String)> = Vec::new(); - let mut filtered_provider: Vec<(String, Option)> = Vec::new(); + // Fetch from OpenAI-compatible providers + for (provider_name, env_var, api_url, prefix) in provider_configs { + if let Ok(api_key) = std::env::var(env_var) { + match fetch_openai_compatible_models(api_url, &api_key, prefix) { + Ok(models) => { + println!(" ✓ {}: {} models", provider_name, models.len()); + providers.insert(provider_name.to_string(), models); + } + Err(e) => { + let err_msg = format!(" ✗ {}: {}", provider_name, e); + eprintln!("{}", err_msg); + errors.push(err_msg); + } + } + } else { + println!(" ⊘ {}: {} not set (skipped)", provider_name, env_var); + } + } - for model in openrouter_response.data { - let modality = model - .architecture - .as_ref() - .and_then(|arch| arch.modality.clone()); - - // Only include text->text and text+image->text models - if let Some(ref mod_str) = modality { - if mod_str != "text->text" && mod_str != "text" && mod_str != "text+image->text" { - filtered_modality.push((model.id.clone(), mod_str.clone())); - continue; + // Fetch Anthropic models (different authentication) + if let Ok(api_key) = std::env::var("ANTHROPIC_API_KEY") { + match fetch_anthropic_models(&api_key) { + Ok(models) => { + println!(" ✓ anthropic: {} models", models.len()); + providers.insert("anthropic".to_string(), models); + } + Err(e) => { + let err_msg = format!(" ✗ anthropic: {}", e); + eprintln!("{}", err_msg); + errors.push(err_msg); } } + } else { + println!(" ⊘ anthropic: ANTHROPIC_API_KEY not set (skipped)"); + } - // Extract provider from model ID (e.g., "openai/gpt-4" -> "openai") - if let Some(provider_name) = model.id.split('/').next() { - if supported_providers.contains(&provider_name) { - providers - .entry(provider_name.to_string()) - .or_default() - .push(model.id.clone()); - total_models += 1; - } else { - filtered_provider.push((model.id.clone(), modality)); + // Fetch Google models (different API format) + if let Ok(api_key) = std::env::var("GOOGLE_API_KEY") { + match fetch_google_models(&api_key) { + Ok(models) => { + println!(" ✓ google: {} models", models.len()); + providers.insert("google".to_string(), models); + } + Err(e) => { + let err_msg = format!(" ✗ google: {}", e); + eprintln!("{}", err_msg); + errors.push(err_msg); } } + } else { + println!(" ⊘ google: GOOGLE_API_KEY not set (skipped)"); } - println!("✅ Loaded models from {} providers:", providers.len()); - let mut sorted_providers: Vec<_> = providers.iter().collect(); - sorted_providers.sort_by_key(|(name, _)| *name); - for (provider, models) in sorted_providers { - println!(" • {}: {} models", provider, models.len()); - } - - // Group filtered providers to get counts - let mut filtered_by_provider: HashMap = HashMap::new(); - for (model_id, _modality) in &filtered_provider { - if let Some(provider_name) = model_id.split('/').next() { - *filtered_by_provider - .entry(provider_name.to_string()) - .or_insert(0) += 1; + // Fetch Amazon models from AWS Bedrock + match fetch_bedrock_amazon_models() { + Ok(models) => { + println!(" ✓ amazon: {} models (via AWS Bedrock)", models.len()); + providers.insert("amazon".to_string(), models); + } + Err(e) => { + let err_msg = format!(" ✗ amazon: {} (AWS Bedrock required)", e); + eprintln!("{}", err_msg); + errors.push(err_msg); } } - println!( - "\n⏭️ Skipped {} providers ({} models total)", - filtered_by_provider.len(), - filtered_provider.len() - ); - println!(); + if providers.is_empty() { + return Err("No models fetched from any provider. Check API keys.".into()); + } let total_providers = providers.len(); + let total_models: usize = providers.values().map(|v| v.len()).sum(); + + println!( + "\n✅ Successfully fetched models from {} providers", + total_providers + ); + if !errors.is_empty() { + println!("⚠️ {} providers failed", errors.len()); + } Ok(ProviderModels { version: "1.0".to_string(), - source: "openrouter".to_string(), + source: "canonical-apis".to_string(), providers, metadata: Metadata { total_providers, diff --git a/crates/hermesllm/src/bin/provider_models.json b/crates/hermesllm/src/bin/provider_models.json index 509293bf..6ab1365a 100644 --- a/crates/hermesllm/src/bin/provider_models.json +++ b/crates/hermesllm/src/bin/provider_models.json @@ -1,236 +1,327 @@ { "version": "1.0", - "source": "openrouter", + "source": "canonical-apis", "providers": { - "openai": [ - "openai/gpt-5.2-codex", - "openai/gpt-5.2-chat", - "openai/gpt-5.2-pro", - "openai/gpt-5.2", - "openai/gpt-5.1-codex-max", - "openai/gpt-5.1", - "openai/gpt-5.1-chat", - "openai/gpt-5.1-codex", - "openai/gpt-5.1-codex-mini", - "openai/gpt-oss-safeguard-20b", - "openai/o3-deep-research", - "openai/o4-mini-deep-research", - "openai/gpt-5-pro", - "openai/gpt-5-codex", - "openai/gpt-4o-audio-preview", - "openai/gpt-5-chat", - "openai/gpt-5", - "openai/gpt-5-mini", - "openai/gpt-5-nano", - "openai/gpt-oss-120b:free", - "openai/gpt-oss-120b", - "openai/gpt-oss-120b:exacto", - "openai/gpt-oss-20b:free", - "openai/gpt-oss-20b", - "openai/o3-pro", - "openai/o4-mini-high", - "openai/o3", - "openai/o4-mini", - "openai/gpt-4.1", - "openai/gpt-4.1-mini", - "openai/gpt-4.1-nano", - "openai/o1-pro", - "openai/gpt-4o-mini-search-preview", - "openai/gpt-4o-search-preview", - "openai/o3-mini-high", - "openai/o3-mini", - "openai/o1", - "openai/gpt-4o-2024-11-20", - "openai/chatgpt-4o-latest", - "openai/gpt-4o-2024-08-06", - "openai/gpt-4o-mini-2024-07-18", - "openai/gpt-4o-mini", - "openai/gpt-4o-2024-05-13", - "openai/gpt-4o", - "openai/gpt-4o:extended", - "openai/gpt-4-turbo", - "openai/gpt-3.5-turbo-0613", - "openai/gpt-4-turbo-preview", - "openai/gpt-4-1106-preview", - "openai/gpt-3.5-turbo-instruct", - "openai/gpt-3.5-turbo-16k", - "openai/gpt-4-0314", - "openai/gpt-4", - "openai/gpt-3.5-turbo" - ], - "mistralai": [ - "mistralai/mistral-small-creative", - "mistralai/devstral-2512:free", - "mistralai/devstral-2512", - "mistralai/ministral-14b-2512", - "mistralai/ministral-8b-2512", - "mistralai/ministral-3b-2512", - "mistralai/mistral-large-2512", - "mistralai/voxtral-small-24b-2507", - "mistralai/mistral-medium-3.1", - "mistralai/codestral-2508", - "mistralai/devstral-medium", - "mistralai/devstral-small", - "mistralai/mistral-small-3.2-24b-instruct", - "mistralai/mistral-medium-3", - "mistralai/mistral-small-3.1-24b-instruct:free", - "mistralai/mistral-small-3.1-24b-instruct", - "mistralai/mistral-saba", - "mistralai/mistral-small-24b-instruct-2501", - "mistralai/mistral-large-2411", - "mistralai/mistral-large-2407", - "mistralai/pixtral-large-2411", - "mistralai/ministral-8b", - "mistralai/ministral-3b", - "mistralai/pixtral-12b", - "mistralai/mistral-nemo", - "mistralai/mistral-7b-instruct", - "mistralai/mistral-7b-instruct-v0.3", - "mistralai/mixtral-8x22b-instruct", - "mistralai/mistral-large", - "mistralai/mistral-tiny", - "mistralai/mistral-7b-instruct-v0.2", - "mistralai/mixtral-8x7b-instruct", - "mistralai/mistral-7b-instruct-v0.1" - ], - "qwen": [ - "qwen/qwen3-vl-32b-instruct", - "qwen/qwen3-vl-8b-thinking", - "qwen/qwen3-vl-8b-instruct", - "qwen/qwen3-vl-30b-a3b-thinking", - "qwen/qwen3-vl-30b-a3b-instruct", - "qwen/qwen3-vl-235b-a22b-thinking", - "qwen/qwen3-vl-235b-a22b-instruct", - "qwen/qwen3-max", - "qwen/qwen3-coder-plus", - "qwen/qwen3-coder-flash", - "qwen/qwen3-next-80b-a3b-thinking", - "qwen/qwen3-next-80b-a3b-instruct:free", - "qwen/qwen3-next-80b-a3b-instruct", - "qwen/qwen-plus-2025-07-28", - "qwen/qwen-plus-2025-07-28:thinking", - "qwen/qwen3-30b-a3b-thinking-2507", - "qwen/qwen3-coder-30b-a3b-instruct", - "qwen/qwen3-30b-a3b-instruct-2507", - "qwen/qwen3-235b-a22b-thinking-2507", - "qwen/qwen3-coder:free", - "qwen/qwen3-coder", - "qwen/qwen3-coder:exacto", - "qwen/qwen3-235b-a22b-2507", - "qwen/qwen3-4b:free", - "qwen/qwen3-30b-a3b", - "qwen/qwen3-8b", - "qwen/qwen3-14b", - "qwen/qwen3-32b", - "qwen/qwen3-235b-a22b", - "qwen/qwen2.5-coder-7b-instruct", - "qwen/qwen2.5-vl-32b-instruct", - "qwen/qwq-32b", - "qwen/qwen-vl-plus", - "qwen/qwen-vl-max", - "qwen/qwen-turbo", - "qwen/qwen2.5-vl-72b-instruct", - "qwen/qwen-plus", - "qwen/qwen-max", - "qwen/qwen-2.5-coder-32b-instruct", - "qwen/qwen-2.5-7b-instruct", - "qwen/qwen-2.5-72b-instruct", - "qwen/qwen-2.5-vl-7b-instruct:free", - "qwen/qwen-2.5-vl-7b-instruct" - ], - "z-ai": [ - "z-ai/glm-4.7", - "z-ai/glm-4.6v", - "z-ai/glm-4.6", - "z-ai/glm-4.6:exacto", - "z-ai/glm-4.5v", - "z-ai/glm-4.5", - "z-ai/glm-4.5-air:free", - "z-ai/glm-4.5-air", - "z-ai/glm-4-32b" - ], - "moonshotai": [ - "moonshotai/kimi-k2-thinking", - "moonshotai/kimi-k2-0905", - "moonshotai/kimi-k2-0905:exacto", - "moonshotai/kimi-k2:free", - "moonshotai/kimi-k2", - "moonshotai/kimi-dev-72b" - ], "anthropic": [ - "anthropic/claude-opus-4.5", - "anthropic/claude-haiku-4.5", - "anthropic/claude-sonnet-4.5", - "anthropic/claude-opus-4.1", + "anthropic/claude-opus-4-5-20251101", + "anthropic/claude-opus-4-5", + "anthropic/claude-haiku-4-5-20251001", + "anthropic/claude-haiku-4-5", + "anthropic/claude-sonnet-4-5-20250929", + "anthropic/claude-sonnet-4-5", + "anthropic/claude-opus-4-1-20250805", + "anthropic/claude-opus-4-1", + "anthropic/claude-opus-4-20250514", "anthropic/claude-opus-4", + "anthropic/claude-sonnet-4-20250514", "anthropic/claude-sonnet-4", - "anthropic/claude-3.7-sonnet:thinking", - "anthropic/claude-3.7-sonnet", - "anthropic/claude-3.5-haiku", - "anthropic/claude-3.5-sonnet", + "anthropic/claude-3-7-sonnet-20250219", + "anthropic/claude-3-7-sonnet", + "anthropic/claude-3-5-haiku-20241022", + "anthropic/claude-3-5-haiku", + "anthropic/claude-3-haiku-20240307", "anthropic/claude-3-haiku" ], - "google": [ - "google/gemini-3-flash-preview", - "google/gemini-3-pro-preview", - "google/gemini-2.5-flash-preview-09-2025", - "google/gemini-2.5-flash-lite-preview-09-2025", - "google/gemini-2.5-flash-lite", - "google/gemma-3n-e2b-it:free", - "google/gemini-2.5-flash", - "google/gemini-2.5-pro", - "google/gemini-2.5-pro-preview", - "google/gemma-3n-e4b-it:free", - "google/gemma-3n-e4b-it", - "google/gemini-2.5-pro-preview-05-06", - "google/gemma-3-4b-it:free", - "google/gemma-3-4b-it", - "google/gemma-3-12b-it:free", - "google/gemma-3-12b-it", - "google/gemma-3-27b-it:free", - "google/gemma-3-27b-it", - "google/gemini-2.0-flash-lite-001", - "google/gemini-2.0-flash-001", - "google/gemini-2.0-flash-exp:free", - "google/gemma-2-27b-it", - "google/gemma-2-9b-it" + "qwen": [ + "qwen/qwen-plus-character", + "qwen/qwen-flash-character", + "qwen/qwen-flash", + "qwen/qwen3-vl-plus-2025-12-19", + "qwen/qwen3-omni-flash-2025-12-01", + "qwen/qwen3-livetranslate-flash-2025-12-01", + "qwen/qwen3-livetranslate-flash", + "qwen/qwen-mt-lite", + "qwen/qwen-plus-2025-12-01", + "qwen/qwen-mt-flash", + "qwen/ccai-pro", + "qwen/tongyi-tingwu-slp", + "qwen/qwen3-vl-flash", + "qwen/qwen3-vl-flash-2025-10-15", + "qwen/qwen3-omni-flash", + "qwen/qwen3-omni-flash-2025-09-15", + "qwen/qwen3-omni-30b-a3b-captioner", + "qwen/qwen2.5-7b-instruct", + "qwen/qwen2.5-14b-instruct", + "qwen/qwen2.5-32b-instruct", + "qwen/qwen2.5-72b-instruct", + "qwen/qwen2.5-14b-instruct-1m", + "qwen/qwen2.5-7b-instruct-1m", + "qwen/qwen-max-2025-01-25", + "qwen/qwen-max-latest", + "qwen/qwen-turbo-2024-11-01", + "qwen/qwen-turbo-latest", + "qwen/qwen-plus-latest", + "qwen/qwen-plus-2025-01-25", + "qwen/qwq-plus-2025-03-05", + "qwen/qwen-mt-turbo", + "qwen/qwen-mt-plus", + "qwen/qwen-coder-plus", + "qwen/qwq-plus", + "qwen/qwen2.5-vl-32b-instruct", + "qwen/qvq-max", + "qwen/qwen-omni-turbo", + "qwen/qwen3-8b", + "qwen/qwen3-30b-a3b", + "qwen/qwen3-235b-a22b", + "qwen/qwen-turbo-2025-04-28", + "qwen/qwen-plus-2025-04-28", + "qwen/qwen-vl-max-2025-04-08", + "qwen/qwen-vl-plus-2025-01-25", + "qwen/qwen-vl-plus-latest", + "qwen/qwen-vl-max-latest", + "qwen/qwen-vl-plus-2025-05-07", + "qwen/qwen3-coder-plus", + "qwen/qwen3-coder-480b-a35b-instruct", + "qwen/qwen3-235b-a22b-instruct-2507", + "qwen/qwen-plus-2025-07-14", + "qwen/qwen3-coder-plus-2025-07-22", + "qwen/qwen3-235b-a22b-thinking-2507", + "qwen/qwen3-coder-flash", + "qwen/qwen-vl-max", + "qwen/qwen-vl-max-2025-08-13", + "qwen/qwen3-max", + "qwen/qwen3-max-2025-09-23", + "qwen/qwen3-vl-plus", + "qwen/qwen3-vl-235b-a22b-instruct", + "qwen/qwen3-vl-235b-a22b-thinking", + "qwen/qwen3-30b-a3b-thinking-2507", + "qwen/qwen3-30b-a3b-instruct-2507", + "qwen/qwen3-14b", + "qwen/qwen3-32b", + "qwen/qwen3-0.6b", + "qwen/qwen3-4b", + "qwen/qwen3-1.7b", + "qwen/qwen-vl-plus", + "qwen/qwen3-coder-plus-2025-09-23", + "qwen/qwen3-vl-plus-2025-09-23", + "qwen/qwen-plus-2025-09-11", + "qwen/qwen3-next-80b-a3b-thinking", + "qwen/qwen3-next-80b-a3b-instruct", + "qwen/qwen3-max-preview", + "qwen/qwen2-7b-instruct", + "qwen/qwen-max", + "qwen/qwen-plus", + "qwen/qwen-turbo" ], - "amazon": [ - "amazon/nova-2-lite-v1", - "amazon/nova-premier-v1", - "amazon/nova-lite-v1", - "amazon/nova-micro-v1", - "amazon/nova-pro-v1" + "moonshotai": [ + "moonshotai/kimi-latest", + "moonshotai/moonshot-v1-auto", + "moonshotai/kimi-k2-turbo-preview", + "moonshotai/moonshot-v1-32k-vision-preview", + "moonshotai/moonshot-v1-8k", + "moonshotai/kimi-k2-thinking", + "moonshotai/moonshot-v1-32k", + "moonshotai/moonshot-v1-128k", + "moonshotai/kimi-k2-thinking-turbo", + "moonshotai/moonshot-v1-128k-vision-preview", + "moonshotai/moonshot-v1-8k-vision-preview", + "moonshotai/kimi-k2-0711-preview", + "moonshotai/kimi-k2-0905-preview" ], "deepseek": [ - "deepseek/deepseek-v3.2-speciale", - "deepseek/deepseek-v3.2", - "deepseek/deepseek-v3.2-exp", - "deepseek/deepseek-v3.1-terminus:exacto", - "deepseek/deepseek-v3.1-terminus", - "deepseek/deepseek-chat-v3.1", - "deepseek/deepseek-r1-0528:free", - "deepseek/deepseek-r1-0528", - "deepseek/deepseek-chat-v3-0324", - "deepseek/deepseek-r1-distill-qwen-32b", - "deepseek/deepseek-r1-distill-llama-70b", - "deepseek/deepseek-r1", - "deepseek/deepseek-chat" + "deepseek/deepseek-chat", + "deepseek/deepseek-reasoner" ], "x-ai": [ - "x-ai/grok-4.1-fast", - "x-ai/grok-4-fast", - "x-ai/grok-code-fast-1", - "x-ai/grok-4", - "x-ai/grok-3-mini", + "x-ai/grok-2-vision-1212", "x-ai/grok-3", - "x-ai/grok-3-mini-beta", - "x-ai/grok-3-beta" + "x-ai/grok-3-mini", + "x-ai/grok-4-0709", + "x-ai/grok-4-1-fast-non-reasoning", + "x-ai/grok-4-1-fast-reasoning", + "x-ai/grok-4-fast-non-reasoning", + "x-ai/grok-4-fast-reasoning", + "x-ai/grok-code-fast-1" + ], + "z-ai": [ + "z-ai/glm-4.5", + "z-ai/glm-4.5-air", + "z-ai/glm-4.6", + "z-ai/glm-4.7" + ], + "google": [ + "google/gemini-2.5-flash", + "google/gemini-2.5-pro", + "google/gemini-2.0-flash-exp", + "google/gemini-2.0-flash", + "google/gemini-2.0-flash-001", + "google/gemini-2.0-flash-exp-image-generation", + "google/gemini-2.0-flash-lite-001", + "google/gemini-2.0-flash-lite", + "google/gemini-2.0-flash-lite-preview-02-05", + "google/gemini-2.0-flash-lite-preview", + "google/gemini-exp-1206", + "google/gemini-2.5-flash-preview-tts", + "google/gemini-2.5-pro-preview-tts", + "google/gemma-3-1b-it", + "google/gemma-3-4b-it", + "google/gemma-3-12b-it", + "google/gemma-3-27b-it", + "google/gemma-3n-e4b-it", + "google/gemma-3n-e2b-it", + "google/gemini-flash-latest", + "google/gemini-flash-lite-latest", + "google/gemini-pro-latest", + "google/gemini-2.5-flash-lite", + "google/gemini-2.5-flash-image", + "google/gemini-2.5-flash-preview-09-2025", + "google/gemini-2.5-flash-lite-preview-09-2025", + "google/gemini-3-pro-preview", + "google/gemini-3-flash-preview", + "google/gemini-3-pro-image-preview", + "google/nano-banana-pro-preview", + "google/gemini-robotics-er-1.5-preview", + "google/gemini-2.5-computer-use-preview-10-2025", + "google/deep-research-pro-preview-12-2025" + ], + "mistralai": [ + "mistralai/mistral-medium-2505", + "mistralai/mistral-medium-2508", + "mistralai/mistral-medium-latest", + "mistralai/mistral-medium", + "mistralai/open-mistral-nemo", + "mistralai/open-mistral-nemo-2407", + "mistralai/mistral-tiny-2407", + "mistralai/mistral-tiny-latest", + "mistralai/mistral-large-2411", + "mistralai/pixtral-large-2411", + "mistralai/pixtral-large-latest", + "mistralai/mistral-large-pixtral-2411", + "mistralai/codestral-2508", + "mistralai/codestral-latest", + "mistralai/devstral-small-2507", + "mistralai/devstral-medium-2507", + "mistralai/devstral-2512", + "mistralai/mistral-vibe-cli-latest", + "mistralai/devstral-medium-latest", + "mistralai/devstral-latest", + "mistralai/labs-devstral-small-2512", + "mistralai/devstral-small-latest", + "mistralai/mistral-small-2506", + "mistralai/mistral-small-latest", + "mistralai/labs-mistral-small-creative", + "mistralai/magistral-medium-2509", + "mistralai/magistral-medium-latest", + "mistralai/magistral-small-2509", + "mistralai/magistral-small-latest", + "mistralai/mistral-large-2512", + "mistralai/mistral-large-latest", + "mistralai/ministral-3b-2512", + "mistralai/ministral-3b-latest", + "mistralai/ministral-8b-2512", + "mistralai/ministral-8b-latest", + "mistralai/ministral-14b-2512", + "mistralai/ministral-14b-latest", + "mistralai/open-mistral-7b", + "mistralai/mistral-tiny", + "mistralai/mistral-tiny-2312", + "mistralai/pixtral-12b-2409", + "mistralai/pixtral-12b", + "mistralai/pixtral-12b-latest", + "mistralai/ministral-3b-2410", + "mistralai/ministral-8b-2410", + "mistralai/codestral-2501", + "mistralai/codestral-2412", + "mistralai/codestral-2411-rc5", + "mistralai/mistral-small-2501", + "mistralai/mistral-embed-2312", + "mistralai/mistral-embed", + "mistralai/codestral-embed", + "mistralai/codestral-embed-2505" + ], + "amazon": [ + "amazon/amazon.nova-pro-v1:0", + "amazon/amazon.nova-2-lite-v1:0", + "amazon/amazon.nova-2-sonic-v1:0", + "amazon/amazon.titan-tg1-large", + "amazon/amazon.nova-premier-v1:0:8k", + "amazon/amazon.nova-premier-v1:0:20k", + "amazon/amazon.nova-premier-v1:0:1000k", + "amazon/amazon.nova-premier-v1:0:mm", + "amazon/amazon.nova-premier-v1:0", + "amazon/amazon.nova-lite-v1:0", + "amazon/amazon.nova-micro-v1:0" + ], + "openai": [ + "openai/gpt-4-0613", + "openai/gpt-4", + "openai/gpt-3.5-turbo", + "openai/gpt-5.2-codex", + "openai/gpt-3.5-turbo-instruct", + "openai/gpt-3.5-turbo-instruct-0914", + "openai/gpt-4-1106-preview", + "openai/gpt-3.5-turbo-1106", + "openai/gpt-4-0125-preview", + "openai/gpt-4-turbo-preview", + "openai/gpt-3.5-turbo-0125", + "openai/gpt-4-turbo", + "openai/gpt-4-turbo-2024-04-09", + "openai/gpt-4o", + "openai/gpt-4o-2024-05-13", + "openai/gpt-4o-mini-2024-07-18", + "openai/gpt-4o-mini", + "openai/gpt-4o-2024-08-06", + "openai/chatgpt-4o-latest", + "openai/o1-2024-12-17", + "openai/o1", + "openai/computer-use-preview", + "openai/o3-mini", + "openai/o3-mini-2025-01-31", + "openai/gpt-4o-2024-11-20", + "openai/computer-use-preview-2025-03-11", + "openai/gpt-4o-search-preview-2025-03-11", + "openai/gpt-4o-search-preview", + "openai/gpt-4o-mini-search-preview-2025-03-11", + "openai/gpt-4o-mini-search-preview", + "openai/o1-pro-2025-03-19", + "openai/o1-pro", + "openai/o3-2025-04-16", + "openai/o4-mini-2025-04-16", + "openai/o3", + "openai/o4-mini", + "openai/gpt-4.1-2025-04-14", + "openai/gpt-4.1", + "openai/gpt-4.1-mini-2025-04-14", + "openai/gpt-4.1-mini", + "openai/gpt-4.1-nano-2025-04-14", + "openai/gpt-4.1-nano", + "openai/codex-mini-latest", + "openai/o3-pro", + "openai/o3-pro-2025-06-10", + "openai/o4-mini-deep-research", + "openai/o3-deep-research", + "openai/o3-deep-research-2025-06-26", + "openai/o4-mini-deep-research-2025-06-26", + "openai/gpt-5-chat-latest", + "openai/gpt-5-2025-08-07", + "openai/gpt-5", + "openai/gpt-5-mini-2025-08-07", + "openai/gpt-5-mini", + "openai/gpt-5-nano-2025-08-07", + "openai/gpt-5-nano", + "openai/gpt-5-codex", + "openai/gpt-5-pro-2025-10-06", + "openai/gpt-5-pro", + "openai/gpt-5-search-api", + "openai/gpt-5-search-api-2025-10-14", + "openai/gpt-5.1-chat-latest", + "openai/gpt-5.1-2025-11-13", + "openai/gpt-5.1", + "openai/gpt-5.1-codex", + "openai/gpt-5.1-codex-mini", + "openai/gpt-5.1-codex-max", + "openai/gpt-5.2-2025-12-11", + "openai/gpt-5.2", + "openai/gpt-5.2-pro-2025-12-11", + "openai/gpt-5.2-pro", + "openai/gpt-5.2-chat-latest", + "openai/gpt-3.5-turbo-16k", + "openai/ft:gpt-3.5-turbo-0613:katanemo::8CMZbm0P" ] }, "metadata": { "total_providers": 10, - "total_models": 205, - "last_updated": "2026-01-16T20:30:00.806165+00:00" + "total_models": 296, + "last_updated": "2026-01-22T01:36:41.296455+00:00" } } diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 9992cd6e..6851ab0e 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -6,6 +6,7 @@ use proxy_wasm::traits::*; use proxy_wasm::types::*; use std::num::NonZero; use std::rc::Rc; +use std::sync::Arc; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use crate::metrics::Metrics; @@ -40,7 +41,7 @@ pub struct StreamContext { /// The API that should be used for the upstream provider (after compatibility mapping) resolved_api: Option, llm_providers: Rc, - llm_provider: Option>, + llm_provider: Option>, request_id: Option, start_time: SystemTime, ttft_duration: Option, diff --git a/demos/samples_python/currency_exchange/hurl_tests/simple.hurl b/demos/samples_python/currency_exchange/hurl_tests/simple.hurl index cc66863d..504adcdf 100644 --- a/demos/samples_python/currency_exchange/hurl_tests/simple.hurl +++ b/demos/samples_python/currency_exchange/hurl_tests/simple.hurl @@ -8,7 +8,7 @@ Content-Type: application/json "content": "convert 100 eur" } ], - "model": "none" + "model": "gpt-4o" } HTTP 200 [Asserts] diff --git a/demos/samples_python/currency_exchange/hurl_tests/simple_stream.hurl b/demos/samples_python/currency_exchange/hurl_tests/simple_stream.hurl index 4a8bbe4f..78fccc45 100644 --- a/demos/samples_python/currency_exchange/hurl_tests/simple_stream.hurl +++ b/demos/samples_python/currency_exchange/hurl_tests/simple_stream.hurl @@ -9,7 +9,7 @@ Content-Type: application/json } ], "stream": true, - "model": "none" + "model": "gpt-4o" } HTTP 200 [Asserts] diff --git a/demos/use_cases/README.md b/demos/use_cases/README.md index 348960c0..23a4bc38 100644 --- a/demos/use_cases/README.md +++ b/demos/use_cases/README.md @@ -67,7 +67,7 @@ print("OpenAI Response:", response.choices[0].message.content) #### Step 3.2: Using curl command ``` $ curl --header 'Content-Type: application/json' \ - --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "none"}' \ + --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "gpt-4o"}' \ http://localhost:12000/v1/chat/completions { @@ -92,7 +92,7 @@ You can override model selection using `x-arch-llm-provider-hint` header. For ex ``` $ curl --header 'Content-Type: application/json' \ --header 'x-arch-llm-provider-hint: ministral-3b' \ - --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "none"}' \ + --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "gpt-4o"}' \ http://localhost:12000/v1/chat/completions { ... diff --git a/demos/use_cases/llm_routing/README.md b/demos/use_cases/llm_routing/README.md index 45c7c5a5..590bf027 100644 --- a/demos/use_cases/llm_routing/README.md +++ b/demos/use_cases/llm_routing/README.md @@ -19,7 +19,7 @@ You can also pass in a header to override model when sending prompt. Following e $ curl --header 'Content-Type: application/json' \ --header 'x-arch-llm-provider-hint: mistral/ministral-3b' \ - --data '{"messages": [{"role": "user","content": "hello"}], "model": "none"}' \ + --data '{"messages": [{"role": "user","content": "hello"}], "model": "gpt-4o"}' \ http://localhost:12000/v1/chat/completions 2> /dev/null | jq . { "id": "xxx", diff --git a/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl b/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl index 16c68c72..579935ed 100644 --- a/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl +++ b/demos/use_cases/preference_based_routing/hurl_tests/simple_stream.hurl @@ -5,10 +5,10 @@ Content-Type: application/json "messages": [ { "role": "user", - "content": "hi" + "content": "Can you explain what this Python function does?\n\ndef fibonacci(n):\n if n <= 1:\n return n\n return fibonacci(n-1) + fibonacci(n-2)" } ], - "model": "none", + "model": "openai/gpt-4o-mini", "stream": true } HTTP 200 diff --git a/demos/use_cases/preference_based_routing/test_router_endpoint.rest b/demos/use_cases/preference_based_routing/test_router_endpoint.rest index b915b9c6..72686a70 100644 --- a/demos/use_cases/preference_based_routing/test_router_endpoint.rest +++ b/demos/use_cases/preference_based_routing/test_router_endpoint.rest @@ -34,7 +34,7 @@ POST http://localhost:12000/v1/chat/completions HTTP/1.1 Content-Type: application/json { - "model": "none", + "model": "gpt-4o", "messages": [ { "role": "user", @@ -49,7 +49,7 @@ POST http://localhost:12000/v1/chat/completions HTTP/1.1 Content-Type: application/json { - "model": "none", + "model": "gpt-4o", "messages": [ { "role": "user", diff --git a/demos/use_cases/wildcard_providers/config.yaml b/demos/use_cases/wildcard_providers/config.yaml index 8ddd0469..eef9807a 100644 --- a/demos/use_cases/wildcard_providers/config.yaml +++ b/demos/use_cases/wildcard_providers/config.yaml @@ -16,10 +16,15 @@ model_providers: - model: anthropic/* access_key: $ANTHROPIC_API_KEY + - model: anthropic/claude-sonnet-4-20250514 + access_key: $ANTHROPIC_API_KEY + routing_preferences: + - name: code generation + description: generating new code snippets, functions, or boilerplate based on user prompts or requirements + - model: xai/* access_key: $GROK_API_KEY - # Custom internal LLM provider # Note: Requires base_url and provider_interface for unknown providers - model: ollama/* diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index bbed4f46..73fb6e1e 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -105,7 +105,7 @@ Step 3.1: Using curl command .. code-block:: bash $ curl --header 'Content-Type: application/json' \ - --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "none"}' \ + --data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "gpt-4o"}' \ http://localhost:12000/v1/chat/completions { @@ -315,7 +315,7 @@ Here is a sample curl command you can use to interact: .. code-block:: bash $ curl --header 'Content-Type: application/json' \ - --data '{"messages": [{"role": "user","content": "what is exchange rate for gbp"}], "model": "none"}' \ + --data '{"messages": [{"role": "user","content": "what is exchange rate for gbp"}], "model": "gpt-4o"}' \ http://localhost:10000/v1/chat/completions | jq ".choices[0].message.content" "As of the date provided in your context, December 5, 2024, the exchange rate for GBP (British Pound) from USD (United States Dollar) is 0.78558. This means that 1 USD is equivalent to 0.78558 GBP." @@ -325,7 +325,7 @@ And to get the list of supported currencies: .. code-block:: bash $ curl --header 'Content-Type: application/json' \ - --data '{"messages": [{"role": "user","content": "show me list of currencies that are supported for conversion"}], "model": "none"}' \ + --data '{"messages": [{"role": "user","content": "show me list of currencies that are supported for conversion"}], "model": "gpt-4o"}' \ http://localhost:10000/v1/chat/completions | jq ".choices[0].message.content" "Here is a list of the currencies that are supported for conversion from USD, along with their symbols:\n\n1. AUD - Australian Dollar\n2. BGN - Bulgarian Lev\n3. BRL - Brazilian Real\n4. CAD - Canadian Dollar\n5. CHF - Swiss Franc\n6. CNY - Chinese Renminbi Yuan\n7. CZK - Czech Koruna\n8. DKK - Danish Krone\n9. EUR - Euro\n10. GBP - British Pound\n11. HKD - Hong Kong Dollar\n12. HUF - Hungarian Forint\n13. IDR - Indonesian Rupiah\n14. ILS - Israeli New Sheqel\n15. INR - Indian Rupee\n16. ISK - Icelandic Króna\n17. JPY - Japanese Yen\n18. KRW - South Korean Won\n19. MXN - Mexican Peso\n20. MYR - Malaysian Ringgit\n21. NOK - Norwegian Krone\n22. NZD - New Zealand Dollar\n23. PHP - Philippine Peso\n24. PLN - Polish Złoty\n25. RON - Romanian Leu\n26. SEK - Swedish Krona\n27. SGD - Singapore Dollar\n28. THB - Thai Baht\n29. TRY - Turkish Lira\n30. USD - United States Dollar\n31. ZAR - South African Rand\n\nIf you want to convert USD to any of these currencies, you can select the one you are interested in." diff --git a/tests/rest/api_llm_gateway.rest b/tests/rest/api_llm_gateway.rest index 5549ccac..b8a3deed 100644 --- a/tests/rest/api_llm_gateway.rest +++ b/tests/rest/api_llm_gateway.rest @@ -107,7 +107,7 @@ Content-Type: application/json { "stream": true, - "model": "None", + "model": "gpt-4o", "messages": [ { "role": "user", diff --git a/tests/rest/api_model_server.rest b/tests/rest/api_model_server.rest index 9c094c19..3c58c657 100644 --- a/tests/rest/api_model_server.rest +++ b/tests/rest/api_model_server.rest @@ -238,7 +238,7 @@ POST {{model_server_endpoint}}/function_calling HTTP/1.1 Content-Type: application/json { - "model": "None", + "model": "gpt-4o", "messages": [ { "role": "user", diff --git a/tests/rest/api_prompt_gateway.rest b/tests/rest/api_prompt_gateway.rest index 4a1f77e0..b772efe7 100644 --- a/tests/rest/api_prompt_gateway.rest +++ b/tests/rest/api_prompt_gateway.rest @@ -82,7 +82,7 @@ POST {{prompt_endpoint}}/v1/chat/completions HTTP/1.1 Content-Type: application/json { - "model": "None", + "model": "gpt-4o", "messages": [ { "role": "user",