Adding support for wildcard models in the model_providers config (#696)

* cleaning up plano CLI commands * adding support for wildcard model providers * fixing compile errors * fixing bugs related to the default model provider, the provider hint, and duplicates in the model provider list * fixed cargo fmt issues * updating tests to always include the model id * using the default provider for the prompt_gateway path * fixed the model name, as gpt-5-mini-2025-08-07 wasn't in the config * making sure that all aliases and models match the config * fixed the config generator to allow LLM providers configured with base_url to include wildcard models * re-ran the models list utility and added a shell script to run it * updating docs to mention wildcard model providers * converted provider_models.json to YAML and added that file to our docs for reference * updating the build docs to use the new root-based build --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
2026-04-25 00:36:34 +02:00 · 2026-01-28 17:47:33 -08:00 · 2026-01-28 17:47:33 -08:00 · 2941392ed1
commit 2941392ed1
parent 8428b06e22
42 changed files with 1748 additions and 202 deletions
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -1,11 +1,12 @@
 use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
 use http::StatusCode;
-use log::{debug, info, warn};
+use log::{debug, error, info, warn};
 use proxy_wasm::hostcalls::get_current_time;
 use proxy_wasm::traits::*;
 use proxy_wasm::types::*;
 use std::num::NonZero;
 use std::rc::Rc;
+use std::sync::Arc;
 use std::time::{Duration, SystemTime, UNIX_EPOCH};

 use crate::metrics::Metrics;
@ -40,7 +41,7 @@ pub struct StreamContext {
    /// The API that should be used for the upstream provider (after compatibility mapping)
    resolved_api: Option<SupportedUpstreamAPIs>,
    llm_providers: Rc<LlmProviders>,
-    llm_provider: Option<Rc<LlmProvider>>,
+    llm_provider: Option<Arc<LlmProvider>>,
    request_id: Option<String>,
    start_time: SystemTime,
    ttft_duration: Option<Duration>,
@ -128,16 +129,40 @@ impl StreamContext {
        }
    }

-    fn select_llm_provider(&mut self) {
+    fn select_llm_provider(&mut self) -> Result<(), String> {
        let provider_hint = self
            .get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
            .map(|llm_name| llm_name.into());

-        // info!("llm_providers: {:?}", self.llm_providers);
-        self.llm_provider = Some(routing::get_llm_provider(
-            &self.llm_providers,
-            provider_hint,
-        ));
+        // Try to get provider with hint, fallback to default if error
+        // This handles prompt_gateway requests which don't set ARCH_PROVIDER_HINT_HEADER
+        // since prompt_gateway doesn't have access to model configuration.
+        // brightstaff (model proxy) always validates and sets the provider hint.
+        let provider = match routing::get_llm_provider(&self.llm_providers, provider_hint) {
+            Ok(provider) => provider,
+            Err(err) => {
+                // Try default provider as fallback
+                match self.llm_providers.default() {
+                    Some(default_provider) => {
+                        info!(
+                            "[PLANO_REQ_ID:{}] Provider selection failed, using default provider",
+                            self.request_identifier()
+                        );
+                        default_provider
+                    }
+                    None => {
+                        error!(
+                            "[PLANO_REQ_ID:{}] PROVIDER_SELECTION_FAILED: Error='{}' and no default provider configured",
+                            self.request_identifier(),
+                            err
+                        );
+                        return Err(err);
+                    }
+                }
+            }
+        };
+
+        self.llm_provider = Some(provider);

        info!(
            "[PLANO_REQ_ID:{}] PROVIDER_SELECTION: Hint='{}' -> Selected='{}'",
@ -146,6 +171,8 @@ impl StreamContext {
                .unwrap_or("none".to_string()),
            self.llm_provider.as_ref().unwrap().name
        );
+
+        Ok(())
    }

    fn modify_auth_headers(&mut self) -> Result<(), ServerError> {
@ -764,7 +791,15 @@ impl HttpContext for StreamContext {

        // let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);

-        self.select_llm_provider();
+        if let Err(err) = self.select_llm_provider() {
+            self.send_http_response(
+                400,
+                vec![],
+                Some(format!(r#"{{"error": "{}"}}"#, err).as_bytes()),
+            );
+            return Action::Continue;
+        }
+
        // Check if this is a supported API endpoint
        if SupportedAPIsFromClient::from_endpoint(&request_path).is_none() {
            self.send_http_response(404, vec![], Some(b"Unsupported endpoint"));