Adding support for wildcard models in the model_providers config (#696)

* cleaning up plano cli commands

* adding support for wildcard model providers

* fixing compile errors

* fixing bugs related to default model provider, provider hint and duplicates in the model provider list

* fixed cargo fmt issues

* updating tests to always include the model id

* using default for the prompt_gateway path

* fixed the model name, as gpt-5-mini-2025-08-07 wasn't in the config

* making sure that all aliases and models match the config

* fixed the config generator to allow base_url provider LLMs to include wildcard models

* re-ran the models list utility and added a shell script to run it

* updating docs to mention wildcard model providers

* updated provider_models.json to YAML, and added that file to our docs for reference

* updating the build docs to use the new root-based build

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
Salman Paracha 2026-01-28 17:47:33 -08:00 committed by GitHub
parent 8428b06e22
commit 2941392ed1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
42 changed files with 1748 additions and 202 deletions

View file

@ -1,11 +1,12 @@
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use http::StatusCode;
use log::{debug, info, warn};
use log::{debug, error, info, warn};
use proxy_wasm::hostcalls::get_current_time;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use std::num::NonZero;
use std::rc::Rc;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use crate::metrics::Metrics;
@ -40,7 +41,7 @@ pub struct StreamContext {
/// The API that should be used for the upstream provider (after compatibility mapping)
resolved_api: Option<SupportedUpstreamAPIs>,
llm_providers: Rc<LlmProviders>,
llm_provider: Option<Rc<LlmProvider>>,
llm_provider: Option<Arc<LlmProvider>>,
request_id: Option<String>,
start_time: SystemTime,
ttft_duration: Option<Duration>,
@ -128,16 +129,40 @@ impl StreamContext {
}
}
fn select_llm_provider(&mut self) {
fn select_llm_provider(&mut self) -> Result<(), String> {
let provider_hint = self
.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
.map(|llm_name| llm_name.into());
// info!("llm_providers: {:?}", self.llm_providers);
self.llm_provider = Some(routing::get_llm_provider(
&self.llm_providers,
provider_hint,
));
// Try to get provider with hint, fallback to default if error
// This handles prompt_gateway requests which don't set ARCH_PROVIDER_HINT_HEADER
// since prompt_gateway doesn't have access to model configuration.
// brightstaff (model proxy) always validates and sets the provider hint.
let provider = match routing::get_llm_provider(&self.llm_providers, provider_hint) {
Ok(provider) => provider,
Err(err) => {
// Try default provider as fallback
match self.llm_providers.default() {
Some(default_provider) => {
info!(
"[PLANO_REQ_ID:{}] Provider selection failed, using default provider",
self.request_identifier()
);
default_provider
}
None => {
error!(
"[PLANO_REQ_ID:{}] PROVIDER_SELECTION_FAILED: Error='{}' and no default provider configured",
self.request_identifier(),
err
);
return Err(err);
}
}
}
};
self.llm_provider = Some(provider);
info!(
"[PLANO_REQ_ID:{}] PROVIDER_SELECTION: Hint='{}' -> Selected='{}'",
@ -146,6 +171,8 @@ impl StreamContext {
.unwrap_or("none".to_string()),
self.llm_provider.as_ref().unwrap().name
);
Ok(())
}
fn modify_auth_headers(&mut self) -> Result<(), ServerError> {
@ -764,7 +791,15 @@ impl HttpContext for StreamContext {
// let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);
self.select_llm_provider();
if let Err(err) = self.select_llm_provider() {
self.send_http_response(
400,
vec![],
Some(format!(r#"{{"error": "{}"}}"#, err).as_bytes()),
);
return Action::Continue;
}
// Check if this is a supported API endpoint
if SupportedAPIsFromClient::from_endpoint(&request_path).is_none() {
self.send_http_response(404, vec![], Some(b"Unsupported endpoint"));