mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
Adding support for wildcard models in the model_providers config (#696)
* cleaning up plano cli commands
* adding support for wildcard model providers
* fixing compile errors
* fixing bugs related to default model provider, provider hint and duplicates in the model provider list
* fixed cargo fmt issues
* updating tests to always include the model id
* using default for the prompt_gateway path
* fixed the model name, as gpt-5-mini-2025-08-07 wasn't in the config
* making sure that all aliases and models match the config
* fixed the config generator to allow for base_url providers LLMs to include wildcard models
* re-ran the models list utility and added a shell script to run it
* updating docs to mention wildcard model providers
* updated provider_models.json to yaml, added that file to our docs for reference
* updating the build docs to use the new root-based build

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
parent
8428b06e22
commit
2941392ed1
42 changed files with 1748 additions and 202 deletions
|
|
@ -1,11 +1,12 @@
|
|||
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
|
||||
use http::StatusCode;
|
||||
use log::{debug, info, warn};
|
||||
use log::{debug, error, info, warn};
|
||||
use proxy_wasm::hostcalls::get_current_time;
|
||||
use proxy_wasm::traits::*;
|
||||
use proxy_wasm::types::*;
|
||||
use std::num::NonZero;
|
||||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use crate::metrics::Metrics;
|
||||
|
|
@ -40,7 +41,7 @@ pub struct StreamContext {
|
|||
/// The API that should be used for the upstream provider (after compatibility mapping)
|
||||
resolved_api: Option<SupportedUpstreamAPIs>,
|
||||
llm_providers: Rc<LlmProviders>,
|
||||
llm_provider: Option<Rc<LlmProvider>>,
|
||||
llm_provider: Option<Arc<LlmProvider>>,
|
||||
request_id: Option<String>,
|
||||
start_time: SystemTime,
|
||||
ttft_duration: Option<Duration>,
|
||||
|
|
@ -128,16 +129,40 @@ impl StreamContext {
|
|||
}
|
||||
}
|
||||
|
||||
fn select_llm_provider(&mut self) {
|
||||
fn select_llm_provider(&mut self) -> Result<(), String> {
|
||||
let provider_hint = self
|
||||
.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
|
||||
.map(|llm_name| llm_name.into());
|
||||
|
||||
// info!("llm_providers: {:?}", self.llm_providers);
|
||||
self.llm_provider = Some(routing::get_llm_provider(
|
||||
&self.llm_providers,
|
||||
provider_hint,
|
||||
));
|
||||
// Try to get the provider using the hint; fall back to the default provider on error.
|
||||
// This handles prompt_gateway requests which don't set ARCH_PROVIDER_HINT_HEADER
|
||||
// since prompt_gateway doesn't have access to model configuration.
|
||||
// brightstaff (model proxy) always validates and sets the provider hint.
|
||||
let provider = match routing::get_llm_provider(&self.llm_providers, provider_hint) {
|
||||
Ok(provider) => provider,
|
||||
Err(err) => {
|
||||
// Try default provider as fallback
|
||||
match self.llm_providers.default() {
|
||||
Some(default_provider) => {
|
||||
info!(
|
||||
"[PLANO_REQ_ID:{}] Provider selection failed, using default provider",
|
||||
self.request_identifier()
|
||||
);
|
||||
default_provider
|
||||
}
|
||||
None => {
|
||||
error!(
|
||||
"[PLANO_REQ_ID:{}] PROVIDER_SELECTION_FAILED: Error='{}' and no default provider configured",
|
||||
self.request_identifier(),
|
||||
err
|
||||
);
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
self.llm_provider = Some(provider);
|
||||
|
||||
info!(
|
||||
"[PLANO_REQ_ID:{}] PROVIDER_SELECTION: Hint='{}' -> Selected='{}'",
|
||||
|
|
@ -146,6 +171,8 @@ impl StreamContext {
|
|||
.unwrap_or("none".to_string()),
|
||||
self.llm_provider.as_ref().unwrap().name
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn modify_auth_headers(&mut self) -> Result<(), ServerError> {
|
||||
|
|
@ -764,7 +791,15 @@ impl HttpContext for StreamContext {
|
|||
|
||||
// let routing_header_value = self.get_http_request_header(ARCH_ROUTING_HEADER);
|
||||
|
||||
self.select_llm_provider();
|
||||
if let Err(err) = self.select_llm_provider() {
|
||||
self.send_http_response(
|
||||
400,
|
||||
vec![],
|
||||
Some(format!(r#"{{"error": "{}"}}"#, err).as_bytes()),
|
||||
);
|
||||
return Action::Continue;
|
||||
}
|
||||
|
||||
// Check if this is a supported API endpoint
|
||||
if SupportedAPIsFromClient::from_endpoint(&request_path).is_none() {
|
||||
self.send_http_response(404, vec![], Some(b"Unsupported endpoint"));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue