From 3dbda9741e0ccbfab055cc5ba2097a51988e274d Mon Sep 17 00:00:00 2001
From: Musa
Date: Tue, 31 Mar 2026 20:40:42 -0400
Subject: [PATCH] fix: route Perplexity OpenAI endpoints without /v1 (#854)

* fix: route Perplexity OpenAI paths without /v1

* add tests for Perplexity provider handling in LLM module

* refactor: use constant for Perplexity provider prefix in LLM module

* refactor: move const to top of file
---
 crates/brightstaff/src/handlers/llm/mod.rs | 119 +++++++++++++++++++--
 crates/hermesllm/src/clients/endpoints.rs  | 101 ++++++++++++-----
 crates/llm_gateway/src/stream_context.rs   |   1 +
 3 files changed, 189 insertions(+), 32 deletions(-)

diff --git a/crates/brightstaff/src/handlers/llm/mod.rs b/crates/brightstaff/src/handlers/llm/mod.rs
index 5b0898bb..1570a2d8 100644
--- a/crates/brightstaff/src/handlers/llm/mod.rs
+++ b/crates/brightstaff/src/handlers/llm/mod.rs
@@ -38,6 +38,8 @@ use crate::tracing::{
 };
 use model_selection::router_chat_get_upstream_model;
 
+const PERPLEXITY_PROVIDER_PREFIX: &str = "perplexity/";
+
 pub async fn llm_chat(
     request: Request,
     state: Arc,
@@ -384,7 +386,7 @@ async fn parse_and_validate_request(
     let temperature = client_request.get_temperature();
     let is_streaming_request = client_request.is_streaming();
     let alias_resolved_model = resolve_model_alias(&model_from_request, model_aliases);
-    let (provider_id, _) = get_provider_info(llm_providers, &alias_resolved_model).await;
+    let (provider_id, _, _) = get_provider_info(llm_providers, &alias_resolved_model).await;
 
     // Validate model exists in configuration
     if llm_providers
@@ -739,7 +741,8 @@ async fn get_upstream_path(
     resolved_model: &str,
     is_streaming: bool,
 ) -> String {
-    let (provider_id, base_url_path_prefix) = get_provider_info(llm_providers, model_name).await;
+    let (provider_id, base_url_path_prefix, use_unversioned_paths) =
+        get_provider_info(llm_providers, model_name).await;
 
     let Some(client_api) = SupportedAPIsFromClient::from_endpoint(request_path) else {
         return request_path.to_string();
@@ -751,6 +754,7 @@ async fn get_upstream_path(
         resolved_model,
         is_streaming,
         base_url_path_prefix.as_deref(),
+        use_unversioned_paths,
     )
 }
 
@@ -758,21 +762,124 @@ async fn get_upstream_path(
 async fn get_provider_info(
     llm_providers: &Arc<RwLock<LlmProviders>>,
     model_name: &str,
-) -> (hermesllm::ProviderId, Option<String>) {
+) -> (hermesllm::ProviderId, Option<String>, bool) {
     let providers_lock = llm_providers.read().await;
     if let Some(provider) = providers_lock.get(model_name) {
         let provider_id = provider.provider_interface.to_provider_id();
         let prefix = provider.base_url_path_prefix.clone();
-        return (provider_id, prefix);
+        let use_unversioned_paths = provider.name.starts_with(PERPLEXITY_PROVIDER_PREFIX);
+        return (provider_id, prefix, use_unversioned_paths);
     }
 
     if let Some(provider) = providers_lock.default() {
         let provider_id = provider.provider_interface.to_provider_id();
         let prefix = provider.base_url_path_prefix.clone();
-        (provider_id, prefix)
+        let use_unversioned_paths = provider.name.starts_with(PERPLEXITY_PROVIDER_PREFIX);
+        (provider_id, prefix, use_unversioned_paths)
     } else {
         warn!("No default provider found, falling back to OpenAI");
-        (hermesllm::ProviderId::OpenAI, None)
+        (hermesllm::ProviderId::OpenAI, None, false)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{get_provider_info, get_upstream_path};
+    use common::configuration::{LlmProvider, LlmProviderType};
+    use common::llm_providers::LlmProviders;
+    use hermesllm::apis::OpenAIApi;
+    use hermesllm::clients::SupportedAPIsFromClient;
+    use std::sync::Arc;
+    use tokio::sync::RwLock;
+
+    fn build_provider(name: &str, model: &str) -> LlmProvider {
+        LlmProvider {
+            name: name.to_string(),
+            provider_interface: LlmProviderType::OpenAI,
+            access_key: Some("test_key".to_string()),
+            model: Some(model.to_string()),
+            default: Some(false),
+            ..Default::default()
+        }
+    }
+
+    fn providers_lock(providers: Vec<LlmProvider>) -> Arc<RwLock<LlmProviders>> {
+        Arc::new(RwLock::new(
+            LlmProviders::try_from(providers).expect("test providers should be valid"),
+        ))
+    }
+
+    #[tokio::test]
+    async fn test_get_provider_info_marks_perplexity_as_unversioned() {
+        let providers = providers_lock(vec![build_provider("perplexity/sonar-pro", "sonar-pro")]);
+
+        let (provider_id, prefix, use_unversioned_paths) =
+            get_provider_info(&providers, "perplexity/sonar-pro").await;
+
+        assert_eq!(provider_id, hermesllm::ProviderId::OpenAI);
+        assert_eq!(prefix, None);
+        assert!(use_unversioned_paths);
+    }
+
+    #[tokio::test]
+    async fn test_get_upstream_path_for_perplexity_uses_unversioned_chat_endpoint() {
+        let providers = providers_lock(vec![build_provider("perplexity/sonar-pro", "sonar-pro")]);
+
+        let upstream_path = get_upstream_path(
+            &providers,
+            "perplexity/sonar-pro",
+            "/v1/chat/completions",
+            "sonar-pro",
+            false,
+        )
+        .await;
+
+        assert_eq!(upstream_path, "/chat/completions");
+    }
+
+    #[tokio::test]
+    async fn test_get_upstream_path_for_non_perplexity_keeps_v1_chat_endpoint() {
+        let providers = providers_lock(vec![build_provider("openai/gpt-4o-mini", "gpt-4o-mini")]);
+
+        let upstream_path = get_upstream_path(
+            &providers,
+            "openai/gpt-4o-mini",
+            "/v1/chat/completions",
+            "gpt-4o-mini",
+            false,
+        )
+        .await;
+
+        assert_eq!(upstream_path, "/v1/chat/completions");
+    }
+
+    #[tokio::test]
+    async fn test_perplexity_with_and_without_versioning_paths() {
+        let providers = providers_lock(vec![build_provider("perplexity/sonar-pro", "sonar-pro")]);
+
+        // This is the path Plano should use for Perplexity (works).
+        let success_path = get_upstream_path(
+            &providers,
+            "perplexity/sonar-pro",
+            "/v1/chat/completions",
+            "sonar-pro",
+            false,
+        )
+        .await;
+        assert_eq!(success_path, "/chat/completions");
+
+        // This is the generic OpenAI default path; for Perplexity this would 404.
+        let fail_path = SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions)
+            .target_endpoint_for_provider(
+                &hermesllm::ProviderId::OpenAI,
+                "/v1/chat/completions",
+                "sonar-pro",
+                false,
+                None,
+                false,
+            );
+        assert_eq!(fail_path, "/v1/chat/completions");
+        assert_ne!(success_path, fail_path);
     }
 }
diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs
index 23e14604..39b34358 100644
--- a/crates/hermesllm/src/clients/endpoints.rs
+++ b/crates/hermesllm/src/clients/endpoints.rs
@@ -92,6 +92,7 @@ impl SupportedAPIsFromClient {
         model_id: &str,
         is_streaming: bool,
         base_url_path_prefix: Option<&str>,
+        use_unversioned_paths: bool,
     ) -> String {
         // Helper function to build endpoint with optional prefix override
         let build_endpoint = |provider_prefix: &str, suffix: &str| -> String {
@@ -161,7 +162,13 @@
                     build_endpoint("/v1", endpoint_suffix)
                 }
             }
-            _ => build_endpoint("/v1", endpoint_suffix),
+            _ => {
+                if use_unversioned_paths {
+                    build_endpoint("", endpoint_suffix)
+                } else {
+                    build_endpoint("/v1", endpoint_suffix)
+                }
+            }
         }
     };
 
@@ -343,7 +350,8 @@ mod tests {
                 "/v1/chat/completions",
                 "gpt-4",
                 false,
-                None
+                None,
+                false
             ),
             "/v1/chat/completions"
         );
@@ -355,7 +363,8 @@ mod tests {
                 "/v1/chat/completions",
                 "llama2",
                 false,
-                None
+                None,
+                false
             ),
             "/openai/v1/chat/completions"
         );
@@ -367,7 +376,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                None
+                None,
+                false
             ),
             "/api/paas/v4/chat/completions"
         );
@@ -379,7 +389,8 @@ mod tests {
                 "/v1/chat/completions",
                 "qwen-turbo",
                 false,
-                None
+                None,
+                false
             ),
             "/compatible-mode/v1/chat/completions"
         );
@@ -391,7 +402,8 @@ mod tests {
                 "/v1/chat/completions",
                 "gpt-4",
                 false,
-                None
+                None,
+                false
             ),
             "/openai/deployments/gpt-4/chat/completions?api-version=2025-01-01-preview"
         );
@@ -403,12 +415,30 @@ mod tests {
                 "/v1/chat/completions",
                 "gemini-pro",
                 false,
-                None
+                None,
+                false
             ),
             "/v1beta/openai/chat/completions"
         );
     }
 
+    #[test]
+    fn test_target_endpoint_unversioned_paths() {
+        let api = SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
+
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::OpenAI,
+                "/v1/chat/completions",
+                "sonar-pro",
+                false,
+                None,
+                true
+            ),
+            "/chat/completions"
+        );
+    }
+
     #[test]
     fn test_target_endpoint_with_base_url_prefix() {
         let api = SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
@@ -419,7 +449,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                Some("/api/coding/paas/v4")
+                Some("/api/coding/paas/v4"),
+                false
             ),
             "/api/coding/paas/v4/chat/completions"
         );
@@ -431,7 +462,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                Some("api/coding/paas/v4")
+                Some("api/coding/paas/v4"),
+                false
             ),
             "/api/coding/paas/v4/chat/completions"
         );
@@ -443,7 +475,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                Some("/api/coding/paas/v4/")
+                Some("/api/coding/paas/v4/"),
+                false
             ),
             "/api/coding/paas/v4/chat/completions"
         );
@@ -455,7 +488,8 @@ mod tests {
                 "/v1/chat/completions",
                 "gpt-4",
                 false,
-                Some("/custom/api/v2")
+                Some("/custom/api/v2"),
+                false
             ),
             "/custom/api/v2/chat/completions"
         );
@@ -467,7 +501,8 @@ mod tests {
                 "/v1/chat/completions",
                 "llama2",
                 false,
-                Some("/api/v2")
+                Some("/api/v2"),
+                false
             ),
             "/api/v2/v1/chat/completions"
         );
@@ -485,7 +520,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                Some("/")
+                Some("/"),
+                false
             ),
             "/api/paas/v4/chat/completions"
         );
@@ -497,7 +533,8 @@ mod tests {
                 "/v1/chat/completions",
"chatglm", false, - None + None, + false ), "/api/paas/v4/chat/completions" ); @@ -514,7 +551,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", false, - None + None, + false ), "/model/us.amazon.nova-pro-v1:0/converse" ); @@ -526,7 +564,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", true, - None + None, + false ), "/model/us.amazon.nova-pro-v1:0/converse-stream" ); @@ -538,7 +577,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", false, - Some("/custom/path") + Some("/custom/path"), + false ), "/custom/path/model/us.amazon.nova-pro-v1:0/converse" ); @@ -550,7 +590,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", true, - Some("/custom/path") + Some("/custom/path"), + false ), "/custom/path/model/us.amazon.nova-pro-v1:0/converse-stream" ); @@ -567,7 +608,8 @@ mod tests { "/v1/messages", "claude-3-opus", false, - None + None, + false ), "/v1/messages" ); @@ -579,7 +621,8 @@ mod tests { "/v1/messages", "claude-3-opus", false, - Some("/api/v2") + Some("/api/v2"), + false ), "/api/v2/messages" ); @@ -596,7 +639,8 @@ mod tests { "/custom/path", "llama2", false, - None + None, + false ), "/v1/chat/completions" ); @@ -608,7 +652,8 @@ mod tests { "/custom/path", "chatglm", false, - None + None, + false ), "/v1/chat/completions" ); @@ -620,7 +665,8 @@ mod tests { "/custom/path", "chatglm", false, - Some("/api/v2") + Some("/api/v2"), + false ), "/api/v2/chat/completions" ); @@ -637,7 +683,8 @@ mod tests { "/v1/chat/completions", "gpt-4-deployment", false, - None + None, + false ), "/openai/deployments/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview" ); @@ -649,7 +696,8 @@ mod tests { "/v1/chat/completions", "gpt-4-deployment", false, - Some("/custom/azure/path") + Some("/custom/azure/path"), + false ), "/custom/azure/path/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview" ); @@ -664,7 +712,8 @@ mod tests { "/v1/responses", "grok-4-1-fast-reasoning", false, - None + None, + false ), "/v1/responses" ); diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index f62631fa..afb0b050 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -122,6 +122,7 @@ impl StreamContext { .unwrap_or(&"".to_string()), self.streaming_response, self.llm_provider().base_url_path_prefix.as_deref(), + self.llm_provider().name.starts_with("perplexity/"), ); if target_endpoint != request_path { self.set_http_request_header(":path", Some(&target_endpoint));