diff --git a/crates/brightstaff/src/handlers/llm/mod.rs b/crates/brightstaff/src/handlers/llm/mod.rs index 9d4a2dfb..aafc7fa5 100644 --- a/crates/brightstaff/src/handlers/llm/mod.rs +++ b/crates/brightstaff/src/handlers/llm/mod.rs @@ -386,7 +386,7 @@ async fn parse_and_validate_request( let temperature = client_request.get_temperature(); let is_streaming_request = client_request.is_streaming(); let alias_resolved_model = resolve_model_alias(&model_from_request, model_aliases); - let (provider_id, _) = get_provider_info(llm_providers, &alias_resolved_model).await; + let (provider_id, _, _) = get_provider_info(llm_providers, &alias_resolved_model).await; // Validate model exists in configuration if llm_providers @@ -741,7 +741,8 @@ async fn get_upstream_path( resolved_model: &str, is_streaming: bool, ) -> String { - let (provider_id, base_url_path_prefix) = get_provider_info(llm_providers, model_name).await; + let (provider_id, base_url_path_prefix, use_unversioned_paths) = + get_provider_info(llm_providers, model_name).await; let Some(client_api) = SupportedAPIsFromClient::from_endpoint(request_path) else { return request_path.to_string(); @@ -753,6 +754,7 @@ async fn get_upstream_path( resolved_model, is_streaming, base_url_path_prefix.as_deref(), + use_unversioned_paths, ) } @@ -760,21 +762,23 @@ async fn get_upstream_path( async fn get_provider_info( llm_providers: &Arc>, model_name: &str, -) -> (hermesllm::ProviderId, Option) { +) -> (hermesllm::ProviderId, Option, bool) { let providers_lock = llm_providers.read().await; if let Some(provider) = providers_lock.get(model_name) { let provider_id = provider.provider_interface.to_provider_id(); let prefix = provider.base_url_path_prefix.clone(); - return (provider_id, prefix); + let use_unversioned_paths = provider.name.starts_with("perplexity/"); + return (provider_id, prefix, use_unversioned_paths); } if let Some(provider) = providers_lock.default() { let provider_id = provider.provider_interface.to_provider_id(); let prefix = provider.base_url_path_prefix.clone(); - (provider_id, prefix) + let use_unversioned_paths = provider.name.starts_with("perplexity/"); + (provider_id, prefix, use_unversioned_paths) } else { warn!("No default provider found, falling back to OpenAI"); - (hermesllm::ProviderId::OpenAI, None) + (hermesllm::ProviderId::OpenAI, None, false) } } diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs index 23e14604..39b34358 100644 --- a/crates/hermesllm/src/clients/endpoints.rs +++ b/crates/hermesllm/src/clients/endpoints.rs @@ -92,6 +92,7 @@ impl SupportedAPIsFromClient { model_id: &str, is_streaming: bool, base_url_path_prefix: Option<&str>, + use_unversioned_paths: bool, ) -> String { // Helper function to build endpoint with optional prefix override let build_endpoint = |provider_prefix: &str, suffix: &str| -> String { @@ -161,7 +162,13 @@ impl SupportedAPIsFromClient { build_endpoint("/v1", endpoint_suffix) } } - _ => build_endpoint("/v1", endpoint_suffix), + _ => { + if use_unversioned_paths { + build_endpoint("", endpoint_suffix) + } else { + build_endpoint("/v1", endpoint_suffix) + } + } } }; @@ -343,7 +350,8 @@ mod tests { "/v1/chat/completions", "gpt-4", false, - None + None, + false ), "/v1/chat/completions" ); @@ -355,7 +363,8 @@ mod tests { "/v1/chat/completions", "llama2", false, - None + None, + false ), "/openai/v1/chat/completions" ); @@ -367,7 +376,8 @@ mod tests { "/v1/chat/completions", "chatglm", false, - None + None, + false ), "/api/paas/v4/chat/completions" ); @@ -379,7 +389,8 @@ mod tests { "/v1/chat/completions", "qwen-turbo", false, - None + None, + false ), "/compatible-mode/v1/chat/completions" ); @@ -391,7 +402,8 @@ mod tests { "/v1/chat/completions", "gpt-4", false, - None + None, + false ), "/openai/deployments/gpt-4/chat/completions?api-version=2025-01-01-preview" ); @@ -403,12 +415,30 @@ mod tests { "/v1/chat/completions", "gemini-pro", false, - None + None, + false ), "/v1beta/openai/chat/completions" ); } + #[test] + fn test_target_endpoint_unversioned_paths() { + let api = SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions); + + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::OpenAI, + "/v1/chat/completions", + "sonar-pro", + false, + None, + true + ), + "/chat/completions" + ); + } + #[test] fn test_target_endpoint_with_base_url_prefix() { let api = SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions); @@ -420,7 +450,8 @@ mod tests { "/v1/chat/completions", "chatglm", false, - Some("/api/coding/paas/v4") + Some("/api/coding/paas/v4"), + false ), "/api/coding/paas/v4/chat/completions" ); @@ -432,7 +463,8 @@ mod tests { "/v1/chat/completions", "chatglm", false, - Some("api/coding/paas/v4") + Some("api/coding/paas/v4"), + false ), "/api/coding/paas/v4/chat/completions" ); @@ -444,7 +476,8 @@ mod tests { "/v1/chat/completions", "chatglm", false, - Some("/api/coding/paas/v4/") + Some("/api/coding/paas/v4/"), + false ), "/api/coding/paas/v4/chat/completions" ); @@ -456,7 +489,8 @@ mod tests { "/v1/chat/completions", "gpt-4", false, - Some("/custom/api/v2") + Some("/custom/api/v2"), + false ), "/custom/api/v2/chat/completions" ); @@ -468,7 +502,8 @@ mod tests { "/v1/chat/completions", "llama2", false, - Some("/api/v2") + Some("/api/v2"), + false ), "/api/v2/v1/chat/completions" ); @@ -485,7 +520,8 @@ mod tests { "/v1/chat/completions", "chatglm", false, - Some("/") + Some("/"), + false ), "/api/paas/v4/chat/completions" ); @@ -497,7 +533,8 @@ mod tests { "/v1/chat/completions", "chatglm", false, - None + None, + false ), "/api/paas/v4/chat/completions" ); @@ -514,7 +551,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", false, - None + None, + false ), "/model/us.amazon.nova-pro-v1:0/converse" ); @@ -526,7 +564,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", true, - None + None, + false ), "/model/us.amazon.nova-pro-v1:0/converse-stream" ); @@ -538,7 +577,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", false, - Some("/custom/path") + Some("/custom/path"), + false ), "/custom/path/model/us.amazon.nova-pro-v1:0/converse" ); @@ -550,7 +590,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", true, - Some("/custom/path") + Some("/custom/path"), + false ), "/custom/path/model/us.amazon.nova-pro-v1:0/converse-stream" ); @@ -567,7 +608,8 @@ mod tests { "/v1/messages", "claude-3-opus", false, - None + None, + false ), "/v1/messages" ); @@ -579,7 +621,8 @@ mod tests { "/v1/messages", "claude-3-opus", false, - Some("/api/v2") + Some("/api/v2"), + false ), "/api/v2/messages" ); @@ -596,7 +639,8 @@ mod tests { "/custom/path", "llama2", false, - None + None, + false ), "/v1/chat/completions" ); @@ -608,7 +652,8 @@ mod tests { "/custom/path", "chatglm", false, - None + None, + false ), "/v1/chat/completions" ); @@ -620,7 +665,8 @@ mod tests { "/custom/path", "chatglm", false, - Some("/api/v2") + Some("/api/v2"), + false ), "/api/v2/chat/completions" ); @@ -637,7 +683,8 @@ mod tests { "/v1/chat/completions", "gpt-4-deployment", false, - None + None, + false ), "/openai/deployments/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview" ); @@ -649,7 +696,8 @@ mod tests { "/v1/chat/completions", "gpt-4-deployment", false, - Some("/custom/azure/path") + Some("/custom/azure/path"), + false ), "/custom/azure/path/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview" ); @@ -664,7 +712,8 @@ mod tests { "/v1/responses", "grok-4-1-fast-reasoning", false, - None + None, + false ), "/v1/responses" ); diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index f62631fa..afb0b050 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -122,6 +122,7 @@ impl StreamContext { .unwrap_or(&"".to_string()), self.streaming_response, self.llm_provider().base_url_path_prefix.as_deref(), + self.llm_provider().name.starts_with("perplexity/"), ); if target_endpoint != request_path { self.set_http_request_header(":path", Some(&target_endpoint));