From 3dbda9741e0ccbfab055cc5ba2097a51988e274d Mon Sep 17 00:00:00 2001
From: Musa
Date: Tue, 31 Mar 2026 20:40:42 -0400
Subject: [PATCH] fix: route Perplexity OpenAI endpoints without /v1 (#854)

* fix: route Perplexity OpenAI paths without /v1

* add tests for Perplexity provider handling in LLM module

* refactor: use constant for Perplexity provider prefix in LLM module

* refactor: move const to top of file
---
 crates/brightstaff/src/handlers/llm/mod.rs | 119 +++++++++++++++++++--
 crates/hermesllm/src/clients/endpoints.rs  | 101 ++++++++++++-----
 crates/llm_gateway/src/stream_context.rs   |   1 +
 3 files changed, 189 insertions(+), 32 deletions(-)

diff --git a/crates/brightstaff/src/handlers/llm/mod.rs b/crates/brightstaff/src/handlers/llm/mod.rs
index 5b0898bb..1570a2d8 100644
--- a/crates/brightstaff/src/handlers/llm/mod.rs
+++ b/crates/brightstaff/src/handlers/llm/mod.rs
@@ -38,6 +38,8 @@ use crate::tracing::{
 };
 use model_selection::router_chat_get_upstream_model;
 
+const PERPLEXITY_PROVIDER_PREFIX: &str = "perplexity/";
+
 pub async fn llm_chat(
     request: Request,
     state: Arc,
@@ -384,7 +386,7 @@ async fn parse_and_validate_request(
     let temperature = client_request.get_temperature();
     let is_streaming_request = client_request.is_streaming();
     let alias_resolved_model = resolve_model_alias(&model_from_request, model_aliases);
-    let (provider_id, _) = get_provider_info(llm_providers, &alias_resolved_model).await;
+    let (provider_id, _, _) = get_provider_info(llm_providers, &alias_resolved_model).await;
 
     // Validate model exists in configuration
     if llm_providers
@@ -739,7 +741,8 @@ async fn get_upstream_path(
     resolved_model: &str,
     is_streaming: bool,
 ) -> String {
-    let (provider_id, base_url_path_prefix) = get_provider_info(llm_providers, model_name).await;
+    let (provider_id, base_url_path_prefix, use_unversioned_paths) =
+        get_provider_info(llm_providers, model_name).await;
 
     let Some(client_api) = SupportedAPIsFromClient::from_endpoint(request_path) else {
         return request_path.to_string();
@@ -751,6 +754,7 @@ async fn get_upstream_path(
         resolved_model,
         is_streaming,
         base_url_path_prefix.as_deref(),
+        use_unversioned_paths,
     )
 }
 
@@ -758,21 +762,124 @@ async fn get_upstream_path(
 async fn get_provider_info(
     llm_providers: &Arc<RwLock<LlmProviders>>,
     model_name: &str,
-) -> (hermesllm::ProviderId, Option<String>) {
+) -> (hermesllm::ProviderId, Option<String>, bool) {
     let providers_lock = llm_providers.read().await;
     if let Some(provider) = providers_lock.get(model_name) {
         let provider_id = provider.provider_interface.to_provider_id();
         let prefix = provider.base_url_path_prefix.clone();
-        return (provider_id, prefix);
+        let use_unversioned_paths = provider.name.starts_with(PERPLEXITY_PROVIDER_PREFIX);
+        return (provider_id, prefix, use_unversioned_paths);
     }
 
     if let Some(provider) = providers_lock.default() {
         let provider_id = provider.provider_interface.to_provider_id();
         let prefix = provider.base_url_path_prefix.clone();
-        (provider_id, prefix)
+        let use_unversioned_paths = provider.name.starts_with(PERPLEXITY_PROVIDER_PREFIX);
+        (provider_id, prefix, use_unversioned_paths)
     } else {
         warn!("No default provider found, falling back to OpenAI");
-        (hermesllm::ProviderId::OpenAI, None)
+        (hermesllm::ProviderId::OpenAI, None, false)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{get_provider_info, get_upstream_path};
+    use common::configuration::{LlmProvider, LlmProviderType};
+    use common::llm_providers::LlmProviders;
+    use hermesllm::apis::OpenAIApi;
+    use hermesllm::clients::SupportedAPIsFromClient;
+    use std::sync::Arc;
+    use tokio::sync::RwLock;
+
+    fn build_provider(name: &str, model: &str) -> LlmProvider {
+        LlmProvider {
+            name: name.to_string(),
+            provider_interface: LlmProviderType::OpenAI,
+            access_key: Some("test_key".to_string()),
+            model: Some(model.to_string()),
+            default: Some(false),
+            ..Default::default()
+        }
+    }
+
+    fn providers_lock(providers: Vec<LlmProvider>) -> Arc<RwLock<LlmProviders>> {
+        Arc::new(RwLock::new(
+            LlmProviders::try_from(providers).expect("test providers should be valid"),
+        ))
+    }
+
+    #[tokio::test]
+    async fn test_get_provider_info_marks_perplexity_as_unversioned() {
+        let providers = providers_lock(vec![build_provider("perplexity/sonar-pro", "sonar-pro")]);
+
+        let (provider_id, prefix, use_unversioned_paths) =
+            get_provider_info(&providers, "perplexity/sonar-pro").await;
+
+        assert_eq!(provider_id, hermesllm::ProviderId::OpenAI);
+        assert_eq!(prefix, None);
+        assert!(use_unversioned_paths);
+    }
+
+    #[tokio::test]
+    async fn test_get_upstream_path_for_perplexity_uses_unversioned_chat_endpoint() {
+        let providers = providers_lock(vec![build_provider("perplexity/sonar-pro", "sonar-pro")]);
+
+        let upstream_path = get_upstream_path(
+            &providers,
+            "perplexity/sonar-pro",
+            "/v1/chat/completions",
+            "sonar-pro",
+            false,
+        )
+        .await;
+
+        assert_eq!(upstream_path, "/chat/completions");
+    }
+
+    #[tokio::test]
+    async fn test_get_upstream_path_for_non_perplexity_keeps_v1_chat_endpoint() {
+        let providers = providers_lock(vec![build_provider("openai/gpt-4o-mini", "gpt-4o-mini")]);
+
+        let upstream_path = get_upstream_path(
+            &providers,
+            "openai/gpt-4o-mini",
+            "/v1/chat/completions",
+            "gpt-4o-mini",
+            false,
+        )
+        .await;
+
+        assert_eq!(upstream_path, "/v1/chat/completions");
+    }
+
+    #[tokio::test]
+    async fn test_perplexity_with_and_without_versioning_paths() {
+        let providers = providers_lock(vec![build_provider("perplexity/sonar-pro", "sonar-pro")]);
+
+        // This is the path Plano should use for Perplexity (works).
+        let success_path = get_upstream_path(
+            &providers,
+            "perplexity/sonar-pro",
+            "/v1/chat/completions",
+            "sonar-pro",
+            false,
+        )
+        .await;
+        assert_eq!(success_path, "/chat/completions");
+
+        // This is the generic OpenAI default path; for Perplexity this would 404.
+        let fail_path = SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions)
+            .target_endpoint_for_provider(
+                &hermesllm::ProviderId::OpenAI,
+                "/v1/chat/completions",
+                "sonar-pro",
+                false,
+                None,
+                false,
+            );
+        assert_eq!(fail_path, "/v1/chat/completions");
+        assert_ne!(success_path, fail_path);
     }
 }
diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs
index 23e14604..39b34358 100644
--- a/crates/hermesllm/src/clients/endpoints.rs
+++ b/crates/hermesllm/src/clients/endpoints.rs
@@ -92,6 +92,7 @@ impl SupportedAPIsFromClient {
         model_id: &str,
         is_streaming: bool,
         base_url_path_prefix: Option<&str>,
+        use_unversioned_paths: bool,
     ) -> String {
         // Helper function to build endpoint with optional prefix override
         let build_endpoint = |provider_prefix: &str, suffix: &str| -> String {
@@ -161,7 +162,13 @@
                     build_endpoint("/v1", endpoint_suffix)
                 }
             }
-            _ => build_endpoint("/v1", endpoint_suffix),
+            _ => {
+                if use_unversioned_paths {
+                    build_endpoint("", endpoint_suffix)
+                } else {
+                    build_endpoint("/v1", endpoint_suffix)
+                }
+            }
         }
     };
 
@@ -343,7 +350,8 @@ mod tests {
                 "/v1/chat/completions",
                 "gpt-4",
                 false,
-                None
+                None,
+                false
             ),
             "/v1/chat/completions"
         );
@@ -355,7 +363,8 @@ mod tests {
                 "/v1/chat/completions",
                 "llama2",
                 false,
-                None
+                None,
+                false
             ),
             "/openai/v1/chat/completions"
         );
@@ -367,7 +376,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                None
+                None,
+                false
             ),
             "/api/paas/v4/chat/completions"
         );
@@ -379,7 +389,8 @@ mod tests {
                 "/v1/chat/completions",
                 "qwen-turbo",
                 false,
-                None
+                None,
+                false
             ),
             "/compatible-mode/v1/chat/completions"
         );
@@ -391,7 +402,8 @@ mod tests {
                 "/v1/chat/completions",
                 "gpt-4",
                 false,
-                None
+                None,
+                false
             ),
             "/openai/deployments/gpt-4/chat/completions?api-version=2025-01-01-preview"
         );
@@ -403,12 +415,30 @@ mod tests {
                 "/v1/chat/completions",
                 "gemini-pro",
                 false,
-                None
+                None,
+                false
             ),
             "/v1beta/openai/chat/completions"
         );
     }
 
+    #[test]
+    fn test_target_endpoint_unversioned_paths() {
+        let api = SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
+
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::OpenAI,
+                "/v1/chat/completions",
+                "sonar-pro",
+                false,
+                None,
+                true
+            ),
+            "/chat/completions"
+        );
+    }
+
     #[test]
     fn test_target_endpoint_with_base_url_prefix() {
         let api = SupportedAPIsFromClient::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
@@ -419,7 +449,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                Some("/api/coding/paas/v4")
+                Some("/api/coding/paas/v4"),
+                false
             ),
             "/api/coding/paas/v4/chat/completions"
         );
@@ -431,7 +462,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                Some("api/coding/paas/v4")
+                Some("api/coding/paas/v4"),
+                false
             ),
             "/api/coding/paas/v4/chat/completions"
         );
@@ -443,7 +475,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                Some("/api/coding/paas/v4/")
+                Some("/api/coding/paas/v4/"),
+                false
             ),
             "/api/coding/paas/v4/chat/completions"
         );
@@ -455,7 +488,8 @@ mod tests {
                 "/v1/chat/completions",
                 "gpt-4",
                 false,
-                Some("/custom/api/v2")
+                Some("/custom/api/v2"),
+                false
             ),
             "/custom/api/v2/chat/completions"
         );
@@ -467,7 +501,8 @@ mod tests {
                 "/v1/chat/completions",
                 "llama2",
                 false,
-                Some("/api/v2")
+                Some("/api/v2"),
+                false
             ),
             "/api/v2/v1/chat/completions"
         );
@@ -485,7 +520,8 @@ mod tests {
                 "/v1/chat/completions",
                 "chatglm",
                 false,
-                Some("/")
+                Some("/"),
+                false
             ),
             "/api/paas/v4/chat/completions"
         );
@@ -497,7 +533,8 @@ mod tests {
                 "/v1/chat/completions",
"chatglm", false, - None + None, + false ), "/api/paas/v4/chat/completions" ); @@ -514,7 +551,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", false, - None + None, + false ), "/model/us.amazon.nova-pro-v1:0/converse" ); @@ -526,7 +564,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", true, - None + None, + false ), "/model/us.amazon.nova-pro-v1:0/converse-stream" ); @@ -538,7 +577,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", false, - Some("/custom/path") + Some("/custom/path"), + false ), "/custom/path/model/us.amazon.nova-pro-v1:0/converse" ); @@ -550,7 +590,8 @@ mod tests { "/v1/messages", "us.amazon.nova-pro-v1:0", true, - Some("/custom/path") + Some("/custom/path"), + false ), "/custom/path/model/us.amazon.nova-pro-v1:0/converse-stream" ); @@ -567,7 +608,8 @@ mod tests { "/v1/messages", "claude-3-opus", false, - None + None, + false ), "/v1/messages" ); @@ -579,7 +621,8 @@ mod tests { "/v1/messages", "claude-3-opus", false, - Some("/api/v2") + Some("/api/v2"), + false ), "/api/v2/messages" ); @@ -596,7 +639,8 @@ mod tests { "/custom/path", "llama2", false, - None + None, + false ), "/v1/chat/completions" ); @@ -608,7 +652,8 @@ mod tests { "/custom/path", "chatglm", false, - None + None, + false ), "/v1/chat/completions" ); @@ -620,7 +665,8 @@ mod tests { "/custom/path", "chatglm", false, - Some("/api/v2") + Some("/api/v2"), + false ), "/api/v2/chat/completions" ); @@ -637,7 +683,8 @@ mod tests { "/v1/chat/completions", "gpt-4-deployment", false, - None + None, + false ), "/openai/deployments/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview" ); @@ -649,7 +696,8 @@ mod tests { "/v1/chat/completions", "gpt-4-deployment", false, - Some("/custom/azure/path") + Some("/custom/azure/path"), + false ), "/custom/azure/path/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview" ); @@ -664,7 +712,8 @@ mod tests { "/v1/responses", "grok-4-1-fast-reasoning", false, - None + None, + false ), "/v1/responses" ); diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index f62631fa..afb0b050 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -122,6 +122,7 @@ impl StreamContext { .unwrap_or(&"".to_string()), self.streaming_response, self.llm_provider().base_url_path_prefix.as_deref(), + self.llm_provider().name.starts_with("perplexity/"), ); if target_endpoint != request_path { self.set_http_request_header(":path", Some(&target_endpoint));