adding support for base_url

2026-06-17 15:25:17 +02:00 · 2025-10-28 14:10:36 -07:00 · 2025-10-28 14:10:36 -07:00 · 85da10579f
commit 85da10579f
parent 5108013df4
5 changed files with 393 additions and 36 deletions
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@ -8,7 +8,14 @@ from urllib.parse import urlparse
 from copy import deepcopy


-SUPPORTED_PROVIDERS = [
+SUPPORTED_PROVIDERS_WITH_BASE_URL = [
+    "azure_openai",
+    "ollama",
+    "qwen",
+    "amazon_bedrock",
+]
+
+SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
    "arch",
    "deepseek",
    "groq",
@ -17,15 +24,15 @@ SUPPORTED_PROVIDERS = [
    "gemini",
    "anthropic",
    "together_ai",
-    "azure_openai",
    "xai",
-    "ollama",
    "moonshotai",
    "zhipu",
-    "qwen",
-    "amazon_bedrock",
 ]

+SUPPORTED_PROVIDERS = (
+    SUPPORTED_PROVIDERS_WITHOUT_BASE_URL + SUPPORTED_PROVIDERS_WITH_BASE_URL
+)
+

 def get_endpoint_and_port(endpoint, protocol):
    endpoint_tokens = endpoint.split(":")
@ -189,12 +196,9 @@ def validate_and_render_schema():
            provider = model_name_tokens[0]

            # Validate azure_openai and ollama provider requires base_url
-            if (
-                provider == "azure_openai"
-                or provider == "ollama"
-                or provider == "qwen"
-                or provider == "amazon_bedrock"
-            ) and model_provider.get("base_url") is None:
+            if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get(
+                "base_url"
+            ) is None:
                raise Exception(
                    f"Provider '{provider}' requires 'base_url' to be set for model {model_name}"
                )
@ -245,11 +249,11 @@ def validate_and_render_schema():
            if model_provider.get("base_url", None):
                base_url = model_provider["base_url"]
                urlparse_result = urlparse(base_url)
-                url_path = urlparse_result.path
-                if url_path and url_path != "/":
-                    raise Exception(
-                        f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
-                    )
+                base_url_path_prefix = urlparse_result.path
+                if base_url_path_prefix and base_url_path_prefix != "/":
+                    # base_url may now carry a path: for http://example.com/path, the path component (/path) is stored as base_url_path_prefix.
+                    model_provider["base_url_path_prefix"] = base_url_path_prefix
+
                if urlparse_result.scheme == "" or urlparse_result.scheme not in [
                    "http",
                    "https",
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@ -267,6 +267,7 @@ pub struct LlmProvider {
    pub usage: Option<String>,
    pub routing_preferences: Option<Vec<RoutingPreference>>,
    pub cluster_name: Option<String>,
+    pub base_url_path_prefix: Option<String>,
 }

 pub trait IntoModels {
@ -307,6 +308,7 @@ impl Default for LlmProvider {
            usage: None,
            routing_preferences: None,
            cluster_name: None,
+            base_url_path_prefix: None,
        }
    }
 }
--- a/crates/hermesllm/src/clients/endpoints.rs
+++ b/crates/hermesllm/src/clients/endpoints.rs
@ -77,70 +77,91 @@ impl SupportedAPIs {
        request_path: &str,
        model_id: &str,
        is_streaming: bool,
+        base_url_path_prefix: &str,
    ) -> String {
-        let default_endpoint = "/v1/chat/completions".to_string();
+        // Helper function to build endpoint with optional prefix override
+        let build_endpoint = |provider_prefix: &str, suffix: &str| -> String {
+            if !base_url_path_prefix.is_empty() {
+                // Use base_url_path_prefix instead of provider's default prefix
+                let prefix = base_url_path_prefix.trim_matches('/');
+                if prefix.is_empty() {
+                    // If prefix is just slashes, return suffix as-is
+                    suffix.to_string()
+                } else {
+                    format!("/{}{}", prefix, suffix)
+                }
+            } else {
+                // Use provider's default prefix
+                if provider_prefix.is_empty() {
+                    suffix.to_string()
+                } else {
+                    format!("{}{}", provider_prefix, suffix)
+                }
+            }
+        };
+
        match self {
            SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages) => match provider_id {
-                ProviderId::Anthropic => "/v1/messages".to_string(),
+                ProviderId::Anthropic => build_endpoint("/v1", "/messages"),
                ProviderId::AmazonBedrock => {
                    if request_path.starts_with("/v1/") && !is_streaming {
-                        format!("/model/{}/converse", model_id)
+                        build_endpoint("", &format!("/model/{}/converse", model_id))
                    } else if request_path.starts_with("/v1/") && is_streaming {
-                        format!("/model/{}/converse-stream", model_id)
+                        build_endpoint("", &format!("/model/{}/converse-stream", model_id))
                    } else {
-                        default_endpoint
+                        build_endpoint("/v1", "/chat/completions")
                    }
                }
-                _ => default_endpoint,
+                _ => build_endpoint("/v1", "/chat/completions"),
            },
            _ => match provider_id {
                ProviderId::Groq => {
                    if request_path.starts_with("/v1/") {
-                        format!("/openai{}", request_path)
+                        build_endpoint("/openai", request_path)
                    } else {
-                        default_endpoint
+                        build_endpoint("/v1", "/chat/completions")
                    }
                }
                ProviderId::Zhipu => {
                    if request_path.starts_with("/v1/") {
-                        "/api/paas/v4/chat/completions".to_string()
+                        build_endpoint("/api/paas/v4", "/chat/completions")
                    } else {
-                        default_endpoint
+                        build_endpoint("/v1", "/chat/completions")
                    }
                }
                ProviderId::Qwen => {
                    if request_path.starts_with("/v1/") {
-                        "/compatible-mode/v1/chat/completions".to_string()
+                        build_endpoint("/compatible-mode/v1", "/chat/completions")
                    } else {
-                        default_endpoint
+                        build_endpoint("/v1", "/chat/completions")
                    }
                }
                ProviderId::AzureOpenAI => {
                    if request_path.starts_with("/v1/") {
-                        format!("/openai/deployments/{}/chat/completions?api-version=2025-01-01-preview", model_id)
+                        build_endpoint("/openai/deployments", &format!("/{}/chat/completions?api-version=2025-01-01-preview", model_id))
                    } else {
-                        default_endpoint
+                        build_endpoint("/v1", "/chat/completions")
                    }
                }
                ProviderId::Gemini => {
                    if request_path.starts_with("/v1/") {
-                        "/v1beta/openai/chat/completions".to_string()
+                        build_endpoint("/v1beta/openai", "/chat/completions")
                    } else {
-                        default_endpoint
+                        build_endpoint("/v1", "/chat/completions")
                    }
                }
                ProviderId::AmazonBedrock => {
                    if request_path.starts_with("/v1/") {
                        if !is_streaming {
-                            format!("/model/{}/converse", model_id)
+                            build_endpoint("", &format!("/model/{}/converse", model_id))
                        } else {
-                            format!("/model/{}/converse-stream", model_id)
+                            build_endpoint("", &format!("/model/{}/converse-stream", model_id))
                        }
                    } else {
-                        default_endpoint
+                        build_endpoint("/v1", "/chat/completions")
                    }
                }
-                _ => default_endpoint,
+                _ => build_endpoint("/v1", "/chat/completions"),
            },
        }
    }
@ -245,4 +266,327 @@ mod tests {
            OpenAIApi::all_variants().len() + AnthropicApi::all_variants().len()
        );
    }
+
+    #[test]
+    fn test_target_endpoint_without_base_url_prefix() {
+        let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
+
+        // Test default OpenAI provider
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::OpenAI,
+                "/v1/chat/completions",
+                "gpt-4",
+                false,
+                ""
+            ),
+            "/v1/chat/completions"
+        );
+
+        // Test Groq provider
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Groq,
+                "/v1/chat/completions",
+                "llama2",
+                false,
+                ""
+            ),
+            "/openai/v1/chat/completions"
+        );
+
+        // Test Zhipu provider
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Zhipu,
+                "/v1/chat/completions",
+                "chatglm",
+                false,
+                ""
+            ),
+            "/api/paas/v4/chat/completions"
+        );
+
+        // Test Qwen provider
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Qwen,
+                "/v1/chat/completions",
+                "qwen-turbo",
+                false,
+                ""
+            ),
+            "/compatible-mode/v1/chat/completions"
+        );
+
+        // Test Azure OpenAI provider
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::AzureOpenAI,
+                "/v1/chat/completions",
+                "gpt-4",
+                false,
+                ""
+            ),
+            "/openai/deployments/gpt-4/chat/completions?api-version=2025-01-01-preview"
+        );
+
+        // Test Gemini provider
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Gemini,
+                "/v1/chat/completions",
+                "gemini-pro",
+                false,
+                ""
+            ),
+            "/v1beta/openai/chat/completions"
+        );
+    }
+
+    #[test]
+    fn test_target_endpoint_with_base_url_prefix() {
+        let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
+
+        // Test Zhipu with custom base_url_path_prefix
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Zhipu,
+                "/v1/chat/completions",
+                "chatglm",
+                false,
+                "/api/coding/paas/v4"
+            ),
+            "/api/coding/paas/v4/chat/completions"
+        );
+
+        // Test with prefix without leading slash
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Zhipu,
+                "/v1/chat/completions",
+                "chatglm",
+                false,
+                "api/coding/paas/v4"
+            ),
+            "/api/coding/paas/v4/chat/completions"
+        );
+
+        // Test with prefix with trailing slash
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Zhipu,
+                "/v1/chat/completions",
+                "chatglm",
+                false,
+                "/api/coding/paas/v4/"
+            ),
+            "/api/coding/paas/v4/chat/completions"
+        );
+
+        // Test OpenAI with custom prefix
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::OpenAI,
+                "/v1/chat/completions",
+                "gpt-4",
+                false,
+                "/custom/api/v2"
+            ),
+            "/custom/api/v2/chat/completions"
+        );
+
+        // Test Groq with custom prefix (the full request path, including /v1, is appended to the prefix)
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Groq,
+                "/v1/chat/completions",
+                "llama2",
+                false,
+                "/api/v2"
+            ),
+            "/api/v2/v1/chat/completions"
+        );
+    }
+
+    #[test]
+    fn test_target_endpoint_with_empty_base_url_prefix() {
+        let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
+
+        // Test with just slashes - prefix trims to empty, so only the suffix is returned (provider default prefix is not used)
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Zhipu,
+                "/v1/chat/completions",
+                "chatglm",
+                false,
+                "/"
+            ),
+            "/chat/completions"
+        );
+
+        // Test with empty string
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Zhipu,
+                "/v1/chat/completions",
+                "chatglm",
+                false,
+                ""
+            ),
+            "/api/paas/v4/chat/completions"
+        );
+    }
+
+    #[test]
+    fn test_amazon_bedrock_endpoints() {
+        let api = SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages);
+
+        // Test Bedrock non-streaming without prefix
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::AmazonBedrock,
+                "/v1/messages",
+                "us.amazon.nova-pro-v1:0",
+                false,
+                ""
+            ),
+            "/model/us.amazon.nova-pro-v1:0/converse"
+        );
+
+        // Test Bedrock streaming without prefix
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::AmazonBedrock,
+                "/v1/messages",
+                "us.amazon.nova-pro-v1:0",
+                true,
+                ""
+            ),
+            "/model/us.amazon.nova-pro-v1:0/converse-stream"
+        );
+
+        // Test Bedrock non-streaming with prefix (prefix is prepended to the Bedrock path)
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::AmazonBedrock,
+                "/v1/messages",
+                "us.amazon.nova-pro-v1:0",
+                false,
+                "/custom/path"
+            ),
+            "/custom/path/model/us.amazon.nova-pro-v1:0/converse"
+        );
+
+        // Test Bedrock streaming with prefix
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::AmazonBedrock,
+                "/v1/messages",
+                "us.amazon.nova-pro-v1:0",
+                true,
+                "/custom/path"
+            ),
+            "/custom/path/model/us.amazon.nova-pro-v1:0/converse-stream"
+        );
+    }
+
+    #[test]
+    fn test_anthropic_messages_endpoint() {
+        let api = SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages);
+
+        // Test Anthropic without prefix
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Anthropic,
+                "/v1/messages",
+                "claude-3-opus",
+                false,
+                ""
+            ),
+            "/v1/messages"
+        );
+
+        // Test Anthropic with prefix
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Anthropic,
+                "/v1/messages",
+                "claude-3-opus",
+                false,
+                "/api/v2"
+            ),
+            "/api/v2/messages"
+        );
+    }
+
+    #[test]
+    fn test_non_v1_request_paths() {
+        let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
+
+        // Test Groq with non-v1 path (should use default)
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Groq,
+                "/custom/path",
+                "llama2",
+                false,
+                ""
+            ),
+            "/v1/chat/completions"
+        );
+
+        // Test Zhipu with non-v1 path
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Zhipu,
+                "/custom/path",
+                "chatglm",
+                false,
+                ""
+            ),
+            "/v1/chat/completions"
+        );
+
+        // Test with prefix on non-v1 path
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::Zhipu,
+                "/custom/path",
+                "chatglm",
+                false,
+                "/api/v2"
+            ),
+            "/api/v2/chat/completions"
+        );
+    }
+
+    #[test]
+    fn test_azure_openai_with_query_params() {
+        let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
+
+        // Test Azure without prefix - should include query params
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::AzureOpenAI,
+                "/v1/chat/completions",
+                "gpt-4-deployment",
+                false,
+                ""
+            ),
+            "/openai/deployments/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview"
+        );
+
+        // Test Azure with prefix - prefix should replace /openai/deployments
+        assert_eq!(
+            api.target_endpoint_for_provider(
+                &ProviderId::AzureOpenAI,
+                "/v1/chat/completions",
+                "gpt-4-deployment",
+                false,
+                "/custom/azure/path"
+            ),
+            "/custom/azure/path/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview"
+        );
+    }
 }
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -121,6 +121,10 @@ impl StreamContext {
                    .as_ref()
                    .unwrap_or(&"".to_string()),
                self.streaming_response,
+                self.llm_provider()
+                    .base_url_path_prefix
+                    .as_deref()
+                    .unwrap_or(""),
            );
            if target_endpoint != request_path {
                self.set_http_request_header(":path", Some(&target_endpoint));
--- a/demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml
+++ b/demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml
@ -43,6 +43,9 @@ llm_providers:
    access_key: $AWS_BEARER_TOKEN_BEDROCK
    base_url: https://bedrock-runtime.us-west-2.amazonaws.com

+  - model: zhipu/glm-4.6
+    access_key: $ZHIPU_API_KEY
+    base_url: https://api.z.ai/api/coding/paas/v4

  # Ollama Models
  - model: ollama/llama3.1