From 85da10579f8fae5fae2a81a20959a4fd2ca937e9 Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Tue, 28 Oct 2025 14:10:36 -0700 Subject: [PATCH] adding support for base_url --- arch/tools/cli/config_generator.py | 36 +- crates/common/src/configuration.rs | 2 + crates/hermesllm/src/clients/endpoints.rs | 384 +++++++++++++++++- crates/llm_gateway/src/stream_context.rs | 4 + .../arch_config_with_aliases.yaml | 3 + 5 files changed, 393 insertions(+), 36 deletions(-) diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index a1facae8..ead0a351 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -8,7 +8,14 @@ from urllib.parse import urlparse from copy import deepcopy -SUPPORTED_PROVIDERS = [ +SUPPORTED_PROVIDERS_WITH_BASE_URL = [ + "azure_openai", + "ollama", + "qwen", + "amazon_bedrock", +] + +SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [ "arch", "deepseek", "groq", @@ -17,15 +24,15 @@ SUPPORTED_PROVIDERS = [ "gemini", "anthropic", "together_ai", - "azure_openai", "xai", - "ollama", "moonshotai", "zhipu", - "qwen", - "amazon_bedrock", ] +SUPPORTED_PROVIDERS = ( + SUPPORTED_PROVIDERS_WITHOUT_BASE_URL + SUPPORTED_PROVIDERS_WITH_BASE_URL +) + def get_endpoint_and_port(endpoint, protocol): endpoint_tokens = endpoint.split(":") @@ -189,12 +196,9 @@ def validate_and_render_schema(): provider = model_name_tokens[0] # Validate azure_openai and ollama provider requires base_url - if ( - provider == "azure_openai" - or provider == "ollama" - or provider == "qwen" - or provider == "amazon_bedrock" - ) and model_provider.get("base_url") is None: + if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get( + "base_url" + ) is None: raise Exception( f"Provider '{provider}' requires 'base_url' to be set for model {model_name}" ) @@ -245,11 +249,11 @@ def validate_and_render_schema(): if model_provider.get("base_url", None): base_url = model_provider["base_url"] urlparse_result = urlparse(base_url) - 
url_path = urlparse_result.path - if url_path and url_path != "/": - raise Exception( - f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'." - ) + base_url_path_prefix = urlparse_result.path + if base_url_path_prefix and base_url_path_prefix != "/": + # we will now support base_url_path_prefix. This means that the user can provide base_url like http://example.com/path and we will extract /path as base_url_path_prefix + model_provider["base_url_path_prefix"] = base_url_path_prefix + if urlparse_result.scheme == "" or urlparse_result.scheme not in [ "http", "https", diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index dc1b74e9..27f8ebd9 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -267,6 +267,7 @@ pub struct LlmProvider { pub usage: Option, pub routing_preferences: Option>, pub cluster_name: Option, + pub base_url_path_prefix: Option, } pub trait IntoModels { @@ -307,6 +308,7 @@ impl Default for LlmProvider { usage: None, routing_preferences: None, cluster_name: None, + base_url_path_prefix: None, } } } diff --git a/crates/hermesllm/src/clients/endpoints.rs b/crates/hermesllm/src/clients/endpoints.rs index 5177ce97..09c44c10 100644 --- a/crates/hermesllm/src/clients/endpoints.rs +++ b/crates/hermesllm/src/clients/endpoints.rs @@ -77,70 +77,91 @@ impl SupportedAPIs { request_path: &str, model_id: &str, is_streaming: bool, + base_url_path_prefix: &str, ) -> String { - let default_endpoint = "/v1/chat/completions".to_string(); + // Helper function to build endpoint with optional prefix override + let build_endpoint = |provider_prefix: &str, suffix: &str| -> String { + if !base_url_path_prefix.is_empty() { + // Use base_url_path_prefix instead of provider's default prefix + let prefix = base_url_path_prefix.trim_matches('/'); + if prefix.is_empty() { + // If prefix is just slashes, return suffix as-is + 
suffix.to_string() + } else { + format!("/{}{}", prefix, suffix) + } + } else { + // Use provider's default prefix + if provider_prefix.is_empty() { + suffix.to_string() + } else { + format!("{}{}", provider_prefix, suffix) + } + } + }; + match self { SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages) => match provider_id { - ProviderId::Anthropic => "/v1/messages".to_string(), + ProviderId::Anthropic => build_endpoint("/v1", "/messages"), ProviderId::AmazonBedrock => { if request_path.starts_with("/v1/") && !is_streaming { - format!("/model/{}/converse", model_id) + build_endpoint("", &format!("/model/{}/converse", model_id)) } else if request_path.starts_with("/v1/") && is_streaming { - format!("/model/{}/converse-stream", model_id) + build_endpoint("", &format!("/model/{}/converse-stream", model_id)) } else { - default_endpoint + build_endpoint("/v1", "/chat/completions") } } - _ => default_endpoint, + _ => build_endpoint("/v1", "/chat/completions"), }, _ => match provider_id { ProviderId::Groq => { if request_path.starts_with("/v1/") { - format!("/openai{}", request_path) + build_endpoint("/openai", request_path) } else { - default_endpoint + build_endpoint("/v1", "/chat/completions") } } ProviderId::Zhipu => { if request_path.starts_with("/v1/") { - "/api/paas/v4/chat/completions".to_string() + build_endpoint("/api/paas/v4", "/chat/completions") } else { - default_endpoint + build_endpoint("/v1", "/chat/completions") } } ProviderId::Qwen => { if request_path.starts_with("/v1/") { - "/compatible-mode/v1/chat/completions".to_string() + build_endpoint("/compatible-mode/v1", "/chat/completions") } else { - default_endpoint + build_endpoint("/v1", "/chat/completions") } } ProviderId::AzureOpenAI => { if request_path.starts_with("/v1/") { - format!("/openai/deployments/{}/chat/completions?api-version=2025-01-01-preview", model_id) + build_endpoint("/openai/deployments", &format!("/{}/chat/completions?api-version=2025-01-01-preview", model_id)) } else { - 
default_endpoint + build_endpoint("/v1", "/chat/completions") } } ProviderId::Gemini => { if request_path.starts_with("/v1/") { - "/v1beta/openai/chat/completions".to_string() + build_endpoint("/v1beta/openai", "/chat/completions") } else { - default_endpoint + build_endpoint("/v1", "/chat/completions") } } ProviderId::AmazonBedrock => { if request_path.starts_with("/v1/") { if !is_streaming { - format!("/model/{}/converse", model_id) + build_endpoint("", &format!("/model/{}/converse", model_id)) } else { - format!("/model/{}/converse-stream", model_id) + build_endpoint("", &format!("/model/{}/converse-stream", model_id)) } } else { - default_endpoint + build_endpoint("/v1", "/chat/completions") } } - _ => default_endpoint, + _ => build_endpoint("/v1", "/chat/completions"), }, } } @@ -245,4 +266,327 @@ mod tests { OpenAIApi::all_variants().len() + AnthropicApi::all_variants().len() ); } + + #[test] + fn test_target_endpoint_without_base_url_prefix() { + let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions); + + // Test default OpenAI provider + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::OpenAI, + "/v1/chat/completions", + "gpt-4", + false, + "" + ), + "/v1/chat/completions" + ); + + // Test Groq provider + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Groq, + "/v1/chat/completions", + "llama2", + false, + "" + ), + "/openai/v1/chat/completions" + ); + + // Test Zhipu provider + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Zhipu, + "/v1/chat/completions", + "chatglm", + false, + "" + ), + "/api/paas/v4/chat/completions" + ); + + // Test Qwen provider + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Qwen, + "/v1/chat/completions", + "qwen-turbo", + false, + "" + ), + "/compatible-mode/v1/chat/completions" + ); + + // Test Azure OpenAI provider + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::AzureOpenAI, + "/v1/chat/completions", + "gpt-4", + false, + "" + ), + 
"/openai/deployments/gpt-4/chat/completions?api-version=2025-01-01-preview" + ); + + // Test Gemini provider + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Gemini, + "/v1/chat/completions", + "gemini-pro", + false, + "" + ), + "/v1beta/openai/chat/completions" + ); + } + + #[test] + fn test_target_endpoint_with_base_url_prefix() { + let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions); + + // Test Zhipu with custom base_url_path_prefix + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Zhipu, + "/v1/chat/completions", + "chatglm", + false, + "/api/coding/paas/v4" + ), + "/api/coding/paas/v4/chat/completions" + ); + + // Test with prefix without leading slash + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Zhipu, + "/v1/chat/completions", + "chatglm", + false, + "api/coding/paas/v4" + ), + "/api/coding/paas/v4/chat/completions" + ); + + // Test with prefix with trailing slash + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Zhipu, + "/v1/chat/completions", + "chatglm", + false, + "/api/coding/paas/v4/" + ), + "/api/coding/paas/v4/chat/completions" + ); + + // Test OpenAI with custom prefix + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::OpenAI, + "/v1/chat/completions", + "gpt-4", + false, + "/custom/api/v2" + ), + "/custom/api/v2/chat/completions" + ); + + // Test Groq with custom prefix - the prefix replaces /openai and the full request path (including /v1) is appended + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Groq, + "/v1/chat/completions", + "llama2", + false, + "/api/v2" + ), + "/api/v2/v1/chat/completions" + ); + } + + #[test] + fn test_target_endpoint_with_empty_base_url_prefix() { + let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions); + + // Test with just slashes - prefix trims to empty, so only the suffix is returned (the provider default prefix is dropped) + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Zhipu, + "/v1/chat/completions", + "chatglm", + false, + "/" + ), + "/chat/completions" + ); + + // Test with empty string + assert_eq!( + 
api.target_endpoint_for_provider( + &ProviderId::Zhipu, + "/v1/chat/completions", + "chatglm", + false, + "" + ), + "/api/paas/v4/chat/completions" + ); + } + + #[test] + fn test_amazon_bedrock_endpoints() { + let api = SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages); + + // Test Bedrock non-streaming without prefix + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::AmazonBedrock, + "/v1/messages", + "us.amazon.nova-pro-v1:0", + false, + "" + ), + "/model/us.amazon.nova-pro-v1:0/converse" + ); + + // Test Bedrock streaming without prefix + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::AmazonBedrock, + "/v1/messages", + "us.amazon.nova-pro-v1:0", + true, + "" + ), + "/model/us.amazon.nova-pro-v1:0/converse-stream" + ); + + // Test Bedrock non-streaming with prefix (prefix is prepended to the bedrock model path) + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::AmazonBedrock, + "/v1/messages", + "us.amazon.nova-pro-v1:0", + false, + "/custom/path" + ), + "/custom/path/model/us.amazon.nova-pro-v1:0/converse" + ); + + // Test Bedrock streaming with prefix + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::AmazonBedrock, + "/v1/messages", + "us.amazon.nova-pro-v1:0", + true, + "/custom/path" + ), + "/custom/path/model/us.amazon.nova-pro-v1:0/converse-stream" + ); + } + + #[test] + fn test_anthropic_messages_endpoint() { + let api = SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages); + + // Test Anthropic without prefix + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Anthropic, + "/v1/messages", + "claude-3-opus", + false, + "" + ), + "/v1/messages" + ); + + // Test Anthropic with prefix + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Anthropic, + "/v1/messages", + "claude-3-opus", + false, + "/api/v2" + ), + "/api/v2/messages" + ); + } + + #[test] + fn test_non_v1_request_paths() { + let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions); + + // Test 
Groq with non-v1 path (should use default) + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Groq, + "/custom/path", + "llama2", + false, + "" + ), + "/v1/chat/completions" + ); + + // Test Zhipu with non-v1 path + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Zhipu, + "/custom/path", + "chatglm", + false, + "" + ), + "/v1/chat/completions" + ); + + // Test with prefix on non-v1 path + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::Zhipu, + "/custom/path", + "chatglm", + false, + "/api/v2" + ), + "/api/v2/chat/completions" + ); + } + + #[test] + fn test_azure_openai_with_query_params() { + let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions); + + // Test Azure without prefix - should include query params + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::AzureOpenAI, + "/v1/chat/completions", + "gpt-4-deployment", + false, + "" + ), + "/openai/deployments/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview" + ); + + // Test Azure with prefix - prefix should replace /openai/deployments + assert_eq!( + api.target_endpoint_for_provider( + &ProviderId::AzureOpenAI, + "/v1/chat/completions", + "gpt-4-deployment", + false, + "/custom/azure/path" + ), + "/custom/azure/path/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview" + ); + } } diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 870530ab..fb32d4e5 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -121,6 +121,10 @@ impl StreamContext { .as_ref() .unwrap_or(&"".to_string()), self.streaming_response, + self.llm_provider() + .base_url_path_prefix + .as_deref() + .unwrap_or(""), ); if target_endpoint != request_path { self.set_http_request_header(":path", Some(&target_endpoint)); diff --git a/demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml 
b/demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml index ae1e2499..847faabe 100644 --- a/demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml +++ b/demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml @@ -43,6 +43,9 @@ llm_providers: access_key: $AWS_BEARER_TOKEN_BEDROCK base_url: https://bedrock-runtime.us-west-2.amazonaws.com + - model: zhipu/glm-4.6 + access_key: $ZHIPU_API_KEY + base_url: https://api.z.ai/api/coding/paas/v4 # Ollama Models - model: ollama/llama3.1