mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
support base_url path for model providers (#608)
* adding support for base_url * updated docs * fixed tests for config generator * making fixes based on PR comments --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
This commit is contained in:
parent
5108013df4
commit
cdfcfb9169
7 changed files with 459 additions and 46 deletions
|
|
@ -8,7 +8,14 @@ from urllib.parse import urlparse
|
|||
from copy import deepcopy
|
||||
|
||||
|
||||
SUPPORTED_PROVIDERS = [
|
||||
SUPPORTED_PROVIDERS_WITH_BASE_URL = [
|
||||
"azure_openai",
|
||||
"ollama",
|
||||
"qwen",
|
||||
"amazon_bedrock",
|
||||
]
|
||||
|
||||
SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
|
||||
"arch",
|
||||
"deepseek",
|
||||
"groq",
|
||||
|
|
@ -17,15 +24,15 @@ SUPPORTED_PROVIDERS = [
|
|||
"gemini",
|
||||
"anthropic",
|
||||
"together_ai",
|
||||
"azure_openai",
|
||||
"xai",
|
||||
"ollama",
|
||||
"moonshotai",
|
||||
"zhipu",
|
||||
"qwen",
|
||||
"amazon_bedrock",
|
||||
]
|
||||
|
||||
SUPPORTED_PROVIDERS = (
|
||||
SUPPORTED_PROVIDERS_WITHOUT_BASE_URL + SUPPORTED_PROVIDERS_WITH_BASE_URL
|
||||
)
|
||||
|
||||
|
||||
def get_endpoint_and_port(endpoint, protocol):
|
||||
endpoint_tokens = endpoint.split(":")
|
||||
|
|
@ -189,12 +196,9 @@ def validate_and_render_schema():
|
|||
provider = model_name_tokens[0]
|
||||
|
||||
# Validate that providers requiring base_url (azure_openai, ollama, qwen, amazon_bedrock) have it set
|
||||
if (
|
||||
provider == "azure_openai"
|
||||
or provider == "ollama"
|
||||
or provider == "qwen"
|
||||
or provider == "amazon_bedrock"
|
||||
) and model_provider.get("base_url") is None:
|
||||
if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get(
|
||||
"base_url"
|
||||
) is None:
|
||||
raise Exception(
|
||||
f"Provider '{provider}' requires 'base_url' to be set for model {model_name}"
|
||||
)
|
||||
|
|
@ -245,11 +249,11 @@ def validate_and_render_schema():
|
|||
if model_provider.get("base_url", None):
|
||||
base_url = model_provider["base_url"]
|
||||
urlparse_result = urlparse(base_url)
|
||||
url_path = urlparse_result.path
|
||||
if url_path and url_path != "/":
|
||||
raise Exception(
|
||||
f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
|
||||
)
|
||||
base_url_path_prefix = urlparse_result.path
|
||||
if base_url_path_prefix and base_url_path_prefix != "/":
|
||||
# base_url may include a path (e.g. http://example.com/path); the path component (/path) is extracted and stored as base_url_path_prefix
|
||||
model_provider["base_url_path_prefix"] = base_url_path_prefix
|
||||
|
||||
if urlparse_result.scheme == "" or urlparse_result.scheme not in [
|
||||
"http",
|
||||
"https",
|
||||
|
|
|
|||
|
|
@ -243,14 +243,13 @@ listeners:
|
|||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
|
||||
- model: custom/gpt-4o
|
||||
|
||||
""",
|
||||
},
|
||||
{
|
||||
"id": "base_url_no_prefix",
|
||||
"expected_error": "Please provide base_url without path",
|
||||
"id": "base_url_with_path_prefix",
|
||||
"expected_error": None,
|
||||
"arch_config": """
|
||||
version: v0.1.0
|
||||
|
||||
|
|
@ -264,7 +263,7 @@ listeners:
|
|||
llm_providers:
|
||||
|
||||
- model: custom/gpt-4o
|
||||
base_url: "http://custom.com/test"
|
||||
base_url: "http://custom.com/api/v2"
|
||||
provider_interface: openai
|
||||
|
||||
""",
|
||||
|
|
@ -322,8 +321,7 @@ def test_validate_and_render_schema_tests(monkeypatch, arch_config_test_case):
|
|||
monkeypatch.setenv("TEMPLATE_ROOT", "../")
|
||||
|
||||
arch_config = arch_config_test_case["arch_config"]
|
||||
expected_error = arch_config_test_case["expected_error"]
|
||||
test_id = arch_config_test_case["id"]
|
||||
expected_error = arch_config_test_case.get("expected_error")
|
||||
|
||||
arch_config_schema = ""
|
||||
with open("../arch_config_schema.yaml", "r") as file:
|
||||
|
|
@ -346,9 +344,14 @@ def test_validate_and_render_schema_tests(monkeypatch, arch_config_test_case):
|
|||
]
|
||||
with mock.patch("builtins.open", m_open):
|
||||
with mock.patch("config_generator.Environment"):
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
if expected_error:
|
||||
# Test expects an error
|
||||
with pytest.raises(Exception) as excinfo:
|
||||
validate_and_render_schema()
|
||||
assert expected_error in str(excinfo.value)
|
||||
else:
|
||||
# Test expects success - no exception should be raised
|
||||
validate_and_render_schema()
|
||||
assert expected_error in str(excinfo.value)
|
||||
|
||||
|
||||
def test_convert_legacy_llm_providers():
|
||||
|
|
|
|||
|
|
@ -267,6 +267,7 @@ pub struct LlmProvider {
|
|||
pub usage: Option<String>,
|
||||
pub routing_preferences: Option<Vec<RoutingPreference>>,
|
||||
pub cluster_name: Option<String>,
|
||||
pub base_url_path_prefix: Option<String>,
|
||||
}
|
||||
|
||||
pub trait IntoModels {
|
||||
|
|
@ -307,6 +308,7 @@ impl Default for LlmProvider {
|
|||
usage: None,
|
||||
routing_preferences: None,
|
||||
cluster_name: None,
|
||||
base_url_path_prefix: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,70 +77,85 @@ impl SupportedAPIs {
|
|||
request_path: &str,
|
||||
model_id: &str,
|
||||
is_streaming: bool,
|
||||
base_url_path_prefix: Option<&str>,
|
||||
) -> String {
|
||||
let default_endpoint = "/v1/chat/completions".to_string();
|
||||
// Helper function to build endpoint with optional prefix override
|
||||
let build_endpoint = |provider_prefix: &str, suffix: &str| -> String {
|
||||
let prefix = base_url_path_prefix
|
||||
.map(|p| p.trim_matches('/'))
|
||||
.filter(|p| !p.is_empty())
|
||||
.unwrap_or(provider_prefix.trim_matches('/'));
|
||||
|
||||
let suffix = suffix.trim_start_matches('/');
|
||||
if prefix.is_empty() {
|
||||
format!("/{}", suffix)
|
||||
} else {
|
||||
format!("/{}/{}", prefix, suffix)
|
||||
}
|
||||
};
|
||||
|
||||
match self {
|
||||
SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages) => match provider_id {
|
||||
ProviderId::Anthropic => "/v1/messages".to_string(),
|
||||
ProviderId::Anthropic => build_endpoint("/v1", "/messages"),
|
||||
ProviderId::AmazonBedrock => {
|
||||
if request_path.starts_with("/v1/") && !is_streaming {
|
||||
format!("/model/{}/converse", model_id)
|
||||
build_endpoint("", &format!("/model/{}/converse", model_id))
|
||||
} else if request_path.starts_with("/v1/") && is_streaming {
|
||||
format!("/model/{}/converse-stream", model_id)
|
||||
build_endpoint("", &format!("/model/{}/converse-stream", model_id))
|
||||
} else {
|
||||
default_endpoint
|
||||
build_endpoint("/v1", "/chat/completions")
|
||||
}
|
||||
}
|
||||
_ => default_endpoint,
|
||||
_ => build_endpoint("/v1", "/chat/completions"),
|
||||
},
|
||||
_ => match provider_id {
|
||||
ProviderId::Groq => {
|
||||
if request_path.starts_with("/v1/") {
|
||||
format!("/openai{}", request_path)
|
||||
build_endpoint("/openai", request_path)
|
||||
} else {
|
||||
default_endpoint
|
||||
build_endpoint("/v1", "/chat/completions")
|
||||
}
|
||||
}
|
||||
ProviderId::Zhipu => {
|
||||
if request_path.starts_with("/v1/") {
|
||||
"/api/paas/v4/chat/completions".to_string()
|
||||
build_endpoint("/api/paas/v4", "/chat/completions")
|
||||
} else {
|
||||
default_endpoint
|
||||
build_endpoint("/v1", "/chat/completions")
|
||||
}
|
||||
}
|
||||
ProviderId::Qwen => {
|
||||
if request_path.starts_with("/v1/") {
|
||||
"/compatible-mode/v1/chat/completions".to_string()
|
||||
build_endpoint("/compatible-mode/v1", "/chat/completions")
|
||||
} else {
|
||||
default_endpoint
|
||||
build_endpoint("/v1", "/chat/completions")
|
||||
}
|
||||
}
|
||||
ProviderId::AzureOpenAI => {
|
||||
if request_path.starts_with("/v1/") {
|
||||
format!("/openai/deployments/{}/chat/completions?api-version=2025-01-01-preview", model_id)
|
||||
build_endpoint("/openai/deployments", &format!("/{}/chat/completions?api-version=2025-01-01-preview", model_id))
|
||||
} else {
|
||||
default_endpoint
|
||||
build_endpoint("/v1", "/chat/completions")
|
||||
}
|
||||
}
|
||||
ProviderId::Gemini => {
|
||||
if request_path.starts_with("/v1/") {
|
||||
"/v1beta/openai/chat/completions".to_string()
|
||||
build_endpoint("/v1beta/openai", "/chat/completions")
|
||||
} else {
|
||||
default_endpoint
|
||||
build_endpoint("/v1", "/chat/completions")
|
||||
}
|
||||
}
|
||||
ProviderId::AmazonBedrock => {
|
||||
if request_path.starts_with("/v1/") {
|
||||
if !is_streaming {
|
||||
format!("/model/{}/converse", model_id)
|
||||
build_endpoint("", &format!("/model/{}/converse", model_id))
|
||||
} else {
|
||||
format!("/model/{}/converse-stream", model_id)
|
||||
build_endpoint("", &format!("/model/{}/converse-stream", model_id))
|
||||
}
|
||||
} else {
|
||||
default_endpoint
|
||||
build_endpoint("/v1", "/chat/completions")
|
||||
}
|
||||
}
|
||||
_ => default_endpoint,
|
||||
_ => build_endpoint("/v1", "/chat/completions"),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
|
@ -245,4 +260,327 @@ mod tests {
|
|||
OpenAIApi::all_variants().len() + AnthropicApi::all_variants().len()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_target_endpoint_without_base_url_prefix() {
|
||||
let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
|
||||
// Test default OpenAI provider
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::OpenAI,
|
||||
"/v1/chat/completions",
|
||||
"gpt-4",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/v1/chat/completions"
|
||||
);
|
||||
|
||||
// Test Groq provider
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Groq,
|
||||
"/v1/chat/completions",
|
||||
"llama2",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/openai/v1/chat/completions"
|
||||
);
|
||||
|
||||
// Test Zhipu provider
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Zhipu,
|
||||
"/v1/chat/completions",
|
||||
"chatglm",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/api/paas/v4/chat/completions"
|
||||
);
|
||||
|
||||
// Test Qwen provider
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Qwen,
|
||||
"/v1/chat/completions",
|
||||
"qwen-turbo",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/compatible-mode/v1/chat/completions"
|
||||
);
|
||||
|
||||
// Test Azure OpenAI provider
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::AzureOpenAI,
|
||||
"/v1/chat/completions",
|
||||
"gpt-4",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/openai/deployments/gpt-4/chat/completions?api-version=2025-01-01-preview"
|
||||
);
|
||||
|
||||
// Test Gemini provider
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Gemini,
|
||||
"/v1/chat/completions",
|
||||
"gemini-pro",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/v1beta/openai/chat/completions"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_target_endpoint_with_base_url_prefix() {
|
||||
let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
|
||||
// Test Zhipu with custom base_url_path_prefix
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Zhipu,
|
||||
"/v1/chat/completions",
|
||||
"chatglm",
|
||||
false,
|
||||
Some("/api/coding/paas/v4")
|
||||
),
|
||||
"/api/coding/paas/v4/chat/completions"
|
||||
);
|
||||
|
||||
// Test with prefix without leading slash
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Zhipu,
|
||||
"/v1/chat/completions",
|
||||
"chatglm",
|
||||
false,
|
||||
Some("api/coding/paas/v4")
|
||||
),
|
||||
"/api/coding/paas/v4/chat/completions"
|
||||
);
|
||||
|
||||
// Test with prefix with trailing slash
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Zhipu,
|
||||
"/v1/chat/completions",
|
||||
"chatglm",
|
||||
false,
|
||||
Some("/api/coding/paas/v4/")
|
||||
),
|
||||
"/api/coding/paas/v4/chat/completions"
|
||||
);
|
||||
|
||||
// Test OpenAI with custom prefix
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::OpenAI,
|
||||
"/v1/chat/completions",
|
||||
"gpt-4",
|
||||
false,
|
||||
Some("/custom/api/v2")
|
||||
),
|
||||
"/custom/api/v2/chat/completions"
|
||||
);
|
||||
|
||||
// Test Groq with custom prefix (prefix replaces /openai; the full /v1/... request path is kept as the suffix)
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Groq,
|
||||
"/v1/chat/completions",
|
||||
"llama2",
|
||||
false,
|
||||
Some("/api/v2")
|
||||
),
|
||||
"/api/v2/v1/chat/completions"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_target_endpoint_with_empty_base_url_prefix() {
|
||||
let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
|
||||
// Test with just slashes - trims to empty, uses provider default
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Zhipu,
|
||||
"/v1/chat/completions",
|
||||
"chatglm",
|
||||
false,
|
||||
Some("/")
|
||||
),
|
||||
"/api/paas/v4/chat/completions"
|
||||
);
|
||||
|
||||
// Test with None - uses provider default
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Zhipu,
|
||||
"/v1/chat/completions",
|
||||
"chatglm",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/api/paas/v4/chat/completions"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_amazon_bedrock_endpoints() {
|
||||
let api = SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages);
|
||||
|
||||
// Test Bedrock non-streaming without prefix
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::AmazonBedrock,
|
||||
"/v1/messages",
|
||||
"us.amazon.nova-pro-v1:0",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/model/us.amazon.nova-pro-v1:0/converse"
|
||||
);
|
||||
|
||||
// Test Bedrock streaming without prefix
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::AmazonBedrock,
|
||||
"/v1/messages",
|
||||
"us.amazon.nova-pro-v1:0",
|
||||
true,
|
||||
None
|
||||
),
|
||||
"/model/us.amazon.nova-pro-v1:0/converse-stream"
|
||||
);
|
||||
|
||||
// Test Bedrock non-streaming with prefix (prefix is prepended to the Bedrock model path)
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::AmazonBedrock,
|
||||
"/v1/messages",
|
||||
"us.amazon.nova-pro-v1:0",
|
||||
false,
|
||||
Some("/custom/path")
|
||||
),
|
||||
"/custom/path/model/us.amazon.nova-pro-v1:0/converse"
|
||||
);
|
||||
|
||||
// Test Bedrock streaming with prefix
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::AmazonBedrock,
|
||||
"/v1/messages",
|
||||
"us.amazon.nova-pro-v1:0",
|
||||
true,
|
||||
Some("/custom/path")
|
||||
),
|
||||
"/custom/path/model/us.amazon.nova-pro-v1:0/converse-stream"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_anthropic_messages_endpoint() {
|
||||
let api = SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages);
|
||||
|
||||
// Test Anthropic without prefix
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Anthropic,
|
||||
"/v1/messages",
|
||||
"claude-3-opus",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/v1/messages"
|
||||
);
|
||||
|
||||
// Test Anthropic with prefix
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Anthropic,
|
||||
"/v1/messages",
|
||||
"claude-3-opus",
|
||||
false,
|
||||
Some("/api/v2")
|
||||
),
|
||||
"/api/v2/messages"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_non_v1_request_paths() {
|
||||
let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
|
||||
// Test Groq with non-v1 path (should use default)
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Groq,
|
||||
"/custom/path",
|
||||
"llama2",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/v1/chat/completions"
|
||||
);
|
||||
|
||||
// Test Zhipu with non-v1 path
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Zhipu,
|
||||
"/custom/path",
|
||||
"chatglm",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/v1/chat/completions"
|
||||
);
|
||||
|
||||
// Test with prefix on non-v1 path
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::Zhipu,
|
||||
"/custom/path",
|
||||
"chatglm",
|
||||
false,
|
||||
Some("/api/v2")
|
||||
),
|
||||
"/api/v2/chat/completions"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_azure_openai_with_query_params() {
|
||||
let api = SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions);
|
||||
|
||||
// Test Azure without prefix - should include query params
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::AzureOpenAI,
|
||||
"/v1/chat/completions",
|
||||
"gpt-4-deployment",
|
||||
false,
|
||||
None
|
||||
),
|
||||
"/openai/deployments/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview"
|
||||
);
|
||||
|
||||
// Test Azure with prefix - prefix should replace /openai/deployments
|
||||
assert_eq!(
|
||||
api.target_endpoint_for_provider(
|
||||
&ProviderId::AzureOpenAI,
|
||||
"/v1/chat/completions",
|
||||
"gpt-4-deployment",
|
||||
false,
|
||||
Some("/custom/azure/path")
|
||||
),
|
||||
"/custom/azure/path/gpt-4-deployment/chat/completions?api-version=2025-01-01-preview"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -121,6 +121,7 @@ impl StreamContext {
|
|||
.as_ref()
|
||||
.unwrap_or(&"".to_string()),
|
||||
self.streaming_response,
|
||||
self.llm_provider().base_url_path_prefix.as_deref(),
|
||||
);
|
||||
if target_endpoint != request_path {
|
||||
self.set_http_request_header(":path", Some(&target_endpoint));
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ llm_providers:
|
|||
access_key: $AWS_BEARER_TOKEN_BEDROCK
|
||||
base_url: https://bedrock-runtime.us-west-2.amazonaws.com
|
||||
|
||||
|
||||
# Ollama Models
|
||||
- model: ollama/llama3.1
|
||||
base_url: http://host.docker.internal:11434
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ All providers are configured in the ``llm_providers`` section of your ``arch_con
|
|||
- ``access_key``: API key for authentication (supports environment variables)
|
||||
- ``default``: Mark a model as the default (optional, boolean)
|
||||
- ``name``: Custom name for the provider instance (optional)
|
||||
- ``base_url``: Custom endpoint URL (required for some providers)
|
||||
- ``base_url``: Custom endpoint URL (required for some providers, optional for others - see :ref:`base_url_details`)
|
||||
|
||||
Provider Categories
|
||||
-------------------
|
||||
|
|
@ -493,6 +493,8 @@ Zhipu AI
|
|||
Providers Requiring Base URL
|
||||
----------------------------
|
||||
|
||||
The following providers require a ``base_url`` parameter to be configured. For detailed information on base URL configuration including path prefix behavior and examples, see :ref:`base_url_details`.
|
||||
|
||||
Azure OpenAI
|
||||
~~~~~~~~~~~~
|
||||
|
||||
|
|
@ -616,6 +618,70 @@ For providers that implement the OpenAI API but aren't natively supported:
|
|||
base_url: http://localhost:8000
|
||||
provider_interface: openai
|
||||
|
||||
.. _base_url_details:
|
||||
|
||||
Base URL Configuration
|
||||
----------------------
|
||||
|
||||
The ``base_url`` parameter allows you to specify custom endpoints for model providers. It supports both hostname and path components, enabling flexible routing to different API endpoints.
|
||||
|
||||
**Format:** ``<scheme>://<hostname>[:<port>][/<path>]``
|
||||
|
||||
**Components:**
|
||||
|
||||
- ``scheme``: ``http`` or ``https``
|
||||
- ``hostname``: API server hostname or IP address
|
||||
- ``port``: Optional, defaults to 80 for http, 443 for https
|
||||
- ``path``: Optional path prefix that **replaces** the provider's default API path
|
||||
|
||||
**How Path Prefixes Work:**
|
||||
|
||||
When you include a path in ``base_url``, it replaces the provider's default path prefix while preserving the endpoint suffix:
|
||||
|
||||
- **Without path prefix**: Uses the provider's default path structure
|
||||
- **With path prefix**: Your custom path replaces the provider's default prefix, then the endpoint suffix is appended
|
||||
|
||||
**Configuration Examples:**
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
llm_providers:
|
||||
# Simple hostname only - uses provider's default path
|
||||
- model: zhipu/glm-4.6
|
||||
access_key: $ZHIPU_API_KEY
|
||||
base_url: https://api.z.ai
|
||||
# Results in: https://api.z.ai/api/paas/v4/chat/completions
|
||||
|
||||
# With custom path prefix - replaces provider's default path
|
||||
- model: zhipu/glm-4.6
|
||||
access_key: $ZHIPU_API_KEY
|
||||
base_url: https://api.z.ai/api/coding/paas/v4
|
||||
# Results in: https://api.z.ai/api/coding/paas/v4/chat/completions
|
||||
|
||||
# Azure with custom path
|
||||
- model: azure_openai/gpt-4
|
||||
access_key: $AZURE_API_KEY
|
||||
base_url: https://mycompany.openai.azure.com/custom/deployment/path
|
||||
# Results in: https://mycompany.openai.azure.com/custom/deployment/path/gpt-4/chat/completions?api-version=2025-01-01-preview
|
||||
|
||||
# Behind a proxy or API gateway
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
base_url: https://proxy.company.com/ai-gateway/openai
|
||||
# Results in: https://proxy.company.com/ai-gateway/openai/chat/completions
|
||||
|
||||
# Local endpoint with custom port
|
||||
- model: ollama/llama3.1
|
||||
base_url: http://localhost:8080
|
||||
# Results in: http://localhost:8080/v1/chat/completions
|
||||
|
||||
# Custom provider with path prefix
|
||||
- model: vllm/custom-model
|
||||
access_key: $VLLM_API_KEY
|
||||
base_url: https://vllm.example.com/models/v2
|
||||
provider_interface: openai
|
||||
# Results in: https://vllm.example.com/models/v2/chat/completions
|
||||
|
||||
Advanced Configuration
|
||||
----------------------
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue