diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml
index 9e9abac8..1186e9c1 100644
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@@ -7,6 +7,7 @@ properties:
       - v0.1
       - v0.1.0
       - 0.1-beta
+      - 0.2.0
   listeners:
     type: object
     additionalProperties: false
@@ -102,6 +103,19 @@ properties:
       additionalProperties: false
       required:
         - model
+
+  model_aliases:
+    type: object
+    patternProperties:
+      "^.*$":
+        type: object
+        properties:
+          target:
+            type: string
+        additionalProperties: false
+        required:
+          - target
+
   overrides:
     type: object
     properties:
diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py
index 0f157ea1..1563dd4a 100644
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@@ -208,6 +208,16 @@ def validate_and_render_schema():

     config_yaml["llm_providers"] = updated_llm_providers

+    # Validate model aliases if present
+    if "model_aliases" in config_yaml:
+        model_aliases = config_yaml["model_aliases"]
+        for alias_name, alias_config in model_aliases.items():
+            target = alias_config.get("target")
+            if target not in model_name_keys:
+                raise Exception(
+                    f"Model alias '{alias_name}' targets '{target}' which is not defined as a model. Available models: {', '.join(sorted(model_name_keys))}"
+                )
+
     arch_config_string = yaml.dump(config_yaml)
     arch_llm_config_string = yaml.dump(config_yaml)
diff --git a/crates/brightstaff/src/handlers/chat_completions.rs b/crates/brightstaff/src/handlers/chat_completions.rs
index fff07c22..b049e4f3 100644
--- a/crates/brightstaff/src/handlers/chat_completions.rs
+++ b/crates/brightstaff/src/handlers/chat_completions.rs
@@ -1,7 +1,7 @@
 use std::sync::Arc;
-
+use std::collections::HashMap;
 use bytes::Bytes;
-use common::configuration::ModelUsagePreference;
+use common::configuration::{ModelAlias, ModelUsagePreference};
 use common::consts::ARCH_PROVIDER_HINT_HEADER;
 use hermesllm::apis::openai::ChatCompletionsRequest;
 use hermesllm::clients::SupportedAPIs;
@@ -28,6 +28,7 @@ pub async fn chat(
     request: Request<Incoming>,
     router_service: Arc<dyn RouterService>,
     full_qualified_llm_provider_url: String,
+    model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
 ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
     let request_path = request.uri().path().to_string();

@@ -35,6 +36,7 @@ pub async fn chat(
     let chat_request_bytes = request.collect().await?.to_bytes();
     debug!("Received request body (raw utf8): {}", String::from_utf8_lossy(&chat_request_bytes));

+
     let mut client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &SupportedAPIs::from_endpoint(request_path.as_str()).unwrap())) {
         Ok(request) => request,
         Err(err) => {
         }
     };

+    // Model alias resolution: update model field in client_request immediately
+    // This ensures all downstream objects use the resolved model
+    let model_from_request = client_request.model().to_string();
+    let resolved_model = if let Some(model_aliases) = model_aliases.as_ref() {
+        if let Some(model_alias) = model_aliases.get(&model_from_request) {
+            debug!(
+                "Model Alias: 'From {}' -> 'To {}'",
+                model_from_request, model_alias.target
+            );
+            model_alias.target.clone()
+        } else {
+            model_from_request.clone()
+        }
+    } else {
+        model_from_request.clone()
+    };
+    client_request.set_model(resolved_model.clone());
+
     // Clone metadata for routing and remove archgw_preference_config from original
     let routing_metadata = client_request.metadata().clone();

@@ -77,7 +97,7 @@
     };

     debug!(
-        "[BRIGHTSTAFF -> ARCH_ROUTER] REQ: {}",
+        "[ARCH_ROUTER REQ]: {}",
        &serde_json::to_string(&chat_completions_request_for_arch_router).unwrap()
     );

@@ -132,11 +152,12 @@
         Ok(route) => match route {
             Some((_, model_name)) => model_name,
             None => {
-                debug!(
+                debug!(
                     "No route determined, using default model from request: {}",
                     chat_completions_request_for_arch_router.model
                 );
                 chat_completions_request_for_arch_router.model.clone()
+            }
         },
         Err(err) => {
@@ -148,7 +169,7 @@
     };

     debug!(
-        "[BRIGHTSTAFF -> ARCH_ROUTER] URL: {}, Model Hint: {}",
+        "[ARCH_ROUTER] URL: {}, Resolved Model: {}",
         full_qualified_llm_provider_url, model_name
     );
diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs
index d3843125..ed4776fe 100644
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -94,12 +94,16 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         routing_llm_provider,
     ));

+    let model_aliases = Arc::new(arch_config.model_aliases.clone());
+
+
     loop {
         let (stream, _) = listener.accept().await?;
         let peer_addr = stream.peer_addr()?;
         let io = TokioIo::new(stream);

         let router_service: Arc<dyn RouterService> = Arc::clone(&router_service);
+        let model_aliases = Arc::clone(&model_aliases);
         let llm_provider_url = llm_provider_url.clone();
         let llm_providers = llm_providers.clone();

@@ -109,12 +113,13 @@
                 let parent_cx = extract_context_from_request(&req);
                 let llm_provider_url = llm_provider_url.clone();
                 let llm_providers = llm_providers.clone();
+                let model_aliases = Arc::clone(&model_aliases);

                 async move {
                     match (req.method(), req.uri().path()) {
                         (&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH) => {
                             let fully_qualified_url = format!("{}{}", llm_provider_url, req.uri().path());
-                            chat(req, router_service, fully_qualified_url)
+                            chat(req, router_service, fully_qualified_url, model_aliases)
                                 .with_context(parent_cx)
                                 .await
                         }
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 93f4fd38..81c2db4f 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -13,11 +13,17 @@ pub struct Routing {
     pub model: Option<String>,
 }

+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ModelAlias {
+    pub target: String,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Configuration {
     pub version: String,
     pub endpoints: Option<HashMap<String, Endpoint>>,
     pub llm_providers: Vec<LlmProvider>,
+    pub model_aliases: Option<HashMap<String, ModelAlias>>,
     pub overrides: Option<Overrides>,
     pub system_prompt: Option<String>,
     pub prompt_guards: Option<PromptGuards>,
diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs
index 7e89acd2..9d20273e 100644
--- a/crates/hermesllm/src/apis/openai.rs
+++ b/crates/hermesllm/src/apis/openai.rs
@@ -104,6 +104,20 @@ pub struct ChatCompletionsRequest {
     // pub web_search: Option, // GOOD FIRST ISSUE: Future support for web search
 }

+impl ChatCompletionsRequest {
+    /// Suppress max_tokens if the model is o3, o3-*, openrouter/o3, or openrouter/o3-*
+    pub fn suppress_max_tokens_if_o3(&mut self) {
+        let model = self.model.as_str();
+        let is_o3 = model == "o3"
+            || model.starts_with("o3-")
+            || model == "openrouter/o3"
+            || model.starts_with("openrouter/o3-");
+        if is_o3 {
+            self.max_tokens = None;
+        }
+    }
+}
+
 // ============================================================================
 // CHAT COMPLETIONS API TYPES
 // ============================================================================

@@ -530,7 +544,10 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest {
     type Error = OpenAIStreamError;

     fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
-        serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)
+        let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
+        // Use the centralized suppression logic
+        req.suppress_max_tokens_if_o3();
+        Ok(req)
     }
 }
diff --git a/crates/hermesllm/src/clients/transformer.rs b/crates/hermesllm/src/clients/transformer.rs
index 8170a53d..b907c243 100644
--- a/crates/hermesllm/src/clients/transformer.rs
+++ b/crates/hermesllm/src/clients/transformer.rs
@@ -97,7 +97,7 @@ impl TryFrom<MessagesRequest> for ChatCompletionsRequest {
         let openai_tools = req.tools.map(|tools| convert_anthropic_tools(tools));
         let (openai_tool_choice, parallel_tool_calls) = convert_anthropic_tool_choice(req.tool_choice);

-        Ok(ChatCompletionsRequest {
+        let mut chat_completions_req = ChatCompletionsRequest {
             model: req.model,
             messages: openai_messages,
             temperature: req.temperature,
@@ -109,7 +109,9 @@
             tool_choice: openai_tool_choice,
             parallel_tool_calls,
             ..Default::default()
-        })
+        };
+        chat_completions_req.suppress_max_tokens_if_o3();
+        Ok(chat_completions_req)
     }
 }
diff --git a/demos/use_cases/model_alias_routing/README.md b/demos/use_cases/model_alias_routing/README.md
new file mode 100644
index 00000000..347a7e2b
--- /dev/null
+++ b/demos/use_cases/model_alias_routing/README.md
@@ -0,0 +1,148 @@
+# Model Alias Demo Suite
+
+This directory contains demos for the model alias feature in archgw.
+
+## Overview
+
+Model aliases allow clients to use friendly, semantic names instead of provider-specific model names. For example:
+- `arch.summarize.v1` → `gpt-4o-mini` (fast, cheap model for summaries)
+- `arch.reasoning.v1` → `gpt-4o` (capable model for complex reasoning)
+- `creative-model` → `claude-3-5-sonnet-20241022` (creative tasks)
+
+## Configuration
+
+The `arch_config_with_aliases.yaml` file defines several aliases:
+
+```yaml
+# Model aliases - friendly names that map to actual provider names
+model_aliases:
+  # Alias for summarization tasks -> fast/cheap model
+  arch.summarize.v1:
+    target: gpt-4o-mini
+
+  # Alias for general purpose tasks -> latest model
+  arch.v1:
+    target: o3
+
+  # Alias for reasoning tasks -> capable model
+  arch.reasoning.v1:
+    target: gpt-4o
+
+  # Alias for creative tasks -> Claude model
+  arch.creative.v1:
+    target: claude-3-5-sonnet-20241022
+
+  # Alias for quick responses -> fast model
+  arch.fast.v1:
+    target: claude-3-haiku-20240307
+
+  # Semantic aliases
+  summary-model:
+    target: gpt-4o-mini
+
+  chat-model:
+    target: gpt-4o
+
+  creative-model:
+    target: claude-3-5-sonnet-20241022
+```
+
+## Prerequisites
+- Install all dependencies as described in the main Arch README ([link](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites))
+- Set your API keys in your environment:
+  - `export OPENAI_API_KEY=your-openai-key`
+  - `export ANTHROPIC_API_KEY=your-anthropic-key` (optional, but recommended for Anthropic tests)
+
+## How to Run
+
+1. Start the demo:
+   ```sh
+   sh run_demo.sh
+   ```
+   - Creates a `.env` file with your API keys (if not present).
+   - Starts Arch Gateway with the model alias config (`arch_config_with_aliases.yaml`).
+
+2. To stop the demo:
+   ```sh
+   sh run_demo.sh down
+   ```
+   - Stops Arch Gateway and any related services.
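Once the gateway is up, you can exercise an alias from Python as well as curl. Below is a minimal sketch using the `openai` SDK, assuming the gateway is listening on `localhost:12000` as configured above; the API key passed to the client is a placeholder, since the real provider credentials come from the gateway's `.env` file:

```python
# Minimal sketch: call archgw through the OpenAI SDK using a model alias.
# Assumes the demo gateway is running locally on port 12000.
import openai

client = openai.OpenAI(
    api_key="test-key",  # placeholder; provider keys live in the gateway's .env
    base_url="http://localhost:12000/v1",
)

completion = client.chat.completions.create(
    model="arch.summarize.v1",  # resolved by the gateway to gpt-4o-mini
    max_tokens=50,
    messages=[
        {"role": "user", "content": "Summarize: model aliases decouple clients from providers."}
    ],
)
print(completion.choices[0].message.content)
```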
+
+## Example Requests
+
+### OpenAI client with alias `arch.summarize.v1`
+```sh
+curl -sS -X POST "http://localhost:12000/v1/chat/completions" \
+  -H "Authorization: Bearer test-key" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "arch.summarize.v1",
+    "max_tokens": 50,
+    "messages": [
+      { "role": "user",
+        "content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1!"
+      }
+    ]
+  }' | jq .
+```
+
+### OpenAI client with alias `arch.v1`
+```sh
+curl -sS -X POST "http://localhost:12000/v1/chat/completions" \
+  -H "Authorization: Bearer test-key" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "arch.v1",
+    "max_tokens": 50,
+    "messages": [
+      { "role": "user",
+        "content": "Hello, please respond with exactly: Hello from alias arch.v1!"
+      }
+    ]
+  }' | jq .
+```
+
+### Anthropic client with alias `arch.summarize.v1`
+```sh
+curl -sS -X POST "http://localhost:12000/v1/messages" \
+  -H "x-api-key: test-key" \
+  -H "anthropic-version: 2023-06-01" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "arch.summarize.v1",
+    "max_tokens": 50,
+    "messages": [
+      { "role": "user",
+        "content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1 via Anthropic!"
+      }
+    ]
+  }' | jq .
+```
+
+### Anthropic client with alias `arch.v1`
+```sh
+curl -sS -X POST "http://localhost:12000/v1/messages" \
+  -H "x-api-key: test-key" \
+  -H "anthropic-version: 2023-06-01" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "arch.v1",
+    "max_tokens": 50,
+    "messages": [
+      { "role": "user",
+        "content": "Hello, please respond with exactly: Hello from alias arch.v1 via Anthropic!"
+      }
+    ]
+  }' | jq .
+```
+
+## Notes
+- The `.env` file will be created automatically if missing, with your API keys.
+- If `ANTHROPIC_API_KEY` is not set, Anthropic requests will not work.
+- You can add more aliases in `arch_config_with_aliases.yaml`.
+- All curl examples use `jq .` for pretty-printing JSON responses.
+
+## Troubleshooting
+- Ensure your API keys are set in your environment before running the demo.
+- If you see errors about missing keys, set them and re-run the script.
+- For more details, see the main Arch documentation.
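The resolution rule implemented in `chat_completions.rs` earlier in this diff reduces to a map lookup with pass-through: a known alias is rewritten to its target before routing, and any other name is forwarded unchanged as a direct model name. A minimal Python sketch of that rule (the names here are illustrative, not part of the codebase):

```python
# Sketch of the gateway's alias resolution: dictionary lookup with pass-through.
from typing import Dict, Optional


def resolve_model(requested: str, aliases: Optional[Dict[str, str]]) -> str:
    if aliases and requested in aliases:
        return aliases[requested]  # e.g. "arch.summarize.v1" -> "gpt-4o-mini"
    return requested  # unknown names pass through as direct model names


assert resolve_model("arch.v1", {"arch.v1": "o3"}) == "o3"
assert resolve_model("gpt-4o", {"arch.v1": "o3"}) == "gpt-4o"
```

This pass-through behavior is also why the `test_nonexistent_alias` e2e test further below accepts either a successful response or a provider error.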
diff --git a/demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml b/demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml
new file mode 100644
index 00000000..6a0fe25f
--- /dev/null
+++ b/demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml
@@ -0,0 +1,59 @@
+version: v0.1
+
+listeners:
+  egress_traffic:
+    address: 0.0.0.0
+    port: 12000
+    message_format: openai
+    timeout: 30s
+
+llm_providers:
+  # OpenAI Models
+  - model: openai/gpt-4o-mini
+    access_key: $OPENAI_API_KEY
+    default: true
+
+  - model: openai/o3
+    access_key: $OPENAI_API_KEY
+
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+
+  # Anthropic Models
+  - model: anthropic/claude-3-5-sonnet-20241022
+    access_key: $ANTHROPIC_API_KEY
+
+  - model: anthropic/claude-3-haiku-20240307
+    access_key: $ANTHROPIC_API_KEY
+
+# Model aliases - friendly names that map to actual provider names
+model_aliases:
+  # Alias for summarization tasks -> fast/cheap model
+  arch.summarize.v1:
+    target: gpt-4o-mini
+
+  # Alias for general purpose tasks -> latest model
+  arch.v1:
+    target: o3
+
+  # Alias for reasoning tasks -> capable model
+  arch.reasoning.v1:
+    target: gpt-4o
+
+  # Alias for creative tasks -> Claude model
+  arch.creative.v1:
+    target: claude-3-5-sonnet-20241022
+
+  # Alias for quick responses -> fast model
+  arch.fast.v1:
+    target: claude-3-haiku-20240307
+
+  # Semantic aliases
+  summary-model:
+    target: gpt-4o-mini
+
+  chat-model:
+    target: gpt-4o
+
+  creative-model:
+    target: claude-3-5-sonnet-20241022
diff --git a/demos/use_cases/model_alias_routing/run_demo.sh b/demos/use_cases/model_alias_routing/run_demo.sh
new file mode 100644
index 00000000..2774e2a4
--- /dev/null
+++ b/demos/use_cases/model_alias_routing/run_demo.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+set -e
+
+# Function to start the demo
+start_demo() {
+    # Step 1: Check if .env file exists
+    if [ -f ".env" ]; then
+        echo ".env file already exists. Skipping creation."
+    else
+        # Step 2: Create `.env` file and set API keys
+        if [ -z "$OPENAI_API_KEY" ]; then
+            echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
+            exit 1
+        fi
+        if [ -z "$ANTHROPIC_API_KEY" ]; then
+            echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work."
+        fi
+
+        echo "Creating .env file..."
+        echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
+        if [ -n "$ANTHROPIC_API_KEY" ]; then
+            echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
+        fi
+        echo ".env file created with API keys."
+    fi
+
+    # Step 3: Start Arch
+    echo "Starting Arch with arch_config_with_aliases.yaml..."
+    archgw up arch_config_with_aliases.yaml
+
+    echo -e "\n\nArch started successfully."
+    echo -e "Please run the following CURL command to test model alias routing. Additional instructions are in the README.md file.\n"
+    echo "curl -sS -X POST \"http://localhost:12000/v1/chat/completions\" \\
+      -H \"Authorization: Bearer test-key\" \\
+      -H \"Content-Type: application/json\" \\
+      -d '{
+        \"model\": \"arch.summarize.v1\",
+        \"max_tokens\": 50,
+        \"messages\": [
+          { \"role\": \"user\",
+            \"content\": \"Hello, please respond with exactly: Hello from alias arch.summarize.v1!\"
+          }
+        ]
+      }' | jq ."
+}
+
+# Function to stop the demo
+stop_demo() {
+    # Stop Arch
+    echo "Stopping Arch..."
+    archgw down
+}
+
+# Main script logic
+if [ "$1" == "down" ]; then
+    stop_demo
+else
+    # Default action is to bring the demo up
+    start_demo
+fi
diff --git a/tests/e2e/run_e2e_tests.sh b/tests/e2e/run_e2e_tests.sh
index c716a182..54a53b1f 100644
--- a/tests/e2e/run_e2e_tests.sh
+++ b/tests/e2e/run_e2e_tests.sh
@@ -60,14 +60,23 @@ archgw up demos/samples_python/weather_forecast/arch_config.yaml
 kill $model_server_tail_pid
 cd -

-log running e2e tests
-log =================
-poetry run pytest
+log running e2e tests for prompt gateway
+log ====================================
+poetry run pytest test_prompt_gateway.py

-log shutting down the arch gateway service
-log ======================================
+log shutting down the arch gateway service for prompt_gateway demo
+log ==============================================================
 archgw down

+log starting arch gateway with model alias routing demo
+cd ../../
+archgw up demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml
+cd -
+
+log running e2e tests for model alias routing
+log =========================================
+poetry run pytest test_model_alias_routing.py
+
 log shutting down the weather_forecast demo
 log =======================================
 cd ../../demos/samples_python/weather_forecast
diff --git a/tests/e2e/test_model_alias_routing.py b/tests/e2e/test_model_alias_routing.py
new file mode 100644
index 00000000..c0f6f7c5
--- /dev/null
+++ b/tests/e2e/test_model_alias_routing.py
@@ -0,0 +1,269 @@
+import anthropic
+import openai
+import os
+import logging
+import pytest
+import sys
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    handlers=[logging.StreamHandler(sys.stdout)],
+)
+logger = logging.getLogger(__name__)
+
+LLM_GATEWAY_ENDPOINT = os.getenv(
+    "LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1/chat/completions"
+)
+
+# =============================================================================
+# MODEL ALIAS TESTS
+# =============================================================================
+
+
+def test_openai_client_with_alias_arch_summarize_v1():
+    """Test OpenAI client using model alias 'arch.summarize.v1' which should resolve to 'gpt-4o-mini'"""
+    logger.info("Testing OpenAI client with alias 'arch.summarize.v1' -> 'gpt-4o-mini'")
+
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    client = openai.OpenAI(
+        api_key="test-key",
+        base_url=f"{base_url}/v1",
+    )
+
+    completion = client.chat.completions.create(
+        model="arch.summarize.v1",  # This should resolve to gpt-4o-mini
+        max_tokens=50,
+        messages=[
+            {
+                "role": "user",
+                "content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1!",
+            }
+        ],
+    )
+
+    response_content = completion.choices[0].message.content
+    logger.info(f"Response from arch.summarize.v1 alias: {response_content}")
+    assert response_content == "Hello from alias arch.summarize.v1!"
+
+
+def test_openai_client_with_alias_arch_v1():
+    """Test OpenAI client using model alias 'arch.v1' which should resolve to 'o3'"""
+    logger.info("Testing OpenAI client with alias 'arch.v1' -> 'o3'")
+
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    client = openai.OpenAI(
+        api_key="test-key",
+        base_url=f"{base_url}/v1",
+    )
+
+    completion = client.chat.completions.create(
+        model="arch.v1",  # This should resolve to o3
+        max_tokens=50,
+        messages=[
+            {
+                "role": "user",
+                "content": "Hello, please respond with exactly: Hello from alias arch.v1!",
+            }
+        ],
+    )
+
+    response_content = completion.choices[0].message.content
+    logger.info(f"Response from arch.v1 alias: {response_content}")
+    assert response_content == "Hello from alias arch.v1!"
+
+
+def test_anthropic_client_with_alias_arch_summarize_v1():
+    """Test Anthropic client using model alias 'arch.summarize.v1' which should resolve to 'gpt-4o-mini'"""
+    logger.info("Testing Anthropic client with alias 'arch.summarize.v1' -> 'gpt-4o-mini'")
+
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    client = anthropic.Anthropic(api_key="test-key", base_url=base_url)
+
+    message = client.messages.create(
+        model="arch.summarize.v1",  # This should resolve to gpt-4o-mini
+        max_tokens=50,
+        messages=[
+            {
+                "role": "user",
+                "content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1 via Anthropic!",
+            }
+        ],
+    )
+
+    response_content = "".join(b.text for b in message.content if b.type == "text")
+    logger.info(
+        f"Response from arch.summarize.v1 alias via Anthropic: {response_content}"
+    )
+    assert response_content == "Hello from alias arch.summarize.v1 via Anthropic!"
+
+
+def test_anthropic_client_with_alias_arch_v1():
+    """Test Anthropic client using model alias 'arch.v1' which should resolve to 'o3'"""
+    logger.info("Testing Anthropic client with alias 'arch.v1' -> 'o3'")
+
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    client = anthropic.Anthropic(api_key="test-key", base_url=base_url)
+
+    message = client.messages.create(
+        model="arch.v1",  # This should resolve to o3
+        max_tokens=50,
+        messages=[
+            {
+                "role": "user",
+                "content": "Hello, please respond with exactly: Hello from alias arch.v1 via Anthropic!",
+            }
+        ],
+    )
+
+    response_content = "".join(b.text for b in message.content if b.type == "text")
+    logger.info(f"Response from arch.v1 alias via Anthropic: {response_content}")
+    assert response_content == "Hello from alias arch.v1 via Anthropic!"
+
+
+def test_openai_client_with_alias_streaming():
+    """Test OpenAI client using model alias with streaming"""
+    logger.info(
+        "Testing OpenAI client with alias 'arch.summarize.v1' streaming -> 'gpt-4o-mini'"
+    )
+
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    client = openai.OpenAI(
+        api_key="test-key",
+        base_url=f"{base_url}/v1",
+    )
+
+    stream = client.chat.completions.create(
+        model="arch.summarize.v1",  # This should resolve to gpt-4o-mini
+        max_tokens=50,
+        messages=[
+            {
+                "role": "user",
+                "content": "Hello, please respond with exactly: Hello from streaming alias!",
+            }
+        ],
+        stream=True,
+    )
+
+    content_chunks = []
+    for chunk in stream:
+        if chunk.choices[0].delta.content:
+            content_chunks.append(chunk.choices[0].delta.content)
+
+    full_content = "".join(content_chunks)
+    logger.info(f"Streaming response from arch.summarize.v1 alias: {full_content}")
+    assert full_content == "Hello from streaming alias!"
+
+
+def test_anthropic_client_with_alias_streaming():
+    """Test Anthropic client using model alias with streaming"""
+    logger.info(
+        "Testing Anthropic client with alias 'arch.summarize.v1' streaming -> 'gpt-4o-mini'"
+    )
+
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    client = anthropic.Anthropic(api_key="test-key", base_url=base_url)
+
+    with client.messages.stream(
+        model="arch.summarize.v1",  # This should resolve to gpt-4o-mini
+        max_tokens=50,
+        messages=[
+            {
+                "role": "user",
+                "content": "Hello, please respond with exactly: Hello from streaming alias via Anthropic!",
+            }
+        ],
+    ) as stream:
+        pieces = [t for t in stream.text_stream]
+        full_text = "".join(pieces)
+
+    logger.info(
+        f"Streaming response from arch.summarize.v1 alias via Anthropic: {full_text}"
+    )
+    assert full_text == "Hello from streaming alias via Anthropic!"
+
+
+def test_nonexistent_alias():
+    """Test that using a non-existent alias falls back to treating it as a direct model name"""
+    logger.info(
+        "Testing non-existent alias 'nonexistent.alias' should be treated as direct model"
+    )
+
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    client = openai.OpenAI(
+        api_key="test-key",
+        base_url=f"{base_url}/v1",
+    )
+
+    try:
+        completion = client.chat.completions.create(
+            model="nonexistent.alias",  # This alias doesn't exist
+            max_tokens=50,
+            messages=[
+                {
+                    "role": "user",
+                    "content": "Hello, this should fail or use as direct model name",
+                }
+            ],
+        )
+        # If the request succeeds, the alias was passed through as a direct model name
+        logger.info("Non-existent alias was handled gracefully")
+        logger.info(f"Response: {completion.choices[0].message.content}")
+    except Exception as e:
+        # An error is also acceptable behavior here
+        logger.info(f"Non-existent alias resulted in error (expected): {e}")
+
+
+# =============================================================================
+# DIRECT MODEL TESTS (for comparison)
+# =============================================================================
+
+
+def test_direct_model_4o_mini_openai():
+    """Test OpenAI client using direct model name 'gpt-4o-mini'"""
+    logger.info("Testing OpenAI client with direct model 'gpt-4o-mini'")
+
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    client = openai.OpenAI(
+        api_key="test-key",
+        base_url=f"{base_url}/v1",
+    )
+
+    completion = client.chat.completions.create(
+        model="gpt-4o-mini",  # Direct model name
+        max_tokens=50,
+        messages=[
+            {
+                "role": "user",
+                "content": "Hello, please respond with exactly: Hello from direct 4o-mini!",
+            }
+        ],
+    )
+
+    response_content = completion.choices[0].message.content
+    logger.info(f"Response from direct gpt-4o-mini: {response_content}")
+    assert response_content == "Hello from direct 4o-mini!"
+
+
+def test_direct_model_4o_mini_anthropic():
+    """Test Anthropic client using direct model name 'gpt-4o-mini'"""
+    logger.info("Testing Anthropic client with direct model 'gpt-4o-mini'")
+
+    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
+    client = anthropic.Anthropic(api_key="test-key", base_url=base_url)
+
+    message = client.messages.create(
+        model="gpt-4o-mini",  # Direct model name
+        max_tokens=50,
+        messages=[
+            {
+                "role": "user",
+                "content": "Hello, please respond with exactly: Hello from direct 4o-mini via Anthropic!",
+            }
+        ],
+    )
+
+    response_content = "".join(b.text for b in message.content if b.type == "text")
+    logger.info(f"Response from direct gpt-4o-mini via Anthropic: {response_content}")
+    assert response_content == "Hello from direct 4o-mini via Anthropic!"
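One alias in the demo config (`arch.v1`) targets `o3`, and OpenAI's o-series endpoints reject the `max_tokens` parameter in favor of `max_completion_tokens`; that is what the `suppress_max_tokens_if_o3` hook added in `hermesllm` above addresses. A Python transliteration of its matching rule, offered purely as a reading aid rather than production code:

```python
# Transliteration of ChatCompletionsRequest::suppress_max_tokens_if_o3 from
# crates/hermesllm/src/apis/openai.rs: drop max_tokens for o3-family models.
from typing import Optional


def suppress_max_tokens_if_o3(model: str, max_tokens: Optional[int]) -> Optional[int]:
    is_o3 = (
        model == "o3"
        or model.startswith("o3-")
        or model == "openrouter/o3"
        or model.startswith("openrouter/o3-")
    )
    return None if is_o3 else max_tokens


assert suppress_max_tokens_if_o3("o3", 50) is None
assert suppress_max_tokens_if_o3("o3-mini", 50) is None
assert suppress_max_tokens_if_o3("gpt-4o-mini", 50) == 50
```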