mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
adding support for model aliases in archgw (#566)
* adding support for model aliases in archgw * fixed PR based on feedback * removing README. Not relevant for PR --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-136.local>
This commit is contained in:
parent
1e8c81d8f6
commit
4eb2b410c5
12 changed files with 634 additions and 14 deletions
|
|
@ -7,6 +7,7 @@ properties:
|
|||
- v0.1
|
||||
- v0.1.0
|
||||
- 0.1-beta
|
||||
- 0.2.0
|
||||
listeners:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
|
|
@ -102,6 +103,19 @@ properties:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
|
||||
model_aliases:
|
||||
type: object
|
||||
patternProperties:
|
||||
"^.*$":
|
||||
type: object
|
||||
properties:
|
||||
target:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- target
|
||||
|
||||
overrides:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
|||
|
|
@ -208,6 +208,16 @@ def validate_and_render_schema():
|
|||
|
||||
config_yaml["llm_providers"] = updated_llm_providers
|
||||
|
||||
# Validate model aliases if present
|
||||
if "model_aliases" in config_yaml:
|
||||
model_aliases = config_yaml["model_aliases"]
|
||||
for alias_name, alias_config in model_aliases.items():
|
||||
target = alias_config.get("target")
|
||||
if target not in model_name_keys:
|
||||
raise Exception(
|
||||
f"Model alias '{alias_name}' targets '{target}' which is not defined as a model. Available models: {', '.join(sorted(model_name_keys))}"
|
||||
)
|
||||
|
||||
arch_config_string = yaml.dump(config_yaml)
|
||||
arch_llm_config_string = yaml.dump(config_yaml)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use bytes::Bytes;
|
||||
use common::configuration::ModelUsagePreference;
|
||||
use common::configuration::{ModelAlias, ModelUsagePreference};
|
||||
use common::consts::ARCH_PROVIDER_HINT_HEADER;
|
||||
use hermesllm::apis::openai::ChatCompletionsRequest;
|
||||
use hermesllm::clients::SupportedAPIs;
|
||||
|
|
@ -28,6 +28,7 @@ pub async fn chat(
|
|||
request: Request<hyper::body::Incoming>,
|
||||
router_service: Arc<RouterService>,
|
||||
full_qualified_llm_provider_url: String,
|
||||
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
|
||||
let request_path = request.uri().path().to_string();
|
||||
|
|
@ -35,6 +36,7 @@ pub async fn chat(
|
|||
let chat_request_bytes = request.collect().await?.to_bytes();
|
||||
|
||||
debug!("Received request body (raw utf8): {}", String::from_utf8_lossy(&chat_request_bytes));
|
||||
|
||||
let mut client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &SupportedAPIs::from_endpoint(request_path.as_str()).unwrap())) {
|
||||
Ok(request) => request,
|
||||
Err(err) => {
|
||||
|
|
@ -46,6 +48,24 @@ pub async fn chat(
|
|||
}
|
||||
};
|
||||
|
||||
// Model alias resolution: update model field in client_request immediately
|
||||
// This ensures all downstream objects use the resolved model
|
||||
let model_from_request = client_request.model().to_string();
|
||||
let resolved_model = if let Some(model_aliases) = model_aliases.as_ref() {
|
||||
if let Some(model_alias) = model_aliases.get(&model_from_request) {
|
||||
debug!(
|
||||
"Model Alias: 'From {}' -> 'To{}'",
|
||||
model_from_request, model_alias.target
|
||||
);
|
||||
model_alias.target.clone()
|
||||
} else {
|
||||
model_from_request.clone()
|
||||
}
|
||||
} else {
|
||||
model_from_request.clone()
|
||||
};
|
||||
client_request.set_model(resolved_model.clone());
|
||||
|
||||
// Clone metadata for routing and remove archgw_preference_config from original
|
||||
let routing_metadata = client_request.metadata().clone();
|
||||
|
||||
|
|
@ -77,7 +97,7 @@ pub async fn chat(
|
|||
};
|
||||
|
||||
debug!(
|
||||
"[BRIGHTSTAFF -> ARCH_ROUTER] REQ: {}",
|
||||
"[ARCH_ROUTER REQ]: {}",
|
||||
&serde_json::to_string(&chat_completions_request_for_arch_router).unwrap()
|
||||
);
|
||||
|
||||
|
|
@ -132,11 +152,12 @@ pub async fn chat(
|
|||
Ok(route) => match route {
|
||||
Some((_, model_name)) => model_name,
|
||||
None => {
|
||||
debug!(
|
||||
debug!(
|
||||
"No route determined, using default model from request: {}",
|
||||
chat_completions_request_for_arch_router.model
|
||||
);
|
||||
chat_completions_request_for_arch_router.model.clone()
|
||||
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
|
|
@ -148,7 +169,7 @@ pub async fn chat(
|
|||
};
|
||||
|
||||
debug!(
|
||||
"[BRIGHTSTAFF -> ARCH_ROUTER] URL: {}, Model Hint: {}",
|
||||
"[ARCH_ROUTER] URL: {}, Resolved Model: {}",
|
||||
full_qualified_llm_provider_url, model_name
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -94,12 +94,16 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
routing_llm_provider,
|
||||
));
|
||||
|
||||
let model_aliases = Arc::new(arch_config.model_aliases.clone());
|
||||
|
||||
|
||||
loop {
|
||||
let (stream, _) = listener.accept().await?;
|
||||
let peer_addr = stream.peer_addr()?;
|
||||
let io = TokioIo::new(stream);
|
||||
|
||||
let router_service: Arc<RouterService> = Arc::clone(&router_service);
|
||||
let model_aliases = Arc::clone(&model_aliases);
|
||||
let llm_provider_url = llm_provider_url.clone();
|
||||
|
||||
let llm_providers = llm_providers.clone();
|
||||
|
|
@ -109,12 +113,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
let parent_cx = extract_context_from_request(&req);
|
||||
let llm_provider_url = llm_provider_url.clone();
|
||||
let llm_providers = llm_providers.clone();
|
||||
let model_aliases = Arc::clone(&model_aliases);
|
||||
|
||||
async move {
|
||||
match (req.method(), req.uri().path()) {
|
||||
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH) => {
|
||||
let fully_qualified_url = format!("{}{}", llm_provider_url, req.uri().path());
|
||||
chat(req, router_service, fully_qualified_url)
|
||||
chat(req, router_service, fully_qualified_url, model_aliases)
|
||||
.with_context(parent_cx)
|
||||
.await
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,11 +13,17 @@ pub struct Routing {
|
|||
pub model: Option<String>,
|
||||
}
|
||||
|
||||
/// One entry of the `model_aliases` configuration map: the model name that
/// replaces the alias when a request arrives using that alias.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ModelAlias {
|
||||
/// Model name written into the request in place of the alias.
pub target: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Configuration {
|
||||
pub version: String,
|
||||
pub endpoints: Option<HashMap<String, Endpoint>>,
|
||||
pub llm_providers: Vec<LlmProvider>,
|
||||
pub model_aliases: Option<HashMap<String, ModelAlias>>,
|
||||
pub overrides: Option<Overrides>,
|
||||
pub system_prompt: Option<String>,
|
||||
pub prompt_guards: Option<PromptGuards>,
|
||||
|
|
|
|||
|
|
@ -104,6 +104,20 @@ pub struct ChatCompletionsRequest {
|
|||
// pub web_search: Option<bool>, // GOOD FIRST ISSUE: Future support for web search
|
||||
}
|
||||
|
||||
impl ChatCompletionsRequest {
|
||||
/// Suppress max_tokens if the model is o3, o3-*, openrouter/o3, or openrouter/o3-*
|
||||
pub fn suppress_max_tokens_if_o3(&mut self) {
|
||||
let model = self.model.as_str();
|
||||
let is_o3 = model == "o3"
|
||||
|| model.starts_with("o3-")
|
||||
|| model == "openrouter/o3"
|
||||
|| model.starts_with("openrouter/o3-");
|
||||
if is_o3 {
|
||||
self.max_tokens = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// CHAT COMPLETIONS API TYPES
|
||||
// ============================================================================
|
||||
|
|
@ -530,7 +544,10 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest {
|
|||
type Error = OpenAIStreamError;
|
||||
|
||||
fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
|
||||
serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)
|
||||
let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
|
||||
// Use the centralized suppression logic
|
||||
req.suppress_max_tokens_if_o3();
|
||||
Ok(req)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ impl TryFrom<AnthropicMessagesRequest> for ChatCompletionsRequest {
|
|||
let openai_tools = req.tools.map(|tools| convert_anthropic_tools(tools));
|
||||
let (openai_tool_choice, parallel_tool_calls) = convert_anthropic_tool_choice(req.tool_choice);
|
||||
|
||||
Ok(ChatCompletionsRequest {
|
||||
let mut _chat_completions_req: ChatCompletionsRequest = ChatCompletionsRequest {
|
||||
model: req.model,
|
||||
messages: openai_messages,
|
||||
temperature: req.temperature,
|
||||
|
|
@ -109,7 +109,9 @@ impl TryFrom<AnthropicMessagesRequest> for ChatCompletionsRequest {
|
|||
tool_choice: openai_tool_choice,
|
||||
parallel_tool_calls,
|
||||
..Default::default()
|
||||
})
|
||||
};
|
||||
_chat_completions_req.suppress_max_tokens_if_o3();
|
||||
Ok(_chat_completions_req)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
148
demos/use_cases/model_alias_routing/README.md
Normal file
148
demos/use_cases/model_alias_routing/README.md
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
# Model Alias Demo Suite
|
||||
|
||||
This directory contains demos for the model alias feature in archgw.
|
||||
|
||||
## Overview
|
||||
|
||||
Model aliases allow clients to use friendly, semantic names instead of provider-specific model names. For example:
|
||||
- `arch.summarize.v1` → `gpt-4o-mini` (fast, cheap model for summaries)
|
||||
- `arch.reasoning.v1` → `gpt-4o` (capable model for complex reasoning)
|
||||
- `creative-model` → `claude-3-5-sonnet-20241022` (creative tasks)
|
||||
|
||||
## Configuration
|
||||
|
||||
The `arch_config_with_aliases.yaml` file defines several aliases:
|
||||
|
||||
```yaml
|
||||
# Model aliases - friendly names that map to actual provider names
|
||||
model_aliases:
|
||||
# Alias for summarization tasks -> fast/cheap model
|
||||
arch.summarize.v1:
|
||||
target: gpt-4o-mini
|
||||
|
||||
# Alias for general purpose tasks -> latest model
|
||||
arch.v1:
|
||||
target: o3
|
||||
|
||||
# Alias for reasoning tasks -> capable model
|
||||
arch.reasoning.v1:
|
||||
target: gpt-4o
|
||||
|
||||
# Alias for creative tasks -> Claude model
|
||||
arch.creative.v1:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
|
||||
# Alias for quick responses -> fast model
|
||||
arch.fast.v1:
|
||||
target: claude-3-haiku-20240307
|
||||
|
||||
# Semantic aliases
|
||||
summary-model:
|
||||
target: gpt-4o-mini
|
||||
|
||||
chat-model:
|
||||
target: gpt-4o
|
||||
|
||||
creative-model:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
```
|
||||
|
||||
## Prerequisites
|
||||
- Install all dependencies as described in the main Arch README ([link](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites))
|
||||
- Set your API keys in your environment:
|
||||
- `export OPENAI_API_KEY=your-openai-key`
|
||||
- `export ANTHROPIC_API_KEY=your-anthropic-key` (optional, but recommended for Anthropic tests)
|
||||
|
||||
## How to Run
|
||||
|
||||
1. Start the demo:
|
||||
```sh
|
||||
sh run_demo.sh
|
||||
```
|
||||
- This will create a `.env` file with your API keys (if not present).
|
||||
- Starts Arch Gateway with model alias config (`arch_config_with_aliases.yaml`).
|
||||
|
||||
2. To stop the demo:
|
||||
```sh
|
||||
sh run_demo.sh down
|
||||
```
|
||||
- This will stop Arch Gateway and any related services.
|
||||
|
||||
## Example Requests
|
||||
|
||||
### OpenAI client with alias `arch.summarize.v1`
|
||||
```sh
|
||||
curl -sS -X POST "http://localhost:12000/v1/chat/completions" \
|
||||
-H "Authorization: Bearer test-key" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "arch.summarize.v1",
|
||||
"max_tokens": 50,
|
||||
"messages": [
|
||||
{ "role": "user",
|
||||
"content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1!"
|
||||
}
|
||||
]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### OpenAI client with alias `arch.v1`
|
||||
```sh
|
||||
curl -sS -X POST "http://localhost:12000/v1/chat/completions" \
|
||||
-H "Authorization: Bearer test-key" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "arch.v1",
|
||||
"max_tokens": 50,
|
||||
"messages": [
|
||||
{ "role": "user",
|
||||
"content": "Hello, please respond with exactly: Hello from alias arch.v1!"
|
||||
}
|
||||
]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### Anthropic client with alias `arch.summarize.v1`
|
||||
```sh
|
||||
curl -sS -X POST "http://localhost:12000/v1/messages" \
|
||||
-H "x-api-key: test-key" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "arch.summarize.v1",
|
||||
"max_tokens": 50,
|
||||
"messages": [
|
||||
{ "role": "user",
|
||||
"content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1 via Anthropic!"
|
||||
}
|
||||
]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
### Anthropic client with alias `arch.v1`
|
||||
```sh
|
||||
curl -sS -X POST "http://localhost:12000/v1/messages" \
|
||||
-H "x-api-key: test-key" \
|
||||
-H "anthropic-version: 2023-06-01" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "arch.v1",
|
||||
"max_tokens": 50,
|
||||
"messages": [
|
||||
{ "role": "user",
|
||||
"content": "Hello, please respond with exactly: Hello from alias arch.v1 via Anthropic!"
|
||||
}
|
||||
]
|
||||
}' | jq .
|
||||
```
|
||||
|
||||
## Notes
|
||||
- The `.env` file will be created automatically if missing, with your API keys.
|
||||
- If `ANTHROPIC_API_KEY` is not set, Anthropic requests will not work.
|
||||
- You can add more aliases in `arch_config_with_aliases.yaml`.
|
||||
- All curl examples use `jq .` for pretty-printing JSON responses.
|
||||
|
||||
## Troubleshooting
|
||||
- Ensure your API keys are set in your environment before running the demo.
|
||||
- If you see errors about missing keys, set them and re-run the script.
|
||||
- For more details, see the main Arch documentation.
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
# OpenAI Models
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: openai/o3
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-3-5-sonnet-20241022
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# Model aliases - friendly names that map to actual provider names
|
||||
model_aliases:
|
||||
# Alias for summarization tasks -> fast/cheap model
|
||||
arch.summarize.v1:
|
||||
target: gpt-4o-mini
|
||||
|
||||
# Alias for general purpose tasks -> latest model
|
||||
arch.v1:
|
||||
target: o3
|
||||
|
||||
# Alias for reasoning tasks -> capable model
|
||||
arch.reasoning.v1:
|
||||
target: gpt-4o
|
||||
|
||||
# Alias for creative tasks -> Claude model
|
||||
arch.creative.v1:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
|
||||
# Alias for quick responses -> fast model
|
||||
arch.fast.v1:
|
||||
target: claude-3-haiku-20240307
|
||||
|
||||
# Semantic aliases
|
||||
summary-model:
|
||||
target: gpt-4o-mini
|
||||
|
||||
chat-model:
|
||||
target: gpt-4o
|
||||
|
||||
creative-model:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
60
demos/use_cases/model_alias_routing/run_demo.sh
Normal file
60
demos/use_cases/model_alias_routing/run_demo.sh
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# Function to start the demo
|
||||
start_demo() {
  # Step 1: Reuse an existing .env file; otherwise build one from the environment.
  if [ -f ".env" ]; then
    echo ".env file already exists. Skipping creation."
  else
    # Step 2: Create `.env` file and set API keys.
    # OPENAI_API_KEY is mandatory; ANTHROPIC_API_KEY only produces a warning.
    if [ -z "$OPENAI_API_KEY" ]; then
      echo "Error: OPENAI_API_KEY environment variable is not set for the demo."
      exit 1
    fi
    if [ -z "$ANTHROPIC_API_KEY" ]; then
      echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work."
    fi

    echo "Creating .env file..."
    echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env
    if [ -n "$ANTHROPIC_API_KEY" ]; then
      echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env
    fi
    echo ".env file created with API keys."
  fi

  # Step 3: Start Arch
  echo "Starting Arch with arch_config_with_aliases.yaml..."
  archgw up arch_config_with_aliases.yaml

  # printf is used for the blank lines: bash's builtin `echo` prints "\n"
  # literally unless given -e, so the original messages showed raw backslashes.
  printf '\n\nArch started successfully.\n'
  printf '%s\n\n' "Please run the following CURL command to test model alias routing. Additional instructions are in the README.md file."

  # A quoted heredoc prints the sample command verbatim, keeping the trailing
  # backslashes so it can be copy-pasted into a shell as-is.
  cat <<'EOF'
curl -sS -X POST "http://localhost:12000/v1/chat/completions" \
  -H "Authorization: Bearer test-key" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "arch.summarize.v1",
    "max_tokens": 50,
    "messages": [
      { "role": "user",
        "content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1!"
      }
    ]
  }' | jq .
EOF
}
|
||||
|
||||
# Function to stop the demo
|
||||
stop_demo() {
  # Announce the shutdown, then bring the Arch gateway down.
  printf '%s\n' "Stopping Arch..."
  archgw down
}
|
||||
|
||||
# Main script logic
|
||||
# `=` is the POSIX string comparison. `==` inside `[ ]` is a bash extension and
# fails when the script is launched as `sh run_demo.sh` on systems whose `sh`
# is not bash.
if [ "$1" = "down" ]; then
  stop_demo
else
  # Default action is to bring the demo up
  start_demo
fi
|
||||
|
|
@ -60,14 +60,23 @@ archgw up demos/samples_python/weather_forecast/arch_config.yaml
|
|||
kill $model_server_tail_pid
|
||||
cd -
|
||||
|
||||
log running e2e tests
|
||||
log =================
|
||||
poetry run pytest
|
||||
log running e2e tests for prompt gateway
|
||||
log ====================================
|
||||
poetry run pytest test_prompt_gateway.py
|
||||
|
||||
log shutting down the arch gateway service
|
||||
log ======================================
|
||||
log shutting down the arch gateway service for prompt_gateway demo
|
||||
log ===============================================================
|
||||
archgw down
|
||||
|
||||
log startup arch gateway with model alias routing demo
|
||||
cd ../../
|
||||
archgw up demos/use_cases/model_alias_routing/arch_config_with_aliases.yaml
|
||||
cd -
|
||||
|
||||
log running e2e tests for model alias routing
|
||||
log ========================================
|
||||
poetry run pytest test_model_alias_routing.py
|
||||
|
||||
log shutting down the weather_forecast demo
|
||||
log =======================================
|
||||
cd ../../demos/samples_python/weather_forecast
|
||||
|
|
|
|||
269
tests/e2e/test_model_alias_routing.py
Normal file
269
tests/e2e/test_model_alias_routing.py
Normal file
|
|
@ -0,0 +1,269 @@
|
|||
import anthropic
|
||||
import openai
|
||||
import os
|
||||
import logging
|
||||
import pytest
|
||||
import sys
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
handlers=[logging.StreamHandler(sys.stdout)],
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv(
|
||||
"LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1/chat/completions"
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# MODEL ALIAS TESTS
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_openai_client_with_alias_arch_summarize_v1():
    """The 'arch.summarize.v1' alias must be usable through the OpenAI SDK.

    Sends a single non-streaming chat completion to the gateway and expects the
    reply to be the exact sentence requested in the prompt.
    """
    logger.info("Testing OpenAI client with alias 'arch.summarize.v1' -> '4o-mini'")

    # The SDK wants the API root, not the full chat-completions URL.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    openai_client = openai.OpenAI(api_key="test-key", base_url=f"{gateway_root}/v1")

    prompt = {
        "role": "user",
        "content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1!",
    }
    result = openai_client.chat.completions.create(
        model="arch.summarize.v1",  # alias, resolved by the gateway
        max_tokens=50,
        messages=[prompt],
    )

    reply = result.choices[0].message.content
    logger.info(f"Response from arch.summarize.v1 alias: {reply}")
    assert reply == "Hello from alias arch.summarize.v1!"
|
||||
|
||||
|
||||
def test_openai_client_with_alias_arch_v1():
    """The 'arch.v1' alias must be usable through the OpenAI SDK.

    Sends a single non-streaming chat completion to the gateway and expects the
    reply to be the exact sentence requested in the prompt.
    """
    logger.info("Testing OpenAI client with alias 'arch.v1' -> 'o3'")

    # The SDK wants the API root, not the full chat-completions URL.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    openai_client = openai.OpenAI(api_key="test-key", base_url=f"{gateway_root}/v1")

    prompt = {
        "role": "user",
        "content": "Hello, please respond with exactly: Hello from alias arch.v1!",
    }
    result = openai_client.chat.completions.create(
        model="arch.v1",  # alias, resolved by the gateway
        max_tokens=50,
        messages=[prompt],
    )

    reply = result.choices[0].message.content
    logger.info(f"Response from arch.v1 alias: {reply}")
    assert reply == "Hello from alias arch.v1!"
|
||||
|
||||
|
||||
def test_anthropic_client_with_alias_arch_summarize_v1():
    """The 'arch.summarize.v1' alias must be usable through the Anthropic SDK.

    Sends a single non-streaming Messages request to the gateway and expects
    the concatenated text blocks to be the exact sentence requested.
    """
    logger.info("Testing Anthropic client with alias 'arch.summarize.v1' -> '4o-mini'")

    # The Anthropic SDK appends /v1/messages itself, so pass the bare host.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    anthropic_client = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)

    prompt = {
        "role": "user",
        "content": "Hello, please respond with exactly: Hello from alias arch.summarize.v1 via Anthropic!",
    }
    reply_message = anthropic_client.messages.create(
        model="arch.summarize.v1",  # alias, resolved by the gateway
        max_tokens=50,
        messages=[prompt],
    )

    # Only text blocks contribute to the compared reply.
    text_parts = [block.text for block in reply_message.content if block.type == "text"]
    response_text = "".join(text_parts)
    logger.info(
        f"Response from arch.summarize.v1 alias via Anthropic: {response_text}"
    )
    assert response_text == "Hello from alias arch.summarize.v1 via Anthropic!"
|
||||
|
||||
|
||||
def test_anthropic_client_with_alias_arch_v1():
    """The 'arch.v1' alias must be usable through the Anthropic SDK.

    Sends a single non-streaming Messages request to the gateway and expects
    the concatenated text blocks to be the exact sentence requested.
    """
    logger.info("Testing Anthropic client with alias 'arch.v1' -> 'o3'")

    # The Anthropic SDK appends /v1/messages itself, so pass the bare host.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    anthropic_client = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)

    prompt = {
        "role": "user",
        "content": "Hello, please respond with exactly: Hello from alias arch.v1 via Anthropic!",
    }
    reply_message = anthropic_client.messages.create(
        model="arch.v1",  # alias, resolved by the gateway
        max_tokens=50,
        messages=[prompt],
    )

    # Only text blocks contribute to the compared reply.
    text_parts = [block.text for block in reply_message.content if block.type == "text"]
    response_text = "".join(text_parts)
    logger.info(f"Response from arch.v1 alias via Anthropic: {response_text}")
    assert response_text == "Hello from alias arch.v1 via Anthropic!"
|
||||
|
||||
|
||||
def test_openai_client_with_alias_streaming():
    """Test OpenAI client using model alias 'arch.summarize.v1' with streaming.

    Streams a chat completion through the gateway, concatenates every content
    delta, and expects the result to be the exact sentence requested.
    """
    logger.info(
        "Testing OpenAI client with alias 'arch.summarize.v1' streaming -> '4o-mini'"
    )

    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    client = openai.OpenAI(
        api_key="test-key",
        base_url=f"{base_url}/v1",
    )

    stream = client.chat.completions.create(
        model="arch.summarize.v1",  # alias, resolved by the gateway
        max_tokens=50,
        messages=[
            {
                "role": "user",
                "content": "Hello, please respond with exactly: Hello from streaming alias!",
            }
        ],
        stream=True,
    )

    content_chunks = []
    for chunk in stream:
        # A streamed chunk may carry an empty `choices` list (for example a
        # usage-only chunk); indexing it unconditionally raises IndexError.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text:
            content_chunks.append(delta_text)

    full_content = "".join(content_chunks)
    logger.info(f"Streaming response from arch.summarize.v1 alias: {full_content}")
    assert full_content == "Hello from streaming alias!"
|
||||
|
||||
|
||||
def test_anthropic_client_with_alias_streaming():
    """Streaming through the Anthropic SDK must work with a model alias.

    Opens a Messages stream for 'arch.summarize.v1', joins every text fragment
    the stream yields, and expects the exact sentence requested.
    """
    logger.info(
        "Testing Anthropic client with alias 'arch.summarize.v1' streaming -> '4o-mini'"
    )

    # The Anthropic SDK appends /v1/messages itself, so pass the bare host.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    anthropic_client = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)

    prompt = {
        "role": "user",
        "content": "Hello, please respond with exactly: Hello from streaming alias via Anthropic!",
    }
    with anthropic_client.messages.stream(
        model="arch.summarize.v1",  # alias, resolved by the gateway
        max_tokens=50,
        messages=[prompt],
    ) as text_events:
        # Consume the stream while the context manager is still open.
        full_text = "".join(text_events.text_stream)

    logger.info(
        f"Streaming response from arch.summarize.v1 alias via Anthropic: {full_text}"
    )
    assert full_text == "Hello from streaming alias via Anthropic!"
|
||||
|
||||
|
||||
def test_nonexistent_alias():
    """Test that an unknown alias is passed through as a direct model name.

    Two outcomes are accepted: the request succeeds (the name was forwarded
    as-is), or the gateway/upstream answers with an HTTP error status. Any
    other failure, such as an unreachable gateway or a bug in the test itself,
    is allowed to propagate so the test cannot pass vacuously.
    """
    logger.info(
        "Testing non-existent alias 'nonexistent.alias' should be treated as direct model"
    )

    base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    client = openai.OpenAI(
        api_key="test-key",
        base_url=f"{base_url}/v1",
    )

    try:
        completion = client.chat.completions.create(
            model="nonexistent.alias",  # This alias doesn't exist
            max_tokens=50,
            messages=[
                {
                    "role": "user",
                    "content": "Hello, this should fail or use as direct model name",
                }
            ],
        )
    except openai.APIStatusError as e:
        # Only an HTTP error response counts as the "rejected" outcome;
        # connection failures and unexpected exceptions still fail the test.
        logger.info(f"Non-existent alias resulted in error (expected): {e}")
    else:
        # Success means the alias was passed through as a direct model name.
        logger.info("Non-existent alias was handled gracefully")
        logger.info(f"Response: {completion.choices[0].message.content}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DIRECT MODEL TESTS (for comparison)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_direct_model_4o_mini_openai():
    """Baseline: the OpenAI SDK addressing the model name '4o-mini' directly.

    Sends a single non-streaming chat completion without any alias and expects
    the reply to be the exact sentence requested in the prompt.
    """
    logger.info("Testing OpenAI client with direct model '4o-mini'")

    # The SDK wants the API root, not the full chat-completions URL.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    openai_client = openai.OpenAI(api_key="test-key", base_url=f"{gateway_root}/v1")

    prompt = {
        "role": "user",
        "content": "Hello, please respond with exactly: Hello from direct 4o-mini!",
    }
    result = openai_client.chat.completions.create(
        model="4o-mini",  # no alias involved
        max_tokens=50,
        messages=[prompt],
    )

    reply = result.choices[0].message.content
    logger.info(f"Response from direct 4o-mini: {reply}")
    assert reply == "Hello from direct 4o-mini!"
|
||||
|
||||
|
||||
def test_direct_model_4o_mini_anthropic():
    """Baseline: the Anthropic SDK addressing the model name '4o-mini' directly.

    Sends a single non-streaming Messages request without any alias and expects
    the concatenated text blocks to be the exact sentence requested.
    """
    logger.info("Testing Anthropic client with direct model '4o-mini'")

    # The Anthropic SDK appends /v1/messages itself, so pass the bare host.
    gateway_root = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
    anthropic_client = anthropic.Anthropic(api_key="test-key", base_url=gateway_root)

    prompt = {
        "role": "user",
        "content": "Hello, please respond with exactly: Hello from direct 4o-mini via Anthropic!",
    }
    reply_message = anthropic_client.messages.create(
        model="4o-mini",  # no alias involved
        max_tokens=50,
        messages=[prompt],
    )

    # Only text blocks contribute to the compared reply.
    text_parts = [block.text for block in reply_message.content if block.type == "text"]
    response_text = "".join(text_parts)
    logger.info(f"Response from direct 4o-mini via Anthropic: {response_text}")
    assert response_text == "Hello from direct 4o-mini via Anthropic!"
|
||||
Loading…
Add table
Add a link
Reference in a new issue