2024-10-28 20:05:06 -04:00
import json
import pytest
import requests
from deepdiff import DeepDiff
2025-04-15 14:39:12 -07:00
import re
2025-09-10 07:40:30 -07:00
import anthropic
import openai
2024-10-28 20:05:06 -04:00
2024-11-07 11:59:29 -08:00
from common import (
PROMPT_GATEWAY_ENDPOINT ,
2025-09-10 07:40:30 -07:00
LLM_GATEWAY_ENDPOINT ,
2024-11-07 11:59:29 -08:00
PREFILL_LIST ,
Rename all arch references to plano (#745)
* Rename all arch references to plano across the codebase
Complete rebrand from "Arch"/"archgw" to "Plano" including:
- Config files: arch_config_schema.yaml, workflow, demo configs
- Environment variables: ARCH_CONFIG_* → PLANO_CONFIG_*
- Python CLI: variables, functions, file paths, docker mounts
- Rust crates: config paths, log messages, metadata keys
- Docker/build: Dockerfile, supervisord, .dockerignore, .gitignore
- Docker Compose: volume mounts and env vars across all demos/tests
- GitHub workflows: job/step names
- Shell scripts: log messages
- Demos: Python code, READMEs, VS Code configs, Grafana dashboard
- Docs: RST includes, code comments, config references
- Package metadata: package.json, pyproject.toml, uv.lock
External URLs (docs.archgw.com, github.com/katanemo/archgw) left as-is.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Update remaining arch references in docs
- Rename RST cross-reference labels: arch_access_logging, arch_overview_tracing, arch_overview_threading → plano_*
- Update label references in request_lifecycle.rst
- Rename arch_config_state_storage_example.yaml → plano_config_state_storage_example.yaml
- Update config YAML comments: "Arch creates/uses" → "Plano creates/uses"
- Update "the Arch gateway" → "the Plano gateway" in configuration_reference.rst
- Update arch_config_schema.yaml reference in provider_models.py
- Rename arch_agent_router → plano_agent_router in config example
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Fix remaining arch references found in second pass
- config/docker-compose.dev.yaml: ARCH_CONFIG_FILE → PLANO_CONFIG_FILE,
arch_config.yaml → plano_config.yaml, archgw_logs → plano_logs
- config/test_passthrough.yaml: container mount path
- tests/e2e/docker-compose.yaml: source file path (was still arch_config.yaml)
- cli/planoai/core.py: comment and log message
- crates/brightstaff/src/tracing/constants.rs: doc comment
- tests/{e2e,archgw}/common.py: get_arch_messages → get_plano_messages,
arch_state/arch_messages variables renamed
- tests/{e2e,archgw}/test_prompt_gateway.py: updated imports and usages
- demos/shared/test_runner/{common,test_demos}.py: same renames
- tests/e2e/test_model_alias_routing.py: docstring
- .dockerignore: archgw_modelserver → plano_modelserver
- demos/use_cases/claude_code_router/pretty_model_resolution.sh: container name
Note: x-arch-* HTTP header values and Rust constant names intentionally
preserved for backwards compatibility with existing deployments.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 15:16:56 -08:00
get_plano_messages ,
2024-11-07 11:59:29 -08:00
get_data_chunks ,
)
2024-10-28 20:05:06 -04:00
2025-04-15 14:39:12 -07:00
def cleanup_tool_call ( tool_call ) :
pattern = r " ```json \ n(.*?) \ n``` "
match = re . search ( pattern , tool_call , re . DOTALL )
if match :
tool_call = match . group ( 1 )
return tool_call . strip ( )
2025-11-22 12:55:00 -08:00
def normalize_tool_call_arguments ( tool_call ) :
"""
Normalize tool call arguments to ensure they are always a dict .
According to OpenAI API spec , the ' arguments ' field should be a JSON string ,
but for easier testing we parse it into a dict here .
Args :
tool_call : A tool call dict that may have ' arguments ' as either a string or dict
Returns :
A tool call dict with ' arguments ' guaranteed to be a dict
"""
if " arguments " in tool_call and isinstance ( tool_call [ " arguments " ] , str ) :
try :
tool_call [ " arguments " ] = json . loads ( tool_call [ " arguments " ] )
except ( json . JSONDecodeError , TypeError ) :
# If parsing fails, keep it as is
pass
return tool_call
2024-10-28 20:05:06 -04:00
@pytest.mark.parametrize ( " stream " , [ True , False ] )
def test_prompt_gateway ( stream ) :
expected_tool_call = {
2024-12-20 13:25:01 -08:00
" name " : " get_current_weather " ,
" arguments " : { " days " : 10 , " location " : " seattle " } ,
2024-10-28 20:05:06 -04:00
}
body = {
" messages " : [
{
" role " : " user " ,
" content " : " how is the weather in seattle for next 10 days " ,
}
] ,
2026-01-28 17:47:33 -08:00
" model " : " openai/gpt-4o " ,
2024-10-28 20:05:06 -04:00
" stream " : stream ,
}
response = requests . post ( PROMPT_GATEWAY_ENDPOINT , json = body , stream = stream )
assert response . status_code == 200
if stream :
chunks = get_data_chunks ( response , n = 20 )
2024-12-20 13:25:01 -08:00
# print(chunks)
2024-10-28 20:05:06 -04:00
assert len ( chunks ) > 2
# first chunk is tool calls (role = assistant)
response_json = json . loads ( chunks [ 0 ] )
assert response_json . get ( " model " ) . startswith ( " Arch " )
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
assert " role " in choices [ 0 ] [ " delta " ]
role = choices [ 0 ] [ " delta " ] [ " role " ]
assert role == " assistant "
2025-04-15 14:39:12 -07:00
print ( f " choices: { choices } " )
tool_call_str = choices [ 0 ] . get ( " delta " , { } ) . get ( " content " , " " )
print ( " tool_call_str: " , tool_call_str )
cleaned_tool_call_str = cleanup_tool_call ( tool_call_str )
print ( " cleaned_tool_call_str: " , cleaned_tool_call_str )
tool_calls = json . loads ( cleaned_tool_call_str ) . get ( " tool_calls " , [ ] )
2024-10-28 20:05:06 -04:00
assert len ( tool_calls ) > 0
2025-11-22 12:55:00 -08:00
tool_call = normalize_tool_call_arguments ( tool_calls [ 0 ] )
2024-12-20 13:25:01 -08:00
location = tool_call [ " arguments " ] [ " location " ]
assert expected_tool_call [ " arguments " ] [ " location " ] in location . lower ( )
del expected_tool_call [ " arguments " ] [ " location " ]
del tool_call [ " arguments " ] [ " location " ]
diff = DeepDiff ( expected_tool_call , tool_call , ignore_string_case = True )
2024-10-28 20:05:06 -04:00
assert not diff
# second chunk is api call result (role = tool)
response_json = json . loads ( chunks [ 1 ] )
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
assert " role " in choices [ 0 ] [ " delta " ]
role = choices [ 0 ] [ " delta " ] [ " role " ]
assert role == " tool "
# third..end chunk is summarization (role = assistant)
response_json = json . loads ( chunks [ 2 ] )
2025-04-15 14:39:12 -07:00
assert response_json . get ( " model " ) . startswith ( " gpt-4o " )
2024-10-28 20:05:06 -04:00
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
assert " role " in choices [ 0 ] [ " delta " ]
role = choices [ 0 ] [ " delta " ] [ " role " ]
assert role == " assistant "
else :
response_json = response . json ( )
2025-04-15 14:39:12 -07:00
assert response_json . get ( " model " ) . startswith ( " gpt-4o " )
2024-10-28 20:05:06 -04:00
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
assert " role " in choices [ 0 ] [ " message " ]
assert choices [ 0 ] [ " message " ] [ " role " ] == " assistant "
Rename all arch references to plano (#745)
* Rename all arch references to plano across the codebase
Complete rebrand from "Arch"/"archgw" to "Plano" including:
- Config files: arch_config_schema.yaml, workflow, demo configs
- Environment variables: ARCH_CONFIG_* → PLANO_CONFIG_*
- Python CLI: variables, functions, file paths, docker mounts
- Rust crates: config paths, log messages, metadata keys
- Docker/build: Dockerfile, supervisord, .dockerignore, .gitignore
- Docker Compose: volume mounts and env vars across all demos/tests
- GitHub workflows: job/step names
- Shell scripts: log messages
- Demos: Python code, READMEs, VS Code configs, Grafana dashboard
- Docs: RST includes, code comments, config references
- Package metadata: package.json, pyproject.toml, uv.lock
External URLs (docs.archgw.com, github.com/katanemo/archgw) left as-is.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Update remaining arch references in docs
- Rename RST cross-reference labels: arch_access_logging, arch_overview_tracing, arch_overview_threading → plano_*
- Update label references in request_lifecycle.rst
- Rename arch_config_state_storage_example.yaml → plano_config_state_storage_example.yaml
- Update config YAML comments: "Arch creates/uses" → "Plano creates/uses"
- Update "the Arch gateway" → "the Plano gateway" in configuration_reference.rst
- Update arch_config_schema.yaml reference in provider_models.py
- Rename arch_agent_router → plano_agent_router in config example
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Fix remaining arch references found in second pass
- config/docker-compose.dev.yaml: ARCH_CONFIG_FILE → PLANO_CONFIG_FILE,
arch_config.yaml → plano_config.yaml, archgw_logs → plano_logs
- config/test_passthrough.yaml: container mount path
- tests/e2e/docker-compose.yaml: source file path (was still arch_config.yaml)
- cli/planoai/core.py: comment and log message
- crates/brightstaff/src/tracing/constants.rs: doc comment
- tests/{e2e,archgw}/common.py: get_arch_messages → get_plano_messages,
arch_state/arch_messages variables renamed
- tests/{e2e,archgw}/test_prompt_gateway.py: updated imports and usages
- demos/shared/test_runner/{common,test_demos}.py: same renames
- tests/e2e/test_model_alias_routing.py: docstring
- .dockerignore: archgw_modelserver → plano_modelserver
- demos/use_cases/claude_code_router/pretty_model_resolution.sh: container name
Note: x-arch-* HTTP header values and Rust constant names intentionally
preserved for backwards compatibility with existing deployments.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 15:16:56 -08:00
# now verify plano_messages (tool call and api response) that are sent as response metadata
plano_messages = get_plano_messages ( response_json )
print ( " plano_messages: " , json . dumps ( plano_messages ) )
assert len ( plano_messages ) == 2
tool_calls_message = plano_messages [ 0 ]
2025-04-15 14:39:12 -07:00
print ( " tool_calls_message: " , tool_calls_message )
tool_calls = tool_calls_message . get ( " content " , [ ] )
cleaned_tool_call_str = cleanup_tool_call ( tool_calls )
cleaned_tool_call_json = json . loads ( cleaned_tool_call_str )
print ( " cleaned_tool_call_json: " , json . dumps ( cleaned_tool_call_json ) )
tool_calls_list = cleaned_tool_call_json . get ( " tool_calls " , [ ] )
assert len ( tool_calls_list ) > 0
2025-11-22 12:55:00 -08:00
tool_call = normalize_tool_call_arguments ( tool_calls_list [ 0 ] )
2024-12-20 13:25:01 -08:00
location = tool_call [ " arguments " ] [ " location " ]
assert expected_tool_call [ " arguments " ] [ " location " ] in location . lower ( )
del expected_tool_call [ " arguments " ] [ " location " ]
del tool_call [ " arguments " ] [ " location " ]
diff = DeepDiff ( expected_tool_call , tool_call , ignore_string_case = True )
2024-10-28 20:05:06 -04:00
assert not diff
@pytest.mark.parametrize ( " stream " , [ True , False ] )
2024-12-20 13:25:01 -08:00
@pytest.mark.skip ( " no longer needed " )
2024-10-28 20:05:06 -04:00
def test_prompt_gateway_arch_direct_response ( stream ) :
body = {
" messages " : [
{
" role " : " user " ,
" content " : " how is the weather " ,
}
] ,
2026-01-28 17:47:33 -08:00
" model " : " openai/gpt-4o " ,
2024-10-28 20:05:06 -04:00
" stream " : stream ,
}
response = requests . post ( PROMPT_GATEWAY_ENDPOINT , json = body , stream = stream )
assert response . status_code == 200
if stream :
chunks = get_data_chunks ( response , n = 3 )
assert len ( chunks ) > 0
response_json = json . loads ( chunks [ 0 ] )
# make sure arch responded directly
assert response_json . get ( " model " ) . startswith ( " Arch " )
# and tool call is null
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
tool_calls = choices [ 0 ] . get ( " delta " , { } ) . get ( " tool_calls " , [ ] )
assert len ( tool_calls ) == 0
2024-11-07 11:59:29 -08:00
response_json = json . loads ( chunks [ 1 ] )
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
message = choices [ 0 ] [ " delta " ] [ " content " ]
2024-10-28 20:05:06 -04:00
else :
response_json = response . json ( )
assert response_json . get ( " model " ) . startswith ( " Arch " )
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
message = choices [ 0 ] [ " message " ] [ " content " ]
2024-11-07 11:59:29 -08:00
2024-12-20 13:25:01 -08:00
assert " days " in message
assert any (
message . startswith ( word ) for word in PREFILL_LIST
) , f " Expected assistant message to start with one of { PREFILL_LIST } , but got ' { assistant_message } ' "
2024-10-28 20:05:06 -04:00
@pytest.mark.parametrize ( " stream " , [ True , False ] )
2024-12-20 13:25:01 -08:00
@pytest.mark.skip ( " no longer needed " )
2024-10-28 20:05:06 -04:00
def test_prompt_gateway_param_gathering ( stream ) :
body = {
" messages " : [
{
" role " : " user " ,
" content " : " how is the weather in seattle " ,
}
] ,
2026-01-28 17:47:33 -08:00
" model " : " openai/gpt-4o " ,
2024-10-28 20:05:06 -04:00
" stream " : stream ,
}
response = requests . post ( PROMPT_GATEWAY_ENDPOINT , json = body , stream = stream )
assert response . status_code == 200
if stream :
chunks = get_data_chunks ( response , n = 3 )
2024-12-20 13:25:01 -08:00
assert len ( chunks ) > 1
2024-10-28 20:05:06 -04:00
response_json = json . loads ( chunks [ 0 ] )
# make sure arch responded directly
assert response_json . get ( " model " ) . startswith ( " Arch " )
# and tool call is null
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
tool_calls = choices [ 0 ] . get ( " delta " , { } ) . get ( " tool_calls " , [ ] )
assert len ( tool_calls ) == 0
2024-12-20 13:25:01 -08:00
# second chunk is api call result (role = tool)
response_json = json . loads ( chunks [ 1 ] )
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
message = choices [ 0 ] . get ( " message " , { } ) . get ( " content " , " " )
assert " days " not in message
2024-10-28 20:05:06 -04:00
else :
response_json = response . json ( )
assert response_json . get ( " model " ) . startswith ( " Arch " )
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
message = choices [ 0 ] [ " message " ] [ " content " ]
2024-12-20 13:25:01 -08:00
assert " days " in message
2024-10-28 20:05:06 -04:00
@pytest.mark.parametrize ( " stream " , [ True , False ] )
2024-12-20 13:25:01 -08:00
@pytest.mark.skip ( " no longer needed " )
2024-10-28 20:05:06 -04:00
def test_prompt_gateway_param_tool_call ( stream ) :
expected_tool_call = {
2024-12-20 13:25:01 -08:00
" name " : " get_current_weather " ,
" arguments " : { " location " : " seattle, wa " , " days " : " 2 " } ,
2024-10-28 20:05:06 -04:00
}
body = {
" messages " : [
{
" role " : " user " ,
" content " : " how is the weather in seattle " ,
} ,
{
" role " : " assistant " ,
2024-12-20 13:25:01 -08:00
" content " : " Of course, I can help with that. Could you please specify the days you want the weather forecast for? " ,
" model " : " Arch-Function " ,
2024-10-28 20:05:06 -04:00
} ,
{
" role " : " user " ,
2024-12-20 13:25:01 -08:00
" content " : " for 2 days please " ,
2024-10-28 20:05:06 -04:00
} ,
] ,
2026-01-28 17:47:33 -08:00
" model " : " openai/gpt-4o " ,
2024-10-28 20:05:06 -04:00
" stream " : stream ,
}
response = requests . post ( PROMPT_GATEWAY_ENDPOINT , json = body , stream = stream )
assert response . status_code == 200
if stream :
chunks = get_data_chunks ( response , n = 20 )
assert len ( chunks ) > 2
# first chunk is tool calls (role = assistant)
response_json = json . loads ( chunks [ 0 ] )
assert response_json . get ( " model " ) . startswith ( " Arch " )
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
assert " role " in choices [ 0 ] [ " delta " ]
role = choices [ 0 ] [ " delta " ] [ " role " ]
assert role == " assistant "
tool_calls = choices [ 0 ] . get ( " delta " , { } ) . get ( " tool_calls " , [ ] )
assert len ( tool_calls ) > 0
2025-11-22 12:55:00 -08:00
tool_call = normalize_tool_call_arguments ( tool_calls [ 0 ] [ " function " ] )
2024-10-28 20:05:06 -04:00
diff = DeepDiff ( tool_call , expected_tool_call , ignore_string_case = True )
assert not diff
# second chunk is api call result (role = tool)
response_json = json . loads ( chunks [ 1 ] )
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
assert " role " in choices [ 0 ] [ " delta " ]
role = choices [ 0 ] [ " delta " ] [ " role " ]
assert role == " tool "
# third..end chunk is summarization (role = assistant)
response_json = json . loads ( chunks [ 2 ] )
2025-04-15 14:39:12 -07:00
assert response_json . get ( " model " ) . startswith ( " gpt-4o " )
2024-10-28 20:05:06 -04:00
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
assert " role " in choices [ 0 ] [ " delta " ]
role = choices [ 0 ] [ " delta " ] [ " role " ]
assert role == " assistant "
else :
response_json = response . json ( )
2025-04-15 14:39:12 -07:00
assert response_json . get ( " model " ) . startswith ( " gpt-4o " )
2024-10-28 20:05:06 -04:00
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
assert " role " in choices [ 0 ] [ " message " ]
assert choices [ 0 ] [ " message " ] [ " role " ] == " assistant "
Rename all arch references to plano (#745)
* Rename all arch references to plano across the codebase
Complete rebrand from "Arch"/"archgw" to "Plano" including:
- Config files: arch_config_schema.yaml, workflow, demo configs
- Environment variables: ARCH_CONFIG_* → PLANO_CONFIG_*
- Python CLI: variables, functions, file paths, docker mounts
- Rust crates: config paths, log messages, metadata keys
- Docker/build: Dockerfile, supervisord, .dockerignore, .gitignore
- Docker Compose: volume mounts and env vars across all demos/tests
- GitHub workflows: job/step names
- Shell scripts: log messages
- Demos: Python code, READMEs, VS Code configs, Grafana dashboard
- Docs: RST includes, code comments, config references
- Package metadata: package.json, pyproject.toml, uv.lock
External URLs (docs.archgw.com, github.com/katanemo/archgw) left as-is.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Update remaining arch references in docs
- Rename RST cross-reference labels: arch_access_logging, arch_overview_tracing, arch_overview_threading → plano_*
- Update label references in request_lifecycle.rst
- Rename arch_config_state_storage_example.yaml → plano_config_state_storage_example.yaml
- Update config YAML comments: "Arch creates/uses" → "Plano creates/uses"
- Update "the Arch gateway" → "the Plano gateway" in configuration_reference.rst
- Update arch_config_schema.yaml reference in provider_models.py
- Rename arch_agent_router → plano_agent_router in config example
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* Fix remaining arch references found in second pass
- config/docker-compose.dev.yaml: ARCH_CONFIG_FILE → PLANO_CONFIG_FILE,
arch_config.yaml → plano_config.yaml, archgw_logs → plano_logs
- config/test_passthrough.yaml: container mount path
- tests/e2e/docker-compose.yaml: source file path (was still arch_config.yaml)
- cli/planoai/core.py: comment and log message
- crates/brightstaff/src/tracing/constants.rs: doc comment
- tests/{e2e,archgw}/common.py: get_arch_messages → get_plano_messages,
arch_state/arch_messages variables renamed
- tests/{e2e,archgw}/test_prompt_gateway.py: updated imports and usages
- demos/shared/test_runner/{common,test_demos}.py: same renames
- tests/e2e/test_model_alias_routing.py: docstring
- .dockerignore: archgw_modelserver → plano_modelserver
- demos/use_cases/claude_code_router/pretty_model_resolution.sh: container name
Note: x-arch-* HTTP header values and Rust constant names intentionally
preserved for backwards compatibility with existing deployments.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 15:16:56 -08:00
# now verify plano_messages (tool call and api response) that are sent as response metadata
plano_messages = get_plano_messages ( response_json )
assert len ( plano_messages ) == 2
tool_calls_message = plano_messages [ 0 ]
2024-10-28 20:05:06 -04:00
tool_calls = tool_calls_message . get ( " tool_calls " , [ ] )
assert len ( tool_calls ) > 0
2025-11-22 12:55:00 -08:00
tool_call = normalize_tool_call_arguments ( tool_calls [ 0 ] [ " function " ] )
2024-10-28 20:05:06 -04:00
diff = DeepDiff ( tool_call , expected_tool_call , ignore_string_case = True )
assert not diff
@pytest.mark.parametrize ( " stream " , [ True , False ] )
def test_prompt_gateway_default_target ( stream ) :
body = {
" messages " : [
{
" role " : " user " ,
2025-04-15 14:39:12 -07:00
" content " : " hello " ,
2024-10-28 20:05:06 -04:00
} ,
] ,
2026-01-28 17:47:33 -08:00
" model " : " openai/gpt-4o " ,
2024-10-28 20:05:06 -04:00
" stream " : stream ,
}
response = requests . post ( PROMPT_GATEWAY_ENDPOINT , json = body , stream = stream )
assert response . status_code == 200
if stream :
chunks = get_data_chunks ( response , n = 3 )
assert len ( chunks ) > 0
response_json = json . loads ( chunks [ 0 ] )
2025-04-15 14:39:12 -07:00
print ( " response_json chunks[0]: " , response_json )
2024-10-28 20:05:06 -04:00
assert response_json . get ( " model " ) . startswith ( " api_server " )
assert len ( response_json . get ( " choices " , [ ] ) ) > 0
assert response_json . get ( " choices " ) [ 0 ] [ " delta " ] [ " role " ] == " assistant "
response_json = json . loads ( chunks [ 1 ] )
2025-04-15 14:39:12 -07:00
print ( " response_json chunks[1]: " , response_json )
2024-10-28 20:05:06 -04:00
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
content = choices [ 0 ] [ " delta " ] [ " content " ]
2024-11-07 22:11:00 -06:00
assert content == " I can help you with weather forecast "
2024-10-28 20:05:06 -04:00
else :
response_json = response . json ( )
2025-04-15 14:39:12 -07:00
print ( " response_json: " , response_json )
2024-10-28 20:05:06 -04:00
assert response_json . get ( " model " ) . startswith ( " api_server " )
assert len ( response_json . get ( " choices " ) ) > 0
assert response_json . get ( " choices " ) [ 0 ] [ " message " ] [ " role " ] == " assistant "
assert (
response_json . get ( " choices " ) [ 0 ] [ " message " ] [ " content " ]
2024-11-07 22:11:00 -06:00
== " I can help you with weather forecast "
2024-10-28 20:05:06 -04:00
)
2024-11-25 17:16:35 -08:00
@pytest.mark.parametrize ( " stream " , [ True , False ] )
2024-12-20 13:25:01 -08:00
@pytest.mark.skip (
" This test is failing due to the prompt gateway not being able to handle the guardrail "
)
2024-11-25 17:16:35 -08:00
def test_prompt_gateway_prompt_guard_jailbreak ( stream ) :
body = {
" messages " : [
{
" role " : " user " ,
" content " : " How would you respond to a question about generating harmful or unethical content " ,
}
] ,
2026-01-28 17:47:33 -08:00
" model " : " openai/gpt-4o " ,
2024-11-25 17:16:35 -08:00
" stream " : stream ,
}
response = requests . post ( PROMPT_GATEWAY_ENDPOINT , json = body , stream = stream )
assert response . status_code == 200
if stream :
chunks = get_data_chunks ( response , n = 20 )
assert len ( chunks ) == 2
response_json = json . loads ( chunks [ 1 ] )
choices = response_json . get ( " choices " , [ ] )
assert len ( choices ) > 0
content = choices [ 0 ] [ " delta " ] [ " content " ]
assert (
content
== " Looks like you ' re curious about my abilities, but I can only provide assistance for weather forecasting. "
)
else :
response_json = response . json ( )
assert (
response_json . get ( " choices " ) [ 0 ] [ " message " ] [ " content " ]
== " Looks like you ' re curious about my abilities, but I can only provide assistance for weather forecasting. "
)
2025-09-10 07:40:30 -07:00
def test_claude_v1_messages_api ( ) :
""" Test Claude client using /v1/messages API through llm_gateway (port 12000) """
# Get the base URL from the LLM gateway endpoint
base_url = LLM_GATEWAY_ENDPOINT . replace ( " /v1/chat/completions " , " " )
client = anthropic . Anthropic (
api_key = " test-key " , base_url = base_url # Dummy key for testing
)
message = client . messages . create (
model = " claude-sonnet-4-20250514 " , # Use working model from smoke test
max_tokens = 50 ,
messages = [
{
" role " : " user " ,
" content " : " Hello, please respond with exactly: Hello from Claude! " ,
}
] ,
)
assert message . content [ 0 ] . text == " Hello from Claude! "
def test_claude_v1_messages_api_streaming ( ) :
base_url = LLM_GATEWAY_ENDPOINT . replace ( " /v1/chat/completions " , " " )
client = anthropic . Anthropic ( api_key = " test-key " , base_url = base_url )
with client . messages . stream (
model = " claude-sonnet-4-20250514 " ,
max_tokens = 50 ,
messages = [
{
" role " : " user " ,
" content " : " Hello, please respond with exactly: Hello from Claude! " ,
}
] ,
) as stream :
# This yields only text deltas in order
pieces = [ t for t in stream . text_stream ]
full_text = " " . join ( pieces )
# You can also get the fully-assembled Message object
final = stream . get_final_message ( )
# A safe way to reassemble text from the content blocks:
final_text = " " . join ( b . text for b in final . content if b . type == " text " )
assert full_text == " Hello from Claude! "
assert final_text == " Hello from Claude! "
def test_anthropic_client_with_openai_model_streaming ( ) :
""" Test Anthropic client using /v1/messages API with OpenAI model (gpt-4o-mini)
This tests the transformation : OpenAI upstream - > Anthropic client format with proper event lines
"""
base_url = LLM_GATEWAY_ENDPOINT . replace ( " /v1/chat/completions " , " " )
client = anthropic . Anthropic ( api_key = " test-key " , base_url = base_url )
with client . messages . stream (
2026-01-28 17:47:33 -08:00
model = " gpt-4o-mini " , # OpenAI model via Anthropic client
2025-09-29 19:23:08 -07:00
max_tokens = 500 ,
2025-09-10 07:40:30 -07:00
messages = [
{
" role " : " user " ,
2025-09-29 19:23:08 -07:00
" content " : " Hello, please respond with exactly: Hello from ChatGPT! " ,
2025-09-10 07:40:30 -07:00
}
] ,
) as stream :
# This yields only text deltas in order
pieces = [ t for t in stream . text_stream ]
full_text = " " . join ( pieces )
# You can also get the fully-assembled Message object
final = stream . get_final_message ( )
# A safe way to reassemble text from the content blocks:
final_text = " " . join ( b . text for b in final . content if b . type == " text " )
2025-09-29 19:23:08 -07:00
assert full_text == " Hello from ChatGPT! "
assert final_text == " Hello from ChatGPT! "
2025-09-10 07:40:30 -07:00
def test_openai_gpt4o_mini_v1_messages_api ( ) :
""" Test OpenAI GPT-4o-mini using /v1/chat/completions API through llm_gateway (port 12000) """
# Get the base URL from the LLM gateway endpoint
base_url = LLM_GATEWAY_ENDPOINT . replace ( " /v1/chat/completions " , " " )
client = openai . OpenAI (
api_key = " test-key " , # Dummy key for testing
base_url = f " { base_url } /v1 " , # OpenAI needs /v1 suffix in base_url
)
completion = client . chat . completions . create (
model = " gpt-4o-mini " ,
max_tokens = 50 ,
messages = [
{
" role " : " user " ,
" content " : " Hello, please respond with exactly: Hello from GPT-4o-mini! " ,
}
] ,
)
assert completion . choices [ 0 ] . message . content == " Hello from GPT-4o-mini! "
def test_openai_gpt4o_mini_v1_messages_api_streaming ( ) :
""" Test OpenAI GPT-4o-mini using /v1/chat/completions API with streaming through llm_gateway (port 12000) """
# Get the base URL from the LLM gateway endpoint
base_url = LLM_GATEWAY_ENDPOINT . replace ( " /v1/chat/completions " , " " )
client = openai . OpenAI (
api_key = " test-key " , # Dummy key for testing
base_url = f " { base_url } /v1 " , # OpenAI needs /v1 suffix in base_url
)
stream = client . chat . completions . create (
model = " gpt-4o-mini " ,
max_tokens = 50 ,
messages = [
{
" role " : " user " ,
" content " : " Hello, please respond with exactly: Hello from GPT-4o-mini! " ,
}
] ,
stream = True ,
)
# Collect all the streaming chunks
content_chunks = [ ]
for chunk in stream :
if chunk . choices [ 0 ] . delta . content :
content_chunks . append ( chunk . choices [ 0 ] . delta . content )
# Reconstruct the full message
full_content = " " . join ( content_chunks )
assert full_content == " Hello from GPT-4o-mini! "
def test_openai_client_with_claude_model_streaming ( ) :
""" Test OpenAI client using /v1/chat/completions API with Claude model (claude-sonnet-4-20250514)
This tests the transformation : Anthropic upstream - > OpenAI client format with proper chunk handling
"""
# Get the base URL from the LLM gateway endpoint
base_url = LLM_GATEWAY_ENDPOINT . replace ( " /v1/chat/completions " , " " )
client = openai . OpenAI (
api_key = " test-key " , # Dummy key for testing
base_url = f " { base_url } /v1 " , # OpenAI needs /v1 suffix in base_url
)
stream = client . chat . completions . create (
model = " claude-sonnet-4-20250514 " , # Claude model via OpenAI client
max_tokens = 50 ,
messages = [
{
" role " : " user " ,
" content " : " Who are you? ALWAYS RESPOND WITH:I appreciate the request, but I should clarify that I ' m Claude, made by Anthropic, not OpenAI. I don ' t want to create confusion about my origins. " ,
}
] ,
stream = True ,
temperature = 0.1 ,
)
# Collect all the streaming chunks
content_chunks = [ ]
for chunk in stream :
if chunk . choices [ 0 ] . delta . content :
content_chunks . append ( chunk . choices [ 0 ] . delta . content )
# Reconstruct the full message
full_content = " " . join ( content_chunks )
assert full_content is not None