mirror of
https://github.com/katanemo/plano.git
synced 2026-06-08 14:55:14 +02:00
adding support for claude code routing (#575)
* fixed for claude code routing. first commit * removing redundant enum tags for cache_control * making sure that claude code can run via the archgw cli * fixing broken config * adding a README.md and updated the cli to use more of our defined patterns for params * fixed config.yaml * minor fixes to make sure PR is clean. Ready to ship * adding claude-sonnet-4-5 to the config * fixes based on PR * fixed alias for README * fixed 400 error handling tests, now that we write temperature to 1.0 for GPT-5 --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-257.local> Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-288.local>
This commit is contained in:
parent
03c2cf6f0d
commit
f00870dccb
16 changed files with 903 additions and 106 deletions
|
|
@ -140,7 +140,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ llm_cluster_name }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endfor %}
|
||||
|
||||
{% if agent_orchestrator %}
|
||||
|
|
@ -153,7 +153,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ agent_orchestrator }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endif %}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
|
|
@ -266,7 +266,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ internal_cluster }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endfor %}
|
||||
|
||||
{% for cluster_name, cluster in arch_clusters.items() %}
|
||||
|
|
@ -279,7 +279,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ cluster_name }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endfor %}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.router
|
||||
|
|
@ -434,7 +434,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ llm_cluster_name }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endfor %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
|
|
|
|||
|
|
@ -242,7 +242,7 @@ def validate_and_render_schema():
|
|||
if llm_gateway_listener.get("address") == None:
|
||||
llm_gateway_listener["address"] = "127.0.0.1"
|
||||
if llm_gateway_listener.get("timeout") == None:
|
||||
llm_gateway_listener["timeout"] = "10s"
|
||||
llm_gateway_listener["timeout"] = "300s"
|
||||
|
||||
use_agent_orchestrator = config_yaml.get("overrides", {}).get(
|
||||
"use_agent_orchestrator", False
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import json
|
||||
import subprocess
|
||||
import os
|
||||
import time
|
||||
|
|
@ -185,3 +186,93 @@ def stop_arch_modelserver():
|
|||
except subprocess.CalledProcessError as e:
|
||||
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _mapping_or_empty(value):
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def start_cli_agent(arch_config_file=None, settings_json="{}"):
    """Launch the Claude Code CLI with its API traffic pointed at Arch.

    Reads the egress listener host/port from the Arch config file, builds the
    environment for the ``claude`` process, and runs it, blocking until the
    process exits.

    Args:
        arch_config_file: Path to the Arch YAML config. Must be provided;
            the ``None`` default cannot be opened.
        settings_json: JSON object, as a string, with extra settings.
            ``ANTHROPIC_SMALL_FAST_MODEL`` and ``NON_INTERACTIVE_MODE`` are
            consumed here as environment configuration; every other key is
            forwarded to ``claude`` through ``--settings``.

    Exits the process with status 1 when the settings are not a JSON object,
    when ``claude`` is not found, or when ``claude`` returns a non-zero status.
    """

    with open(arch_config_file, "r") as file:
        # safe_load returns None for an empty document; normalize so the
        # lookups below never run against None.
        arch_config_yaml = _mapping_or_empty(yaml.safe_load(file))

    # Get egress listener configuration (keys present but set to null fall
    # back to the defaults as well).
    listeners = _mapping_or_empty(arch_config_yaml.get("listeners"))
    egress_config = _mapping_or_empty(listeners.get("egress_traffic"))
    # NOTE(review): this reads the "host" key; confirm it matches the key the
    # listener config actually uses for its bind address.
    host = egress_config.get("host", "127.0.0.1")
    port = egress_config.get("port", 12000)

    # Parse additional settings from command line
    try:
        additional_settings = json.loads(settings_json) if settings_json else {}
    except json.JSONDecodeError:
        log.error("Settings must be valid JSON")
        sys.exit(1)

    # Valid JSON that is not an object (e.g. "[]", "null", "1") would break the
    # key lookups below, so reject it up front with a clear message.
    if not isinstance(additional_settings, dict):
        log.error("Settings must be a JSON object")
        sys.exit(1)

    # Set up environment variables
    env = os.environ.copy()
    env.update(
        {
            "ANTHROPIC_AUTH_TOKEN": "test",  # Use test token for arch
            "ANTHROPIC_API_KEY": "",
            "ANTHROPIC_BASE_URL": f"http://{host}:{port}",
            "NO_PROXY": host,
            "DISABLE_TELEMETRY": "true",
            "DISABLE_COST_WARNINGS": "true",
            "API_TIMEOUT_MS": "600000",
        }
    )

    # Set ANTHROPIC_SMALL_FAST_MODEL from additional_settings or model alias
    if "ANTHROPIC_SMALL_FAST_MODEL" in additional_settings:
        env["ANTHROPIC_SMALL_FAST_MODEL"] = additional_settings[
            "ANTHROPIC_SMALL_FAST_MODEL"
        ]
    else:
        # Check if arch.claude.code.small.fast alias exists in model_aliases
        model_aliases = arch_config_yaml.get("model_aliases") or {}
        if "arch.claude.code.small.fast" in model_aliases:
            env["ANTHROPIC_SMALL_FAST_MODEL"] = "arch.claude.code.small.fast"
        else:
            log.info(
                "Tip: Set an alias 'arch.claude.code.small.fast' in your model_aliases config to set a small fast model Claude Code"
            )
            log.info("Or provide ANTHROPIC_SMALL_FAST_MODEL in --settings JSON")

    # Non-interactive mode configuration from additional_settings only
    if additional_settings.get("NON_INTERACTIVE_MODE", False):
        env.update(
            {
                "CI": "true",
                "FORCE_COLOR": "0",
                "NODE_NO_READLINE": "1",
                "TERM": "dumb",
            }
        )

    # Build claude command arguments
    claude_args = []

    # Forward only the settings that were not already consumed above as
    # environment configuration.
    claude_settings = {
        k: v
        for k, v in additional_settings.items()
        if k not in ("ANTHROPIC_SMALL_FAST_MODEL", "NON_INTERACTIVE_MODE")
    }
    if claude_settings:
        claude_args.append(f"--settings={json.dumps(claude_settings)}")

    # Use claude from PATH
    claude_path = "claude"
    log.info(f"Connecting Claude Code Agent to Arch at {host}:{port}")

    try:
        # Argument list (no shell) so setting values are never shell-interpreted.
        subprocess.run([claude_path] + claude_args, env=env, check=True)
    except subprocess.CalledProcessError as e:
        log.error(f"Error starting claude: {e}")
        sys.exit(1)
    except FileNotFoundError:
        log.error(
            f"{claude_path} not found. Make sure Claude Code is installed: npm install -g @anthropic-ai/claude-code"
        )
        sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -4,13 +4,20 @@ import sys
|
|||
import subprocess
|
||||
import multiprocessing
|
||||
import importlib.metadata
|
||||
import json
|
||||
from cli import targets
|
||||
from cli.docker_cli import docker_validate_archgw_schema, stream_gateway_logs
|
||||
from cli.docker_cli import (
|
||||
docker_validate_archgw_schema,
|
||||
stream_gateway_logs,
|
||||
docker_container_status,
|
||||
)
|
||||
from cli.utils import (
|
||||
getLogger,
|
||||
get_llm_provider_access_keys,
|
||||
has_ingress_listener,
|
||||
load_env_file_to_dict,
|
||||
stream_access_logs,
|
||||
find_config_file,
|
||||
)
|
||||
from cli.core import (
|
||||
start_arch_modelserver,
|
||||
|
|
@ -18,9 +25,11 @@ from cli.core import (
|
|||
start_arch,
|
||||
stop_docker_container,
|
||||
download_models_from_hf,
|
||||
start_cli_agent,
|
||||
)
|
||||
from cli.consts import (
|
||||
ARCHGW_DOCKER_IMAGE,
|
||||
ARCHGW_DOCKER_NAME,
|
||||
KATANEMO_DOCKERHUB_REPO,
|
||||
SERVICE_NAME_ARCHGW,
|
||||
SERVICE_NAME_MODEL_SERVER,
|
||||
|
|
@ -170,12 +179,8 @@ def up(file, path, service, foreground):
|
|||
start_arch_modelserver(foreground)
|
||||
return
|
||||
|
||||
if file:
|
||||
# If a file is provided, process that file
|
||||
arch_config_file = os.path.abspath(file)
|
||||
else:
|
||||
# If no file is provided, use the path and look for arch_config.yaml
|
||||
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
|
||||
# Use the utility function to find config file
|
||||
arch_config_file = find_config_file(path, file)
|
||||
|
||||
# Check if the file exists
|
||||
if not os.path.exists(arch_config_file):
|
||||
|
|
@ -183,7 +188,6 @@ def up(file, path, service, foreground):
|
|||
return
|
||||
|
||||
log.info(f"Validating {arch_config_file}")
|
||||
|
||||
(
|
||||
validation_return_code,
|
||||
validation_stdout,
|
||||
|
|
@ -240,8 +244,15 @@ def up(file, path, service, foreground):
|
|||
if service == SERVICE_NAME_ARCHGW:
|
||||
start_arch(arch_config_file, env, foreground=foreground)
|
||||
else:
|
||||
download_models_from_hf()
|
||||
start_arch_modelserver(foreground)
|
||||
# Check if ingress_traffic listener is configured before starting model_server
|
||||
if has_ingress_listener(arch_config_file):
|
||||
download_models_from_hf()
|
||||
start_arch_modelserver(foreground)
|
||||
else:
|
||||
log.info(
|
||||
"Skipping model_server startup: no ingress_traffic listener configured in arch_config.yaml"
|
||||
)
|
||||
|
||||
start_arch(arch_config_file, env, foreground=foreground)
|
||||
|
||||
|
||||
|
|
@ -321,10 +332,51 @@ def logs(debug, follow):
|
|||
archgw_process.terminate()
|
||||
|
||||
|
||||
@click.command()
@click.argument("type", type=click.Choice(["claude"]), required=True)
@click.argument("file", required=False)  # Optional file argument
@click.option(
    "--path", default=".", help="Path to the directory containing arch_config.yaml"
)
@click.option(
    "--settings",
    default="{}",
    help="Additional settings as JSON string for the CLI agent.",
)
def cli_agent(type, file, path, settings):
    """Start a CLI agent connected to Arch.

    TYPE: The type of CLI agent to start (currently only 'claude' is supported)
    """

    # The agent talks to the gateway, so refuse to launch unless the archgw
    # docker container reports a "running" status.
    archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
    if archgw_status != "running":
        log.error(f"archgw docker container is not running (status: {archgw_status})")
        log.error("Please start archgw using the 'archgw up' command.")
        sys.exit(1)

    # Determine arch_config.yaml path
    arch_config_file = find_config_file(path, file)
    if not os.path.exists(arch_config_file):
        log.error(f"Config file not found: {arch_config_file}")
        sys.exit(1)

    # SystemExit derives from BaseException, not Exception, so sys.exit()
    # calls made inside start_cli_agent pass through this handler with their
    # exit codes intact; only unexpected errors are reported here.
    try:
        start_cli_agent(arch_config_file, settings)
    except Exception as e:
        click.echo(f"Error: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
main.add_command(up)
|
||||
main.add_command(down)
|
||||
main.add_command(build)
|
||||
main.add_command(logs)
|
||||
main.add_command(cli_agent)
|
||||
main.add_command(generate_prompt_targets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -21,6 +21,22 @@ def getLogger(name="cli"):
|
|||
log = getLogger(__name__)
|
||||
|
||||
|
||||
def has_ingress_listener(arch_config_file):
    """Tell whether the config declares a non-empty ``ingress_traffic`` listener.

    Loads the YAML file at *arch_config_file* and inspects
    ``listeners.ingress_traffic``. Any failure while opening, parsing or
    inspecting the config is logged and reported as ``False``.
    """
    try:
        with open(arch_config_file) as config_handle:
            parsed_config = yaml.safe_load(config_handle)

        listeners = parsed_config.get("listeners", {})
        # A missing, empty or null entry all count as "not configured".
        return bool(listeners.get("ingress_traffic", {}))
    except Exception as e:
        log.error(f"Error reading config file {arch_config_file}: {e}")
        return False
|
||||
|
||||
|
||||
def get_llm_provider_access_keys(arch_config_file):
|
||||
with open(arch_config_file, "r") as file:
|
||||
arch_config = file.read()
|
||||
|
|
@ -72,6 +88,19 @@ def load_env_file_to_dict(file_path):
|
|||
return env_dict
|
||||
|
||||
|
||||
def find_config_file(path=".", file=None):
    """Resolve the absolute path of the config file to use.

    An explicit *file* always wins and is returned as an absolute path
    without checking that it exists. Otherwise ``config.yaml`` inside *path*
    is used when it exists, and ``arch_config.yaml`` inside *path* is
    returned as the fallback (whether or not it exists).
    """
    if file:
        return os.path.abspath(file)

    # Prefer config.yaml for convenience; fall back to arch_config.yaml.
    preferred = os.path.abspath(os.path.join(path, "config.yaml"))
    if os.path.exists(preferred):
        return preferred
    return os.path.abspath(os.path.join(path, "arch_config.yaml"))
|
||||
|
||||
|
||||
def stream_access_logs(follow):
|
||||
"""
|
||||
Get the archgw access logs
|
||||
|
|
|
|||
|
|
@ -126,8 +126,9 @@ pub async fn chat(
|
|||
});
|
||||
|
||||
const MAX_MESSAGE_LENGTH: usize = 50;
|
||||
let latest_message_for_log = if latest_message_for_log.len() > MAX_MESSAGE_LENGTH {
|
||||
format!("{}...", &latest_message_for_log[..MAX_MESSAGE_LENGTH])
|
||||
let latest_message_for_log = if latest_message_for_log.chars().count() > MAX_MESSAGE_LENGTH {
|
||||
let truncated: String = latest_message_for_log.chars().take(MAX_MESSAGE_LENGTH).collect();
|
||||
format!("{}...", truncated)
|
||||
} else {
|
||||
latest_message_for_log
|
||||
};
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -88,6 +88,7 @@ pub struct ChatCompletionsRequest {
|
|||
pub prediction: Option<StaticContent>,
|
||||
// pub reasoning_effect: Option<bool>, // GOOD FIRST ISSUE: Future support for reasoning effects
|
||||
pub response_format: Option<Value>,
|
||||
pub reasoning_effort: Option<String>, // e.g., "none", "low", "medium", "high"
|
||||
// pub safety_identifier: Option<String>, // GOOD FIRST ISSUE: Future support for safety identifiers
|
||||
pub seed: Option<i32>,
|
||||
pub service_tier: Option<String>,
|
||||
|
|
@ -116,6 +117,13 @@ impl ChatCompletionsRequest {
|
|||
self.max_tokens = None;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fix_temperature_if_gpt5(&mut self) {
|
||||
let model = self.model.as_str();
|
||||
if model.starts_with("gpt-5") {
|
||||
self.temperature = Some(1.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
|
@ -598,6 +606,7 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest {
|
|||
let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
|
||||
// Use the centralized suppression logic
|
||||
req.suppress_max_tokens_if_o3();
|
||||
req.fix_temperature_if_gpt5();
|
||||
Ok(req)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ impl TryFrom<AnthropicMessagesRequest> for ChatCompletionsRequest {
|
|||
..Default::default()
|
||||
};
|
||||
_chat_completions_req.suppress_max_tokens_if_o3();
|
||||
_chat_completions_req.fix_temperature_if_gpt5();
|
||||
Ok(_chat_completions_req)
|
||||
}
|
||||
}
|
||||
|
|
@ -352,6 +353,7 @@ impl TryFrom<ChatCompletionsStreamResponse> for MessagesStreamEvent {
|
|||
let choice = &resp.choices[0];
|
||||
|
||||
// Handle final chunk with usage
|
||||
let has_usage = resp.usage.is_some();
|
||||
if let Some(usage) = resp.usage {
|
||||
if let Some(finish_reason) = &choice.finish_reason {
|
||||
let anthropic_stop_reason: MessagesStopReason = finish_reason.clone().into();
|
||||
|
|
@ -403,11 +405,27 @@ impl TryFrom<ChatCompletionsStreamResponse> for MessagesStreamEvent {
|
|||
return convert_tool_call_deltas(tool_calls.clone());
|
||||
}
|
||||
|
||||
// Handle finish reason
|
||||
// Handle finish reason - generate MessageDelta only (MessageStop comes later)
|
||||
if let Some(finish_reason) = &choice.finish_reason {
|
||||
if *finish_reason == FinishReason::Stop {
|
||||
return Ok(MessagesStreamEvent::MessageStop);
|
||||
// If we have usage data, it was already handled above
|
||||
// If not, we need to generate MessageDelta with default usage
|
||||
if !has_usage {
|
||||
let anthropic_stop_reason: MessagesStopReason = finish_reason.clone().into();
|
||||
return Ok(MessagesStreamEvent::MessageDelta {
|
||||
delta: MessagesMessageDelta {
|
||||
stop_reason: anthropic_stop_reason,
|
||||
stop_sequence: None,
|
||||
},
|
||||
usage: MessagesUsage {
|
||||
input_tokens: 0,
|
||||
output_tokens: 0,
|
||||
cache_creation_input_tokens: None,
|
||||
cache_read_input_tokens: None,
|
||||
},
|
||||
});
|
||||
}
|
||||
// If usage was already handled above, we don't need to do anything more here
|
||||
// MessageStop will be handled when [DONE] is encountered
|
||||
}
|
||||
|
||||
// Default to ping for unhandled cases
|
||||
|
|
@ -468,18 +486,6 @@ impl TryFrom<MessagesMessage> for Vec<Message> {
|
|||
}
|
||||
MessagesMessageContent::Blocks(blocks) => {
|
||||
let (content_parts, tool_calls, tool_results) = blocks.split_for_openai()?;
|
||||
|
||||
// Create main message
|
||||
let content = build_openai_content(content_parts, &tool_calls);
|
||||
let main_message = Message {
|
||||
role: message.role.into(),
|
||||
content,
|
||||
name: None,
|
||||
tool_calls: if tool_calls.is_empty() { None } else { Some(tool_calls) },
|
||||
tool_call_id: None,
|
||||
};
|
||||
result.push(main_message);
|
||||
|
||||
// Add tool result messages
|
||||
for (tool_use_id, result_text, _is_error) in tool_results {
|
||||
result.push(Message {
|
||||
|
|
@ -490,6 +496,20 @@ impl TryFrom<MessagesMessage> for Vec<Message> {
|
|||
tool_call_id: Some(tool_use_id),
|
||||
});
|
||||
}
|
||||
|
||||
// Only create main message if there's actual content or tool calls
|
||||
// Skip creating empty content messages (e.g., when message only contains tool_result blocks)
|
||||
if !content_parts.is_empty() || !tool_calls.is_empty() {
|
||||
let content = build_openai_content(content_parts, &tool_calls);
|
||||
let main_message = Message {
|
||||
role: message.role.into(),
|
||||
content,
|
||||
name: None,
|
||||
tool_calls: if tool_calls.is_empty() { None } else { Some(tool_calls) },
|
||||
tool_call_id: None,
|
||||
};
|
||||
result.push(main_message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -515,9 +535,11 @@ impl TryFrom<Message> for MessagesMessage {
|
|||
MessagesContentBlock::ToolResult {
|
||||
tool_use_id: tool_call_id,
|
||||
is_error: None,
|
||||
content: vec![MessagesContentBlock::Text {
|
||||
content: ToolResultContent::Blocks(vec![MessagesContentBlock::Text {
|
||||
text: message.content.extract_text(),
|
||||
}],
|
||||
cache_control: None,
|
||||
}]),
|
||||
cache_control: None,
|
||||
},
|
||||
]),
|
||||
});
|
||||
|
|
@ -551,7 +573,7 @@ impl ContentUtils<ToolCall> for Vec<MessagesContentBlock> {
|
|||
|
||||
for block in self {
|
||||
match block {
|
||||
MessagesContentBlock::ToolUse { id, name, input } |
|
||||
MessagesContentBlock::ToolUse { id, name, input, .. } |
|
||||
MessagesContentBlock::ServerToolUse { id, name, input } |
|
||||
MessagesContentBlock::McpToolUse { id, name, input } => {
|
||||
let arguments = serde_json::to_string(&input)?;
|
||||
|
|
@ -575,7 +597,7 @@ impl ContentUtils<ToolCall> for Vec<MessagesContentBlock> {
|
|||
|
||||
for block in self {
|
||||
match block {
|
||||
MessagesContentBlock::Text { text } => {
|
||||
MessagesContentBlock::Text { text, .. } => {
|
||||
content_parts.push(ContentPart::Text { text: text.clone() });
|
||||
}
|
||||
MessagesContentBlock::Image { source } => {
|
||||
|
|
@ -587,7 +609,7 @@ impl ContentUtils<ToolCall> for Vec<MessagesContentBlock> {
|
|||
},
|
||||
});
|
||||
}
|
||||
MessagesContentBlock::ToolUse { id, name, input } |
|
||||
MessagesContentBlock::ToolUse { id, name, input, .. } |
|
||||
MessagesContentBlock::ServerToolUse { id, name, input } |
|
||||
MessagesContentBlock::McpToolUse { id, name, input } => {
|
||||
let arguments = serde_json::to_string(&input)?;
|
||||
|
|
@ -597,7 +619,10 @@ impl ContentUtils<ToolCall> for Vec<MessagesContentBlock> {
|
|||
function: FunctionCall { name: name.clone(), arguments },
|
||||
});
|
||||
}
|
||||
MessagesContentBlock::ToolResult { tool_use_id, content, is_error } |
|
||||
MessagesContentBlock::ToolResult { tool_use_id, content, is_error, .. } => {
|
||||
let result_text = content.extract_text();
|
||||
tool_results.push((tool_use_id.clone(), result_text, is_error.unwrap_or(false)));
|
||||
}
|
||||
MessagesContentBlock::WebSearchToolResult { tool_use_id, content, is_error } |
|
||||
MessagesContentBlock::CodeExecutionToolResult { tool_use_id, content, is_error } |
|
||||
MessagesContentBlock::McpToolResult { tool_use_id, content, is_error } => {
|
||||
|
|
@ -819,7 +844,7 @@ fn build_openai_content(content_parts: Vec<ContentPart>, tool_calls: &[ToolCall]
|
|||
fn build_anthropic_content(content_blocks: Vec<MessagesContentBlock>) -> MessagesMessageContent {
|
||||
if content_blocks.len() == 1 {
|
||||
match &content_blocks[0] {
|
||||
MessagesContentBlock::Text { text } => MessagesMessageContent::Single(text.clone()),
|
||||
MessagesContentBlock::Text { text, .. } => MessagesMessageContent::Single(text.clone()),
|
||||
_ => MessagesMessageContent::Blocks(content_blocks),
|
||||
}
|
||||
} else if content_blocks.is_empty() {
|
||||
|
|
@ -835,12 +860,11 @@ fn convert_anthropic_content_to_openai(content: &[MessagesContentBlock]) -> Resu
|
|||
|
||||
for block in content {
|
||||
match block {
|
||||
MessagesContentBlock::Text { text } => {
|
||||
MessagesContentBlock::Text { text, .. } => {
|
||||
text_parts.push(text.clone());
|
||||
}
|
||||
MessagesContentBlock::Thinking { text } => {
|
||||
// Include thinking as regular text for OpenAI
|
||||
text_parts.push(format!("[Thinking: {}]", text));
|
||||
MessagesContentBlock::Thinking { thinking, .. } => {
|
||||
text_parts.push(format!("thinking: {}", thinking));
|
||||
}
|
||||
_ => {
|
||||
// Skip other content types for basic text conversion
|
||||
|
|
@ -860,14 +884,14 @@ fn convert_openai_message_to_anthropic_content(message: &Message) -> Result<Vec<
|
|||
match &message.content {
|
||||
MessageContent::Text(text) => {
|
||||
if !text.is_empty() {
|
||||
blocks.push(MessagesContentBlock::Text { text: text.clone() });
|
||||
blocks.push(MessagesContentBlock::Text { text: text.clone(), cache_control: None });
|
||||
}
|
||||
}
|
||||
MessageContent::Parts(parts) => {
|
||||
for part in parts {
|
||||
match part {
|
||||
ContentPart::Text { text } => {
|
||||
blocks.push(MessagesContentBlock::Text { text: text.clone() });
|
||||
blocks.push(MessagesContentBlock::Text { text: text.clone(), cache_control: None });
|
||||
}
|
||||
ContentPart::ImageUrl { image_url } => {
|
||||
let source = convert_image_url_to_source(image_url);
|
||||
|
|
@ -886,6 +910,7 @@ fn convert_openai_message_to_anthropic_content(message: &Message) -> Result<Vec<
|
|||
id: tool_call.id.clone(),
|
||||
name: tool_call.function.name.clone(),
|
||||
input,
|
||||
cache_control: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -984,6 +1009,21 @@ fn convert_content_delta(delta: MessagesContentDelta) -> Result<ChatCompletionsS
|
|||
None,
|
||||
))
|
||||
}
|
||||
MessagesContentDelta::ThinkingDelta { thinking } => {
|
||||
Ok(create_openai_chunk(
|
||||
"stream",
|
||||
"unknown",
|
||||
MessageDelta {
|
||||
role: None,
|
||||
content: Some(format!("thinking: {}", thinking)),
|
||||
refusal: None,
|
||||
function_call: None,
|
||||
tool_calls: None,
|
||||
},
|
||||
None,
|
||||
None,
|
||||
))
|
||||
}
|
||||
MessagesContentDelta::InputJsonDelta { partial_json } => {
|
||||
Ok(create_openai_chunk(
|
||||
"stream",
|
||||
|
|
@ -1023,6 +1063,7 @@ fn convert_tool_call_deltas(tool_calls: Vec<ToolCallDelta>) -> Result<MessagesSt
|
|||
id: id.clone(),
|
||||
name: name.clone(),
|
||||
input: Value::Object(serde_json::Map::new()),
|
||||
cache_control: None,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
|
@ -1254,6 +1295,7 @@ mod tests {
|
|||
id: "call_123".to_string(),
|
||||
name: "get_weather".to_string(),
|
||||
input: json!({}),
|
||||
cache_control: None,
|
||||
},
|
||||
};
|
||||
|
||||
|
|
@ -1566,6 +1608,7 @@ mod tests {
|
|||
id: "call_weather".to_string(),
|
||||
name: "get_weather".to_string(),
|
||||
input: json!({}),
|
||||
cache_control: None,
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -269,6 +269,13 @@ impl TryFrom<(&[u8], &SupportedAPIs, &SupportedAPIs)> for ProviderStreamResponse
|
|||
Ok(ProviderStreamResponseType::ChatCompletionsStreamResponse(chat_resp))
|
||||
}
|
||||
(SupportedAPIs::OpenAIChatCompletions(_), SupportedAPIs::AnthropicMessagesAPI(_)) => {
|
||||
// Special case: Handle [DONE] marker for OpenAI -> Anthropic conversion
|
||||
if bytes == b"[DONE]" {
|
||||
return Ok(ProviderStreamResponseType::MessagesStreamEvent(
|
||||
crate::apis::anthropic::MessagesStreamEvent::MessageStop
|
||||
));
|
||||
}
|
||||
|
||||
let openai_resp: crate::apis::openai::ChatCompletionsStreamResponse = serde_json::from_slice(bytes)?;
|
||||
|
||||
// Transform to Anthropic Messages stream format using the transformer
|
||||
|
|
@ -287,8 +294,8 @@ impl TryFrom<(SseEvent, &SupportedAPIs, &SupportedAPIs)> for SseEvent {
|
|||
// Create a new transformed event based on the original
|
||||
let mut transformed_event = sse_event;
|
||||
|
||||
// If not [DONE] and has data, parse the data as a provider stream response (business logic layer)
|
||||
if !transformed_event.is_done() && transformed_event.data.is_some() {
|
||||
// If has data, parse the data as a provider stream response (business logic layer)
|
||||
if transformed_event.data.is_some() {
|
||||
let data_str = transformed_event.data.as_ref().unwrap();
|
||||
let data_bytes = data_str.as_bytes();
|
||||
let transformed_response = ProviderStreamResponseType::try_from((data_bytes, client_api, upstream_api))?;
|
||||
|
|
@ -380,6 +387,7 @@ where
|
|||
I::Item: AsRef<str>,
|
||||
{
|
||||
pub lines: I,
|
||||
pub done_seen: bool,
|
||||
}
|
||||
|
||||
impl<I> SseStreamIter<I>
|
||||
|
|
@ -388,7 +396,7 @@ where
|
|||
I::Item: AsRef<str>,
|
||||
{
|
||||
pub fn new(lines: I) -> Self {
|
||||
Self { lines }
|
||||
Self { lines, done_seen: false }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -411,14 +419,20 @@ where
|
|||
type Item = SseEvent;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// If we already returned [DONE], terminate the stream
|
||||
if self.done_seen {
|
||||
return None;
|
||||
}
|
||||
|
||||
for line in &mut self.lines {
|
||||
let line_str = line.as_ref();
|
||||
|
||||
// Try to parse as either data: or event: line
|
||||
if let Ok(event) = line_str.parse::<SseEvent>() {
|
||||
// For data: lines, check if this is the [DONE] marker - if so, end the stream
|
||||
// For data: lines, check if this is the [DONE] marker
|
||||
if event.data.is_some() && event.is_done() {
|
||||
return None;
|
||||
self.done_seen = true;
|
||||
return Some(event); // Return [DONE] event for transformation
|
||||
}
|
||||
// For data: lines, skip events that should be filtered at the transport layer
|
||||
if event.data.is_some() && event.should_skip() {
|
||||
|
|
@ -706,7 +720,11 @@ mod tests {
|
|||
assert!(event2.data.as_ref().unwrap().contains("msg2"));
|
||||
assert!(!event2.should_skip());
|
||||
|
||||
// Iterator should end at [DONE] (no more events)
|
||||
// Third event should be [DONE]
|
||||
let done_event = iter.next().unwrap();
|
||||
assert!(done_event.is_done());
|
||||
|
||||
// Iterator should end after [DONE]
|
||||
assert!(iter.next().is_none());
|
||||
}
|
||||
|
||||
|
|
@ -745,7 +763,11 @@ mod tests {
|
|||
assert!(!event4.is_event_only());
|
||||
assert!(event4.data.as_ref().unwrap().contains("Hello"));
|
||||
|
||||
// Iterator should end at [DONE]
|
||||
// Fifth event should be [DONE]
|
||||
let done_event = iter.next().unwrap();
|
||||
assert!(done_event.is_done());
|
||||
|
||||
// Iterator should end after [DONE]
|
||||
assert!(iter.next().is_none());
|
||||
}
|
||||
|
||||
|
|
@ -776,4 +798,25 @@ mod tests {
|
|||
let provider_type = ProviderStreamResponseType::ChatCompletionsStreamResponse(openai_event);
|
||||
assert_eq!(provider_type.event_type(), None);
|
||||
}
|
||||
|
||||
#[test]
fn test_done_marker_handled_in_stream_response_transformation() {
    use crate::apis::anthropic::AnthropicApi;

    // An Anthropic client talking to an OpenAI upstream: the raw `[DONE]`
    // marker must come out of the transformation layer as MessageStop.
    let upstream_api = SupportedAPIs::OpenAIChatCompletions(crate::apis::openai::OpenAIApi::ChatCompletions);
    let client_api = SupportedAPIs::AnthropicMessagesAPI(AnthropicApi::Messages);
    let done_bytes = b"[DONE]";

    let result = ProviderStreamResponseType::try_from((done_bytes.as_slice(), &client_api, &upstream_api));
    assert!(result.is_ok());

    match result {
        Ok(ProviderStreamResponseType::MessagesStreamEvent(event)) => {
            // Verify it's a MessageStop event
            assert_eq!(event.event_type(), Some("message_stop"));
            assert!(matches!(event, crate::apis::anthropic::MessagesStreamEvent::MessageStop));
        }
        _ => panic!("Expected MessagesStreamEvent::MessageStop"),
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -395,23 +395,15 @@ impl StreamContext {
|
|||
}
|
||||
}
|
||||
|
||||
fn debug_log_body(&self, body: &[u8]) {
|
||||
debug!(
|
||||
"[ARCHGW_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}",
|
||||
self.request_identifier(),
|
||||
body.len(),
|
||||
String::from_utf8_lossy(body)
|
||||
);
|
||||
}
|
||||
|
||||
fn handle_streaming_response(
|
||||
&mut self,
|
||||
body: &[u8],
|
||||
provider_id: ProviderId,
|
||||
) -> Result<Vec<u8>, Action> {
|
||||
debug!(
|
||||
"[ARCHGW_REQ_ID:{}] STREAMING_PROCESS: provider_id={:?} chunk_size={}",
|
||||
"[ARCHGW_REQ_ID:{}] STREAMING_PROCESS: client={:?} provider_id={:?} chunk_size={}",
|
||||
self.request_identifier(),
|
||||
self.client_api,
|
||||
provider_id,
|
||||
body.len()
|
||||
);
|
||||
|
|
@ -958,7 +950,12 @@ impl HttpContext for StreamContext {
|
|||
Err(action) => return action,
|
||||
};
|
||||
|
||||
self.debug_log_body(&body);
|
||||
debug!(
|
||||
"[ARCHGW_REQ_ID:{}] UPSTREAM_RAW_RESPONSE: body_size={} content={}",
|
||||
self.request_identifier(),
|
||||
body.len(),
|
||||
String::from_utf8_lossy(&body)
|
||||
);
|
||||
|
||||
let provider_id = self.get_provider_id();
|
||||
if self.streaming_response {
|
||||
|
|
|
|||
133
demos/use_cases/claude_code/README.md
Normal file
133
demos/use_cases/claude_code/README.md
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
# Claude Code Routing with (Preference-aligned) Intelligence
|
||||
|
||||
## Why This Matters
|
||||
|
||||
**Claude Code is powerful, but what if you could access the best of ALL AI models through one familiar interface?**
|
||||
|
||||
Instead of being locked into a set of LLMs from one provider, imagine:
|
||||
- Using **DeepSeek's coding expertise** for complex algorithms
|
||||
- Leveraging **GPT-5's reasoning** for architecture decisions
|
||||
- Tapping **Claude's analysis** for code reviews
|
||||
- Accessing **Grok's speed** for quick iterations
|
||||
|
||||
**All through the same Claude Code interface you already love.**
|
||||
|
||||
## The Solution: Intelligent Multi-LLM Routing
|
||||
|
||||
Arch Gateway transforms Claude Code into a **universal AI development interface** that:
|
||||
|
||||
### 🌐 **Connects to Any LLM Provider**
|
||||
- **OpenAI**: GPT-4.1, GPT-5, etc.
|
||||
- **Anthropic**: Claude 3.5 Sonnet, Claude 3 Haiku, Claude 4.5
|
||||
- **DeepSeek**: DeepSeek-V3, DeepSeek-Coder-V2
|
||||
- **Grok**: Grok-2, Grok-2-mini
|
||||
- **Others**: Gemini, Llama, Mistral, local models via Ollama
|
||||
|
||||
### 🧠 **Routes Intelligently Based on Task**
|
||||
Our research-backed routing system automatically selects the optimal model by analyzing:
|
||||
- **Task complexity** (simple refactoring vs. architectural design)
|
||||
- **Content type** (code generation vs. debugging vs. documentation)
|
||||
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Prerequisites
|
||||
- Claude Code installed: `npm install -g @anthropic-ai/claude-code`
|
||||
- Docker running on your system
|
||||
- Create a Python virtual environment in your current working directory
|
||||
|
||||
### 1. Get the Configuration File
|
||||
Download the demo configuration file using one of these methods:
|
||||
|
||||
**Option A: Direct download**
|
||||
```bash
|
||||
curl -O https://raw.githubusercontent.com/katanemo/arch/main/demos/use_cases/claude_code/config.yaml
|
||||
```
|
||||
|
||||
**Option B: Clone the repository**
|
||||
```bash
|
||||
git clone https://github.com/katanemo/arch.git
|
||||
cd arch/demos/use_cases/claude_code
|
||||
|
||||
```
|
||||
|
||||
### 2. Set Up Your API Keys
|
||||
Set up your environment variables with your actual API keys:
|
||||
```bash
|
||||
export OPENAI_API_KEY="your-openai-api-key"
|
||||
export ANTHROPIC_API_KEY="your-anthropic-api-key"
|
||||
export AZURE_API_KEY="your-azure-api-key" # Optional
|
||||
```
|
||||
|
||||
Alternatively, create a `.env` file in your working directory:
|
||||
```bash
|
||||
echo "OPENAI_API_KEY=your-openai-api-key" > .env
|
||||
echo "ANTHROPIC_API_KEY=your-anthropic-api-key" >> .env
|
||||
```
|
||||
|
||||
### 3. Install and Start Arch Gateway
|
||||
```bash
|
||||
pip install archgw
|
||||
archgw up
|
||||
```
|
||||
|
||||
### 4. Launch Claude Code with Multi-LLM Support
|
||||
```bash
|
||||
archgw cli-agent claude
|
||||
```
|
||||
|
||||
That's it! Claude Code now has access to multiple LLM providers with intelligent routing.
|
||||
|
||||
## What You'll Experience
|
||||
|
||||
### Screenshot Placeholder
|
||||

|
||||
*Claude Code interface enhanced with intelligent model routing and multi-provider access*
|
||||
|
||||
### Real-Time Model Selection
|
||||
When you interact with Claude Code, you'll get:
|
||||
- **Automatic model selection** based on your query type
|
||||
- **Transparent routing decisions** showing which model was chosen and why
|
||||
- **Seamless failover** if a model becomes unavailable
|
||||
|
||||
## Configuration
|
||||
|
||||
The setup uses the included `config.yaml` file which defines:
|
||||
|
||||
### Multi-Provider Access
|
||||
```yaml
|
||||
llm_providers:
|
||||
- model: openai/gpt-4.1-2025-04-14
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets and functions
|
||||
- model: anthropic/claude-3-5-sonnet-20241022
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: explaining and analyzing existing code
|
||||
```
|
||||
## Advanced Usage
|
||||
|
||||
### Custom Model Selection
|
||||
```bash
|
||||
# Force a specific model for this session
|
||||
archgw cli-agent claude --settings='{"ANTHROPIC_SMALL_FAST_MODEL": "deepseek-coder-v2"}'
|
||||
|
||||
# Enable detailed routing information
|
||||
archgw cli-agent claude --settings='{"statusLine": {"type": "command", "command": "ccr statusline"}}'
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
The system automatically configures:
|
||||
```bash
|
||||
ANTHROPIC_BASE_URL=http://127.0.0.1:12000 # Routes through Arch Gateway
|
||||
ANTHROPIC_SMALL_FAST_MODEL=arch.claude.code.small.fast # Uses intelligent alias
|
||||
```
|
||||
|
||||
## Real Developer Workflows
|
||||
|
||||
This intelligent routing is powered by our research in preference-aligned LLM routing:
|
||||
- **Research Paper**: [Preference-Aligned LLM Router](https://arxiv.org/abs/2506.16655)
|
||||
- **Technical Docs**: [docs.archgw.com](https://docs.archgw.com)
|
||||
41
demos/use_cases/claude_code/config.yaml
Normal file
41
demos/use_cases/claude_code/config.yaml
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
# OpenAI Models
|
||||
- model: openai/gpt-5-2025-08-07
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code generation
|
||||
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||
|
||||
- model: openai/gpt-4.1-2025-04-14
|
||||
access_key: $OPENAI_API_KEY
|
||||
routing_preferences:
|
||||
- name: code understanding
|
||||
description: understand and explain existing code snippets, functions, or libraries
|
||||
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
default: true
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# Ollama Models
|
||||
- model: ollama/llama3.1
|
||||
base_url: http://host.docker.internal:11434
|
||||
|
||||
|
||||
# Model aliases - friendly names that map to actual provider names
|
||||
model_aliases:
|
||||
# Alias for a small faster Claude model
|
||||
arch.claude.code.small.fast:
|
||||
target: claude-3-haiku-20240307
|
||||
|
|
@ -24,7 +24,7 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-3-5-sonnet-20241022
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
|
|
@ -56,7 +56,7 @@ model_aliases:
|
|||
|
||||
# Alias for creative tasks -> Claude model
|
||||
arch.creative.v1:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
target: claude-sonnet-4-20250514
|
||||
|
||||
# Alias for quick responses -> fast model
|
||||
arch.fast.v1:
|
||||
|
|
@ -67,7 +67,7 @@ model_aliases:
|
|||
target: gpt-5-mini-2025-08-07
|
||||
|
||||
chat-model:
|
||||
target: llama3.1
|
||||
target: gpt-5-mini-2025-08-07
|
||||
|
||||
creative-model:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
target: claude-sonnet-4-20250514
|
||||
|
|
|
|||
|
|
@ -199,8 +199,7 @@ def test_400_error_handling_with_alias():
|
|||
try:
|
||||
completion = client.chat.completions.create(
|
||||
model="arch.summarize.v1", # This should resolve to gpt-5-mini-2025-08-07
|
||||
max_completion_tokens=50,
|
||||
temperature=0.7, # This is a typo - should be "temperature", which should trigger a 400 error
|
||||
max_tokens=50,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
|
|
@ -350,3 +349,57 @@ def test_direct_model_4o_mini_anthropic():
|
|||
response_content = "".join(b.text for b in message.content if b.type == "text")
|
||||
logger.info(f"Response from direct 4o-mini via Anthropic: {response_content}")
|
||||
assert response_content == "Hello from direct 4o-mini via Anthropic!"
|
||||
|
||||
|
||||
def test_anthropic_thinking_mode_streaming():
|
||||
# Anthropic base_url should be the root, not /v1/chat/completions
|
||||
base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
|
||||
|
||||
client = anthropic.Anthropic(
|
||||
api_key=os.environ.get("ANTHROPIC_API_KEY", "test-key"),
|
||||
base_url=base_url,
|
||||
)
|
||||
|
||||
thinking_block_started = False
|
||||
thinking_delta_seen = False
|
||||
text_delta_seen = False
|
||||
|
||||
with client.messages.stream(
|
||||
model="claude-sonnet-4-20250514",
|
||||
max_tokens=2048,
|
||||
thinking={"type": "enabled", "budget_tokens": 1024}, # <- idiomatic
|
||||
messages=[{"role": "user", "content": "Explain briefly what 2+2 equals"}],
|
||||
) as stream:
|
||||
for event in stream:
|
||||
# 1) detect when a thinking block starts
|
||||
if event.type == "content_block_start" and getattr(
|
||||
event, "content_block", None
|
||||
):
|
||||
if getattr(event.content_block, "type", None) == "thinking":
|
||||
thinking_block_started = True
|
||||
|
||||
# 2) collect text vs thinking deltas
|
||||
if event.type == "content_block_delta" and getattr(event, "delta", None):
|
||||
if event.delta.type == "text_delta":
|
||||
text_delta_seen = True
|
||||
elif event.delta.type == "thinking_delta":
|
||||
# some SDKs expose .thinking, others .text for this delta; not needed here
|
||||
thinking_delta_seen = True
|
||||
|
||||
final = stream.get_final_message()
|
||||
|
||||
# Basic integrity
|
||||
assert final is not None
|
||||
assert final.content and len(final.content) > 0
|
||||
|
||||
# Normal text should have streamed
|
||||
assert text_delta_seen, "Expected normal text deltas in stream"
|
||||
|
||||
# With thinking enabled, we expect a thinking block and at least one thinking delta
|
||||
assert thinking_block_started, "No thinking block started"
|
||||
assert thinking_delta_seen, "No thinking deltas observed"
|
||||
|
||||
# Optional: double-check on the assembled message
|
||||
final_block_types = [blk.type for blk in final.content]
|
||||
assert "text" in final_block_types
|
||||
assert "thinking" in final_block_types
|
||||
|
|
|
|||
|
|
@ -417,12 +417,12 @@ def test_anthropic_client_with_openai_model_streaming():
|
|||
client = anthropic.Anthropic(api_key="test-key", base_url=base_url)
|
||||
|
||||
with client.messages.stream(
|
||||
model="gpt-4o-mini", # OpenAI model via Anthropic client
|
||||
max_tokens=50,
|
||||
model="gpt-5-mini-2025-08-07", # OpenAI model via Anthropic client
|
||||
max_tokens=500,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, please respond with exactly: Hello from GPT-4o-mini via Anthropic!",
|
||||
"content": "Hello, please respond with exactly: Hello from ChatGPT!",
|
||||
}
|
||||
],
|
||||
) as stream:
|
||||
|
|
@ -435,8 +435,8 @@ def test_anthropic_client_with_openai_model_streaming():
|
|||
# A safe way to reassemble text from the content blocks:
|
||||
final_text = "".join(b.text for b in final.content if b.type == "text")
|
||||
|
||||
assert full_text == "Hello from GPT-4o-mini via Anthropic!"
|
||||
assert final_text == "Hello from GPT-4o-mini via Anthropic!"
|
||||
assert full_text == "Hello from ChatGPT!"
|
||||
assert final_text == "Hello from ChatGPT!"
|
||||
|
||||
|
||||
def test_openai_gpt4o_mini_v1_messages_api():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue