mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
making sure that claude code can run via the archgw cli
This commit is contained in:
parent
1b7f9e43e7
commit
2f611f74a8
9 changed files with 422 additions and 30 deletions
|
|
@ -4,7 +4,7 @@ import time
|
|||
import sys
|
||||
|
||||
import yaml
|
||||
from cli.utils import getLogger
|
||||
from cli.utils import getLogger, read_config_file
|
||||
from cli.consts import (
|
||||
ARCHGW_DOCKER_IMAGE,
|
||||
ARCHGW_DOCKER_NAME,
|
||||
|
|
@ -185,3 +185,106 @@ def stop_arch_modelserver():
|
|||
except subprocess.CalledProcessError as e:
|
||||
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def start_cli_agent(arch_config_file=None, settings_json="{}"):
|
||||
"""Start a CLI client connected to Arch."""
|
||||
import json
|
||||
|
||||
# Use current directory for config if not specified
|
||||
if arch_config_file is None:
|
||||
config_path = "."
|
||||
else:
|
||||
config_path = (
|
||||
os.path.dirname(arch_config_file)
|
||||
if os.path.dirname(arch_config_file)
|
||||
else "."
|
||||
)
|
||||
|
||||
# Get port and host from arch_config.yaml listeners > egress
|
||||
arch_config = read_config_file(config_path)
|
||||
if not arch_config:
|
||||
log.error(f"Config file not found in {config_path}")
|
||||
sys.exit(1)
|
||||
|
||||
# Get egress listener configuration
|
||||
egress_config = arch_config.get("listeners", {}).get("egress_traffic", {})
|
||||
host = egress_config.get("host", "127.0.0.1")
|
||||
port = egress_config.get("port", 12000)
|
||||
|
||||
# Parse additional settings from command line
|
||||
try:
|
||||
additional_settings = json.loads(settings_json) if settings_json else {}
|
||||
except json.JSONDecodeError:
|
||||
log.error("Settings must be valid JSON")
|
||||
sys.exit(1)
|
||||
|
||||
# Set up environment variables
|
||||
env = os.environ.copy()
|
||||
env.update(
|
||||
{
|
||||
"ANTHROPIC_AUTH_TOKEN": "test", # Use test token for arch
|
||||
"ANTHROPIC_API_KEY": "",
|
||||
"ANTHROPIC_BASE_URL": f"http://{host}:{port}",
|
||||
"NO_PROXY": host,
|
||||
"DISABLE_TELEMETRY": "true",
|
||||
"DISABLE_COST_WARNINGS": "true",
|
||||
"API_TIMEOUT_MS": "600000",
|
||||
}
|
||||
)
|
||||
|
||||
# Set ANTHROPIC_SMALL_FAST_MODEL from additional_settings or model alias
|
||||
if "ANTHROPIC_SMALL_FAST_MODEL" in additional_settings:
|
||||
env["ANTHROPIC_SMALL_FAST_MODEL"] = additional_settings[
|
||||
"ANTHROPIC_SMALL_FAST_MODEL"
|
||||
]
|
||||
else:
|
||||
# Check if arch.claude.code.small.fast alias exists in model_aliases
|
||||
model_aliases = arch_config.get("model_aliases", {})
|
||||
if "arch.claude.code.small.fast" in model_aliases:
|
||||
env["ANTHROPIC_SMALL_FAST_MODEL"] = "arch.claude.code.small.fast"
|
||||
else:
|
||||
log.info(
|
||||
"Tip: Set an alias 'arch.claude.code.small.fast' in your model_aliases config to set a small fast model Claude Code"
|
||||
)
|
||||
log.info("Or provide ANTHROPIC_SMALL_FAST_MODEL in --settings JSON")
|
||||
|
||||
# Non-interactive mode configuration from additional_settings only
|
||||
if additional_settings.get("NON_INTERACTIVE_MODE", False):
|
||||
env.update(
|
||||
{
|
||||
"CI": "true",
|
||||
"FORCE_COLOR": "0",
|
||||
"NODE_NO_READLINE": "1",
|
||||
"TERM": "dumb",
|
||||
}
|
||||
)
|
||||
|
||||
# Build claude command arguments
|
||||
claude_args = []
|
||||
|
||||
# Add settings if provided, excluding those already handled as environment variables
|
||||
if additional_settings:
|
||||
# Filter out settings that are already processed as environment variables
|
||||
claude_settings = {
|
||||
k: v
|
||||
for k, v in additional_settings.items()
|
||||
if k not in ["ANTHROPIC_SMALL_FAST_MODEL", "NON_INTERACTIVE_MODE"]
|
||||
}
|
||||
if claude_settings:
|
||||
claude_args.append(f"--settings={json.dumps(claude_settings)}")
|
||||
|
||||
# Use claude from PATH
|
||||
claude_path = "claude"
|
||||
log.info(f"Starting Claude CLI Agent to Arch at {host}:{port}")
|
||||
|
||||
try:
|
||||
subprocess.run([claude_path] + claude_args, env=env, check=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
log.error(f"Error starting claude: {e}")
|
||||
sys.exit(1)
|
||||
except FileNotFoundError:
|
||||
log.error(
|
||||
f"{claude_path} not found. Make sure Claude Code is installed: npm install -g @anthropic-ai/claude-code"
|
||||
)
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -4,14 +4,21 @@ import sys
|
|||
import subprocess
|
||||
import multiprocessing
|
||||
import importlib.metadata
|
||||
import json
|
||||
from cli import targets
|
||||
from cli.docker_cli import docker_validate_archgw_schema, stream_gateway_logs
|
||||
from cli.docker_cli import (
|
||||
docker_validate_archgw_schema,
|
||||
stream_gateway_logs,
|
||||
docker_container_status,
|
||||
)
|
||||
from cli.utils import (
|
||||
getLogger,
|
||||
get_llm_provider_access_keys,
|
||||
has_ingress_listener,
|
||||
load_env_file_to_dict,
|
||||
stream_access_logs,
|
||||
read_config_file,
|
||||
find_config_file,
|
||||
)
|
||||
from cli.core import (
|
||||
start_arch_modelserver,
|
||||
|
|
@ -19,9 +26,11 @@ from cli.core import (
|
|||
start_arch,
|
||||
stop_docker_container,
|
||||
download_models_from_hf,
|
||||
start_cli_agent,
|
||||
)
|
||||
from cli.consts import (
|
||||
ARCHGW_DOCKER_IMAGE,
|
||||
ARCHGW_DOCKER_NAME,
|
||||
KATANEMO_DOCKERHUB_REPO,
|
||||
SERVICE_NAME_ARCHGW,
|
||||
SERVICE_NAME_MODEL_SERVER,
|
||||
|
|
@ -171,12 +180,8 @@ def up(file, path, service, foreground):
|
|||
start_arch_modelserver(foreground)
|
||||
return
|
||||
|
||||
if file:
|
||||
# If a file is provided, process that file
|
||||
arch_config_file = os.path.abspath(file)
|
||||
else:
|
||||
# If no file is provided, use the path and look for arch_config.yaml
|
||||
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
|
||||
# Use the utility function to find config file
|
||||
arch_config_file = find_config_file(path, file)
|
||||
|
||||
# Check if the file exists
|
||||
if not os.path.exists(arch_config_file):
|
||||
|
|
@ -329,10 +334,52 @@ def logs(debug, follow):
|
|||
archgw_process.terminate()
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.argument("cli_type", type=click.Choice(["claude"]), required=True)
|
||||
@click.option(
|
||||
"--path",
|
||||
default=None,
|
||||
help="Path to the directory containing arch_config.yaml (defaults to current directory)",
|
||||
)
|
||||
@click.option(
|
||||
"--settings",
|
||||
default="{}",
|
||||
help="Additional settings as JSON string for the CLI agent.",
|
||||
)
|
||||
def cli_agent(cli_type, path, settings):
|
||||
"""Start a CLI agent connected to Arch.
|
||||
|
||||
CLI_TYPE: The type of CLI agent to start (currently only 'claude' is supported)
|
||||
"""
|
||||
# Determine arch_config.yaml path
|
||||
arch_config_file = None
|
||||
if path:
|
||||
arch_config_file = os.path.join(path, "arch_config.yaml")
|
||||
else:
|
||||
arch_config_file = "arch_config.yaml" # Current directory
|
||||
|
||||
# Check if archgw docker container is running
|
||||
archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
|
||||
if archgw_status != "running":
|
||||
log.error(f"archgw docker container is not running (status: {archgw_status})")
|
||||
log.error("Please start archgw using the 'archgw up' command.")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
start_cli_agent(arch_config_file, settings)
|
||||
except SystemExit:
|
||||
# Re-raise SystemExit to preserve exit codes
|
||||
raise
|
||||
except Exception as e:
|
||||
click.echo(f"Error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
main.add_command(up)
|
||||
main.add_command(down)
|
||||
main.add_command(build)
|
||||
main.add_command(logs)
|
||||
main.add_command(cli_agent)
|
||||
main.add_command(generate_prompt_targets)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -88,6 +88,36 @@ def load_env_file_to_dict(file_path):
|
|||
return env_dict
|
||||
|
||||
|
||||
def read_config_file(path="."):
|
||||
"""Read configuration from arch_config.yaml or config.yaml in the specified path."""
|
||||
config_files = ["arch_config.yaml", "config.yaml"]
|
||||
|
||||
for config_file in config_files:
|
||||
config_path = os.path.abspath(os.path.join(path, config_file))
|
||||
if os.path.exists(config_path):
|
||||
try:
|
||||
with open(config_path, "r") as f:
|
||||
return yaml.safe_load(f)
|
||||
except Exception as e:
|
||||
log.warning(f"Error reading {config_path}: {e}")
|
||||
continue
|
||||
|
||||
return {}
|
||||
|
||||
|
||||
def find_config_file(path=".", file=None):
|
||||
"""Find the appropriate config file path."""
|
||||
if file:
|
||||
# If a file is provided, process that file
|
||||
return os.path.abspath(file)
|
||||
else:
|
||||
# If no file is provided, use the path and look for arch_config.yaml first, then config.yaml for convenience
|
||||
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
|
||||
if not os.path.exists(arch_config_file):
|
||||
arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
|
||||
return arch_config_file
|
||||
|
||||
|
||||
def stream_access_logs(follow):
|
||||
"""
|
||||
Get the archgw access logs
|
||||
|
|
|
|||
|
|
@ -70,7 +70,9 @@ pub enum ServiceTier {
|
|||
#[skip_serializing_none]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct ThinkingConfig {
|
||||
pub enabled: bool,
|
||||
#[serde(rename = "type")]
|
||||
pub thinking_type: String,
|
||||
pub budget_tokens: Option<u32>,
|
||||
}
|
||||
|
||||
// MCP Server types
|
||||
|
|
@ -166,7 +168,8 @@ pub enum MessagesContentBlock {
|
|||
cache_control: Option<MessagesCacheControl>,
|
||||
},
|
||||
Thinking {
|
||||
text: String,
|
||||
thinking: String,
|
||||
signature: Option<String>,
|
||||
cache_control: Option<MessagesCacheControl>,
|
||||
},
|
||||
Image {
|
||||
|
|
@ -235,6 +238,7 @@ impl ExtractText for Vec<MessagesContentBlock> {
|
|||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
#[serde(tag = "type")]
|
||||
pub enum MessagesImageSource {
|
||||
Base64 {
|
||||
media_type: String,
|
||||
|
|
@ -247,6 +251,7 @@ pub enum MessagesImageSource {
|
|||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
#[serde(tag = "type")]
|
||||
pub enum MessagesDocumentSource {
|
||||
Base64 {
|
||||
media_type: String,
|
||||
|
|
@ -409,6 +414,8 @@ pub enum MessagesContentDelta {
|
|||
TextDelta { text: String },
|
||||
#[serde(rename = "input_json_delta")]
|
||||
InputJsonDelta { partial_json: String },
|
||||
#[serde(rename = "thinking_delta")]
|
||||
ThinkingDelta { thinking: String },
|
||||
}
|
||||
|
||||
#[skip_serializing_none]
|
||||
|
|
@ -566,10 +573,10 @@ impl ProviderStreamResponse for MessagesStreamEvent {
|
|||
fn content_delta(&self) -> Option<&str> {
|
||||
match self {
|
||||
MessagesStreamEvent::ContentBlockDelta { delta, .. } => {
|
||||
if let MessagesContentDelta::TextDelta { text } = delta {
|
||||
Some(text)
|
||||
} else {
|
||||
None
|
||||
match delta {
|
||||
MessagesContentDelta::TextDelta { text } => Some(text),
|
||||
MessagesContentDelta::ThinkingDelta { thinking } => Some(thinking),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
|
|
@ -672,7 +679,7 @@ mod tests {
|
|||
"system": "You are a helpful assistant",
|
||||
"service_tier": "auto",
|
||||
"thinking": {
|
||||
"enabled": true
|
||||
"type": "enabled"
|
||||
},
|
||||
"metadata": {
|
||||
"user_id": "123"
|
||||
|
|
@ -699,7 +706,7 @@ mod tests {
|
|||
}
|
||||
|
||||
if let Some(thinking) = &deserialized_request.thinking {
|
||||
assert_eq!(thinking.enabled, true);
|
||||
assert_eq!(thinking.thinking_type, "enabled");
|
||||
} else {
|
||||
panic!("Expected thinking config");
|
||||
}
|
||||
|
|
@ -754,10 +761,9 @@ mod tests {
|
|||
{
|
||||
"type": "image",
|
||||
"source": {
|
||||
"base64": {
|
||||
"media_type": "image/jpeg",
|
||||
"data": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
|
||||
}
|
||||
"type": "base64",
|
||||
"media_type": "image/jpeg",
|
||||
"data": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
@ -767,7 +773,7 @@ mod tests {
|
|||
"content": [
|
||||
{
|
||||
"type": "thinking",
|
||||
"text": "Let me analyze the image and then check the weather..."
|
||||
"thinking": "Let me analyze the image and then check the weather..."
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
|
|
@ -854,8 +860,8 @@ mod tests {
|
|||
assert_eq!(content_blocks.len(), 3);
|
||||
|
||||
// Validate thinking content block
|
||||
if let MessagesContentBlock::Thinking { text, .. } = &content_blocks[0] {
|
||||
assert_eq!(text, "Let me analyze the image and then check the weather...");
|
||||
if let MessagesContentBlock::Thinking { thinking, .. } = &content_blocks[0] {
|
||||
assert_eq!(thinking, "Let me analyze the image and then check the weather...");
|
||||
} else {
|
||||
panic!("Expected thinking content block");
|
||||
}
|
||||
|
|
@ -1320,4 +1326,68 @@ mod tests {
|
|||
assert_eq!(all_variants.len(), 1);
|
||||
assert_eq!(all_variants[0], AnthropicApi::Messages);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_anthropic_thinking_streaming() {
|
||||
// Test thinking delta stream event
|
||||
let thinking_delta_json = json!({
|
||||
"type": "content_block_delta",
|
||||
"index": 0,
|
||||
"delta": {
|
||||
"type": "thinking_delta",
|
||||
"thinking": ".\n\nI need to consider:\n1. Current"
|
||||
}
|
||||
});
|
||||
|
||||
let deserialized_event: MessagesStreamEvent = serde_json::from_value(thinking_delta_json.clone()).unwrap();
|
||||
if let MessagesStreamEvent::ContentBlockDelta { index, ref delta } = deserialized_event {
|
||||
assert_eq!(index, 0);
|
||||
if let MessagesContentDelta::ThinkingDelta { thinking } = delta {
|
||||
assert_eq!(thinking, ".\n\nI need to consider:\n1. Current");
|
||||
} else {
|
||||
panic!("Expected thinking delta");
|
||||
}
|
||||
} else {
|
||||
panic!("Expected content block delta event");
|
||||
}
|
||||
|
||||
// Test that thinking delta is returned by content_delta()
|
||||
assert_eq!(deserialized_event.content_delta(), Some(".\n\nI need to consider:\n1. Current"));
|
||||
|
||||
let serialized_event_json = serde_json::to_value(&deserialized_event).unwrap();
|
||||
assert_eq!(thinking_delta_json, serialized_event_json);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_anthropic_thinking_request_config() {
|
||||
// Test thinking config with budget_tokens
|
||||
let request_json = json!({
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Test message"
|
||||
}
|
||||
],
|
||||
"max_tokens": 2048,
|
||||
"thinking": {
|
||||
"type": "enabled",
|
||||
"budget_tokens": 1024
|
||||
}
|
||||
});
|
||||
|
||||
let deserialized_request: MessagesRequest = serde_json::from_value(request_json.clone()).unwrap();
|
||||
assert_eq!(deserialized_request.model, "claude-sonnet-4-20250514");
|
||||
assert_eq!(deserialized_request.max_tokens, 2048);
|
||||
|
||||
if let Some(thinking) = &deserialized_request.thinking {
|
||||
assert_eq!(thinking.thinking_type, "enabled");
|
||||
assert_eq!(thinking.budget_tokens, Some(1024));
|
||||
} else {
|
||||
panic!("Expected thinking config");
|
||||
}
|
||||
|
||||
let serialized_json = serde_json::to_value(&deserialized_request).unwrap();
|
||||
assert_eq!(request_json, serialized_json);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ pub struct ChatCompletionsRequest {
|
|||
pub prediction: Option<StaticContent>,
|
||||
// pub reasoning_effect: Option<bool>, // GOOD FIRST ISSUE: Future support for reasoning effects
|
||||
pub response_format: Option<Value>,
|
||||
pub reasoning_effort: Option<String>, // e.g., "none", "low", "medium", "high"
|
||||
// pub safety_identifier: Option<String>, // GOOD FIRST ISSUE: Future support for safety identifiers
|
||||
pub seed: Option<i32>,
|
||||
pub service_tier: Option<String>,
|
||||
|
|
|
|||
|
|
@ -862,9 +862,8 @@ fn convert_anthropic_content_to_openai(content: &[MessagesContentBlock]) -> Resu
|
|||
MessagesContentBlock::Text { text, .. } => {
|
||||
text_parts.push(text.clone());
|
||||
}
|
||||
MessagesContentBlock::Thinking { text, .. } => {
|
||||
// Include thinking as regular text for OpenAI
|
||||
text_parts.push(format!("[Thinking: {}]", text));
|
||||
MessagesContentBlock::Thinking { thinking, .. } => {
|
||||
text_parts.push(format!("thinking: {}", thinking));
|
||||
}
|
||||
_ => {
|
||||
// Skip other content types for basic text conversion
|
||||
|
|
@ -1009,6 +1008,21 @@ fn convert_content_delta(delta: MessagesContentDelta) -> Result<ChatCompletionsS
|
|||
None,
|
||||
))
|
||||
}
|
||||
MessagesContentDelta::ThinkingDelta { thinking } => {
|
||||
Ok(create_openai_chunk(
|
||||
"stream",
|
||||
"unknown",
|
||||
MessageDelta {
|
||||
role: None,
|
||||
content: Some(format!("[Thinking: {}]", thinking)),
|
||||
refusal: None,
|
||||
function_call: None,
|
||||
tool_calls: None,
|
||||
},
|
||||
None,
|
||||
None,
|
||||
))
|
||||
}
|
||||
MessagesContentDelta::InputJsonDelta { partial_json } => {
|
||||
Ok(create_openai_chunk(
|
||||
"stream",
|
||||
|
|
|
|||
73
demos/use_cases/claude_code/config.yaml
Normal file
73
demos/use_cases/claude_code/config.yaml
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
|
||||
# OpenAI Models
|
||||
- model: openai/gpt-5-mini-2025-08-07
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
- model: openai/o3
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-3-5-sonnet-20241022
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# Azure OpenAI Models
|
||||
- model: azure_openai/gpt-5-mini
|
||||
access_key: $AZURE_API_KEY
|
||||
base_url: https://katanemo.openai.azure.com
|
||||
|
||||
# Ollama Models
|
||||
- model: ollama/llama3.1
|
||||
base_url: http://host.docker.internal:11434
|
||||
|
||||
|
||||
# Model aliases - friendly names that map to actual provider names
|
||||
model_aliases:
|
||||
# Alias for summarization tasks -> fast/cheap model
|
||||
arch.summarize.v1:
|
||||
target: gpt-5-mini-2025-08-07
|
||||
|
||||
# Alias for general purpose tasks -> latest model
|
||||
arch.v1:
|
||||
target: o3
|
||||
|
||||
# Alias for reasoning tasks -> capable model
|
||||
arch.reasoning.v1:
|
||||
target: gpt-4o
|
||||
|
||||
# Alias for creative tasks -> Claude model
|
||||
arch.creative.v1:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
|
||||
# Alias for quick responses -> fast model
|
||||
arch.fast.v1:
|
||||
target: claude-3-haiku-20240307
|
||||
|
||||
# Semantic aliases
|
||||
summary-model:
|
||||
target: gpt-5-mini-2025-08-07
|
||||
|
||||
chat-model:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
|
||||
creative-model:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
|
|
@ -24,7 +24,7 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
|
||||
# Anthropic Models
|
||||
- model: anthropic/claude-3-5-sonnet-20241022
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
|
|
@ -56,7 +56,7 @@ model_aliases:
|
|||
|
||||
# Alias for creative tasks -> Claude model
|
||||
arch.creative.v1:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
target: claude-sonnet-4-20250514
|
||||
|
||||
# Alias for quick responses -> fast model
|
||||
arch.fast.v1:
|
||||
|
|
@ -67,7 +67,7 @@ model_aliases:
|
|||
target: gpt-5-mini-2025-08-07
|
||||
|
||||
chat-model:
|
||||
target: llama3.1
|
||||
target:
|
||||
|
||||
creative-model:
|
||||
target: claude-3-5-sonnet-20241022
|
||||
target: claude-sonnet-4-20250514
|
||||
|
|
|
|||
|
|
@ -350,3 +350,57 @@ def test_direct_model_4o_mini_anthropic():
|
|||
response_content = "".join(b.text for b in message.content if b.type == "text")
|
||||
logger.info(f"Response from direct 4o-mini via Anthropic: {response_content}")
|
||||
assert response_content == "Hello from direct 4o-mini via Anthropic!"
|
||||
|
||||
|
||||
def test_anthropic_thinking_mode_streaming():
|
||||
# Anthropic base_url should be the root, not /v1/chat/completions
|
||||
base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
|
||||
|
||||
client = anthropic.Anthropic(
|
||||
api_key=os.environ.get("ANTHROPIC_API_KEY", "test-key"),
|
||||
base_url=base_url,
|
||||
)
|
||||
|
||||
thinking_block_started = False
|
||||
thinking_delta_seen = False
|
||||
text_delta_seen = False
|
||||
|
||||
with client.messages.stream(
|
||||
model="claude-sonnet-4-20250514",
|
||||
max_tokens=2048,
|
||||
thinking={"type": "enabled", "budget_tokens": 1024}, # <- idiomatic
|
||||
messages=[{"role": "user", "content": "Explain briefly what 2+2 equals"}],
|
||||
) as stream:
|
||||
for event in stream:
|
||||
# 1) detect when a thinking block starts
|
||||
if event.type == "content_block_start" and getattr(
|
||||
event, "content_block", None
|
||||
):
|
||||
if getattr(event.content_block, "type", None) == "thinking":
|
||||
thinking_block_started = True
|
||||
|
||||
# 2) collect text vs thinking deltas
|
||||
if event.type == "content_block_delta" and getattr(event, "delta", None):
|
||||
if event.delta.type == "text_delta":
|
||||
text_delta_seen = True
|
||||
elif event.delta.type == "thinking_delta":
|
||||
# some SDKs expose .thinking, others .text for this delta; not needed here
|
||||
thinking_delta_seen = True
|
||||
|
||||
final = stream.get_final_message()
|
||||
|
||||
# Basic integrity
|
||||
assert final is not None
|
||||
assert final.content and len(final.content) > 0
|
||||
|
||||
# Normal text should have streamed
|
||||
assert text_delta_seen, "Expected normal text deltas in stream"
|
||||
|
||||
# With thinking enabled, we expect a thinking block and at least one thinking delta
|
||||
assert thinking_block_started, "No thinking block started"
|
||||
assert thinking_delta_seen, "No thinking deltas observed"
|
||||
|
||||
# Optional: double-check on the assembled message
|
||||
final_block_types = [blk.type for blk in final.content]
|
||||
assert "text" in final_block_types
|
||||
assert "thinking" in final_block_types
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue