making sure that claude code can run via the archgw cli

This commit is contained in:
Salman Paracha 2025-09-28 14:15:42 -07:00
parent 1b7f9e43e7
commit 2f611f74a8
9 changed files with 422 additions and 30 deletions

View file

@ -4,7 +4,7 @@ import time
import sys
import yaml
from cli.utils import getLogger
from cli.utils import getLogger, read_config_file
from cli.consts import (
ARCHGW_DOCKER_IMAGE,
ARCHGW_DOCKER_NAME,
@ -185,3 +185,106 @@ def stop_arch_modelserver():
except subprocess.CalledProcessError as e:
log.info(f"Failed to start model_server. Please check archgw_modelserver logs")
sys.exit(1)
def start_cli_agent(arch_config_file=None, settings_json="{}"):
"""Start a CLI client connected to Arch."""
import json
# Use current directory for config if not specified
if arch_config_file is None:
config_path = "."
else:
config_path = (
os.path.dirname(arch_config_file)
if os.path.dirname(arch_config_file)
else "."
)
# Get port and host from arch_config.yaml listeners > egress
arch_config = read_config_file(config_path)
if not arch_config:
log.error(f"Config file not found in {config_path}")
sys.exit(1)
# Get egress listener configuration
egress_config = arch_config.get("listeners", {}).get("egress_traffic", {})
host = egress_config.get("host", "127.0.0.1")
port = egress_config.get("port", 12000)
# Parse additional settings from command line
try:
additional_settings = json.loads(settings_json) if settings_json else {}
except json.JSONDecodeError:
log.error("Settings must be valid JSON")
sys.exit(1)
# Set up environment variables
env = os.environ.copy()
env.update(
{
"ANTHROPIC_AUTH_TOKEN": "test", # Use test token for arch
"ANTHROPIC_API_KEY": "",
"ANTHROPIC_BASE_URL": f"http://{host}:{port}",
"NO_PROXY": host,
"DISABLE_TELEMETRY": "true",
"DISABLE_COST_WARNINGS": "true",
"API_TIMEOUT_MS": "600000",
}
)
# Set ANTHROPIC_SMALL_FAST_MODEL from additional_settings or model alias
if "ANTHROPIC_SMALL_FAST_MODEL" in additional_settings:
env["ANTHROPIC_SMALL_FAST_MODEL"] = additional_settings[
"ANTHROPIC_SMALL_FAST_MODEL"
]
else:
# Check if arch.claude.code.small.fast alias exists in model_aliases
model_aliases = arch_config.get("model_aliases", {})
if "arch.claude.code.small.fast" in model_aliases:
env["ANTHROPIC_SMALL_FAST_MODEL"] = "arch.claude.code.small.fast"
else:
log.info(
"Tip: Set an alias 'arch.claude.code.small.fast' in your model_aliases config to set a small fast model Claude Code"
)
log.info("Or provide ANTHROPIC_SMALL_FAST_MODEL in --settings JSON")
# Non-interactive mode configuration from additional_settings only
if additional_settings.get("NON_INTERACTIVE_MODE", False):
env.update(
{
"CI": "true",
"FORCE_COLOR": "0",
"NODE_NO_READLINE": "1",
"TERM": "dumb",
}
)
# Build claude command arguments
claude_args = []
# Add settings if provided, excluding those already handled as environment variables
if additional_settings:
# Filter out settings that are already processed as environment variables
claude_settings = {
k: v
for k, v in additional_settings.items()
if k not in ["ANTHROPIC_SMALL_FAST_MODEL", "NON_INTERACTIVE_MODE"]
}
if claude_settings:
claude_args.append(f"--settings={json.dumps(claude_settings)}")
# Use claude from PATH
claude_path = "claude"
log.info(f"Starting Claude CLI Agent to Arch at {host}:{port}")
try:
subprocess.run([claude_path] + claude_args, env=env, check=True)
except subprocess.CalledProcessError as e:
log.error(f"Error starting claude: {e}")
sys.exit(1)
except FileNotFoundError:
log.error(
f"{claude_path} not found. Make sure Claude Code is installed: npm install -g @anthropic-ai/claude-code"
)
sys.exit(1)

View file

@ -4,14 +4,21 @@ import sys
import subprocess
import multiprocessing
import importlib.metadata
import json
from cli import targets
from cli.docker_cli import docker_validate_archgw_schema, stream_gateway_logs
from cli.docker_cli import (
docker_validate_archgw_schema,
stream_gateway_logs,
docker_container_status,
)
from cli.utils import (
getLogger,
get_llm_provider_access_keys,
has_ingress_listener,
load_env_file_to_dict,
stream_access_logs,
read_config_file,
find_config_file,
)
from cli.core import (
start_arch_modelserver,
@ -19,9 +26,11 @@ from cli.core import (
start_arch,
stop_docker_container,
download_models_from_hf,
start_cli_agent,
)
from cli.consts import (
ARCHGW_DOCKER_IMAGE,
ARCHGW_DOCKER_NAME,
KATANEMO_DOCKERHUB_REPO,
SERVICE_NAME_ARCHGW,
SERVICE_NAME_MODEL_SERVER,
@ -171,12 +180,8 @@ def up(file, path, service, foreground):
start_arch_modelserver(foreground)
return
if file:
# If a file is provided, process that file
arch_config_file = os.path.abspath(file)
else:
# If no file is provided, use the path and look for arch_config.yaml
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
# Use the utility function to find config file
arch_config_file = find_config_file(path, file)
# Check if the file exists
if not os.path.exists(arch_config_file):
@ -329,10 +334,52 @@ def logs(debug, follow):
archgw_process.terminate()
@click.command()
@click.argument("cli_type", type=click.Choice(["claude"]), required=True)
@click.option(
"--path",
default=None,
help="Path to the directory containing arch_config.yaml (defaults to current directory)",
)
@click.option(
"--settings",
default="{}",
help="Additional settings as JSON string for the CLI agent.",
)
def cli_agent(cli_type, path, settings):
"""Start a CLI agent connected to Arch.
CLI_TYPE: The type of CLI agent to start (currently only 'claude' is supported)
"""
# Determine arch_config.yaml path
arch_config_file = None
if path:
arch_config_file = os.path.join(path, "arch_config.yaml")
else:
arch_config_file = "arch_config.yaml" # Current directory
# Check if archgw docker container is running
archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
if archgw_status != "running":
log.error(f"archgw docker container is not running (status: {archgw_status})")
log.error("Please start archgw using the 'archgw up' command.")
sys.exit(1)
try:
start_cli_agent(arch_config_file, settings)
except SystemExit:
# Re-raise SystemExit to preserve exit codes
raise
except Exception as e:
click.echo(f"Error: {e}")
sys.exit(1)
main.add_command(up)
main.add_command(down)
main.add_command(build)
main.add_command(logs)
main.add_command(cli_agent)
main.add_command(generate_prompt_targets)
if __name__ == "__main__":

View file

@ -88,6 +88,36 @@ def load_env_file_to_dict(file_path):
return env_dict
def read_config_file(path="."):
"""Read configuration from arch_config.yaml or config.yaml in the specified path."""
config_files = ["arch_config.yaml", "config.yaml"]
for config_file in config_files:
config_path = os.path.abspath(os.path.join(path, config_file))
if os.path.exists(config_path):
try:
with open(config_path, "r") as f:
return yaml.safe_load(f)
except Exception as e:
log.warning(f"Error reading {config_path}: {e}")
continue
return {}
def find_config_file(path=".", file=None):
"""Find the appropriate config file path."""
if file:
# If a file is provided, process that file
return os.path.abspath(file)
else:
# If no file is provided, use the path and look for arch_config.yaml first, then config.yaml for convenience
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
if not os.path.exists(arch_config_file):
arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
return arch_config_file
def stream_access_logs(follow):
"""
Get the archgw access logs

View file

@ -70,7 +70,9 @@ pub enum ServiceTier {
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ThinkingConfig {
pub enabled: bool,
#[serde(rename = "type")]
pub thinking_type: String,
pub budget_tokens: Option<u32>,
}
// MCP Server types
@ -166,7 +168,8 @@ pub enum MessagesContentBlock {
cache_control: Option<MessagesCacheControl>,
},
Thinking {
text: String,
thinking: String,
signature: Option<String>,
cache_control: Option<MessagesCacheControl>,
},
Image {
@ -235,6 +238,7 @@ impl ExtractText for Vec<MessagesContentBlock> {
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type")]
pub enum MessagesImageSource {
Base64 {
media_type: String,
@ -247,6 +251,7 @@ pub enum MessagesImageSource {
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type")]
pub enum MessagesDocumentSource {
Base64 {
media_type: String,
@ -409,6 +414,8 @@ pub enum MessagesContentDelta {
TextDelta { text: String },
#[serde(rename = "input_json_delta")]
InputJsonDelta { partial_json: String },
#[serde(rename = "thinking_delta")]
ThinkingDelta { thinking: String },
}
#[skip_serializing_none]
@ -566,10 +573,10 @@ impl ProviderStreamResponse for MessagesStreamEvent {
fn content_delta(&self) -> Option<&str> {
match self {
MessagesStreamEvent::ContentBlockDelta { delta, .. } => {
if let MessagesContentDelta::TextDelta { text } = delta {
Some(text)
} else {
None
match delta {
MessagesContentDelta::TextDelta { text } => Some(text),
MessagesContentDelta::ThinkingDelta { thinking } => Some(thinking),
_ => None,
}
}
_ => None,
@ -672,7 +679,7 @@ mod tests {
"system": "You are a helpful assistant",
"service_tier": "auto",
"thinking": {
"enabled": true
"type": "enabled"
},
"metadata": {
"user_id": "123"
@ -699,7 +706,7 @@ mod tests {
}
if let Some(thinking) = &deserialized_request.thinking {
assert_eq!(thinking.enabled, true);
assert_eq!(thinking.thinking_type, "enabled");
} else {
panic!("Expected thinking config");
}
@ -754,10 +761,9 @@ mod tests {
{
"type": "image",
"source": {
"base64": {
"media_type": "image/jpeg",
"data": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
}
"type": "base64",
"media_type": "image/jpeg",
"data": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
}
}
]
@ -767,7 +773,7 @@ mod tests {
"content": [
{
"type": "thinking",
"text": "Let me analyze the image and then check the weather..."
"thinking": "Let me analyze the image and then check the weather..."
},
{
"type": "text",
@ -854,8 +860,8 @@ mod tests {
assert_eq!(content_blocks.len(), 3);
// Validate thinking content block
if let MessagesContentBlock::Thinking { text, .. } = &content_blocks[0] {
assert_eq!(text, "Let me analyze the image and then check the weather...");
if let MessagesContentBlock::Thinking { thinking, .. } = &content_blocks[0] {
assert_eq!(thinking, "Let me analyze the image and then check the weather...");
} else {
panic!("Expected thinking content block");
}
@ -1320,4 +1326,68 @@ mod tests {
assert_eq!(all_variants.len(), 1);
assert_eq!(all_variants[0], AnthropicApi::Messages);
}
#[test]
fn test_anthropic_thinking_streaming() {
// Test thinking delta stream event
let thinking_delta_json = json!({
"type": "content_block_delta",
"index": 0,
"delta": {
"type": "thinking_delta",
"thinking": ".\n\nI need to consider:\n1. Current"
}
});
let deserialized_event: MessagesStreamEvent = serde_json::from_value(thinking_delta_json.clone()).unwrap();
if let MessagesStreamEvent::ContentBlockDelta { index, ref delta } = deserialized_event {
assert_eq!(index, 0);
if let MessagesContentDelta::ThinkingDelta { thinking } = delta {
assert_eq!(thinking, ".\n\nI need to consider:\n1. Current");
} else {
panic!("Expected thinking delta");
}
} else {
panic!("Expected content block delta event");
}
// Test that thinking delta is returned by content_delta()
assert_eq!(deserialized_event.content_delta(), Some(".\n\nI need to consider:\n1. Current"));
let serialized_event_json = serde_json::to_value(&deserialized_event).unwrap();
assert_eq!(thinking_delta_json, serialized_event_json);
}
#[test]
fn test_anthropic_thinking_request_config() {
// Test thinking config with budget_tokens
let request_json = json!({
"model": "claude-sonnet-4-20250514",
"messages": [
{
"role": "user",
"content": "Test message"
}
],
"max_tokens": 2048,
"thinking": {
"type": "enabled",
"budget_tokens": 1024
}
});
let deserialized_request: MessagesRequest = serde_json::from_value(request_json.clone()).unwrap();
assert_eq!(deserialized_request.model, "claude-sonnet-4-20250514");
assert_eq!(deserialized_request.max_tokens, 2048);
if let Some(thinking) = &deserialized_request.thinking {
assert_eq!(thinking.thinking_type, "enabled");
assert_eq!(thinking.budget_tokens, Some(1024));
} else {
panic!("Expected thinking config");
}
let serialized_json = serde_json::to_value(&deserialized_request).unwrap();
assert_eq!(request_json, serialized_json);
}
}

View file

@ -88,6 +88,7 @@ pub struct ChatCompletionsRequest {
pub prediction: Option<StaticContent>,
// pub reasoning_effect: Option<bool>, // GOOD FIRST ISSUE: Future support for reasoning effects
pub response_format: Option<Value>,
pub reasoning_effort: Option<String>, // e.g., "none", "low", "medium", "high"
// pub safety_identifier: Option<String>, // GOOD FIRST ISSUE: Future support for safety identifiers
pub seed: Option<i32>,
pub service_tier: Option<String>,

View file

@ -862,9 +862,8 @@ fn convert_anthropic_content_to_openai(content: &[MessagesContentBlock]) -> Resu
MessagesContentBlock::Text { text, .. } => {
text_parts.push(text.clone());
}
MessagesContentBlock::Thinking { text, .. } => {
// Include thinking as regular text for OpenAI
text_parts.push(format!("[Thinking: {}]", text));
MessagesContentBlock::Thinking { thinking, .. } => {
text_parts.push(format!("thinking: {}", thinking));
}
_ => {
// Skip other content types for basic text conversion
@ -1009,6 +1008,21 @@ fn convert_content_delta(delta: MessagesContentDelta) -> Result<ChatCompletionsS
None,
))
}
MessagesContentDelta::ThinkingDelta { thinking } => {
Ok(create_openai_chunk(
"stream",
"unknown",
MessageDelta {
role: None,
content: Some(format!("[Thinking: {}]", thinking)),
refusal: None,
function_call: None,
tool_calls: None,
},
None,
None,
))
}
MessagesContentDelta::InputJsonDelta { partial_json } => {
Ok(create_openai_chunk(
"stream",

View file

@ -0,0 +1,73 @@
version: v0.1
listeners:
egress_traffic:
address: 0.0.0.0
port: 12000
message_format: openai
timeout: 30s
llm_providers:
# OpenAI Models
- model: openai/gpt-5-mini-2025-08-07
access_key: $OPENAI_API_KEY
default: true
- model: openai/gpt-4o-mini
access_key: $OPENAI_API_KEY
- model: openai/o3
access_key: $OPENAI_API_KEY
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
# Anthropic Models
- model: anthropic/claude-3-5-sonnet-20241022
access_key: $ANTHROPIC_API_KEY
- model: anthropic/claude-3-haiku-20240307
access_key: $ANTHROPIC_API_KEY
# Azure OpenAI Models
- model: azure_openai/gpt-5-mini
access_key: $AZURE_API_KEY
base_url: https://katanemo.openai.azure.com
# Ollama Models
- model: ollama/llama3.1
base_url: http://host.docker.internal:11434
# Model aliases - friendly names that map to actual provider names
model_aliases:
# Alias for summarization tasks -> fast/cheap model
arch.summarize.v1:
target: gpt-5-mini-2025-08-07
# Alias for general purpose tasks -> latest model
arch.v1:
target: o3
# Alias for reasoning tasks -> capable model
arch.reasoning.v1:
target: gpt-4o
# Alias for creative tasks -> Claude model
arch.creative.v1:
target: claude-3-5-sonnet-20241022
# Alias for quick responses -> fast model
arch.fast.v1:
target: claude-3-haiku-20240307
# Semantic aliases
summary-model:
target: gpt-5-mini-2025-08-07
chat-model:
target: claude-3-5-sonnet-20241022
creative-model:
target: claude-3-5-sonnet-20241022

View file

@ -24,7 +24,7 @@ llm_providers:
access_key: $OPENAI_API_KEY
# Anthropic Models
- model: anthropic/claude-3-5-sonnet-20241022
- model: anthropic/claude-sonnet-4-20250514
access_key: $ANTHROPIC_API_KEY
- model: anthropic/claude-3-haiku-20240307
@ -56,7 +56,7 @@ model_aliases:
# Alias for creative tasks -> Claude model
arch.creative.v1:
target: claude-3-5-sonnet-20241022
target: claude-sonnet-4-20250514
# Alias for quick responses -> fast model
arch.fast.v1:
@ -67,7 +67,7 @@ model_aliases:
target: gpt-5-mini-2025-08-07
chat-model:
target: llama3.1
target:
creative-model:
target: claude-3-5-sonnet-20241022
target: claude-sonnet-4-20250514

View file

@ -350,3 +350,57 @@ def test_direct_model_4o_mini_anthropic():
response_content = "".join(b.text for b in message.content if b.type == "text")
logger.info(f"Response from direct 4o-mini via Anthropic: {response_content}")
assert response_content == "Hello from direct 4o-mini via Anthropic!"
def test_anthropic_thinking_mode_streaming():
# Anthropic base_url should be the root, not /v1/chat/completions
base_url = LLM_GATEWAY_ENDPOINT.replace("/v1/chat/completions", "")
client = anthropic.Anthropic(
api_key=os.environ.get("ANTHROPIC_API_KEY", "test-key"),
base_url=base_url,
)
thinking_block_started = False
thinking_delta_seen = False
text_delta_seen = False
with client.messages.stream(
model="claude-sonnet-4-20250514",
max_tokens=2048,
thinking={"type": "enabled", "budget_tokens": 1024}, # <- idiomatic
messages=[{"role": "user", "content": "Explain briefly what 2+2 equals"}],
) as stream:
for event in stream:
# 1) detect when a thinking block starts
if event.type == "content_block_start" and getattr(
event, "content_block", None
):
if getattr(event.content_block, "type", None) == "thinking":
thinking_block_started = True
# 2) collect text vs thinking deltas
if event.type == "content_block_delta" and getattr(event, "delta", None):
if event.delta.type == "text_delta":
text_delta_seen = True
elif event.delta.type == "thinking_delta":
# some SDKs expose .thinking, others .text for this delta; not needed here
thinking_delta_seen = True
final = stream.get_final_message()
# Basic integrity
assert final is not None
assert final.content and len(final.content) > 0
# Normal text should have streamed
assert text_delta_seen, "Expected normal text deltas in stream"
# With thinking enabled, we expect a thinking block and at least one thinking delta
assert thinking_block_started, "No thinking block started"
assert thinking_delta_seen, "No thinking deltas observed"
# Optional: double-check on the assembled message
final_block_types = [blk.type for blk in final.content]
assert "text" in final_block_types
assert "thinking" in final_block_types