diff --git a/arch/tools/cli/core.py b/arch/tools/cli/core.py index fda8ba88..0846a51a 100644 --- a/arch/tools/cli/core.py +++ b/arch/tools/cli/core.py @@ -1,10 +1,11 @@ +import json import subprocess import os import time import sys import yaml -from cli.utils import getLogger, read_config_file +from cli.utils import getLogger from cli.consts import ( ARCHGW_DOCKER_IMAGE, ARCHGW_DOCKER_NAME, @@ -189,26 +190,13 @@ def stop_arch_modelserver(): def start_cli_agent(arch_config_file=None, settings_json="{}"): """Start a CLI client connected to Arch.""" - import json - # Use current directory for config if not specified - if arch_config_file is None: - config_path = "." - else: - config_path = ( - os.path.dirname(arch_config_file) - if os.path.dirname(arch_config_file) - else "." - ) - - # Get port and host from arch_config.yaml listeners > egress - arch_config = read_config_file(config_path) - if not arch_config: - log.error(f"Config file not found in {config_path}") - sys.exit(1) + with open(arch_config_file, "r") as file: + arch_config = file.read() + arch_config_yaml = yaml.safe_load(arch_config) # Get egress listener configuration - egress_config = arch_config.get("listeners", {}).get("egress_traffic", {}) + egress_config = arch_config_yaml.get("listeners", {}).get("egress_traffic", {}) host = egress_config.get("host", "127.0.0.1") port = egress_config.get("port", 12000) @@ -240,7 +228,7 @@ def start_cli_agent(arch_config_file=None, settings_json="{}"): ] else: # Check if arch.claude.code.small.fast alias exists in model_aliases - model_aliases = arch_config.get("model_aliases", {}) + model_aliases = arch_config_yaml.get("model_aliases", {}) if "arch.claude.code.small.fast" in model_aliases: env["ANTHROPIC_SMALL_FAST_MODEL"] = "arch.claude.code.small.fast" else: @@ -276,7 +264,7 @@ def start_cli_agent(arch_config_file=None, settings_json="{}"): # Use claude from PATH claude_path = "claude" - log.info(f"Starting Claude CLI Agent to Arch at {host}:{port}") + 
log.info(f"Connecting Claude Code Agent to Arch at {host}:{port}") try: subprocess.run([claude_path] + claude_args, env=env, check=True) diff --git a/arch/tools/cli/main.py b/arch/tools/cli/main.py index ddaacf1d..25c00404 100644 --- a/arch/tools/cli/main.py +++ b/arch/tools/cli/main.py @@ -17,7 +17,6 @@ from cli.utils import ( has_ingress_listener, load_env_file_to_dict, stream_access_logs, - read_config_file, find_config_file, ) from cli.core import ( @@ -189,7 +188,6 @@ def up(file, path, service, foreground): return log.info(f"Validating {arch_config_file}") - ( validation_return_code, validation_stdout, diff --git a/arch/tools/cli/utils.py b/arch/tools/cli/utils.py index 97a8428f..d7adca60 100644 --- a/arch/tools/cli/utils.py +++ b/arch/tools/cli/utils.py @@ -88,23 +88,6 @@ def load_env_file_to_dict(file_path): return env_dict -def read_config_file(path="."): - """Read configuration from arch_config.yaml or config.yaml in the specified path.""" - config_files = ["arch_config.yaml", "config.yaml"] - - for config_file in config_files: - config_path = os.path.abspath(os.path.join(path, config_file)) - if os.path.exists(config_path): - try: - with open(config_path, "r") as f: - return yaml.safe_load(f) - except Exception as e: - log.warning(f"Error reading {config_path}: {e}") - continue - - return {} - - def find_config_file(path=".", file=None): """Find the appropriate config file path.""" if file: diff --git a/demos/use_cases/claude_code/README.md b/demos/use_cases/claude_code/README.md index f4b9e038..afaf785f 100644 --- a/demos/use_cases/claude_code/README.md +++ b/demos/use_cases/claude_code/README.md @@ -1,32 +1,24 @@ -# Claude Code Routing with Intelligence +# Claude Code Routing with (Preference-aligned) Intelligence ## Why This Matters **Claude Code is powerful, but what if you could access the best of ALL AI models through one familiar interface?** -Instead of being locked into a single provider, imagine: +Instead of being locked into a set of LLMs from 
one provider, imagine: - Using **DeepSeek's coding expertise** for complex algorithms -- Leveraging **GPT-4's reasoning** for architecture decisions +- Leveraging **GPT-5's reasoning** for architecture decisions - Tapping **Claude's analysis** for code reviews - Accessing **Grok's speed** for quick iterations **All through the same Claude Code interface you already love.** -## The Problem with Single-Model Development - -Most developers are stuck in single-provider silos: -- 🔒 **Vendor Lock-in**: Tied to one model's strengths and weaknesses -- 🎯 **Wrong Tool for the Job**: Using a reasoning model for simple tasks (expensive) or a fast model for complex problems (poor results) -- 🚫 **No Fallbacks**: When your preferred model is down, you're stuck -- 💸 **Suboptimal Costs**: Paying premium prices for tasks that could use cheaper models - ## The Solution: Intelligent Multi-LLM Routing Arch Gateway transforms Claude Code into a **universal AI development interface** that: ### 🌐 **Connects to Any LLM Provider** -- **OpenAI**: GPT-4o, o1-preview, GPT-4o-mini -- **Anthropic**: Claude 3.5 Sonnet, Claude 3 Haiku +- **OpenAI**: GPT-4.1, GPT-5, etc. +- **Anthropic**: Claude 3.5 Sonnet, Claude 3 Haiku, Claude 4.5 - **DeepSeek**: DeepSeek-V3, DeepSeek-Coder-V2 - **Grok**: Grok-2, Grok-2-mini - **Others**: Gemini, Llama, Mistral, local models via Ollama @@ -35,25 +27,51 @@ Arch Gateway transforms Claude Code into a **universal AI development interface* Our research-backed routing system automatically selects the optimal model by analyzing: - **Task complexity** (simple refactoring vs. architectural design) - **Content type** (code generation vs. debugging vs. documentation) -- **Performance preferences** (speed vs. quality vs. cost) -- **Real-time availability** (automatic failover when models are down) -### 💡 **Learns Your Preferences** -The system adapts to your coding patterns and preferences over time, ensuring you always get the best model for your specific needs. 
## Quick Start ### Prerequisites - Claude Code installed: `npm install -g @anthropic-ai/claude-code` - Docker running on your system +- Create a python virtual environment in your current working directory -### 1. Install and Start Arch Gateway +### 1. Get the Configuration File +Download the demo configuration file using one of these methods: + +**Option A: Direct download** +```bash +curl -O https://raw.githubusercontent.com/katanemo/arch/main/demos/use_cases/claude_code/config.yaml +``` + +**Option B: Clone the repository** +```bash +git clone https://github.com/katanemo/arch.git +cd arch/demos/use_cases/claude_code + +``` + +### 2. Set Up Your API Keys +Set up your environment variables with your actual API keys: +```bash +export OPENAI_API_KEY="your-openai-api-key" +export ANTHROPIC_API_KEY="your-anthropic-api-key" +export AZURE_API_KEY="your-azure-api-key" # Optional +``` + +Alternatively, create a `.env` file in your working directory: +```bash +echo "OPENAI_API_KEY=your-openai-api-key" > .env +echo "ANTHROPIC_API_KEY=your-anthropic-api-key" >> .env +``` + +### 3. Install and Start Arch Gateway ```bash pip install archgw archgw up ``` -### 2. Launch Claude Code with Multi-LLM Support +### 4. Launch Claude Code with Multi-LLM Support ```bash archgw cli-agent claude ``` @@ -67,31 +85,10 @@ That's it! 
Claude Code now has access to multiple LLM providers with intelligent *Claude Code interface enhanced with intelligent model routing and multi-provider access* ### Real-Time Model Selection -When you interact with Claude Code, you'll see: +When you interact with Claude Code, you'll get: - **Automatic model selection** based on your query type - **Transparent routing decisions** showing which model was chosen and why - **Seamless failover** if a model becomes unavailable -- **Performance metrics** comparing response times and quality - -### Example Interactions - -**Code Generation Query:** -``` -You: "Create a Python function to validate email addresses" -→ Routed to: DeepSeek-Coder-V2 (optimized for code generation) -``` - -**Architecture Discussion:** -``` -You: "How should I structure a microservices backend?" -→ Routed to: Claude 3.5 Sonnet (excellent for architectural reasoning) -``` - -**Quick Bug Fix:** -``` -You: "Fix this syntax error in my JavaScript" -→ Routed to: GPT-4o-mini (fast and cost-effective for simple fixes) -``` ## Configuration @@ -129,31 +126,8 @@ ANTHROPIC_BASE_URL=http://127.0.0.1:12000 # Routes through Arch Gateway ANTHROPIC_SMALL_FAST_MODEL=arch.fast.v1 # Uses intelligent alias ``` -## Benefits You'll See Immediately - -### 🚀 **Better Performance** -- Right model for each task = better results -- Automatic failover = no interruptions -- Caching = faster repeated queries - -### 💰 **Cost Optimization** -- Use expensive models only when needed -- Leverage free/cheap models for simple tasks -- Track usage across all providers - -### 🛡️ **Reliability** -- Multiple providers = no single point of failure -- Automatic retry logic -- Graceful degradation when models are unavailable - -### 📊 **Insights** -- See which models work best for your coding style -- Track performance metrics across providers -- Optimize your model usage over time - ## Real Developer Workflows -This intelligent routing is powered by our research in preference-aligned AI 
systems: -- **Research Paper**: [Preference-Aligned LLM Router](https://katanemo.com/research) -- **Technical Docs**: [docs.katanemo.com](https://docs.katanemo.com) -- **API Reference**: [docs.katanemo.com/api](https://docs.katanemo.com/api) +This intelligent routing is powered by our research in preference-aligned LLM routing: +- **Research Paper**: [Preference-Aligned LLM Router](https://arxiv.org/abs/2506.16655) +- **Technical Docs**: [docs.archgw.com](https://docs.archgw.com)