mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
minor fixes to make sure PR is clean. Ready to ship
This commit is contained in:
parent
5f7f38ad24
commit
e771912a86
4 changed files with 49 additions and 106 deletions
|
|
@ -1,10 +1,11 @@
|
|||
import json
|
||||
import subprocess
|
||||
import os
|
||||
import time
|
||||
import sys
|
||||
|
||||
import yaml
|
||||
from cli.utils import getLogger, read_config_file
|
||||
from cli.utils import getLogger
|
||||
from cli.consts import (
|
||||
ARCHGW_DOCKER_IMAGE,
|
||||
ARCHGW_DOCKER_NAME,
|
||||
|
|
@ -189,26 +190,13 @@ def stop_arch_modelserver():
|
|||
|
||||
def start_cli_agent(arch_config_file=None, settings_json="{}"):
|
||||
"""Start a CLI client connected to Arch."""
|
||||
import json
|
||||
|
||||
# Use current directory for config if not specified
|
||||
if arch_config_file is None:
|
||||
config_path = "."
|
||||
else:
|
||||
config_path = (
|
||||
os.path.dirname(arch_config_file)
|
||||
if os.path.dirname(arch_config_file)
|
||||
else "."
|
||||
)
|
||||
|
||||
# Get port and host from arch_config.yaml listeners > egress
|
||||
arch_config = read_config_file(config_path)
|
||||
if not arch_config:
|
||||
log.error(f"Config file not found in {config_path}")
|
||||
sys.exit(1)
|
||||
with open(arch_config_file, "r") as file:
|
||||
arch_config = file.read()
|
||||
arch_config_yaml = yaml.safe_load(arch_config)
|
||||
|
||||
# Get egress listener configuration
|
||||
egress_config = arch_config.get("listeners", {}).get("egress_traffic", {})
|
||||
egress_config = arch_config_yaml.get("listeners", {}).get("egress_traffic", {})
|
||||
host = egress_config.get("host", "127.0.0.1")
|
||||
port = egress_config.get("port", 12000)
|
||||
|
||||
|
|
@ -240,7 +228,7 @@ def start_cli_agent(arch_config_file=None, settings_json="{}"):
|
|||
]
|
||||
else:
|
||||
# Check if arch.claude.code.small.fast alias exists in model_aliases
|
||||
model_aliases = arch_config.get("model_aliases", {})
|
||||
model_aliases = arch_config_yaml.get("model_aliases", {})
|
||||
if "arch.claude.code.small.fast" in model_aliases:
|
||||
env["ANTHROPIC_SMALL_FAST_MODEL"] = "arch.claude.code.small.fast"
|
||||
else:
|
||||
|
|
@ -276,7 +264,7 @@ def start_cli_agent(arch_config_file=None, settings_json="{}"):
|
|||
|
||||
# Use claude from PATH
|
||||
claude_path = "claude"
|
||||
log.info(f"Starting Claude CLI Agent to Arch at {host}:{port}")
|
||||
log.info(f"Connecting Claude Code Agent to Arch at {host}:{port}")
|
||||
|
||||
try:
|
||||
subprocess.run([claude_path] + claude_args, env=env, check=True)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ from cli.utils import (
|
|||
has_ingress_listener,
|
||||
load_env_file_to_dict,
|
||||
stream_access_logs,
|
||||
read_config_file,
|
||||
find_config_file,
|
||||
)
|
||||
from cli.core import (
|
||||
|
|
@ -189,7 +188,6 @@ def up(file, path, service, foreground):
|
|||
return
|
||||
|
||||
log.info(f"Validating {arch_config_file}")
|
||||
|
||||
(
|
||||
validation_return_code,
|
||||
validation_stdout,
|
||||
|
|
|
|||
|
|
@ -88,23 +88,6 @@ def load_env_file_to_dict(file_path):
|
|||
return env_dict
|
||||
|
||||
|
||||
def read_config_file(path="."):
|
||||
"""Read configuration from arch_config.yaml or config.yaml in the specified path."""
|
||||
config_files = ["arch_config.yaml", "config.yaml"]
|
||||
|
||||
for config_file in config_files:
|
||||
config_path = os.path.abspath(os.path.join(path, config_file))
|
||||
if os.path.exists(config_path):
|
||||
try:
|
||||
with open(config_path, "r") as f:
|
||||
return yaml.safe_load(f)
|
||||
except Exception as e:
|
||||
log.warning(f"Error reading {config_path}: {e}")
|
||||
continue
|
||||
|
||||
return {}
|
||||
|
||||
|
||||
def find_config_file(path=".", file=None):
|
||||
"""Find the appropriate config file path."""
|
||||
if file:
|
||||
|
|
|
|||
|
|
@ -1,32 +1,24 @@
|
|||
# Claude Code Routing with Intelligence
|
||||
# Claude Code Routing with (Preference-aligned) Intelligence
|
||||
|
||||
## Why This Matters
|
||||
|
||||
**Claude Code is powerful, but what if you could access the best of ALL AI models through one familiar interface?**
|
||||
|
||||
Instead of being locked into a single provider, imagine:
|
||||
Instead of being locked into a set of LLMs from one provider, imagine:
|
||||
- Using **DeepSeek's coding expertise** for complex algorithms
|
||||
- Leveraging **GPT-4's reasoning** for architecture decisions
|
||||
- Leveraging **GPT-5's reasoning** for architecture decisions
|
||||
- Tapping **Claude's analysis** for code reviews
|
||||
- Accessing **Grok's speed** for quick iterations
|
||||
|
||||
**All through the same Claude Code interface you already love.**
|
||||
|
||||
## The Problem with Single-Model Development
|
||||
|
||||
Most developers are stuck in single-provider silos:
|
||||
- 🔒 **Vendor Lock-in**: Tied to one model's strengths and weaknesses
|
||||
- 🎯 **Wrong Tool for the Job**: Using a reasoning model for simple tasks (expensive) or a fast model for complex problems (poor results)
|
||||
- 🚫 **No Fallbacks**: When your preferred model is down, you're stuck
|
||||
- 💸 **Suboptimal Costs**: Paying premium prices for tasks that could use cheaper models
|
||||
|
||||
## The Solution: Intelligent Multi-LLM Routing
|
||||
|
||||
Arch Gateway transforms Claude Code into a **universal AI development interface** that:
|
||||
|
||||
### 🌐 **Connects to Any LLM Provider**
|
||||
- **OpenAI**: GPT-4o, o1-preview, GPT-4o-mini
|
||||
- **Anthropic**: Claude 3.5 Sonnet, Claude 3 Haiku
|
||||
- **OpenAI**: GPT-4.1, GPT-5, etc.
|
||||
- **Anthropic**: Claude 3.5 Sonnet, Claude 3 Haiku, Claude 4.5
|
||||
- **DeepSeek**: DeepSeek-V3, DeepSeek-Coder-V2
|
||||
- **Grok**: Grok-2, Grok-2-mini
|
||||
- **Others**: Gemini, Llama, Mistral, local models via Ollama
|
||||
|
|
@ -35,25 +27,51 @@ Arch Gateway transforms Claude Code into a **universal AI development interface*
|
|||
Our research-backed routing system automatically selects the optimal model by analyzing:
|
||||
- **Task complexity** (simple refactoring vs. architectural design)
|
||||
- **Content type** (code generation vs. debugging vs. documentation)
|
||||
- **Performance preferences** (speed vs. quality vs. cost)
|
||||
- **Real-time availability** (automatic failover when models are down)
|
||||
|
||||
### 💡 **Learns Your Preferences**
|
||||
The system adapts to your coding patterns and preferences over time, ensuring you always get the best model for your specific needs.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Prerequisites
|
||||
- Claude Code installed: `npm install -g @anthropic-ai/claude-code`
|
||||
- Docker running on your system
|
||||
- A Python virtual environment created in your current working directory
|
||||
|
||||
### 1. Install and Start Arch Gateway
|
||||
### 1. Get the Configuration File
|
||||
Download the demo configuration file using one of these methods:
|
||||
|
||||
**Option A: Direct download**
|
||||
```bash
|
||||
curl -O https://raw.githubusercontent.com/katanemo/arch/main/demos/use_cases/claude_code/config.yaml
|
||||
```
|
||||
|
||||
**Option B: Clone the repository**
|
||||
```bash
|
||||
git clone https://github.com/katanemo/arch.git
|
||||
cd arch/demos/use_cases/claude_code
|
||||
|
||||
```
|
||||
|
||||
### 2. Set Up Your API Keys
|
||||
Set up your environment variables with your actual API keys:
|
||||
```bash
|
||||
export OPENAI_API_KEY="your-openai-api-key"
|
||||
export ANTHROPIC_API_KEY="your-anthropic-api-key"
|
||||
export AZURE_API_KEY="your-azure-api-key" # Optional
|
||||
```
|
||||
|
||||
Alternatively, create a `.env` file in your working directory:
|
||||
```bash
|
||||
echo "OPENAI_API_KEY=your-openai-api-key" > .env
|
||||
echo "ANTHROPIC_API_KEY=your-anthropic-api-key" >> .env
|
||||
```
|
||||
|
||||
### 3. Install and Start Arch Gateway
|
||||
```bash
|
||||
pip install archgw
|
||||
archgw up
|
||||
```
|
||||
|
||||
### 2. Launch Claude Code with Multi-LLM Support
|
||||
### 4. Launch Claude Code with Multi-LLM Support
|
||||
```bash
|
||||
archgw cli-agent claude
|
||||
```
|
||||
|
|
@ -67,31 +85,10 @@ That's it! Claude Code now has access to multiple LLM providers with intelligent
|
|||
*Claude Code interface enhanced with intelligent model routing and multi-provider access*
|
||||
|
||||
### Real-Time Model Selection
|
||||
When you interact with Claude Code, you'll see:
|
||||
When you interact with Claude Code, you'll get:
|
||||
- **Automatic model selection** based on your query type
|
||||
- **Transparent routing decisions** showing which model was chosen and why
|
||||
- **Seamless failover** if a model becomes unavailable
|
||||
- **Performance metrics** comparing response times and quality
|
||||
|
||||
### Example Interactions
|
||||
|
||||
**Code Generation Query:**
|
||||
```
|
||||
You: "Create a Python function to validate email addresses"
|
||||
→ Routed to: DeepSeek-Coder-V2 (optimized for code generation)
|
||||
```
|
||||
|
||||
**Architecture Discussion:**
|
||||
```
|
||||
You: "How should I structure a microservices backend?"
|
||||
→ Routed to: Claude 3.5 Sonnet (excellent for architectural reasoning)
|
||||
```
|
||||
|
||||
**Quick Bug Fix:**
|
||||
```
|
||||
You: "Fix this syntax error in my JavaScript"
|
||||
→ Routed to: GPT-4o-mini (fast and cost-effective for simple fixes)
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
|
|
@ -129,31 +126,8 @@ ANTHROPIC_BASE_URL=http://127.0.0.1:12000 # Routes through Arch Gateway
|
|||
ANTHROPIC_SMALL_FAST_MODEL=arch.fast.v1 # Uses intelligent alias
|
||||
```
|
||||
|
||||
## Benefits You'll See Immediately
|
||||
|
||||
### 🚀 **Better Performance**
|
||||
- Right model for each task = better results
|
||||
- Automatic failover = no interruptions
|
||||
- Caching = faster repeated queries
|
||||
|
||||
### 💰 **Cost Optimization**
|
||||
- Use expensive models only when needed
|
||||
- Leverage free/cheap models for simple tasks
|
||||
- Track usage across all providers
|
||||
|
||||
### 🛡️ **Reliability**
|
||||
- Multiple providers = no single point of failure
|
||||
- Automatic retry logic
|
||||
- Graceful degradation when models are unavailable
|
||||
|
||||
### 📊 **Insights**
|
||||
- See which models work best for your coding style
|
||||
- Track performance metrics across providers
|
||||
- Optimize your model usage over time
|
||||
|
||||
## Real Developer Workflows
|
||||
|
||||
This intelligent routing is powered by our research in preference-aligned AI systems:
|
||||
- **Research Paper**: [Preference-Aligned LLM Router](https://katanemo.com/research)
|
||||
- **Technical Docs**: [docs.katanemo.com](https://docs.katanemo.com)
|
||||
- **API Reference**: [docs.katanemo.com/api](https://docs.katanemo.com/api)
|
||||
This intelligent routing is powered by our research in preference-aligned LLM routing:
|
||||
- **Research Paper**: [Preference-Aligned LLM Router](https://arxiv.org/abs/2506.16655)
|
||||
- **Technical Docs**: [docs.archgw.com](https://docs.archgw.com)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue