minor fixes to make sure PR is clean. Ready to ship

2026-06-17 15:25:17 +02:00 · 2025-09-29 14:13:04 -07:00 · 2025-09-29 14:13:04 -07:00 · e771912a86
commit e771912a86
parent 5f7f38ad24
4 changed files with 49 additions and 106 deletions
--- a/arch/tools/cli/core.py
+++ b/arch/tools/cli/core.py
@ -1,10 +1,11 @@
+import json
 import subprocess
 import os
 import time
 import sys

 import yaml
-from cli.utils import getLogger, read_config_file
+from cli.utils import getLogger
 from cli.consts import (
    ARCHGW_DOCKER_IMAGE,
    ARCHGW_DOCKER_NAME,
@ -189,26 +190,13 @@ def stop_arch_modelserver():

 def start_cli_agent(arch_config_file=None, settings_json="{}"):
    """Start a CLI client connected to Arch."""
-    import json

-    # Use current directory for config if not specified
-    if arch_config_file is None:
-        config_path = "."
-    else:
-        config_path = (
-            os.path.dirname(arch_config_file)
-            if os.path.dirname(arch_config_file)
-            else "."
-        )
-
-    # Get port and host from arch_config.yaml listeners > egress
-    arch_config = read_config_file(config_path)
-    if not arch_config:
-        log.error(f"Config file not found in {config_path}")
-        sys.exit(1)
+    with open(arch_config_file, "r") as file:
+        arch_config = file.read()
+        arch_config_yaml = yaml.safe_load(arch_config)

    # Get egress listener configuration
-    egress_config = arch_config.get("listeners", {}).get("egress_traffic", {})
+    egress_config = arch_config_yaml.get("listeners", {}).get("egress_traffic", {})
    host = egress_config.get("host", "127.0.0.1")
    port = egress_config.get("port", 12000)

@ -240,7 +228,7 @@ def start_cli_agent(arch_config_file=None, settings_json="{}"):
        ]
    else:
        # Check if arch.claude.code.small.fast alias exists in model_aliases
-        model_aliases = arch_config.get("model_aliases", {})
+        model_aliases = arch_config_yaml.get("model_aliases", {})
        if "arch.claude.code.small.fast" in model_aliases:
            env["ANTHROPIC_SMALL_FAST_MODEL"] = "arch.claude.code.small.fast"
        else:
@ -276,7 +264,7 @@ def start_cli_agent(arch_config_file=None, settings_json="{}"):

    # Use claude from PATH
    claude_path = "claude"
-    log.info(f"Starting Claude CLI Agent to Arch at {host}:{port}")
+    log.info(f"Connecting Claude Code Agent to Arch at {host}:{port}")

    try:
        subprocess.run([claude_path] + claude_args, env=env, check=True)
--- a/arch/tools/cli/main.py
+++ b/arch/tools/cli/main.py
@ -17,7 +17,6 @@ from cli.utils import (
    has_ingress_listener,
    load_env_file_to_dict,
    stream_access_logs,
-    read_config_file,
    find_config_file,
 )
 from cli.core import (
@ -189,7 +188,6 @@ def up(file, path, service, foreground):
        return

    log.info(f"Validating {arch_config_file}")
-
    (
        validation_return_code,
        validation_stdout,
--- a/arch/tools/cli/utils.py
+++ b/arch/tools/cli/utils.py
@ -88,23 +88,6 @@ def load_env_file_to_dict(file_path):
    return env_dict


-def read_config_file(path="."):
-    """Read configuration from arch_config.yaml or config.yaml in the specified path."""
-    config_files = ["arch_config.yaml", "config.yaml"]
-
-    for config_file in config_files:
-        config_path = os.path.abspath(os.path.join(path, config_file))
-        if os.path.exists(config_path):
-            try:
-                with open(config_path, "r") as f:
-                    return yaml.safe_load(f)
-            except Exception as e:
-                log.warning(f"Error reading {config_path}: {e}")
-                continue
-
-    return {}
-
-
 def find_config_file(path=".", file=None):
    """Find the appropriate config file path."""
    if file:
--- a/demos/use_cases/claude_code/README.md
+++ b/demos/use_cases/claude_code/README.md
@ -1,32 +1,24 @@
-# Claude Code Routing with Intelligence
+# Claude Code Routing with (Preference-aligned) Intelligence

 ## Why This Matters

 **Claude Code is powerful, but what if you could access the best of ALL AI models through one familiar interface?**

-Instead of being locked into a single provider, imagine:
+Instead of being locked into a set of LLMs from one provider, imagine:
 - Using **DeepSeek's coding expertise** for complex algorithms
- Leveraging **GPT-4's reasoning** for architecture decisions
+- Leveraging **GPT-5's reasoning** for architecture decisions
 - Tapping **Claude's analysis** for code reviews
 - Accessing **Grok's speed** for quick iterations

 **All through the same Claude Code interface you already love.**

-## The Problem with Single-Model Development
-
-Most developers are stuck in single-provider silos:
- 🔒 **Vendor Lock-in**: Tied to one model's strengths and weaknesses
- 🎯 **Wrong Tool for the Job**: Using a reasoning model for simple tasks (expensive) or a fast model for complex problems (poor results)
- 🚫 **No Fallbacks**: When your preferred model is down, you're stuck
- 💸 **Suboptimal Costs**: Paying premium prices for tasks that could use cheaper models
-
 ## The Solution: Intelligent Multi-LLM Routing

 Arch Gateway transforms Claude Code into a **universal AI development interface** that:

 ### 🌐 **Connects to Any LLM Provider**
- **OpenAI**: GPT-4o, o1-preview, GPT-4o-mini
- **Anthropic**: Claude 3.5 Sonnet, Claude 3 Haiku
+- **OpenAI**: GPT-4.1, GPT-5, etc.
+- **Anthropic**: Claude 3.5 Sonnet, Claude 3 Haiku, Claude 4.5
 - **DeepSeek**: DeepSeek-V3, DeepSeek-Coder-V2
 - **Grok**: Grok-2, Grok-2-mini
 - **Others**: Gemini, Llama, Mistral, local models via Ollama
@ -35,25 +27,51 @@ Arch Gateway transforms Claude Code into a **universal AI development interface*
 Our research-backed routing system automatically selects the optimal model by analyzing:
 - **Task complexity** (simple refactoring vs. architectural design)
 - **Content type** (code generation vs. debugging vs. documentation)
- **Performance preferences** (speed vs. quality vs. cost)
- **Real-time availability** (automatic failover when models are down)

-### 💡 **Learns Your Preferences**
-The system adapts to your coding patterns and preferences over time, ensuring you always get the best model for your specific needs.

 ## Quick Start

 ### Prerequisites
 - Claude Code installed: `npm install -g @anthropic-ai/claude-code`
 - Docker running on your system
+- Create a Python virtual environment in your current working directory

-### 1. Install and Start Arch Gateway
+### 1. Get the Configuration File
+Download the demo configuration file using one of these methods:
+
+**Option A: Direct download**
+```bash
+curl -O https://raw.githubusercontent.com/katanemo/arch/main/demos/use_cases/claude_code/config.yaml
+```
+
+**Option B: Clone the repository**
+```bash
+git clone https://github.com/katanemo/arch.git
+cd arch/demos/use_cases/claude_code
+
+```
+
+### 2. Set Up Your API Keys
+Set up your environment variables with your actual API keys:
+```bash
+export OPENAI_API_KEY="your-openai-api-key"
+export ANTHROPIC_API_KEY="your-anthropic-api-key"
+export AZURE_API_KEY="your-azure-api-key"  # Optional
+```
+
+Alternatively, create a `.env` file in your working directory:
+```bash
+echo "OPENAI_API_KEY=your-openai-api-key" > .env
+echo "ANTHROPIC_API_KEY=your-anthropic-api-key" >> .env
+```
+
+### 3. Install and Start Arch Gateway
 ```bash
 pip install archgw
 archgw up
 ```

-### 2. Launch Claude Code with Multi-LLM Support
+### 4. Launch Claude Code with Multi-LLM Support
 ```bash
 archgw cli-agent claude
 ```
@ -67,31 +85,10 @@ That's it! Claude Code now has access to multiple LLM providers with intelligent
 *Claude Code interface enhanced with intelligent model routing and multi-provider access*

 ### Real-Time Model Selection
-When you interact with Claude Code, you'll see:
+When you interact with Claude Code, you'll get:
 - **Automatic model selection** based on your query type
 - **Transparent routing decisions** showing which model was chosen and why
 - **Seamless failover** if a model becomes unavailable
- **Performance metrics** comparing response times and quality
-
-### Example Interactions
-
-**Code Generation Query:**
-```
-You: "Create a Python function to validate email addresses"
-→ Routed to: DeepSeek-Coder-V2 (optimized for code generation)
-```
-
-**Architecture Discussion:**
-```
-You: "How should I structure a microservices backend?"
-→ Routed to: Claude 3.5 Sonnet (excellent for architectural reasoning)
-```
-
-**Quick Bug Fix:**
-```
-You: "Fix this syntax error in my JavaScript"
-→ Routed to: GPT-4o-mini (fast and cost-effective for simple fixes)
-```

 ## Configuration

@ -129,31 +126,8 @@ ANTHROPIC_BASE_URL=http://127.0.0.1:12000  # Routes through Arch Gateway
 ANTHROPIC_SMALL_FAST_MODEL=arch.fast.v1    # Uses intelligent alias
 ```

-## Benefits You'll See Immediately
-
-### 🚀 **Better Performance**
- Right model for each task = better results
- Automatic failover = no interruptions
- Caching = faster repeated queries
-
-### 💰 **Cost Optimization**
- Use expensive models only when needed
- Leverage free/cheap models for simple tasks
- Track usage across all providers
-
-### 🛡️ **Reliability**
- Multiple providers = no single point of failure
- Automatic retry logic
- Graceful degradation when models are unavailable
-
-### 📊 **Insights**
- See which models work best for your coding style
- Track performance metrics across providers
- Optimize your model usage over time
-
 ## Real Developer Workflows

-This intelligent routing is powered by our research in preference-aligned AI systems:
- **Research Paper**: [Preference-Aligned LLM Router](https://katanemo.com/research)
- **Technical Docs**: [docs.katanemo.com](https://docs.katanemo.com)
- **API Reference**: [docs.katanemo.com/api](https://docs.katanemo.com/api)
+This intelligent routing is powered by our research in preference-aligned LLM routing:
+- **Research Paper**: [Preference-Aligned LLM Router](https://arxiv.org/abs/2506.16655)
+- **Technical Docs**: [docs.archgw.com](https://docs.archgw.com)