From 59e34a34ca84ae5cdc6d1b9c61c98596df99d02a Mon Sep 17 00:00:00 2001
From: Salman Paracha <salmanparacha@MacBook-Pro-257.local>
Date: Sun, 28 Sep 2025 21:57:15 -0700
Subject: [PATCH] adding a README.md and updated the cli to use more of our
 defined patterns for params

---
 arch/tools/cli/main.py                  |  23 ++--
 demos/use_cases/claude_code/README.md   | 159 ++++++++++++++++++++++++
 demos/use_cases/claude_code/config.yaml |  35 ++----
 3 files changed, 179 insertions(+), 38 deletions(-)
 create mode 100644 demos/use_cases/claude_code/README.md

diff --git a/arch/tools/cli/main.py b/arch/tools/cli/main.py
index df4a25cf..ddaacf1d 100644
--- a/arch/tools/cli/main.py
+++ b/arch/tools/cli/main.py
@@ -335,28 +335,21 @@ def logs(debug, follow):
 
 
 @click.command()
-@click.argument("cli_type", type=click.Choice(["claude"]), required=True)
+@click.argument("type", type=click.Choice(["claude"]), required=True)
+@click.argument("file", required=False)  # Optional file argument
 @click.option(
-    "--path",
-    default=None,
-    help="Path to the directory containing arch_config.yaml (defaults to current directory)",
+    "--path", default=".", help="Path to the directory containing arch_config.yaml"
 )
 @click.option(
     "--settings",
     default="{}",
     help="Additional settings as JSON string for the CLI agent.",
 )
-def cli_agent(cli_type, path, settings):
+def cli_agent(type, file, path, settings):
     """Start a CLI agent connected to Arch.
 
-    CLI_TYPE: The type of CLI agent to start (currently only 'claude' is supported)
+    CLI_AGENT: The type of CLI agent to start (currently only 'claude' is supported)
     """
-    # Determine arch_config.yaml path
-    arch_config_file = None
-    if path:
-        arch_config_file = os.path.join(path, "arch_config.yaml")
-    else:
-        arch_config_file = "arch_config.yaml"  # Current directory
 
     # Check if archgw docker container is running
     archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
@@ -365,6 +358,12 @@ def cli_agent(cli_type, path, settings):
         log.error("Please start archgw using the 'archgw up' command.")
         sys.exit(1)
 
+    # Determine arch_config.yaml path
+    arch_config_file = find_config_file(path, file)
+    if not os.path.exists(arch_config_file):
+        log.error(f"Config file not found: {arch_config_file}")
+        sys.exit(1)
+
     try:
         start_cli_agent(arch_config_file, settings)
     except SystemExit:
diff --git a/demos/use_cases/claude_code/README.md b/demos/use_cases/claude_code/README.md
new file mode 100644
index 00000000..f4b9e038
--- /dev/null
+++ b/demos/use_cases/claude_code/README.md
@@ -0,0 +1,159 @@
+# Claude Code Routing with Intelligence
+
+## Why This Matters
+
+**Claude Code is powerful, but what if you could access the best of ALL AI models through one familiar interface?**
+
+Instead of being locked into a single provider, imagine:
+- Using **DeepSeek's coding expertise** for complex algorithms
+- Leveraging **GPT-4's reasoning** for architecture decisions
+- Tapping **Claude's analysis** for code reviews
+- Accessing **Grok's speed** for quick iterations
+
+**All through the same Claude Code interface you already love.**
+
+## The Problem with Single-Model Development
+
+Most developers are stuck in single-provider silos:
+- 🔒 **Vendor Lock-in**: Tied to one model's strengths and weaknesses
+- 🎯 **Wrong Tool for the Job**: Using a reasoning model for simple tasks (expensive) or a fast model for complex problems (poor results)
+- 🚫 **No Fallbacks**: When your preferred model is down, you're stuck
+- 💸 **Suboptimal Costs**: Paying premium prices for tasks that could use cheaper models
+
+## The Solution: Intelligent Multi-LLM Routing
+
+Arch Gateway transforms Claude Code into a **universal AI development interface** that:
+
+### 🌐 **Connects to Any LLM Provider**
+- **OpenAI**: GPT-4o, o1-preview, GPT-4o-mini
+- **Anthropic**: Claude 3.5 Sonnet, Claude 3 Haiku
+- **DeepSeek**: DeepSeek-V3, DeepSeek-Coder-V2
+- **Grok**: Grok-2, Grok-2-mini
+- **Others**: Gemini, Llama, Mistral, local models via Ollama
+
+### 🧠 **Routes Intelligently Based on Task**
+Our research-backed routing system automatically selects the optimal model by analyzing:
+- **Task complexity** (simple refactoring vs. architectural design)
+- **Content type** (code generation vs. debugging vs. documentation)
+- **Performance preferences** (speed vs. quality vs. cost)
+- **Real-time availability** (automatic failover when models are down)
+
+### 💡 **Learns Your Preferences**
+The system adapts to your coding patterns and preferences over time, ensuring you always get the best model for your specific needs.
+
+## Quick Start
+
+### Prerequisites
+- Claude Code installed: `npm install -g @anthropic-ai/claude-code`
+- Docker running on your system
+
+### 1. Install and Start Arch Gateway
+```bash
+pip install archgw
+archgw up
+```
+
+### 2. Launch Claude Code with Multi-LLM Support
+```bash
+archgw cli-agent claude
+```
+
+That's it! Claude Code now has access to multiple LLM providers with intelligent routing.
+
+## What You'll Experience
+
+### Screenshot Placeholder
+![Claude Code with Multi-LLM Routing](screenshot-placeholder.png)
+*Claude Code interface enhanced with intelligent model routing and multi-provider access*
+
+### Real-Time Model Selection
+When you interact with Claude Code, you'll see:
+- **Automatic model selection** based on your query type
+- **Transparent routing decisions** showing which model was chosen and why
+- **Seamless failover** if a model becomes unavailable
+- **Performance metrics** comparing response times and quality
+
+### Example Interactions
+
+**Code Generation Query:**
+```
+You: "Create a Python function to validate email addresses"
+→ Routed to: DeepSeek-Coder-V2 (optimized for code generation)
+```
+
+**Architecture Discussion:**
+```
+You: "How should I structure a microservices backend?"
+→ Routed to: Claude 3.5 Sonnet (excellent for architectural reasoning)
+```
+
+**Quick Bug Fix:**
+```
+You: "Fix this syntax error in my JavaScript"
+→ Routed to: GPT-4o-mini (fast and cost-effective for simple fixes)
+```
+
+## Configuration
+
+The setup uses the included `config.yaml` file, which defines:
+
+### Multi-Provider Access
+```yaml
+llm_providers:
+  - model: openai/gpt-4.1-2025-04-14
+    access_key: $OPENAI_API_KEY
+    routing_preferences:
+      - name: code generation
+        description: generating new code snippets and functions
+  - model: anthropic/claude-3-5-sonnet-20241022
+    access_key: $ANTHROPIC_API_KEY
+    routing_preferences:
+      - name: code understanding
+        description: explaining and analyzing existing code
+```
+## Advanced Usage
+
+### Custom Model Selection
+```bash
+# Override the small/fast model used for this session
+archgw cli-agent claude --settings='{"ANTHROPIC_SMALL_FAST_MODEL": "deepseek-coder-v2"}'
+
+# Enable detailed routing information
+archgw cli-agent claude --settings='{"statusLine": {"type": "command", "command": "ccr statusline"}}'
+```
+
+### Environment Variables
+The system automatically configures:
+```bash
+ANTHROPIC_BASE_URL=http://127.0.0.1:12000  # Routes through Arch Gateway
+ANTHROPIC_SMALL_FAST_MODEL=arch.fast.v1    # Uses intelligent alias
+```
+
+## Benefits You'll See Immediately
+
+### 🚀 **Better Performance**
+- Right model for each task = better results
+- Automatic failover = no interruptions
+- Caching = faster repeated queries
+
+### 💰 **Cost Optimization**
+- Use expensive models only when needed
+- Leverage free/cheap models for simple tasks
+- Track usage across all providers
+
+### 🛡️ **Reliability**
+- Multiple providers = no single point of failure
+- Automatic retry logic
+- Graceful degradation when models are unavailable
+
+### 📊 **Insights**
+- See which models work best for your coding style
+- Track performance metrics across providers
+- Optimize your model usage over time
+
+## Research and Documentation
+
+This intelligent routing is powered by our research in preference-aligned AI systems:
+- **Research Paper**: [Preference-Aligned LLM Router](https://katanemo.com/research)
+- **Technical Docs**: [docs.katanemo.com](https://docs.katanemo.com)
+- **API Reference**: [docs.katanemo.com/api](https://docs.katanemo.com/api)
diff --git a/demos/use_cases/claude_code/config.yaml b/demos/use_cases/claude_code/config.yaml
index e1e89d1e..dc758796 100644
--- a/demos/use_cases/claude_code/config.yaml
+++ b/demos/use_cases/claude_code/config.yaml
@@ -10,18 +10,16 @@ listeners:
 llm_providers:
 
   # OpenAI Models
-  - model: openai/gpt-5-mini-2025-08-07
-    access_key: $OPENAI_API_KEY
-    default: true
+  - model: openai/gpt-5-2025-08-07
+    routing_preferences:
+      - name: code generation
+        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
 
-  - model: openai/gpt-4o-mini
-    access_key: $OPENAI_API_KEY
-
-  - model: openai/o3
-    access_key: $OPENAI_API_KEY
-
-  - model: openai/gpt-4o
+  - model: openai/gpt-4.1-2025-04-14
     access_key: $OPENAI_API_KEY
+    routing_preferences:
+      - name: code understanding
+        description: understand and explain existing code snippets, functions, or libraries
 
   # Anthropic Models
   - model: anthropic/claude-3-5-sonnet-20241022
@@ -44,15 +42,7 @@ llm_providers:
 model_aliases:
   # Alias for summarization tasks -> fast/cheap model
   arch.summarize.v1:
-    target: gpt-5-mini-2025-08-07
-
-  # Alias for general purpose tasks -> latest model
-  arch.v1:
-    target: o3
-
-  # Alias for reasoning tasks -> capable model
-  arch.reasoning.v1:
-    target: gpt-4o
+    target: gpt-4.1-2025-04-14
 
   # Alias for creative tasks -> Claude model
   arch.creative.v1:
@@ -62,12 +52,5 @@ model_aliases:
   arch.fast.v1:
     target: claude-3-haiku-20240307
 
-  # Semantic aliases
-  summary-model:
-    target: gpt-5-mini-2025-08-07
-
   chat-model:
     target: claude-3-5-sonnet-20241022
-
-  creative-model:
-    target: claude-3-5-sonnet-20241022