From 2065b13f70860025d32854ce33a395b84e2a56b5 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Wed, 22 Oct 2025 16:47:20 -0700
Subject: [PATCH] add rag agent demo

---
 demos/use_cases/rag_agent/.dockerignore       |  0
 demos/use_cases/rag_agent/Dockerfile          | 46 +++++++++
 demos/use_cases/rag_agent/README.md           |  4 +-
 demos/use_cases/rag_agent/arch_config.yaml    |  6 --
 demos/use_cases/rag_agent/docker-compose.yaml | 24 +++++
 demos/use_cases/rag_agent/test.rest           | 95 ++++---------------
 6 files changed, 92 insertions(+), 83 deletions(-)
 create mode 100644 demos/use_cases/rag_agent/.dockerignore
 create mode 100644 demos/use_cases/rag_agent/Dockerfile

diff --git a/demos/use_cases/rag_agent/.dockerignore b/demos/use_cases/rag_agent/.dockerignore
new file mode 100644
index 00000000..e69de29b
diff --git a/demos/use_cases/rag_agent/Dockerfile b/demos/use_cases/rag_agent/Dockerfile
new file mode 100644
index 00000000..4d71fccf
--- /dev/null
+++ b/demos/use_cases/rag_agent/Dockerfile
@@ -0,0 +1,46 @@
+# syntax=docker/dockerfile:1
+
+# Base Python image (aligns with requires-python >=3.10)
+FROM python:3.12-slim
+
+# Install runtime deps and curl for installing uv
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+       bash \
+       ca-certificates \
+       curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Astral's uv (used by start_agents.sh); both install dirs stay on PATH.
+# NOTE(review): the installer URL is unpinned; consider a versioned URL.
+# pipefail makes this step fail when the download fails instead of masking it.
+ENV PATH="/root/.local/bin:/root/.cargo/bin:${PATH}"
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Workdir for app
+WORKDIR /app
+
+# Dependency layer: manifests only, so source edits do not re-install deps
+COPY pyproject.toml README.md uv.lock /app/
+RUN uv venv && uv sync --frozen --no-install-project
+
+# Application layer: source and launcher, then install the project itself
+COPY src/ /app/src/
+COPY start_agents.sh /app/
+RUN uv sync --frozen \
+    && chmod +x /app/start_agents.sh
+
+# Make venv the default Python for subsequent commands (optional; uv run will pick .venv automatically)
+ENV VIRTUAL_ENV=/app/.venv
+ENV PATH="/app/.venv/bin:${PATH}"
+
+# Expose the three agent ports
+EXPOSE 10500 10501 10502
+
+# Default environment (can be overridden at `docker run` time)
+# Example: point to an external archgw or a compose service name
+# ENV LLM_GATEWAY_ENDPOINT="http://archgw:12000/v1"
+
+# Run the multi-agent launcher script
+ENTRYPOINT ["/app/start_agents.sh"]
diff --git a/demos/use_cases/rag_agent/README.md b/demos/use_cases/rag_agent/README.md
index 66102f6f..41989927 100644
--- a/demos/use_cases/rag_agent/README.md
+++ b/demos/use_cases/rag_agent/README.md
@@ -10,9 +10,9 @@ A FastAPI service that rewrites user queries using archgw and gpt-4o-mini for be
 
 ## Setup and Running
 
-1. **Start archgw**:
+1. **Start archgw with Open WebUI and Jaeger UI**:
    ```bash
-   archgw up --foreground
+   docker compose up -d
    ```
 
 2. **Start the query parser service**:
diff --git a/demos/use_cases/rag_agent/arch_config.yaml b/demos/use_cases/rag_agent/arch_config.yaml
index c884ad6b..b28b5de5 100644
--- a/demos/use_cases/rag_agent/arch_config.yaml
+++ b/demos/use_cases/rag_agent/arch_config.yaml
@@ -7,10 +7,6 @@ agents:
     url: http://host.docker.internal:10501/v1/chat/completions
   - id: rag_agent
     url: http://host.docker.internal:10502/v1/chat/completions
-  - id: research_agent
-    url: http://host.docker.internal:10503/v1/chat/completions
-  - id: weather_forecast_agent
-    url: http://host.docker.internal:10504/process
 
 model_providers:
   - model: openai/gpt-4o-mini
@@ -18,8 +14,6 @@ model_providers:
     default: true
   - model: openai/gpt-4o
     access_key: $OPENAI_API_KEY
-  - model: ollama/llama3.1
-    base_url: http://host.docker.internal:11434
 
 model_aliases:
   fast-llm:
diff --git a/demos/use_cases/rag_agent/docker-compose.yaml b/demos/use_cases/rag_agent/docker-compose.yaml
index a5d45ed9..d4c4a6d8 100644
--- a/demos/use_cases/rag_agent/docker-compose.yaml
+++ b/demos/use_cases/rag_agent/docker-compose.yaml
@@ -15,3 +15,27 @@ services:
       - DEFAULT_MODEL=gpt-4o-mini
       - ENABLE_OPENAI_API=true
       - OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
+  rag_agents:
+    build:
+      context: .
+    container_name: rag_agents
+    ports:
+      - "10500:10500"
+      - "10501:10501"
+      - "10502:10502"
+    env_file:
+      - .env
+    environment:
+      - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
+  archgw:
+    image: katanemo/archgw:0.3.16
+    container_name: archgw
+    ports:
+      - "8001:8001"
+      - "12000:12000"
+    volumes:
+      - ./arch_config.yaml:/app/arch_config.yaml:ro
+    env_file:
+      - .env
+    environment:
+      - OPENAI_API_KEY=${OPENAI_API_KEY?Please set the OPENAI_API_KEY environment variable}
diff --git a/demos/use_cases/rag_agent/test.rest b/demos/use_cases/rag_agent/test.rest
index f3d68c54..098a31c2 100644
--- a/demos/use_cases/rag_agent/test.rest
+++ b/demos/use_cases/rag_agent/test.rest
@@ -1,54 +1,15 @@
-@baseUrl = http://0.0.0.0:10502
-
-# Health Check
-GET {{baseUrl}}/health
-
-###
-
-# Test 1: Simple Non-Streaming Chat Completion
-POST {{baseUrl}}/v1/chat/completions
+### send request to query rewriter agent
+POST http://localhost:10500/v1/chat/completions
 Content-Type: application/json
 
 {
-  "model": "gpt-4o",
-  "messages": [
-    {
-      "role": "user",
-      "content": "Hello! Can you help me understand what machine learning is?"
-    }
-  ]
-}
-
-###
-
-# Test 2: Simple Streaming Chat Completion
-POST {{baseUrl}}/v1/chat/completions
-Content-Type: application/json
-
-{
-  "model": "gpt-4o",
-  "messages": [
-    {
-      "role": "user",
-      "content": "Explain the concept of artificial intelligence in simple terms."
-    }
-  ],
-  "stream": true
-}
-
-### Test 3
-POST http://localhost:8001/v1/chat/completions
-Content-Type: application/json
-
-{
-  "model": "gpt-4o",
+  "model": "gpt-4o-mini",
   "messages": [
     {
       "role": "user",
       "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
     }
-  ],
-  "stream": false
+  ]
 }
 
 ### send request to context builder agent
@@ -65,7 +26,22 @@ Content-Type: application/json
   ]
 }
 
-### Test with debug mode and reasoning blocks (streaming)
+### Test directly with RAG agent
+POST http://localhost:8001/v1/chat/completions
+Content-Type: application/json
+x-debug-mode: true
+
+{
+  "model": "gpt-4o",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
+    }
+  ]
+}
+
+### Test directly with RAG agent with streaming
 POST http://localhost:8001/v1/chat/completions
 Content-Type: application/json
 x-debug-mode: true
@@ -80,34 +56,3 @@ x-debug-mode: true
   ],
   "stream": true
 }
-
-### Test debug mode without streaming (should work normally)
-POST {{baseUrl}}/v1/chat/completions
-Content-Type: application/json
-X-Debug-Mode: true
-
-{
-  "model": "gpt-4o",
-  "messages": [
-    {
-      "role": "user",
-      "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
-    }
-  ],
-  "stream": false
-}
-
-### Test debug mode without streaming (should work normally)
-POST http://localhost:8001/v1/chat/completions
-Content-Type: application/json
-
-{
-  "model": "gpt-4o",
-  "messages": [
-    {
-      "role": "user",
-      "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
-    }
-  ],
-  "stream": true
-}