add rag agent demo

2026-06-17 15:25:17 +02:00 · 2025-10-22 16:47:20 -07:00 · 2025-10-22 16:47:20 -07:00 · 2065b13f70
commit 2065b13f70
parent fb6e7fba6e
6 changed files with 92 additions and 83 deletions
--- a/demos/use_cases/rag_agent/.dockerignore
+++ b/demos/use_cases/rag_agent/.dockerignore
--- a/demos/use_cases/rag_agent/Dockerfile
+++ b/demos/use_cases/rag_agent/Dockerfile
@ -0,0 +1,46 @@
+# syntax=docker/dockerfile:1
+
+# Base Python image (aligns with requires-python >=3.10)
+FROM python:3.12-slim
+
+# Install runtime deps and curl for installing uv
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+       bash \
+       ca-certificates \
+       curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Run later RUN steps under bash with pipefail so a failed download in `curl | sh` fails the build
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Install Astral's uv (Python package manager/runner used by start_agents.sh)
+# Keep both common install paths in PATH so `uv` is available in the container
+ENV PATH="/root/.local/bin:/root/.cargo/bin:${PATH}"
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Workdir for app
+WORKDIR /app
+
+# Install locked third-party deps first so this layer stays cached when only the source changes
+COPY pyproject.toml uv.lock /app/
+RUN uv sync --frozen --no-install-project
+
+# Copy the project files, then complete the sync now that the source is present
+COPY README.md start_agents.sh /app/
+COPY src/ /app/src/
+RUN uv sync --frozen && chmod +x /app/start_agents.sh
+
+# Make venv the default Python for subsequent commands (optional; uv run will pick .venv automatically)
+ENV VIRTUAL_ENV=/app/.venv
+ENV PATH="/app/.venv/bin:${PATH}"
+
+# Expose the three agent ports
+EXPOSE 10500 10501 10502
+
+# Default environment (can be overridden at `docker run` time)
+# Example: point to an external archgw or a compose service name
+# ENV LLM_GATEWAY_ENDPOINT="http://archgw:12000/v1"
+
+# Run the multi-agent launcher script
+ENTRYPOINT ["/app/start_agents.sh"]
--- a/demos/use_cases/rag_agent/README.md
+++ b/demos/use_cases/rag_agent/README.md
@ -10,9 +10,9 @@ A FastAPI service that rewrites user queries using archgw and gpt-4o-mini for be

 ## Setup and Running

-1. **Start archgw**:
+1. **Start archgw with Open WebUI and the Jaeger UI**:
   ```bash
-   archgw up --foreground
+   docker compose up -d
   ```

 2. **Start the query parser service**:
--- a/demos/use_cases/rag_agent/arch_config.yaml
+++ b/demos/use_cases/rag_agent/arch_config.yaml
@ -7,10 +7,6 @@ agents:
    url: http://host.docker.internal:10501/v1/chat/completions
  - id: rag_agent
    url: http://host.docker.internal:10502/v1/chat/completions
-  - id: research_agent
-    url: http://host.docker.internal:10503/v1/chat/completions
-  - id: weather_forecast_agent
-    url: http://host.docker.internal:10504/process

 model_providers:
  - model: openai/gpt-4o-mini
@ -18,8 +14,6 @@ model_providers:
    default: true
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
-  - model: ollama/llama3.1
-    base_url: http://host.docker.internal:11434

 model_aliases:
  fast-llm:
--- a/demos/use_cases/rag_agent/docker-compose.yaml
+++ b/demos/use_cases/rag_agent/docker-compose.yaml
@ -15,3 +15,27 @@ services:
      - DEFAULT_MODEL=gpt-4o-mini
      - ENABLE_OPENAI_API=true
      - OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
+  rag_agents:
+    build:
+      context: .  # image is built from the Dockerfile in this directory
+    container_name: rag_agents
+    ports:  # the three agent ports exposed by the image
+      - "10500:10500"
+      - "10501:10501"
+      - "10502:10502"
+    env_file:
+      - .env
+    environment:
+      - LLM_GATEWAY_ENDPOINT=http://archgw:12000/v1  # reach archgw by its compose service name
+  archgw:
+    image: katanemo/archgw:0.3.16
+    container_name: archgw
+    ports:
+      - "8001:8001"  # port targeted by OPENAI_API_BASE_URL above
+      - "12000:12000"  # port targeted by LLM_GATEWAY_ENDPOINT
+    volumes:
+      - ./arch_config.yaml:/app/arch_config.yaml:ro  # read-only mount of the gateway config
+    env_file:
+      - .env
+    environment:
+      - OPENAI_API_KEY=${OPENAI_API_KEY?Please set the OPENAI_API_KEY environment variable}  # interpolation aborts with this message when the variable is unset
--- a/demos/use_cases/rag_agent/test.rest
+++ b/demos/use_cases/rag_agent/test.rest
@ -1,54 +1,15 @@
-@baseUrl = http://0.0.0.0:10502
-
-# Health Check
-GET {{baseUrl}}/health
-
-###
-
-# Test 1: Simple Non-Streaming Chat Completion
-POST {{baseUrl}}/v1/chat/completions
+### send request to query rewriter agent
+POST http://localhost:10500/v1/chat/completions
 Content-Type: application/json

 {
-  "model": "gpt-4o",
-  "messages": [
-    {
-      "role": "user",
-      "content": "Hello! Can you help me understand what machine learning is?"
-    }
-  ]
-}
-
-###
-
-# Test 2: Simple Streaming Chat Completion
-POST {{baseUrl}}/v1/chat/completions
-Content-Type: application/json
-
-{
-  "model": "gpt-4o",
-  "messages": [
-    {
-      "role": "user",
-      "content": "Explain the concept of artificial intelligence in simple terms."
-    }
-  ],
-  "stream": true
-}
-
-### Test 3
-POST http://localhost:8001/v1/chat/completions
-Content-Type: application/json
-
-{
-  "model": "gpt-4o",
+  "model": "gpt-4o-mini",
  "messages": [
    {
      "role": "user",
      "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
    }
-  ],
-  "stream": false
+  ]
 }

 ### send request to context builder agent
@ -65,7 +26,22 @@ Content-Type: application/json
  ]
 }

-### Test with debug mode and reasoning blocks (streaming)
+### Test RAG agent via the archgw listener on port 8001 (not a direct agent call)
+POST http://localhost:8001/v1/chat/completions
+Content-Type: application/json
+x-debug-mode: true
+
+{
+  "model": "gpt-4o",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
+    }
+  ]
+}
+
+### Test RAG agent via the archgw listener on port 8001 with streaming
 POST http://localhost:8001/v1/chat/completions
 Content-Type: application/json
 x-debug-mode: true
@ -80,34 +56,3 @@ x-debug-mode: true
  ],
  "stream": true
 }
-
-### Test debug mode without streaming (should work normally)
-POST {{baseUrl}}/v1/chat/completions
-Content-Type: application/json
-X-Debug-Mode: true
-
-{
-  "model": "gpt-4o",
-  "messages": [
-    {
-      "role": "user",
-      "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
-    }
-  ],
-  "stream": false
-}
-
-### Test debug mode without streaming (should work normally)
-POST http://localhost:8001/v1/chat/completions
-Content-Type: application/json
-
-{
-  "model": "gpt-4o",
-  "messages": [
-    {
-      "role": "user",
-      "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
-    }
-  ],
-  "stream": true
-}