From 2065b13f70860025d32854ce33a395b84e2a56b5 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Wed, 22 Oct 2025 16:47:20 -0700 Subject: [PATCH] add rag agent demo --- demos/use_cases/rag_agent/.dockerignore | 0 demos/use_cases/rag_agent/Dockerfile | 46 +++++++++ demos/use_cases/rag_agent/README.md | 4 +- demos/use_cases/rag_agent/arch_config.yaml | 6 -- demos/use_cases/rag_agent/docker-compose.yaml | 24 +++++ demos/use_cases/rag_agent/test.rest | 95 ++++--------------- 6 files changed, 92 insertions(+), 83 deletions(-) create mode 100644 demos/use_cases/rag_agent/.dockerignore create mode 100644 demos/use_cases/rag_agent/Dockerfile diff --git a/demos/use_cases/rag_agent/.dockerignore b/demos/use_cases/rag_agent/.dockerignore new file mode 100644 index 00000000..e69de29b diff --git a/demos/use_cases/rag_agent/Dockerfile b/demos/use_cases/rag_agent/Dockerfile new file mode 100644 index 00000000..4d71fccf --- /dev/null +++ b/demos/use_cases/rag_agent/Dockerfile @@ -0,0 +1,46 @@ +# syntax=docker/dockerfile:1 + +# Base Python image (aligns with requires-python >=3.10) +FROM python:3.12-slim + +# Install runtime deps and curl for installing uv +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bash \ + curl \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Install Astral's uv (Python package manager/runner used by start_agents.sh) +# Keep both common install paths in PATH so `uv` is available in the container +ENV PATH="/root/.local/bin:/root/.cargo/bin:${PATH}" +RUN curl -LsSf https://astral.sh/uv/install.sh | sh + +# Workdir for app +WORKDIR /app + +# Copy project metadata and source first to leverage caching for dependency install +COPY pyproject.toml README.md /app/ +COPY src/ /app/src/ +COPY start_agents.sh /app/ +COPY uv.lock /app/ + +# Pre-create a project venv and install deps into it for faster startup +# Using `uv sync` ensures the .venv contains all dependencies declared in pyproject.toml +RUN uv venv \ + && uv sync --frozen \ + && chmod +x /app/start_agents.sh + +# Make venv the default Python for subsequent commands (optional; uv run will pick .venv automatically) +ENV VIRTUAL_ENV=/app/.venv +ENV PATH="/app/.venv/bin:${PATH}" + +# Expose the three agent ports +EXPOSE 10500 10501 10502 + +# Default environment (can be overridden at `docker run` time) +# Example: point to an external archgw or a compose service name +# ENV LLM_GATEWAY_ENDPOINT="http://archgw:12000/v1" + +# Run the multi-agent launcher script +ENTRYPOINT ["/app/start_agents.sh"] diff --git a/demos/use_cases/rag_agent/README.md b/demos/use_cases/rag_agent/README.md index 66102f6f..41989927 100644 --- a/demos/use_cases/rag_agent/README.md +++ b/demos/use_cases/rag_agent/README.md @@ -10,9 +10,9 @@ A FastAPI service that rewrites user queries using archgw and gpt-4o-mini for be ## Setup and Running -1. **Start archgw**: +1. **Start archgw with open-web and jaeger ui**: ```bash - archgw up --foreground + docker compose up -d ``` 2. **Start the query parser service**: diff --git a/demos/use_cases/rag_agent/arch_config.yaml b/demos/use_cases/rag_agent/arch_config.yaml index c884ad6b..b28b5de5 100644 --- a/demos/use_cases/rag_agent/arch_config.yaml +++ b/demos/use_cases/rag_agent/arch_config.yaml @@ -7,10 +7,6 @@ agents: url: http://host.docker.internal:10501/v1/chat/completions - id: rag_agent url: http://host.docker.internal:10502/v1/chat/completions - - id: research_agent - url: http://host.docker.internal:10503/v1/chat/completions - - id: weather_forecast_agent - url: http://host.docker.internal:10504/process model_providers: - model: openai/gpt-4o-mini @@ -18,8 +14,6 @@ model_providers: default: true - model: openai/gpt-4o access_key: $OPENAI_API_KEY - - model: ollama/llama3.1 - base_url: http://host.docker.internal:11434 model_aliases: fast-llm: diff --git a/demos/use_cases/rag_agent/docker-compose.yaml b/demos/use_cases/rag_agent/docker-compose.yaml index a5d45ed9..d4c4a6d8 100644 --- a/demos/use_cases/rag_agent/docker-compose.yaml +++ b/demos/use_cases/rag_agent/docker-compose.yaml @@ -15,3 +15,27 @@ services: - DEFAULT_MODEL=gpt-4o-mini - ENABLE_OPENAI_API=true - OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1 + rag_agents: + build: + context: . + container_name: rag_agents + ports: + - "10500:10500" + - "10501:10501" + - "10502:10502" + env_file: + - .env + environment: + - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1 + archgw: + image: katanemo/archgw:0.3.16 + container_name: archgw + ports: + - "8001:8001" + - "12000:12000" + volumes: + - ./arch_config.yaml:/app/arch_config.yaml:ro + env_file: + - .env + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY?Please set the OPENAI_API_KEY environment variable} diff --git a/demos/use_cases/rag_agent/test.rest b/demos/use_cases/rag_agent/test.rest index f3d68c54..098a31c2 100644 --- a/demos/use_cases/rag_agent/test.rest +++ b/demos/use_cases/rag_agent/test.rest @@ -1,54 +1,15 @@ -@baseUrl = http://0.0.0.0:10502 - -# Health Check -GET {{baseUrl}}/health - -### - -# Test 1: Simple Non-Streaming Chat Completion -POST {{baseUrl}}/v1/chat/completions +### send request to query rewriter agent +POST http://localhost:10500/v1/chat/completions Content-Type: application/json { - "model": "gpt-4o", - "messages": [ - { - "role": "user", - "content": "Hello! Can you help me understand what machine learning is?" - } - ] -} - -### - -# Test 2: Simple Streaming Chat Completion -POST {{baseUrl}}/v1/chat/completions -Content-Type: application/json - -{ - "model": "gpt-4o", - "messages": [ - { - "role": "user", - "content": "Explain the concept of artificial intelligence in simple terms." - } - ], - "stream": true -} - -### Test 3 -POST http://localhost:8001/v1/chat/completions -Content-Type: application/json - -{ - "model": "gpt-4o", + "model": "gpt-4o-mini", "messages": [ { "role": "user", "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?" } - ], - "stream": false + ] } ### send request to context builder agent @@ -65,7 +26,22 @@ Content-Type: application/json ] } -### Test with debug mode and reasoning blocks (streaming) +### Test directly with RAG agent +POST http://localhost:8001/v1/chat/completions +Content-Type: application/json +x-debug-mode: true + +{ + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?" + } + ] +} + +### Test directly with RAG agent with streaming POST http://localhost:8001/v1/chat/completions Content-Type: application/json x-debug-mode: true @@ -80,34 +56,3 @@ x-debug-mode: true ], "stream": true } - -### Test debug mode without streaming (should work normally) -POST {{baseUrl}}/v1/chat/completions -Content-Type: application/json -X-Debug-Mode: true - -{ - "model": "gpt-4o", - "messages": [ - { - "role": "user", - "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?" - } - ], - "stream": false -} - -### Test debug mode without streaming (should work normally) -POST http://localhost:8001/v1/chat/completions -Content-Type: application/json - -{ - "model": "gpt-4o", - "messages": [ - { - "role": "user", - "content": "What is the guaranteed uptime percentage for TechCorp's cloud services?" - } - ], - "stream": true -}