add rag agent demo

This commit is contained in:
Adil Hafeez 2025-10-22 16:47:20 -07:00
parent fb6e7fba6e
commit 2065b13f70
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
6 changed files with 92 additions and 83 deletions

View file

View file

@ -0,0 +1,46 @@
# syntax=docker/dockerfile:1
# Image for the RAG agent demo: installs uv, resolves the locked dependencies
# into /app/.venv, and starts the agents through start_agents.sh.

# Base Python image (aligns with requires-python >=3.10)
FROM python:3.12-slim

# Install runtime deps and curl for installing uv
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Run the remaining RUN steps under bash with pipefail so that a failed download
# in `curl ... | sh` aborts the build instead of being hidden by the pipe.
# Declared after the apt step above, which guarantees bash is installed.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install Astral's uv (Python package manager/runner used by start_agents.sh)
# Keep both common install paths in PATH so `uv` is available in the container
ENV PATH="/root/.local/bin:/root/.cargo/bin:${PATH}"
RUN curl -LsSf https://astral.sh/uv/install.sh | sh

# Workdir for app
WORKDIR /app

# Copy only the project metadata and lockfile first so the dependency layer is
# reused until one of these files changes; edits under src/ no longer trigger
# a full dependency reinstall.
COPY pyproject.toml README.md uv.lock /app/

# Create /app/.venv and install the locked third-party dependencies.
# `uv sync` creates the venv itself, so a separate `uv venv` is not needed;
# --no-install-project defers installing the project until its source is copied.
RUN uv sync --frozen --no-install-project

# Now add the application source and the launcher script (made executable at
# copy time), then finish the sync so the project itself is installed as well.
COPY src/ /app/src/
COPY --chmod=755 start_agents.sh /app/
RUN uv sync --frozen

# Make venv the default Python for subsequent commands (optional; uv run will pick .venv automatically)
ENV VIRTUAL_ENV=/app/.venv
ENV PATH="/app/.venv/bin:${PATH}"

# Expose the three agent ports
EXPOSE 10500 10501 10502

# Default environment (can be overridden at `docker run` time)
# Example: point to an external archgw or a compose service name
# ENV LLM_GATEWAY_ENDPOINT="http://archgw:12000/v1"

# NOTE(review): the container runs as root because uv is installed under /root.
# Switching to a non-root USER would also require relocating uv and granting
# write access to /app/.venv -- confirm against start_agents.sh before changing.

# Run the multi-agent launcher script
ENTRYPOINT ["/app/start_agents.sh"]

View file

@ -10,9 +10,9 @@ A FastAPI service that rewrites user queries using archgw and gpt-4o-mini for be
## Setup and Running
1. **Start archgw**:
1. **Start archgw with Open WebUI and the Jaeger UI**:
```bash
archgw up --foreground
docker compose up -d
```
2. **Start the query parser service**:

View file

@ -7,10 +7,6 @@ agents:
url: http://host.docker.internal:10501/v1/chat/completions
- id: rag_agent
url: http://host.docker.internal:10502/v1/chat/completions
- id: research_agent
url: http://host.docker.internal:10503/v1/chat/completions
- id: weather_forecast_agent
url: http://host.docker.internal:10504/process
model_providers:
- model: openai/gpt-4o-mini
@ -18,8 +14,6 @@ model_providers:
default: true
- model: openai/gpt-4o
access_key: $OPENAI_API_KEY
- model: ollama/llama3.1
base_url: http://host.docker.internal:11434
model_aliases:
fast-llm:

View file

@ -15,3 +15,27 @@ services:
- DEFAULT_MODEL=gpt-4o-mini
- ENABLE_OPENAI_API=true
- OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
# RAG demo agents, built from the build context in this directory.
rag_agents:
  build:
    context: .
  container_name: rag_agents
  ports:
    # Publish the agent ports on the host under the same numbers.
    - "10500:10500"
    - "10501:10501"
    - "10502:10502"
  env_file:
    - .env
  environment:
    # The gateway is reached through the host's published port 12000.
    - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
  extra_hosts:
    # Docker Desktop defines host.docker.internal automatically; plain Docker
    # Engine on Linux does not, so map it to the host gateway explicitly.
    - "host.docker.internal:host-gateway"
# archgw gateway, pinned to a fixed image version.
archgw:
  image: katanemo/archgw:0.3.16
  container_name: archgw
  ports:
    # 12000 is the port the agents use as LLM_GATEWAY_ENDPOINT.
    # 8001 is presumably the agent/chat listener used by the UI -- verify
    # against arch_config.yaml.
    - "8001:8001"
    - "12000:12000"
  volumes:
    # Gateway configuration, mounted read-only.
    - ./arch_config.yaml:/app/arch_config.yaml:ro
  env_file:
    - .env
  environment:
    # `${VAR?message}` makes Compose abort with this message when the variable is unset.
    - OPENAI_API_KEY=${OPENAI_API_KEY?Please set the OPENAI_API_KEY environment variable}
  extra_hosts:
    # The agent URLs in arch_config.yaml point at host.docker.internal. Docker
    # Desktop defines that name automatically; plain Docker Engine on Linux
    # does not, so map it to the host gateway explicitly.
    - "host.docker.internal:host-gateway"

View file

@ -1,54 +1,15 @@
@baseUrl = http://0.0.0.0:10502
# Health Check
GET {{baseUrl}}/health
###
# Test 1: Simple Non-Streaming Chat Completion
POST {{baseUrl}}/v1/chat/completions
### send request to query rewriter agent
POST http://localhost:10500/v1/chat/completions
Content-Type: application/json
{
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me understand what machine learning is?"
}
]
}
###
# Test 2: Simple Streaming Chat Completion
POST {{baseUrl}}/v1/chat/completions
Content-Type: application/json
{
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "Explain the concept of artificial intelligence in simple terms."
}
],
"stream": true
}
### Test 3
POST http://localhost:8001/v1/chat/completions
Content-Type: application/json
{
"model": "gpt-4o",
"model": "gpt-4o-mini",
"messages": [
{
"role": "user",
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
}
],
"stream": false
]
}
### send request to context builder agent
@ -65,7 +26,22 @@ Content-Type: application/json
]
}
### Test with debug mode and reasoning blocks (streaming)
### Test directly with RAG agent
POST http://localhost:8001/v1/chat/completions
Content-Type: application/json
x-debug-mode: true
{
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
}
]
}
### Test directly with RAG agent with streaming
POST http://localhost:8001/v1/chat/completions
Content-Type: application/json
x-debug-mode: true
@ -80,34 +56,3 @@ x-debug-mode: true
],
"stream": true
}
### Test debug mode without streaming (should work normally)
POST {{baseUrl}}/v1/chat/completions
Content-Type: application/json
X-Debug-Mode: true
{
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
}
],
"stream": false
}
### Test debug mode without streaming (should work normally)
POST http://localhost:8001/v1/chat/completions
Content-Type: application/json
{
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
}
],
"stream": true
}