mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
add rag agent demo
This commit is contained in:
parent
fb6e7fba6e
commit
2065b13f70
6 changed files with 92 additions and 83 deletions
0
demos/use_cases/rag_agent/.dockerignore
Normal file
0
demos/use_cases/rag_agent/.dockerignore
Normal file
46
demos/use_cases/rag_agent/Dockerfile
Normal file
46
demos/use_cases/rag_agent/Dockerfile
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
# syntax=docker/dockerfile:1

# Base Python image (aligns with requires-python >=3.10)
FROM python:3.12-slim

# Install runtime deps and curl for installing uv.
# update + install + list cleanup stay in a single layer so a stale apt index
# is never cached and the package lists do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Run the remaining RUN steps under bash with pipefail so that a failed
# download in `curl ... | sh` aborts the build instead of being masked by the
# exit status of `sh` (hadolint DL4006). Declared after the apt step above,
# which guarantees bash is installed.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install Astral's uv (Python package manager/runner used by start_agents.sh)
# Keep both common install paths in PATH so `uv` is available in the container
ENV PATH="/root/.local/bin:/root/.cargo/bin:${PATH}"
RUN curl -LsSf https://astral.sh/uv/install.sh | sh

# Workdir for app
WORKDIR /app

# Copy only the project metadata and lockfile first, so the dependency layer
# below is rebuilt only when these files change (not on every source edit).
COPY pyproject.toml README.md uv.lock /app/

# Pre-create the project venv and install the locked third-party dependencies.
# --frozen installs exactly what uv.lock pins; --no-install-project skips the
# project itself because its source has not been copied yet.
RUN uv venv \
    && uv sync --frozen --no-install-project

# Copy the application source and the launcher script.
# --chmod marks the script executable at copy time (no extra chmod layer).
COPY src/ /app/src/
COPY --chmod=0755 start_agents.sh /app/

# Install the project itself into the existing .venv; dependencies are already
# in place from the cached layer above, so this step is fast.
RUN uv sync --frozen

# Make venv the default Python for subsequent commands (optional; uv run will pick .venv automatically)
ENV VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:${PATH}"

# Expose the three agent ports
EXPOSE 10500 10501 10502

# Default environment (can be overridden at `docker run` time)
# Example: point to an external archgw or a compose service name
# ENV LLM_GATEWAY_ENDPOINT="http://archgw:12000/v1"

# Run the multi-agent launcher script
ENTRYPOINT ["/app/start_agents.sh"]
|
||||
|
|
@ -10,9 +10,9 @@ A FastAPI service that rewrites user queries using archgw and gpt-4o-mini for be
|
|||
|
||||
## Setup and Running
|
||||
|
||||
1. **Start archgw**:
|
||||
1. **Start archgw with Open WebUI and Jaeger UI**:
|
||||
```bash
|
||||
archgw up --foreground
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
2. **Start the query parser service**:
|
||||
|
|
|
|||
|
|
@ -7,10 +7,6 @@ agents:
|
|||
url: http://host.docker.internal:10501/v1/chat/completions
|
||||
- id: rag_agent
|
||||
url: http://host.docker.internal:10502/v1/chat/completions
|
||||
- id: research_agent
|
||||
url: http://host.docker.internal:10503/v1/chat/completions
|
||||
- id: weather_forecast_agent
|
||||
url: http://host.docker.internal:10504/process
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
|
|
@ -18,8 +14,6 @@ model_providers:
|
|||
default: true
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
- model: ollama/llama3.1
|
||||
base_url: http://host.docker.internal:11434
|
||||
|
||||
model_aliases:
|
||||
fast-llm:
|
||||
|
|
|
|||
|
|
@ -15,3 +15,27 @@ services:
|
|||
- DEFAULT_MODEL=gpt-4o-mini
|
||||
- ENABLE_OPENAI_API=true
|
||||
- OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
|
||||
rag_agents:
|
||||
build:
|
||||
context: .
|
||||
container_name: rag_agents
|
||||
ports:
|
||||
- "10500:10500"
|
||||
- "10501:10501"
|
||||
- "10502:10502"
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
- LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1
|
||||
archgw:
|
||||
image: katanemo/archgw:0.3.16
|
||||
container_name: archgw
|
||||
ports:
|
||||
- "8001:8001"
|
||||
- "12000:12000"
|
||||
volumes:
|
||||
- ./arch_config.yaml:/app/arch_config.yaml:ro
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY?Please set the OPENAI_API_KEY environment variable}
|
||||
|
|
|
|||
|
|
@ -1,54 +1,15 @@
|
|||
@baseUrl = http://0.0.0.0:10502
|
||||
|
||||
# Health Check
|
||||
GET {{baseUrl}}/health
|
||||
|
||||
###
|
||||
|
||||
# Test 1: Simple Non-Streaming Chat Completion
|
||||
POST {{baseUrl}}/v1/chat/completions
|
||||
### send request to query rewriter agent
|
||||
POST http://localhost:10500/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "gpt-4o",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello! Can you help me understand what machine learning is?"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
###
|
||||
|
||||
# Test 2: Simple Streaming Chat Completion
|
||||
POST {{baseUrl}}/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "gpt-4o",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Explain the concept of artificial intelligence in simple terms."
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
|
||||
### Test 3
|
||||
POST http://localhost:8001/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "gpt-4o",
|
||||
"model": "gpt-4o-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
]
|
||||
}
|
||||
|
||||
### send request to context builder agent
|
||||
|
|
@ -65,7 +26,22 @@ Content-Type: application/json
|
|||
]
|
||||
}
|
||||
|
||||
### Test with debug mode and reasoning blocks (streaming)
|
||||
### Test directly with RAG agent
|
||||
POST http://localhost:8001/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
x-debug-mode: true
|
||||
|
||||
{
|
||||
"model": "gpt-4o",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### Test directly with RAG agent with streaming
|
||||
POST http://localhost:8001/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
x-debug-mode: true
|
||||
|
|
@ -80,34 +56,3 @@ x-debug-mode: true
|
|||
],
|
||||
"stream": true
|
||||
}
|
||||
|
||||
### Test debug mode without streaming (should work normally)
|
||||
POST {{baseUrl}}/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
X-Debug-Mode: true
|
||||
|
||||
{
|
||||
"model": "gpt-4o",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
}
|
||||
|
||||
### Test debug mode without streaming (should work normally)
|
||||
POST http://localhost:8001/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "gpt-4o",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue