diff --git a/demos/advanced/currency_exchange/run_demo.sh b/demos/advanced/currency_exchange/run_demo.sh index 6623dee5..e430a1cd 100644 --- a/demos/advanced/currency_exchange/run_demo.sh +++ b/demos/advanced/currency_exchange/run_demo.sh @@ -18,22 +18,24 @@ start_demo() { echo ".env file created with OPENAI_API_KEY." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (AnythingLLM, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM, Jaeger)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - - # Step 4: Start developer services - echo "Starting Network Agent using Docker Compose..." - docker compose up -d # Run in detached mode } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping Network Agent using Docker Compose..." - docker compose down + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true - # Step 2: Stop Plano + # Stop Plano echo "Stopping Plano..." planoai down } @@ -42,6 +44,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - # Default action is to bring the demo up - start_demo + start_demo "$1" fi diff --git a/demos/advanced/multi_turn_rag/docker-compose.yaml b/demos/advanced/multi_turn_rag/docker-compose.yaml index 1c3ed73c..f36987e4 100644 --- a/demos/advanced/multi_turn_rag/docker-compose.yaml +++ b/demos/advanced/multi_turn_rag/docker-compose.yaml @@ -1,15 +1,4 @@ services: - rag_energy_source_agent: - build: - context: . - dockerfile: Dockerfile - ports: - - "18083:80" - healthcheck: - test: ["CMD", "curl" ,"http://localhost:80/healthz"] - interval: 5s - retries: 20 - anythingllm: image: mintplexlabs/anythingllm restart: always diff --git a/demos/advanced/multi_turn_rag/pyproject.toml b/demos/advanced/multi_turn_rag/pyproject.toml new file mode 100644 index 00000000..05824bd6 --- /dev/null +++ b/demos/advanced/multi_turn_rag/pyproject.toml @@ -0,0 +1,12 @@ +[project] +name = "multi-turn-rag" +version = "0.1.0" +requires-python = ">=3.12" +dependencies = [ + "fastapi", + "uvicorn", + "pydantic>=2.8", + "httpx>=0.27", + "openai>=1.51", + "python-dotenv>=1.0", +] diff --git a/demos/advanced/multi_turn_rag/run_demo.sh b/demos/advanced/multi_turn_rag/run_demo.sh index f9434aa2..5bec6368 100644 --- a/demos/advanced/multi_turn_rag/run_demo.sh +++ b/demos/advanced/multi_turn_rag/run_demo.sh @@ -18,22 +18,32 @@ start_demo() { echo ".env file created with OPENAI_API_KEY." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (AnythingLLM) + # UI services must start before Plano to avoid OTEL port conflicts + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - # Step 4: Start Network Agent - echo "Starting HR Agent using Docker Compose..." - docker compose up -d # Run in detached mode + # Step 5: Start agents natively + echo "Starting agents..." + bash start_agents.sh & } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping HR Agent using Docker Compose..." - docker compose down -v + # Stop agents + echo "Stopping agents..." + pkill -f start_agents.sh 2>/dev/null || true - # Step 2: Stop Plano + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true + + # Stop Plano echo "Stopping Plano..." planoai down } @@ -42,6 +52,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - # Default action is to bring the demo up - start_demo + start_demo "$1" fi diff --git a/demos/advanced/multi_turn_rag/start_agents.sh b/demos/advanced/multi_turn_rag/start_agents.sh new file mode 100755 index 00000000..00b7f1b1 --- /dev/null +++ b/demos/advanced/multi_turn_rag/start_agents.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e + +PIDS=() + +log() { echo "$(date '+%F %T') - $*"; } + +cleanup() { + log "Stopping agents..." + for PID in "${PIDS[@]}"; do + kill $PID 2>/dev/null && log "Stopped process $PID" + done + exit 0 +} + +trap cleanup EXIT INT TERM + +log "Starting rag_energy_source_agent on port 18083..." +uv run uvicorn main:app --host 0.0.0.0 --port 18083 & +PIDS+=($!) + +for PID in "${PIDS[@]}"; do + wait "$PID" +done diff --git a/demos/advanced/stock_quote/run_demo.sh b/demos/advanced/stock_quote/run_demo.sh index 6623dee5..e430a1cd 100644 --- a/demos/advanced/stock_quote/run_demo.sh +++ b/demos/advanced/stock_quote/run_demo.sh @@ -18,22 +18,24 @@ start_demo() { echo ".env file created with OPENAI_API_KEY." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (AnythingLLM, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM, Jaeger)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - - # Step 4: Start developer services - echo "Starting Network Agent using Docker Compose..." - docker compose up -d # Run in detached mode } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping Network Agent using Docker Compose..." - docker compose down + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true - # Step 2: Stop Plano + # Stop Plano echo "Stopping Plano..." planoai down } @@ -42,6 +44,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - # Default action is to bring the demo up - start_demo + start_demo "$1" fi diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md index e2fe23fb..97d71e7f 100644 --- a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md +++ b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md @@ -41,21 +41,36 @@ cd demos/agent_orchestration/multi_agent_crewai_langchain ./run_demo.sh ``` -This starts Plano natively and brings up via Docker Compose: +This starts Plano natively and runs agents as local processes: - **CrewAI Flight Agent** (port 10520) - flight search - **LangChain Weather Agent** (port 10510) - weather forecasts -- **AnythingLLM** (port 3001) - chat interface -- **Jaeger** (port 16686) - distributed tracing Plano runs natively on the host (ports 12000, 8001). +To also start AnythingLLM (chat UI), Jaeger (tracing), and other optional services: + +```bash +./run_demo.sh --with-ui +``` + +This additionally starts: +- **AnythingLLM** (port 3001) - chat interface +- **Jaeger** (port 16686) - distributed tracing + ### Try It Out -1. **Open the Chat Interface** +1. **Using curl** + ```bash + curl -X POST http://localhost:8001/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "What is the weather in San Francisco?"}]}' + ``` + +2. **Using AnythingLLM (requires `--with-ui`)** - Navigate to [http://localhost:3001](http://localhost:3001) - Create an account (stored locally) -2. **Ask Multi-Agent Questions** +3. **Ask Multi-Agent Questions** ``` "What's the weather in San Francisco and can you find flights from Seattle to San Francisco?" ``` @@ -65,7 +80,7 @@ Plano runs natively on the host (ports 12000, 8001). - Routes the flight part to the CrewAI agent - Combines responses seamlessly -3. **View Distributed Traces** +4. **View Distributed Traces (requires `--with-ui`)** - Open [http://localhost:16686](http://localhost:16686) (Jaeger UI) - See how requests flow through both agents diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml b/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml index b3a204f3..ef522337 100644 --- a/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml +++ b/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml @@ -2,9 +2,9 @@ version: v0.3.0 agents: - id: weather_agent - url: http://langchain-weather-agent:10510 + url: http://localhost:10510 - id: flight_agent - url: http://crewai-flight-agent:10520 + url: http://localhost:10520 model_providers: - model: openai/gpt-4o diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml index 2d9c180b..74954562 100644 --- a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml +++ b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml @@ -1,27 +1,5 @@ services: - crewai-flight-agent: - build: - dockerfile: Dockerfile - restart: always - ports: - - "10520:10520" - environment: - - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1 - - AEROAPI_KEY=${AEROAPI_KEY:?AEROAPI_KEY environment variable is required but not set} - - PYTHONUNBUFFERED=1 - command: ["python", "-u", "crewai/flight_agent.py"] - - langchain-weather-agent: - build: - dockerfile: Dockerfile - restart: always - ports: - - "10510:10510" - environment: - - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1 - command: ["python", "-u", "langchain/weather_agent.py"] - anythingllm: image: mintplexlabs/anythingllm restart: always @@ -36,6 +14,8 @@ services: - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 - GENERIC_OPEN_AI_API_KEY=sk-placeholder + extra_hosts: + - "host.docker.internal:host-gateway" jaeger: build: @@ -44,3 +24,4 @@ services: ports: - "16686:16686" # Jaeger UI - "4317:4317" # OTLP gRPC receiver + - "4318:4318" # OTLP HTTP receiver diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh index b7dc0fad..35bbbbdd 100755 --- a/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh +++ b/demos/agent_orchestration/multi_agent_crewai_langchain/run_demo.sh @@ -12,33 +12,38 @@ start_demo() { echo "Error: OPENAI_API_KEY environment variable is not set for the demo." exit 1 fi - if [ -z "$AEROAPI_KEY" ]; then - echo "Error: AEROAPI_KEY environment variable is not set for the demo." - exit 1 - fi echo "Creating .env file..." echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env - echo "AEROAPI_KEY=$AEROAPI_KEY" >> .env echo ".env file created with API keys." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (AnythingLLM, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM, Jaeger)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - # Step 4: Start agents and services - echo "Starting agents using Docker Compose..." - docker compose up -d + # Step 5: Start agents natively + echo "Starting agents..." + bash start_agents.sh & } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping Docker Compose services..." - docker compose down + # Stop agents + echo "Stopping agents..." + pkill -f start_agents.sh 2>/dev/null || true - # Step 2: Stop Plano + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true + + # Stop Plano echo "Stopping Plano..." planoai down } @@ -47,5 +52,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - start_demo + start_demo "$1" fi diff --git a/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh b/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh new file mode 100755 index 00000000..78d2fecb --- /dev/null +++ b/demos/agent_orchestration/multi_agent_crewai_langchain/start_agents.sh @@ -0,0 +1,30 @@ +#!/bin/bash +set -e + +PIDS=() + +log() { echo "$(date '+%F %T') - $*"; } + +cleanup() { + log "Stopping agents..." + for PID in "${PIDS[@]}"; do + kill $PID 2>/dev/null && log "Stopped process $PID" + done + exit 0 +} + +trap cleanup EXIT INT TERM + +export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1 + +log "Starting langchain weather_agent on port 10510..." +uv run python langchain/weather_agent.py & +PIDS+=($!) + +log "Starting crewai flight_agent on port 10520..." +uv run python crewai/flight_agent.py & +PIDS+=($!) + +for PID in "${PIDS[@]}"; do + wait "$PID" +done diff --git a/demos/agent_orchestration/travel_agents/README.md b/demos/agent_orchestration/travel_agents/README.md index d6468612..7886539d 100644 --- a/demos/agent_orchestration/travel_agents/README.md +++ b/demos/agent_orchestration/travel_agents/README.md @@ -23,9 +23,10 @@ All agents use Plano's agent orchestration LLM to intelligently route user reque ## Prerequisites - [Plano CLI](https://docs.planoai.dev/get_started/quickstart.html#prerequisites) installed (`pip install planoai`) -- Docker and Docker Compose (for agent services) +- [uv](https://docs.astral.sh/uv/) installed (for running agents natively) - [OpenAI API key](https://platform.openai.com/api-keys) - [FlightAware AeroAPI key](https://www.flightaware.com/aeroapi/portal) +- Docker and Docker Compose (optional, only needed for `--with-ui`) > **Note:** You'll need to obtain a FlightAware AeroAPI key for live flight data. Visit [https://www.flightaware.com/aeroapi/portal](https://www.flightaware.com/aeroapi/portal) to get your API key. @@ -46,16 +47,34 @@ export OPENAI_API_KEY="your OpenAI api key" ./run_demo.sh ``` -This starts Plano natively and brings up via Docker Compose: +This starts Plano natively and runs agents as local processes: - Weather Agent on port 10510 - Flight Agent on port 10520 -- Open WebUI on port 8080 Plano runs natively on the host (port 8001). +To also start Open WebUI, Jaeger tracing, and other optional services, pass `--with-ui`: + +```bash +./run_demo.sh --with-ui +``` + +This additionally starts: +- Open WebUI on port 8080 +- Jaeger tracing UI on port 16686 + ### 4. Test the System -Use Open WebUI at http://localhost:8080 +**Option A: Using curl** +```bash +curl -X POST http://localhost:8001/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model": "gpt-5.2", "messages": [{"role": "user", "content": "What is the weather in Istanbul?"}]}' +``` + +**Option B: Using Open WebUI (requires `--with-ui`)** + +Navigate to http://localhost:8080 > **Note:** The Open WebUI may take a few minutes to start up and be fully ready. Please wait for the container to finish initializing before accessing the interface. Once ready, make sure to select the **gpt-5.2** model from the model dropdown menu in the UI. @@ -102,7 +121,7 @@ Each agent: 3. Generates response using GPT-5.2 4. Streams response back to user -Both agents run as Docker containers and communicate with Plano running natively on the host. +Both agents run as native local processes and communicate with Plano running natively on the host. ## Observability diff --git a/demos/agent_orchestration/travel_agents/docker-compose.yaml b/demos/agent_orchestration/travel_agents/docker-compose.yaml index f0fb78e5..66edbdc3 100644 --- a/demos/agent_orchestration/travel_agents/docker-compose.yaml +++ b/demos/agent_orchestration/travel_agents/docker-compose.yaml @@ -1,32 +1,5 @@ services: - weather-agent: - build: - context: . - dockerfile: Dockerfile - container_name: weather-agent - restart: always - ports: - - "10510:10510" - environment: - - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1 - command: ["uv", "run", "python", "src/travel_agents/weather_agent.py"] - extra_hosts: - - "host.docker.internal:host-gateway" - flight-agent: - build: - context: . - dockerfile: Dockerfile - container_name: flight-agent - restart: always - ports: - - "10520:10520" - environment: - - LLM_GATEWAY_ENDPOINT=http://host.docker.internal:12000/v1 - - AEROAPI_KEY=${AEROAPI_KEY:? AEROAPI_KEY environment variable is required but not set} - command: ["uv", "run", "python", "src/travel_agents/flight_agent.py"] - extra_hosts: - - "host.docker.internal:host-gateway" open-web-ui: image: dyrnq/open-webui:main restart: always @@ -40,9 +13,8 @@ services: - ENABLE_TITLE_GENERATION=false - ENABLE_TAGS_GENERATION=false - ENABLE_AUTOCOMPLETE_GENERATION=false - depends_on: - - weather-agent - - flight-agent + extra_hosts: + - "host.docker.internal:host-gateway" jaeger: build: context: ../../shared/jaeger diff --git a/demos/agent_orchestration/travel_agents/run_demo.sh b/demos/agent_orchestration/travel_agents/run_demo.sh index b7dc0fad..643a0aa2 100755 --- a/demos/agent_orchestration/travel_agents/run_demo.sh +++ b/demos/agent_orchestration/travel_agents/run_demo.sh @@ -23,22 +23,32 @@ start_demo() { echo ".env file created with API keys." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (Open WebUI, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (Open WebUI, Jaeger)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - # Step 4: Start agents and services - echo "Starting agents using Docker Compose..." - docker compose up -d + # Step 5: Start agents natively + echo "Starting agents..." + bash start_agents.sh & } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping Docker Compose services..." - docker compose down + # Stop agents + echo "Stopping agents..." + pkill -f start_agents.sh 2>/dev/null || true - # Step 2: Stop Plano + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true + + # Stop Plano echo "Stopping Plano..." planoai down } @@ -47,5 +57,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - start_demo + start_demo "$1" fi diff --git a/demos/agent_orchestration/travel_agents/start_agents.sh b/demos/agent_orchestration/travel_agents/start_agents.sh new file mode 100755 index 00000000..4f2e32a7 --- /dev/null +++ b/demos/agent_orchestration/travel_agents/start_agents.sh @@ -0,0 +1,30 @@ +#!/bin/bash +set -e + +PIDS=() + +log() { echo "$(date '+%F %T') - $*"; } + +cleanup() { + log "Stopping agents..." + for PID in "${PIDS[@]}"; do + kill $PID 2>/dev/null && log "Stopped process $PID" + done + exit 0 +} + +trap cleanup EXIT INT TERM + +export LLM_GATEWAY_ENDPOINT=http://localhost:12000/v1 + +log "Starting weather_agent on port 10510..." +uv run python src/travel_agents/weather_agent.py & +PIDS+=($!) + +log "Starting flight_agent on port 10520..." +uv run python src/travel_agents/flight_agent.py & +PIDS+=($!) + +for PID in "${PIDS[@]}"; do + wait "$PID" +done diff --git a/demos/filter_chains/http_filter/README.md b/demos/filter_chains/http_filter/README.md index 5e675113..86748217 100644 --- a/demos/filter_chains/http_filter/README.md +++ b/demos/filter_chains/http_filter/README.md @@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key" ./run_demo.sh ``` -This starts Plano natively and brings up via Docker Compose: -- Input Guards MCP server on port 10500 -- Query Rewriter MCP server on port 10501 -- Context Builder MCP server on port 10502 +This starts Plano natively and runs agents as local processes: +- Input Guards HTTP server on port 10500 +- Query Rewriter HTTP server on port 10501 +- Context Builder HTTP server on port 10502 - RAG Agent REST server on port 10505 -- Jaeger UI for viewing traces at http://localhost:16686 -- AnythingLLM at http://localhost:3001 for interactive queries Plano runs natively on the host (port 8001 and 12000). +To also start AnythingLLM (chat UI) and Jaeger (tracing): + +```bash +./run_demo.sh --with-ui +``` + +This additionally starts: +- Jaeger UI for viewing traces at http://localhost:16686 +- AnythingLLM at http://localhost:3001 for interactive queries + ### 2. Test the system -**Option A: Using AnythingLLM (recommended)** - -Navigate to http://localhost:3001 and send queries through the chat interface. - -**Option B: Using curl** +**Option A: Using curl (recommended)** ```bash curl -X POST http://localhost:8001/v1/chat/completions \ -H "Content-Type: application/json" \ diff --git a/demos/filter_chains/http_filter/config.yaml b/demos/filter_chains/http_filter/config.yaml index 117931e2..014a141a 100644 --- a/demos/filter_chains/http_filter/config.yaml +++ b/demos/filter_chains/http_filter/config.yaml @@ -2,23 +2,23 @@ version: v0.3.0 agents: - id: rag_agent - url: http://rag-agents:10505 + url: http://localhost:10505 filters: - id: input_guards - url: http://rag-agents:10500 + url: http://localhost:10500 type: http # type: mcp (default) # transport: streamable-http (default) # tool: input_guards (default - same as filter id) - id: query_rewriter - url: http://rag-agents:10501 + url: http://localhost:10501 type: http # type: mcp (default) # transport: streamable-http (default) # tool: query_rewriter (default - same as filter id) - id: context_builder - url: http://rag-agents:10502 + url: http://localhost:10502 type: http model_providers: diff --git a/demos/filter_chains/http_filter/docker-compose.yaml b/demos/filter_chains/http_filter/docker-compose.yaml index 64962bce..0361926c 100644 --- a/demos/filter_chains/http_filter/docker-compose.yaml +++ b/demos/filter_chains/http_filter/docker-compose.yaml @@ -1,16 +1,4 @@ services: - rag-agents: - build: - context: . - dockerfile: Dockerfile - ports: - - "10500:10500" - - "10501:10501" - - "10502:10502" - - "10505:10505" - environment: - - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1} - - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} jaeger: build: context: ../../shared/jaeger @@ -32,3 +20,5 @@ services: - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 - GENERIC_OPEN_AI_API_KEY=sk-placeholder + extra_hosts: + - "host.docker.internal:host-gateway" diff --git a/demos/filter_chains/http_filter/run_demo.sh b/demos/filter_chains/http_filter/run_demo.sh index bed84f16..f203f5b1 100755 --- a/demos/filter_chains/http_filter/run_demo.sh +++ b/demos/filter_chains/http_filter/run_demo.sh @@ -18,22 +18,32 @@ start_demo() { echo ".env file created with OPENAI_API_KEY." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (AnythingLLM, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM, Jaeger)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - # Step 4: Start services - echo "Starting services using Docker Compose..." - docker compose up -d + # Step 5: Start agents natively + echo "Starting agents..." + bash start_agents.sh & } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping Docker Compose services..." - docker compose down + # Stop agents + echo "Stopping agents..." + pkill -f start_agents.sh 2>/dev/null || true - # Step 2: Stop Plano + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true + + # Stop Plano echo "Stopping Plano..." planoai down } @@ -42,5 +52,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - start_demo + start_demo "$1" fi diff --git a/demos/filter_chains/http_filter/start_agents.sh b/demos/filter_chains/http_filter/start_agents.sh old mode 100644 new mode 100755 index 06cabeec..8dfdc0f4 --- a/demos/filter_chains/http_filter/start_agents.sh +++ b/demos/filter_chains/http_filter/start_agents.sh @@ -1,78 +1,38 @@ -# #!/bin/bash -# set -e - -# WAIT_FOR_PIDS=() - -# log() { -# timestamp=$(python3 -c 'from datetime import datetime; print(datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:23])') -# message="$*" -# echo "$timestamp - $message" -# } - -# cleanup() { -# log "Caught signal, terminating all user processes ..." -# for PID in "${WAIT_FOR_PIDS[@]}"; do -# if kill $PID 2> /dev/null; then -# log "killed process: $PID" -# fi -# done -# exit 1 -# } - -# trap cleanup EXIT - -# log "Starting input_guards agent on port 10500/mcp..." -# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10500 --agent input_guards & -# WAIT_FOR_PIDS+=($!) - -# log "Starting query_rewriter agent on port 10501/mcp..." -# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10501 --agent query_rewriter & -# WAIT_FOR_PIDS+=($!) - -# log "Starting context_builder agent on port 10502/mcp..." -# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10502 --agent context_builder & -# WAIT_FOR_PIDS+=($!) - -# # log "Starting response_generator agent on port 10400..." -# # uv run python -m rag_agent --host 0.0.0.0 --port 10400 --agent response_generator & -# # WAIT_FOR_PIDS+=($!) - -# log "Starting response_generator agent on port 10505..." -# uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10505 --agent response_generator & -# WAIT_FOR_PIDS+=($!) - -# for PID in "${WAIT_FOR_PIDS[@]}"; do -# wait "$PID" -# done - - - - #!/bin/bash set -e -export PYTHONPATH=/app/src - -pids=() +PIDS=() log() { echo "$(date '+%F %T') - $*"; } -log "Starting input_guards HTTP server on :10500" +cleanup() { + log "Stopping agents..." + for PID in "${PIDS[@]}"; do + kill $PID 2>/dev/null && log "Stopped process $PID" + done + exit 0 +} + +trap cleanup EXIT INT TERM + +export PYTHONPATH=./src + +log "Starting input_guards HTTP server on port 10500..." uv run uvicorn rag_agent.input_guards:app --host 0.0.0.0 --port 10500 & -pids+=($!) +PIDS+=($!) -log "Starting query_rewriter HTTP server on :10501" +log "Starting query_rewriter HTTP server on port 10501..." uv run uvicorn rag_agent.query_rewriter:app --host 0.0.0.0 --port 10501 & -pids+=($!) +PIDS+=($!) -log "Starting context_builder HTTP server on :10502" +log "Starting context_builder HTTP server on port 10502..." uv run uvicorn rag_agent.context_builder:app --host 0.0.0.0 --port 10502 & -pids+=($!) +PIDS+=($!) -log "Starting response_generator (OpenAI-compatible) on :10505" +log "Starting response_generator (OpenAI-compatible) on port 10505..." uv run uvicorn rag_agent.rag_agent:app --host 0.0.0.0 --port 10505 & -pids+=($!) +PIDS+=($!) -for PID in "${pids[@]}"; do +for PID in "${PIDS[@]}"; do wait "$PID" done diff --git a/demos/filter_chains/mcp_filter/README.md b/demos/filter_chains/mcp_filter/README.md index 5e675113..798015e2 100644 --- a/demos/filter_chains/mcp_filter/README.md +++ b/demos/filter_chains/mcp_filter/README.md @@ -41,23 +41,27 @@ export OPENAI_API_KEY="your-key" ./run_demo.sh ``` -This starts Plano natively and brings up via Docker Compose: +This starts Plano natively and runs agents as local processes: - Input Guards MCP server on port 10500 - Query Rewriter MCP server on port 10501 - Context Builder MCP server on port 10502 - RAG Agent REST server on port 10505 -- Jaeger UI for viewing traces at http://localhost:16686 -- AnythingLLM at http://localhost:3001 for interactive queries Plano runs natively on the host (port 8001 and 12000). +To also start AnythingLLM (chat UI) and Jaeger (tracing): + +```bash +./run_demo.sh --with-ui +``` + +This additionally starts: +- Jaeger UI for viewing traces at http://localhost:16686 +- AnythingLLM at http://localhost:3001 for interactive queries + ### 2. Test the system -**Option A: Using AnythingLLM (recommended)** - -Navigate to http://localhost:3001 and send queries through the chat interface. - -**Option B: Using curl** +**Option A: Using curl (recommended)** ```bash curl -X POST http://localhost:8001/v1/chat/completions \ -H "Content-Type: application/json" \ diff --git a/demos/filter_chains/mcp_filter/docker-compose.yaml b/demos/filter_chains/mcp_filter/docker-compose.yaml index 64962bce..0361926c 100644 --- a/demos/filter_chains/mcp_filter/docker-compose.yaml +++ b/demos/filter_chains/mcp_filter/docker-compose.yaml @@ -1,16 +1,4 @@ services: - rag-agents: - build: - context: . - dockerfile: Dockerfile - ports: - - "10500:10500" - - "10501:10501" - - "10502:10502" - - "10505:10505" - environment: - - LLM_GATEWAY_ENDPOINT=${LLM_GATEWAY_ENDPOINT:-http://host.docker.internal:12000/v1} - - OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set} jaeger: build: context: ../../shared/jaeger @@ -32,3 +20,5 @@ services: - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 - GENERIC_OPEN_AI_API_KEY=sk-placeholder + extra_hosts: + - "host.docker.internal:host-gateway" diff --git a/demos/filter_chains/mcp_filter/run_demo.sh b/demos/filter_chains/mcp_filter/run_demo.sh index bed84f16..f203f5b1 100755 --- a/demos/filter_chains/mcp_filter/run_demo.sh +++ b/demos/filter_chains/mcp_filter/run_demo.sh @@ -18,22 +18,32 @@ start_demo() { echo ".env file created with OPENAI_API_KEY." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (AnythingLLM, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM, Jaeger)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - # Step 4: Start services - echo "Starting services using Docker Compose..." - docker compose up -d + # Step 5: Start agents natively + echo "Starting agents..." + bash start_agents.sh & } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping Docker Compose services..." - docker compose down + # Stop agents + echo "Stopping agents..." + pkill -f start_agents.sh 2>/dev/null || true - # Step 2: Stop Plano + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true + + # Stop Plano echo "Stopping Plano..." planoai down } @@ -42,5 +52,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - start_demo + start_demo "$1" fi diff --git a/demos/getting_started/llm_gateway/README.md b/demos/getting_started/llm_gateway/README.md index e87467fc..b29397b6 100644 --- a/demos/getting_started/llm_gateway/README.md +++ b/demos/getting_started/llm_gateway/README.md @@ -7,7 +7,14 @@ This demo shows how you can use Plano gateway to manage keys and route to upstre ```sh sh run_demo.sh ``` -1. Navigate to http://localhost:3001/ +1. Test with curl (see example below) + +To also start the AnythingLLM chat UI and Jaeger tracing, pass `--with-ui`: +```sh +sh run_demo.sh --with-ui +``` + +Then navigate to http://localhost:3001/ for AnythingLLM. Following screen shows an example of interaction with Plano gateway showing dynamic routing. You can select between different LLMs using "override model" option in the chat UI. @@ -47,7 +54,7 @@ $ curl --header 'Content-Type: application/json' \ ``` # Observability -For tracing you can head over to http://localhost:16686/ to view recent traces. +For tracing, start with `--with-ui` and head over to http://localhost:16686/ to view recent traces. Following is a screenshot of tracing UI showing call received by Plano gateway and making upstream call to LLM, diff --git a/demos/getting_started/llm_gateway/run_demo.sh b/demos/getting_started/llm_gateway/run_demo.sh index b049bf31..e430a1cd 100644 --- a/demos/getting_started/llm_gateway/run_demo.sh +++ b/demos/getting_started/llm_gateway/run_demo.sh @@ -18,22 +18,24 @@ start_demo() { echo ".env file created with OPENAI_API_KEY." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (AnythingLLM, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM, Jaeger)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - - # Step 4: Start LLM Routing - echo "Starting LLM Routing using Docker Compose..." - docker compose up -d # Run in detached mode } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping LLM Routing using Docker Compose..." - docker compose down + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true - # Step 2: Stop Plano + # Stop Plano echo "Stopping Plano..." planoai down } @@ -42,6 +44,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - # Default action is to bring the demo up - start_demo + start_demo "$1" fi diff --git a/demos/getting_started/weather_forecast/README.md b/demos/getting_started/weather_forecast/README.md index 8a9eb6c0..91fa810f 100644 --- a/demos/getting_started/weather_forecast/README.md +++ b/demos/getting_started/weather_forecast/README.md @@ -10,15 +10,26 @@ This demo shows how you can use Plano's core function calling capabilities. 3. ```sh sh run_demo.sh ``` -4. Navigate to http://localhost:3001/ -5. You can type in queries like "how is the weather?" +4. Test with curl: + ```sh + curl http://localhost:10000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "how is the weather in San Francisco?"}]}' + ``` Here is a sample interaction, image -## Tracing +## Using the Chat UI and Tracing (optional) -To see a tracing dashboard, navigate to http://localhost:16686/ to open Jaeger UI. +To start AnythingLLM (chat UI) and other optional services, pass `--with-ui`: + +```sh +sh run_demo.sh --with-ui +``` + +- Navigate to http://localhost:3001/ for AnythingLLM +- Navigate to http://localhost:16686/ for Jaeger tracing UI ### Stopping Demo diff --git a/demos/getting_started/weather_forecast/docker-compose.yaml b/demos/getting_started/weather_forecast/docker-compose.yaml index 84074ab9..f36987e4 100644 --- a/demos/getting_started/weather_forecast/docker-compose.yaml +++ b/demos/getting_started/weather_forecast/docker-compose.yaml @@ -1,14 +1,4 @@ services: - weather_forecast_service: - build: - context: ./ - environment: - - OLTP_HOST=http://jaeger:4317 - extra_hosts: - - "host.docker.internal:host-gateway" - ports: - - "18083:80" - anythingllm: image: mintplexlabs/anythingllm restart: always diff --git a/demos/getting_started/weather_forecast/run_demo.sh b/demos/getting_started/weather_forecast/run_demo.sh index c8eb96e5..c77f2d83 100644 --- a/demos/getting_started/weather_forecast/run_demo.sh +++ b/demos/getting_started/weather_forecast/run_demo.sh @@ -72,23 +72,32 @@ start_demo() { exit 1 fi - # Step 4: Start Plano + # Step 4: Optionally start UI services (AnythingLLM, Jaeger, etc.) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ] || [ "$2" == "--with-ui" ]; then + echo "Starting UI services with $COMPOSE_FILE..." + docker compose -f "$COMPOSE_FILE" up -d + fi + + # Step 5: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - # Step 5: Start Network Agent with the chosen Docker Compose file - echo "Starting Network Agent with $COMPOSE_FILE..." - docker compose -f "$COMPOSE_FILE" up -d # Run in detached mode + # Step 6: Start agents natively + echo "Starting agents..." + bash start_agents.sh & } # Function to stop the demo stop_demo() { - echo "Stopping all Docker Compose services..." + # Stop agents + echo "Stopping agents..." + pkill -f start_agents.sh 2>/dev/null || true - # Stop all services by iterating through all configurations + # Stop all Docker Compose services if running + echo "Stopping Docker Compose services..." for compose_file in ./docker-compose*.yaml; do - echo "Stopping services in $compose_file..." - docker compose -f "$compose_file" down + docker compose -f "$compose_file" down 2>/dev/null || true done # Stop Plano @@ -101,6 +110,6 @@ if [ "$1" == "down" ]; then # Call stop_demo with the second argument as the demo to stop stop_demo else - # Use the argument (jaeger, logfire, signoz) to determine the compose file - start_demo "$1" + # Use the argument (jaeger, logfire, signoz, --with-ui) to determine the compose file + start_demo "$1" "$2" fi diff --git a/demos/getting_started/weather_forecast/start_agents.sh b/demos/getting_started/weather_forecast/start_agents.sh new file mode 100755 index 00000000..548f2bf7 --- /dev/null +++ b/demos/getting_started/weather_forecast/start_agents.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -e + +PIDS=() + +log() { echo "$(date '+%F %T') - $*"; } + +cleanup() { + log "Stopping agents..." + for PID in "${PIDS[@]}"; do + kill $PID 2>/dev/null && log "Stopped process $PID" + done + exit 0 +} + +trap cleanup EXIT INT TERM + +log "Starting weather_forecast_service on port 18083..." +uv run uvicorn main:app --host 0.0.0.0 --port 18083 & +PIDS+=($!) + +for PID in "${PIDS[@]}"; do + wait "$PID" +done diff --git a/demos/integrations/ollama/run_demo.sh b/demos/integrations/ollama/run_demo.sh index 6623dee5..5bbf183b 100644 --- a/demos/integrations/ollama/run_demo.sh +++ b/demos/integrations/ollama/run_demo.sh @@ -7,33 +7,58 @@ start_demo() { if [ -f ".env" ]; then echo ".env file already exists. Skipping creation." else - # Step 2: Create `.env` file and set OpenAI key + # Step 2: Create `.env` file and set API keys if [ -z "$OPENAI_API_KEY" ]; then echo "Error: OPENAI_API_KEY environment variable is not set for the demo." exit 1 fi + if [ -z "$ANTHROPIC_API_KEY" ]; then + echo "Warning: ANTHROPIC_API_KEY environment variable is not set. Anthropic features may not work." + fi echo "Creating .env file..." echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env - echo ".env file created with OPENAI_API_KEY." + if [ -n "$ANTHROPIC_API_KEY" ]; then + echo "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env + fi + echo ".env file created with API keys." fi - # Step 3: Start Plano - echo "Starting Plano with config.yaml..." - planoai up config.yaml + # Step 3: Optionally start UI services (AnythingLLM, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM, Jaeger)..." + docker compose up -d + fi - # Step 4: Start developer services - echo "Starting Network Agent using Docker Compose..." - docker compose up -d # Run in detached mode + # Step 4: Start Plano + echo "Starting Plano with arch_config_with_aliases.yaml..." + planoai up arch_config_with_aliases.yaml + + echo "" + echo "Plano started successfully." + echo "Please run the following CURL command to test model alias routing. Additional instructions are in the README.md file." + echo "" + echo "curl -sS -X POST \"http://localhost:12000/v1/chat/completions\" \ + -H \"Authorization: Bearer test-key\" \ + -H \"Content-Type: application/json\" \ + -d '{ + \"model\": \"arch.summarize.v1\", + \"max_tokens\": 50, + \"messages\": [ + { \"role\": \"user\", + \"content\": \"Hello, please respond with exactly: Hello from alias arch.summarize.v1!\" + } + ] + }' | jq ." } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping Network Agent using Docker Compose..." - docker compose down + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true - # Step 2: Stop Plano + # Stop Plano echo "Stopping Plano..." planoai down } @@ -42,6 +67,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - # Default action is to bring the demo up - start_demo + start_demo "$1" fi diff --git a/demos/integrations/spotify_bearer_auth/run_demo.sh b/demos/integrations/spotify_bearer_auth/run_demo.sh index 6623dee5..e430a1cd 100644 --- a/demos/integrations/spotify_bearer_auth/run_demo.sh +++ b/demos/integrations/spotify_bearer_auth/run_demo.sh @@ -18,22 +18,24 @@ start_demo() { echo ".env file created with OPENAI_API_KEY." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (AnythingLLM, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM, Jaeger)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - - # Step 4: Start developer services - echo "Starting Network Agent using Docker Compose..." - docker compose up -d # Run in detached mode } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping Network Agent using Docker Compose..." - docker compose down + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true - # Step 2: Stop Plano + # Stop Plano echo "Stopping Plano..." planoai down } @@ -42,6 +44,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - # Default action is to bring the demo up - start_demo + start_demo "$1" fi diff --git a/demos/llm_routing/preference_based_routing/README.md b/demos/llm_routing/preference_based_routing/README.md index 03d28cee..009002fd 100644 --- a/demos/llm_routing/preference_based_routing/README.md +++ b/demos/llm_routing/preference_based_routing/README.md @@ -10,19 +10,58 @@ cd demos/llm_routing/preference_based_routing ./run_demo.sh ``` -Or manually: +To also start AnythingLLM (chat UI) and Jaeger (tracing): -1. Start Plano ```bash -planoai up config.yaml +./run_demo.sh --with-ui ``` -2. Start AnythingLLM +Then open AnythingLLM at http://localhost:3001/ + +Or start manually: + +1. (Optional) Start AnythingLLM and Jaeger ```bash docker compose up -d ``` -3. open AnythingLLM http://localhost:3001/ +2. Start Plano +```bash +planoai up config.yaml +``` + +3. Test with curl or open AnythingLLM http://localhost:3001/ + +## Running with local Arch-Router (via Ollama) + +By default, Plano uses a hosted Arch-Router endpoint. To self-host Arch-Router locally using Ollama: + +1. Install [Ollama](https://ollama.ai) and pull the model: +```bash +ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M +``` + +2. Make sure Ollama is running (`ollama serve` or the macOS app). + +3. Start Plano with the local config: +```bash +planoai up plano_config_local.yaml +``` + +4. Test routing: +```bash +curl -s "http://localhost:12000/routing/v1/messages" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-mini", + "max_tokens": 1024, + "messages": [ + {"role": "user", "content": "Create a REST API endpoint in Rust using actix-web"} + ] + }' +``` + +You should see the router select the appropriate model based on the routing preferences defined in `plano_config_local.yaml`. # Testing out preference based routing diff --git a/demos/llm_routing/preference_based_routing/run_demo.sh b/demos/llm_routing/preference_based_routing/run_demo.sh index c9525c26..30e0c67b 100755 --- a/demos/llm_routing/preference_based_routing/run_demo.sh +++ b/demos/llm_routing/preference_based_routing/run_demo.sh @@ -24,22 +24,24 @@ start_demo() { echo ".env file created with API keys." fi - # Step 3: Start Plano + # Step 3: Optionally start UI services (AnythingLLM, Jaeger) + # Jaeger must start before Plano so it can bind the OTEL port (4317) + if [ "$1" == "--with-ui" ]; then + echo "Starting UI services (AnythingLLM, Jaeger)..." + docker compose up -d + fi + + # Step 4: Start Plano echo "Starting Plano with config.yaml..." planoai up config.yaml - - # Step 4: Start services - echo "Starting services using Docker Compose..." - docker compose up -d } # Function to stop the demo stop_demo() { - # Step 1: Stop Docker Compose services - echo "Stopping Docker Compose services..." - docker compose down + # Stop Docker Compose services if running + docker compose down 2>/dev/null || true - # Step 2: Stop Plano + # Stop Plano echo "Stopping Plano..." planoai down } @@ -48,5 +50,5 @@ stop_demo() { if [ "$1" == "down" ]; then stop_demo else - start_demo + start_demo "$1" fi diff --git a/docs/source/guides/llm_router.rst b/docs/source/guides/llm_router.rst index 188b1e30..41c51b4a 100644 --- a/docs/source/guides/llm_router.rst +++ b/docs/source/guides/llm_router.rst @@ -228,6 +228,129 @@ In summary, Arch-Router demonstrates: - **Production-Ready Performance**: Optimized for low-latency, high-throughput applications in multi-model environments. +Self-hosting Arch-Router +------------------------ + +By default, Plano uses a hosted Arch-Router endpoint. To run Arch-Router locally, you can serve the model yourself using either **Ollama** or **vLLM**. + +Using Ollama (recommended for local development) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +1. **Install Ollama** + + Download and install from `ollama.ai `_. + +2. **Pull and serve Arch-Router** + + .. code-block:: bash + + ollama pull hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M + ollama serve + + This downloads the quantized GGUF model from HuggingFace and starts serving on ``http://localhost:11434``. + +3. **Configure Plano to use local Arch-Router** + + .. code-block:: yaml + + routing: + model: Arch-Router + llm_provider: arch-router + + model_providers: + - name: arch-router + model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M + base_url: http://localhost:11434 + + - model: openai/gpt-5.2 + access_key: $OPENAI_API_KEY + default: true + + - model: anthropic/claude-sonnet-4-5 + access_key: $ANTHROPIC_API_KEY + routing_preferences: + - name: creative writing + description: creative content generation, storytelling, and writing assistance + +4. **Verify the model is running** + + .. code-block:: bash + + curl http://localhost:11434/v1/models + + You should see ``Arch-Router-1.5B`` listed in the response. + +Using vLLM (recommended for production / EC2) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +vLLM provides higher throughput and GPU optimizations suitable for production deployments. + +1. **Install vLLM** + + .. code-block:: bash + + pip install vllm + +2. **Download the model weights** + + The GGUF weights are downloaded automatically from HuggingFace on first use. To pre-download: + + .. code-block:: bash + + pip install huggingface_hub + huggingface-cli download katanemo/Arch-Router-1.5B.gguf + +3. **Start the vLLM server** + + After downloading, find the GGUF file and Jinja template in the HuggingFace cache: + + .. code-block:: bash + + # Find the downloaded files + SNAPSHOT_DIR=$(ls -d ~/.cache/huggingface/hub/models--katanemo--Arch-Router-1.5B.gguf/snapshots/*/ | head -1) + + vllm serve ${SNAPSHOT_DIR}Arch-Router-1.5B-Q4_K_M.gguf \ + --host 0.0.0.0 \ + --port 10000 \ + --load-format gguf \ + --chat-template ${SNAPSHOT_DIR}template.jinja \ + --tokenizer katanemo/Arch-Router-1.5B \ + --served-model-name Arch-Router \ + --gpu-memory-utilization 0.3 \ + --tensor-parallel-size 1 \ + --enable-prefix-caching + +4. **Configure Plano to use the vLLM endpoint** + + .. code-block:: yaml + + routing: + model: Arch-Router + llm_provider: arch-router + + model_providers: + - name: arch-router + model: Arch-Router + base_url: http://:10000 + + - model: openai/gpt-5.2 + access_key: $OPENAI_API_KEY + default: true + + - model: anthropic/claude-sonnet-4-5 + access_key: $ANTHROPIC_API_KEY + routing_preferences: + - name: creative writing + description: creative content generation, storytelling, and writing assistance + +5. **Verify the server is running** + + .. code-block:: bash + + curl http://localhost:10000/health + curl http://localhost:10000/v1/models + + Combining Routing Methods ------------------------- diff --git a/tests/e2e/run_e2e_tests.sh b/tests/e2e/run_e2e_tests.sh index c24931f4..a164b7f9 100644 --- a/tests/e2e/run_e2e_tests.sh +++ b/tests/e2e/run_e2e_tests.sh @@ -21,10 +21,11 @@ trap 'print_debug' INT TERM ERR log starting > ../build.log -log building and running function_calling demo +log starting weather_forecast agent natively log =========================================== cd ../../demos/getting_started/weather_forecast/ -docker compose up weather_forecast_service --build -d +bash start_agents.sh & +AGENTS_PID=$! cd - log building and installing plano cli @@ -78,8 +79,6 @@ log running e2e tests for openai responses api client log ======================================== uv run pytest test_openai_responses_api_client_with_state.py -log shutting down the weather_forecast demo +log shutting down the weather_forecast agent log ======================================= -cd ../../demos/getting_started/weather_forecast -docker compose down -cd - +kill $AGENTS_PID 2>/dev/null || true diff --git a/tests/e2e/run_prompt_gateway_tests.sh b/tests/e2e/run_prompt_gateway_tests.sh index 58d850d8..1e947813 100755 --- a/tests/e2e/run_prompt_gateway_tests.sh +++ b/tests/e2e/run_prompt_gateway_tests.sh @@ -32,10 +32,11 @@ cd - # Re-sync e2e deps uv sync -# Start weather_forecast service (needed for prompt_gateway tests) -log "building and running weather_forecast service" +# Start weather_forecast service natively (needed for prompt_gateway tests) +log "starting weather_forecast agent natively" cd ../../demos/getting_started/weather_forecast/ -docker compose up weather_forecast_service --build -d +bash start_agents.sh & +AGENTS_PID=$! cd - # Start gateway with prompt_gateway config @@ -52,6 +53,4 @@ uv run pytest test_prompt_gateway.py # Cleanup log "shutting down" planoai down --docker || true -cd ../../demos/getting_started/weather_forecast -docker compose down -cd - +kill $AGENTS_PID 2>/dev/null || true