Update docs to Plano (#639)

2026-04-25 00:36:34 +02:00 · 2025-12-23 17:14:50 -08:00 · 2025-12-23 17:14:50 -08:00 · e224cba3e3
commit e224cba3e3
parent 15fbb6c3af
139 changed files with 4407 additions and 24735 deletions
--- a/docs/source/resources/includes/agents/agents_config.yaml
+++ b/docs/source/resources/includes/agents/agents_config.yaml
@ -0,0 +1,57 @@
+version: v0.3.0
+
+agents:
+  - id: weather_agent
+    url: http://host.docker.internal:10510
+  - id: flight_agent
+    url: http://host.docker.internal:10520
+
+model_providers:
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+    default: true
+  - model: openai/gpt-4o-mini
+    access_key: $OPENAI_API_KEY # smaller, faster, cheaper model for extracting entities like location
+
+listeners:
+  - type: agent
+    name: travel_booking_service
+    port: 8001
+    router: plano_orchestrator_v1
+    agents:
+      - id: weather_agent
+        description: |
+
+          WeatherAgent is a specialized AI assistant for real-time weather information and forecasts. It provides accurate weather data for any city worldwide using the Open-Meteo API, helping travelers plan their trips with up-to-date weather conditions.
+
+          Capabilities:
+            * Get real-time weather conditions and multi-day forecasts for any city worldwide using Open-Meteo API (free, no API key needed)
+            * Provides current temperature
+            * Provides multi-day forecasts
+            * Provides weather conditions
+            * Provides sunrise/sunset times
+            * Provides detailed weather information
+            * Understands conversation context to resolve location references from previous messages
+            * Handles weather-related questions including "What's the weather in [city]?", "What's the forecast for [city]?", "How's the weather in [city]?"
+            * When queries include both weather and other travel questions (e.g., flights, currency), this agent answers ONLY the weather part
+
+      - id: flight_agent
+        description: |
+
+          FlightAgent is an AI-powered tool specialized in providing live flight information between airports. It leverages the FlightAware AeroAPI to deliver real-time flight status, gate information, and delay updates.
+
+          Capabilities:
+            * Get live flight information between airports using FlightAware AeroAPI
+            * Shows real-time flight status
+            * Shows scheduled/estimated/actual departure and arrival times
+            * Shows gate and terminal information
+            * Shows delays
+            * Shows aircraft type
+            * Shows flight status
+            * Automatically resolves city names to airport codes (IATA/ICAO)
+            * Understands conversation context to infer origin/destination from follow-up questions
+            * Handles flight-related questions including "What flights go from [city] to [city]?", "Do flights go to [city]?", "Are there direct flights from [city]?"
+            * When queries include both flight and other travel questions (e.g., weather, currency), this agent answers ONLY the flight part
+
+tracing:
+  random_sampling: 100
--- a/docs/source/resources/includes/agents/flights.py
+++ b/docs/source/resources/includes/agents/flights.py
@ -0,0 +1,475 @@
+import json
+from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse
+from openai import AsyncOpenAI
+import os
+import logging
+import time
+import uuid
+import uvicorn
+from datetime import datetime, timedelta
+import httpx
+from typing import Optional
+from opentelemetry.propagate import extract, inject
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - [FLIGHT_AGENT] - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+# Configuration: gateway endpoint (overridable through the environment) and the
+# models used for the final answer and for the cheaper extraction calls.
+LLM_GATEWAY_ENDPOINT = os.getenv(
+    "LLM_GATEWAY_ENDPOINT", "http://host.docker.internal:12000/v1"
+)
+FLIGHT_MODEL = "openai/gpt-4o"
+EXTRACTION_MODEL = "openai/gpt-4o-mini"
+
+# FlightAware AeroAPI configuration
+AEROAPI_BASE_URL = "https://aeroapi.flightaware.com/aeroapi"
+# The key must be supplied through the environment; a real credential must
+# never be committed as a fallback default.
+AEROAPI_KEY = os.getenv("AEROAPI_KEY", "")
+if not AEROAPI_KEY:
+    logger.warning(
+        "AEROAPI_KEY is not set; FlightAware lookups will fail until it is provided"
+    )
+
+# HTTP client for API calls (shared by all requests, 30 second timeout)
+http_client = httpx.AsyncClient(timeout=30.0)
+
+# Initialize OpenAI client pointed at the gateway endpoint.
+# NOTE(review): api_key is a placeholder value; presumably the gateway supplies
+# the real provider credentials - confirm.
+openai_client_via_plano = AsyncOpenAI(
+    base_url=LLM_GATEWAY_ENDPOINT,
+    api_key="EMPTY",
+)
+
+# System prompt for flight agent.
+# Sent as the first message of the final (streaming) completion request. The
+# field list below mirrors the keys of the flight dicts built in get_flights(),
+# so the two must be kept in sync when either changes.
+SYSTEM_PROMPT = """You are a travel planning assistant specializing in flight information in a multi-agent system. You will receive flight data in JSON format with these fields:
+
+- "airline": Full airline name (e.g., "Delta Air Lines")
+- "flight_number": Flight identifier (e.g., "DL123")
+- "departure_time": ISO 8601 timestamp for scheduled departure (e.g., "2025-12-24T23:00:00Z")
+- "arrival_time": ISO 8601 timestamp for scheduled arrival (e.g., "2025-12-25T04:40:00Z")
+- "origin": Origin airport IATA code (e.g., "ATL")
+- "destination": Destination airport IATA code (e.g., "SEA")
+- "aircraft_type": Aircraft model code (e.g., "A21N", "B739")
+- "status": Flight status (e.g., "Scheduled", "Delayed")
+- "terminal_origin": Departure terminal (may be null)
+- "gate_origin": Departure gate (may be null)
+
+Your task:
+1. Read the JSON flight data carefully
+2. Present each flight clearly with: airline, flight number, departure/arrival times (convert from ISO format to readable time), airports, and aircraft type
+3. Organize flights chronologically by departure time
+4. Convert ISO timestamps to readable format (e.g., "11:00 PM" or "23:00")
+5. Include terminal/gate info when available
+6. Use natural, conversational language
+
+Important: If the conversation includes information from other agents (like weather details), acknowledge and build upon that context naturally. Your primary focus is flights, but maintain awareness of the full conversation.
+
+Remember: All the data you need is in the JSON. Use it directly."""
+
+
+async def extract_flight_route(messages: list, request: Request) -> dict:
+    """Extract origin, destination, and date from conversation using LLM.
+
+    Args:
+        messages: Chat messages (dicts with "role" and "content"). Only the
+            last five are forwarded to the extraction model.
+        request: Incoming request; its headers carry the trace context that is
+            propagated to the gateway call.
+
+    Returns:
+        A dict with "origin", "destination" and "date" keys holding whatever
+        the model returned for each (or None). All three are None when the
+        model call or the JSON parsing fails.
+    """
+    # Anchor relative dates ("tomorrow", "next Monday") to the real current
+    # date; a date frozen into the prompt goes stale the day after it is written.
+    today = datetime.now().date()
+    tomorrow = today + timedelta(days=1)
+    christmas = today.replace(month=12, day=25)
+    if christmas < today:
+        christmas = christmas.replace(year=today.year + 1)
+    # weekday() is 0 for Monday, so this is always 1 to 7 days ahead.
+    next_monday = today + timedelta(days=7 - today.weekday())
+
+    # Literal JSON braces are doubled because this is an f-string.
+    extraction_prompt = f"""Extract flight origin, destination cities, and travel date from the conversation.
+
+    Rules:
+    1. Look for patterns: "flight from X to Y", "flights to Y", "fly from X"
+    2. Extract dates like "tomorrow", "next week", "December 25", "12/25", "on Monday"
+    3. Use conversation context to fill in missing details
+    4. Return JSON: {{"origin": "City" or null, "destination": "City" or null, "date": "YYYY-MM-DD" or null}}
+
+    Examples:
+    - "Flight from Seattle to Atlanta tomorrow" -> {{"origin": "Seattle", "destination": "Atlanta", "date": "{tomorrow}"}}
+    - "What flights go to New York?" -> {{"origin": null, "destination": "New York", "date": null}}
+    - "Flights to Miami on Christmas" -> {{"origin": null, "destination": "Miami", "date": "{christmas}"}}
+    - "Show me flights from LA to NYC next Monday" -> {{"origin": "LA", "destination": "NYC", "date": "{next_monday}"}}
+
+    Today is {today:%A, %B %d, %Y} ({today}). Extract flight route and date:"""
+
+    try:
+        # Forward the caller's trace context so the gateway call joins the trace.
+        ctx = extract(request.headers)
+        extra_headers = {}
+        inject(extra_headers, context=ctx)
+
+        response = await openai_client_via_plano.chat.completions.create(
+            model=EXTRACTION_MODEL,
+            messages=[
+                {"role": "system", "content": extraction_prompt},
+                *[
+                    {"role": msg.get("role"), "content": msg.get("content")}
+                    for msg in messages[-5:]
+                ],
+            ],
+            temperature=0.1,
+            max_tokens=100,
+            extra_headers=extra_headers if extra_headers else None,
+        )
+
+        result = (response.choices[0].message.content or "").strip()
+        # The model may wrap its JSON in a Markdown code fence; unwrap it.
+        if "```json" in result:
+            result = result.split("```json")[1].split("```")[0].strip()
+        elif "```" in result:
+            result = result.split("```")[1].split("```")[0].strip()
+
+        route = json.loads(result)
+        return {
+            "origin": route.get("origin"),
+            "destination": route.get("destination"),
+            "date": route.get("date"),
+        }
+    except Exception as e:
+        # Best effort: any failure degrades to "nothing extracted" so the
+        # caller can ask the user for the missing details.
+        logger.error(f"Error extracting flight route: {e}")
+        return {"origin": None, "destination": None, "date": None}
+
+
+async def resolve_airport_code(city_name: str, request: Request) -> Optional[str]:
+    """Convert city name to airport code using LLM.
+
+    Args:
+        city_name: City (or airport) name as extracted from the conversation.
+        request: Incoming request; its headers carry the trace context that is
+            propagated to the gateway call.
+
+    Returns:
+        The upper-cased three-letter code returned by the model, or None when
+        the name is empty, the call fails, or the reply is not exactly three
+        letters.
+    """
+    if not city_name:
+        return None
+
+    try:
+        ctx = extract(request.headers)
+        extra_headers = {}
+        inject(extra_headers, context=ctx)
+
+        response = await openai_client_via_plano.chat.completions.create(
+            model=EXTRACTION_MODEL,
+            messages=[
+                {
+                    "role": "system",
+                    "content": "Convert city names to primary airport IATA codes. Return only the 3-letter code. Examples: Seattle→SEA, Atlanta→ATL, New York→JFK, London→LHR",
+                },
+                {"role": "user", "content": city_name},
+            ],
+            temperature=0.1,
+            max_tokens=10,
+            extra_headers=extra_headers if extra_headers else None,
+        )
+
+        code = (response.choices[0].message.content or "").strip().upper()
+        code = code.strip("\"'`.,!? \n\t")
+        # The code is later interpolated into a URL path, so accept only three
+        # ASCII letters; replies such as "N/A" must not pass on length alone.
+        if len(code) == 3 and code.isascii() and code.isalpha():
+            return code
+        return None
+    except Exception as e:
+        logger.error(f"Error resolving airport code for {city_name}: {e}")
+        return None
+
+
+async def get_flights(
+    origin_code: str, dest_code: str, travel_date: Optional[str] = None
+) -> Optional[dict]:
+    """Get flights between two airports using FlightAware API.
+
+    Args:
+        origin_code: Origin airport IATA code
+        dest_code: Destination airport IATA code
+        travel_date: Travel date in YYYY-MM-DD format, defaults to today
+
+    Returns:
+        A dict with "origin_code", "destination_code", "flights" (at most five
+        entries) and "count". When the date cannot be parsed or is more than
+        two days ahead, "flights" is empty and an "error" message is included.
+        None when the API responds with a non-200 status or any other error
+        occurs.
+
+    Note: FlightAware API limits searches to 2 days in the future.
+    """
+    try:
+        today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+
+        # Use provided date or default to today. The date comes from an LLM, so
+        # report an unparsable value explicitly instead of letting it surface
+        # as a generic "no flights found".
+        if travel_date:
+            try:
+                search_date_obj = datetime.strptime(travel_date, "%Y-%m-%d")
+            except (TypeError, ValueError):
+                logger.warning(f"Could not parse travel date: {travel_date!r}")
+                return {
+                    "origin_code": origin_code,
+                    "destination_code": dest_code,
+                    "flights": [],
+                    "count": 0,
+                    "error": f"I couldn't understand the travel date '{travel_date}'. Please provide it as YYYY-MM-DD.",
+                }
+        else:
+            search_date_obj = today
+
+        # Re-format so the value sent to the API is always zero-padded.
+        search_date = search_date_obj.strftime("%Y-%m-%d")
+
+        # Validate date is not too far in the future (FlightAware limit: 2 days)
+        days_ahead = (search_date_obj - today).days
+
+        if days_ahead > 2:
+            logger.warning(
+                f"Requested date {search_date} is {days_ahead} days ahead, exceeds FlightAware 2-day limit"
+            )
+            return {
+                "origin_code": origin_code,
+                "destination_code": dest_code,
+                "flights": [],
+                "count": 0,
+                "error": f"FlightAware API only provides flight data up to 2 days in the future. The requested date ({search_date}) is {days_ahead} days ahead. Please search for today, tomorrow, or the day after.",
+            }
+
+        url = f"{AEROAPI_BASE_URL}/airports/{origin_code}/flights/to/{dest_code}"
+        headers = {"x-apikey": AEROAPI_KEY}
+        params = {
+            "start": f"{search_date}T00:00:00Z",
+            "end": f"{search_date}T23:59:59Z",
+            "connection": "nonstop",
+            "max_pages": 1,
+        }
+
+        response = await http_client.get(url, headers=headers, params=params)
+
+        if response.status_code != 200:
+            logger.error(
+                f"FlightAware API error {response.status_code}: {response.text}"
+            )
+            return None
+
+        data = response.json()
+        flights = []
+
+        # Log how many flights the API returned, for debugging
+        logger.info(f"FlightAware API returned {len(data.get('flights', []))} flights")
+
+        for flight_group in data.get("flights", [])[:5]:  # Limit to 5 flights
+            # FlightAware API nests data in segments array
+            segments = flight_group.get("segments", [])
+            if not segments:
+                continue
+
+            flight = segments[0]  # Get first segment (direct flights only have one)
+
+            # Extract airport codes from nested objects
+            flight_origin = None
+            flight_dest = None
+
+            if isinstance(flight.get("origin"), dict):
+                flight_origin = flight["origin"].get("code_iata")
+
+            if isinstance(flight.get("destination"), dict):
+                flight_dest = flight["destination"].get("code_iata")
+
+            # Build flight object
+            flights.append(
+                {
+                    "airline": flight.get("operator"),
+                    "flight_number": flight.get("ident_iata") or flight.get("ident"),
+                    "departure_time": flight.get("scheduled_out"),
+                    "arrival_time": flight.get("scheduled_in"),
+                    "origin": flight_origin,
+                    "destination": flight_dest,
+                    "aircraft_type": flight.get("aircraft_type"),
+                    "status": flight.get("status"),
+                    "terminal_origin": flight.get("terminal_origin"),
+                    "gate_origin": flight.get("gate_origin"),
+                }
+            )
+
+        return {
+            "origin_code": origin_code,
+            "destination_code": dest_code,
+            "flights": flights,
+            "count": len(flights),
+        }
+    except Exception as e:
+        logger.error(f"Error fetching flights: {e}")
+        return None
+
+
+app = FastAPI(title="Flight Information Agent", version="1.0.0")
+
+
+@app.post("/v1/chat/completions")
+async def handle_request(request: Request):
+    """HTTP endpoint for chat completions with streaming support.
+
+    Parses the JSON request body and returns a streaming response whose body is
+    produced by invoke_flight_agent() as server-sent events.
+    """
+    request_body = await request.json()
+
+    # media_type now agrees with the SSE payload; the explicit header is kept
+    # so the Content-Type value sent to clients is the same as before.
+    return StreamingResponse(
+        invoke_flight_agent(request, request_body),
+        media_type="text/event-stream",
+        headers={"content-type": "text/event-stream"},
+    )
+
+
+def _single_reply_events(content: str, model: str) -> list:
+    """Build the SSE events for a one-chunk reply followed by the [DONE] marker."""
+    chunk = {
+        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
+        "object": "chat.completion.chunk",
+        "created": int(time.time()),
+        "model": model,
+        "choices": [
+            {
+                "index": 0,
+                "delta": {"content": content},
+                "finish_reason": "stop",
+            }
+        ],
+    }
+    return [f"data: {json.dumps(chunk)}\n\n", "data: [DONE]\n\n"]
+
+
+def _attach_flight_context(messages: list, flight_context: str) -> list:
+    """Copy the chat history, appending the flight data to the last user message."""
+    last_user_idx = None
+    for i, msg in enumerate(messages):
+        if msg.get("role") == "user":
+            last_user_idx = i
+
+    attached = []
+    for i, msg in enumerate(messages):
+        content = msg.get("content")
+        if i == last_user_idx:
+            if isinstance(content, list):
+                # Content given as a list of parts: add the data as a text part.
+                content = [*content, {"type": "text", "text": flight_context}]
+            else:
+                # Treat a missing/None content as empty text.
+                content = (content or "") + flight_context
+        attached.append({"role": msg.get("role"), "content": content})
+
+    # Without any user message the data would otherwise never reach the model.
+    if last_user_idx is None:
+        attached.append({"role": "user", "content": flight_context})
+    return attached
+
+
+async def invoke_flight_agent(request: Request, request_body: dict):
+    """Generate streaming chat completions.
+
+    Async generator yielding server-sent-event strings ("data: ...\\n\\n").
+    The route is extracted from the conversation, airport codes are resolved,
+    flights are fetched, and the model's answer is streamed. Whenever a step
+    cannot proceed, a single explanatory chunk is emitted instead. Every path
+    ends with a "data: [DONE]" event.
+
+    Args:
+        request: Incoming request (used for trace-context propagation).
+        request_body: Parsed JSON body; "messages", "model", "temperature" and
+            "max_tokens" are read from it.
+    """
+    messages = request_body.get("messages", [])
+    model = request_body.get("model", FLIGHT_MODEL)
+
+    # Step 1: Extract origin, destination, and date
+    route = await extract_flight_route(messages, request)
+    origin = route.get("origin")
+    destination = route.get("destination")
+    travel_date = route.get("date")
+
+    # Step 2: Short circuit if missing origin or destination
+    if not origin or not destination:
+        missing = []
+        if not origin:
+            missing.append("origin city")
+        if not destination:
+            missing.append("destination city")
+
+        error_message = f"I need both origin and destination cities to search for flights. Please provide the {' and '.join(missing)}. For example: 'Flights from Seattle to Atlanta'"
+
+        for event in _single_reply_events(error_message, model):
+            yield event
+        return
+
+    # Step 3: Resolve airport codes
+    origin_code = await resolve_airport_code(origin, request)
+    dest_code = await resolve_airport_code(destination, request)
+
+    if not origin_code or not dest_code:
+        unresolved_message = f"I couldn't find airport codes for {origin if not origin_code else destination}. Please check the city name."
+        for event in _single_reply_events(unresolved_message, model):
+            yield event
+        return
+
+    # Step 4: Get live flight data
+    flight_data = await get_flights(origin_code, dest_code, travel_date)
+
+    # Determine date display for messages
+    date_display = travel_date if travel_date else "today"
+
+    if not flight_data or not flight_data.get("flights"):
+        # Check if there's a specific error message (e.g., date too far in future)
+        error_detail = flight_data.get("error") if flight_data else None
+        if error_detail:
+            no_flights_message = error_detail
+        else:
+            no_flights_message = f"No direct flights found from {origin} ({origin_code}) to {destination} ({dest_code}) for {date_display}."
+
+        for event in _single_reply_events(no_flights_message, model):
+            yield event
+        return
+
+    # Step 5: Prepare context for LLM - append flight data to last user message
+    flight_context = f"""
+
+Flight search results from {origin} ({origin_code}) to {destination} ({dest_code}):
+
+Flight data in JSON format:
+{json.dumps(flight_data, indent=2)}
+
+Present these {len(flight_data.get('flights', []))} flight(s) to the user in a clear, readable format."""
+
+    # Build message history with flight data appended to the last user message
+    response_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    response_messages.extend(_attach_flight_context(messages, flight_context))
+
+    # Log what we're sending to the LLM for debugging
+    logger.info(f"Sending messages to LLM: {json.dumps(response_messages, indent=2)}")
+
+    # Step 6: Stream response
+    try:
+        ctx = extract(request.headers)
+        extra_headers = {"x-envoy-max-retries": "3"}
+        inject(extra_headers, context=ctx)
+
+        stream = await openai_client_via_plano.chat.completions.create(
+            model=FLIGHT_MODEL,
+            messages=response_messages,
+            temperature=request_body.get("temperature", 0.7),
+            max_tokens=request_body.get("max_tokens", 1000),
+            stream=True,
+            extra_headers=extra_headers,
+        )
+
+        async for chunk in stream:
+            # Forward only chunks that carry choices.
+            if chunk.choices:
+                yield f"data: {chunk.model_dump_json()}\n\n"
+
+        yield "data: [DONE]\n\n"
+
+    except Exception as e:
+        logger.error(f"Error generating flight response: {e}")
+        for event in _single_reply_events(
+            "I apologize, but I'm having trouble retrieving flight information right now. Please try again.",
+            model,
+        ):
+            yield event
+
+
+@app.get("/health")
+async def health_check():
+    """Report liveness of the flight agent as a fixed JSON payload."""
+    payload = dict(status="healthy", agent="flight_information")
+    return payload
+
+
+def start_server(host: str = "localhost", port: int = 10520):
+    """Run the FastAPI app under uvicorn with the agent's log format.
+
+    Args:
+        host: Interface to bind to.
+        port: TCP port to listen on.
+    """
+    # Formatter and handler are named separately, then assembled into the
+    # dictConfig-style mapping passed to uvicorn.
+    formatter = {
+        "format": "%(asctime)s - [FLIGHT_AGENT] - %(levelname)s - %(message)s",
+    }
+    stdout_handler = {
+        "formatter": "default",
+        "class": "logging.StreamHandler",
+        "stream": "ext://sys.stdout",
+    }
+    log_config = {
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": {"default": formatter},
+        "handlers": {"default": stdout_handler},
+        "root": {"level": "INFO", "handlers": ["default"]},
+    }
+    uvicorn.run(app, host=host, port=port, log_config=log_config)
+
+
+if __name__ == "__main__":
+    # Bind to all interfaces when launched as a script.
+    start_server(host="0.0.0.0", port=10520)
--- a/docs/source/resources/includes/agents/weather.py
+++ b/docs/source/resources/includes/agents/weather.py
@ -0,0 +1,426 @@
+import json
+import re
+from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse
+from openai import AsyncOpenAI
+import os
+import logging
+import time
+import uuid
+import uvicorn
+from datetime import datetime, timedelta
+import httpx
+from typing import Optional
+from urllib.parse import quote
+from opentelemetry.propagate import extract, inject
+
+# Set up logging: INFO level, every record tagged with [WEATHER_AGENT].
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - [WEATHER_AGENT] - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
+# Configuration for plano LLM gateway (overridable via LLM_GATEWAY_ENDPOINT).
+# NOTE(review): the default port here is 12001, while the sibling flights.py
+# agent defaults to 12000 - confirm which one is intended.
+LLM_GATEWAY_ENDPOINT = os.getenv(
+    "LLM_GATEWAY_ENDPOINT", "http://host.docker.internal:12001/v1"
+)
+# WEATHER_MODEL / LOCATION_MODEL: model identifiers; LOCATION_MODEL is the one
+# used for the location-extraction call in get_weather_data().
+WEATHER_MODEL = "openai/gpt-4o"
+LOCATION_MODEL = "openai/gpt-4o-mini"
+
+# Initialize OpenAI client for plano
+# NOTE(review): api_key is a placeholder value; presumably the gateway supplies
+# the real provider credentials - confirm.
+openai_client_via_plano = AsyncOpenAI(
+    base_url=LLM_GATEWAY_ENDPOINT,
+    api_key="EMPTY",
+)
+
+# FastAPI app for REST server
+app = FastAPI(title="Weather Forecast Agent", version="1.0.0")
+
+# HTTP client for API calls (shared by all requests, 10 second timeout)
+http_client = httpx.AsyncClient(timeout=10.0)
+
+
+# Utility functions
+def celsius_to_fahrenheit(temp_c: Optional[float]) -> Optional[float]:
+    """Return *temp_c* in Fahrenheit rounded to one decimal, or None for None."""
+    if temp_c is None:
+        return None
+    fahrenheit = temp_c * 9 / 5 + 32
+    return round(fahrenheit, 1)
+
+
+def get_user_messages(messages: list) -> list:
+    """Return, in order, the messages whose "role" is "user"."""
+    from_user = []
+    for entry in messages:
+        if entry.get("role") == "user":
+            from_user.append(entry)
+    return from_user
+
+
+def get_last_user_content(messages: list) -> str:
+    """Get the lower-cased text of the most recent user message.
+
+    Returns "" when there is no user message or its content holds no text.
+    Content given as a list of parts contributes the "text" of each part,
+    joined with spaces.
+    """
+    for msg in reversed(messages):
+        if msg.get("role") != "user":
+            continue
+        content = msg.get("content")
+        if isinstance(content, str):
+            return content.lower()
+        if isinstance(content, list):
+            texts = [
+                part["text"]
+                for part in content
+                if isinstance(part, dict) and isinstance(part.get("text"), str)
+            ]
+            return " ".join(texts).lower()
+        # None or any other non-text content: nothing to search for keywords.
+        return ""
+    return ""
+
+
+def _weather_unavailable(location: str) -> dict:
+    """Build the placeholder payload returned when weather data cannot be fetched."""
+    now = datetime.now()
+    return {
+        "location": location,
+        "weather": {
+            "date": now.strftime("%Y-%m-%d"),
+            "day_name": now.strftime("%A"),
+            "temperature_c": None,
+            "temperature_f": None,
+            "weather_code": None,
+            "error": "Could not retrieve weather data",
+        },
+    }
+
+
+def _daily_value(daily: dict, key: str, index: int, default=None):
+    """Return daily[key][index], or *default* when the series is missing or too short."""
+    series = daily.get(key)
+    if not series or index >= len(series):
+        return default
+    return series[index]
+
+
+def _geocode_url(name: str) -> str:
+    """Return the Open-Meteo geocoding search URL for *name* (first match only)."""
+    return f"https://geocoding-api.open-meteo.com/v1/search?name={quote(name)}&count=1&language=en&format=json"
+
+
+async def get_weather_data(request: Request, messages: list, days: int = 1):
+    """Extract location from user's conversation and fetch weather data from Open-Meteo API.
+
+    This function does two things:
+    1. Uses an LLM to extract the location from the conversation, falling back
+       to "New York" when none is found or the extraction fails
+    2. Fetches weather data for that location from Open-Meteo
+
+    Args:
+        request: Incoming request (used for trace-context propagation).
+        messages: Chat messages (dicts with "role" and "content").
+        days: Number of forecast days to request and return.
+
+    Returns:
+        On success, {"location": <name>, "forecast": [<one dict per day>]}.
+        On failure, {"location": <name>, "weather": {..., "error": ...}} with
+        the temperature and weather-code fields set to None.
+    """
+    default_location = "New York"
+
+    instructions = """Extract the location for WEATHER queries. Return just the city name.
+
+            Rules:
+            1. For multi-part queries, extract ONLY the location mentioned with weather keywords ("weather in [location]")
+            2. If user says "there" or "that city", it typically refers to the DESTINATION city in travel contexts (not the origin)
+            3. For flight queries with weather, "there" means the destination city where they're traveling TO
+            4. Return plain text (e.g., "London", "New York", "Paris, France")
+            5. If no weather location found, return "NOT_FOUND"
+
+            Examples:
+            - "What's the weather in London?" -> "London"
+            - "Flights from Seattle to Atlanta, and show me the weather there" -> "Atlanta"
+            - "Can you get me flights from Seattle to Atlanta tomorrow, and also please show me the weather there" -> "Atlanta"
+            - "What's the weather in Seattle, and what is one flight that goes direct to Atlanta?" -> "Seattle"
+            - User asked about flights to Atlanta, then "what's the weather like there?" -> "Atlanta"
+            - "I'm going to Seattle" -> "Seattle"
+            - "What's happening?" -> "NOT_FOUND"
+
+            Extract location:"""
+
+    # Step 1: Extract the location from the conversation
+    try:
+        has_user_message = any(msg.get("role") == "user" for msg in messages)
+
+        if not has_user_message:
+            location = default_location
+        else:
+            ctx = extract(request.headers)
+            extra_headers = {}
+            inject(extra_headers, context=ctx)
+
+            # For location extraction, pass full conversation for context (e.g., "there" = previous destination)
+            response = await openai_client_via_plano.chat.completions.create(
+                model=LOCATION_MODEL,
+                messages=[
+                    {"role": "system", "content": instructions},
+                    *[
+                        {"role": msg.get("role"), "content": msg.get("content")}
+                        for msg in messages
+                    ],
+                ],
+                temperature=0.1,
+                max_tokens=50,
+                extra_headers=extra_headers if extra_headers else None,
+            )
+
+            location = response.choices[0].message.content.strip().strip("\"'`.,!?")
+            logger.info(f"Location extraction result: '{location}'")
+
+            if not location or location.upper() == "NOT_FOUND":
+                location = default_location
+                logger.info(f"Location not found, defaulting to: {location}")
+
+    except Exception as e:
+        logger.error(f"Error extracting location: {e}")
+        location = default_location
+
+    logger.info(f"Fetching weather for location: '{location}' (days: {days})")
+
+    # Step 2: Fetch weather data for the extracted location
+    try:
+        # Geocode city to get coordinates
+        geocode_response = await http_client.get(_geocode_url(location))
+
+        if geocode_response.status_code != 200 or not geocode_response.json().get(
+            "results"
+        ):
+            logger.warning(f"Could not geocode {location}, using New York")
+            location = default_location
+            geocode_response = await http_client.get(_geocode_url(location))
+
+        geocode_data = geocode_response.json()
+        if not geocode_data.get("results"):
+            return _weather_unavailable(location)
+
+        result = geocode_data["results"][0]
+        location_name = result.get("name", location)
+        latitude = result["latitude"]
+        longitude = result["longitude"]
+
+        logger.info(
+            f"Geocoded '{location}' to {location_name} ({latitude}, {longitude})"
+        )
+
+        # Get weather forecast
+        weather_url = (
+            f"https://api.open-meteo.com/v1/forecast?"
+            f"latitude={latitude}&longitude={longitude}&"
+            f"current=temperature_2m&"
+            f"daily=sunrise,sunset,temperature_2m_max,temperature_2m_min,weather_code&"
+            f"forecast_days={days}&timezone=auto"
+        )
+
+        weather_response = await http_client.get(weather_url)
+        if weather_response.status_code != 200:
+            return _weather_unavailable(location_name)
+
+        weather_data = weather_response.json()
+        current_temp = weather_data.get("current", {}).get("temperature_2m")
+        daily = weather_data.get("daily", {})
+
+        # Build forecast for requested number of days, bounded by the number
+        # of days the API actually returned.
+        forecast = []
+        for i, date_str in enumerate(daily.get("time", [])[:days]):
+            date_obj = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
+
+            temp_max = _daily_value(daily, "temperature_2m_max", i)
+            temp_min = _daily_value(daily, "temperature_2m_min", i)
+            weather_code = _daily_value(daily, "weather_code", i, 0)
+            sunrise = _daily_value(daily, "sunrise", i)
+            sunset = _daily_value(daily, "sunset", i)
+
+            # Use current temp for today, otherwise the daily max, then the
+            # min. Compare against None so a reading of 0.0 is not discarded.
+            if i == 0 and current_temp is not None:
+                temp_c = current_temp
+            elif temp_max is not None:
+                temp_c = temp_max
+            else:
+                temp_c = temp_min
+
+            forecast.append(
+                {
+                    "date": date_str.split("T")[0],
+                    "day_name": date_obj.strftime("%A"),
+                    "temperature_c": round(temp_c, 1) if temp_c is not None else None,
+                    "temperature_f": celsius_to_fahrenheit(temp_c),
+                    "temperature_max_c": round(temp_max, 1)
+                    if temp_max is not None
+                    else None,
+                    "temperature_min_c": round(temp_min, 1)
+                    if temp_min is not None
+                    else None,
+                    "weather_code": weather_code,
+                    "sunrise": sunrise.split("T")[1] if sunrise else None,
+                    "sunset": sunset.split("T")[1] if sunset else None,
+                }
+            )
+
+        # A response without any daily entries carries no usable weather.
+        if not forecast:
+            return _weather_unavailable(location_name)
+
+        return {"location": location_name, "forecast": forecast}
+
+    except Exception as e:
+        logger.error(f"Error getting weather data: {e}")
+        return _weather_unavailable(location)
+
+
+@app.post("/v1/chat/completions")
+async def handle_request(request: Request):
+    """HTTP endpoint for chat completions with streaming support.
+
+    Parses the JSON request body, logs the incoming messages, and returns a
+    streaming response whose body is produced by invoke_weather_agent() as
+    server-sent events.
+    """
+
+    request_body = await request.json()
+    messages = request_body.get("messages", [])
+    logger.info(
+        "messages detail json dumps: %s",
+        json.dumps(messages, indent=2),
+    )
+
+    traceparent_header = request.headers.get("traceparent")
+    # media_type now agrees with the SSE payload; the explicit header is kept
+    # so the Content-Type value sent to clients is the same as before.
+    return StreamingResponse(
+        invoke_weather_agent(request, request_body, traceparent_header),
+        media_type="text/event-stream",
+        headers={
+            "content-type": "text/event-stream",
+        },
+    )
+
+
+async def invoke_weather_agent(
+    request: Request, request_body: dict, traceparent_header: str = None
+):
+    """Generate streaming chat completions."""
+    messages = request_body.get("messages", [])
+
+    # Detect if user wants multi-day forecast
+    last_user_msg = get_last_user_content(messages)
+    days = 1
+
+    if "forecast" in last_user_msg or "week" in last_user_msg:
+        days = 7
+    elif "tomorrow" in last_user_msg:
+        days = 2
+
+    # Extract specific number of days if mentioned (e.g., "5 day forecast")
+    import re
+
+    day_match = re.search(r"(\d{1,2})\s+day", last_user_msg)
+    if day_match:
+        requested_days = int(day_match.group(1))
+        days = min(requested_days, 16)  # API supports max 16 days
+
+    # Get live weather data (location extraction happens inside this function)
+    weather_data = await get_weather_data(request, messages, days)
+
+    # Create weather context to append to user message
+    forecast_type = "forecast" if days > 1 else "current weather"
+    weather_context = f"""
+
+Weather data for {weather_data['location']} ({forecast_type}):
+{json.dumps(weather_data, indent=2)}"""
+
+    # System prompt for weather agent
+    instructions = """You are a weather assistant in a multi-agent system. You will receive weather data in JSON format with these fields:
+
+    - "location": City name
+    - "forecast": Array of weather objects, each with date, day_name, temperature_c, temperature_f, temperature_max_c, temperature_min_c, weather_code, sunrise, sunset
+    - weather_code: WMO code (0=clear, 1-3=partly cloudy, 45-48=fog, 51-67=rain, 71-86=snow, 95-99=thunderstorm)
+
+    Your task:
+    1. Present the weather/forecast clearly for the location
+    2. For single day: show current conditions
+    3. For multi-day: show each day with date and conditions
+    4. Include temperature in both Celsius and Fahrenheit
+    5. Describe conditions naturally based on weather_code
+    6. Use conversational language
+
+    Important: If the conversation includes information from other agents (like flight details), acknowledge and build upon that context naturally. Your primary focus is weather, but maintain awareness of the full conversation.
+
+    Remember: Only use the provided data. If fields are null, mention data is unavailable."""
+
+    # Build message history with weather data appended to the last user message
+    response_messages = [{"role": "system", "content": instructions}]
+
+    for i, msg in enumerate(messages):
+        # Append weather data to the last user message
+        if i == len(messages) - 1 and msg.get("role") == "user":
+            response_messages.append(
+                {"role": "user", "content": msg.get("content") + weather_context}
+            )
+        else:
+            response_messages.append(
+                {"role": msg.get("role"), "content": msg.get("content")}
+            )
+
+    try:
+        ctx = extract(request.headers)
+        extra_headers = {"x-envoy-max-retries": "3"}
+        inject(extra_headers, context=ctx)
+
+        stream = await openai_client_via_plano.chat.completions.create(
+            model=WEATHER_MODEL,
+            messages=response_messages,
+            temperature=request_body.get("temperature", 0.7),
+            max_tokens=request_body.get("max_tokens", 1000),
+            stream=True,
+            extra_headers=extra_headers,
+        )
+
+        async for chunk in stream:
+            if chunk.choices:
+                yield f"data: {chunk.model_dump_json()}\n\n"
+
+        yield "data: [DONE]\n\n"
+
+    except Exception as e:
+        logger.error(f"Error generating weather response: {e}")
+        error_chunk = {
+            "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
+            "object": "chat.completion.chunk",
+            "created": int(time.time()),
+            "model": request_body.get("model", WEATHER_MODEL),
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {
+                        "content": "I apologize, but I'm having trouble retrieving weather information right now. Please try again."
+                    },
+                    "finish_reason": "stop",
+                }
+            ],
+        }
+        yield f"data: {json.dumps(error_chunk)}\n\n"
+        yield "data: [DONE]\n\n"
+
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint."""
+    return {"status": "healthy", "agent": "weather_forecast"}
+
+
+def start_server(host: str = "localhost", port: int = 10510):
+    """Start the REST server."""
+    uvicorn.run(
+        app,
+        host=host,
+        port=port,
+        log_config={
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": {
+                "default": {
+                    "format": "%(asctime)s - [WEATHER_AGENT] - %(levelname)s - %(message)s",
+                },
+            },
+            "handlers": {
+                "default": {
+                    "formatter": "default",
+                    "class": "logging.StreamHandler",
+                    "stream": "ext://sys.stdout",
+                },
+            },
+            "root": {
+                "level": "INFO",
+                "handlers": ["default"],
+            },
+        },
+    )
+
+
+if __name__ == "__main__":
+    start_server(host="0.0.0.0", port=10510)
--- a/docs/source/resources/includes/arch_config_full_reference.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference.yaml
@ -1,100 +1,110 @@
-version: v0.1

+# Arch Gateway configuration version
+version: v0.3.0
+
+
+# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
+agents:
+  - id: weather_agent  # Example agent for weather
+    url: http://host.docker.internal:10510
+
+  - id: flight_agent   # Example agent for flights
+    url: http://host.docker.internal:10520
+
+
+# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
+filters:
+  - id: input_guards  # Example filter for input validation
+    url: http://host.docker.internal:10500
+    # type: mcp (default)
+    # transport: streamable-http (default)
+    # tool: input_guards (default - same as filter id)
+
+
+# LLM provider configurations with API keys and model routing
+model_providers:
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+    default: true
+
+  - model: openai/gpt-4o-mini
+    access_key: $OPENAI_API_KEY
+
+  - model: anthropic/claude-sonnet-4-0
+    access_key: $ANTHROPIC_API_KEY
+
+  - model: mistral/ministral-3b-latest
+    access_key: $MISTRAL_API_KEY
+
+
+# Model aliases - use friendly names instead of full provider model names
+model_aliases:
+  fast-llm:
+    target: gpt-4o-mini
+
+  smart-llm:
+    target: gpt-4o
+
+
+# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
 listeners:
-  ingress_traffic:
+  # Agent listener for routing requests to multiple agents
+  - type: agent
+    name: travel_booking_service
+    port: 8001
+    router: plano_orchestrator_v1
    address: 0.0.0.0
-    port: 10000
-    message_format: openai
-    timeout: 5s
-  egress_traffic:
+    agents:
+      - id: rag_agent
+        description: virtual assistant for retrieval augmented generation tasks
+        filter_chain:
+          - input_guards
+
+  # Model listener for direct LLM access
+  - type: model
+    name: model_1
    address: 0.0.0.0
    port: 12000
-    message_format: openai
-    timeout: 5s

-# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
+  # Prompt listener for function calling (for prompt_targets)
+  - type: prompt
+    name: prompt_function_listener
+    address: 0.0.0.0
+    port: 10000
+    # This listener is used for prompt_targets and function calling
+
+
+# Reusable service endpoints
 endpoints:
  app_server:
-    # value could be ip address or a hostname with port
-    # this could also be a list of endpoints for load balancing
-    # for example endpoint: [ ip1:port, ip2:port ]
    endpoint: 127.0.0.1:80
-    # max time to wait for a connection to be established
    connect_timeout: 0.005s

  mistral_local:
    endpoint: 127.0.0.1:8001

-  error_target:
-    endpoint: error_target_1
-
-# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
-llm_providers:
-  - name: openai/gpt-4o
-    access_key: $OPENAI_API_KEY
-    model: openai/gpt-4o
-    default: true
-
-  - access_key: $MISTRAL_API_KEY
-    model: mistral/mistral-8x7b
-
-  - model: mistral/mistral-7b-instruct
-    base_url: http://mistral_local
-
-# Model aliases - friendly names that map to actual provider names
-model_aliases:
-  # Alias for summarization tasks -> fast/cheap model
-  arch.summarize.v1:
-    target: gpt-4o
-
-  # Alias for general purpose tasks -> latest model
-  arch.v1:
-    target: mistral-8x7b
-
-# provides a way to override default settings for the arch system
-overrides:
-  # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.
-  # The intent matching threshold is kept at 0.80, you can override this behavior if you would like
-  prompt_target_intent_matching_threshold: 0.60
-
-# default system prompt used by all prompt targets
-system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
-
-prompt_guards:
-  input_guards:
-    jailbreak:
-      on_exception:
-        message: Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.

+# Prompt targets for function calling and API orchestration
 prompt_targets:
-  - name: information_extraction
-    default: true
-    description: handel all scenarios that are question and answer in nature. Like summarization, information extraction, etc.
-    endpoint:
-      name: app_server
-      path: /agent/summary
-      http_method: POST
-    # Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
-    auto_llm_dispatch_on_response: true
-    # override system prompt for this prompt target
-    system_prompt: You are a helpful information extraction assistant. Use the information that is provided to you.
-
-  - name: reboot_network_device
-    description: Reboot a specific network device
-    endpoint:
-      name: app_server
-      path: /agent/action
+  - name: get_current_weather
+    description: Get current weather at a location.
    parameters:
-      - name: device_id
-        type: str
-        description: Identifier of the network device to reboot.
+      - name: location
+        description: The location to get the weather for
        required: true
-      - name: confirmation
-        type: bool
-        description: Confirmation flag to proceed with reboot.
-        default: false
-        enum: [true, false]
+        type: string
+        format: City, State
+      - name: days
+        description: the number of days for the request
+        required: true
+        type: int
+    endpoint:
+      name: app_server
+      path: /weather
+      http_method: POST

+
+# OpenTelemetry tracing configuration
 tracing:
-  # sampling rate. Note by default Arch works on OpenTelemetry compatible tracing.
-  sampling_rate: 0.1
+  # Random sampling percentage (1-100)
+  random_sampling: 100
--- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
+++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml
@ -1,15 +1,50 @@
+agents:
+- id: weather_agent
+  url: http://host.docker.internal:10510
+- id: flight_agent
+  url: http://host.docker.internal:10520
 endpoints:
  app_server:
    connect_timeout: 0.005s
    endpoint: 127.0.0.1
    port: 80
-  error_target:
-    endpoint: error_target_1
-    port: 80
+  flight_agent:
+    endpoint: host.docker.internal
+    port: 10520
+    protocol: http
+  input_guards:
+    endpoint: host.docker.internal
+    port: 10500
+    protocol: http
  mistral_local:
    endpoint: 127.0.0.1
    port: 8001
+  weather_agent:
+    endpoint: host.docker.internal
+    port: 10510
+    protocol: http
+filters:
+- id: input_guards
+  url: http://host.docker.internal:10500
 listeners:
+- address: 0.0.0.0
+  agents:
+  - description: virtual assistant for retrieval augmented generation tasks
+    filter_chain:
+    - input_guards
+    id: rag_agent
+  name: travel_booking_service
+  port: 8001
+  router: plano_orchestrator_v1
+  type: agent
+- address: 0.0.0.0
+  name: model_1
+  port: 12000
+  type: model
+- address: 0.0.0.0
+  name: prompt_function_listener
+  port: 10000
+  type: prompt
 - address: 0.0.0.0
  model_providers:
  - access_key: $OPENAI_API_KEY
@ -17,49 +52,44 @@ listeners:
    model: gpt-4o
    name: openai/gpt-4o
    provider_interface: openai
+  - access_key: $OPENAI_API_KEY
+    model: gpt-4o-mini
+    name: openai/gpt-4o-mini
+    provider_interface: openai
+  - access_key: $ANTHROPIC_API_KEY
+    model: claude-sonnet-4-0
+    name: anthropic/claude-sonnet-4-0
+    provider_interface: anthropic
  - access_key: $MISTRAL_API_KEY
-    model: mistral-8x7b
-    name: mistral/mistral-8x7b
-    provider_interface: mistral
-  - base_url: http://mistral_local
-    cluster_name: mistral_mistral_local
-    endpoint: mistral_local
-    model: mistral-7b-instruct
-    name: mistral/mistral-7b-instruct
-    port: 80
-    protocol: http
+    model: ministral-3b-latest
+    name: mistral/ministral-3b-latest
    provider_interface: mistral
  name: egress_traffic
  port: 12000
-  timeout: 5s
+  timeout: 30s
  type: model_listener
- address: 0.0.0.0
-  name: ingress_traffic
-  port: 10000
-  timeout: 5s
-  type: prompt_listener
 model_aliases:
-  arch.summarize.v1:
+  fast-llm:
+    target: gpt-4o-mini
+  smart-llm:
    target: gpt-4o
-  arch.v1:
-    target: mistral-8x7b
 model_providers:
 - access_key: $OPENAI_API_KEY
  default: true
  model: gpt-4o
  name: openai/gpt-4o
  provider_interface: openai
+- access_key: $OPENAI_API_KEY
+  model: gpt-4o-mini
+  name: openai/gpt-4o-mini
+  provider_interface: openai
+- access_key: $ANTHROPIC_API_KEY
+  model: claude-sonnet-4-0
+  name: anthropic/claude-sonnet-4-0
+  provider_interface: anthropic
 - access_key: $MISTRAL_API_KEY
-  model: mistral-8x7b
-  name: mistral/mistral-8x7b
-  provider_interface: mistral
- base_url: http://mistral_local
-  cluster_name: mistral_mistral_local
-  endpoint: mistral_local
-  model: mistral-7b-instruct
-  name: mistral/mistral-7b-instruct
-  port: 80
-  protocol: http
+  model: ministral-3b-latest
+  name: mistral/ministral-3b-latest
  provider_interface: mistral
 - model: Arch-Function
  name: arch-function
@ -67,45 +97,23 @@ model_providers:
 - model: Plano-Orchestrator
  name: plano-orchestrator
  provider_interface: arch
-overrides:
-  prompt_target_intent_matching_threshold: 0.6
-prompt_guards:
-  input_guards:
-    jailbreak:
-      on_exception:
-        message: Looks like you're curious about my abilities, but I can only provide
-          assistance within my programmed parameters.
 prompt_targets:
- auto_llm_dispatch_on_response: true
-  default: true
-  description: handel all scenarios that are question and answer in nature. Like summarization,
-    information extraction, etc.
+- description: Get current weather at a location.
  endpoint:
    http_method: POST
    name: app_server
-    path: /agent/summary
-  name: information_extraction
-  system_prompt: You are a helpful information extraction assistant. Use the information
-    that is provided to you.
- description: Reboot a specific network device
-  endpoint:
-    name: app_server
-    path: /agent/action
-  name: reboot_network_device
+    path: /weather
+  name: get_current_weather
  parameters:
-  - description: Identifier of the network device to reboot.
-    name: device_id
+  - description: The location to get the weather for
+    format: City, State
+    name: location
    required: true
-    type: str
-  - default: false
-    description: Confirmation flag to proceed with reboot.
-    enum:
-    - true
-    - false
-    name: confirmation
-    type: bool
-system_prompt: You are a network assistant that just offers facts; not advice on manufacturers
-  or purchasing decisions.
+    type: string
+  - description: the number of days for the request
+    name: days
+    required: true
+    type: int
 tracing:
-  sampling_rate: 0.1
-version: v0.1
+  random_sampling: 100
+version: v0.3.0
--- a/docs/source/resources/includes/arch_config_state_storage_example.yaml
+++ b/docs/source/resources/includes/arch_config_state_storage_example.yaml
@ -1,14 +1,12 @@
 version: v0.1

 listeners:
-  egress_traffic:
+  - type: model
+    name: model_proxy_listener
    address: 0.0.0.0
    port: 12000
-    message_format: openai
-    timeout: 30s
-
-llm_providers:

+model_providers:
  # OpenAI Models
  - model: openai/gpt-5-mini-2025-08-07
    access_key: $OPENAI_API_KEY
--- a/docs/source/resources/includes/plano_config_agents_filters.yaml
+++ b/docs/source/resources/includes/plano_config_agents_filters.yaml
@ -0,0 +1,41 @@
+version: v0.3.0
+
+agents:
+  - id: rag_agent
+    url: http://host.docker.internal:10505
+
+filters:
+  - id: query_rewriter
+    url: http://host.docker.internal:10501
+    # type: mcp # default is mcp
+    # transport: streamable-http # default is streamable-http
+    # tool: query_rewriter # default name is the filter id
+  - id: context_builder
+    url: http://host.docker.internal:10502
+
+model_providers:
+  - model: openai/gpt-4o-mini
+    access_key: $OPENAI_API_KEY
+    default: true
+  - model: openai/gpt-4o
+    access_key: $OPENAI_API_KEY
+
+model_aliases:
+  fast-llm:
+    target: gpt-4o-mini
+  smart-llm:
+    target: gpt-4o
+
+listeners:
+  - type: agent
+    name: agent_1
+    port: 8001
+    router: arch_agent_router
+    agents:
+      - id: rag_agent
+        description: virtual assistant for retrieval augmented generation tasks
+        filter_chain:
+          - query_rewriter
+          - context_builder
+tracing:
+  random_sampling: 100