Overhaul demos directory: cleanup, restructure, and standardize configs (#760)

2026-04-25 00:36:34 +02:00 · 2026-02-17 03:09:28 -08:00 · 2026-02-17 03:09:28 -08:00 · 473996d35d
commit 473996d35d
parent c3591bcbf3
205 changed files with 304 additions and 5223 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -426,7 +426,7 @@ jobs:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        run: |
          source venv/bin/activate
-          cd demos/shared/test_runner && sh run_demo_tests.sh use_cases/preference_based_routing
+          cd demos/shared/test_runner && sh run_demo_tests.sh llm_routing/preference_based_routing

  # ──────────────────────────────────────────────
  # E2E: demo — currency conversion
@ -476,4 +476,4 @@ jobs:
          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
        run: |
          source venv/bin/activate
-          cd demos/shared/test_runner && sh run_demo_tests.sh samples_python/currency_exchange
+          cd demos/shared/test_runner && sh run_demo_tests.sh advanced/currency_exchange
--- a/README.md
+++ b/README.md
@ -45,7 +45,7 @@ Plano pulls rote plumbing out of your framework so you can stay focused on what

 Plano handles **orchestration, model management, and observability** as modular building blocks - letting you configure only what you need (edge proxying for agentic orchestration and guardrails, or LLM routing from your services, or both together) to fit cleanly into existing architectures. Below is a simple multi-agent travel agent built with Plano that showcases all three core capabilities.

-> 📁 **Full working code:** See [`demos/use_cases/travel_agents/`](demos/use_cases/travel_agents/) for complete weather and flight agents you can run locally.
+> 📁 **Full working code:** See [`demos/agent_orchestration/travel_agents/`](demos/agent_orchestration/travel_agents/) for complete weather and flight agents you can run locally.



@ -113,7 +113,7 @@ async def chat(request: Request):
    days = 7

    # Your agent logic: fetch data, call APIs, run tools
-    # See demos/use_cases/travel_agents/ for the full implementation
+    # See demos/agent_orchestration/travel_agents/ for the full implementation
    weather_data = await get_weather_data(request, messages, days)

    # Stream the response back through Plano
--- a/cli/planoai/templates/coding_agent_routing.yaml
+++ b/cli/planoai/templates/coding_agent_routing.yaml
@ -1,13 +1,6 @@
-version: v0.1
+version: v0.3.0

-listeners:
-  egress_traffic:
-    address: 0.0.0.0
-    port: 12000
-    message_format: openai
-    timeout: 30s
-
-llm_providers:
+model_providers:
  # OpenAI Models
  - model: openai/gpt-5-2025-08-07
    access_key: $OPENAI_API_KEY
@ -39,5 +32,10 @@ model_aliases:
  arch.claude.code.small.fast:
    target: claude-haiku-4-5

+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
 tracing:
  random_sampling: 100
--- a/cli/planoai/templates/conversational_state_v1_responses.yaml
+++ b/cli/planoai/templates/conversational_state_v1_responses.yaml
@ -1,14 +1,10 @@
-version: v0.1
+version: v0.3.0

-listeners:
-  egress_traffic:
-    address: 0.0.0.0
-    port: 12000
-    message_format: openai
-    timeout: 30s
-
-llm_providers:
+agents:
+  - id: assistant
+    url: http://localhost:10510

+model_providers:
  # OpenAI Models
  - model: openai/gpt-5-mini-2025-08-07
    access_key: $OPENAI_API_KEY
@ -18,8 +14,23 @@ llm_providers:
  - model: anthropic/claude-sonnet-4-20250514
    access_key: $ANTHROPIC_API_KEY

+listeners:
+  - type: agent
+    name: conversation_service
+    port: 8001
+    router: plano_orchestrator_v1
+    agents:
+      - id: assistant
+        description: |
+          A conversational assistant that maintains context across multi-turn
+          conversations. It can answer follow-up questions, remember previous
+          context, and provide coherent responses in ongoing dialogues.
+
 # State storage configuration for v1/responses API
 # Manages conversation state for multi-turn conversations
 state_storage:
  # Type: memory | postgres
  type: memory
+
+tracing:
+  random_sampling: 100
--- a/cli/planoai/templates/preference_aware_routing.yaml
+++ b/cli/planoai/templates/preference_aware_routing.yaml
@ -1,13 +1,6 @@
-version: v0.1.0
+version: v0.3.0

-listeners:
-  egress_traffic:
-    address: 0.0.0.0
-    port: 12000
-    message_format: openai
-    timeout: 30s
-
-llm_providers:
+model_providers:

  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
@ -25,5 +18,10 @@ llm_providers:
      - name: code generation
        description: generating new code snippets, functions, or boilerplate based on user prompts or requirements

+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
 tracing:
  random_sampling: 100
--- a/cli/planoai/utils.py
+++ b/cli/planoai/utils.py
@ -154,7 +154,10 @@ def convert_legacy_listeners(
                )
            listener["model_providers"] = model_providers or []
            model_provider_set = True
-            llm_gateway_listener = listener
+            # Merge user listener values into defaults for the Envoy template
+            llm_gateway_listener = {**llm_gateway_listener, **listener}
+        elif listener.get("type") == "prompt":
+            prompt_gateway_listener = {**prompt_gateway_listener, **listener}
    if not model_provider_set:
        listeners.append(llm_gateway_listener)

--- a/cli/test/test_init.py
+++ b/cli/test/test_init.py
@ -26,7 +26,7 @@ def test_init_template_builtin_writes_config(tmp_path, monkeypatch):
    config_path = tmp_path / "config.yaml"
    assert config_path.exists()
    config_text = config_path.read_text(encoding="utf-8")
-    assert "llm_providers:" in config_text
+    assert "model_providers:" in config_text


 def test_init_refuses_overwrite_without_force(tmp_path, monkeypatch):
--- a/config/docker-compose.dev.yaml
+++ b/config/docker-compose.dev.yaml
@ -8,7 +8,7 @@ services:
      - "12000:12000"
      - "19901:9901"
    volumes:
-      - ${PLANO_CONFIG_FILE:-../demos/samples_python/weather_forecast/plano_config.yaml}:/app/plano_config.yaml
+      - ${PLANO_CONFIG_FILE:-../demos/getting_started/weather_forecast/plano_config.yaml}:/app/plano_config.yaml
      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
      - ./envoy.template.yaml:/app/envoy.template.yaml
      - ./plano_config_schema.yaml:/app/plano_config_schema.yaml
--- a/demos/samples_python/currency_exchange/README.md
+++ b/demos/samples_python/currency_exchange/README.md
--- a/demos/samples_python/currency_exchange/config.yaml
+++ b/demos/samples_python/currency_exchange/config.yaml
@ -1,13 +1,11 @@
-version: v0.1.0
+version: v0.3.0

 listeners:
-  ingress_traffic:
-    address: 0.0.0.0
+  - type: prompt
+    name: prompt_listener
    port: 10000
-    message_format: openai
-    timeout: 30s

-llm_providers:
+model_providers:
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true
--- a/demos/advanced/currency_exchange/docker-compose.yaml
+++ b/demos/advanced/currency_exchange/docker-compose.yaml
@ -0,0 +1,25 @@
+services:
+  anythingllm:
+    image: mintplexlabs/anythingllm
+    restart: always
+    ports:
+      - "3001:3001"
+    cap_add:
+      - SYS_ADMIN
+    environment:
+      - STORAGE_DIR=/app/server/storage
+      - LLM_PROVIDER=generic-openai
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:10000/v1
+      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
+      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
+      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+
+  jaeger:
+    build:
+      context: ../../shared/jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
--- a/demos/samples_python/currency_exchange/hurl_tests/simple.hurl
+++ b/demos/samples_python/currency_exchange/hurl_tests/simple.hurl
--- a/demos/samples_python/currency_exchange/hurl_tests/simple_stream.hurl
+++ b/demos/samples_python/currency_exchange/hurl_tests/simple_stream.hurl
--- a/demos/samples_java/weather_forcecast_service/run_demo.sh
+++ b/demos/samples_java/weather_forcecast_service/run_demo.sh
--- a/demos/samples_python/currency_exchange/test_data.yaml
+++ b/demos/samples_python/currency_exchange/test_data.yaml
--- a/demos/use_cases/model_choice_with_test_harness/README.md
+++ b/demos/use_cases/model_choice_with_test_harness/README.md
--- a/demos/use_cases/model_choice_with_test_harness/bench.py
+++ b/demos/use_cases/model_choice_with_test_harness/bench.py
--- a/demos/use_cases/model_choice_with_test_harness/evals_summarize.yaml
+++ b/demos/use_cases/model_choice_with_test_harness/evals_summarize.yaml
--- a/demos/use_cases/model_choice_with_test_harness/plano_config_with_aliases.yaml
+++ b/demos/use_cases/model_choice_with_test_harness/plano_config_with_aliases.yaml
@ -1,13 +1,11 @@
-version: v0.1.0
+version: v0.3.0

 listeners:
-  egress_traffic:
-    address: 0.0.0.0
+  - type: model
+    name: model_listener
    port: 12000
-    message_format: openai
-    timeout: 30s

-llm_providers:
+model_providers:
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true
@ -20,3 +18,6 @@ model_aliases:
    target: gpt-4o-mini
  arch.reason.v1:
    target: o3
+
+tracing:
+  random_sampling: 100
--- a/demos/use_cases/model_choice_with_test_harness/pyproject.toml
+++ b/demos/use_cases/model_choice_with_test_harness/pyproject.toml
--- a/demos/use_cases/model_choice_with_test_harness/run_demo.sh
+++ b/demos/use_cases/model_choice_with_test_harness/run_demo.sh
--- a/demos/use_cases/model_choice_with_test_harness/uv.lock
+++ b/demos/use_cases/model_choice_with_test_harness/uv.lock
--- a/demos/samples_python/multi_turn_rag_agent/Dockerfile
+++ b/demos/samples_python/multi_turn_rag_agent/Dockerfile
--- a/demos/samples_python/multi_turn_rag_agent/README.md
+++ b/demos/samples_python/multi_turn_rag_agent/README.md
--- a/demos/samples_python/multi_turn_rag_agent/config.yaml
+++ b/demos/samples_python/multi_turn_rag_agent/config.yaml
@ -1,18 +1,16 @@
-version: v0.1.0
+version: v0.3.0

 listeners:
-  ingress_traffic:
-    address: 0.0.0.0
+  - type: prompt
+    name: prompt_listener
    port: 10000
-    message_format: openai
-    timeout: 30s

 endpoints:
  rag_energy_source_agent:
    endpoint: host.docker.internal:18083
    connect_timeout: 0.005s

-llm_providers:
+model_providers:
  - access_key: $OPENAI_API_KEY
    model: openai/gpt-4o-mini
    default: true
--- a/demos/advanced/multi_turn_rag/docker-compose.yaml
+++ b/demos/advanced/multi_turn_rag/docker-compose.yaml
@ -0,0 +1,28 @@
+services:
+  rag_energy_source_agent:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - "18083:80"
+    healthcheck:
+        test: ["CMD", "curl" ,"http://localhost:80/healthz"]
+        interval: 5s
+        retries: 20
+
+  anythingllm:
+    image: mintplexlabs/anythingllm
+    restart: always
+    ports:
+      - "3001:3001"
+    cap_add:
+      - SYS_ADMIN
+    environment:
+      - STORAGE_DIR=/app/server/storage
+      - LLM_PROVIDER=generic-openai
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:10000/v1
+      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
+      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
+      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
--- a/demos/samples_python/multi_turn_rag_agent/main.py
+++ b/demos/samples_python/multi_turn_rag_agent/main.py
--- a/demos/samples_python/multi_turn_rag_agent/mutli-turn-example.png
+++ b/demos/samples_python/multi_turn_rag_agent/mutli-turn-example.png
--- a/demos/samples_python/multi_turn_rag_agent/requirements.txt
+++ b/demos/samples_python/multi_turn_rag_agent/requirements.txt
--- a/demos/samples_python/multi_turn_rag_agent/run_demo.sh
+++ b/demos/samples_python/multi_turn_rag_agent/run_demo.sh
--- a/demos/samples_python/stock_quote/README.md
+++ b/demos/samples_python/stock_quote/README.md
--- a/demos/samples_python/stock_quote/config.yaml
+++ b/demos/samples_python/stock_quote/config.yaml
@ -1,13 +1,11 @@
-version: v0.1.0
+version: v0.3.0

 listeners:
-  ingress_traffic:
-    address: 0.0.0.0
+  - type: prompt
+    name: prompt_listener
    port: 10000
-    message_format: openai
-    timeout: 30s

-llm_providers:
+model_providers:
  - access_key: $OPENAI_API_KEY
    model: openai/gpt-4o

--- a/demos/advanced/stock_quote/docker-compose.yaml
+++ b/demos/advanced/stock_quote/docker-compose.yaml
@ -0,0 +1,25 @@
+services:
+  anythingllm:
+    image: mintplexlabs/anythingllm
+    restart: always
+    ports:
+      - "3001:3001"
+    cap_add:
+      - SYS_ADMIN
+    environment:
+      - STORAGE_DIR=/app/server/storage
+      - LLM_PROVIDER=generic-openai
+      - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:10000/v1
+      - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini
+      - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000
+      - GENERIC_OPEN_AI_API_KEY=sk-placeholder
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+
+  jaeger:
+    build:
+      context: ../../shared/jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
--- a/demos/samples_python/currency_exchange/run_demo.sh
+++ b/demos/samples_python/currency_exchange/run_demo.sh
--- a/demos/samples_python/stock_quote/stock_quote_demo.png
+++ b/demos/samples_python/stock_quote/stock_quote_demo.png
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/Dockerfile
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/Dockerfile
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/README.md
@ -37,7 +37,7 @@ Plano acts as a **framework-agnostic proxy and data plane** that:

 ```bash
 # From the demo directory
-cd demos/use_cases/multi_agent_with_crewai_langchain
+cd demos/agent_orchestration/multi_agent_crewai_langchain

 # Build and start all services
 docker-compose up -d
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/config.yaml
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/crewai/flight_agent.py
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/crewai/flight_agent.py
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/docker-compose.yaml
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/langchain/weather_agent.py
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/langchain/weather_agent.py
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/openai_protocol.py
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/openai_protocol.py
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/pyproject.toml
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/pyproject.toml
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/traces.png
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/traces.png
--- a/demos/agent_orchestration/multi_agent_crewai_langchain/uv.lock
+++ b/demos/agent_orchestration/multi_agent_crewai_langchain/uv.lock
--- a/demos/agent_orchestration/travel_agents/Dockerfile
+++ b/demos/agent_orchestration/travel_agents/Dockerfile
--- a/demos/agent_orchestration/travel_agents/README.md
+++ b/demos/agent_orchestration/travel_agents/README.md
--- a/demos/agent_orchestration/travel_agents/config.yaml
+++ b/demos/agent_orchestration/travel_agents/config.yaml
--- a/demos/agent_orchestration/travel_agents/docker-compose.yaml
+++ b/demos/agent_orchestration/travel_agents/docker-compose.yaml
--- a/demos/agent_orchestration/travel_agents/pyproject.toml
+++ b/demos/agent_orchestration/travel_agents/pyproject.toml
--- a/demos/agent_orchestration/travel_agents/src/travel_agents/flight_agent.py
+++ b/demos/agent_orchestration/travel_agents/src/travel_agents/flight_agent.py
--- a/demos/agent_orchestration/travel_agents/src/travel_agents/weather_agent.py
+++ b/demos/agent_orchestration/travel_agents/src/travel_agents/weather_agent.py
--- a/demos/agent_orchestration/travel_agents/test.rest
+++ b/demos/agent_orchestration/travel_agents/test.rest
--- a/demos/agent_orchestration/travel_agents/tracing.png
+++ b/demos/agent_orchestration/travel_agents/tracing.png
--- a/demos/agent_orchestration/travel_agents/travel_agent_request.rest
+++ b/demos/agent_orchestration/travel_agents/travel_agent_request.rest
--- a/demos/agent_orchestration/travel_agents/uv.lock
+++ b/demos/agent_orchestration/travel_agents/uv.lock
--- a/demos/filter_chains/http_filter/Dockerfile
+++ b/demos/filter_chains/http_filter/Dockerfile
--- a/demos/filter_chains/http_filter/README.md
+++ b/demos/filter_chains/http_filter/README.md
--- a/demos/filter_chains/http_filter/config.yaml
+++ b/demos/filter_chains/http_filter/config.yaml
--- a/demos/filter_chains/http_filter/docker-compose.yaml
+++ b/demos/filter_chains/http_filter/docker-compose.yaml
--- a/demos/filter_chains/http_filter/http.rest
+++ b/demos/filter_chains/http_filter/http.rest
--- a/demos/filter_chains/http_filter/mcp_query.rest
+++ b/demos/filter_chains/http_filter/mcp_query.rest
--- a/demos/filter_chains/http_filter/pyproject.toml
+++ b/demos/filter_chains/http_filter/pyproject.toml
--- a/demos/filter_chains/http_filter/sample_queries.md
+++ b/demos/filter_chains/http_filter/sample_queries.md
--- a/demos/filter_chains/http_filter/src/rag_agent/init.py
+++ b/demos/filter_chains/http_filter/src/rag_agent/init.py
--- a/demos/filter_chains/http_filter/src/rag_agent/main.py
+++ b/demos/filter_chains/http_filter/src/rag_agent/main.py
--- a/demos/filter_chains/http_filter/src/rag_agent/api.py
+++ b/demos/filter_chains/http_filter/src/rag_agent/api.py
--- a/demos/filter_chains/http_filter/src/rag_agent/context_builder.py
+++ b/demos/filter_chains/http_filter/src/rag_agent/context_builder.py
--- a/demos/filter_chains/http_filter/src/rag_agent/input_guards.py
+++ b/demos/filter_chains/http_filter/src/rag_agent/input_guards.py
--- a/demos/filter_chains/http_filter/src/rag_agent/query_rewriter.py
+++ b/demos/filter_chains/http_filter/src/rag_agent/query_rewriter.py
--- a/demos/filter_chains/http_filter/src/rag_agent/rag_agent.py
+++ b/demos/filter_chains/http_filter/src/rag_agent/rag_agent.py
--- a/demos/filter_chains/http_filter/src/rag_agent/sample_knowledge_base.csv
+++ b/demos/filter_chains/http_filter/src/rag_agent/sample_knowledge_base.csv
--- a/demos/filter_chains/http_filter/start_agents.sh
+++ b/demos/filter_chains/http_filter/start_agents.sh
--- a/demos/filter_chains/http_filter/test.rest
+++ b/demos/filter_chains/http_filter/test.rest
--- a/demos/filter_chains/http_filter/uv.lock
+++ b/demos/filter_chains/http_filter/uv.lock
--- a/demos/filter_chains/mcp_filter/Dockerfile
+++ b/demos/filter_chains/mcp_filter/Dockerfile
--- a/demos/filter_chains/mcp_filter/README.md
+++ b/demos/filter_chains/mcp_filter/README.md
--- a/demos/filter_chains/mcp_filter/config.yaml
+++ b/demos/filter_chains/mcp_filter/config.yaml
--- a/demos/filter_chains/mcp_filter/docker-compose.yaml
+++ b/demos/filter_chains/mcp_filter/docker-compose.yaml
--- a/demos/filter_chains/mcp_filter/mcp_query.rest
+++ b/demos/filter_chains/mcp_filter/mcp_query.rest
--- a/demos/filter_chains/mcp_filter/pyproject.toml
+++ b/demos/filter_chains/mcp_filter/pyproject.toml
--- a/demos/filter_chains/mcp_filter/sample_queries.md
+++ b/demos/filter_chains/mcp_filter/sample_queries.md
--- a/demos/filter_chains/mcp_filter/src/rag_agent/init.py
+++ b/demos/filter_chains/mcp_filter/src/rag_agent/init.py
--- a/demos/filter_chains/mcp_filter/src/rag_agent/main.py
+++ b/demos/filter_chains/mcp_filter/src/rag_agent/main.py
--- a/demos/filter_chains/mcp_filter/src/rag_agent/api.py
+++ b/demos/filter_chains/mcp_filter/src/rag_agent/api.py
--- a/demos/filter_chains/mcp_filter/src/rag_agent/context_builder.py
+++ b/demos/filter_chains/mcp_filter/src/rag_agent/context_builder.py
--- a/demos/filter_chains/mcp_filter/src/rag_agent/input_guards.py
+++ b/demos/filter_chains/mcp_filter/src/rag_agent/input_guards.py
--- a/demos/filter_chains/mcp_filter/src/rag_agent/query_rewriter.py
+++ b/demos/filter_chains/mcp_filter/src/rag_agent/query_rewriter.py
--- a/demos/filter_chains/mcp_filter/src/rag_agent/rag_agent.py
+++ b/demos/filter_chains/mcp_filter/src/rag_agent/rag_agent.py
--- a/demos/filter_chains/mcp_filter/src/rag_agent/sample_knowledge_base.csv
+++ b/demos/filter_chains/mcp_filter/src/rag_agent/sample_knowledge_base.csv
--- a/demos/filter_chains/mcp_filter/start_agents.sh
+++ b/demos/filter_chains/mcp_filter/start_agents.sh
--- a/demos/filter_chains/mcp_filter/test.rest
+++ b/demos/filter_chains/mcp_filter/test.rest
--- a/demos/filter_chains/mcp_filter/uv.lock
+++ b/demos/filter_chains/mcp_filter/uv.lock
--- a/demos/getting_started/llm_gateway/README.md
+++ b/demos/getting_started/llm_gateway/README.md
@ -7,7 +7,7 @@ This demo shows how you can use Plano gateway to manage keys and route to upstre
   ```sh
   sh run_demo.sh
   ```
-1. Navigate to http://localhost:18080/
+1. Navigate to http://localhost:3001/

 The following screen shows an example of an interaction with the Plano gateway demonstrating dynamic routing. You can select between different LLMs using the "override model" option in the chat UI.

@ -32,7 +32,7 @@ $ curl --header 'Content-Type: application/json' \
      "messages": {
        "role": "assistant",
        "tool_calls": null,
-        "content": "Hello! How can I assist you today? Let's chat about anything you'd like. 😊"
+        "content": "Hello! How can I assist you today? Let's chat about anything you'd like."
      },
      "finish_reason": "stop"
    }
@ -47,11 +47,7 @@ $ curl --header 'Content-Type: application/json' \
 ```

 # Observability
-Plano gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from Plano and we are using grafana to visualize the stats in dashboard. To see grafana dashboard follow instructions below,
-
-1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials)
-1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view Plano gateway stats
-1. For tracing you can head over to http://localhost:16686/ to view recent traces.
+For tracing you can head over to http://localhost:16686/ to view recent traces.

 The following is a screenshot of the tracing UI showing a call received by the Plano gateway and the upstream call it makes to the LLM:

--- a/demos/getting_started/llm_gateway/config.yaml
+++ b/demos/getting_started/llm_gateway/config.yaml
--- a/demos/getting_started/llm_gateway/docker-compose.yaml
+++ b/demos/getting_started/llm_gateway/docker-compose.yaml
@ -37,13 +37,3 @@ services:
      - "16686:16686"
      - "4317:4317"
      - "4318:4318"
-
-  prometheus:
-    build:
-      context: ../../shared/prometheus
-
-  grafana:
-    build:
-      context: ../../shared/grafana
-    ports:
-      - "3000:3000"
--- a/demos/getting_started/llm_gateway/jaeger_tracing_llm_routing.png
+++ b/demos/getting_started/llm_gateway/jaeger_tracing_llm_routing.png
--- a/demos/getting_started/llm_gateway/llm_routing_demo.png
+++ b/demos/getting_started/llm_gateway/llm_routing_demo.png
--- a/demos/getting_started/llm_gateway/run_demo.sh
+++ b/demos/getting_started/llm_gateway/run_demo.sh
--- a/demos/getting_started/weather_forecast/Dockerfile
+++ b/demos/getting_started/weather_forecast/Dockerfile
--- a/Show more
+++ b/Show more