From 680dee60a0302979af90a6268b60c10ea31ae0ef Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil.hafeez@gmail.com>
Date: Wed, 11 Mar 2026 16:46:03 -0700
Subject: [PATCH] Update orchestrator model name to match vLLM --served-model-name

---
 demos/agent_orchestration/travel_agents/README.md      |  2 +-
 .../travel_agents/config_local_orchestrator.yaml       |  4 ++--
 docs/source/guides/orchestration.rst                   | 10 ++++++----
 3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/demos/agent_orchestration/travel_agents/README.md b/demos/agent_orchestration/travel_agents/README.md
index 9ae46dde..239ba938 100644
--- a/demos/agent_orchestration/travel_agents/README.md
+++ b/demos/agent_orchestration/travel_agents/README.md
@@ -141,7 +141,7 @@ vllm serve katanemo/Plano-Orchestrator-4B \
     --gpu-memory-utilization 0.3 \
     --tokenizer katanemo/Plano-Orchestrator-4B \
     --chat-template chat_template.jinja \
-    --served-model-name Plano-Orchestrator \
+    --served-model-name katanemo/Plano-Orchestrator-4B \
     --enable-prefix-caching
 ```
 
diff --git a/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml
index d448bf3b..226fc4a7 100644
--- a/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml
+++ b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml
@@ -1,7 +1,7 @@
 version: v0.3.0
 
 overrides:
-  orchestrator_model: plano/Plano-Orchestrator
+  orchestrator_model: plano/katanemo/Plano-Orchestrator-4B
 
 agents:
   - id: weather_agent
@@ -10,7 +10,7 @@ agents:
     url: http://localhost:10520
 
 model_providers:
-  - model: plano/Plano-Orchestrator
+  - model: plano/katanemo/Plano-Orchestrator-4B
     base_url: http://localhost:8000
 
   - model: openai/gpt-5.2
diff --git a/docs/source/guides/orchestration.rst b/docs/source/guides/orchestration.rst
index 9205bae7..2c32cf02 100644
--- a/docs/source/guides/orchestration.rst
+++ b/docs/source/guides/orchestration.rst
@@ -379,7 +379,7 @@ Using vLLM
            --gpu-memory-utilization 0.3 \
            --tokenizer katanemo/Plano-Orchestrator-4B \
            --chat-template chat_template.jinja \
-           --served-model-name Plano-Orchestrator \
+           --served-model-name katanemo/Plano-Orchestrator-4B \
            --enable-prefix-caching
 
    For the 30B-A3B-FP8 model (production):
@@ -394,18 +394,20 @@ Using vLLM
            --tokenizer katanemo/Plano-Orchestrator-30B-A3B-FP8 \
            --chat-template chat_template.jinja \
            --max-model-len 32768 \
-           --served-model-name Plano-Orchestrator \
+           --served-model-name katanemo/Plano-Orchestrator-30B-A3B-FP8 \
            --enable-prefix-caching
 
 4. **Configure Plano to use the local orchestrator**
 
+   Set both model fields to ``plano/`` followed by the value you passed to ``--served-model-name`` (the example below uses the 4B model):
+
    .. code-block:: yaml
 
        overrides:
-         orchestrator_model: plano/Plano-Orchestrator
+         orchestrator_model: plano/katanemo/Plano-Orchestrator-4B
 
        model_providers:
-         - model: plano/Plano-Orchestrator
+         - model: plano/katanemo/Plano-Orchestrator-4B
            base_url: http://<your-server-ip>:8000
 
 5. **Verify the server is running**