From 680dee60a0302979af90a6268b60c10ea31ae0ef Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Wed, 11 Mar 2026 16:46:03 -0700 Subject: [PATCH] update orchestrator model name --- demos/agent_orchestration/travel_agents/README.md | 2 +- .../travel_agents/config_local_orchestrator.yaml | 4 ++-- docs/source/guides/orchestration.rst | 10 ++++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/demos/agent_orchestration/travel_agents/README.md b/demos/agent_orchestration/travel_agents/README.md index 9ae46dde..239ba938 100644 --- a/demos/agent_orchestration/travel_agents/README.md +++ b/demos/agent_orchestration/travel_agents/README.md @@ -141,7 +141,7 @@ vllm serve katanemo/Plano-Orchestrator-4B \ --gpu-memory-utilization 0.3 \ --tokenizer katanemo/Plano-Orchestrator-4B \ --chat-template chat_template.jinja \ - --served-model-name Plano-Orchestrator \ + --served-model-name katanemo/Plano-Orchestrator-4B \ --enable-prefix-caching ``` diff --git a/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml index d448bf3b..226fc4a7 100644 --- a/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml +++ b/demos/agent_orchestration/travel_agents/config_local_orchestrator.yaml @@ -1,7 +1,7 @@ version: v0.3.0 overrides: - orchestrator_model: plano/Plano-Orchestrator + orchestrator_model: plano/katanemo/Plano-Orchestrator-4B agents: - id: weather_agent @@ -10,7 +10,7 @@ agents: url: http://localhost:10520 model_providers: - - model: plano/Plano-Orchestrator + - model: plano/katanemo/Plano-Orchestrator-4B base_url: http://localhost:8000 - model: openai/gpt-5.2 diff --git a/docs/source/guides/orchestration.rst b/docs/source/guides/orchestration.rst index 9205bae7..2c32cf02 100644 --- a/docs/source/guides/orchestration.rst +++ b/docs/source/guides/orchestration.rst @@ -379,7 +379,7 @@ Using vLLM --gpu-memory-utilization 0.3 \ --tokenizer 
katanemo/Plano-Orchestrator-4B \ --chat-template chat_template.jinja \ - --served-model-name Plano-Orchestrator \ + --served-model-name katanemo/Plano-Orchestrator-4B \ --enable-prefix-caching For the 30B-A3B-FP8 model (production): @@ -394,18 +394,20 @@ Using vLLM --tokenizer katanemo/Plano-Orchestrator-30B-A3B-FP8 \ --chat-template chat_template.jinja \ --max-model-len 32768 \ - --served-model-name Plano-Orchestrator \ + --served-model-name katanemo/Plano-Orchestrator-30B-A3B-FP8 \ --enable-prefix-caching 4. **Configure Plano to use the local orchestrator** + Use the model name matching your ``--served-model-name``: + .. code-block:: yaml overrides: - orchestrator_model: plano/Plano-Orchestrator + orchestrator_model: plano/katanemo/Plano-Orchestrator-4B model_providers: - - model: plano/Plano-Orchestrator + - model: plano/katanemo/Plano-Orchestrator-4B base_url: http://<your-server-ip>:8000 5. **Verify the server is running**