From 50c5ec92464efbae0d23777092f9e3ecddb65c0b Mon Sep 17 00:00:00 2001
From: Shuguang Chen <54548843+nehcgs@users.noreply.github.com>
Date: Wed, 26 Nov 2025 15:57:23 -0800
Subject: [PATCH] orchestration integration

---
 crates/Cargo.lock                             |   1 +
 crates/brightstaff/src/router/mod.rs          |   2 +
 .../src/router/orchestrator_model.rs          |  30 +
 .../src/router/orchestrator_model_v1.rs       | 960 ++++++++++++++++++
 crates/common/Cargo.toml                      |   2 +-
 crates/common/src/configuration.rs            |  33 +
 6 files changed, 1027 insertions(+), 1 deletion(-)
 create mode 100644 crates/brightstaff/src/router/orchestrator_model.rs
 create mode 100644 crates/brightstaff/src/router/orchestrator_model_v1.rs
diff --git a/crates/Cargo.lock b/crates/Cargo.lock
index 5797d5a2..88696a1d 100644
--- a/crates/Cargo.lock
+++ b/crates/Cargo.lock
@@ -2450,6 +2450,7 @@ version = "1.0.140"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
 dependencies = [
+ "indexmap 2.9.0",
  "itoa",
  "memchr",
  "ryu",
diff --git a/crates/brightstaff/src/router/mod.rs b/crates/brightstaff/src/router/mod.rs
index e35ea731..b1299477 100644
--- a/crates/brightstaff/src/router/mod.rs
+++ b/crates/brightstaff/src/router/mod.rs
@@ -1,3 +1,5 @@
 pub mod llm_router;
+pub mod orchestrator_model;
+pub mod orchestrator_model_v1;
 pub mod router_model;
 pub mod router_model_v1;
diff --git a/crates/brightstaff/src/router/orchestrator_model.rs b/crates/brightstaff/src/router/orchestrator_model.rs
new file mode 100644
index 00000000..19c78ca3
--- /dev/null
+++ b/crates/brightstaff/src/router/orchestrator_model.rs
@@ -0,0 +1,30 @@
+use common::configuration::AgentUsagePreference;
+use hermesllm::apis::openai::{ChatCompletionsRequest, Message};
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum OrchestratorModelError {
+    #[error("Failed to parse JSON: {0}")]
+    JsonError(#[from] serde_json::Error),
+}
+
+pub type Result<T> = std::result::Result<T, OrchestratorModelError>;
+
+/// OrchestratorModel trait for handling orchestration requests.
+/// Unlike RouterModel which returns a single route, OrchestratorModel
+/// can return multiple routes as the model output format is:
+/// {"route": ["route_name_1", "route_name_2", ...]}
+pub trait OrchestratorModel: Send + Sync {
+    fn generate_request(
+        &self,
+        messages: &[Message],
+        usage_preferences: &Option<Vec<AgentUsagePreference>>,
+    ) -> ChatCompletionsRequest;
+    /// Returns a vector of (route_name, model_name) tuples for all matched routes.
+    fn parse_response(
+        &self,
+        content: &str,
+        usage_preferences: &Option<Vec<AgentUsagePreference>>,
+    ) -> Result<Option<Vec<(String, String)>>>;
+    fn get_model_name(&self) -> String;
+}
diff --git a/crates/brightstaff/src/router/orchestrator_model_v1.rs b/crates/brightstaff/src/router/orchestrator_model_v1.rs
new file mode 100644
index 00000000..eeba4ec0
--- /dev/null
+++ b/crates/brightstaff/src/router/orchestrator_model_v1.rs
@@ -0,0 +1,960 @@
+use std::collections::HashMap;
+
+use common::configuration::{AgentUsagePreference, OrchestrationPreference};
+use hermesllm::apis::openai::{ChatCompletionsRequest, Message, MessageContent, Role};
+use serde::{Deserialize, Serialize, ser::Serialize as SerializeTrait};
+use tracing::{debug, warn};
+
+use super::orchestrator_model::{OrchestratorModel, OrchestratorModelError};
+pub const ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT: &str = r#"
+You are a helpful assistant that selects the most suitable routes based on user intent.
+You are provided with a list of available routes enclosed within <routes></routes> XML tags:
+<routes>
+{routes}
+</routes>
+
+You are also given the conversation context enclosed within <conversation></conversation> XML tags:
+<conversation>
+{conversation}
+</conversation>
+
+## Instructions
+1. Analyze the latest user intent from the conversation.
+2. Compare it against the available routes to find which routes can help fulfill the request.
+3. Respond only with the exact route names from <routes>.
+4. If no routes can help or the intent is already fulfilled, return an empty list.
+
+## Response Format
+Return your answer strictly in JSON as follows:
+{{"route": ["route_name_1", "route_name_2", "..."]}}
+If no routes are needed, return an empty list for `route`.
+"#;
+
+pub type Result<T> = std::result::Result<T, OrchestratorModelError>;
+pub struct OrchestratorModelV1 {
+    agent_orchestration_json_str: String,
+    agent_orchestration_to_model_map: HashMap<String, String>,
+    orchestration_model: String,
+    max_token_length: usize,
+}
+/// Rewrites every occurrence of the exact compact substring `"parameters":{"type":"object","properties":{},"required":[]}`
+/// so that its value reads {"type": "object", "properties": {}, "required": []} (Python-style spacing); any other text is returned unchanged.
+fn to_python_style_parameters(json_str: &str) -> String {
+    json_str.replace(
+        r#""parameters":{"type":"object","properties":{},"required":[]}"#,
+        r#""parameters":{"type": "object", "properties": {}, "required": []}"#,
+    )
+}
+
+impl OrchestratorModelV1 {
+    pub fn new(
+        agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>>,
+        orchestration_model: String,
+        max_token_length: usize,
+    ) -> Self {
+        let agent_orchestration_values: Vec<OrchestrationPreference> =
+            agent_orchestrations.values().flatten().cloned().collect();
+        // Format routes: each route as compact JSON on its own line with Python-style spacing for parameters
+        let agent_orchestration_json_str = agent_orchestration_values
+            .iter()
+            .map(|pref| to_python_style_parameters(&serde_json::to_string(pref).unwrap_or_default()))
+            .collect::<Vec<String>>()
+            .join("\n");
+        let agent_orchestration_to_model_map: HashMap<String, String> = agent_orchestrations
+            .iter()
+            .flat_map(|(model, prefs)| prefs.iter().map(|pref| (pref.name.clone(), model.clone())))
+            .collect();
+
+        OrchestratorModelV1 {
+            orchestration_model,
+            max_token_length,
+            agent_orchestration_json_str,
+            agent_orchestration_to_model_map,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct AgentOrchestratorResponse {
+    /// The route field now expects an array of route names: ["route_name_1", "route_name_2", ...]
+    pub route: Option<Vec<String>>,
+}
+
+const TOKEN_LENGTH_DIVISOR: usize = 4; // Heuristic: byte length of the text divided by this value approximates its token count
+
+impl OrchestratorModel for OrchestratorModelV1 {
+    fn generate_request(
+        &self,
+        messages: &[Message],
+        usage_preferences_from_request: &Option<Vec<AgentUsagePreference>>,
+    ) -> ChatCompletionsRequest {
+        // Remove system prompts, tool-call responses and messages without content.
+        // If the content is empty, the message is likely a tool call;
+        // when role == tool, the message is a tool-call response.
+        let messages_vec = messages
+            .iter()
+            .filter(|m| {
+                m.role != Role::System && m.role != Role::Tool && !m.content.to_string().is_empty()
+            })
+            .collect::<Vec<&Message>>();
+
+        // The following loop walks the conversation from newest to oldest and stops once the estimated token count exceeds the max token length.
+        // Note: the token count is estimated from byte length (len() / TOKEN_LENGTH_DIVISOR) rather than real tokenization, to optimize for performance.
+        let mut token_count = ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT.len() / TOKEN_LENGTH_DIVISOR;
+        let mut selected_messages_list_reversed: Vec<&Message> = vec![];
+        for (selected_messsage_count, message) in messages_vec.iter().rev().enumerate() {
+            let message_token_count = message.content.to_string().len() / TOKEN_LENGTH_DIVISOR;
+            token_count += message_token_count;
+            if token_count > self.max_token_length {
+                debug!(
+                      "OrchestratorModelV1: token count {} exceeds max token length {}, truncating conversation, selected message count {}, total message count: {}",
+                      token_count,
+                      self.max_token_length
+                      , selected_messsage_count,
+                      messages_vec.len()
+                  );
+                if message.role == Role::User {
+                    // If the message that crosses the max token length is from the user, keep it anyway
+                    selected_messages_list_reversed.push(message);
+                }
+                break;
+            }
+            // If we are here, it means that the message is within the max token length
+            selected_messages_list_reversed.push(message);
+        }
+
+        if selected_messages_list_reversed.is_empty() {
+            debug!(
+                "OrchestratorModelV1: no messages selected, using the last message in the conversation"
+            );
+            if let Some(last_message) = messages_vec.last() {
+                selected_messages_list_reversed.push(last_message);
+            }
+        }
+
+        // Warn (the selection itself is left unchanged) if the first or last selected message is not from the user
+        // Note: selected_messages_list_reversed is in reverse order, so:
+        // - first() is the last message in the original conversation
+        // - last() is the first message in the original conversation
+        if let Some(first_message) = selected_messages_list_reversed.first() {
+            if first_message.role != Role::User {
+                warn!("OrchestratorModelV1: last message in the conversation is not from user, this may lead to incorrect orchestration");
+            }
+        }
+        if let Some(last_message) = selected_messages_list_reversed.last() {
+            if last_message.role != Role::User {
+                warn!("OrchestratorModelV1: first message in the selected conversation is not from user, this may lead to incorrect orchestration");
+            }
+        }
+
+        // Reverse the selected messages to maintain the conversation order
+        let selected_conversation_list = selected_messages_list_reversed
+            .iter()
+            .rev()
+            .map(|message| {
+                Message {
+                    role: message.role.clone(),
+                    // flatten the content to plain text; messages without content were already filtered out above
+                    content: MessageContent::Text(message.content.to_string()),
+                    name: None,
+                    tool_calls: None,
+                    tool_call_id: None,
+                }
+            })
+            .collect::<Vec<Message>>();
+
+        // Generate the orchestrator request message based on the usage preferences.
+        // If preferences are passed in the request we use them; otherwise we use the default orchestration model preferences.
+        let orchestrator_message = match convert_to_orchestrator_preferences(usage_preferences_from_request) {
+            Some(prefs) => generate_orchestrator_message(&prefs, &selected_conversation_list),
+            None => generate_orchestrator_message(&self.agent_orchestration_json_str, &selected_conversation_list),
+        };
+
+        ChatCompletionsRequest {
+            model: self.orchestration_model.clone(),
+            messages: vec![Message {
+                content: MessageContent::Text(orchestrator_message),
+                role: Role::User,
+                name: None,
+                tool_calls: None,
+                tool_call_id: None,
+            }],
+            temperature: Some(0.01),
+            ..Default::default()
+        }
+    }
+
+    fn parse_response(
+        &self,
+        content: &str,
+        usage_preferences: &Option<Vec<AgentUsagePreference>>,
+    ) -> Result<Option<Vec<(String, String)>>> {
+        if content.is_empty() {
+            return Ok(None);
+        }
+        let orchestrator_resp_fixed = fix_json_response(content);
+        let orchestrator_response: AgentOrchestratorResponse = serde_json::from_str(orchestrator_resp_fixed.as_str())?;
+
+        let selected_routes = orchestrator_response.route.unwrap_or_default();
+
+        // Filter out empty routes
+        let valid_routes: Vec<String> = selected_routes
+            .into_iter()
+            .filter(|route| !route.is_empty())
+            .collect();
+
+        if valid_routes.is_empty() {
+            return Ok(None);
+        }
+
+        let mut result: Vec<(String, String)> = Vec::new();
+
+        if let Some(usage_preferences) = usage_preferences {
+            // If usage preferences are defined, we need to find the model that matches each selected route
+            for selected_route in valid_routes {
+                let model_name: Option<String> = usage_preferences
+                    .iter()
+                    .find(|pref| {
+                        pref.orchestration_preferences
+                            .iter()
+                            .any(|orchestration_pref| orchestration_pref.name == selected_route)
+                    })
+                    .map(|pref| pref.model.clone());
+
+                if let Some(model_name) = model_name {
+                    result.push((selected_route, model_name));
+                } else {
+                    warn!(
+                        "No matching model found for route: {}, usage preferences: {:?}",
+                        selected_route, usage_preferences
+                    );
+                }
+            }
+        } else {
+            // If no usage preferences are passed in request then use the default orchestration model preferences
+            for selected_route in valid_routes {
+                if let Some(model) = self.agent_orchestration_to_model_map.get(&selected_route).cloned() {
+                    result.push((selected_route, model));
+                } else {
+                    warn!(
+                        "No model found for route: {}, orchestrator model preferences: {:?}",
+                        selected_route, self.agent_orchestration_to_model_map
+                    );
+                }
+            }
+        }
+
+        if result.is_empty() {
+            return Ok(None);
+        }
+
+        Ok(Some(result))
+    }
+
+    fn get_model_name(&self) -> String {
+        self.orchestration_model.clone()
+    }
+}
+
+fn generate_orchestrator_message(prefs: &str, selected_conversation_list: &Vec<Message>) -> String {
+    // Format conversation with 4-space indentation (equivalent to Python's json.dumps(obj, indent=4))
+    let formatter = serde_json::ser::PrettyFormatter::with_indent(b"    ");
+    let mut conversation_buf = Vec::new();
+    let mut serializer = serde_json::Serializer::with_formatter(&mut conversation_buf, formatter);
+    SerializeTrait::serialize(&selected_conversation_list, &mut serializer).unwrap();
+    let conversation_json = String::from_utf8(conversation_buf).unwrap_or_default();
+
+    ARCH_ORCHESTRATOR_V1_SYSTEM_PROMPT
+        .replace("{routes}", prefs)
+        .replace("{conversation}", &conversation_json)
+}
+
+fn convert_to_orchestrator_preferences(
+    prefs_from_request: &Option<Vec<AgentUsagePreference>>,
+) -> Option<String> {
+    if let Some(usage_preferences) = prefs_from_request {
+        let orchestration_preferences: Vec<OrchestrationPreference> = usage_preferences
+            .iter()
+            .flat_map(|pref| {
+                pref.orchestration_preferences
+                    .iter()
+                    .map(|orchestration_pref| OrchestrationPreference {
+                        name: orchestration_pref.name.clone(),
+                        description: orchestration_pref.description.clone(),
+                    })
+            })
+            .collect();
+
+        // Format routes: each route as compact JSON on its own line with Python-style spacing for parameters
+        let routes_str = orchestration_preferences
+            .iter()
+            .map(|pref| to_python_style_parameters(&serde_json::to_string(pref).unwrap_or_default()))
+            .collect::<Vec<String>>()
+            .join("\n");
+
+        return Some(routes_str);
+    }
+
+    None
+}
+
+fn fix_json_response(body: &str) -> String {
+    body.replace("'", "\"").replace("\\n", "")
+}
+
+impl std::fmt::Debug for dyn OrchestratorModel {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "OrchestratorModel")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn test_system_prompt_format() {
+        let expected_prompt = r#"
+You are a helpful assistant that selects the most suitable routes based on user intent.
+You are provided with a list of available routes enclosed within <routes></routes> XML tags:
+<routes>
+{"name":"Image generation","description":"generating image","parameters":{"type": "object", "properties": {}, "required": []}}
+</routes>
+
+You are also given the conversation context enclosed within <conversation></conversation> XML tags:
+<conversation>
+[
+    {
+        "role": "user",
+        "content": "hi"
+    },
+    {
+        "role": "assistant",
+        "content": "Hello! How can I assist you today?"
+    },
+    {
+        "role": "user",
+        "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+    }
+]
+</conversation>
+
+## Instructions
+1. Analyze the latest user intent from the conversation.
+2. Compare it against the available routes to find which routes can help fulfill the request.
+3. Respond only with the exact route names from <routes>.
+4. If no routes can help or the intent is already fulfilled, return an empty list.
+
+## Response Format
+Return your answer strictly in JSON as follows:
+{{"route": ["route_name_1", "route_name_2", "..."]}}
+If no routes are needed, return an empty list for `route`.
+"#;
+        let orchestrations_str = r#"
+          {
+            "gpt-4o": [
+              {"name": "Image generation", "description": "generating image"}
+            ]
+        }
+        "#;
+        let agent_orchestrations =
+            serde_json::from_str::<HashMap<String, Vec<OrchestrationPreference>>>(orchestrations_str).unwrap();
+        let orchestration_model = "test-model".to_string();
+        let orchestrator = OrchestratorModelV1::new(agent_orchestrations, orchestration_model.clone(), usize::MAX);
+
+        let conversation_str = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": "hi"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+                        }
+                    ]
+        "#;
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
+
+        let req = orchestrator.generate_request(&conversation, &None);
+
+        let prompt = req.messages[0].content.to_string();
+
+        assert_eq!(expected_prompt, prompt);
+    }
+
+    #[test]
+    fn test_system_prompt_format_usage_preferences() {
+        let expected_prompt = r#"
+You are a helpful assistant that selects the most suitable routes based on user intent.
+You are provided with a list of available routes enclosed within <routes></routes> XML tags:
+<routes>
+{"name":"code-generation","description":"generating new code snippets, functions, or boilerplate based on user prompts or requirements","parameters":{"type": "object", "properties": {}, "required": []}}
+</routes>
+
+You are also given the conversation context enclosed within <conversation></conversation> XML tags:
+<conversation>
+[
+    {
+        "role": "user",
+        "content": "hi"
+    },
+    {
+        "role": "assistant",
+        "content": "Hello! How can I assist you today?"
+    },
+    {
+        "role": "user",
+        "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+    }
+]
+</conversation>
+
+## Instructions
+1. Analyze the latest user intent from the conversation.
+2. Compare it against the available routes to find which routes can help fulfill the request.
+3. Respond only with the exact route names from <routes>.
+4. If no routes can help or the intent is already fulfilled, return an empty list.
+
+## Response Format
+Return your answer strictly in JSON as follows:
+{{"route": ["route_name_1", "route_name_2", "..."]}}
+If no routes are needed, return an empty list for `route`.
+"#;
+        // Empty orchestrations map - not used when usage_preferences are provided
+        let agent_orchestrations: HashMap<String, Vec<OrchestrationPreference>> = HashMap::new();
+        let orchestration_model = "test-model".to_string();
+        let orchestrator = OrchestratorModelV1::new(agent_orchestrations, orchestration_model.clone(), usize::MAX);
+
+        let conversation_str = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": "hi"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+                        }
+                    ]
+        "#;
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
+
+        let usage_preferences = Some(vec![AgentUsagePreference {
+            model: "claude/claude-3-7-sonnet".to_string(),
+            orchestration_preferences: vec![OrchestrationPreference {
+                name: "code-generation".to_string(),
+                description: "generating new code snippets, functions, or boilerplate based on user prompts or requirements".to_string(),
+            }],
+        }]);
+        let req = orchestrator.generate_request(&conversation, &usage_preferences);
+
+        let prompt = req.messages[0].content.to_string();
+
+        assert_eq!(expected_prompt, prompt);
+    }
+
+    #[test]
+    fn test_conversation_exceed_token_count() {
+        let expected_prompt = r#"
+You are a helpful assistant that selects the most suitable routes based on user intent.
+You are provided with a list of available routes enclosed within <routes></routes> XML tags:
+<routes>
+{"name":"Image generation","description":"generating image","parameters":{"type": "object", "properties": {}, "required": []}}
+</routes>
+
+You are also given the conversation context enclosed within <conversation></conversation> XML tags:
+<conversation>
+[
+    {
+        "role": "user",
+        "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+    }
+]
+</conversation>
+
+## Instructions
+1. Analyze the latest user intent from the conversation.
+2. Compare it against the available routes to find which routes can help fulfill the request.
+3. Respond only with the exact route names from <routes>.
+4. If no routes can help or the intent is already fulfilled, return an empty list.
+
+## Response Format
+Return your answer strictly in JSON as follows:
+{{"route": ["route_name_1", "route_name_2", "..."]}}
+If no routes are needed, return an empty list for `route`.
+"#;
+
+        let orchestrations_str = r#"
+          {
+            "gpt-4o": [
+              {"name": "Image generation", "description": "generating image"}
+            ]
+        }
+        "#;
+        let agent_orchestrations =
+            serde_json::from_str::<HashMap<String, Vec<OrchestrationPreference>>>(orchestrations_str).unwrap();
+        let orchestration_model = "test-model".to_string();
+        let orchestrator = OrchestratorModelV1::new(agent_orchestrations, orchestration_model.clone(), 235);
+
+        let conversation_str = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": "hi"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+                        }
+                    ]
+        "#;
+
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
+
+        let req = orchestrator.generate_request(&conversation, &None);
+
+        let prompt = req.messages[0].content.to_string();
+
+        assert_eq!(expected_prompt, prompt);
+    }
+
+    #[test]
+    fn test_conversation_exceed_token_count_large_single_message() {
+        let expected_prompt = r#"
+You are a helpful assistant that selects the most suitable routes based on user intent.
+You are provided with a list of available routes enclosed within <routes></routes> XML tags:
+<routes>
+{"name":"Image generation","description":"generating image","parameters":{"type": "object", "properties": {}, "required": []}}
+</routes>
+
+You are also given the conversation context enclosed within <conversation></conversation> XML tags:
+<conversation>
+[
+    {
+        "role": "user",
+        "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson and this is a very long message that exceeds the max token length of the routing model, so it should be truncated and only the last user message should be included in the conversation for routing."
+    }
+]
+</conversation>
+
+## Instructions
+1. Analyze the latest user intent from the conversation.
+2. Compare it against the available routes to find which routes can help fulfill the request.
+3. Respond only with the exact route names from <routes>.
+4. If no routes can help or the intent is already fulfilled, return an empty list.
+
+## Response Format
+Return your answer strictly in JSON as follows:
+{{"route": ["route_name_1", "route_name_2", "..."]}}
+If no routes are needed, return an empty list for `route`.
+"#;
+
+        let orchestrations_str = r#"
+          {
+            "gpt-4o": [
+              {"name": "Image generation", "description": "generating image"}
+            ]
+        }
+        "#;
+        let agent_orchestrations =
+            serde_json::from_str::<HashMap<String, Vec<OrchestrationPreference>>>(orchestrations_str).unwrap();
+
+        let orchestration_model = "test-model".to_string();
+        let orchestrator = OrchestratorModelV1::new(agent_orchestrations, orchestration_model.clone(), 200);
+
+        let conversation_str = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": "hi"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson and this is a very long message that exceeds the max token length of the routing model, so it should be truncated and only the last user message should be included in the conversation for routing."
+                        }
+                    ]
+        "#;
+
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
+
+        let req = orchestrator.generate_request(&conversation, &None);
+
+        let prompt = req.messages[0].content.to_string();
+
+        assert_eq!(expected_prompt, prompt);
+    }
+
+    #[test]
+    fn test_conversation_trim_upto_user_message() {
+        let expected_prompt = r#"
+You are a helpful assistant that selects the most suitable routes based on user intent.
+You are provided with a list of available routes enclosed within <routes></routes> XML tags:
+<routes>
+{"name":"Image generation","description":"generating image","parameters":{"type": "object", "properties": {}, "required": []}}
+</routes>
+
+You are also given the conversation context enclosed within <conversation></conversation> XML tags:
+<conversation>
+[
+    {
+        "role": "user",
+        "content": "given the image In style of Andy Warhol"
+    },
+    {
+        "role": "assistant",
+        "content": "ok here is the image"
+    },
+    {
+        "role": "user",
+        "content": "pls give me another image about Bart and Lisa"
+    }
+]
+</conversation>
+
+## Instructions
+1. Analyze the latest user intent from the conversation.
+2. Compare it against the available routes to find which routes can help fulfill the request.
+3. Respond only with the exact route names from <routes>.
+4. If no routes can help or the intent is already fulfilled, return an empty list.
+
+## Response Format
+Return your answer strictly in JSON as follows:
+{{"route": ["route_name_1", "route_name_2", "..."]}}
+If no routes are needed, return an empty list for `route`.
+"#;
+
+        let orchestrations_str = r#"
+          {
+            "gpt-4o": [
+              {"name": "Image generation", "description": "generating image"}
+            ]
+        }
+        "#;
+        let agent_orchestrations =
+            serde_json::from_str::<HashMap<String, Vec<OrchestrationPreference>>>(orchestrations_str).unwrap();
+        let orchestration_model = "test-model".to_string();
+        let orchestrator = OrchestratorModelV1::new(agent_orchestrations, orchestration_model.clone(), 230);
+
+        let conversation_str = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": "hi"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "ok here is the image"
+                        },
+                        {
+                            "role": "user",
+                            "content": "pls give me another image about Bart and Lisa"
+                        }
+                    ]
+        "#;
+
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
+
+        let req = orchestrator.generate_request(&conversation, &None);
+
+        let prompt = req.messages[0].content.to_string();
+
+        assert_eq!(expected_prompt, prompt);
+    }
+
+    /// A user turn whose `content` is a list of parts (text + image_url) must be reduced to
+    /// its text ("hi") when the orchestration prompt is rendered.
+    #[test]
+    fn test_non_text_input() {
+        let expected_prompt = r#"
+You are a helpful assistant that selects the most suitable routes based on user intent.
+You are provided with a list of available routes enclosed within <routes></routes> XML tags:
+<routes>
+{"name":"Image generation","description":"generating image","parameters":{"type": "object", "properties": {}, "required": []}}
+</routes>
+
+You are also given the conversation context enclosed within <conversation></conversation> XML tags:
+<conversation>
+[
+    {
+        "role": "user",
+        "content": "hi"
+    },
+    {
+        "role": "assistant",
+        "content": "Hello! How can I assist you today?"
+    },
+    {
+        "role": "user",
+        "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+    }
+]
+</conversation>
+
+## Instructions
+1. Analyze the latest user intent from the conversation.
+2. Compare it against the available routes to find which routes can help fulfill the request.
+3. Respond only with the exact route names from <routes>.
+4. If no routes can help or the intent is already fulfilled, return an empty list.
+
+## Response Format
+Return your answer strictly in JSON as follows:
+{{"route": ["route_name_1", "route_name_2", "..."]}}
+If no routes are needed, return an empty list for `route`.
+"#;
+        let routes_json = r#"
+          {
+            "gpt-4o": [
+              {"name": "Image generation", "description": "generating image"}
+            ]
+        }
+        "#;
+        let routes_by_model: HashMap<String, Vec<OrchestrationPreference>> =
+            serde_json::from_str(routes_json).unwrap();
+        let orchestrator =
+            OrchestratorModelV1::new(routes_by_model, "test-model".to_string(), usize::MAX);
+
+        let messages_json = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": [
+                              {
+                                "type": "text",
+                                "text": "hi"
+                              },
+                              {
+                                "type": "image_url",
+                                "image_url": {
+                                  "url": "https://example.com/image.png"
+                                }
+                              }
+                            ]
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+                        }
+                    ]
+        "#;
+        let conversation: Vec<Message> = serde_json::from_str(messages_json).unwrap();
+        // The rendered prompt is read back from the first message of the generated request.
+        let request = orchestrator.generate_request(&conversation, &None);
+        let actual_prompt = request.messages[0].content.to_string();
+        assert_eq!(expected_prompt, actual_prompt);
+    }
+
+    /// Tool-call plumbing must not show up in the orchestration prompt.
+    ///
+    /// The input has an assistant turn with empty `content` plus `tool_calls`, followed by the
+    /// matching `tool` result; the expected prompt keeps only the plain user/assistant turns.
+    #[test]
+    fn test_skip_tool_call() {
+        // Expected prompt: the single route plus the three turns that survive filtering.
+        let expected_prompt = r#"
+You are a helpful assistant that selects the most suitable routes based on user intent.
+You are provided with a list of available routes enclosed within <routes></routes> XML tags:
+<routes>
+{"name":"Image generation","description":"generating image","parameters":{"type": "object", "properties": {}, "required": []}}
+</routes>
+
+You are also given the conversation context enclosed within <conversation></conversation> XML tags:
+<conversation>
+[
+    {
+        "role": "user",
+        "content": "What's the weather like in Tokyo?"
+    },
+    {
+        "role": "assistant",
+        "content": "The current weather in Tokyo is 22°C and sunny."
+    },
+    {
+        "role": "user",
+        "content": "What about in New York?"
+    }
+]
+</conversation>
+
+## Instructions
+1. Analyze the latest user intent from the conversation.
+2. Compare it against the available routes to find which routes can help fulfill the request.
+3. Respond only with the exact route names from <routes>.
+4. If no routes can help or the intent is already fulfilled, return an empty list.
+
+## Response Format
+Return your answer strictly in JSON as follows:
+{{"route": ["route_name_1", "route_name_2", "..."]}}
+If no routes are needed, return an empty list for `route`.
+"#;
+        // A single route registered under the "gpt-4o" key.
+        let routes_json = r#"
+          {
+            "gpt-4o": [
+              {"name": "Image generation", "description": "generating image"}
+            ]
+        }
+        "#;
+        let routes_by_model: HashMap<String, Vec<OrchestrationPreference>> =
+            serde_json::from_str(routes_json).unwrap();
+        // NOTE(review): the third argument is presumably a size/token budget, so usize::MAX
+        // would rule out any trimming here — confirm against `OrchestratorModelV1::new`.
+        let orchestrator =
+            OrchestratorModelV1::new(routes_by_model, "test-model".to_string(), usize::MAX);
+
+        let messages_json = r#"
+                                                [
+                                                  {
+                                                    "role": "user",
+                                                    "content": "What's the weather like in Tokyo?"
+                                                  },
+                                                  {
+                                                    "role": "assistant",
+                                                    "content": "",
+                                                    "tool_calls": [
+                                                      {
+                                                        "id": "toolcall-abc123",
+                                                        "type": "function",
+                                                        "function": {
+                                                          "name": "get_weather",
+                                                          "arguments": "{ \"location\": \"Tokyo\" }"
+                                                        }
+                                                      }
+                                                    ]
+                                                  },
+                                                  {
+                                                    "role": "tool",
+                                                    "tool_call_id": "toolcall-abc123",
+                                                    "content": "{ \"temperature\": \"22°C\", \"condition\": \"Sunny\" }"
+                                                  },
+                                                  {
+                                                    "role": "assistant",
+                                                    "content": "The current weather in Tokyo is 22°C and sunny."
+                                                  },
+                                                  {
+                                                    "role": "user",
+                                                    "content": "What about in New York?"
+                                                  }
+                                                ]
+        "#;
+        let conversation: Vec<Message> = serde_json::from_str(messages_json).unwrap();
+
+        // Turns expected to survive into the <conversation> section:
+        //
+        //   1. user      - "What's the weather like in Tokyo?"
+        //   2. assistant - "The current weather in Tokyo is 22°C and sunny."
+        //   3. user      - "What about in New York?"
+        //
+        // Turns expected to be dropped:
+        //
+        //   - the assistant turn with empty content and a `get_weather` tool call
+        //   - the `tool` result answering `toolcall-abc123`
+
+        let request: ChatCompletionsRequest = orchestrator.generate_request(&conversation, &None);
+        let actual_prompt = request.messages[0].content.to_string();
+
+        assert_eq!(expected_prompt, actual_prompt);
+    }
+
+    /// Walks `parse_response` through the reply shapes covered below.
+    ///
+    /// Covered: one route, several routes, an empty list, `null`, a missing `route` key, an
+    /// empty string, truncated JSON (the only case expected to return an error), single-quoted
+    /// JSON followed by a literal `\n`, and a route name that is not configured.
+    #[test]
+    fn test_parse_response() {
+        let routes_json = r#"
+          {
+            "gpt-4o": [
+              {"name": "Image generation", "description": "generating image"},
+              {"name": "Code generation", "description": "generating code"}
+            ]
+        }
+        "#;
+        let routes_by_model: HashMap<String, Vec<OrchestrationPreference>> =
+            serde_json::from_str(routes_json).unwrap();
+        let orchestrator =
+            OrchestratorModelV1::new(routes_by_model, "test-model".to_string(), 2000);
+
+        // Feeds one raw model reply to `parse_response`; the second argument is `&None` in
+        // every case of this test.
+        let parse = |raw: &str| orchestrator.parse_response(raw, &None);
+        // Builds the expected `Some(..)` value: each given route name paired with "gpt-4o",
+        // the key the routes are registered under in `routes_json`.
+        let routed_to_gpt_4o = |names: &[&str]| {
+            let pairs: Vec<(String, String)> = names
+                .iter()
+                .map(|&name| (name.to_string(), "gpt-4o".to_string()))
+                .collect();
+            Some(pairs)
+        };
+
+        // Case 1: a single known route inside the array
+        let input = r#"{"route": ["Image generation"]}"#;
+        assert_eq!(parse(input).unwrap(), routed_to_gpt_4o(&["Image generation"]));
+
+        // Case 2: several known routes inside the array
+        let input = r#"{"route": ["Image generation", "Code generation"]}"#;
+        assert_eq!(
+            parse(input).unwrap(),
+            routed_to_gpt_4o(&["Image generation", "Code generation"])
+        );
+
+        // Case 3: well-formed JSON, empty array
+        let input = r#"{"route": []}"#;
+        assert_eq!(parse(input).unwrap(), None);
+
+        // Case 4: well-formed JSON, `route` is null
+        let input = r#"{"route": null}"#;
+        assert_eq!(parse(input).unwrap(), None);
+
+        // Case 5: well-formed JSON without a `route` field
+        let input = r#"{}"#;
+        assert_eq!(parse(input).unwrap(), None);
+
+        // Case 5.1: empty string
+        let input = r#""#;
+        assert_eq!(parse(input).unwrap(), None);
+
+        // Case 6: malformed JSON is reported as an error
+        let input = r#"{"route": ["route1""#; // missing closing ]
+        assert!(parse(input).is_err());
+
+        // Case 7: single-quoted JSON followed by a literal backslash + 'n'
+        let input = "{'route': ['Image generation']}\\n";
+        assert_eq!(parse(input).unwrap(), routed_to_gpt_4o(&["Image generation"]));
+
+        // Case 8: the array names a route that is absent from the orchestrations map
+        let input = r#"{"route": ["Unknown route"]}"#;
+        assert_eq!(parse(input).unwrap(), None);
+    }
+}
diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml
index aa95e2e4..a619f02c 100644
--- a/crates/common/Cargo.toml
+++ b/crates/common/Cargo.toml
@@ -14,7 +14,7 @@ derivative = "2.2.0"
 thiserror = "1.0.64"
 tiktoken-rs = "0.5.9"
 rand = "0.8.5"
-serde_json = "1.0"
+serde_json = { version = "1.0", features = ["preserve_order"] }
 hex = "0.4.3"
 urlencoding = "2.1.3"
 url = "2.5.4"
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 27f8ebd9..18b65f88 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -252,6 +252,39 @@ pub struct RoutingPreference {
     pub description: String,
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AgentUsagePreference {
+    pub model: String, // model name this entry applies to (presumably; confirm with callers)
+    pub orchestration_preferences: Vec<OrchestrationPreference>, // routes listed under `model`
+}
+
+/// A route the orchestrator can pick, identified by `name` and explained by `description`.
+/// Deserialization reads just those two fields; the manual `Serialize` impl additionally
+/// emits a fixed `parameters` value: {"type": "object", "properties": {}, "required": []}
+#[derive(Clone, Debug, Deserialize)]
+pub struct OrchestrationPreference {
+    pub name: String,
+    pub description: String,
+}
+
+impl serde::Serialize for OrchestrationPreference {
+    /// Emits `name`, `description`, then a constant empty-object `parameters` schema.
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::SerializeStruct;
+        // Key order inside `parameters` is kept only via serde_json's `preserve_order` feature.
+        let default_parameters =
+            serde_json::json!({"type": "object", "properties": {}, "required": []});
+        let mut out = serializer.serialize_struct("OrchestrationPreference", 3)?;
+        out.serialize_field("name", &self.name)?;
+        out.serialize_field("description", &self.description)?;
+        out.serialize_field("parameters", &default_parameters)?;
+        out.end()
+    }
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 //TODO: use enum for model, but if there is a new model, we need to update the code
 pub struct LlmProvider {