From 0d190a6e5c84b4b4e1496c1bc19a62ebd6e6ccc6 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil@katanemo.com>
Date: Fri, 30 May 2025 17:40:46 -0700
Subject: [PATCH] update code to use new json based system prompt for routing
 (#493)

---
 crates/brightstaff/src/main.rs                |  25 +-
 crates/brightstaff/src/router/llm_router.rs   |  22 +-
 .../brightstaff/src/router/router_model_v1.rs | 595 +++++++++++-------
 crates/common/src/api/hallucination.rs        |   1 -
 crates/common/src/api/open_ai.rs              |  18 +
 crates/common/src/configuration.rs            |  19 +
 crates/common/src/tokenizer.rs                |   8 +-
 crates/prompt_gateway/src/http_context.rs     |   5 +-
 crates/prompt_gateway/tests/integration.rs    |   3 +-
 .../preference_based_routing/arch_config.yaml |  13 +-
 .../docker-compose.yaml                       |   2 +-
 .../test_router_endpoint.rest                 |   5 +-
 12 files changed, 433 insertions(+), 283 deletions(-)
diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs
index 8eb2d7e2..5502c983 100644
--- a/crates/brightstaff/src/main.rs
+++ b/crates/brightstaff/src/main.rs
@@ -101,20 +101,16 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                             .with_context(parent_cx)
                             .await
                     }
-                    (&Method::GET, "/v1/models") => {
-                        Ok(list_models(llm_providers).await)
-                    }
+                    (&Method::GET, "/v1/models") => Ok(list_models(llm_providers).await),
                     (&Method::OPTIONS, "/v1/models") => {
                         let mut response = Response::new(empty());
                         *response.status_mut() = StatusCode::NO_CONTENT;
-                        response.headers_mut().insert(
-                            "Allow",
-                            "GET, OPTIONS".parse().unwrap(),
-                        );
-                        response.headers_mut().insert(
-                            "Access-Control-Allow-Origin",
-                            "*".parse().unwrap(),
-                        );
+                        response
+                            .headers_mut()
+                            .insert("Allow", "GET, OPTIONS".parse().unwrap());
+                        response
+                            .headers_mut()
+                            .insert("Access-Control-Allow-Origin", "*".parse().unwrap());
                         response.headers_mut().insert(
                             "Access-Control-Allow-Headers",
                             "Authorization, Content-Type".parse().unwrap(),
@@ -123,10 +119,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
                             "Access-Control-Allow-Methods",
                             "GET, POST, OPTIONS".parse().unwrap(),
                         );
-                        response.headers_mut().insert(
-                            "Content-Type",
-                            "application/json".parse().unwrap(),
-                        );
+                        response
+                            .headers_mut()
+                            .insert("Content-Type", "application/json".parse().unwrap());
 
                         Ok(response)
                     }
diff --git a/crates/brightstaff/src/router/llm_router.rs b/crates/brightstaff/src/router/llm_router.rs
index 48851184..d4158388 100644
--- a/crates/brightstaff/src/router/llm_router.rs
+++ b/crates/brightstaff/src/router/llm_router.rs
@@ -2,7 +2,7 @@ use std::sync::Arc;
 
 use common::{
     api::open_ai::{ChatCompletionsResponse, ContentType, Message},
-    configuration::LlmProvider,
+    configuration::{LlmProvider, LlmRoute},
     consts::ARCH_PROVIDER_HINT_HEADER,
 };
 use hyper::header;
@@ -47,26 +47,10 @@ impl RouterService {
             .cloned()
             .collect::<Vec<LlmProvider>>();
 
-        // convert the llm_providers to yaml string but only include name and usage
-        let llm_providers_with_usage_yaml = providers_with_usage
-            .iter()
-            .map(|provider| {
-                format!(
-                    "- name: {}\n  description: {}",
-                    provider.name,
-                    provider.usage.as_ref().unwrap_or(&"".to_string())
-                )
-            })
-            .collect::<Vec<String>>()
-            .join("\n");
-
-        debug!(
-            "llm_providers from config with usage: {}...",
-            llm_providers_with_usage_yaml.replace("\n", "\\n")
-        );
+        let llm_routes: Vec<LlmRoute> = providers_with_usage.iter().map(LlmRoute::from).collect();
 
         let router_model = Arc::new(router_model_v1::RouterModelV1::new(
-            llm_providers_with_usage_yaml.clone(),
+            llm_routes,
             routing_model_name.clone(),
             router_model_v1::MAX_TOKEN_LEN,
         ));
diff --git a/crates/brightstaff/src/router/router_model_v1.rs b/crates/brightstaff/src/router/router_model_v1.rs
index cbea39ff..bc69b475 100644
--- a/crates/brightstaff/src/router/router_model_v1.rs
+++ b/crates/brightstaff/src/router/router_model_v1.rs
@@ -1,6 +1,7 @@
 use common::{
     api::open_ai::{ChatCompletionsRequest, ContentType, Message},
-    consts::{SYSTEM_ROLE, USER_ROLE},
+    configuration::LlmRoute,
+    consts::{SYSTEM_ROLE, TOOL_ROLE, USER_ROLE},
 };
 use serde::{Deserialize, Serialize};
 use tracing::{debug, warn};
@@ -15,36 +16,33 @@ You are provided with route description within <routes></routes> XML tags:
 {routes}
 </routes>
 
-Your task is to decide which route is best suit with user intent on the conversation in <conversation></conversation> XML tags.  Follow the instruction:
-1. If the latest intent from user is irrelevant, response with empty route {"route": ""}.
-2. If the user request is full fill and user thank or ending the conversation , response with empty route {"route": ""}.
-3. Understand user latest intent and find the best match route in <routes></routes> xml tags.
-
-Based on your analysis, provide your response in the following JSON formats if you decide to match any route:
-{"route": "route_name"}
-
-
 <conversation>
 {conversation}
 </conversation>
+
+Your task is to decide which route is best suit with user intent on the conversation in <conversation></conversation> XML tags.  Follow the instruction:
+1. If the latest intent from user is irrelevant or user intent is full filled, response with other route {"route": "other"}.
+2. You must analyze the route descriptions and find the best match route for user latest intent.
+3. You only response the name of the route that best matches the user's request, use the exact name in the <routes></routes>.
+
+Based on your analysis, provide your response in the following JSON formats if you decide to match any route:
+{"route": "route_name"}
 "#;
 
 pub type Result<T> = std::result::Result<T, RoutingModelError>;
 pub struct RouterModelV1 {
-    llm_providers_with_usage_yaml: String,
+    llm_route_json_str: String,
     routing_model: String,
     max_token_length: usize,
 }
 impl RouterModelV1 {
-    pub fn new(
-        llm_providers_with_usage_yaml: String,
-        routing_model: String,
-        max_token_length: usize,
-    ) -> Self {
+    pub fn new(llm_routes: Vec<LlmRoute>, routing_model: String, max_token_length: usize) -> Self {
+        let llm_route_json_str =
+            serde_json::to_string(&llm_routes).unwrap_or_else(|_| "[]".to_string());
         RouterModelV1 {
-            llm_providers_with_usage_yaml,
             routing_model,
             max_token_length,
+            llm_route_json_str,
         }
     }
 }
@@ -58,9 +56,12 @@ const TOKEN_LENGTH_DIVISOR: usize = 4; // Approximate token length divisor for U
 
 impl RouterModel for RouterModelV1 {
     fn generate_request(&self, messages: &[Message]) -> ChatCompletionsRequest {
+        // Remove the system prompt, tool calls, tool call responses and messages without content.
+        // A message with no content is likely a tool call.
+        // A message whose role == tool is a tool call response.
         let messages_vec = messages
             .iter()
-            .filter(|m| m.role != SYSTEM_ROLE)
+            .filter(|m| m.role != SYSTEM_ROLE && m.role != TOOL_ROLE && m.content.is_some())
             .collect::<Vec<&Message>>();
 
         // Following code is to ensure that the conversation does not exceed max token length
@@ -116,21 +117,23 @@ impl RouterModel for RouterModelV1 {
         }
 
         // Reverse the selected messages to maintain the conversation order
-
-        let selected_conversation_list_str = selected_messages_list_reversed
+        let selected_conversation_list = selected_messages_list_reversed
             .iter()
             .rev()
-            .map(|m| {
-                let content_json_str = serde_json::to_string(&m.content).unwrap_or_default();
-                format!("{}: {}", m.role, content_json_str)
+            .map(|message| {
+                Message::new(
+                    message.role.clone(),
+                    // we can unwrap here because we have already filtered out messages without content
+                    message.content.as_ref().unwrap().to_string(),
+                )
             })
-            .collect::<Vec<String>>();
+            .collect::<Vec<Message>>();
 
         let messages_content = ARCH_ROUTER_V1_SYSTEM_PROMPT
-            .replace("{routes}", &self.llm_providers_with_usage_yaml)
+            .replace("{routes}", &self.llm_route_json_str)
             .replace(
                 "{conversation}",
-                selected_conversation_list_str.join("\n").as_str(),
+                &serde_json::to_string(&selected_conversation_list).unwrap_or_default(),
             );
 
         ChatCompletionsRequest {
@@ -215,60 +218,53 @@ mod tests {
 You are a helpful assistant designed to find the best suited route.
 You are provided with route description within <routes></routes> XML tags:
 <routes>
-route1: description1
-route2: description2
+[{"name":"Image generation","description":"generating image"},{"name":"image conversion","description":"convert images to provided format"},{"name":"image search","description":"search image"},{"name":"Audio Processing","description":"Analyzing and interpreting audio input including speech, music, and environmental sounds"},{"name":"Speech Recognition","description":"Converting spoken language into written text"}]
 </routes>
 
+<conversation>
+[{"role":"user","content":"hi"},{"role":"assistant","content":"Hello! How can I assist you today?"},{"role":"user","content":"given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"}]
+</conversation>
+
 Your task is to decide which route is best suit with user intent on the conversation in <conversation></conversation> XML tags.  Follow the instruction:
-1. If the latest intent from user is irrelevant, response with empty route {"route": ""}.
-2. If the user request is full fill and user thank or ending the conversation , response with empty route {"route": ""}.
-3. Understand user latest intent and find the best match route in <routes></routes> xml tags.
+1. If the latest intent from user is irrelevant or user intent is full filled, response with other route {"route": "other"}.
+2. You must analyze the route descriptions and find the best match route for user latest intent.
+3. You only response the name of the route that best matches the user's request, use the exact name in the <routes></routes>.
 
 Based on your analysis, provide your response in the following JSON formats if you decide to match any route:
 {"route": "route_name"}
-
-
-<conversation>
-user: "Hello, I want to book a flight."
-assistant: "Sure, where would you like to go?"
-user: "seattle"
-</conversation>
 "#;
-
-        let routes_yaml = "route1: description1\nroute2: description2";
+        let routes_str = r#"
+          [
+              {"name": "Image generation", "description": "generating image"},
+              {"name": "image conversion", "description": "convert images to provided format"},
+              {"name": "image search", "description": "search image"},
+              {"name": "Audio Processing", "description": "Analyzing and interpreting audio input including speech, music, and environmental sounds"},
+              {"name": "Speech Recognition", "description": "Converting spoken language into written text"}
+          ]
+        "#;
+        let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
-        let router = RouterModelV1::new(routes_yaml.to_string(), routing_model.clone(), usize::MAX);
+        let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
 
-        let messages = vec![
-            Message {
-                role: "system".to_string(),
-                content: Some(ContentType::Text(
-                    "You are a helpful assistant.".to_string(),
-                )),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text(
-                    "Hello, I want to book a flight.".to_string(),
-                )),
-                ..Default::default()
-            },
-            Message {
-                role: "assistant".to_string(),
-                content: Some(ContentType::Text(
-                    "Sure, where would you like to go?".to_string(),
-                )),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("seattle".to_string())),
-                ..Default::default()
-            },
-        ];
+        let conversation_str = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": "hi"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+                        }
+                    ]
+        "#;
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
 
-        let req = router.generate_request(&messages);
+        let req = router.generate_request(&conversation);
 
         let prompt = req.messages[0].content.as_ref().unwrap();
 
@@ -282,68 +278,55 @@ user: "seattle"
 You are a helpful assistant designed to find the best suited route.
 You are provided with route description within <routes></routes> XML tags:
 <routes>
-route1: description1
-route2: description2
+[{"name":"Image generation","description":"generating image"},{"name":"image conversion","description":"convert images to provided format"},{"name":"image search","description":"search image"},{"name":"Audio Processing","description":"Analyzing and interpreting audio input including speech, music, and environmental sounds"},{"name":"Speech Recognition","description":"Converting spoken language into written text"}]
 </routes>
 
+<conversation>
+[{"role":"user","content":"given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"}]
+</conversation>
+
 Your task is to decide which route is best suit with user intent on the conversation in <conversation></conversation> XML tags.  Follow the instruction:
-1. If the latest intent from user is irrelevant, response with empty route {"route": ""}.
-2. If the user request is full fill and user thank or ending the conversation , response with empty route {"route": ""}.
-3. Understand user latest intent and find the best match route in <routes></routes> xml tags.
+1. If the latest intent from user is irrelevant or user intent is full filled, response with other route {"route": "other"}.
+2. You must analyze the route descriptions and find the best match route for user latest intent.
+3. You only response the name of the route that best matches the user's request, use the exact name in the <routes></routes>.
 
 Based on your analysis, provide your response in the following JSON formats if you decide to match any route:
 {"route": "route_name"}
-
-
-<conversation>
-user: "I want to book a flight."
-assistant: "Sure, where would you like to go?"
-user: "seattle"
-</conversation>
 "#;
 
-        let routes_yaml = "route1: description1\nroute2: description2";
+        let routes_str = r#"
+          [
+              {"name": "Image generation", "description": "generating image"},
+              {"name": "image conversion", "description": "convert images to provided format"},
+              {"name": "image search", "description": "search image"},
+              {"name": "Audio Processing", "description": "Analyzing and interpreting audio input including speech, music, and environmental sounds"},
+              {"name": "Speech Recognition", "description": "Converting spoken language into written text"}
+          ]
+        "#;
+        let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
-        let router = RouterModelV1::new(routes_yaml.to_string(), routing_model.clone(), 223);
+        let router = RouterModelV1::new(llm_routes, routing_model.clone(), 235);
 
-        let messages = vec![
-            Message {
-                role: "system".to_string(),
-                content: Some(ContentType::Text(
-                    "You are a helpful assistant.".to_string(),
-                )),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("Hi".to_string())),
-                ..Default::default()
-            },
-            Message {
-                role: "assistant".to_string(),
-                content: Some(ContentType::Text("Hello! How can I assist you".to_string())),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("I want to book a flight.".to_string())),
-                ..Default::default()
-            },
-            Message {
-                role: "assistant".to_string(),
-                content: Some(ContentType::Text(
-                    "Sure, where would you like to go?".to_string(),
-                )),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("seattle".to_string())),
-                ..Default::default()
-            },
-        ];
+        let conversation_str = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": "hi"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+                        }
+                    ]
+        "#;
 
-        let req = router.generate_request(&messages);
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
+
+        let req = router.generate_request(&conversation);
 
         let prompt = req.messages[0].content.as_ref().unwrap();
 
@@ -357,69 +340,55 @@ user: "seattle"
 You are a helpful assistant designed to find the best suited route.
 You are provided with route description within <routes></routes> XML tags:
 <routes>
-route1: description1
-route2: description2
+[{"name":"Image generation","description":"generating image"},{"name":"image conversion","description":"convert images to provided format"},{"name":"image search","description":"search image"},{"name":"Audio Processing","description":"Analyzing and interpreting audio input including speech, music, and environmental sounds"},{"name":"Speech Recognition","description":"Converting spoken language into written text"}]
 </routes>
 
+<conversation>
+[{"role":"user","content":"given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson and this is a very long message that exceeds the max token length of the routing model, so it should be truncated and only the last user message should be included in the conversation for routing."}]
+</conversation>
+
 Your task is to decide which route is best suit with user intent on the conversation in <conversation></conversation> XML tags.  Follow the instruction:
-1. If the latest intent from user is irrelevant, response with empty route {"route": ""}.
-2. If the user request is full fill and user thank or ending the conversation , response with empty route {"route": ""}.
-3. Understand user latest intent and find the best match route in <routes></routes> xml tags.
+1. If the latest intent from user is irrelevant or user intent is full filled, response with other route {"route": "other"}.
+2. You must analyze the route descriptions and find the best match route for user latest intent.
+3. You only response the name of the route that best matches the user's request, use the exact name in the <routes></routes>.
 
 Based on your analysis, provide your response in the following JSON formats if you decide to match any route:
 {"route": "route_name"}
-
-
-<conversation>
-user: "Seatte, WA. But I also need to know about the weather there, and if there are any good restaurants nearby, and what the best time to visit is, and also if there are any events happening in the city."
-</conversation>
 "#;
 
-        let routes_yaml = "route1: description1\nroute2: description2";
+        let routes_str = r#"
+          [
+              {"name": "Image generation", "description": "generating image"},
+              {"name": "image conversion", "description": "convert images to provided format"},
+              {"name": "image search", "description": "search image"},
+              {"name": "Audio Processing", "description": "Analyzing and interpreting audio input including speech, music, and environmental sounds"},
+              {"name": "Speech Recognition", "description": "Converting spoken language into written text"}
+          ]
+        "#;
+        let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
-        let router = RouterModelV1::new(routes_yaml.to_string(), routing_model.clone(), 210);
+        let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200);
 
-        let messages = vec![
-            Message {
-                role: "system".to_string(),
-                content: Some(ContentType::Text(
-                    "You are a helpful assistant.".to_string(),
-                )),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("Hi".to_string())),
-                ..Default::default()
-            },
-            Message {
-                role: "assistant".to_string(),
-                content: Some(ContentType::Text("Hello! How can I assist you".to_string())),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("I want to book a flight.".to_string())),
-                ..Default::default()
-            },
-            Message {
-                role: "assistant".to_string(),
-                content: Some(ContentType::Text(
-                    "Sure, where would you like to go?".to_string(),
-                )),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("Seatte, WA. But I also need to know about the weather there, \
-                                                 and if there are any good restaurants nearby, and what the \
-                                                 best time to visit is, and also if there are any events \
-                                                 happening in the city.".to_string())),
-                ..Default::default()
-            },
-        ];
+        let conversation_str = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": "hi"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson and this is a very long message that exceeds the max token length of the routing model, so it should be truncated and only the last user message should be included in the conversation for routing."
+                        }
+                    ]
+        "#;
 
-        let req = router.generate_request(&messages);
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
+
+        let req = router.generate_request(&conversation);
 
         let prompt = req.messages[0].content.as_ref().unwrap();
 
@@ -433,68 +402,229 @@ user: "Seatte, WA. But I also need to know about the weather there, and if there
 You are a helpful assistant designed to find the best suited route.
 You are provided with route description within <routes></routes> XML tags:
 <routes>
-route1: description1
-route2: description2
+[{"name":"Image generation","description":"generating image"},{"name":"image conversion","description":"convert images to provided format"},{"name":"image search","description":"search image"},{"name":"Audio Processing","description":"Analyzing and interpreting audio input including speech, music, and environmental sounds"},{"name":"Speech Recognition","description":"Converting spoken language into written text"}]
 </routes>
 
+<conversation>
+[{"role":"user","content":"given the image In style of Andy Warhol"},{"role":"assistant","content":"ok here is the image"},{"role":"user","content":"pls give me another image about Bart and Lisa"}]
+</conversation>
+
 Your task is to decide which route is best suit with user intent on the conversation in <conversation></conversation> XML tags.  Follow the instruction:
-1. If the latest intent from user is irrelevant, response with empty route {"route": ""}.
-2. If the user request is full fill and user thank or ending the conversation , response with empty route {"route": ""}.
-3. Understand user latest intent and find the best match route in <routes></routes> xml tags.
+1. If the latest intent from user is irrelevant or user intent is full filled, response with other route {"route": "other"}.
+2. You must analyze the route descriptions and find the best match route for user latest intent.
+3. You only response the name of the route that best matches the user's request, use the exact name in the <routes></routes>.
 
 Based on your analysis, provide your response in the following JSON formats if you decide to match any route:
 {"route": "route_name"}
-
-
-<conversation>
-user: "I want to book a flight."
-assistant: "Sure, where would you like to go?"
-user: "seattle"
-</conversation>
 "#;
 
-        let routes_yaml = "route1: description1\nroute2: description2";
+        let routes_str = r#"
+          [
+              {"name": "Image generation", "description": "generating image"},
+              {"name": "image conversion", "description": "convert images to provided format"},
+              {"name": "image search", "description": "search image"},
+              {"name": "Audio Processing", "description": "Analyzing and interpreting audio input including speech, music, and environmental sounds"},
+              {"name": "Speech Recognition", "description": "Converting spoken language into written text"}
+          ]
+        "#;
+        let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
         let routing_model = "test-model".to_string();
-        let router = RouterModelV1::new(routes_yaml.to_string(), routing_model.clone(), 220);
+        let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230);
 
-        let messages = vec![
-            Message {
-                role: "system".to_string(),
-                content: Some(ContentType::Text(
-                    "You are a helpful assistant.".to_string(),
-                )),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("Hi".to_string())),
-                ..Default::default()
-            },
-            Message {
-                role: "assistant".to_string(),
-                content: Some(ContentType::Text("Hello! How can I assist you".to_string())),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("I want to book a flight.".to_string())),
-                ..Default::default()
-            },
-            Message {
-                role: "assistant".to_string(),
-                content: Some(ContentType::Text(
-                    "Sure, where would you like to go?".to_string(),
-                )),
-                ..Default::default()
-            },
-            Message {
-                role: "user".to_string(),
-                content: Some(ContentType::Text("seattle".to_string())),
-                ..Default::default()
-            },
-        ];
+        let conversation_str = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": "hi"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol"
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "ok here is the image"
+                        },
+                        {
+                            "role": "user",
+                            "content": "pls give me another image about Bart and Lisa"
+                        }
+                    ]
+        "#;
 
-        let req = router.generate_request(&messages);
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
+
+        let req = router.generate_request(&conversation);
+
+        let prompt = req.messages[0].content.as_ref().unwrap();
+
+        assert_eq!(expected_prompt, prompt.to_string());
+    }
+
+    #[test]
+    fn test_non_text_input() {
+        let expected_prompt = r#"
+You are a helpful assistant designed to find the best suited route.
+You are provided with route description within <routes></routes> XML tags:
+<routes>
+[{"name":"Image generation","description":"generating image"},{"name":"image conversion","description":"convert images to provided format"},{"name":"image search","description":"search image"},{"name":"Audio Processing","description":"Analyzing and interpreting audio input including speech, music, and environmental sounds"},{"name":"Speech Recognition","description":"Converting spoken language into written text"}]
+</routes>
+
+<conversation>
+[{"role":"user","content":"hi"},{"role":"assistant","content":"Hello! How can I assist you today?"},{"role":"user","content":"given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"}]
+</conversation>
+
+Your task is to decide which route is best suit with user intent on the conversation in <conversation></conversation> XML tags.  Follow the instruction:
+1. If the latest intent from user is irrelevant or user intent is full filled, response with other route {"route": "other"}.
+2. You must analyze the route descriptions and find the best match route for user latest intent.
+3. You only response the name of the route that best matches the user's request, use the exact name in the <routes></routes>.
+
+Based on your analysis, provide your response in the following JSON formats if you decide to match any route:
+{"route": "route_name"}
+"#;
+        let routes_json = r#"
+          [
+              {"name": "Image generation", "description": "generating image"},
+              {"name": "image conversion", "description": "convert images to provided format"},
+              {"name": "image search", "description": "search image"},
+              {"name": "Audio Processing", "description": "Analyzing and interpreting audio input including speech, music, and environmental sounds"},
+              {"name": "Speech Recognition", "description": "Converting spoken language into written text"}
+          ]
+        "#;
+        let routes: Vec<LlmRoute> = serde_json::from_str(routes_json).unwrap();
+        let router = RouterModelV1::new(routes, "test-model".to_string(), usize::MAX);
+
+        // First user message is multi-part (text + image_url); only its text should be rendered.
+        let conversation_json = r#"
+                    [
+                        {
+                            "role": "user",
+                            "content": [
+                              {
+                                "type": "text",
+                                "text": "hi"
+                              },
+                              {
+                                "type": "image_url",
+                                "image_url": {
+                                  "url": "https://example.com/image.png"
+                                }
+                              }
+                            ]
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Hello! How can I assist you today?"
+                        },
+                        {
+                            "role": "user",
+                            "content": "given the image In style of Andy Warhol, portrait of Bart and Lisa Simpson"
+                        }
+                    ]
+        "#;
+        let conversation = serde_json::from_str::<Vec<Message>>(conversation_json).unwrap();
+
+        let request = router.generate_request(&conversation);
+
+        let rendered = request.messages[0].content.as_ref().unwrap().to_string();
+
+        assert_eq!(expected_prompt, rendered);
+    }
+
+    #[test]
+    fn test_skip_tool_call() {
+        let expected_prompt = r#"
+You are a helpful assistant designed to find the best suited route.
+You are provided with route description within <routes></routes> XML tags:
+<routes>
+[{"name":"Image generation","description":"generating image"},{"name":"image conversion","description":"convert images to provided format"},{"name":"image search","description":"search image"},{"name":"Audio Processing","description":"Analyzing and interpreting audio input including speech, music, and environmental sounds"},{"name":"Speech Recognition","description":"Converting spoken language into written text"}]
+</routes>
+
+<conversation>
+[{"role":"user","content":"What's the weather like in Tokyo?"},{"role":"assistant","content":"The current weather in Tokyo is 22°C and sunny."},{"role":"user","content":"What about in New York?"}]
+</conversation>
+
+Your task is to decide which route is best suit with user intent on the conversation in <conversation></conversation> XML tags.  Follow the instruction:
+1. If the latest intent from user is irrelevant or user intent is full filled, response with other route {"route": "other"}.
+2. You must analyze the route descriptions and find the best match route for user latest intent.
+3. You only response the name of the route that best matches the user's request, use the exact name in the <routes></routes>.
+
+Based on your analysis, provide your response in the following JSON formats if you decide to match any route:
+{"route": "route_name"}
+"#;
+        let routes_str = r#"
+          [
+              {"name": "Image generation", "description": "generating image"},
+              {"name": "image conversion", "description": "convert images to provided format"},
+              {"name": "image search", "description": "search image"},
+              {"name": "Audio Processing", "description": "Analyzing and interpreting audio input including speech, music, and environmental sounds"},
+              {"name": "Speech Recognition", "description": "Converting spoken language into written text"}
+          ]
+        "#;
+        let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
+        let routing_model = "test-model".to_string();
+        let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
+
+        let conversation_str = r#"
+                                                [
+                                                  {
+                                                    "role": "user",
+                                                    "content": "What's the weather like in Tokyo?"
+                                                  },
+                                                  {
+                                                    "role": "assistant",
+                                                    "content": null,
+                                                    "tool_calls": [
+                                                      {
+                                                        "id": "toolcall-abc123",
+                                                        "type": "function",
+                                                        "function": {
+                                                          "name": "get_weather",
+                                                          "arguments": { "location": "Tokyo" }
+                                                        }
+                                                      }
+                                                    ]
+                                                  },
+                                                  {
+                                                    "role": "tool",
+                                                    "tool_call_id": "toolcall-abc123",
+                                                    "content": "{ \"temperature\": \"22°C\", \"condition\": \"Sunny\" }"
+                                                  },
+                                                  {
+                                                    "role": "assistant",
+                                                    "content": "The current weather in Tokyo is 22°C and sunny."
+                                                  },
+                                                  {
+                                                    "role": "user",
+                                                    "content": "What about in New York?"
+                                                  }
+                                                ]
+        "#;
+
+        // tool-call and tool-result messages should be dropped; the routed conversation becomes:
+
+        // [
+        //   {
+        //     "role": "user",
+        //     "content": "What's the weather like in Tokyo?"
+        //   },
+        //   {
+        //     "role": "assistant",
+        //     "content": "The current weather in Tokyo is 22°C and sunny."
+        //   },
+        //   {
+        //     "role": "user",
+        //     "content": "What about in New York?"
+        //   }
+        // ]
+
+        let conversation: Vec<Message> = serde_json::from_str(conversation_str).unwrap();
+
+        let req = router.generate_request(&conversation);
 
         let prompt = req.messages[0].content.as_ref().unwrap();
 
@@ -503,11 +633,18 @@ user: "seattle"
 
     #[test]
     fn test_parse_response() {
-        let router = RouterModelV1::new(
-            "route1: description1\nroute2: description2".to_string(),
-            "test-model".to_string(),
-            2000,
-        );
+        let routes_str = r#"
+[
+    {"name": "Image generation", "description": "generating image"},
+    {"name": "image conversion", "description": "convert images to provided format"},
+    {"name": "image search", "description": "search image"},
+    {"name": "Audio Processing", "description": "Analyzing and interpreting audio input including speech, music, and environmental sounds"},
+    {"name": "Speech Recognition", "description": "Converting spoken language into written text"}
+]
+"#;
+        let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
+
+        let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000);
 
         // Case 1: Valid JSON with non-empty route
         let input = r#"{"route": "route1"}"#;
diff --git a/crates/common/src/api/hallucination.rs b/crates/common/src/api/hallucination.rs
index e90ea165..41ccf3d7 100644
--- a/crates/common/src/api/hallucination.rs
+++ b/crates/common/src/api/hallucination.rs
@@ -6,7 +6,6 @@ use crate::{
 };
 use serde::{Deserialize, Serialize};
 
-
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct HallucinationClassificationRequest {
     pub prompt: String,
diff --git a/crates/common/src/api/open_ai.rs b/crates/common/src/api/open_ai.rs
index d5d4ce2a..080923c1 100644
--- a/crates/common/src/api/open_ai.rs
+++ b/crates/common/src/api/open_ai.rs
@@ -162,6 +162,8 @@ pub struct StreamOptions {
 pub enum MultiPartContentType {
     #[serde(rename = "text")]
     Text,
+    #[serde(rename = "image_url")]
+    ImageUrl,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
@@ -188,6 +190,9 @@ impl Display for ContentType {
                     .filter_map(|part| {
                         if part.content_type == MultiPartContentType::Text {
                             part.text.clone()
+                        } else if part.content_type == MultiPartContentType::ImageUrl {
+                            // skip image URLs or their data in text representation
+                            None
                         } else {
                             panic!("Unsupported content type: {:?}", part.content_type);
                         }
@@ -217,6 +222,19 @@ pub struct Message {
     pub tool_call_id: Option<String>,
 }
 
+impl Message {
+    /// Creates a plain-text message for `role`; model, tool calls and tool-call id are unset.
+    pub fn new(role: String, content: String) -> Self {
+        Message {
+            role,
+            content: Some(ContentType::Text(content)),
+            model: None,
+            tool_calls: None,
+            tool_call_id: None,
+        }
+    }
+}
+
 impl Default for Message {
     fn default() -> Self {
         Message {
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 655361e9..5438b03e 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -172,6 +172,25 @@ impl Display for LlmProviderType {
     }
 }
 
+/// A routing target described by a `name` and a free-text `description`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LlmRoute {
+    pub name: String,
+    pub description: String,
+}
+
+impl From<&LlmProvider> for LlmRoute {
+    /// Builds a route from a provider's `name` and `usage`; a provider without
+    /// `usage` gets the placeholder description "No description available".
+    fn from(provider: &LlmProvider) -> Self {
+        let description = provider.usage.clone();
+        Self {
+            name: provider.name.to_string(),
+            description: description.unwrap_or_else(|| "No description available".to_string()),
+        }
+    }
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 //TODO: use enum for model, but if there is a new model, we need to update the code
 pub struct LlmProvider {
diff --git a/crates/common/src/tokenizer.rs b/crates/common/src/tokenizer.rs
index 198c2af7..46e39887 100644
--- a/crates/common/src/tokenizer.rs
+++ b/crates/common/src/tokenizer.rs
@@ -14,13 +14,7 @@ pub fn token_count(model_name: &str, text: &str) -> Result<usize, String> {
             );
             "gpt-4"
         }
-        true => {
-            if model_name.starts_with("gpt-4.1") {
-                "gpt-4o"
-            } else {
-                model_name
-            }
-        }
+        true => model_name
     };
 
     // Consideration: is it more expensive to instantiate the BPE object every time, or to contend the singleton?
diff --git a/crates/prompt_gateway/src/http_context.rs b/crates/prompt_gateway/src/http_context.rs
index bb673208..cd251064 100644
--- a/crates/prompt_gateway/src/http_context.rs
+++ b/crates/prompt_gateway/src/http_context.rs
@@ -237,9 +237,7 @@ impl HttpContext for StreamContext {
             Duration::from_secs(5),
         );
 
-        if let Some(content) =
-            self.user_prompt.as_ref().unwrap().content.as_ref()
-        {
+        if let Some(content) = self.user_prompt.as_ref().unwrap().content.as_ref() {
             let call_context = StreamCallContext {
                 response_handler_type: ResponseHandlerType::ArchFC,
                 user_message: Some(content.to_string()),
@@ -262,7 +260,6 @@ impl HttpContext for StreamContext {
             );
         }
         Action::Pause
-
     }
 
     fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
diff --git a/crates/prompt_gateway/tests/integration.rs b/crates/prompt_gateway/tests/integration.rs
index 563c9393..e749a007 100644
--- a/crates/prompt_gateway/tests/integration.rs
+++ b/crates/prompt_gateway/tests/integration.rs
@@ -1,5 +1,6 @@
 use common::api::open_ai::{
-    ChatCompletionsResponse, Choice, ContentType, FunctionCallDetail, Message, ToolCall, ToolType, Usage
+    ChatCompletionsResponse, Choice, ContentType, FunctionCallDetail, Message, ToolCall, ToolType,
+    Usage,
 };
 use common::configuration::Configuration;
 use http::StatusCode;
diff --git a/demos/use_cases/preference_based_routing/arch_config.yaml b/demos/use_cases/preference_based_routing/arch_config.yaml
index c01f3ffc..f0d1cf95 100644
--- a/demos/use_cases/preference_based_routing/arch_config.yaml
+++ b/demos/use_cases/preference_based_routing/arch_config.yaml
@@ -1,7 +1,7 @@
 version: "0.1-beta"
 
 routing:
-  model: archgw-v1-router-model
+  model: arch-router
 
 listeners:
   egress_traffic:
@@ -12,10 +12,15 @@ listeners:
 
 llm_providers:
 
-  - name: archgw-v1-router-model
+  - name: arch-router
+    access_key: $OPENAI_API_KEY
+    provider_interface: arch
+    model: Arch-Router
+
+  - name: gpt-4o-mini
     provider_interface: openai
-    model: cotran2/qwen-4-epoch-2600
-    base_url: http://34.46.85.85:8000/v1
+    access_key: $OPENAI_API_KEY
+    model: gpt-4o-mini
 
   - name: gpt-4o
     provider_interface: openai
diff --git a/demos/use_cases/preference_based_routing/docker-compose.yaml b/demos/use_cases/preference_based_routing/docker-compose.yaml
index 54158f73..c0f30d5e 100644
--- a/demos/use_cases/preference_based_routing/docker-compose.yaml
+++ b/demos/use_cases/preference_based_routing/docker-compose.yaml
@@ -6,7 +6,7 @@ services:
     ports:
       - "8080:8080"
     environment:
-      - DEFAULT_MODEL=gpt-4o-mini
+      - DEFAULT_MODELS=gpt-4o-mini
       - ENABLE_OPENAI_API=true
       - OPENAI_API_BASE_URL=http://host.docker.internal:12000/v1
 
diff --git a/demos/use_cases/preference_based_routing/test_router_endpoint.rest b/demos/use_cases/preference_based_routing/test_router_endpoint.rest
index d4b947c8..890206cb 100644
--- a/demos/use_cases/preference_based_routing/test_router_endpoint.rest
+++ b/demos/use_cases/preference_based_routing/test_router_endpoint.rest
@@ -1,6 +1,6 @@
 @arch_llm_router_endpoint = http://35.192.87.187:8000
 
-POST http://34.46.85.85:8000/v1/chat/completions HTTP/1.1
+POST https://archfc.katanemo.dev/v1/chat/completions HTTP/1.1
 Content-Type: application/json
 
 {
@@ -21,4 +21,5 @@ Content-Type: application/json
 {"model":"cotran2/llama-1b-4-26","messages":[{"role":"user","content":"\nYou are an advanced Routing Assistant designed to select the optimal route based on user requests. \nYour task is to analyze conversations and match them to the most appropriate predefined route.\nReview the available routes config:\n\n# ROUTES CONFIG START\n- name: gpt-4o\n  description: simple requests, basic fact retrieval, easy to answer\n- name: o4-mini()\n  description: complex reasoning problem, require multi step answer\n# ROUTES CONFIG END\n\nExamine the following conversation between a user and an assistant:\n\n# CONVERSATION START\n[{\"role\":\"user\",\"content\":\"What is the capital of France?\"}]\n# CONVERSATION END\n\nYour goal is to identify the most appropriate route that matches the user's LATEST intent. Follow these steps:\n\n1. Carefully read and analyze the provided conversation, focusing on the user's latest request and the conversation scenario.\n2. Check if the user's request and scenario matches any of the routes in the routing configuration (focus on the description).\n3. Find the route that best matches.\n4. Use context clues from the entire conversation to determine the best fit.\n5. Return the best match possible. You only response the name of the route that best matches the user's request, use the exact name in the routes config.\n6. If no route relatively close to matches the user's latest intent or user last message is thank you or greeting, return an empty route ''. \n\n# OUTPUT FORMAT\nYour final output must follow this JSON format:\n{\n  \"route\": \"route_name\" # The matched route name, or empty string '' if no match\n}\n\nBased on your analysis, provide only the JSON object as your final output with no additional text, explanations, or whitespace.\n"}],"stream":false}
 
 ### get model list
-GET http://34.46.85.85:8000/v1/models HTTP/1.1
+# GET http://34.46.85.85:8000/v1/models HTTP/1.1
+GET https://archfc.katanemo.dev/arch-router/v1/models HTTP/1.1