refactor

2026-06-17 15:25:17 +02:00 · 2025-03-14 17:34:18 -07:00 · 2025-03-14 17:34:18 -07:00 · 816f513663
commit 816f513663
parent f4462d1ed5
5 changed files with 2 additions and 20 deletions
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -214,7 +214,6 @@ impl HttpContext for StreamContext {
            }));
        } else {
            self.select_llm_provider();
-            debug!("setting routing header to: {}", self.llm_provider().name);
            self.add_http_request_header(
                ARCH_ROUTING_HEADER,
                &self.llm_provider().provider_interface.to_string(),
@@ -243,12 +242,6 @@ impl HttpContext for StreamContext {
    fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
        // Let the client send the gateway all the data before sending to the LLM_provider.
        // TODO: consider a streaming API.
-        trace!(
-            "on_http_request_body [S={}] bytes={} end_stream={}",
-            self.context_id,
-            body_size,
-            end_of_stream
-        );

        if self.request_body_sent_time.is_none() {
            self.request_body_sent_time = Some(current_time_ns());
@@ -316,14 +309,6 @@ impl HttpContext for StreamContext {

        deserialized_body.model = model_name.to_string();

-        // if use_agent_orchestrator || self.llm_provider.as_ref().unwrap().model.is_none() {
-        //     deserialized_body.model = "None".to_string()
-        // } else {
-        //     // override model name from the llm provider
-        //     deserialized_body
-        //         .model
-        //         .clone_from(&self.llm_provider.as_ref().unwrap().model.as_ref().unwrap());
-        // }
        let chat_completion_request_str = serde_json::to_string(&deserialized_body).unwrap();

        trace!(
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@@ -321,7 +321,7 @@ impl StreamContext {
                metadata.insert("use_agent_orchestrator".to_string(), "true".to_string());

                metadata.insert(
-                    "Agent-Name".to_string(),
+                    "agent-name".to_string(),
                    callout_context
                        .prompt_target_name
                        .as_ref()
--- a/demos/use_cases/orchestrating_agents/main.py
+++ b/demos/use_cases/orchestrating_agents/main.py
@@ -119,7 +119,7 @@ async def completion_api(req: ChatCompletionsRequest):
    logger.info(f"request: {req}")
    if req.metadata is None:
        req.metadata = {}
-    agent_name = req.metadata.get("Agent-Name", "unknown agent")
+    agent_name = req.metadata.get("agent-name", "unknown agent")
    logger.info(f"agent: {agent_name}")

    agent_role = agent_map.get(agent_name)["role"]
--- a/model_server/src/commons/globals.py
+++ b/model_server/src/commons/globals.py
@@ -21,13 +21,11 @@ ARCH_ENDPOINT = os.getenv("ARCH_ENDPOINT", "https://archfc.katanemo.dev/v1")
 ARCH_API_KEY = "EMPTY"
 ARCH_CLIENT = OpenAI(base_url=ARCH_ENDPOINT, api_key=ARCH_API_KEY)
 ARCH_AGENT_CLIENT = ARCH_CLIENT
-# ARCH_AGENT_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "EMPTY"))

 # Define model names
 ARCH_INTENT_MODEL_ALIAS = "Arch-Intent"
 ARCH_FUNCTION_MODEL_ALIAS = "Arch-Function"
 ARCH_AGENT_MODEL_ALIAS = ARCH_FUNCTION_MODEL_ALIAS
-# ARCH_AGENT_MODEL_ALIAS = "gpt-4o-mini"
 ARCH_GUARD_MODEL_ALIAS = "katanemo/Arch-Guard"

 # Define model handlers
--- a/model_server/src/core/function_calling.py
+++ b/model_server/src/core/function_calling.py
@@ -630,7 +630,6 @@ class ArchFunctionHandler(ArchBaseHandler):
        return chat_completion_response


-# override ArchFunctionHandler
 class ArchAgentHandler(ArchFunctionHandler):
    def __init__(self, client: OpenAI, model_name: str, config: ArchAgentConfig):
        super().__init__(client, model_name, config)