From 816f513663e74925268aa625a58c6c9f2277b2e0 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Fri, 14 Mar 2025 17:34:18 -0700 Subject: [PATCH] refactor --- crates/llm_gateway/src/stream_context.rs | 15 --------------- crates/prompt_gateway/src/stream_context.rs | 2 +- demos/use_cases/orchestrating_agents/main.py | 2 +- model_server/src/commons/globals.py | 2 -- model_server/src/core/function_calling.py | 1 - 5 files changed, 2 insertions(+), 20 deletions(-) diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 6a104784..5f50e0f7 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -214,7 +214,6 @@ impl HttpContext for StreamContext { })); } else { self.select_llm_provider(); - debug!("setting routing header to: {}", self.llm_provider().name); self.add_http_request_header( ARCH_ROUTING_HEADER, &self.llm_provider().provider_interface.to_string(), @@ -243,12 +242,6 @@ impl HttpContext for StreamContext { fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action { // Let the client send the gateway all the data before sending to the LLM_provider. // TODO: consider a streaming API. - trace!( - "on_http_request_body [S={}] bytes={} end_stream={}", - self.context_id, - body_size, - end_of_stream - ); if self.request_body_sent_time.is_none() { self.request_body_sent_time = Some(current_time_ns()); @@ -316,14 +309,6 @@ impl HttpContext for StreamContext { deserialized_body.model = model_name.to_string(); - // if use_agent_orchestrator || self.llm_provider.as_ref().unwrap().model.is_none() { - // deserialized_body.model = "None".to_string() - // } else { - // // override model name from the llm provider - // deserialized_body - // .model - // .clone_from(&self.llm_provider.as_ref().unwrap().model.as_ref().unwrap()); - // } let chat_completion_request_str = serde_json::to_string(&deserialized_body).unwrap(); trace!( diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index 5ad375af..fddb3b20 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -321,7 +321,7 @@ impl StreamContext { metadata.insert("use_agent_orchestrator".to_string(), "true".to_string()); metadata.insert( - "Agent-Name".to_string(), + "agent-name".to_string(), callout_context .prompt_target_name .as_ref() diff --git a/demos/use_cases/orchestrating_agents/main.py b/demos/use_cases/orchestrating_agents/main.py index b51e4ad3..13f04647 100644 --- a/demos/use_cases/orchestrating_agents/main.py +++ b/demos/use_cases/orchestrating_agents/main.py @@ -119,7 +119,7 @@ async def completion_api(req: ChatCompletionsRequest): logger.info(f"request: {req}") if req.metadata is None: req.metadata = {} - agent_name = req.metadata.get("Agent-Name", "unknown agent") + agent_name = req.metadata.get("agent-name", "unknown agent") logger.info(f"agent: {agent_name}") agent_role = agent_map.get(agent_name)["role"] diff --git a/model_server/src/commons/globals.py b/model_server/src/commons/globals.py index 07e2ffec..49dce5e7 100644 --- a/model_server/src/commons/globals.py +++ b/model_server/src/commons/globals.py @@ -21,13 +21,11 @@ ARCH_ENDPOINT = os.getenv("ARCH_ENDPOINT", "https://archfc.katanemo.dev/v1") ARCH_API_KEY = "EMPTY" ARCH_CLIENT = OpenAI(base_url=ARCH_ENDPOINT, api_key=ARCH_API_KEY) ARCH_AGENT_CLIENT = ARCH_CLIENT -# ARCH_AGENT_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "EMPTY")) # Define model names ARCH_INTENT_MODEL_ALIAS = "Arch-Intent" ARCH_FUNCTION_MODEL_ALIAS = "Arch-Function" ARCH_AGENT_MODEL_ALIAS = ARCH_FUNCTION_MODEL_ALIAS -# ARCH_AGENT_MODEL_ALIAS = "gpt-4o-mini" ARCH_GUARD_MODEL_ALIAS = "katanemo/Arch-Guard" # Define model handlers diff --git a/model_server/src/core/function_calling.py b/model_server/src/core/function_calling.py index 0e33cd90..457aced8 100644 --- a/model_server/src/core/function_calling.py +++ b/model_server/src/core/function_calling.py @@ -630,7 +630,6 @@ class ArchFunctionHandler(ArchBaseHandler): return chat_completion_response -# override ArchFunctionHandler class ArchAgentHandler(ArchFunctionHandler): def __init__(self, client: OpenAI, model_name: str, config: ArchAgentConfig): super().__init__(client, model_name, config)