Use the passed-in model name in the chat completion request (#445)

2026-05-15 11:02:39 +02:00 · 2025-03-21 15:56:17 -07:00 · 2025-03-21 15:56:17 -07:00 · eb48f3d5bb
commit eb48f3d5bb
parent bd8004d1ae
20 changed files with 364 additions and 89 deletions
--- a/crates/prompt_gateway/src/http_context.rs
+++ b/crates/prompt_gateway/src/http_context.rs
@ -45,7 +45,8 @@ impl HttpContext for StreamContext {
                        warn!("Need single endpoint when use_agent_orchestrator is set");
                        self.send_server_error(
                            ServerError::LogicError(
-                                "Need single endpoint when use_agent_orchestrator is set".to_string(),
+                                "Need single endpoint when use_agent_orchestrator is set"
+                                    .to_string(),
                            ),
                            None,
                        );
@ -190,7 +191,7 @@ impl HttpContext for StreamContext {
            messages: deserialized_body.messages.clone(),
            metadata,
            stream: deserialized_body.stream,
-            model: "--".to_string(),
+            model: deserialized_body.model.clone(),
            stream_options: deserialized_body.stream_options.clone(),
            tools: Some(tool_calls),
        };
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@ -427,7 +427,6 @@ impl StreamContext {
            headers.insert(key.as_str(), value.as_str());
        }

-
        let call_args = CallArgs::new(
            ARCH_INTERNAL_CLUSTER_NAME,
            &path,
@ -499,10 +498,7 @@ impl StreamContext {
            }
        };

-        if !prompt_target
-            .auto_llm_dispatch_on_response
-            .unwrap_or(true)
-        {
+        if !prompt_target.auto_llm_dispatch_on_response.unwrap_or(true) {
            let tool_call_response = self.tool_call_response.as_ref().unwrap().clone();

            let direct_response_str = if self.streaming_response {
@ -655,10 +651,7 @@ impl StreamContext {
            .clone();

        // check if the default target should be dispatched to the LLM provider
-        if !prompt_target
-            .auto_llm_dispatch_on_response
-            .unwrap_or(true)
-        {
+        if !prompt_target.auto_llm_dispatch_on_response.unwrap_or(true) {
            let default_target_response_str = if self.streaming_response {
                let chat_completion_response =
                    match serde_json::from_slice::<ChatCompletionsResponse>(&body) {