diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml
index feaaa90f..e5d8b88a 100644
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@@ -170,8 +170,6 @@ properties:
               type: object
               additionalProperties:
                 type: string
-            pass_context:
-              type: boolean
           additionalProperties: false
           required:
             - name
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs
index 616536a4..a956e71c 100644
--- a/crates/common/src/configuration.rs
+++ b/crates/common/src/configuration.rs
@@ -219,7 +219,6 @@ pub struct EndpointDetails {
     #[serde(rename = "http_method")]
     pub method: Option<HttpMethod>,
     pub http_headers: Option<HashMap<String, String>>,
-    pub pass_context: Option<bool>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs
index e0a319ef..5ad375af 100644
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@@ -379,7 +379,7 @@ impl StreamContext {
         let http_method = endpoint_details.method.clone().unwrap_or_default();
         let prompt_target_params = prompt_target.parameters.clone().unwrap_or_default();
 
-        let (path, body) = match compute_request_path_body(
+        let (path, api_call_body) = match compute_request_path_body(
             &endpoint_path,
             tool_params,
             &prompt_target_params,
@@ -396,6 +396,8 @@ impl StreamContext {
             }
         };
 
+        debug!("api call body {:?}", api_call_body);
+
         let timeout_str = API_REQUEST_TIMEOUT_MS.to_string();
 
         let http_method_str = http_method.to_string();
@@ -411,25 +413,6 @@ impl StreamContext {
         .into_iter()
         .collect();
 
-        let api_call_body = match endpoint_details.pass_context.unwrap_or_default() {
-            true => {
-                let messages = self.construct_llm_messages(&callout_context);
-
-                let chat_completion_request = ChatCompletionsRequest {
-                    model: callout_context.request_body.model.clone(),
-                    messages,
-                    tools: None,
-                    stream: callout_context.request_body.stream,
-                    stream_options: callout_context.request_body.stream_options.clone(),
-                    metadata: None,
-                };
-
-                let body_str = serde_json::to_string(&chat_completion_request).unwrap();
-                Some(body_str)
-            }
-            false => body,
-        };
-
         if self.request_id.is_some() {
             headers.insert(REQUEST_ID_HEADER, self.request_id.as_ref().unwrap());
         }
@@ -444,7 +427,6 @@ impl StreamContext {
             headers.insert(key.as_str(), value.as_str());
         }
 
-        debug!("api call body string: {}", api_call_body.as_ref().unwrap());
 
         let call_args = CallArgs::new(
             ARCH_INTERNAL_CLUSTER_NAME,
@@ -519,7 +501,7 @@ impl StreamContext {
 
         if !prompt_target
             .auto_llm_dispatch_on_response
-            .unwrap_or_default()
+            .unwrap_or(true)
         {
             let tool_call_response = self.tool_call_response.as_ref().unwrap().clone();
 
@@ -675,7 +657,7 @@ impl StreamContext {
         // check if the default target should be dispatched to the LLM provider
         if !prompt_target
             .auto_llm_dispatch_on_response
-            .unwrap_or_default()
+            .unwrap_or(true)
         {
             let default_target_response_str = if self.streaming_response {
                 let chat_completion_response =
diff --git a/demos/use_cases/orchestrating_agents/hurl_tests/simple.hurl b/demos/use_cases/orchestrating_agents/hurl_tests/simple.hurl
new file mode 100644
index 00000000..4db2c67c
--- /dev/null
+++ b/demos/use_cases/orchestrating_agents/hurl_tests/simple.hurl
@@ -0,0 +1,19 @@
+POST http://localhost:10000/v1/chat/completions
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "I want to sell red shoes"
+    }
+  ]
+}
+HTTP 200  # the request body carries no "stream" field; the call must succeed
+[Asserts]
+header "content-type" == "application/json"  # exact string comparison
+jsonpath "$.model" matches /^gpt-4o-mini/  # model name must start with gpt-4o-mini
+jsonpath "$.metadata.x-arch-state" != null  # an x-arch-state metadata entry must be present
+jsonpath "$.usage" != null  # a usage object must be present
+jsonpath "$.choices[0].message.content" != null  # first choice must carry message content
+jsonpath "$.choices[0].message.role" == "assistant"  # first choice must be an assistant message
diff --git a/demos/use_cases/orchestrating_agents/hurl_tests/simple_stream.hurl b/demos/use_cases/orchestrating_agents/hurl_tests/simple_stream.hurl
new file mode 100644
index 00000000..f060fed0
--- /dev/null
+++ b/demos/use_cases/orchestrating_agents/hurl_tests/simple_stream.hurl
@@ -0,0 +1,16 @@
+POST http://localhost:10000/v1/chat/completions
+Content-Type: application/json
+
+{
+  "messages": [
+    {
+      "role": "user",
+      "content": "I want to sell red shoes"
+    }
+  ],
+  "stream": true
+}
+HTTP 200  # same prompt as the non-streaming test, but with "stream": true
+[Asserts]
+header "content-type" matches /text\/event-stream/  # regex match on the content type
+body matches /^data: .*?sales_agent.*?\n/  # body must start with a "data: " line containing "sales_agent"
diff --git a/demos/use_cases/orchestrating_agents/main.py b/demos/use_cases/orchestrating_agents/main.py
index db178db0..b51e4ad3 100644
--- a/demos/use_cases/orchestrating_agents/main.py
+++ b/demos/use_cases/orchestrating_agents/main.py
@@ -54,6 +54,7 @@ class ChatCompletionsRequest(BaseModel):
     messages: List[Message]
     model: str
     metadata: Dict[str, Any] = None
+    stream: bool = False
 
 
 class Choice(BaseModel):
@@ -115,48 +116,35 @@ agent_map = {
 
 @app.post("/v1/chat/completions")
 async def completion_api(req: ChatCompletionsRequest):
+    logger.info(f"request: {req}")  # logs the whole parsed request, messages included
     if req.metadata is None:
         req.metadata = {}
     agent_name = req.metadata.get("Agent-Name", "unknown agent")
     logger.info(f"agent: {agent_name}")
 
-    def stream():
-        agent_role = agent_map.get(agent_name)["role"]
-        agent_instructions = agent_map.get(agent_name)["instructions"]
-        system_prompt = "You are a " + agent_role + ". " + agent_instructions
-        messages = [{"role": "system", "content": system_prompt}]
-        for message in req.messages:
-            messages.append({"role": message.role, "content": message.content})
-        completion = client.chat.completions.create(
-            model="--",
-            messages=messages,
-            stream=True,
-        )
-        for line in completion:
-            if line.choices and len(line.choices) > 0 and line.choices[0].delta:
-                chunk_response_str = json.dumps(line.model_dump())
-                yield "data: " + chunk_response_str + "\n\n"
-        yield "data: [DONE]" + "\n\n"
+    agent_role = agent_map.get(agent_name)["role"]  # NOTE(review): TypeError if agent_name is not in agent_map - confirm intended
+    agent_instructions = agent_map.get(agent_name)["instructions"]
+    system_prompt = "You are a " + agent_role + ". " + agent_instructions
+    messages = [{"role": "system", "content": system_prompt}]  # agent system prompt goes first
+    for message in req.messages:
+        messages.append({"role": message.role, "content": message.content})
+    logger.info("messages: " + str(messages))
+    completion = client.chat.completions.create(  # NOTE(review): not awaited inside async def - blocks the event loop if client is synchronous; confirm
+        model="--",
+        messages=messages,
+        stream=req.stream,  # follows the request's stream flag
+    )
 
-        # content = agent_map.get(agent_name)
+    if req.stream:  # streaming: re-emit each upstream chunk as a "data: " event
 
-        # for c in content:
-        #     resp = ChatCompletionStreamResponse(
-        #         model="--",
-        #         choices=[
-        #             ChunkChoice(
-        #                 delta=Message(
-        #                     role="assistant",
-        #                     content=c,
-        #                 )
-        #             )
-        #         ],
-        #     )
-        #     # random sleep between 10m and 50ms
-        #     time.sleep(random.randint(10, 50) / 1000)
+        def stream():
+            for line in completion:
+                if line.choices and len(line.choices) > 0 and line.choices[0].delta:  # chunks without a delta are dropped
+                    chunk_response_str = json.dumps(line.model_dump())
+                    yield "data: " + chunk_response_str + "\n\n"
+            yield "data: [DONE]" + "\n\n"  # end-of-stream sentinel
 
-        #     yield "data: " + json.dumps(resp.model_dump()) + "\n\n"
+        return StreamingResponse(stream(), media_type="text/event-stream")
 
-        # yield "data: [DONE]" + "\n\n"
-
-    return StreamingResponse(stream(), media_type="text/event-stream")
+    else:
+        return completion  # non-streaming: return the completion object as-is
diff --git a/model_server/src/core/function_calling.py b/model_server/src/core/function_calling.py
index 71108dcd..0e33cd90 100644
--- a/model_server/src/core/function_calling.py
+++ b/model_server/src/core/function_calling.py
@@ -547,7 +547,7 @@ class ArchFunctionHandler(ArchBaseHandler):
             messages=messages,
             model=self.model_name,
             stream=True,
-            extra_body={"temperature": 0.01, "logprobs": True},
+            extra_body=self.generation_params,
         )
 
         use_agent_orchestrator = req.metadata.get("use_agent_orchestrator", False)