diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs
index e02f96c5..6851b3c0 100644
--- a/crates/prompt_gateway/src/stream_context.rs
+++ b/crates/prompt_gateway/src/stream_context.rs
@@ -124,7 +124,8 @@ impl StreamContext {
         let arch_fc_response: ChatCompletionsResponse = match serde_json::from_str(&body_str) {
             Ok(arch_fc_response) => arch_fc_response,
             Err(e) => {
-                warn!("error deserializing archfc response: {}", e);
+                warn!("error deserializing archfc response: {}, body: {}", e, body_str
+              );
                 return self.send_server_error(ServerError::Deserialization(e), None);
             }
         };
diff --git a/model_server/pyproject.toml b/model_server/pyproject.toml
index d4a35682..9fa447f0 100644
--- a/model_server/pyproject.toml
+++ b/model_server/pyproject.toml
@@ -37,7 +37,7 @@ opentelemetry-instrumentation-fastapi = "^0.49b0"
 overrides = "^7.7.0"
 
 [tool.poetry.scripts]
-archgw_modelserver = "src.cli:start_server"
+archgw_modelserver = "src.cli:run_server"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
diff --git a/model_server/src/core/model_utils.py b/model_server/src/core/model_utils.py
index c411d835..3f6b36b0 100644
--- a/model_server/src/core/model_utils.py
+++ b/model_server/src/core/model_utils.py
@@ -30,6 +30,7 @@ class ChatCompletionResponse(BaseModel):
     created: Optional[str] = ""
     choices: List[Choice]
     model: str
+    metadata: Optional[Dict[str, str]] = {}
 
 
 class GuardRequest(BaseModel):
diff --git a/model_server/src/main.py b/model_server/src/main.py
index ef15ff78..c1675412 100644
--- a/model_server/src/main.py
+++ b/model_server/src/main.py
@@ -67,11 +67,12 @@ async def function_calling(req: ChatMessage, res: Response):
                     "Arch-Function"
                 ].chat_completion(req)
                 function_latency = time.perf_counter() - function_start_time
-                return {
-                    "response": function_calling_response,
-                    "intent_latency": round(intent_latency * 1000, 3),
-                    "function_latency": round(function_latency * 1000, 3),
+                function_calling_response.metadata = {
+                    "intent_latency": str(round(intent_latency * 1000, 3)),
+                    "function_latency": str(round(function_latency * 1000, 3)),
                 }
+
+                return function_calling_response
             except Exception as e:
                 # [TODO] Review: update how to collect debugging outputs
                 # logger.error(f"Error in chat_completion from `Arch-Function`: {e}")