From dcf9bf7c4b7c76a11665ca30b707b0e9d35893f6 Mon Sep 17 00:00:00 2001
From: Sabiha Khan <sabihak89@gmail.com>
Date: Tue, 24 Mar 2026 10:41:18 +0530
Subject: [PATCH] fix: pass system_instruction to one-shot LLM inferences to
 avoid using the system instruction from the LLM's _settings

---
 api/services/workflow/qa/analysis.py     | 13 +++++--------
 api/services/workflow/qa/node_summary.py |  3 +--
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/api/services/workflow/qa/analysis.py b/api/services/workflow/qa/analysis.py
index 34af4dd..55f5ab3 100644
--- a/api/services/workflow/qa/analysis.py
+++ b/api/services/workflow/qa/analysis.py
@@ -28,11 +28,11 @@ from api.utils.template_renderer import render_template
 from pipecat.processors.aggregators.llm_context import LLMContext
 
 
-async def _run_llm_inference(llm, messages: list[dict]) -> str | None:
+async def _run_llm_inference(llm, messages: list[dict], system_prompt: str) -> str | None:
     """Run a one-shot LLM inference using the pipecat service."""
     context = LLMContext()
     context.set_messages(messages)
-    return await llm.run_inference(context)
+    return await llm.run_inference(context, system_instruction=system_prompt)
 
 
 async def _generate_conversation_summary(
@@ -47,12 +47,11 @@ async def _generate_conversation_summary(
     Traced to Langfuse as conversation-summary-before-{node_name}.
     """
     messages = [
-        {"role": "system", "content": CONVERSATION_SUMMARY_SYSTEM_PROMPT},
         {"role": "user", "content": f"## Conversation\n{transcript}"},
     ]
 
     try:
-        summary = await _run_llm_inference(llm, messages) or ""
+        summary = await _run_llm_inference(llm, messages, CONVERSATION_SUMMARY_SYSTEM_PROMPT) or ""
 
         span_name = f"conversation-summary-before-{node_name}"
         add_qa_span_to_trace(parent_ctx, model, messages, summary, span_name)
@@ -163,13 +162,12 @@ async def run_per_node_qa_analysis(
         system_content = render_template(system_prompt, template_context)
 
         messages = [
-            {"role": "system", "content": system_content},
             {"role": "user", "content": f"## Transcript\n{node_transcript}"},
         ]
 
         # Call QA LLM
         try:
-            raw_response = await _run_llm_inference(llm, messages)
+            raw_response = await _run_llm_inference(llm, messages, system_content)
         except Exception as e:
             logger.error(
                 f"QA LLM call failed for node '{node_name}' on run {workflow_run_id}: {e}"
@@ -266,7 +264,6 @@ async def _run_whole_call_qa_analysis(
     }
     system_content = render_template(system_prompt, template_context)
     messages = [
-        {"role": "system", "content": system_content},
         {"role": "user", "content": f"## Transcript\n{transcript}"},
     ]
 
@@ -274,7 +271,7 @@ async def _run_whole_call_qa_analysis(
     llm = create_llm_service_from_provider(provider, model, api_key, **service_kwargs)
 
     try:
-        raw_response = await _run_llm_inference(llm, messages)
+        raw_response = await _run_llm_inference(llm, messages, system_content)
     except Exception as e:
         logger.error(f"QA LLM call failed for run {workflow_run_id}: {e}")
         return {"error": str(e), "node_results": {}}
diff --git a/api/services/workflow/qa/node_summary.py b/api/services/workflow/qa/node_summary.py
index 04a41ce..b02e59c 100644
--- a/api/services/workflow/qa/node_summary.py
+++ b/api/services/workflow/qa/node_summary.py
@@ -148,14 +148,13 @@ async def ensure_node_summaries(
             node_info_parts.append("Available tools:\n" + "\n".join(tool_descriptions))
         node_info = "\n".join(node_info_parts)
         messages = [
-            {"role": "system", "content": NODE_SUMMARY_SYSTEM_PROMPT},
             {"role": "user", "content": node_info},
         ]
 
         try:
             context = LLMContext()
             context.set_messages(messages)
-            summary_text = await llm.run_inference(context) or ""
+            summary_text = await llm.run_inference(context, system_instruction=NODE_SUMMARY_SYSTEM_PROMPT) or ""
         except Exception as e:
             logger.warning(f"Failed to generate summary for node {node_id}: {e}")
             updated_summaries[node_id] = {"summary": ""}