From dcf9bf7c4b7c76a11665ca30b707b0e9d35893f6 Mon Sep 17 00:00:00 2001 From: Sabiha Khan Date: Tue, 24 Mar 2026 10:41:18 +0530 Subject: [PATCH] fix: pass system_instruction to one shot llm inferences to avoid system instruction from llm _settings --- api/services/workflow/qa/analysis.py | 13 +++++-------- api/services/workflow/qa/node_summary.py | 3 +-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/api/services/workflow/qa/analysis.py b/api/services/workflow/qa/analysis.py index 34af4dd..55f5ab3 100644 --- a/api/services/workflow/qa/analysis.py +++ b/api/services/workflow/qa/analysis.py @@ -28,11 +28,11 @@ from api.utils.template_renderer import render_template from pipecat.processors.aggregators.llm_context import LLMContext -async def _run_llm_inference(llm, messages: list[dict]) -> str | None: +async def _run_llm_inference(llm, messages: list[dict], system_prompt: str) -> str | None: """Run a one-shot LLM inference using the pipecat service.""" context = LLMContext() context.set_messages(messages) - return await llm.run_inference(context) + return await llm.run_inference(context, system_instruction=system_prompt) async def _generate_conversation_summary( @@ -47,12 +47,11 @@ async def _generate_conversation_summary( Traced to Langfuse as conversation-summary-before-{node_name}. """ messages = [ - {"role": "system", "content": CONVERSATION_SUMMARY_SYSTEM_PROMPT}, {"role": "user", "content": f"## Conversation\n{transcript}"}, ] try: - summary = await _run_llm_inference(llm, messages) or "" + summary = await _run_llm_inference(llm, messages, CONVERSATION_SUMMARY_SYSTEM_PROMPT) or "" span_name = f"conversation-summary-before-{node_name}" add_qa_span_to_trace(parent_ctx, model, messages, summary, span_name) @@ -163,13 +162,12 @@ async def run_per_node_qa_analysis( system_content = render_template(system_prompt, template_context) messages = [ - {"role": "system", "content": system_content}, {"role": "user", "content": f"## Transcript\n{node_transcript}"}, ] # Call QA LLM try: - raw_response = await _run_llm_inference(llm, messages) + raw_response = await _run_llm_inference(llm, messages, system_content) except Exception as e: logger.error( f"QA LLM call failed for node '{node_name}' on run {workflow_run_id}: {e}" @@ -266,7 +264,6 @@ async def _run_whole_call_qa_analysis( } system_content = render_template(system_prompt, template_context) messages = [ - {"role": "system", "content": system_content}, {"role": "user", "content": f"## Transcript\n{transcript}"}, ] @@ -274,7 +271,7 @@ async def _run_whole_call_qa_analysis( llm = create_llm_service_from_provider(provider, model, api_key, **service_kwargs) try: - raw_response = await _run_llm_inference(llm, messages) + raw_response = await _run_llm_inference(llm, messages, system_content) except Exception as e: logger.error(f"QA LLM call failed for run {workflow_run_id}: {e}") return {"error": str(e), "node_results": {}} diff --git a/api/services/workflow/qa/node_summary.py b/api/services/workflow/qa/node_summary.py index 04a41ce..b02e59c 100644 --- a/api/services/workflow/qa/node_summary.py +++ b/api/services/workflow/qa/node_summary.py @@ -148,14 +148,13 @@ async def ensure_node_summaries( node_info_parts.append("Available tools:\n" + "\n".join(tool_descriptions)) node_info = "\n".join(node_info_parts) messages = [ - {"role": "system", "content": NODE_SUMMARY_SYSTEM_PROMPT}, {"role": "user", "content": node_info}, ] try: context = LLMContext() context.set_messages(messages) - summary_text = await llm.run_inference(context) or "" + summary_text = await llm.run_inference(context, system_instruction=NODE_SUMMARY_SYSTEM_PROMPT) or "" except Exception as e: logger.warning(f"Failed to generate summary for node {node_id}: {e}") updated_summaries[node_id] = {"summary": ""}