feat: add support for self-hosted LLM models

2026-07-22 11:51:04 +02:00 · 2026-03-24 17:50:45 +05:30 · 2026-03-24 17:50:45 +05:30 · ac0731a374
commit ac0731a374
parent 31e075d114
17 changed files with 179 additions and 48 deletions
--- a/api/services/workflow/pipecat_engine.py
+++ b/api/services/workflow/pipecat_engine.py
@ -437,9 +437,7 @@ class PipecatEngine:

        async def _do_extraction():
            try:
-                logger.debug(
-                    f"Starting variable extraction for node: {node.name}"
-                )
+                logger.debug(f"Starting variable extraction for node: {node.name}")
                extracted_data = (
                    await self._variable_extraction_manager._perform_extraction(
                        extraction_variables, parent_context, extraction_prompt
@ -454,7 +452,9 @@ class PipecatEngine:
                    f"Variable extraction completed for node: {node.name}. Extracted: {extracted_data}"
                )
            except Exception as e:
-                logger.error(f"Error during variable extraction for node {node.name}: {str(e)}")
+                logger.error(
+                    f"Error during variable extraction for node {node.name}: {str(e)}"
+                )

        if run_in_background:
            logger.debug(
@ -497,9 +497,7 @@ class PipecatEngine:
                    logger.error(
                        f"Pending extraction task '{task_name}' failed: {result}"
                    )
-            logger.debug(
-                f"All pending extraction tasks completed in {elapsed:.2f}s"
-            )
+            logger.debug(f"All pending extraction tasks completed in {elapsed:.2f}s")
        except asyncio.TimeoutError:
            incomplete = [
                t.get_name() for t in self._pending_extraction_tasks if not t.done()
--- a/api/services/workflow/pipecat_engine_context_composer.py
+++ b/api/services/workflow/pipecat_engine_context_composer.py
@ -34,13 +34,13 @@ You have two modes for responding:
   Example: ▸ Hello! How can I help you today?

 2. PRE-RECORDED AUDIO (●): Play a pre-recorded audio message.
-   Format: `●` followed by a space and ONLY the recording_id. Nothing else.
-   Example: ● rec_greeting_01
+   Format: `●` followed by a space followed by recording_id followed by provided transcript. Nothing else.
+   Example: ● rec_greeting_01 [ Provided Transcript ]

 RULES:
 - Your response MUST start with either `▸` or `●` as the very first character.
 - For `▸` (dynamic speech): Follow with a space and your full response text.
- For `●` (pre-recorded audio): Follow with a space and ONLY the recording_id. No other text.
+- For `●` (pre-recorded audio): Follow with a space and the recording_id and the provided transcript. No other text.
 - Use `●` when a pre-recorded message matches the situation well.
 - Use `▸` when you need to generate a dynamic, contextual response.
 - NEVER mix modes in a single response. Choose one."""
--- a/api/services/workflow/qa/analysis.py
+++ b/api/services/workflow/qa/analysis.py
@ -28,7 +28,9 @@ from api.utils.template_renderer import render_template
 from pipecat.processors.aggregators.llm_context import LLMContext


-async def _run_llm_inference(llm, messages: list[dict], system_prompt: str) -> str | None:
+async def _run_llm_inference(
+    llm, messages: list[dict], system_prompt: str
+) -> str | None:
    """Run a one-shot LLM inference using the pipecat service."""
    context = LLMContext()
    context.set_messages(messages)
@ -51,7 +53,10 @@ async def _generate_conversation_summary(
    ]

    try:
-        summary = await _run_llm_inference(llm, messages, CONVERSATION_SUMMARY_SYSTEM_PROMPT) or ""
+        summary = (
+            await _run_llm_inference(llm, messages, CONVERSATION_SUMMARY_SYSTEM_PROMPT)
+            or ""
+        )

        span_name = f"conversation-summary-before-{node_name}"
        add_qa_span_to_trace(parent_ctx, model, messages, summary, span_name)
--- a/api/services/workflow/qa/node_summary.py
+++ b/api/services/workflow/qa/node_summary.py
@ -154,7 +154,12 @@ async def ensure_node_summaries(
        try:
            context = LLMContext()
            context.set_messages(messages)
-            summary_text = await llm.run_inference(context, system_instruction=NODE_SUMMARY_SYSTEM_PROMPT) or ""
+            summary_text = (
+                await llm.run_inference(
+                    context, system_instruction=NODE_SUMMARY_SYSTEM_PROMPT
+                )
+                or ""
+            )
        except Exception as e:
            logger.warning(f"Failed to generate summary for node {node_id}: {e}")
            updated_summaries[node_id] = {"summary": ""}