feat: refactor node spec and add mcp tools (#244)

* refactor: carve out extraction panel * refactor: create spec versions for node types * refactor: create a GenericNode and remove custom nodes * feat: add python and typescript sdk * add dograh sdk * fix: fetch draft workflow definition over published one * fix: fix routes of SDKs to use code gen * chore: remove doclink dependency to reduce image size * chore: format files * chore: bump pipecat * feat: let mcp fetch archived workflows on demand * chore: fix tests * feat: add sdk documentation * chore: change banner and add badge
2026-07-25 12:01:04 +02:00 · 2026-04-21 07:56:16 +05:30 · 2026-04-21 07:56:16 +05:30 · 00a1a22b74
commit 00a1a22b74
parent 0a61ef295f
162 changed files with 14355 additions and 3554 deletions
--- a/api/services/pipecat/audio_playback.py
+++ b/api/services/pipecat/audio_playback.py
@ -97,6 +97,7 @@ async def play_audio(
    queue_frame: Callable[[Frame], Awaitable[None]],
    transcript: Optional[str] = None,
    append_to_context: bool = False,
+    persist_to_logs: bool = False,
 ) -> None:
    """Play raw PCM-16 audio once.

@ -115,6 +116,8 @@ async def play_audio(
        transcript: Optional transcript of the recording.
        append_to_context: Whether the transcript should be appended to
            the LLM assistant context.  Defaults to False.
+        persist_to_logs: Whether the transcript should be written to the
+            app-level logs buffer by observers. Defaults to False.
    """
    context_id = str(uuid.uuid4())
    await queue_frame(TTSStartedFrame(context_id=context_id))
@ -123,6 +126,7 @@ async def play_audio(
            text=transcript, aggregated_by="recording", context_id=context_id
        )
        tts_text.append_to_context = append_to_context
+        tts_text.persist_to_logs = persist_to_logs
        await queue_frame(tts_text)
    await queue_frame(
        TTSAudioRawFrame(
--- a/api/services/pipecat/realtime_feedback_observer.py
+++ b/api/services/pipecat/realtime_feedback_observer.py
@ -42,6 +42,7 @@ from pipecat.frames.frames import (
    MetricsFrame,
    StopFrame,
    TranscriptionFrame,
+    TTSSpeakFrame,
    TTSTextFrame,
    UserMuteStartedFrame,
    UserMuteStoppedFrame,
@ -230,8 +231,22 @@ class RealtimeFeedbackObserver(BaseObserver):
                    },
                }
            )
+        # Handle engine-queued speech (transition/tool messages) marked for
+        # log persistence. The downstream TTSTextFrame(s) from the TTS service
+        # still stream to WS as normal; we persist the full utterance once here
+        # to avoid word-level log entries from word-timestamp providers.
+        elif isinstance(frame, TTSSpeakFrame):
+            if getattr(frame, "persist_to_logs", False):
+                await self._append_to_buffer(
+                    {
+                        "type": RealtimeFeedbackType.BOT_TEXT.value,
+                        "payload": {"text": frame.text},
+                    }
+                )
        # Handle bot TTS text - respect pts timing, WebSocket only
-        # Complete turn text is persisted via register_turn_handlers
+        # Complete turn text is persisted via register_turn_handlers,
+        # except for frames explicitly flagged persist_to_logs (e.g. recording
+        # transcripts from play_audio) which bypass the aggregator path.
        elif isinstance(frame, TTSTextFrame):
            message = {
                "type": RealtimeFeedbackType.BOT_TEXT.value,
@ -249,6 +264,9 @@ class RealtimeFeedbackObserver(BaseObserver):

                await self._ensure_clock_task()
                await self._clock_queue.put((frame.pts, frame.id, message))
+            elif getattr(frame, "persist_to_logs", False):
+                # No pts, but persistence explicitly requested (recording transcript).
+                await self._send_message(message)
            else:
                # No pts, send immediately
                await self._send_ws(message)
--- a/api/services/pipecat/tracing_config.py
+++ b/api/services/pipecat/tracing_config.py
@ -94,6 +94,14 @@ class _OrgRoutingExporter(SpanExporter):
        org_buckets = {}

        for span in spans:
+            # Drop fastmcp's built-in auto-instrumentation spans
+            # (`tools/call <name>`, etc.) — our `@traced_tool` decorator
+            # in `api/mcp_server/tracing.py` produces the spans we want. Keeping
+            # both would just double every trace.
+            scope = getattr(span, "instrumentation_scope", None)
+            if scope is not None and scope.name == "fastmcp":
+                continue
+
            org_id = span.attributes.get("dograh.org_id") if span.attributes else None
            if org_id and str(org_id) in self._org_exporters:
                org_buckets.setdefault(str(org_id), []).append(span)