feat: refactor node spec and add mcp tools (#244)

* refactor: carve out extraction panel

* refactor: create spec versions for node types

* refactor: create a GenericNode and remove custom nodes

* feat: add python and typescript sdk

* add dograh sdk

* fix: fetch draft workflow definition over published one

* fix: fix routes of SDKs to use code gen

* chore: remove doclink dependency to reduce image size

* chore: format files

* chore: bump pipecat

* feat: let mcp fetch archived workflows on demand

* chore: fix tests

* feat: add sdk documentation

* chore: change banner and add badge
This commit is contained in:
Abhishek 2026-04-21 07:56:16 +05:30 committed by GitHub
parent 0a61ef295f
commit 00a1a22b74
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
162 changed files with 14355 additions and 3554 deletions

View file

@ -97,6 +97,7 @@ async def play_audio(
queue_frame: Callable[[Frame], Awaitable[None]],
transcript: Optional[str] = None,
append_to_context: bool = False,
persist_to_logs: bool = False,
) -> None:
"""Play raw PCM-16 audio once.
@ -115,6 +116,8 @@ async def play_audio(
transcript: Optional transcript of the recording.
append_to_context: Whether the transcript should be appended to
the LLM assistant context. Defaults to False.
persist_to_logs: Whether the transcript should be written to the
app-level logs buffer by observers. Defaults to False.
"""
context_id = str(uuid.uuid4())
await queue_frame(TTSStartedFrame(context_id=context_id))
@ -123,6 +126,7 @@ async def play_audio(
text=transcript, aggregated_by="recording", context_id=context_id
)
tts_text.append_to_context = append_to_context
tts_text.persist_to_logs = persist_to_logs
await queue_frame(tts_text)
await queue_frame(
TTSAudioRawFrame(

View file

@ -42,6 +42,7 @@ from pipecat.frames.frames import (
MetricsFrame,
StopFrame,
TranscriptionFrame,
TTSSpeakFrame,
TTSTextFrame,
UserMuteStartedFrame,
UserMuteStoppedFrame,
@ -230,8 +231,22 @@ class RealtimeFeedbackObserver(BaseObserver):
},
}
)
# Handle engine-queued speech (transition/tool messages) marked for
# log persistence. The downstream TTSTextFrame(s) from the TTS service
# still stream to WS as normal; we persist the full utterance once here
# to avoid word-level log entries from word-timestamp providers.
elif isinstance(frame, TTSSpeakFrame):
if getattr(frame, "persist_to_logs", False):
await self._append_to_buffer(
{
"type": RealtimeFeedbackType.BOT_TEXT.value,
"payload": {"text": frame.text},
}
)
# Handle bot TTS text - respect pts timing, WebSocket only.
# Complete turn text is persisted via register_turn_handlers,
# except for frames explicitly flagged persist_to_logs (e.g. recording
# transcripts from play_audio) which bypass the aggregator path.
elif isinstance(frame, TTSTextFrame):
message = {
"type": RealtimeFeedbackType.BOT_TEXT.value,
@ -249,6 +264,9 @@ class RealtimeFeedbackObserver(BaseObserver):
await self._ensure_clock_task()
await self._clock_queue.put((frame.pts, frame.id, message))
elif getattr(frame, "persist_to_logs", False):
# No pts + explicit persistence request (recording transcript).
await self._send_message(message)
else:
# No pts, send immediately
await self._send_ws(message)

View file

@ -94,6 +94,14 @@ class _OrgRoutingExporter(SpanExporter):
org_buckets = {}
for span in spans:
# Drop fastmcp's built-in auto-instrumentation spans
# (`tools/call <name>`, etc.) — our `@traced_tool` decorator
# in `api/mcp_server/tracing.py` produces the spans we want. Keeping
# both would just double every trace.
scope = getattr(span, "instrumentation_scope", None)
if scope is not None and scope.name == "fastmcp":
continue
org_id = span.attributes.get("dograh.org_id") if span.attributes else None
if org_id and str(org_id) in self._org_exporters:
org_buckets.setdefault(str(org_id), []).append(span)