feat: refactor node spec and add mcp tools (#244)

* refactor: carve out extraction panel * refactor: create spec versions for node types * refactor: create a GenericNode and remove custom nodes * feat: add python and typescript sdk * add dograh sdk * fix: fetch draft workflow definition over published one * fix: fix routes of SDKs to use code gen * chore: remove doclink dependency to reduce image size * chore: format files * chore: bump pipecat * feat: let mcp fetch archived workflows on demand * chore: fix tests * feat: add sdk documentation * chore: change banner and add badge
2026-07-25 12:01:04 +02:00 · 2026-04-21 07:56:16 +05:30 · 2026-04-21 07:56:16 +05:30 · 00a1a22b74
commit 00a1a22b74
parent 0a61ef295f
162 changed files with 14355 additions and 3554 deletions
--- a/api/mcp_server/tools/__init__.py
+++ b/api/mcp_server/tools/__init__.py
--- a/api/mcp_server/tools/catalog.py
+++ b/api/mcp_server/tools/catalog.py
@ -0,0 +1,113 @@
+"""MCP discovery tools for the reference catalogs.
+
+Node properties of type `tool_refs`, `document_refs`, `recording_ref`, and
+`credential_ref` carry UUIDs that resolve against these catalogs. LLMs must
+list the catalog before populating those fields with real UUIDs.
+"""
+
+from api.db import db_client
+from api.mcp_server.auth import authenticate_mcp_request
+from api.mcp_server.server import mcp
+from api.mcp_server.tracing import traced_tool
+
+
+@mcp.tool
+@traced_tool
+async def list_tools(status: str | None = "active") -> list[dict]:
+    """List tools the agent can invoke during a call.
+
+    Returns each tool's `tool_uuid` (use this in node `tool_uuids` properties),
+    `name`, `description`, and `category`. Pass `status=None` to include
+    archived tools.
+    """
+    caller = await authenticate_mcp_request()
+
+    # The lookup is scoped to the caller's selected organization; `status`
+    # is forwarded to the query untouched.
+    rows = await db_client.get_tools_for_organization(
+        organization_id=caller.selected_organization_id,
+        status=status,
+    )
+
+    catalog: list[dict] = []
+    for row in rows:
+        entry = {
+            "tool_uuid": row.tool_uuid,
+            "name": row.name,
+            # A missing description is reported as an empty string.
+            "description": row.description or "",
+            "category": row.category,
+        }
+        catalog.append(entry)
+    return catalog
+
+
+@mcp.tool
+@traced_tool
+async def list_documents() -> list[dict]:
+    """List knowledge-base documents the agent can reference during a call.
+
+    Returns each document's `document_uuid` (use this in node
+    `document_uuids` properties), `filename`, `processing_status`, and
+    `total_chunks`.
+    """
+    user = await authenticate_mcp_request()
+    # Only documents belonging to the caller's selected organization are listed.
+    documents = await db_client.get_documents_for_organization(
+        organization_id=user.selected_organization_id,
+    )
+    return [
+        {
+            "document_uuid": d.document_uuid,
+            "filename": d.filename,
+            "processing_status": d.processing_status,
+            "total_chunks": d.total_chunks,
+        }
+        for d in documents
+    ]
+
+
+@mcp.tool
+@traced_tool
+async def list_credentials() -> list[dict]:
+    """List external credentials available for webhook auth and pre-call fetch.
+
+    Returns each credential's `credential_uuid` (use this in node
+    `credential_uuid` / `pre_call_fetch_credential_uuid` properties), `name`,
+    `description`, and `credential_type`.
+    """
+    caller = await authenticate_mcp_request()
+    stored = await db_client.get_credentials_for_organization(
+        organization_id=caller.selected_organization_id,
+    )
+
+    def _summarize(cred) -> dict:
+        # Only these four attributes are read from each credential row.
+        return {
+            "credential_uuid": cred.credential_uuid,
+            "name": cred.name,
+            # A missing description is reported as an empty string.
+            "description": cred.description or "",
+            "credential_type": cred.credential_type,
+        }
+
+    return [_summarize(cred) for cred in stored]
+
+
+@mcp.tool
+@traced_tool
+async def list_recordings(workflow_id: int | None = None) -> list[dict]:
+    """List pre-recorded audio files available for greetings and edge
+    transition speech.
+
+    Returns each recording's `recording_id` (use this in
+    `greeting_recording_id` / `transition_speech_recording_id` properties),
+    `transcript`, and TTS metadata. Pass `workflow_id` to filter to one
+    workflow's recordings.
+    """
+    caller = await authenticate_mcp_request()
+
+    # `workflow_id` is handed to the query as-is (None by default).
+    rows = await db_client.get_recordings(
+        organization_id=caller.selected_organization_id,
+        workflow_id=workflow_id,
+    )
+
+    listing: list[dict] = []
+    for rec in rows:
+        listing.append(
+            {
+                "id": rec.id,
+                "recording_id": rec.recording_id,
+                "workflow_id": rec.workflow_id,
+                "transcript": rec.transcript,
+                "tts_provider": rec.tts_provider,
+                "tts_model": rec.tts_model,
+                "tts_voice_id": rec.tts_voice_id,
+            }
+        )
+    return listing
--- a/api/mcp_server/tools/get_workflow_code.py
+++ b/api/mcp_server/tools/get_workflow_code.py
@ -0,0 +1,71 @@
+"""MCP tool that returns a workflow as SDK TypeScript code.
+
+Companion to `save_workflow`: the LLM calls `get_workflow_code` to see
+the current state of a workflow as editable code, mutates it, and calls
+`save_workflow` with the new code. Storage stays JSON; the TS form is
+an ephemeral projection for the LLM edit loop.
+
+Selection priority: latest draft → latest published → legacy
+`workflow.workflow_definition`. That matches the UI's "whichever is the
+working copy" behavior so the LLM sees what a human editor would see.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import HTTPException
+
+from api.db import db_client
+from api.mcp_server.auth import authenticate_mcp_request
+from api.mcp_server.server import mcp
+from api.mcp_server.tracing import traced_tool
+from api.mcp_server.ts_bridge import TsBridgeError, generate_code
+
+
+@mcp.tool
+@traced_tool
+async def get_workflow_code(workflow_id: int) -> dict[str, Any]:
+    """Return the workflow as SDK TypeScript code the LLM can edit.
+
+    Output shape:
+        {"workflow_id": int, "name": str,
+         "version": "draft" | "published" | "legacy", "code": "<TS source>"}
+
+    The LLM edits `code`, then calls `save_workflow(workflow_id, code)`.
+
+    Raises:
+        HTTPException: 404 when the workflow is not found for the caller's
+            organization; 500 when code generation fails.
+    """
+    user = await authenticate_mcp_request()
+
+    workflow = await db_client.get_workflow(
+        workflow_id, organization_id=user.selected_organization_id
+    )
+    if not workflow:
+        raise HTTPException(status_code=404, detail=f"Workflow {workflow_id} not found")
+
+    # Draft wins over published — editing a draft is the normal flow.
+    # `current_definition` (is_current=True) is the published row, so we
+    # fetch the draft explicitly. If the latest draft was just published,
+    # no draft row exists and we fall through to `released_definition`.
+    draft = await db_client.get_draft_version(workflow_id)
+    released = workflow.released_definition
+
+    if draft is not None and draft.workflow_json:
+        payload = draft.workflow_json
+        source = "draft"
+    elif released is not None and released.workflow_json:
+        payload = released.workflow_json
+        source = "published"
+    else:
+        # Neither version row has JSON: use the legacy column, or an empty
+        # definition when that is unset too.
+        payload = workflow.workflow_definition or {}
+        source = "legacy"
+
+    try:
+        code = await generate_code(payload, workflow_name=workflow.name or "")
+    except TsBridgeError as e:
+        # Chain the bridge failure so the original traceback stays attached.
+        raise HTTPException(
+            status_code=500, detail=f"Failed to generate code: {e}"
+        ) from e
+
+    return {
+        "workflow_id": workflow_id,
+        "name": workflow.name or "",
+        "version": source,
+        "code": code,
+    }
--- a/api/mcp_server/tools/node_types.py
+++ b/api/mcp_server/tools/node_types.py
@ -0,0 +1,57 @@
+"""MCP discovery tools for node specifications.
+
+LLMs call these tools first to learn the available node-type catalog and
+each node's property schema before composing or modifying a workflow.
+"""
+
+from fastapi import HTTPException
+
+from api.mcp_server.auth import authenticate_mcp_request
+from api.mcp_server.server import mcp
+from api.mcp_server.tracing import traced_tool
+from api.services.workflow.node_specs import SPEC_VERSION, all_specs, get_spec
+
+
+@mcp.tool
+@traced_tool
+async def list_node_types() -> dict:
+    """List every available node type with a brief summary.
+
+    Use this first to discover what nodes exist, then call `get_node_type`
+    for the full schema of any node you intend to use.
+
+    Returns:
+        A dict with `spec_version` (pin against this in any generated workflow
+        code) and `node_types` (list of {name, display_name, description,
+        category}).
+    """
+    # The result does not depend on the caller; authentication is still
+    # awaited before anything is returned.
+    await authenticate_mcp_request()
+
+    summaries: list[dict] = []
+    for node_spec in all_specs():
+        summaries.append(
+            {
+                "name": node_spec.name,
+                "display_name": node_spec.display_name,
+                "description": node_spec.description,
+                # `category` is reported through its `.value`.
+                "category": node_spec.category.value,
+            }
+        )
+
+    return {"spec_version": SPEC_VERSION, "node_types": summaries}
+
+
+@mcp.tool
+@traced_tool
+async def get_node_type(name: str) -> dict:
+    """Fetch the full schema for a node type, including every property's
+    type, default, conditional visibility rules, and LLM-readable
+    description, plus worked examples.
+
+    Use the property `description` and the `examples` list to understand
+    semantics — types alone are not enough.
+    """
+    await authenticate_mcp_request()
+
+    node_spec = get_spec(name)
+    if node_spec is not None:
+        # Serialize in JSON mode so the result is plain JSON-compatible data.
+        return node_spec.model_dump(mode="json")
+
+    # `get_spec` returned None: the name is not a known node type.
+    raise HTTPException(status_code=404, detail=f"Unknown node type: {name!r}")
--- a/api/mcp_server/tools/save_workflow.py
+++ b/api/mcp_server/tools/save_workflow.py
@ -0,0 +1,168 @@
+"""MCP tool that accepts LLM-authored SDK TypeScript and saves it as a draft.
+
+Execution flow:
+    1. Parse via the Node TS validator — AST-only, never executes the code.
+       Returns either a workflow JSON or per-location parse/validate errors.
+    2. Pydantic validation via `ReactFlowDTO.model_validate` (defence in
+       depth; the parser is already spec-driven, but the DTO layer is the
+       authoritative wire-format gate).
+    3. Graph validation via `WorkflowGraph`.
+    4. Save as a new draft via `db_client.save_workflow_draft` — the
+       published version stays intact, so edits are rollback-safe.
+
+Error codes surfaced to the LLM:
+    parse_error       — TS parse failed or a disallowed construct was used
+    validation_error  — node data failed spec validation (unknown field,
+                        missing required, wrong type, option out of range)
+    schema_validation — ReactFlowDTO Pydantic rejection (rare; parser bug)
+    graph_validation  — semantic graph rule broken (e.g. no start node)
+    bridge_error      — Node subprocess failed before returning JSON
+
+All LLM-facing errors include file:line:column where available so the
+LLM can correct its code directly.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import HTTPException
+from loguru import logger
+from pydantic import ValidationError as PydanticValidationError
+
+from api.db import db_client
+from api.mcp_server.auth import authenticate_mcp_request
+from api.mcp_server.server import mcp
+from api.mcp_server.tracing import traced_tool
+from api.mcp_server.ts_bridge import TsBridgeError, parse_code
+from api.services.workflow.dto import ReactFlowDTO
+from api.services.workflow.layout import reconcile_positions
+from api.services.workflow.workflow import WorkflowGraph
+
+
+async def _previous_workflow_json(workflow: Any) -> dict[str, Any] | None:
+    """Pick the stored workflow JSON that an incoming save is reconciled against.
+
+    Candidates are tried in order: the draft fetched explicitly via
+    `db_client.get_draft_version`, then `workflow.released_definition`, then
+    the legacy `workflow.workflow_definition`. The first non-empty JSON wins;
+    `None` is returned when all three are missing or empty.
+
+    The draft has to be fetched separately because `current_definition`
+    (is_current=True) is the published row.
+    """
+    draft = await db_client.get_draft_version(workflow.id)
+    draft_json = draft.workflow_json if draft is not None else None
+    if draft_json:
+        return draft_json
+
+    # No usable draft (e.g. the last draft was just published).
+    released = workflow.released_definition
+    released_json = released.workflow_json if released is not None else None
+    if released_json:
+        return released_json
+
+    legacy = workflow.workflow_definition
+    return legacy if legacy else None
+
+
+def _error_result(code: str, message: str, **extra: Any) -> dict[str, Any]:
+    """Build the failure payload returned to the LLM when a save is rejected.
+
+    The payload always starts with `saved=False`, the `error_code`, and the
+    `error` message; keyword extras are merged in last, so an extra with the
+    same key overrides the base value.
+    """
+    result: dict[str, Any] = {
+        "saved": False,
+        "error_code": code,
+        "error": message,
+    }
+    result.update(extra)
+    return result
+
+
+def _format_errors(errors: list[dict[str, Any]]) -> str:
+    """Render validator errors as one newline-separated, human-readable string.
+
+    Each error contributes its `message` (empty when absent), followed by
+    ` (line N)` or ` (line N, col M)` when a line number is present. The
+    column is only shown together with a line.
+    """
+
+    def _render(err: dict[str, Any]) -> str:
+        message = err.get("message", "")
+        line = err.get("line")
+        if line is None:
+            # No line number: the location suffix is omitted entirely.
+            return f"{message}"
+        column = err.get("column")
+        where = f"line {line}" if column is None else f"line {line}, col {column}"
+        return f"{message} ({where})"
+
+    return "\n".join(_render(err) for err in errors)
+
+
+@mcp.tool
+@traced_tool
+async def save_workflow(workflow_id: int, code: str) -> dict[str, Any]:
+    """Parse SDK TypeScript and save the resulting workflow as a draft.
+
+    `code` is TypeScript source using `@dograh/sdk`. Fetch the current
+    code first via `get_workflow_code(workflow_id)`, edit it, then pass
+    the full updated source here.
+
+    Example code:
+        import { Workflow } from "@dograh/sdk";
+        import { startCall, endCall } from "@dograh/sdk/typed";
+
+        const wf = new Workflow({ name: "lead_qualification" });
+        const greeting = wf.addTyped(startCall({ name: "Greeting", prompt: "Hi!" }));
+        const done     = wf.addTyped(endCall({ name: "Done", prompt: "Bye." }));
+        wf.edge(greeting, done, { label: "done", condition: "conversation complete" });
+
+    On success the draft version is saved; the published version is
+    untouched.
+    """
+    user = await authenticate_mcp_request()
+
+    workflow = await db_client.get_workflow(
+        workflow_id, organization_id=user.selected_organization_id
+    )
+    if not workflow:
+        raise HTTPException(status_code=404, detail=f"Workflow {workflow_id} not found")
+
+    # 1. Parse + spec-validate via the Node TS validator.
+    # A bridge failure is returned as a structured error (not raised) so the
+    # LLM receives an `error_code` it can react to.
+    try:
+        parsed = await parse_code(code)
+    except TsBridgeError as e:
+        logger.warning(f"ts_bridge failure: {e}")
+        return _error_result("bridge_error", str(e))
+
+    if not parsed.get("ok"):
+        # Any stage other than "parse" is reported as a validation error.
+        stage = parsed.get("stage", "parse")
+        errs = parsed.get("errors") or []
+        code_key = "parse_error" if stage == "parse" else "validation_error"
+        return _error_result(code_key, _format_errors(errs), errors=errs)
+
+    payload = parsed["workflow"]
+    new_name = (parsed.get("workflowName") or "").strip()
+
+    # 1b. Reconcile node positions against the previously-stored workflow.
+    # The parser drops positions by design (LLMs don't place nodes well);
+    # here we fill them back in from what was there before, and pick
+    # approximate placements for newly-introduced nodes.
+    payload = reconcile_positions(payload, await _previous_workflow_json(workflow))
+
+    # 2. Pydantic shape check (defence in depth — parser is spec-driven).
+    try:
+        dto = ReactFlowDTO.model_validate(payload)
+    except PydanticValidationError as e:
+        return _error_result("schema_validation", str(e))
+
+    # 3. Graph-level semantic validation (start-node count, edge shape).
+    # WorkflowGraph raises ValueError for rule violations, but any failure
+    # while building the graph is deliberately reported to the LLM as
+    # `graph_validation` instead of escaping the tool. `Exception` already
+    # covers `ValueError`, so it is the only class listed.
+    try:
+        WorkflowGraph(dto)
+    except Exception as e:
+        return _error_result("graph_validation", str(e))
+
+    # 4a. If the `new Workflow({ name })` in the edited source differs from
+    # the stored name, rename the workflow. Name is a workflow-level field
+    # (not versioned), so this takes effect immediately.
+    name_changed = bool(new_name) and new_name != workflow.name
+    if name_changed:
+        await db_client.update_workflow(
+            workflow_id=workflow_id,
+            name=new_name,
+            workflow_definition=None,
+            template_context_variables=None,
+            workflow_configurations=None,
+            organization_id=user.selected_organization_id,
+        )
+
+    # 4b. Save as a new draft (existing published version stays intact).
+    draft = await db_client.save_workflow_draft(
+        workflow_id=workflow_id,
+        workflow_definition=payload,
+    )
+
+    return {
+        "saved": True,
+        "workflow_id": workflow_id,
+        "version_number": draft.version_number,
+        "status": draft.status,
+        "node_count": len(payload["nodes"]),
+        "edge_count": len(payload["edges"]),
+        # An empty parsed name means "keep the stored name".
+        "name": new_name or workflow.name,
+        "renamed": name_changed,
+    }
--- a/api/mcp_server/tools/workflows.py
+++ b/api/mcp_server/tools/workflows.py
@ -0,0 +1,53 @@
+from fastapi import HTTPException
+
+from api.db import db_client
+from api.mcp_server.auth import authenticate_mcp_request
+from api.mcp_server.server import mcp
+from api.mcp_server.tracing import traced_tool
+
+
+@mcp.tool
+@traced_tool
+async def list_workflows(status: str | None = "active") -> list[dict]:
+    """List agents (workflows) in the caller's organization.
+
+    Returns id, name, status, and created_at for each agent. Use
+    `get_workflow` to fetch a single agent's full definition. Defaults
+    to active agents; pass `status="archived"` to list archived agents,
+    or `status=None` to list all.
+    """
+    caller = await authenticate_mcp_request()
+    rows = await db_client.get_all_workflows_for_listing(
+        organization_id=caller.selected_organization_id,
+        status=status,
+    )
+
+    listing: list[dict] = []
+    for row in rows:
+        # `created_at` is emitted as an ISO-8601 string, or None when unset.
+        created = row.created_at.isoformat() if row.created_at else None
+        listing.append(
+            {
+                "id": row.id,
+                "name": row.name,
+                "status": row.status,
+                "created_at": created,
+            }
+        )
+    return listing
+
+
+@mcp.tool
+@traced_tool
+async def get_workflow(workflow_id: int) -> dict:
+    """Fetch a single agent by id, including its current published definition."""
+    caller = await authenticate_mcp_request()
+    workflow = await db_client.get_workflow(
+        workflow_id, organization_id=caller.selected_organization_id
+    )
+    if not workflow:
+        raise HTTPException(status_code=404, detail=f"Workflow {workflow_id} not found")
+
+    # Without a current definition, both definition fields are reported as None.
+    current = workflow.current_definition
+    if current:
+        definition = current.workflow_json
+        version_number = current.version_number
+    else:
+        definition = None
+        version_number = None
+
+    return {
+        "id": workflow.id,
+        "name": workflow.name,
+        "status": workflow.status,
+        "definition": definition,
+        "version_number": version_number,
+    }