feat: refactor node spec and add mcp tools (#244)

* refactor: carve out extraction panel
* refactor: create spec versions for node types
* refactor: create a GenericNode and remove custom nodes
* feat: add python and typescript sdk
* add dograh sdk
* fix: fetch draft workflow definition over published one
* fix: fix routes of SDKs to use code gen
* chore: remove doclink dependency to reduce image size
* chore: format files
* chore: bump pipecat
* feat: let mcp fetch archived workflows on demand
* chore: fix tests
* feat: add sdk documentation
* chore: change banner and add badge
2026-07-25 12:01:04 +02:00 · 2026-04-21 07:56:16 +05:30 · 2026-04-21 07:56:16 +05:30 · 00a1a22b74
commit 00a1a22b74
parent 0a61ef295f
162 changed files with 14355 additions and 3554 deletions
--- a/api/tests/conftest.py
+++ b/api/tests/conftest.py
@ -14,14 +14,17 @@ from unittest.mock import Mock
 import pytest

 from api.services.workflow.dto import (
+    AgentNodeData,
+    AgentRFNode,
    EdgeDataDTO,
+    EndCallNodeData,
+    EndCallRFNode,
    ExtractionVariableDTO,
-    NodeDataDTO,
-    NodeType,
    Position,
    ReactFlowDTO,
    RFEdgeDTO,
-    RFNodeDTO,
+    StartCallNodeData,
+    StartCallRFNode,
    VariableType,
 )
 from api.services.workflow.workflow import WorkflowGraph
@ -252,11 +255,10 @@ def simple_workflow() -> WorkflowGraph:
    """
    dto = ReactFlowDTO(
        nodes=[
-            RFNodeDTO(
+            StartCallRFNode(
                id="start",
-                type=NodeType.startNode,
                position=Position(x=0, y=0),
-                data=NodeDataDTO(
+                data=StartCallNodeData(
                    name="Start Call",
                    prompt=START_CALL_SYSTEM_PROMPT,
                    is_start=True,
@ -273,11 +275,10 @@ def simple_workflow() -> WorkflowGraph:
                    ],
                ),
            ),
-            RFNodeDTO(
+            EndCallRFNode(
                id="end",
-                type=NodeType.endNode,
                position=Position(x=0, y=200),
-                data=NodeDataDTO(
+                data=EndCallNodeData(
                    name="End Call",
                    prompt=END_CALL_SYSTEM_PROMPT,
                    is_end=True,
@ -317,11 +318,10 @@ def three_node_workflow() -> WorkflowGraph:
    """
    dto = ReactFlowDTO(
        nodes=[
-            RFNodeDTO(
+            StartCallRFNode(
                id="start",
-                type=NodeType.startNode,
                position=Position(x=0, y=0),
-                data=NodeDataDTO(
+                data=StartCallNodeData(
                    name="Start Call",
                    prompt=START_CALL_SYSTEM_PROMPT,
                    is_start=True,
@ -338,11 +338,10 @@ def three_node_workflow() -> WorkflowGraph:
                    ],
                ),
            ),
-            RFNodeDTO(
+            AgentRFNode(
                id="agent",
-                type=NodeType.agentNode,
                position=Position(x=0, y=200),
-                data=NodeDataDTO(
+                data=AgentNodeData(
                    name="Collect Info",
                    prompt=AGENT_SYSTEM_PROMPT,
                    allow_interrupt=False,
@ -358,11 +357,10 @@ def three_node_workflow() -> WorkflowGraph:
                    ],
                ),
            ),
-            RFNodeDTO(
+            EndCallRFNode(
                id="end",
-                type=NodeType.endNode,
                position=Position(x=0, y=400),
-                data=NodeDataDTO(
+                data=EndCallNodeData(
                    name="End Call",
                    prompt=END_CALL_SYSTEM_PROMPT,
                    is_end=True,
@ -411,11 +409,10 @@ def three_node_workflow_extraction_start_only() -> WorkflowGraph:
    """
    dto = ReactFlowDTO(
        nodes=[
-            RFNodeDTO(
+            StartCallRFNode(
                id="start",
-                type=NodeType.startNode,
                position=Position(x=0, y=0),
-                data=NodeDataDTO(
+                data=StartCallNodeData(
                    name="Start Call",
                    prompt=START_CALL_SYSTEM_PROMPT,
                    is_start=True,
@ -432,11 +429,10 @@ def three_node_workflow_extraction_start_only() -> WorkflowGraph:
                    ],
                ),
            ),
-            RFNodeDTO(
+            AgentRFNode(
                id="agent",
-                type=NodeType.agentNode,
                position=Position(x=0, y=200),
-                data=NodeDataDTO(
+                data=AgentNodeData(
                    name="Collect Info",
                    prompt=AGENT_SYSTEM_PROMPT,
                    allow_interrupt=False,
@ -444,11 +440,10 @@ def three_node_workflow_extraction_start_only() -> WorkflowGraph:
                    extraction_enabled=False,  # Explicitly disabled for testing
                ),
            ),
-            RFNodeDTO(
+            EndCallRFNode(
                id="end",
-                type=NodeType.endNode,
                position=Position(x=0, y=400),
-                data=NodeDataDTO(
+                data=EndCallNodeData(
                    name="End Call",
                    prompt=END_CALL_SYSTEM_PROMPT,
                    is_end=True,
@ -493,11 +488,10 @@ def three_node_workflow_no_variable_extraction() -> WorkflowGraph:
    """
    dto = ReactFlowDTO(
        nodes=[
-            RFNodeDTO(
+            StartCallRFNode(
                id="start",
-                type=NodeType.startNode,
                position=Position(x=0, y=0),
-                data=NodeDataDTO(
+                data=StartCallNodeData(
                    name="Start Call",
                    prompt=START_CALL_SYSTEM_PROMPT,
                    is_start=True,
@ -506,11 +500,10 @@ def three_node_workflow_no_variable_extraction() -> WorkflowGraph:
                    extraction_enabled=False,
                ),
            ),
-            RFNodeDTO(
+            AgentRFNode(
                id="agent",
-                type=NodeType.agentNode,
                position=Position(x=0, y=200),
-                data=NodeDataDTO(
+                data=AgentNodeData(
                    name="Collect Info",
                    prompt=AGENT_SYSTEM_PROMPT,
                    allow_interrupt=False,
@ -518,11 +511,10 @@ def three_node_workflow_no_variable_extraction() -> WorkflowGraph:
                    extraction_enabled=False,  # Explicitly disabled for testing
                ),
            ),
-            RFNodeDTO(
+            EndCallRFNode(
                id="end",
-                type=NodeType.endNode,
                position=Position(x=0, y=400),
-                data=NodeDataDTO(
+                data=EndCallNodeData(
                    name="End Call",
                    prompt=END_CALL_SYSTEM_PROMPT,
                    is_end=True,
--- a/api/tests/test_display_options_evaluator.py
+++ b/api/tests/test_display_options_evaluator.py
@ -0,0 +1,39 @@
+"""Golden-test parity for the display_options evaluator.
+
+Both the Python `evaluate_display_options` and the TypeScript
+`evaluateDisplayOptions` (in `ui/src/components/flow/renderer/displayOptions.ts`)
+must agree on every fixture in `display_options_fixtures.json`. The TS
+side is verified by `ui/scripts/test-display-options.mjs`.
+"""
+
+import json
+from pathlib import Path
+
+import pytest
+
+from api.services.workflow.node_specs import evaluate_display_options
+
+FIXTURES_PATH = (
+    Path(__file__).parent.parent
+    / "services"
+    / "workflow"
+    / "node_specs"
+    / "display_options_fixtures.json"
+)
+
+
+def load_cases():
+    with open(FIXTURES_PATH) as f:
+        return json.load(f)["cases"]
+
+
+@pytest.mark.parametrize("case", load_cases(), ids=lambda c: c["name"])
+def test_python_evaluator_matches_fixture(case):
+    rules = case["rules"]
+    values = case["values"]
+    expected = case["expected"]
+    actual = evaluate_display_options(rules, values)
+    assert actual is expected, (
+        f"{case['name']}: expected {expected}, got {actual} "
+        f"for rules={rules!r} values={values!r}"
+    )
--- a/api/tests/test_dograh_sdk.py
+++ b/api/tests/test_dograh_sdk.py
@ -0,0 +1,235 @@
+"""Tests for the Python runtime SDK (`dograh_sdk`).
+
+Uses a stub client backed by the in-process spec registry rather than
+exercising the HTTP layer — the HTTP client is a thin wrapper that's
+easier to test manually against a live server.
+
+Covers:
+- Workflow builder round-trips through ReactFlowDTO validation
+- Validation errors fail at the `add()` call site
+- from_json preserves node IDs and subsequent add() doesn't collide
+- Edge labels / conditions are required
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from dograh_sdk import Workflow
+from dograh_sdk._generated_models import NodeSpec
+from dograh_sdk.errors import ValidationError
+
+from api.services.workflow.dto import ReactFlowDTO
+from api.services.workflow.node_specs import all_specs, get_spec
+
+
+class _StubClient:
+    """Stand-in for DograhClient backed by the in-process spec registry.
+    Matches the real client's contract: `get_node_type(name)` returns a
+    `NodeSpec` Pydantic model."""
+
+    def get_node_type(self, name: str) -> NodeSpec:
+        spec = get_spec(name)
+        if spec is None:
+            raise ValueError(f"Unknown spec: {name}")
+        return NodeSpec.model_validate(spec.model_dump(mode="json"))
+
+
+@pytest.fixture
+def client() -> _StubClient:
+    return _StubClient()
+
+
+# ─── Builder + to_json round-trip ────────────────────────────────────────
+
+
+def test_builds_minimal_workflow_and_roundtrips_through_dto(client: _StubClient):
+    wf = Workflow(client=client, name="minimal")
+    start = wf.add(
+        type="startCall",
+        name="greeting",
+        prompt="Say hi to the caller.",
+    )
+    end = wf.add(
+        type="endCall",
+        name="close",
+        prompt="Thank the caller and hang up.",
+    )
+    wf.edge(start, end, label="done", condition="When the greeting is complete")
+
+    payload = wf.to_json()
+    # Wire format must validate through the backend Pydantic union — if
+    # it doesn't, the SDK has silently drifted from the spec schema.
+    dto = ReactFlowDTO.model_validate(payload)
+    assert len(dto.nodes) == 2
+    assert {n.type for n in dto.nodes} == {"startCall", "endCall"}
+    assert len(dto.edges) == 1
+
+
+def test_defaults_applied_from_spec(client: _StubClient):
+    """Spec defaults (e.g., `allow_interrupt=False` on startCall) fill in
+    when the user doesn't pass them."""
+    wf = Workflow(client=client, name="defaults")
+    start = wf.add(type="startCall", name="greeting", prompt="hello")
+    payload = wf.to_json()
+    data = payload["nodes"][0]["data"]
+    assert data["allow_interrupt"] is False  # spec default
+    assert data["add_global_prompt"] is True  # spec default
+    _ = start  # node ref is not needed by the assertions; silences the unused-variable lint
+
+
+def test_webhook_complex_fields_validate(client: _StubClient):
+    """Webhook's json + fixed_collection (custom_headers) round-trip."""
+    wf = Workflow(client=client, name="wh")
+    wh = wf.add(
+        type="webhook",
+        name="notify",
+        enabled=True,
+        http_method="POST",
+        endpoint_url="https://api.example.com/hook",
+        custom_headers=[{"key": "X-Source", "value": "dograh"}],
+        payload_template={"run": "{{workflow_run_id}}"},
+    )
+    payload = wf.to_json()
+    # Webhook has no incoming/outgoing graph requirements — render as a
+    # standalone node in the graph for the DTO round-trip.
+    ReactFlowDTO.model_validate(payload)
+    _ = wh
+
+
+# ─── Validation errors at call site ──────────────────────────────────────
+
+
+def test_unknown_field_raises_at_add(client: _StubClient):
+    wf = Workflow(client=client, name="typo")
+    with pytest.raises(ValidationError, match="unknown field"):
+        wf.add(
+            type="startCall",
+            name="greeting",
+            prompt="hi",
+            promt="typo",  # extra misspelled field
+        )
+
+
+def test_missing_required_raises_at_add(client: _StubClient):
+    wf = Workflow(client=client, name="missing")
+    with pytest.raises(ValidationError, match="required field missing"):
+        wf.add(type="startCall", name="greeting")  # no prompt
+
+
+def test_wrong_scalar_type_raises(client: _StubClient):
+    wf = Workflow(client=client, name="wrongtype")
+    with pytest.raises(ValidationError, match="expected boolean"):
+        wf.add(
+            type="agentNode",
+            name="x",
+            prompt="y",
+            allow_interrupt="yes",
+        )
+
+
+def test_invalid_options_value_raises(client: _StubClient):
+    wf = Workflow(client=client, name="wrongenum")
+    with pytest.raises(ValidationError, match="not in allowed"):
+        wf.add(
+            type="startCall",
+            name="greeting",
+            prompt="hi",
+            greeting_type="video",  # only text|audio allowed
+        )
+
+
+def test_unknown_node_type_raises(client: _StubClient):
+    wf = Workflow(client=client, name="x")
+    with pytest.raises(ValueError, match="Unknown spec"):
+        wf.add(type="nonExistentType", name="x")
+
+
+def test_validation_error_surfaces_llm_hint(client: _StubClient):
+    """When a property carries `llm_hint`, it appears in the error message
+    so LLMs can self-correct on retry. `tool_uuids` on agentNode has the
+    hint 'List of tool UUIDs from `list_tools`.'"""
+    wf = Workflow(client=client, name="hint")
+    with pytest.raises(ValidationError) as exc_info:
+        wf.add(
+            type="agentNode",
+            name="x",
+            prompt="y",
+            tool_uuids="single-uuid-not-a-list",  # wrong shape: str, not list
+        )
+    msg = str(exc_info.value)
+    assert "tool_uuids" in msg
+    assert "Hint:" in msg
+    assert "list_tools" in msg
+
+
+def test_no_hint_message_when_spec_has_none(client: _StubClient):
+    """Properties without `llm_hint` produce a plain error (no dangling
+    'Hint:' line)."""
+    wf = Workflow(client=client, name="no-hint")
+    with pytest.raises(ValidationError) as exc_info:
+        wf.add(type="agentNode", name="x", prompt="y", allow_interrupt="yes")
+    assert "Hint:" not in str(exc_info.value)
+
+
+def test_edge_requires_label_and_condition(client: _StubClient):
+    wf = Workflow(client=client, name="edge")
+    a = wf.add(type="startCall", name="a", prompt="hi")
+    b = wf.add(type="endCall", name="b", prompt="bye")
+    with pytest.raises(ValidationError, match="label is required"):
+        wf.edge(a, b, label="", condition="condition")
+    with pytest.raises(ValidationError, match="condition is required"):
+        wf.edge(a, b, label="label", condition="")
+
+
+# ─── Round-trip from_json → edit → to_json ────────────────────────────────
+
+
+def test_from_json_preserves_ids_and_next_id_doesnt_collide(client: _StubClient):
+    wf0 = Workflow(client=client, name="w0")
+    start = wf0.add(type="startCall", name="g", prompt="hi")
+    end = wf0.add(type="endCall", name="e", prompt="bye")
+    wf0.edge(start, end, label="done", condition="done")
+
+    payload = wf0.to_json()
+    wf1 = Workflow.from_json(payload, client=client, name="w0-reload")
+
+    # IDs are preserved
+    assert [n.id for n in wf1._nodes] == [start.id, end.id]
+    # Next add() gets a fresh ID, not colliding with the existing ones
+    new_ref = wf1.add(type="agentNode", name="qualify", prompt="ask stuff")
+    assert new_ref.id != start.id
+    assert new_ref.id != end.id
+    assert int(new_ref.id) > max(int(start.id), int(end.id))
+
+
+def test_from_json_validates_data(client: _StubClient):
+    """Loading a JSON payload with a misnamed field raises — we don't
+    silently accept drift."""
+    bad = {
+        "nodes": [
+            {
+                "id": "1",
+                "type": "startCall",
+                "position": {"x": 0, "y": 0},
+                "data": {"name": "g", "prompt": "hi", "bogus": 1},
+            }
+        ],
+        "edges": [],
+    }
+    with pytest.raises(ValidationError, match="unknown field"):
+        Workflow.from_json(bad, client=client)
+
+
+# ─── Sanity: all registered specs are reachable by name ───────────────────
+
+
+def test_every_registered_spec_is_reachable_by_sdk(client: _StubClient):
+    wf = Workflow(client=client, name="probe")
+    for spec in all_specs():
+        # Just fetch the spec via the client; doesn't add anything. This
+        # ensures the `_StubClient` wiring works for all types.
+        probe = client.get_node_type(spec.name)
+        assert probe.name == spec.name
+    _ = wf
--- a/api/tests/test_dograh_sdk_typed.py
+++ b/api/tests/test_dograh_sdk_typed.py
@ -0,0 +1,128 @@
+"""Tests for the typed SDK (`dograh_sdk.typed`).
+
+Covers:
+- Generated classes import cleanly and declare the correct spec name
+- `Workflow.add_typed(node)` produces the same wire format as
+  `Workflow.add(type=..., **kwargs)`
+- Typed-class construction respects required/optional field defaults
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from dograh_sdk import Workflow
+from dograh_sdk._generated_models import NodeSpec
+from dograh_sdk.typed import (
+    AgentNode,
+    EndCall,
+    GlobalNode,
+    Qa,
+    StartCall,
+    Trigger,
+    TypedNode,
+    Webhook,
+)
+
+from api.services.workflow.dto import ReactFlowDTO
+from api.services.workflow.node_specs import get_spec
+
+
+class _StubClient:
+    def get_node_type(self, name: str) -> NodeSpec:
+        return NodeSpec.model_validate(get_spec(name).model_dump(mode="json"))
+
+
+@pytest.fixture
+def client() -> _StubClient:
+    return _StubClient()
+
+
+# ─── Generated classes declare the correct discriminator ──────────────────
+
+
+@pytest.mark.parametrize(
+    "cls,expected_type",
+    [
+        (StartCall, "startCall"),
+        (AgentNode, "agentNode"),
+        (EndCall, "endCall"),
+        (GlobalNode, "globalNode"),
+        (Trigger, "trigger"),
+        (Webhook, "webhook"),
+        (Qa, "qa"),
+    ],
+    ids=lambda v: v.__name__ if isinstance(v, type) else v,
+)
+def test_typed_class_declares_spec_name(cls: type[TypedNode], expected_type: str):
+    assert cls.type == expected_type
+    # Instances inherit the ClassVar
+    if cls is StartCall:
+        inst = cls(name="g", prompt="hi")
+    elif cls is AgentNode:
+        inst = cls(name="a", prompt="hi")
+    elif cls is EndCall:
+        inst = cls(name="e", prompt="hi")
+    elif cls is GlobalNode:
+        inst = cls(name="g", prompt="hi")
+    elif cls is Trigger:
+        inst = cls(name="t")
+    elif cls is Webhook:
+        inst = cls(name="wh")
+    else:  # Qa
+        inst = cls(name="qa")
+    assert inst.type == expected_type
+
+
+# ─── add_typed integrates with Workflow and round-trips through DTO ──────
+
+
+def test_add_typed_builds_valid_workflow(client: _StubClient):
+    wf = Workflow(client=client, name="typed-e2e")
+    start = wf.add_typed(StartCall(name="greeting", prompt="Hi there!"))
+    end = wf.add_typed(EndCall(name="done", prompt="Bye."))
+    wf.edge(start, end, label="done", condition="conversation over")
+
+    payload = wf.to_json()
+    dto = ReactFlowDTO.model_validate(payload)
+    assert len(dto.nodes) == 2
+    assert payload["nodes"][0]["type"] == "startCall"
+    assert payload["nodes"][1]["type"] == "endCall"
+
+
+def test_add_typed_and_add_produce_identical_data(client: _StubClient):
+    """The typed path and the generic path should produce identical node
+    data for equivalent inputs."""
+    wf_typed = Workflow(client=client)
+    wf_typed.add_typed(AgentNode(name="q", prompt="ask"))
+
+    wf_generic = Workflow(client=client)
+    wf_generic.add(type="agentNode", name="q", prompt="ask")
+
+    typed_data = wf_typed.to_json()["nodes"][0]["data"]
+    generic_data = wf_generic.to_json()["nodes"][0]["data"]
+    assert typed_data == generic_data
+
+
+def test_webhook_mutable_defaults_dont_share_state(client: _StubClient):
+    """Dataclass default_factory ensures every Webhook() gets its own dict."""
+    wf = Workflow(client=client)
+    a = wf.add_typed(Webhook(name="a"))
+    b = wf.add_typed(Webhook(name="b"))
+    payload = wf.to_json()
+    a_data = payload["nodes"][0]["data"]
+    b_data = payload["nodes"][1]["data"]
+    # Both instances must end up with payload_template populated from the
+    # factory; mutating one must not affect the other.
+    assert a_data["payload_template"] is not b_data["payload_template"]
+    _ = a, b
+
+
+def test_typed_sdk_surfaces_spec_default_to_field(client: _StubClient):
+    """Spec defaults make it all the way through: StartCall().name defaults
+    to the spec's `"Start Call"` literal."""
+    s = StartCall(prompt="hi")
+    assert s.name == "Start Call"
+    assert s.allow_interrupt is False  # spec default for startCall
+    assert s.add_global_prompt is True
--- a/api/tests/test_dto.py
+++ b/api/tests/test_dto.py
@ -1,11 +1,98 @@
+from pathlib import Path
+
 import pytest

-from api.services.workflow.dto import ReactFlowDTO
+from api.services.workflow.dto import ReactFlowDTO, sanitize_workflow_definition
+
+_FIXTURES_DIR = Path(__file__).parent / "definitions"


@pytest.mark.asyncio
 async def test_dto():
-    # assert no exceptions are raised
-    with open("tests/definitions/rf-1.json", "r") as f:
+    # Path resolved relative to this test file so the test works regardless
+    # of the cwd pytest is invoked from.
+    with open(_FIXTURES_DIR / "rf-1.json", "r") as f:
        dto = ReactFlowDTO.model_validate_json(f.read())
    assert dto is not None
+
+
+def test_sanitize_strips_ui_runtime_fields():
+    definition = {
+        "viewport": {"x": 0, "y": 0, "zoom": 1},
+        "nodes": [
+            {
+                "id": "n1",
+                "type": "startCall",
+                "position": {"x": 0, "y": 0},
+                "width": 200,  # ReactFlow-computed, preserved
+                "selected": True,  # ReactFlow runtime, preserved
+                "data": {
+                    "name": "Start",
+                    "prompt": "hi",
+                    "greeting": "hello",
+                    "invalid": True,  # UI-only, should be stripped
+                    "validationMessage": "oops",  # UI-only, should be stripped
+                    "mystery_field": 42,  # unknown, should be stripped
+                },
+            },
+            {
+                "id": "n2",
+                "type": "agentNode",
+                "position": {"x": 1, "y": 1},
+                "data": {"name": "A", "prompt": "p", "invalid": False},
+            },
+        ],
+        "edges": [
+            {
+                "id": "e1",
+                "source": "n1",
+                "target": "n2",
+                "data": {
+                    "label": "next",
+                    "condition": "true",
+                    "invalid": True,  # UI-only, should be stripped
+                },
+            }
+        ],
+    }
+
+    out = sanitize_workflow_definition(definition)
+
+    # Top-level keys preserved
+    assert out["viewport"] == {"x": 0, "y": 0, "zoom": 1}
+    # ReactFlow runtime fields on the node itself preserved
+    assert out["nodes"][0]["width"] == 200
+    assert out["nodes"][0]["selected"] is True
+
+    # node.data stripped of unknowns, known fields kept
+    n1_data = out["nodes"][0]["data"]
+    assert n1_data == {"name": "Start", "prompt": "hi", "greeting": "hello"}
+    assert "invalid" not in n1_data
+    assert "validationMessage" not in n1_data
+    assert "mystery_field" not in n1_data
+
+    n2_data = out["nodes"][1]["data"]
+    assert n2_data == {"name": "A", "prompt": "p"}
+
+    # edge.data stripped
+    assert out["edges"][0]["data"] == {"label": "next", "condition": "true"}
+
+
+def test_sanitize_noop_on_empty_and_unknown_types():
+    assert sanitize_workflow_definition(None) is None
+    assert sanitize_workflow_definition({}) == {}
+
+    # Unknown node type: pass through unchanged rather than wipe data
+    definition = {
+        "nodes": [
+            {
+                "id": "n1",
+                "type": "unknownType",
+                "position": {"x": 0, "y": 0},
+                "data": {"anything": "goes"},
+            }
+        ],
+        "edges": [],
+    }
+    out = sanitize_workflow_definition(definition)
+    assert out["nodes"][0]["data"] == {"anything": "goes"}
--- a/api/tests/test_layout.py
+++ b/api/tests/test_layout.py
@ -0,0 +1,124 @@
+"""Tests for position reconciliation after the LLM save round-trip."""
+
+from __future__ import annotations
+
+from api.services.workflow.layout import reconcile_positions
+
+
+def _node(
+    id: str,
+    type: str,
+    *,
+    name: str | None = None,
+    x: float = 0.0,
+    y: float = 0.0,
+) -> dict:
+    data: dict = {}
+    if name is not None:
+        data["name"] = name
+    return {"id": id, "type": type, "position": {"x": x, "y": y}, "data": data}
+
+
+def _edge(src: str, tgt: str) -> dict:
+    return {
+        "id": f"{src}-{tgt}",
+        "source": src,
+        "target": tgt,
+        "data": {"label": "x", "condition": "y"},
+    }
+
+
+def test_named_match_preserves_position():
+    previous = {
+        "nodes": [_node("99", "startCall", name="greeting", x=100, y=200)],
+        "edges": [],
+    }
+    new = {
+        "nodes": [_node("1", "startCall", name="greeting")],
+        "edges": [],
+    }
+    out = reconcile_positions(new, previous)
+    assert out["nodes"][0]["position"] == {"x": 100, "y": 200}
+
+
+def test_unnamed_match_by_type_ordering():
+    previous = {
+        "nodes": [
+            _node("7", "agentNode", x=-648, y=-158),
+            _node("8", "agentNode", x=500, y=-100),
+        ],
+        "edges": [],
+    }
+    new = {
+        "nodes": [
+            _node("1", "agentNode"),
+            _node("2", "agentNode"),
+        ],
+        "edges": [],
+    }
+    out = reconcile_positions(new, previous)
+    assert out["nodes"][0]["position"] == {"x": -648, "y": -158}
+    assert out["nodes"][1]["position"] == {"x": 500, "y": -100}
+
+
+def test_new_node_placed_relative_to_incoming_neighbor():
+    previous = {
+        "nodes": [_node("99", "startCall", name="greeting", x=100, y=200)],
+        "edges": [],
+    }
+    new = {
+        "nodes": [
+            _node("1", "startCall", name="greeting"),
+            _node("2", "agentNode", name="new_node"),
+        ],
+        "edges": [_edge("1", "2")],
+    }
+    out = reconcile_positions(new, previous)
+    # Start call keeps its previous position.
+    assert out["nodes"][0]["position"] == {"x": 100, "y": 200}
+    # New node offset from its incoming neighbor.
+    assert out["nodes"][1]["position"] == {"x": 500, "y": 400}
+
+
+def test_orphan_new_node_stays_at_origin():
+    new = {
+        "nodes": [_node("1", "agentNode", name="orphan")],
+        "edges": [],
+    }
+    out = reconcile_positions(new, None)
+    assert out["nodes"][0]["position"] == {"x": 0.0, "y": 0.0}
+
+
+def test_named_wins_over_unnamed_ordering():
+    previous = {
+        "nodes": [
+            _node("7", "agentNode", x=-648, y=-158),  # unnamed
+            _node("8", "agentNode", name="qualify", x=900, y=900),
+        ],
+        "edges": [],
+    }
+    new = {
+        "nodes": [
+            _node("1", "agentNode", name="qualify"),  # matches named
+            _node("2", "agentNode"),  # falls to unnamed queue
+        ],
+        "edges": [],
+    }
+    out = reconcile_positions(new, previous)
+    assert out["nodes"][0]["position"] == {"x": 900, "y": 900}
+    assert out["nodes"][1]["position"] == {"x": -648, "y": -158}
+
+
+def test_no_previous_keeps_origin_for_all_matched_positions():
+    new = {
+        "nodes": [
+            _node("1", "startCall", name="greeting"),
+            _node("2", "agentNode", name="reply"),
+        ],
+        "edges": [_edge("1", "2")],
+    }
+    out = reconcile_positions(new, None)
+    # No previous → first node stays at origin (no incoming), second
+    # node placed relative to its incoming neighbor at origin.
+    assert out["nodes"][0]["position"] == {"x": 0.0, "y": 0.0}
+    assert out["nodes"][1]["position"] == {"x": 400.0, "y": 200.0}
--- a/api/tests/test_mcp_save_workflow.py
+++ b/api/tests/test_mcp_save_workflow.py
@ -0,0 +1,225 @@
+"""Integration tests for the `save_workflow` MCP tool.
+
+Mocks `authenticate_mcp_request` and the db_client so tests don't need
+a live DB, but exercises the real TS validator subprocess end-to-end —
+parse is part of the contract the LLM relies on.
+
+Round-trip and pure-parser tests live in `test_ts_bridge.py`; this file
+focuses on the MCP tool's error-routing, version tagging, and DB-call
+shape.
+"""
+
+from __future__ import annotations
+
+import shutil
+from dataclasses import dataclass
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from fastapi import HTTPException
+
+from api.mcp_server.tools.save_workflow import save_workflow
+
+pytestmark = pytest.mark.skipif(
+    shutil.which("node") is None, reason="node binary not available"
+)
+
+
+# ─── Fixtures & helpers ──────────────────────────────────────────────────
+
+
+@dataclass
+class _FakeDraft:
+    version_number: int = 2
+    status: str = "draft"
+
+
+class _FakeWorkflowModel:
+    id = 1
+    organization_id = 1
+    name = "test"
+    # reconcile_positions reads whichever of these holds the previous
+    # stored workflow JSON; None on all three is fine for a greenfield
+    # test and causes reconcile_positions to fall back to the placement
+    # heuristic for any new node.
+    current_definition = None
+    released_definition = None
+    workflow_definition = None
+
+
+@pytest.fixture
+def authed_user() -> MagicMock:
+    user = MagicMock()
+    user.selected_organization_id = 1
+    user.id = 1
+    return user
+
+
+@pytest.fixture
+def mock_backends(authed_user: MagicMock):
+    save_mock = AsyncMock(return_value=_FakeDraft())
+    update_mock = AsyncMock(return_value=_FakeWorkflowModel())
+    with (
+        patch(
+            "api.mcp_server.tools.save_workflow.authenticate_mcp_request",
+            AsyncMock(return_value=authed_user),
+        ),
+        patch(
+            "api.mcp_server.tools.save_workflow.db_client.get_workflow",
+            AsyncMock(return_value=_FakeWorkflowModel()),
+        ),
+        patch(
+            "api.mcp_server.tools.save_workflow.db_client.save_workflow_draft",
+            save_mock,
+        ),
+        patch(
+            "api.mcp_server.tools.save_workflow.db_client.update_workflow",
+            update_mock,
+        ),
+        patch(
+            "api.mcp_server.tools.save_workflow.db_client.get_draft_version",
+            AsyncMock(return_value=None),
+        ),
+    ):
+        yield save_mock, update_mock
+
+
+def _valid_code(name: str = "tool-test") -> str:
+    return f'''import {{ Workflow }} from "@dograh/sdk";
+import {{ startCall, endCall }} from "@dograh/sdk/typed";
+
+const wf = new Workflow({{ name: "{name}" }});
+
+const greeting = wf.addTyped(startCall({{ name: "greeting", prompt: "Hi!" }}));
+const done     = wf.addTyped(endCall({{ name: "done", prompt: "Bye." }}));
+
+wf.edge(greeting, done, {{ label: "done", condition: "conversation complete" }});
+'''
+
+
+# ─── Happy path ──────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_happy_path_saves_draft(mock_backends):
+    save_mock, update_mock = mock_backends
+    # Match the stored name so the rename branch stays dormant here.
+    result = await save_workflow(
+        workflow_id=1, code=_valid_code(name=_FakeWorkflowModel.name)
+    )
+    assert result["saved"] is True
+    assert result["workflow_id"] == 1
+    assert result["version_number"] == 2
+    assert result["status"] == "draft"
+    assert result["node_count"] == 2
+    assert result["edge_count"] == 1
+    assert result["renamed"] is False
+    assert result["name"] == _FakeWorkflowModel.name
+    save_mock.assert_awaited_once()
+    update_mock.assert_not_awaited()
+    payload = save_mock.call_args.kwargs["workflow_definition"]
+    assert len(payload["nodes"]) == 2
+    assert len(payload["edges"]) == 1
+
+
+@pytest.mark.asyncio
+async def test_rename_propagates_to_update_workflow(mock_backends):
+    save_mock, update_mock = mock_backends
+    result = await save_workflow(workflow_id=1, code=_valid_code(name="renamed"))
+    assert result["saved"] is True
+    assert result["renamed"] is True
+    assert result["name"] == "renamed"
+    update_mock.assert_awaited_once()
+    kwargs = update_mock.call_args.kwargs
+    assert kwargs["workflow_id"] == 1
+    assert kwargs["name"] == "renamed"
+    assert kwargs["workflow_definition"] is None
+    assert kwargs["organization_id"] == 1
+    save_mock.assert_awaited_once()
+
+
+# ─── Parse-stage rejections ──────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_parser_rejects_disallowed_top_level(mock_backends):
+    save_mock, update_mock = mock_backends
+    code = _valid_code() + "function evil() { return 1; }\n"
+    result = await save_workflow(workflow_id=1, code=code)
+    assert result["saved"] is False
+    assert result["error_code"] == "parse_error"
+    save_mock.assert_not_awaited()
+    update_mock.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_parser_rejects_unknown_factory(mock_backends):
+    save_mock, update_mock = mock_backends
+    code = """import { Workflow } from "@dograh/sdk";
+const wf = new Workflow({ name: "x" });
+const n = wf.addTyped(fakeNode({ name: "x", prompt: "y" }));
+"""
+    result = await save_workflow(workflow_id=1, code=code)
+    assert result["saved"] is False
+    assert result["error_code"] == "parse_error"
+    assert "Unknown node type" in result["error"]
+    save_mock.assert_not_awaited()
+    update_mock.assert_not_awaited()
+
+
+# ─── Validation-stage rejections ─────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_unknown_field_surfaces_validation_error(mock_backends):
+    save_mock, update_mock = mock_backends
+    code = """import { Workflow } from "@dograh/sdk";
+import { startCall } from "@dograh/sdk/typed";
+const wf = new Workflow({ name: "x" });
+const n = wf.addTyped(startCall({ name: "g", prompt: "hi", promt: "typo" }));
+"""
+    result = await save_workflow(workflow_id=1, code=code)
+    assert result["saved"] is False
+    assert result["error_code"] == "validation_error"
+    assert "Unknown field" in result["error"]
+    save_mock.assert_not_awaited()
+    update_mock.assert_not_awaited()
+
+
+# ─── Graph-stage rejections ──────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_graph_validation_catches_missing_start_node(mock_backends):
+    save_mock, update_mock = mock_backends
+    # Only an end node — WorkflowGraph requires exactly one start node.
+    code = """import { Workflow } from "@dograh/sdk";
+import { endCall } from "@dograh/sdk/typed";
+const wf = new Workflow({ name: "orphan" });
+const only = wf.addTyped(endCall({ name: "only", prompt: "bye" }));
+"""
+    result = await save_workflow(workflow_id=1, code=code)
+    assert result["saved"] is False
+    assert result["error_code"] == "graph_validation"
+    save_mock.assert_not_awaited()
+    update_mock.assert_not_awaited()
+
+
+# ─── Workflow not found / unauthorized ───────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_unknown_workflow_raises_404(authed_user: MagicMock):
+    with (
+        patch(
+            "api.mcp_server.tools.save_workflow.authenticate_mcp_request",
+            AsyncMock(return_value=authed_user),
+        ),
+        patch(
+            "api.mcp_server.tools.save_workflow.db_client.get_workflow",
+            AsyncMock(return_value=None),
+        ),
+    ):
+        with pytest.raises(HTTPException) as exc_info:
+            await save_workflow(workflow_id=999, code=_valid_code())
+        assert exc_info.value.status_code == 404
--- a/api/tests/test_node_specs.py
+++ b/api/tests/test_node_specs.py
@ -0,0 +1,196 @@
+"""Spec-quality lint.
+
+Catches drift between NodeSpecs and the rest of the system before it lands:
+- Placeholder/empty descriptions
+- Missing examples
+- display_options referencing fields that don't exist
+- Examples that don't validate against the per-type Pydantic DTO
+- Spec name not matching a discriminator value in dto.py
+"""
+
+from __future__ import annotations
+
+import re
+
+import pytest
+
+from api.services.workflow.dto import NodeType, ReactFlowDTO
+from api.services.workflow.node_specs import (
+    NodeSpec,
+    PropertySpec,
+    PropertyType,
+    all_specs,
+)
+
+PLACEHOLDER_DESCRIPTION_PATTERN = re.compile(
+    r"^\s*(todo|fixme|tbd|xxx|\.\.\.|placeholder|description|n/?a|\?)\s*\.?\s*$",
+    re.IGNORECASE,
+)
+
+
+def _walk_properties(props: list[PropertySpec], path: str = ""):
+    """Yield (full_path, property) for every property and nested sub-property."""
+    for prop in props:
+        full_path = f"{path}.{prop.name}" if path else prop.name
+        yield full_path, prop
+        if prop.properties:
+            yield from _walk_properties(prop.properties, full_path)
+
+
+# ─────────────────────────────────────────────────────────────────────────
+# Lint
+# ─────────────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize("spec", all_specs(), ids=lambda s: s.name)
+def test_node_spec_has_non_placeholder_description(spec: NodeSpec):
+    assert spec.description.strip(), f"{spec.name}: empty description"
+    assert not PLACEHOLDER_DESCRIPTION_PATTERN.match(spec.description), (
+        f"{spec.name}: description looks like a placeholder: {spec.description!r}"
+    )
+    assert len(spec.description) >= 20, (
+        f"{spec.name}: description too short to be useful for an LLM "
+        f"({len(spec.description)} chars)"
+    )
+
+
+@pytest.mark.parametrize("spec", all_specs(), ids=lambda s: s.name)
+def test_node_spec_has_at_least_one_example(spec: NodeSpec):
+    assert spec.examples, (
+        f"{spec.name}: must have at least one NodeExample so LLMs have a "
+        f"realistic shape to pattern-match."
+    )
+
+
+@pytest.mark.parametrize("spec", all_specs(), ids=lambda s: s.name)
+def test_property_descriptions_non_placeholder(spec: NodeSpec):
+    for path, prop in _walk_properties(spec.properties):
+        assert prop.description.strip(), f"{spec.name}.{path}: empty description"
+        assert not PLACEHOLDER_DESCRIPTION_PATTERN.match(prop.description), (
+            f"{spec.name}.{path}: description looks like a placeholder: "
+            f"{prop.description!r}"
+        )
+
+
+@pytest.mark.parametrize("spec", all_specs(), ids=lambda s: s.name)
+def test_display_options_reference_real_fields(spec: NodeSpec):
+    """A property's display_options must only reference sibling property
+    names. Nested properties are checked against their own nested siblings."""
+
+    def _check(scope_props: list[PropertySpec], scope_path: str = ""):
+        names_in_scope = {p.name for p in scope_props}
+        for prop in scope_props:
+            current_path = f"{scope_path}.{prop.name}" if scope_path else prop.name
+            if prop.display_options:
+                refs = set((prop.display_options.show or {}).keys()) | set(
+                    (prop.display_options.hide or {}).keys()
+                )
+                missing = refs - names_in_scope
+                assert not missing, (
+                    f"{spec.name}.{current_path}: display_options references "
+                    f"unknown sibling fields: {sorted(missing)}"
+                )
+            if prop.properties:
+                _check(prop.properties, current_path)
+
+    _check(spec.properties)
+
+
+@pytest.mark.parametrize("spec", all_specs(), ids=lambda s: s.name)
+def test_options_properties_have_options(spec: NodeSpec):
+    for path, prop in _walk_properties(spec.properties):
+        if prop.type in (PropertyType.options, PropertyType.multi_options):
+            assert prop.options, (
+                f"{spec.name}.{path}: type={prop.type.value} requires at "
+                f"least one PropertyOption."
+            )
+
+
+@pytest.mark.parametrize("spec", all_specs(), ids=lambda s: s.name)
+def test_fixed_collection_has_sub_properties(spec: NodeSpec):
+    for path, prop in _walk_properties(spec.properties):
+        if prop.type == PropertyType.fixed_collection:
+            assert prop.properties, (
+                f"{spec.name}.{path}: fixed_collection requires nested "
+                f"`properties` describing each row."
+            )
+
+
+@pytest.mark.parametrize("spec", all_specs(), ids=lambda s: s.name)
+def test_spec_name_matches_dto_discriminator(spec: NodeSpec):
+    valid_names = {t.value for t in NodeType}
+    assert spec.name in valid_names, (
+        f"NodeSpec {spec.name!r} doesn't match any NodeType discriminator. "
+        f"Valid: {sorted(valid_names)}"
+    )
+
+
+@pytest.mark.parametrize("spec", all_specs(), ids=lambda s: s.name)
+def test_examples_validate_against_dto(spec: NodeSpec):
+    """Each NodeExample.data must pass per-type DTO validation. This stops
+    examples from drifting away from the actual wire schema."""
+    for ex in spec.examples:
+        wire_node = {
+            "id": "example",
+            "type": spec.name,
+            "position": {"x": 0, "y": 0},
+            "data": ex.data,
+        }
+        # Build a minimal valid graph: example node plus a synthetic peer if
+        # graph_constraints require an incoming or outgoing edge.
+        nodes = [wire_node]
+        edges: list[dict] = []
+        constraints = spec.graph_constraints
+
+        if constraints and (constraints.min_outgoing or 0) > 0:
+            nodes.append(
+                {
+                    "id": "downstream",
+                    "type": "endCall",
+                    "position": {"x": 0, "y": 0},
+                    "data": {"name": "End", "prompt": "End", "is_end": True},
+                }
+            )
+            edges.append(
+                {
+                    "id": "e_out",
+                    "source": "example",
+                    "target": "downstream",
+                    "data": {"label": "next", "condition": "next"},
+                }
+            )
+
+        if constraints and (constraints.min_incoming or 0) > 0:
+            nodes.append(
+                {
+                    "id": "upstream",
+                    "type": "startCall",
+                    "position": {"x": 0, "y": 0},
+                    "data": {
+                        "name": "Start",
+                        "prompt": "Hello",
+                        "is_start": True,
+                    },
+                }
+            )
+            edges.append(
+                {
+                    "id": "e_in",
+                    "source": "upstream",
+                    "target": "example",
+                    "data": {"label": "in", "condition": "in"},
+                }
+            )
+
+        # Validate. If this raises, the example is broken.
+        ReactFlowDTO.model_validate({"nodes": nodes, "edges": edges})
+
+
+def test_all_dto_types_have_specs():
+    """Every NodeType discriminator value must have a registered NodeSpec —
+    catches the case where someone adds a new node type to dto.py but
+    forgets to author a spec."""
+    spec_names = {s.name for s in all_specs()}
+    type_values = {t.value for t in NodeType}
+    missing = type_values - spec_names
+    assert not missing, f"NodeType discriminators without specs: {sorted(missing)}"
--- a/api/tests/test_pipecat_engine_end_call.py
+++ b/api/tests/test_pipecat_engine_end_call.py
@ -27,12 +27,13 @@ import pytest
 from api.enums import ToolCategory
 from api.services.workflow.dto import (
    EdgeDataDTO,
-    NodeDataDTO,
-    NodeType,
+    EndCallNodeData,
+    EndCallRFNode,
    Position,
    ReactFlowDTO,
    RFEdgeDTO,
-    RFNodeDTO,
+    StartCallNodeData,
+    StartCallRFNode,
 )
 from api.services.workflow.pipecat_engine import PipecatEngine
 from api.services.workflow.pipecat_engine_custom_tools import CustomToolManager
@ -1013,11 +1014,10 @@ class TestEndCallExtractionBehavior:
        # Create a workflow where start node has NO extraction
        dto = ReactFlowDTO(
            nodes=[
-                RFNodeDTO(
+                StartCallRFNode(
                    id="start",
-                    type=NodeType.startNode,
                    position=Position(x=0, y=0),
-                    data=NodeDataDTO(
+                    data=StartCallNodeData(
                        name="Start Call",
                        prompt=START_CALL_SYSTEM_PROMPT,
                        is_start=True,
@ -1026,11 +1026,10 @@ class TestEndCallExtractionBehavior:
                        extraction_enabled=False,  # No extraction
                    ),
                ),
-                RFNodeDTO(
+                EndCallRFNode(
                    id="end",
-                    type=NodeType.endNode,
                    position=Position(x=0, y=200),
-                    data=NodeDataDTO(
+                    data=EndCallNodeData(
                        name="End Call",
                        prompt=END_CALL_SYSTEM_PROMPT,
                        is_end=True,
--- a/api/tests/test_sdk_sync.py
+++ b/api/tests/test_sdk_sync.py
@ -0,0 +1,99 @@
+"""Drift guard: committed SDK typed files must match what codegen
+produces from the current `node_specs/` registry.
+
+Fails loudly if a spec was edited without running
+`./scripts/generate_sdk.sh`. CI also runs the full script and asserts
+an empty `git diff` as the authoritative cross-language check; this
+test is the fast local feedback loop inside pytest.
+"""
+
+from __future__ import annotations
+
+import json
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+# Ensure the Python SDK package is importable without requiring a
+# `pip install -e sdk/python`. The codegen lives there because it ships
+# with the SDK wheel, but tests need to reach it directly.
+REPO_ROOT = Path(__file__).resolve().parents[2]
+SDK_PY_SRC = REPO_ROOT / "sdk" / "python" / "src"
+if str(SDK_PY_SRC) not in sys.path:
+    sys.path.insert(0, str(SDK_PY_SRC))
+
+from dograh_sdk.codegen import generate_all  # noqa: E402
+
+from api.services.workflow.node_specs import SPEC_VERSION, all_specs  # noqa: E402
+
+PY_OUT = REPO_ROOT / "sdk" / "python" / "src" / "dograh_sdk" / "typed"
+TS_OUT = REPO_ROOT / "sdk" / "typescript" / "src" / "typed"
+TS_CODEGEN = REPO_ROOT / "sdk" / "typescript" / "scripts" / "codegen.mts"
+REGEN_HINT = "Run ./scripts/generate_sdk.sh to regenerate."
+
+
+def _specs_payload() -> dict:
+    return {
+        "spec_version": SPEC_VERSION,
+        "node_types": [s.model_dump(mode="json") for s in all_specs()],
+    }
+
+
+def _compare_trees(expected_dir: Path, actual_dir: Path, *, skip: set[str]) -> None:
+    def tree(d: Path) -> dict[str, str]:
+        return {
+            p.name: p.read_text()
+            for p in d.iterdir()
+            if p.is_file() and p.name not in skip
+        }
+
+    expected = tree(expected_dir)
+    actual = tree(actual_dir)
+
+    if expected.keys() != actual.keys():
+        pytest.fail(
+            f"File set differs in {expected_dir.name}/.\n"
+            f"  committed: {sorted(expected)}\n"
+            f"  generated: {sorted(actual)}\n"
+            f"{REGEN_HINT}"
+        )
+    for name in sorted(expected):
+        if expected[name] != actual[name]:
+            pytest.fail(
+                f"{expected_dir.name}/{name} is out of sync with node_specs. "
+                f"{REGEN_HINT}"
+            )
+
+
+def test_python_sdk_typed_in_sync(tmp_path: Path) -> None:
+    specs = _specs_payload()["node_types"]
+    generate_all(specs, tmp_path)
+    # _base.py is hand-written and lives alongside generated files.
+    _compare_trees(PY_OUT, tmp_path, skip={"_base.py", "__pycache__"})
+
+
+@pytest.mark.skipif(shutil.which("node") is None, reason="node binary not available")
+def test_typescript_sdk_typed_in_sync(tmp_path: Path) -> None:
+    specs_file = tmp_path / "specs.json"
+    specs_file.write_text(json.dumps(_specs_payload()))
+    out = tmp_path / "ts_out"
+
+    result = subprocess.run(
+        [
+            "node",
+            str(TS_CODEGEN),
+            "--input",
+            str(specs_file),
+            "--out",
+            str(out),
+        ],
+        capture_output=True,
+        text=True,
+    )
+    assert result.returncode == 0, (
+        f"TS codegen failed:\nstdout: {result.stdout}\nstderr: {result.stderr}"
+    )
+    _compare_trees(TS_OUT, out, skip=set())
--- a/api/tests/test_text_and_audio_playback.py
+++ b/api/tests/test_text_and_audio_playback.py
@ -15,12 +15,13 @@ import pytest
 from api.services.pipecat.recording_audio_cache import RecordingAudio
 from api.services.workflow.dto import (
    EdgeDataDTO,
-    NodeDataDTO,
-    NodeType,
+    EndCallNodeData,
+    EndCallRFNode,
    Position,
    ReactFlowDTO,
    RFEdgeDTO,
-    RFNodeDTO,
+    StartCallNodeData,
+    StartCallRFNode,
 )
 from api.services.workflow.pipecat_engine import PipecatEngine
 from api.services.workflow.pipecat_engine_custom_tools import CustomToolManager
@ -64,11 +65,10 @@ def text_workflow() -> WorkflowGraph:
    """Start->End workflow with text greeting and text transition speech."""
    dto = ReactFlowDTO(
        nodes=[
-            RFNodeDTO(
+            StartCallRFNode(
                id="start",
-                type=NodeType.startNode,
                position=Position(x=0, y=0),
-                data=NodeDataDTO(
+                data=StartCallNodeData(
                    name="Start Call",
                    prompt=START_PROMPT,
                    is_start=True,
@ -79,11 +79,10 @@ def text_workflow() -> WorkflowGraph:
                    extraction_enabled=False,
                ),
            ),
-            RFNodeDTO(
+            EndCallRFNode(
                id="end",
-                type=NodeType.endNode,
                position=Position(x=0, y=200),
-                data=NodeDataDTO(
+                data=EndCallNodeData(
                    name="End Call",
                    prompt=END_PROMPT,
                    is_end=True,
@ -115,11 +114,10 @@ def audio_workflow() -> WorkflowGraph:
    """Start->End workflow with audio greeting and audio transition speech."""
    dto = ReactFlowDTO(
        nodes=[
-            RFNodeDTO(
+            StartCallRFNode(
                id="start",
-                type=NodeType.startNode,
                position=Position(x=0, y=0),
-                data=NodeDataDTO(
+                data=StartCallNodeData(
                    name="Start Call",
                    prompt=START_PROMPT,
                    is_start=True,
@ -130,11 +128,10 @@ def audio_workflow() -> WorkflowGraph:
                    extraction_enabled=False,
                ),
            ),
-            RFNodeDTO(
+            EndCallRFNode(
                id="end",
-                type=NodeType.endNode,
                position=Position(x=0, y=200),
-                data=NodeDataDTO(
+                data=EndCallNodeData(
                    name="End Call",
                    prompt=END_PROMPT,
                    is_end=True,
@ -293,11 +290,10 @@ class TestStartGreeting:
        """No greeting configured should return None."""
        dto = ReactFlowDTO(
            nodes=[
-                RFNodeDTO(
+                StartCallRFNode(
                    id="start",
-                    type=NodeType.startNode,
                    position=Position(x=0, y=0),
-                    data=NodeDataDTO(
+                    data=StartCallNodeData(
                        name="Start",
                        prompt="Prompt",
                        is_start=True,
@ -305,11 +301,10 @@ class TestStartGreeting:
                        extraction_enabled=False,
                    ),
                ),
-                RFNodeDTO(
+                EndCallRFNode(
                    id="end",
-                    type=NodeType.endNode,
                    position=Position(x=0, y=200),
-                    data=NodeDataDTO(
+                    data=EndCallNodeData(
                        name="End",
                        prompt="End",
                        is_end=True,
@ -338,11 +333,10 @@ class TestStartGreeting:
        """Text greeting with {{variable}} placeholders should be rendered."""
        dto = ReactFlowDTO(
            nodes=[
-                RFNodeDTO(
+                StartCallRFNode(
                    id="start",
-                    type=NodeType.startNode,
                    position=Position(x=0, y=0),
-                    data=NodeDataDTO(
+                    data=StartCallNodeData(
                        name="Start",
                        prompt="Prompt",
                        is_start=True,
@ -352,11 +346,10 @@ class TestStartGreeting:
                        extraction_enabled=False,
                    ),
                ),
-                RFNodeDTO(
+                EndCallRFNode(
                    id="end",
-                    type=NodeType.endNode,
                    position=Position(x=0, y=200),
-                    data=NodeDataDTO(
+                    data=EndCallNodeData(
                        name="End",
                        prompt="End",
                        is_end=True,
--- a/api/tests/test_ts_bridge.py
+++ b/api/tests/test_ts_bridge.py
@ -0,0 +1,275 @@
+"""End-to-end tests for the Node TS validator bridge.
+
+Exercises the real `node` subprocess — slow-ish but the whole point is
+that code → JSON and JSON → code round-trip losslessly.
+"""
+
+from __future__ import annotations
+
+import shutil
+
+import pytest
+
+from api.mcp_server.ts_bridge import TsBridgeError, generate_code, parse_code
+
+pytestmark = pytest.mark.skipif(
+    shutil.which("node") is None, reason="node binary not available"
+)
+
+
+def _minimal_workflow() -> dict:
+    """Start → End, one edge. Stored shape matches ReactFlowDTO."""
+    return {
+        "nodes": [
+            {
+                "id": "1",
+                "type": "startCall",
+                "position": {"x": 0, "y": 0},
+                "data": {
+                    "name": "Greeting",
+                    "prompt": "Greet warmly.",
+                    "greeting_type": "text",
+                    "greeting": "Hi {{first_name}}!",
+                    "allow_interrupt": True,
+                },
+            },
+            {
+                "id": "2",
+                "type": "endCall",
+                "position": {"x": 200, "y": 0},
+                "data": {"name": "Done", "prompt": "Say goodbye."},
+            },
+        ],
+        "edges": [
+            {
+                "id": "1-2",
+                "source": "1",
+                "target": "2",
+                "data": {"label": "done", "condition": "conversation complete"},
+            },
+        ],
+        "viewport": {"x": 0, "y": 0, "zoom": 1},
+    }
+
+
+def _normalize(wf: dict) -> dict:
+    """Strip cosmetics before comparing a round-tripped workflow.
+
+    Node IDs are regenerated deterministically by the parser
+    (1, 2, 3, ...) so the inputs already match if constructed that way.
+    Node positions are copied through as-is. Edge ids follow `source-target`.
+    """
+    return {
+        "nodes": [
+            {
+                "id": n["id"],
+                "type": n["type"],
+                "position": n["position"],
+                "data": n["data"],
+            }
+            for n in wf["nodes"]
+        ],
+        "edges": [
+            {
+                "id": e["id"],
+                "source": e["source"],
+                "target": e["target"],
+                "data": e["data"],
+            }
+            for e in wf["edges"]
+        ],
+    }
+
+
+# ─── generate_code ───────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_generate_emits_imports_and_factories():
+    code = await generate_code(_minimal_workflow(), workflow_name="test")
+    assert 'import { Workflow } from "@dograh/sdk";' in code
+    assert "startCall" in code
+    assert "endCall" in code
+    assert "wf.addTyped(startCall(" in code
+    assert "wf.edge(" in code
+
+
+@pytest.mark.asyncio
+async def test_generate_strips_spec_defaults():
+    wf = _minimal_workflow()
+    code = await generate_code(wf)
+    # `add_global_prompt=True` is a spec default for startCall; emitted
+    # code should omit it. Keeps the LLM-facing projection tight.
+    assert "add_global_prompt" not in code
+
+
+@pytest.mark.asyncio
+async def test_generate_omits_position():
+    """Positions are hidden from the LLM — auto-layout post-processing
+    (future) reassigns them on save. Keeping them out of the edit
+    surface avoids the LLM producing cramped/overlapping layouts."""
+    wf = _minimal_workflow()
+    code = await generate_code(wf)
+    assert "position" not in code
+
+
+@pytest.mark.asyncio
+async def test_generate_strips_legacy_ui_state_fields():
+    """Stored workflows from before spec validation carry UI-state fields
+    (`invalid`, `selected`, `is_start`, etc.). `get_workflow_code` hides
+    those from the LLM so edits don't round-trip the noise."""
+    wf = {
+        "nodes": [
+            {
+                "id": "1",
+                "type": "startCall",
+                "position": {"x": 0, "y": 0},
+                "data": {
+                    "name": "g",
+                    "prompt": "hi",
+                    "invalid": False,
+                    "validationMessage": None,
+                    "is_start": True,
+                    "selected": True,
+                    "dragging": False,
+                },
+            },
+        ],
+        "edges": [],
+        "viewport": {"x": 0, "y": 0, "zoom": 1},
+    }
+    code = await generate_code(wf)
+    for dropped in ("invalid", "validationMessage", "is_start", "selected", "dragging"):
+        assert dropped not in code, f"{dropped} should be stripped"
+    assert 'prompt: "hi"' in code
+
+
+@pytest.mark.asyncio
+async def test_generate_strips_unknown_edge_fields():
+    wf = _minimal_workflow()
+    wf["edges"][0]["data"]["invalid"] = False
+    wf["edges"][0]["data"]["validationMessage"] = None
+    code = await generate_code(wf)
+    assert "invalid" not in code
+    assert "validationMessage" not in code
+
+
+# ─── parse_code ──────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_parse_accepts_minimal_code():
+    code = """import { Workflow } from "@dograh/sdk";
+import { startCall, endCall } from "@dograh/sdk/typed";
+
+const wf = new Workflow({ name: "min" });
+const a = wf.addTyped(startCall({ name: "g", prompt: "hi" }));
+const b = wf.addTyped(endCall({ name: "d", prompt: "bye" }));
+wf.edge(a, b, { label: "done", condition: "wrapped" });
+"""
+    result = await parse_code(code)
+    assert result["ok"] is True
+    wf = result["workflow"]
+    assert len(wf["nodes"]) == 2
+    assert len(wf["edges"]) == 1
+    assert wf["nodes"][0]["type"] == "startCall"
+    assert wf["edges"][0]["source"] == wf["nodes"][0]["id"]
+
+
+@pytest.mark.asyncio
+async def test_parse_rejects_function_declaration():
+    code = """import { Workflow } from "@dograh/sdk";
+const wf = new Workflow({ name: "x" });
+function evil() { return 1; }
+"""
+    result = await parse_code(code)
+    assert result["ok"] is False
+    assert result["stage"] == "parse"
+    assert any("FunctionDeclaration" in e["message"] for e in result["errors"])
+
+
+@pytest.mark.asyncio
+async def test_parse_rejects_unknown_field():
+    code = """import { Workflow } from "@dograh/sdk";
+import { startCall } from "@dograh/sdk/typed";
+const wf = new Workflow({ name: "x" });
+const a = wf.addTyped(startCall({ name: "g", prompt: "hi", promt: "typo" }));
+"""
+    result = await parse_code(code)
+    assert result["ok"] is False
+    assert result["stage"] == "validate"
+    assert any("Unknown field" in e["message"] for e in result["errors"])
+
+
+@pytest.mark.asyncio
+async def test_parse_rejects_unknown_variable_in_edge():
+    """An edge that references an undeclared variable fails at the parse stage.
+
+    ``missing`` is never bound to a node in the source; at least one error
+    message must contain ``Unknown node variable``.
+    """
+    source = """import { Workflow } from "@dograh/sdk";
+import { startCall, endCall } from "@dograh/sdk/typed";
+const wf = new Workflow({ name: "x" });
+const a = wf.addTyped(startCall({ name: "g", prompt: "hi" }));
+wf.edge(a, missing, { label: "done", condition: "c" });
+"""
+    outcome = await parse_code(source)
+
+    assert outcome["ok"] is False
+    assert outcome["stage"] == "parse"
+    assert any(
+        "Unknown node variable" in err["message"] for err in outcome["errors"]
+    )
+
+
+@pytest.mark.asyncio
+async def test_parse_requires_label_and_condition_on_edge():
+    """An edge with an empty ``label`` is rejected at the parse stage.
+
+    Only the empty-label case is exercised here; the condition in the
+    source below is non-empty.
+    """
+    source = """import { Workflow } from "@dograh/sdk";
+import { startCall, endCall } from "@dograh/sdk/typed";
+const wf = new Workflow({ name: "x" });
+const a = wf.addTyped(startCall({ name: "g", prompt: "hi" }));
+const b = wf.addTyped(endCall({ name: "d", prompt: "bye" }));
+wf.edge(a, b, { label: "", condition: "c" });
+"""
+    outcome = await parse_code(source)
+
+    assert outcome["ok"] is False
+    assert outcome["stage"] == "parse"
+
+
+# ─── Round-trip ──────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_round_trip_minimal():
+    """generate_code followed by parse_code reproduces the minimal workflow.
+
+    Every input node must come back with the same type and with every
+    input ``data`` key intact. Edges are compared after being passed
+    through ``_normalize``.
+    """
+    wf = _minimal_workflow()
+    code = await generate_code(wf, workflow_name="rt")
+    result = await parse_code(code)
+    assert result["ok"] is True, result
+    out_nodes = result["workflow"]["nodes"]
+    # zip() stops at the shorter sequence, so pin the node count first;
+    # otherwise a node lost in the round trip would go unnoticed.
+    assert len(out_nodes) == len(wf["nodes"]), result
+    # Positions are intentionally not preserved — they'll be reassigned
+    # by a downstream auto-layout pass. Parser defaults to {0, 0}.
+    for in_node, out_node in zip(wf["nodes"], out_nodes):
+        assert out_node["type"] == in_node["type"]
+        assert out_node["position"] == {"x": 0, "y": 0}
+        for k, v in in_node["data"].items():
+            # Check presence separately: indexing a missing key would raise
+            # KeyError and bypass the diagnostic message below.
+            assert k in out_node["data"], f"{k}: missing after round trip"
+            assert out_node["data"][k] == v, (
+                f"{k}: {out_node['data'].get(k)!r} != {v!r}"
+            )
+    assert _normalize({"nodes": [], "edges": result["workflow"]["edges"]})["edges"] == [
+        {
+            "id": "1-2",
+            "source": "1",
+            "target": "2",
+            "data": {"label": "done", "condition": "conversation complete"},
+        }
+    ]
+
+
+@pytest.mark.asyncio
+async def test_generate_fails_on_unknown_type():
+    """generate_code raises TsBridgeError when a node has an unrecognised type.
+
+    The error text must match ``Unknown node type``.
+    """
+    unknown_node = {
+        "id": "1",
+        "type": "doesNotExist",
+        "position": {"x": 0, "y": 0},
+        "data": {},
+    }
+    workflow = {
+        "nodes": [unknown_node],
+        "edges": [],
+        "viewport": {"x": 0, "y": 0, "zoom": 1},
+    }
+    with pytest.raises(TsBridgeError, match="Unknown node type"):
+        await generate_code(workflow)