dograh/sdk/python/src/dograh_sdk/_validation.py
Abhishek 00a1a22b74
feat: refactor node spec and add mcp tools (#244)
* refactor: carve out extraction panel

* refactor: create spec versions for node types

* refactor: create a GenericNode and remove custom nodes

* feat: add python and typescript sdk

* add dograh sdk

* fix: fetch draft workflow definition over published one

* fix: fix routes of SDKs to use code gen

* chore: remove doclink dependency to reduce image size

* chore: format files

* chore: bump pipecat

* feat: let mcp fetch archived workflows on demand

* chore: fix tests

* feat: add sdk documentation

* chore: change banner and add badge
2026-04-21 07:56:16 +05:30

166 lines
5.4 KiB
Python

"""Client-side validation of node data against a fetched spec.
Intentionally lightweight: we catch the hallucinations that matter (typo'd
field names, missing required fields, obvious scalar-type mismatches) and
leave rigorous coercion to the backend's Pydantic validators, which run at
save time. The tradeoff: the SDK fails fast on mistakes an LLM makes, and
the backend remains the single authority on wire-format correctness.
"""
from __future__ import annotations

import copy
from collections.abc import Mapping
from typing import Any

from .errors import ValidationError
# Map PropertyType → primitive Python type(s) we check at call site.
# `None` means "skip scalar-type check" (compound types, refs, JSON, etc.).
# Keys are the `type` strings carried by property specs; `_check_scalar`
# looks up the expected type(s) here and tests the value with `isinstance`.
_SCALAR_TYPES: dict[str, tuple[type, ...] | None] = {
    "string": (str,),
    "number": (int, float),
    "boolean": (bool,),
    "options": None,  # value-in-options handled separately
    "multi_options": None,  # likewise routed to _check_options, not checked here
    "fixed_collection": (list,),  # only the outer list shape is checked here
    "json": None,  # any JSON-serializable
    "tool_refs": (list,),
    "document_refs": (list,),
    "recording_ref": (str,),
    "credential_ref": (str,),
    "mention_textarea": (str,),
    "url": (str,),
}
def _with_hint(prop: dict[str, Any], message: str) -> str:
    """Return ``message``, extended with the property's ``llm_hint`` if set.

    A truthy ``llm_hint`` on the property spec is appended on its own line,
    so an LLM author retrying after a validation failure sees the catalog
    reference or value-shape guidance right next to the error. A missing or
    empty hint leaves the message unchanged.
    """
    llm_hint = prop.get("llm_hint")
    return f"{message}\n Hint: {llm_hint}" if llm_hint else message
def _check_scalar(prop: dict[str, Any], value: Any) -> None:
    """Raise ``ValidationError`` if ``value`` has the wrong primitive type.

    The expected Python type(s) are looked up in ``_SCALAR_TYPES`` by
    ``prop["type"]``. Property types that are absent from that table, or
    mapped to ``None``, are not checked here.

    Args:
        prop: Property spec. ``type`` and ``name`` are read, plus the
            optional ``llm_hint`` that is appended to the error message.
        value: Candidate value. ``None`` is always accepted; a missing
            value is the required-field check's concern, not this one's.

    Raises:
        ValidationError: If ``value`` is not an instance of the expected
            type(s). A ``bool`` is only accepted when the expected types
            explicitly include ``bool``.
    """
    if value is None:
        return

    allowed = _SCALAR_TYPES.get(prop["type"])
    if allowed is None:
        return

    # bool is a subclass of int, so a bare isinstance() check would let
    # True/False through as a "number". Accept a bool only when the spec
    # type explicitly asks for one.
    if isinstance(value, bool):
        type_ok = bool in allowed
    else:
        type_ok = isinstance(value, allowed)

    if not type_ok:
        raise ValidationError(
            _with_hint(
                prop,
                f"{prop['name']}: expected {prop['type']}, "
                f"got {type(value).__name__}",
            )
        )
def _option_is_allowed(candidate: Any, allowed: set[Any]) -> bool:
    """Return whether ``candidate`` is in ``allowed``; unhashable values are not."""
    try:
        return candidate in allowed
    except TypeError:
        # Lists/dicts cannot be hashed for the set lookup. Report them as
        # "not allowed" so the caller raises a ValidationError instead of
        # leaking a TypeError.
        return False


def _sorted_options(allowed: set[Any]) -> list[Any]:
    """Return the allowed values in a stable order for error messages."""
    try:
        return sorted(allowed)
    except TypeError:
        # Mixed-type option values (e.g. str and int) are not mutually
        # orderable; fall back to ordering by repr.
        return sorted(allowed, key=repr)


def _check_options(prop: dict[str, Any], value: Any) -> None:
    """Raise ``ValidationError`` if ``value`` is not among the declared options.

    The allowed set is built from the ``value`` key of each entry in
    ``prop["options"]``. When the property declares no options, nothing is
    checked.

    Args:
        prop: Property spec. ``type``, ``name`` and ``options`` are read,
            plus the optional ``llm_hint`` appended to error messages.
        value: Candidate value. ``None`` is always accepted. For a
            ``multi_options`` property it must be a list whose items are
            all allowed; for any other type it must itself be allowed.

    Raises:
        ValidationError: If a ``multi_options`` value is not a list, or if
            the value (or any list item) is not an allowed option. This
            includes unhashable values such as lists or dicts.
    """
    if value is None:
        return

    allowed = {o["value"] for o in prop.get("options") or []}
    if not allowed:
        return

    if prop["type"] == "multi_options":
        if not isinstance(value, list):
            raise ValidationError(
                _with_hint(
                    prop,
                    f"{prop['name']}: expected list, got {type(value).__name__}",
                )
            )
        bad = [v for v in value if not _option_is_allowed(v, allowed)]
        if bad:
            raise ValidationError(
                _with_hint(
                    prop,
                    f"{prop['name']}: values {bad} not in allowed "
                    f"{_sorted_options(allowed)}",
                )
            )
    else:  # 'options'
        if not _option_is_allowed(value, allowed):
            raise ValidationError(
                _with_hint(
                    prop,
                    f"{prop['name']}: {value!r} not in allowed "
                    f"{_sorted_options(allowed)}",
                )
            )
def validate_node_data(
    spec: dict[str, Any],
    kwargs: dict[str, Any],
) -> dict[str, Any]:
    """Validate LLM-supplied kwargs against the node spec.

    Returns the data dict to embed in the wire format, with defaults
    applied. Properties whose resolved value is ``None`` are left out of
    the result. ``fixed_collection`` rows are validated recursively against
    the property's own ``properties`` list.

    Args:
        spec: Node spec. ``name`` is used in error messages and
            ``properties`` is the list of property specs to validate
            against.
        kwargs: Field name → value mapping supplied by the caller.

    Returns:
        A new dict of validated values. Spec defaults are deep-copied, so
        mutating the result never alters ``spec``.

    Raises:
        ValidationError: If kwargs contain unknown fields, omit required
            fields, carry obvious type mismatches, or contain a
            ``fixed_collection`` row that is not a mapping.
    """
    declared = {p["name"]: p for p in spec["properties"]}

    # Unknown field names — the most common LLM hallucination.
    unknown = set(kwargs) - set(declared)
    if unknown:
        raise ValidationError(
            f"{spec['name']}: unknown field(s) {sorted(unknown)}. "
            f"Allowed: {sorted(declared)}"
        )

    # Per-property validation
    data: dict[str, Any] = {}
    for name, prop in declared.items():
        if name in kwargs:
            value = kwargs[name]
        elif prop.get("default") is not None:
            # Copy so a mutable default (list/dict) is never shared between
            # the spec and the returned data, or between two nodes.
            value = copy.deepcopy(prop["default"])
        else:
            value = None

        # Scalar / collection shape
        if prop["type"] in ("options", "multi_options"):
            _check_options(prop, value)
        else:
            _check_scalar(prop, value)

        # Nested fixed_collection rows — validate each row as a sub-spec.
        if prop["type"] == "fixed_collection" and isinstance(value, list):
            sub_name = f"{spec['name']}.{name}"
            sub_spec = {
                "name": sub_name,
                "properties": prop.get("properties") or [],
            }
            rows = []
            for index, row in enumerate(value):
                # A non-mapping row (string, number, list) would otherwise
                # surface as a bogus "unknown field(s)" error or a raw
                # TypeError from the recursive call.
                if not isinstance(row, Mapping):
                    raise ValidationError(
                        _with_hint(
                            prop,
                            f"{sub_name}[{index}]: expected object, "
                            f"got {type(row).__name__}",
                        )
                    )
                rows.append(validate_node_data(sub_spec, row))
            data[name] = rows
            continue

        if value is not None:
            data[name] = value

    # Required check — must be set AND non-empty for strings.
    for name, prop in declared.items():
        if not prop.get("required"):
            continue
        val = data.get(name)
        if val is None or (isinstance(val, str) and val.strip() == ""):
            raise ValidationError(
                _with_hint(
                    prop,
                    f"{spec['name']}: required field missing: {name}",
                )
            )
    return data