feat: refactor node spec and add mcp tools (#244)

* refactor: carve out extraction panel

* refactor: create spec versions for node types

* refactor: create a GenericNode and remove custom nodes

* feat: add python and typescript sdk

* add dograh sdk

* fix: fetch draft workflow definition over published one

* fix: fix routes of SDKs to use code gen

* chore: remove docling dependency to reduce image size

* chore: format files

* chore: bump pipecat

* feat: let mcp fetch archived workflows on demand

* chore: fix tests

* feat: add sdk documentation

* chore: change banner and add badge
This commit is contained in:
Abhishek 2026-04-21 07:56:16 +05:30 committed by GitHub
parent 0a61ef295f
commit 00a1a22b74
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
162 changed files with 14355 additions and 3554 deletions

View file

@ -487,6 +487,71 @@ class MPSServiceKeyClient:
response=response,
)
async def process_document(
    self,
    file_path: str,
    filename: str,
    content_type: str,
    retrieval_mode: str = "chunked",
    max_tokens: int = 128,
    chunk_overlap_tokens: int = 0,
    merge_peers: bool = True,
    tokenizer_model: Optional[str] = None,
    correlation_id: Optional[str] = None,
    organization_id: Optional[int] = None,
    created_by: Optional[str] = None,
) -> dict:
    """Convert + chunk a document via MPS /document/process.

    The file at ``file_path`` is uploaded as the multipart ``file`` part
    (sent under ``filename`` with ``content_type``); the remaining options
    are sent as string form fields.

    Returns a dict matching DocumentProcessResponse in MPS:
    {
        "mode": "chunked" | "full_document",
        "docling_metadata": {...},
        "full_text": str | None,  # populated only in full_document mode
        "chunks": [...],          # populated only in chunked mode
    }

    Timeout is 300s to match the ALB idle_timeout configured in
    infrastructure/mps/main.tf.

    Args:
        file_path: Path of the local file to upload; opened in binary mode.
        filename: File name reported in the multipart upload.
        content_type: MIME type reported in the multipart upload.
        retrieval_mode: Sent as the ``retrieval_mode`` form field.
        max_tokens: Sent (stringified) as the ``max_tokens`` form field.
        chunk_overlap_tokens: Sent (stringified) as ``chunk_overlap_tokens``.
        merge_peers: Sent as ``"true"`` / ``"false"`` in ``merge_peers``.
        tokenizer_model: Sent as ``tokenizer_model`` only when not None.
        correlation_id: Sent as ``correlation_id`` only when truthy.
        organization_id: Forwarded to ``self._get_headers``.
        created_by: Forwarded to ``self._get_headers``.

    Raises:
        OSError: If ``file_path`` cannot be opened or read.
        httpx.HTTPStatusError: If the response status is anything other
            than 200 (other 2xx codes are treated as failures too).
    """
    # Local import keeps this block self-contained regardless of the
    # module's top-level imports.
    import asyncio

    def _read_file() -> bytes:
        with open(file_path, "rb") as fh:
            return fh.read()

    data = {
        "retrieval_mode": retrieval_mode,
        "max_tokens": str(max_tokens),
        "chunk_overlap_tokens": str(chunk_overlap_tokens),
        "merge_peers": str(merge_peers).lower(),
    }
    if tokenizer_model is not None:
        data["tokenizer_model"] = tokenizer_model
    if correlation_id:
        data["correlation_id"] = correlation_id

    headers = self._get_headers(organization_id, created_by)
    # Remove JSON content-type so httpx sets the correct multipart boundary.
    headers.pop("Content-Type", None)

    # Read the document in the default executor: a synchronous read inside
    # this coroutine would block the event loop for the whole disk read.
    # Doing it before the client is created also means an unreadable file
    # fails fast without setting up an HTTP client.
    file_bytes = await asyncio.get_running_loop().run_in_executor(
        None, _read_file
    )
    files = {"file": (filename, file_bytes, content_type)}

    async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
        response = await client.post(
            f"{self.base_url}/api/v1/document/process",
            files=files,
            data=data,
            headers=headers,
        )
        if response.status_code == 200:
            return response.json()
        logger.error(
            f"Failed to process document: {response.status_code} - {response.text}"
        )
        raise httpx.HTTPStatusError(
            f"Failed to process document: {response.text}",
            request=response.request,
            response=response,
        )
async def call_workflow_api(
self,
call_type: str,