feat: refactor node spec and add mcp tools (#244)

* refactor: carve out extraction panel
* refactor: create spec versions for node types
* refactor: create a GenericNode and remove custom nodes
* feat: add python and typescript sdk
* add dograh sdk
* fix: fetch draft workflow definition over published one
* fix: fix routes of SDKs to use code gen
* chore: remove docling dependency to reduce image size
* chore: format files
* chore: bump pipecat
* feat: let mcp fetch archived workflows on demand
* chore: fix tests
* feat: add sdk documentation
* chore: change banner and add badge
2026-07-25 12:01:04 +02:00 · 2026-04-21 07:56:16 +05:30 · 2026-04-21 07:56:16 +05:30 · 00a1a22b74
commit 00a1a22b74
parent 0a61ef295f
162 changed files with 14355 additions and 3554 deletions
--- a/api/tasks/knowledge_base_processing.py
+++ b/api/tasks/knowledge_base_processing.py
@ -1,22 +1,22 @@
-"""ARQ background task for processing knowledge base documents."""
+"""ARQ background task for processing knowledge base documents.
+
+Document conversion and chunking live in the Model Proxy Service (MPS);
+this task downloads the file from S3, calls MPS, then handles the embedding
+and DB writes locally.
+"""

-import json
 import os
 import tempfile

-from docling.chunking import HybridChunker
-from docling.document_converter import DocumentConverter
-from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
 from loguru import logger
-from transformers import AutoTokenizer

 from api.db import db_client
 from api.db.models import KnowledgeBaseChunkModel
 from api.services.gen_ai import OpenAIEmbeddingService
+from api.services.mps_service_key_client import mps_service_key_client
 from api.services.storage import storage_fs

-# For tokenization/chunking
-TOKENIZER_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+MAX_FILE_SIZE_BYTES = 5 * 1024 * 1024


 async def process_knowledge_base_document(
@ -24,93 +24,84 @@ async def process_knowledge_base_document(
    document_id: int,
    s3_key: str,
    organization_id: int,
+    created_by_provider_id: str,
    max_tokens: int = 128,
    retrieval_mode: str = "chunked",
 ):
-    """Process a knowledge base document: download, chunk, embed, and store.
+    """Process a knowledge base document via MPS: download, call MPS, embed, store.

    Args:
        ctx: ARQ context
        document_id: Database ID of the document
        s3_key: S3 key where the file is stored
        organization_id: Organization ID
+        created_by_provider_id: Uploading user's provider ID (for OSS-mode auth to MPS)
        max_tokens: Maximum number of tokens per chunk (default: 128)
        retrieval_mode: "chunked" for vector search or "full_document" for full text
    """
    logger.info(
-        f"Starting knowledge base document processing for document_id={document_id}, "
-        f"s3_key={s3_key}, organization_id={organization_id}"
+        f"Processing knowledge base document: document_id={document_id}, "
+        f"s3_key={s3_key}, org={organization_id}, mode={retrieval_mode}"
    )

    temp_file_path = None

    try:
-        # Update status to processing
        await db_client.update_document_status(document_id, "processing")

-        # Extract file extension from S3 key
        filename = s3_key.split("/")[-1]
-        file_extension = (
-            os.path.splitext(filename)[1] or ".bin"
-        )  # Default to .bin if no extension
+        file_extension = os.path.splitext(filename)[1] or ".bin"

-        # Create temp file for download with correct extension
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=file_extension)
        temp_file_path = temp_file.name
        temp_file.close()

-        # Download file from S3
        logger.info(f"Downloading file from S3: {s3_key}")
        download_success = await storage_fs.adownload_file(s3_key, temp_file_path)
-
        if not download_success:
            raise Exception(f"Failed to download file from S3: {s3_key}")
-
        if not os.path.exists(temp_file_path):
            raise FileNotFoundError(f"Downloaded file not found: {temp_file_path}")

        file_size = os.path.getsize(temp_file_path)
        logger.info(f"Downloaded file size: {file_size} bytes")

-        # Validate file size (max 5MB)
-        max_file_size = 5 * 1024 * 1024
-        if file_size > max_file_size:
-            error_message = f"File size ({file_size / (1024 * 1024):.1f}MB) exceeds the maximum allowed size of 5MB."
+        if file_size > MAX_FILE_SIZE_BYTES:
+            error_message = (
+                f"File size ({file_size / (1024 * 1024):.1f}MB) exceeds the "
+                f"maximum allowed size of {MAX_FILE_SIZE_BYTES // (1024 * 1024)}MB."
+            )
            logger.warning(f"Document {document_id}: {error_message}")
            await db_client.update_document_status(
                document_id, "failed", error_message=error_message
            )
            return

-        # Compute file hash and get mime type
        file_hash = db_client.compute_file_hash(temp_file_path)
        mime_type = db_client.get_mime_type(temp_file_path)
-        filename = s3_key.split("/")[-1]

-        # Get document record
        document = await db_client.get_document_by_id(document_id)
        if not document:
            raise Exception(f"Document {document_id} not found")

-        # Check if a document with this hash already exists (reject duplicates)
+        # Reject duplicates (same hash already ingested for this org).
        existing_doc = await db_client.get_document_by_hash(file_hash, organization_id)
        if existing_doc and existing_doc.id != document_id:
            error_message = (
                f"This file is a duplicate of '{existing_doc.filename}'. "
-                f"Please delete the duplicate files and consolidate them into a single unique file before uploading."
+                f"Please delete the duplicate files and consolidate them into a "
+                f"single unique file before uploading."
            )
            logger.warning(
-                f"Duplicate document detected: {document_id} is duplicate of {existing_doc.id} "
-                f"({existing_doc.filename})"
+                f"Duplicate document detected: {document_id} is duplicate of "
+                f"{existing_doc.id} ({existing_doc.filename})"
            )
-            # Update file metadata
            await db_client.update_document_metadata(
                document_id,
                file_size_bytes=file_size,
                file_hash=file_hash,
                mime_type=mime_type,
            )
-            # Mark as failed with duplicate error message
            await db_client.update_document_status(
                document_id,
                "failed",
@ -122,7 +113,6 @@ async def process_knowledge_base_document(
            )
            return

-        # Update document with file metadata
        await db_client.update_document_metadata(
            document_id,
            file_size_bytes=file_size,
@ -130,52 +120,35 @@ async def process_knowledge_base_document(
            mime_type=mime_type,
        )

-        # Full document mode: extract text and store it, skip chunking/embedding
+        logger.info(f"Delegating document processing to MPS (mode={retrieval_mode})")
+        mps_response = await mps_service_key_client.process_document(
+            file_path=temp_file_path,
+            filename=filename,
+            content_type=mime_type or "application/octet-stream",
+            retrieval_mode=retrieval_mode,
+            max_tokens=max_tokens,
+            organization_id=organization_id,
+            created_by=created_by_provider_id,
+        )
+
+        docling_metadata = mps_response.get("docling_metadata", {})
+
        if retrieval_mode == "full_document":
-            logger.info(f"Document {document_id}: full_document mode, extracting text")
-
-            plain_text_extensions = {".txt", ".json"}
-            if file_extension.lower() in plain_text_extensions:
-                with open(temp_file_path, "r", encoding="utf-8") as f:
-                    full_text = f.read()
-                if file_extension.lower() == ".json":
-                    try:
-                        parsed = json.loads(full_text)
-                        full_text = json.dumps(parsed, indent=2, ensure_ascii=False)
-                    except json.JSONDecodeError:
-                        pass
-                docling_metadata = {"document_type": "PlainText"}
-            else:
-                converter = DocumentConverter()
-                conversion_result = converter.convert(temp_file_path)
-                doc = conversion_result.document
-                full_text = doc.export_to_text()
-                docling_metadata = {
-                    "num_pages": len(doc.pages) if hasattr(doc, "pages") else None,
-                    "document_type": type(doc).__name__,
-                }
-
-            # Store full text on the document record
+            full_text = mps_response.get("full_text") or ""
            await db_client.update_document_full_text(document_id, full_text)
-
            await db_client.update_document_status(
                document_id,
                "completed",
                total_chunks=0,
                docling_metadata=docling_metadata,
            )
-
            logger.info(
                f"Successfully processed full_document {document_id}. "
                f"Text length: {len(full_text)} chars"
            )
            return

-        # Initialize the OpenAI embedding service
-        logger.info(
-            f"Initializing OpenAI embedding service with max_tokens={max_tokens}"
-        )
-        # Try to get user's embeddings configuration
+        # Chunked mode: fetch user embedding config, embed via OpenAI, persist chunks.
        embeddings_api_key = None
        embeddings_model = None
        embeddings_base_url = None
@ -187,7 +160,6 @@ async def process_knowledge_base_document(
                embeddings_base_url = getattr(user_config.embeddings, "base_url", None)
                logger.info(f"Using user embeddings config: model={embeddings_model}")

-        # Check if API key is configured
        if not embeddings_api_key:
            error_message = (
                "OpenAI API key not configured. Please set your API key in "
@ -199,190 +171,57 @@ async def process_knowledge_base_document(
            )
            return

-        service = OpenAIEmbeddingService(
+        embedding_service = OpenAIEmbeddingService(
            db_client=db_client,
-            max_tokens=max_tokens,
            api_key=embeddings_api_key,
            model_id=embeddings_model or "text-embedding-3-small",
            base_url=embeddings_base_url,
        )

-        # Step 1: Initialize tokenizer for chunking
-        logger.info(
-            f"Loading tokenizer: {TOKENIZER_MODEL} with max_tokens={max_tokens}"
-        )
-        hf_tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_MODEL)
-        tokenizer = HuggingFaceTokenizer(
-            tokenizer=hf_tokenizer,
-            max_tokens=max_tokens,
-        )
+        mps_chunks = mps_response.get("chunks", [])
+        if not mps_chunks:
+            logger.warning(f"Document {document_id}: MPS returned zero chunks")

-        chunk_texts = []
        chunk_records = []
-        token_counts = []
-
-        # Check if file is a plain text format that docling doesn't support
-        plain_text_extensions = {".txt", ".json"}
-        if file_extension.lower() in plain_text_extensions:
-            # Read text content directly
-            logger.info(f"Reading {file_extension} file directly (bypassing docling)")
-            with open(temp_file_path, "r", encoding="utf-8") as f:
-                raw_content = f.read()
-
-            # For JSON files, pretty-print for better readability
-            if file_extension.lower() == ".json":
-                try:
-                    parsed = json.loads(raw_content)
-                    raw_content = json.dumps(parsed, indent=2, ensure_ascii=False)
-                except json.JSONDecodeError:
-                    logger.warning(
-                        "JSON file is not valid JSON, treating as plain text"
-                    )
-
-            docling_metadata = {
-                "num_pages": None,
-                "document_type": "PlainText",
-            }
-
-            # Token-based chunking for plain text
-            tokens = hf_tokenizer.encode(raw_content, add_special_tokens=False)
-            total_tokens = len(tokens)
-            logger.info(
-                f"Total tokens in file: {total_tokens}, chunking with max_tokens={max_tokens}"
+        chunk_texts = []
+        for chunk in mps_chunks:
+            contextualized = chunk.get("contextualized_text") or chunk["chunk_text"]
+            chunk_records.append(
+                KnowledgeBaseChunkModel(
+                    document_id=document_id,
+                    organization_id=organization_id,
+                    chunk_text=chunk["chunk_text"],
+                    contextualized_text=contextualized,
+                    chunk_index=chunk["chunk_index"],
+                    chunk_metadata=chunk.get("chunk_metadata") or {},
+                    embedding_model=embedding_service.get_model_id(),
+                    embedding_dimension=embedding_service.get_embedding_dimension(),
+                    token_count=chunk.get("token_count", 0),
+                )
            )
+            chunk_texts.append(contextualized)

-            start = 0
-            chunk_index = 0
-            while start < total_tokens:
-                end = min(start + max_tokens, total_tokens)
-                chunk_token_ids = tokens[start:end]
-                chunk_text = hf_tokenizer.decode(
-                    chunk_token_ids, skip_special_tokens=True
-                )
-
-                token_count = len(chunk_token_ids)
-                token_counts.append(token_count)
-
-                chunk_record = KnowledgeBaseChunkModel(
-                    document_id=document_id,
-                    organization_id=organization_id,
-                    chunk_text=chunk_text,
-                    contextualized_text=chunk_text,
-                    chunk_index=chunk_index,
-                    chunk_metadata={},
-                    embedding_model=service.get_model_id(),
-                    embedding_dimension=service.get_embedding_dimension(),
-                    token_count=token_count,
-                )
-
-                chunk_records.append(chunk_record)
-                chunk_texts.append(chunk_text)
-                chunk_index += 1
-                start = end
-
-            total_chunks = len(chunk_records)
-            logger.info(f"Generated {total_chunks} chunks from plain text")
-
-        else:
-            # Use docling for structured formats (PDF, DOCX, etc.)
-            logger.info("Converting document with docling")
-            converter = DocumentConverter()
-            conversion_result = converter.convert(temp_file_path)
-            doc = conversion_result.document
-
-            docling_metadata = {
-                "num_pages": len(doc.pages) if hasattr(doc, "pages") else None,
-                "document_type": type(doc).__name__,
-            }
-
-            # Initialize chunker
-            logger.info(f"Initializing HybridChunker with max_tokens={max_tokens}")
-            chunker = HybridChunker(tokenizer=tokenizer)
-
-            # Chunk the document
-            logger.info(f"Chunking document with max_tokens={max_tokens}")
-            chunks = list(chunker.chunk(dl_doc=doc))
-            total_chunks = len(chunks)
-            logger.info(f"Generated {total_chunks} chunks")
-
-            # Process each chunk
-            for i, chunk in enumerate(chunks):
-                chunk_text = chunk.text
-                contextualized_text = chunker.contextualize(chunk=chunk)
-
-                text_to_tokenize = (
-                    contextualized_text if contextualized_text else chunk_text
-                )
-                token_count = len(
-                    tokenizer.tokenizer.encode(
-                        text_to_tokenize, add_special_tokens=False
-                    )
-                )
-                token_counts.append(token_count)
-
-                chunk_metadata = {}
-                if hasattr(chunk, "meta") and chunk.meta:
-                    chunk_metadata = {
-                        "doc_items": (
-                            [str(item) for item in chunk.meta.doc_items]
-                            if hasattr(chunk.meta, "doc_items")
-                            else []
-                        ),
-                        "headings": (
-                            chunk.meta.headings
-                            if hasattr(chunk.meta, "headings")
-                            else []
-                        ),
-                    }
-
-                chunk_record = KnowledgeBaseChunkModel(
-                    document_id=document_id,
-                    organization_id=organization_id,
-                    chunk_text=chunk_text,
-                    contextualized_text=contextualized_text,
-                    chunk_index=i,
-                    chunk_metadata=chunk_metadata,
-                    embedding_model=service.get_model_id(),
-                    embedding_dimension=service.get_embedding_dimension(),
-                    token_count=token_count,
-                )
-
-                chunk_records.append(chunk_record)
-                chunk_texts.append(text_to_tokenize)
-
-        # Log chunk statistics
-        if token_counts:
-            avg_tokens = sum(token_counts) / len(token_counts)
-            min_tokens = min(token_counts)
-            max_tokens_actual = max(token_counts)
-            logger.info("Chunk token statistics:")
-            logger.info(f"  - Average: {avg_tokens:.1f} tokens")
-            logger.info(f"  - Min: {min_tokens} tokens")
-            logger.info(f"  - Max: {max_tokens_actual} tokens")
-
-        # Step 6: Generate embeddings using OpenAI
-        logger.info(f"Generating embeddings using {service.get_model_id()}")
-        embeddings = await service.embed_texts(chunk_texts)
-
-        # Step 7: Attach embeddings to chunk records
+        logger.info(
+            f"Generating embeddings for {len(chunk_texts)} chunks "
+            f"using {embedding_service.get_model_id()}"
+        )
+        embeddings = await embedding_service.embed_texts(chunk_texts)
        for chunk_record, embedding in zip(chunk_records, embeddings):
            chunk_record.embedding = embedding

-        # Step 8: Save chunks in database
        logger.info("Storing chunks in database")
        await db_client.create_chunks_batch(chunk_records)

-        # Step 9: Update document status to completed
        await db_client.update_document_status(
            document_id,
            "completed",
-            total_chunks=total_chunks,
+            total_chunks=len(chunk_records),
            docling_metadata=docling_metadata,
        )

        logger.info(
            f"Successfully processed knowledge base document {document_id}. "
-            f"Total chunks: {total_chunks}"
+            f"Total chunks: {len(chunk_records)}"
        )

    except Exception as e:
@ -390,14 +229,12 @@ async def process_knowledge_base_document(
            f"Error processing knowledge base document {document_id}: {e}",
            exc_info=True,
        )
-        # Update document status to failed
        await db_client.update_document_status(
            document_id, "failed", error_message=str(e)
        )
        raise

    finally:
-        # Clean up temp file
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
--- a/api/tasks/run_integrations.py
+++ b/api/tasks/run_integrations.py
@ -5,12 +5,19 @@ from typing import Any, Dict, Optional

 import httpx
 from loguru import logger
+from pydantic import ValidationError

 from api.constants import BACKEND_API_ENDPOINT
 from api.db import db_client
 from api.db.models import WorkflowRunModel
 from api.enums import OrganizationConfigurationKey
 from api.services.pipecat.tracing_config import register_org_langfuse_credentials
+from api.services.workflow.dto import (
+    QANodeData,
+    QARFNode,
+    WebhookNodeData,
+    WebhookRFNode,
+)
 from api.services.workflow.qa import run_per_node_qa_analysis
 from api.utils.credential_auth import build_auth_header
 from api.utils.template_renderer import render_template
@ -19,34 +26,34 @@ from pipecat.utils.run_context import set_current_org_id, set_current_run_id


 def _should_skip_qa(
-    node_data: dict,
+    qa_data: QANodeData,
    workflow_run: WorkflowRunModel,
 ) -> str | None:
    """Check whether QA analysis should be skipped for this call.

    Returns a reason string if the call should be skipped, or None if it should proceed.
    """
-    # Check minimum call duration
-    min_duration = node_data.get("qa_min_call_duration", 15)
    usage_info = workflow_run.usage_info or {}
    call_duration = usage_info.get("call_duration_seconds")
-    if call_duration is not None and call_duration < min_duration:
-        return f"call duration ({call_duration:.1f}s) below minimum ({min_duration}s)"
+    if call_duration is not None and call_duration < qa_data.qa_min_call_duration:
+        return (
+            f"call duration ({call_duration:.1f}s) below minimum "
+            f"({qa_data.qa_min_call_duration}s)"
+        )

-    # Check voicemail calls
-    qa_voicemail_calls = node_data.get("qa_voicemail_calls", False)
-    if not qa_voicemail_calls:
+    if not qa_data.qa_voicemail_calls:
        gathered_context = workflow_run.gathered_context or {}
        call_disposition = gathered_context.get("call_disposition", "")
        if call_disposition == EndTaskReason.VOICEMAIL_DETECTED.value:
            return "voicemail call and QA voicemail calls is disabled"

-    # Check sample rate
-    sample_rate = node_data.get("qa_sample_rate", 100)
-    if sample_rate < 100:
+    if qa_data.qa_sample_rate < 100:
        roll = random.randint(1, 100)
-        if roll > sample_rate:
-            return f"excluded by sampling ({sample_rate}% sample rate, rolled {roll})"
+        if roll > qa_data.qa_sample_rate:
+            return (
+                f"excluded by sampling ({qa_data.qa_sample_rate}% sample rate, "
+                f"rolled {roll})"
+            )

    return None

@ -66,15 +73,22 @@ async def _run_qa_nodes(
    results: Dict[str, Any] = {}

    for node in qa_nodes:
-        node_data = node.get("data", {})
        node_id = node.get("id", "unknown")
-        node_name = node_data.get("name", "QA Analysis")
+        try:
+            qa_node = QARFNode.model_validate(node)
+        except ValidationError as e:
+            logger.warning(f"QA node #{node_id} failed validation, skipping: {e}")
+            results[f"qa_{node_id}"] = {"error": "validation_failed"}
+            continue

-        if not node_data.get("qa_enabled", True):
+        qa_data = qa_node.data
+        node_name = qa_data.name
+
+        if not qa_data.qa_enabled:
            logger.debug(f"QA node '{node_name}' is disabled, skipping")
            continue

-        skip_reason = _should_skip_qa(node_data, workflow_run)
+        skip_reason = _should_skip_qa(qa_data, workflow_run)
        if skip_reason:
            logger.info(f"Skipping QA node '{node_name}' (#{node_id}): {skip_reason}")
            results[f"qa_{node_id}"] = {"skipped": True, "reason": skip_reason}
@ -83,7 +97,7 @@ async def _run_qa_nodes(
        try:
            logger.info(f"Running QA analysis for node '{node_name}' (#{node_id})")
            result = await run_per_node_qa_analysis(
-                node_data,
+                qa_data,
                workflow_run,
                workflow_run_id,
                workflow_definition,
@ -260,7 +274,16 @@ async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int):

        # Step 8: Execute each webhook node
        for node in webhook_nodes:
-            webhook_data = node.get("data", {})
+            node_id = node.get("id", "unknown")
+            try:
+                webhook_node = WebhookRFNode.model_validate(node)
+            except ValidationError as e:
+                logger.warning(
+                    f"Webhook node #{node_id} failed validation, skipping: {e}"
+                )
+                continue
+
+            webhook_data = webhook_node.data
            try:
                await _execute_webhook_node(
                    webhook_data=webhook_data,
@ -268,10 +291,7 @@ async def run_integrations_post_workflow_run(_ctx, workflow_run_id: int):
                    organization_id=organization_id,
                )
            except Exception as e:
-                # Log error but continue with other webhooks
-                logger.warning(
-                    f"Failed to execute webhook '{webhook_data.get('name', 'unknown')}': {e}"
-                )
+                logger.warning(f"Failed to execute webhook '{webhook_data.name}': {e}")

    except Exception as e:
        logger.error(f"Error running integrations: {e}", exc_info=True)
@ -323,7 +343,7 @@ def _build_render_context(


 async def _execute_webhook_node(
-    webhook_data: Dict[str, Any],
+    webhook_data: WebhookNodeData,
    render_context: Dict[str, Any],
    organization_id: int,
 ) -> bool:
@ -331,31 +351,27 @@ async def _execute_webhook_node(
    Execute a single webhook node.

    Args:
-        webhook_data: The webhook node's data dict from workflow definition
+        webhook_data: The validated webhook node data
        render_context: Context for template rendering
        organization_id: For credential lookup

    Returns:
        True if successful, False otherwise
    """
-    webhook_name = webhook_data.get("name", "Unnamed Webhook")
+    webhook_name = webhook_data.name

-    # 1. Check if enabled
-    if not webhook_data.get("enabled", True):
+    if not webhook_data.enabled:
        logger.debug(f"Webhook '{webhook_name}' is disabled, skipping")
        return True

-    # 2. Validate endpoint URL
-    url = webhook_data.get("endpoint_url")
+    url = webhook_data.endpoint_url
    if not url:
        logger.warning(f"Webhook '{webhook_name}' has no endpoint URL")
        return False

-    # 3. Build headers
    headers = {"Content-Type": "application/json"}

-    # 4. Add auth header if credential configured
-    credential_uuid = webhook_data.get("credential_uuid")
+    credential_uuid = webhook_data.credential_uuid
    if credential_uuid:
        credential = await db_client.get_credential_by_uuid(
            credential_uuid, organization_id
@ -369,18 +385,13 @@ async def _execute_webhook_node(
                f"Credential {credential_uuid} not found for webhook '{webhook_name}'"
            )

-    # 5. Add custom headers
-    custom_headers = webhook_data.get("custom_headers", [])
-    for h in custom_headers:
-        if h.get("key") and h.get("value"):
-            headers[h["key"]] = h["value"]
+    for h in webhook_data.custom_headers or []:
+        if h.key and h.value:
+            headers[h.key] = h.value

-    # 6. Render payload template
-    payload_template = webhook_data.get("payload_template", {})
-    payload = render_template(payload_template, render_context)
+    payload = render_template(webhook_data.payload_template or {}, render_context)

-    # 7. Make HTTP request
-    method = webhook_data.get("http_method", "POST").upper()
+    method = (webhook_data.http_method or "POST").upper()

    logger.info(f"Executing webhook '{webhook_name}': {method}")