feat: add pre call fetch configuration (#222)

* feat: add pre call fetch configuration

* docs: add NEW tags for pages about new features

---------

Co-authored-by: Sabiha Khan <sabihak89@gmail.com>
This commit is contained in:
Abhishek 2026-04-06 12:30:37 +05:30 committed by GitHub
parent c4c4b591db
commit ec2f322486
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
27 changed files with 646 additions and 66 deletions

View file

@ -1,3 +1,5 @@
import asyncio
from loguru import logger
from api.db import db_client
@ -13,6 +15,7 @@ from api.services.pipecat.tracing_config import get_trace_url
from api.services.workflow.pipecat_engine import PipecatEngine
from api.tasks.arq import enqueue_job
from api.tasks.function_names import FunctionNames
from api.utils.hold_audio import play_hold_audio_loop
from pipecat.frames.frames import Frame, LLMContextFrame, TTSSpeakFrame
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
@ -28,6 +31,7 @@ def register_event_handlers(
in_memory_logs_buffer: InMemoryLogsBuffer,
pipeline_metrics_aggregator: PipelineMetricsAggregator,
audio_config=AudioConfig,
pre_call_fetch_task: asyncio.Task | None = None,
):
"""Register all event handlers for transport and task events.
@ -58,6 +62,9 @@ def register_event_handlers(
async def maybe_trigger_initial_response():
"""Start the conversation after both pipeline_started and client_connected events.
If a pre-call fetch is in progress, plays a ringer while waiting for the
response, then merges the result into the call context before proceeding.
If the start node has a greeting configured, play it directly via TTS.
Otherwise, trigger an LLM generation for the opening message.
"""
@ -68,6 +75,43 @@ def register_event_handlers(
):
ready_state["initial_response_triggered"] = True
# Wait for pre-call fetch if in progress, playing ringer meanwhile
if pre_call_fetch_task is not None:
if not pre_call_fetch_task.done():
logger.info(
"Pre-call fetch still in progress, playing ringer while waiting"
)
stop_ringer = asyncio.Event()
sample_rate = audio_config.pipeline_sample_rate or 16000
ringer_task = asyncio.create_task(
play_hold_audio_loop(task, stop_ringer, sample_rate)
)
try:
fetch_result = await pre_call_fetch_task
finally:
stop_ringer.set()
await ringer_task
else:
fetch_result = pre_call_fetch_task.result()
if fetch_result:
engine._call_context_vars.update(fetch_result)
try:
await db_client.update_workflow_run(
workflow_run_id,
initial_context={**engine._call_context_vars},
)
except Exception as e:
logger.error(f"Failed to persist pre-call fetch context: {e}")
logger.info(
f"Pre-call fetch complete, merged keys: "
f"{list(fetch_result.keys())}"
)
# Set the start node now (after pre-call fetch data is merged)
# so that render_template() has the complete _call_context_vars.
await engine.set_node(engine.workflow.start_node_id)
greeting = engine.get_start_greeting()
if greeting:
logger.debug(

View file

@ -0,0 +1,115 @@
"""Pre-call HTTP data fetch for StartCall node.
Executes an HTTP request before a voice call starts to enrich the
call context with data from external systems (CRM, ERP, etc.).
"""
from typing import Any, Dict, Optional
import httpx
from loguru import logger
from api.db import db_client
from api.utils.credential_auth import build_auth_header
PRE_CALL_FETCH_TIMEOUT_SECONDS = 10
def _extract_dynamic_variables(response_data: Dict[str, Any]) -> Dict[str, Any]:
    """Return the ``dynamic_variables`` mapping from a pre-call fetch response.

    Two response shapes are accepted:
    ``{"call_inbound": {"dynamic_variables": {...}}}`` and
    ``{"dynamic_variables": {...}}``. The nested form wins when it is a
    dict; otherwise the top-level form is used, so a response that carries
    a ``call_inbound`` object without usable variables still honours
    top-level ``dynamic_variables``. Anything that is not a dict yields ``{}``.
    """
    call_inbound = response_data.get("call_inbound")
    if isinstance(call_inbound, dict):
        nested = call_inbound.get("dynamic_variables")
        if isinstance(nested, dict):
            return nested

    top_level = response_data.get("dynamic_variables")
    return top_level if isinstance(top_level, dict) else {}


async def execute_pre_call_fetch(
    *,
    url: str,
    credential_uuid: Optional[str],
    call_context_vars: Dict[str, Any],
    workflow_id: int,
    organization_id: int,
) -> Dict[str, Any]:
    """Execute a POST request to fetch data before a call starts.

    Sends a standardized payload with call metadata (agent_id, from/to
    numbers) and extracts the ``dynamic_variables`` object from the JSON
    response so the caller can merge it into the call context.

    Args:
        url: Endpoint that receives the POST request.
        credential_uuid: Optional credential used to build an auth header.
            If it cannot be resolved, the request is still sent without it.
        call_context_vars: Call context; ``caller_number`` and
            ``called_number`` are read from it (empty string when absent).
        workflow_id: Sent to the endpoint as ``agent_id``.
        organization_id: Organization used to look up the credential.

    Returns:
        The ``dynamic_variables`` dict on success, or an empty dict on any
        failure (non-2xx status, invalid or non-object JSON, timeout,
        transport error, unexpected exception).

    Request and response errors are logged, never raised.
    """
    # Build standardized payload
    payload = {
        "event": "call_inbound",
        "call_inbound": {
            "agent_id": workflow_id,
            "from_number": call_context_vars.get("caller_number", ""),
            "to_number": call_context_vars.get("called_number", ""),
        },
    }

    # Build headers; credential problems are logged and the request goes
    # out unauthenticated rather than being abandoned.
    headers: Dict[str, str] = {"Content-Type": "application/json"}
    if credential_uuid:
        try:
            credential = await db_client.get_credential_by_uuid(
                credential_uuid, organization_id
            )
            if credential:
                headers.update(build_auth_header(credential))
            else:
                logger.warning(
                    f"Pre-call fetch: credential {credential_uuid} not found"
                )
        except Exception as e:
            logger.error(f"Pre-call fetch: failed to resolve credential: {e}")

    logger.info(f"Pre-call fetch: POST {url}")
    try:
        async with httpx.AsyncClient(timeout=PRE_CALL_FETCH_TIMEOUT_SECONDS) as client:
            response = await client.post(url, headers=headers, json=payload)

            if not response.is_success:
                logger.warning(
                    f"Pre-call fetch: HTTP {response.status_code} - "
                    f"{response.text[:200]}"
                )
                return {}

            try:
                response_data = response.json()
            except Exception:
                # A 2xx reply with an unparsable body carries no variables;
                # say so instead of reporting a success with zero keys.
                logger.warning(
                    "Pre-call fetch: response body is not valid JSON, skipping"
                )
                return {}

            if not isinstance(response_data, dict):
                logger.warning(
                    "Pre-call fetch: response is not a JSON object, skipping"
                )
                return {}

            # Extract dynamic_variables from Retell-compatible response
            # Supports: {call_inbound: {dynamic_variables: {...}}}
            #       or: {dynamic_variables: {...}}
            dynamic_vars = _extract_dynamic_variables(response_data)
            logger.info(
                f"Pre-call fetch: success ({response.status_code}), "
                f"dynamic_variables keys: {list(dynamic_vars.keys())}"
            )
            return dynamic_vars
    except httpx.TimeoutException:
        logger.error(
            f"Pre-call fetch: timed out after {PRE_CALL_FETCH_TIMEOUT_SECONDS}s"
        )
        return {}
    except httpx.RequestError as e:
        logger.error(f"Pre-call fetch: request failed: {e}")
        return {}
    except Exception as e:
        logger.error(f"Pre-call fetch: unexpected error: {e}")
        return {}

View file

@ -24,6 +24,7 @@ from api.services.pipecat.pipeline_engine_callbacks_processor import (
PipelineEngineCallbacksProcessor,
)
from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
from api.services.pipecat.pre_call_fetch import execute_pre_call_fetch
from api.services.pipecat.realtime_feedback_observer import (
RealtimeFeedbackObserver,
register_turn_log_handlers,
@ -622,6 +623,28 @@ async def _run_pipeline(
ReactFlowDTO.model_validate(workflow.workflow_definition_with_fallback)
)
# Pre-call fetch: fire early so it runs concurrently with remaining setup
pre_call_fetch_task = None
start_node = workflow_graph.nodes.get(workflow_graph.start_node_id)
if (
start_node
and start_node.pre_call_fetch_enabled
and start_node.pre_call_fetch_url
):
logger.info(
f"Pre-call fetch enabled for workflow run {workflow_run_id}, "
f"firing request to {start_node.pre_call_fetch_url}"
)
pre_call_fetch_task = asyncio.create_task(
execute_pre_call_fetch(
url=start_node.pre_call_fetch_url,
credential_uuid=start_node.pre_call_fetch_credential_uuid,
call_context_vars=merged_call_context_vars,
workflow_id=workflow_id,
organization_id=workflow.organization_id,
)
)
# Create in-memory logs buffer early so it can be used by engine callbacks
in_memory_logs_buffer = InMemoryLogsBuffer(workflow_run_id)
@ -952,6 +975,7 @@ async def _run_pipeline(
in_memory_logs_buffer=in_memory_logs_buffer,
pipeline_metrics_aggregator=pipeline_metrics_aggregator,
audio_config=audio_config,
pre_call_fetch_task=pre_call_fetch_task,
)
register_audio_data_handler(audio_buffer, workflow_run_id, in_memory_audio_buffer)

View file

@ -59,6 +59,10 @@ class NodeDataDTO(BaseModel):
detect_voicemail: bool = False
delayed_start: bool = False
delayed_start_duration: Optional[float] = None
# Pre-call fetch (start node only)
pre_call_fetch_enabled: bool = False
pre_call_fetch_url: Optional[str] = None
pre_call_fetch_credential_uuid: Optional[str] = None
tool_uuids: Optional[List[str]] = None
document_uuids: Optional[List[str]] = None
trigger_path: Optional[str] = None

View file

@ -162,8 +162,6 @@ class PipecatEngine:
if self._context_compaction_enabled:
self._context_summarization_manager = ContextSummarizationManager(self)
await self.set_node(self.workflow.start_node_id)
logger.debug(f"{self.__class__.__name__} initialized")
except Exception as e:
logger.error(f"Error initializing {self.__class__.__name__}: {e}")

View file

@ -14,7 +14,6 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional
from loguru import logger
from api.constants import APP_ROOT_DIR
from api.db import db_client
from api.enums import ToolCategory, WorkflowRunMode
from api.services.telephony.call_transfer_manager import get_call_transfer_manager
@ -28,11 +27,10 @@ from api.services.workflow.tools.custom_tool import (
execute_http_tool,
tool_to_function_schema,
)
from api.utils.hold_audio import load_hold_audio
from api.utils.hold_audio import play_hold_audio_loop
from pipecat.adapters.schemas.function_schema import FunctionSchema
from pipecat.frames.frames import (
FunctionCallResultProperties,
OutputAudioRawFrame,
TTSSpeakFrame,
)
from pipecat.services.llm_service import FunctionCallParams
@ -539,7 +537,11 @@ class CustomToolManager:
# Start hold music as background task
hold_music_task = asyncio.create_task(
self.play_hold_music_loop(hold_music_stop_event, sample_rate)
play_hold_audio_loop(
self._engine.task,
hold_music_stop_event,
sample_rate,
)
)
# Wait for transfer completion using Redis pub/sub
@ -666,44 +668,3 @@ class CustomToolManager:
# Unknown action, treat as generic success
logger.warning(f"Unknown transfer action: {action}, treating as success")
await function_call_params.result_callback(result)
async def play_hold_music_loop(
    self, stop_event: asyncio.Event, sample_rate: int = 8000
):
    """Queue the hold-music clip repeatedly until ``stop_event`` is set.

    Args:
        stop_event: Setting this event ends the loop.
        sample_rate: Sample rate used to pick and play the hold-music asset
            (default 8000Hz for Twilio).

    Any exception raised while loading or queueing audio is logged and
    swallowed.
    """
    try:
        # The asset file name is keyed by the sample rate.
        hold_music_file = (
            APP_ROOT_DIR / "assets" / f"transfer_hold_ring_{sample_rate}.wav"
        )
        pcm_bytes = load_hold_audio(hold_music_file, sample_rate)

        # Two bytes per sample; whole seconds of audio plus a 1.5s margin
        # before the clip is queued again.
        sample_count = len(pcm_bytes) // 2
        replay_after = int(sample_count / sample_rate) + 1.5

        logger.info(f"Starting hold music loop with file: {hold_music_file}")

        while not stop_event.is_set():
            await self._engine.task.queue_frame(
                OutputAudioRawFrame(
                    audio=pcm_bytes,
                    sample_rate=sample_rate,
                    num_channels=1,
                )
            )

            # Sleep until the clip should be replayed, waking early on stop.
            try:
                await asyncio.wait_for(stop_event.wait(), timeout=replay_after)
            except asyncio.TimeoutError:
                continue  # Clip finished without a stop request; replay it
            break  # Stop event was set

        logger.info("Hold music loop stopped")
    except Exception as e:
        logger.error(f"Error in hold music loop: {e}")

View file

@ -82,6 +82,9 @@ class Node:
self.delayed_start_duration = data.delayed_start_duration
self.tool_uuids = data.tool_uuids
self.document_uuids = data.document_uuids
self.pre_call_fetch_enabled = data.pre_call_fetch_enabled
self.pre_call_fetch_url = data.pre_call_fetch_url
self.pre_call_fetch_credential_uuid = data.pre_call_fetch_credential_uuid
self.data = data

View file

@ -201,6 +201,7 @@ async def run_pipeline_and_capture_context(
async def initialize_engine():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
await asyncio.gather(run_pipeline(), initialize_engine())

View file

@ -287,6 +287,7 @@ class TestEndCallViaNodeTransition:
async def initialize_engine():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
await asyncio.gather(run_pipeline(), initialize_engine())
@ -390,6 +391,7 @@ class TestEndCallViaNodeTransition:
async def initialize_engine():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
await asyncio.gather(run_pipeline(), initialize_engine())
@ -488,6 +490,7 @@ class TestEndCallViaCustomTool:
async def initialize_engine():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
await asyncio.gather(run_pipeline(), initialize_engine())
@ -579,6 +582,7 @@ class TestEndCallViaCustomTool:
async def initialize_engine():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
await asyncio.gather(run_pipeline(), initialize_engine())
@ -656,6 +660,7 @@ class TestEndCallViaClientDisconnect:
async def initialize_and_disconnect():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
# Wait for initial generation to complete
@ -746,6 +751,7 @@ class TestEndCallRaceConditions:
async def initialize_and_race():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
# Wait for initial generation
@ -858,6 +864,7 @@ class TestEndCallRaceConditions:
nonlocal disconnect_called
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
# Wait for the end_call tool to be called
@ -951,6 +958,7 @@ class TestEndCallExtractionBehavior:
async def initialize_and_end():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
# Wait for initial generation
@ -1076,6 +1084,7 @@ class TestEndCallExtractionBehavior:
async def initialize_and_end():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
# Wait for initial generation

View file

@ -294,6 +294,7 @@ class TestNodeSwitchWithUserSpeech:
async def initialize_engine():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
# Start the LLM generation - user speech will be injected
# automatically when FunctionCallResultFrame #1 is seen
await engine.llm.queue_frame(LLMContextFrame(engine.context))

View file

@ -131,6 +131,7 @@ async def run_pipeline_with_tool_calls(
# Small delay to let runner start
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
# Run both concurrently

View file

@ -176,6 +176,7 @@ class TestVariableExtractionDuringTransitions:
async def initialize_engine():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
await asyncio.gather(run_pipeline(), initialize_engine())

View file

@ -227,6 +227,7 @@ class TestTTSPauseWithAudioWriteFailure:
async def initialize_and_end_call():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
# Start LLM generation - this will trigger TTS
await engine.llm.queue_frame(LLMContextFrame(engine.context))
@ -346,6 +347,7 @@ class TestTTSPauseWithAudioWriteFailure:
async def initialize_and_observe():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))

View file

@ -274,6 +274,7 @@ class TestUserIdleHandler:
async def initialize_engine():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
await engine.llm.queue_frame(LLMContextFrame(engine.context))
await asyncio.gather(run_pipeline(), initialize_engine())

View file

@ -266,6 +266,7 @@ class TestUserMutingDuringBotSpeech:
async def run_test():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
# Trigger first LLM completion
await engine.llm.queue_frame(LLMContextFrame(engine.context))
@ -356,6 +357,7 @@ class TestUserMutingDuringBotSpeech:
async def run_test():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
# Trigger first LLM completion
await engine.llm.queue_frame(LLMContextFrame(engine.context))
@ -451,6 +453,7 @@ class TestUserMutingDuringBotSpeech:
async def run_test():
await asyncio.sleep(0.01)
await engine.initialize()
await engine.set_node(engine.workflow.start_node_id)
# Trigger first LLM completion
await engine.llm.queue_frame(LLMContextFrame(engine.context))

View file

@ -1,15 +1,19 @@
"""
Hold audio utility for loading and caching hold music files.
Hold audio utility for loading, caching, and playing hold music files.
This module provides functionality to load hold music audio files at specific sample rates
with caching to improve performance during multiple calls.
with caching to improve performance during multiple calls, and a reusable loop that queues
audio frames until a stop event is set.
"""
import asyncio
from typing import Dict, Optional, Tuple
import numpy as np
from loguru import logger
from pipecat.frames.frames import OutputAudioRawFrame
try:
import soundfile as sf
except ModuleNotFoundError as e:
@ -92,3 +96,56 @@ def get_cache_info() -> Dict[str, int]:
"cached_files": len(_hold_audio_cache),
"total_cache_size": sum(len(data) for data in _hold_audio_cache.values()),
}
async def play_hold_audio_loop(
    task,
    stop_event: asyncio.Event,
    sample_rate: int = 16000,
    hold_music_file: Optional[str] = None,
) -> None:
    """Play hold/ring-back audio in a loop until *stop_event* is set.

    This is a shared helper used by call-transfer hold music and the
    pre-call data fetch ringer. The caller is responsible for creating
    the ``asyncio.Event`` and setting it when playback should stop.

    Playback is best-effort: failures while resolving or loading the audio
    file, or while queueing frames, are logged and end the loop instead of
    propagating to whoever awaits this coroutine.

    Args:
        task: A ``PipelineTask`` (or anything with ``queue_frame``).
        stop_event: Set this event to terminate the loop.
        sample_rate: Target sample rate for audio playback.
        hold_music_file: Path to a WAV file. When *None* the default
            ``transfer_hold_ring_{sample_rate}.wav`` asset is used.
    """
    try:
        if hold_music_file is None:
            from api.constants import APP_ROOT_DIR

            hold_music_file = str(
                APP_ROOT_DIR / "assets" / f"transfer_hold_ring_{sample_rate}.wav"
            )
        hold_audio_data = load_hold_audio(hold_music_file, sample_rate)
    except Exception as e:
        # Callers await this coroutine during cleanup; a broken audio asset
        # must not surface there as an exception.
        logger.error(f"Hold audio loop: failed to load {hold_music_file}: {e}")
        return

    if not hold_audio_data:
        logger.warning(f"Hold audio loop: failed to load {hold_music_file}, skipping")
        return

    num_samples = len(hold_audio_data) // 2  # 16-bit PCM = 2 bytes per sample
    duration = num_samples / sample_rate
    logger.debug(f"Hold audio loop: playing at {sample_rate}Hz")

    try:
        while not stop_event.is_set():
            frame = OutputAudioRawFrame(
                audio=hold_audio_data,
                sample_rate=sample_rate,
                num_channels=1,
            )
            await task.queue_frame(frame)
            # Wait for roughly one clip length (plus margin) before queueing
            # the clip again, waking immediately if a stop is requested.
            try:
                await asyncio.wait_for(stop_event.wait(), timeout=duration + 1.5)
                break
            except asyncio.TimeoutError:
                pass
    except Exception as e:
        logger.error(f"Hold audio loop: error: {e}")

    logger.debug("Hold audio loop: stopped")

View file

@ -56,9 +56,11 @@
"voice-agent/editing-a-workflow",
"voice-agent/pre-recorded-audio",
"voice-agent/template-variables",
"voice-agent/pre-call-data-fetch",
{
"group": "Tools",
"pages": [
"voice-agent/tools/introduction",
{
"group": "Built-in Tools",
"pages": [
@ -74,6 +76,7 @@
}
]
},
"voice-agent/knowledge-base",
{
"group": "Nodes",
"pages": [

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

View file

@ -0,0 +1,53 @@
---
title: "Knowledge Base"
description: "Upload documents that your voice agent can reference during live conversations to provide accurate, context-aware responses."
---
The Knowledge Base lets you upload documents that your voice agents can reference during conversations. Instead of encoding all information into prompts, you can provide source documents and let the agent retrieve relevant content on the fly.
<Warning>
You must configure an embedding provider and API key in **AI Models Configuration → Embedding** before using the Knowledge Base. Document processing and retrieval depend on embeddings, so this feature will not work without a valid embedding configuration.
</Warning>
![Embedding Configuration](../images/embedding-configurations.png)
## How It Works
1. You **upload** a document (PDF, Word, TXT, or JSON) to the Knowledge Base
2. Dograh **processes** and chunks the document for efficient retrieval
3. You **attach** the document to one or more workflow nodes
4. During a call, the agent **searches** the document for relevant information based on the caller's questions and uses it to generate accurate responses
## Supported File Types
| Format | Extension |
|--------|-----------|
| PDF | `.pdf` |
| Word | `.docx`, `.doc` |
| Text | `.txt` |
| JSON | `.json` |
Maximum file size: **5 MB**
## Uploading Documents
1. Go to **Knowledge Base Files** in the dashboard
2. Click **Upload New** or drag and drop a file
3. Wait for processing to complete — the document will be chunked and indexed automatically
## Attaching Documents to Nodes
Once a document is processed, you can attach it to any **Start Call** or **Agent** node in your workflow:
1. Open the node edit dialog
2. Scroll to the **Knowledge Base Documents** section
3. Select one or more documents for the agent to reference
The agent will only search documents attached to the current node, so attach only the documents relevant to that conversation step.
## Best Practices
- **Keep documents focused** — a single topic per document produces better retrieval results than a large multi-topic file
- **Use clear, structured content** — headings, lists, and short paragraphs help the chunking process
- **Attach selectively** — only attach documents relevant to a specific node rather than attaching everything everywhere
- **Keep documents up to date** — re-upload when source information changes to avoid stale answers

View file

@ -0,0 +1,140 @@
---
title: "Pre-Call Data Fetch"
description: "Fetch customer data from your CRM or ERP before the call starts, so your voice agent can greet callers by name and reference their account details."
tag: "NEW"
---
Pre-Call Data Fetch allows you to enrich the call context with external data before the voice agent starts speaking. When enabled on the **Start Call** node, Dograh sends an HTTP request to your API as soon as a call is initiated. While the response is loading, the caller hears a ring-back tone. Once the data arrives, it is merged into the call's [initial context](/core-concepts/context-and-variables#initial_context) and becomes available as template variables in your prompts and greetings.
## How It Works
1. A call arrives (inbound) or is initiated (outbound).
2. Dograh sends a **POST** request to your configured endpoint with a standardized payload.
3. The caller hears a ring-back tone while waiting for the response.
4. Your API responds with a JSON object containing `dynamic_variables`.
5. The variables are merged into the call's initial context.
6. The voice agent starts with full access to the fetched data via `{{variable_name}}` syntax.
## Configuration
Open the **Start Call** node editor and expand **Advanced Settings**. Toggle **Pre-Call Data Fetch** and configure:
| Field | Description |
| --- | --- |
| **Endpoint URL** | The URL Dograh will send the POST request to. |
| **Authentication** | Optional credential for authenticating the request. Supports API key, bearer token, basic auth, and custom header. |
## Request Format
Dograh sends a `POST` request with the following JSON payload:
```json
{
"event": "call_inbound",
"call_inbound": {
"agent_id": 123,
"from_number": "+12137771234",
"to_number": "+12137771235"
}
}
```
| Field | Description |
| --- | --- |
| `event` | Always `"call_inbound"`, for both inbound and outbound calls. |
| `call_inbound.agent_id` | The workflow (agent) ID. |
| `call_inbound.from_number` | The caller's phone number (`caller_number` from initial context). |
| `call_inbound.to_number` | The called phone number (`called_number` from initial context). |
The `Content-Type` header is set to `application/json`. If you configured a credential, the corresponding authentication header is included.
## Expected Response Format
Your API should return a **JSON object** with a `2xx` status code. The variables to inject into the call context should be placed inside the `dynamic_variables` key:
```json
{
"call_inbound": {
"dynamic_variables": {
"customer_name": "Jane Doe",
"account_status": "active",
"loyalty_tier": "gold",
"open_tickets": 2
}
}
}
```
You can also place `dynamic_variables` at the top level:
```json
{
"dynamic_variables": {
"customer_name": "Jane Doe",
"account_status": "active"
}
}
```
After the response is received, you can reference these values anywhere template variables are supported:
- **Greeting**: `Hello {{customer_name}}, thank you for calling!`
- **Prompt**: `The customer is a {{loyalty_tier}} member with {{open_tickets}} open support tickets.`
<Note>
If the response is not a valid JSON object, does not contain `dynamic_variables`, returns a non-`2xx` status, or the request fails or times out, the call proceeds normally without the additional context. The pre-call fetch never fails a call; at worst it delays the start of the conversation until the 10-second timeout elapses.
</Note>
## Nested Variables
If your `dynamic_variables` contain nested objects, you can access them using dot notation:
```json
{
"call_inbound": {
"dynamic_variables": {
"customer": {
"name": "Jane Doe",
"address": {
"city": "Los Angeles"
}
}
}
}
}
```
Access in prompts as `{{customer.name}}` and `{{customer.address.city}}`.
## Timeout
The request has a **10-second timeout**. If your API does not respond within this window, the call proceeds without the fetched data. Design your endpoint to respond as quickly as possible to minimize the ring-back tone duration.
## Example Integration
A simple Node.js endpoint that looks up a customer by phone number:
```javascript
app.post("/dograh/pre-call", async (req, res) => {
const { call_inbound } = req.body;
const customer = await db.customers.findOne({
phone: call_inbound.from_number,
});
if (!customer) {
return res.json({});
}
res.json({
call_inbound: {
dynamic_variables: {
customer_name: customer.name,
account_status: customer.status,
loyalty_tier: customer.tier,
},
},
});
});
```

View file

@ -1,6 +1,7 @@
---
title: "Pre-recorded Audio"
description: "Build hybrid voice agents that combine pre-recorded audio with dynamic text generation for lower latency, reduced TTS costs, and natural-sounding conversations."
tag: "NEW"
---
Custom recordings allow you to build **hybrid voice agents** that use your own pre-recorded audio for key parts of the conversation, while falling back to LLM-generated speech (via a cloned voice) for dynamic responses. This gives you the best of both worlds — the emotional depth of real human speech and the flexibility of AI-generated dialogue.

View file

@ -0,0 +1,39 @@
---
title: "Tools"
description: "Extend your voice agent's capabilities by giving it tools to perform actions during live conversations."
---
Tools let your AI agent take actions during a conversation — transfer calls, end calls, or call external APIs — based on the context of the conversation and your prompt instructions.
When a tool is attached to a workflow node, the LLM decides **when** to invoke it and **what parameters** to pass, based on the user's spoken intent and your node-level instructions.
## Tool Types
Dograh provides two categories of tools:
### Built-in Tools
Pre-configured tools that handle common telephony operations out of the box:
- [**Call Transfer**](/voice-agent/tools/call-transfer) — Transfer the active call to a phone number or SIP endpoint
- [**End Call**](/voice-agent/tools/end-call) — Terminate the call when the conversation is complete
### Custom Tools
Tools you define to integrate with any external system:
- [**HTTP API**](/voice-agent/tools/http-api) — Call any REST API endpoint during a conversation (e.g., CRM updates, data lookups, triggering automations)
## How Tools Work
1. You **define** a tool with a name, description, and parameters
2. You **attach** the tool to one or more workflow nodes
3. During a call, the LLM reads your node prompt, the tool description, and the caller's intent to decide whether to invoke the tool
4. The tool executes and returns a result that the agent can use to continue the conversation
## Best Practices
- **Attach only relevant tools to each node** — fewer tools means more reliable invocations
- **Write clear tool descriptions** — the LLM uses these to decide when to call the tool
- **Guide the LLM in your node prompt** — explicitly describe when a tool should be used
- **Test tool behavior** — verify your agent invokes tools at the right moments using web or phone calls

View file

@ -1,6 +1,6 @@
"use client";
import { FileText } from "lucide-react";
import { ExternalLink, FileText } from "lucide-react";
import Link from "next/link";
import { useMemo } from "react";
@ -8,6 +8,7 @@ import type { DocumentResponseSchema } from "@/client/types.gen";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Label } from "@/components/ui/label";
import { KNOWLEDGE_BASE_DOC_URL } from "@/constants/documentation";
interface DocumentSelectorProps {
value: string[];
@ -57,7 +58,10 @@ export const DocumentSelector = ({
<>
<Label>{label}</Label>
{description && (
<Label className="text-xs text-muted-foreground">{description}</Label>
<Label className="text-xs text-muted-foreground">
{description}{" "}
<a href={KNOWLEDGE_BASE_DOC_URL} target="_blank" rel="noopener noreferrer" className="underline">Learn more</a>
</Label>
)}
</>
)}
@ -66,11 +70,12 @@ export const DocumentSelector = ({
No documents available. Upload documents to the knowledge base first.
</div>
<div className="flex justify-center">
<Link href="/files">
<Button variant="outline" size="sm">
<Button variant="outline" size="sm" asChild>
<Link href="/files" target="_blank">
<ExternalLink className="h-4 w-4 mr-2" />
Upload Documents
</Button>
</Link>
</Link>
</Button>
</div>
</div>
</div>
@ -83,7 +88,10 @@ export const DocumentSelector = ({
<>
<Label>{label}</Label>
{description && (
<Label className="text-xs text-muted-foreground">{description}</Label>
<Label className="text-xs text-muted-foreground">
{description}{" "}
<a href={KNOWLEDGE_BASE_DOC_URL} target="_blank" rel="noopener noreferrer" className="underline">Learn more</a>
</Label>
)}
</>
)}
@ -123,15 +131,23 @@ export const DocumentSelector = ({
</div>
))}
</div>
<div className="p-2 bg-muted/30">
<Link
href="/files"
target="_blank"
className="flex items-center gap-2 text-sm text-muted-foreground hover:text-foreground"
>
<ExternalLink className="h-4 w-4" />
Manage Documents
</Link>
</div>
</div>
<div className="flex items-center justify-between text-xs text-muted-foreground pt-1">
<span>
{value.length > 0 && (
<p className="text-xs text-muted-foreground">
{value.length} {value.length === 1 ? "document" : "documents"} selected
</span>
<Link href="/files" className="hover:underline">
Manage Documents
</Link>
</div>
</p>
)}
</div>
);
};

View file

@ -8,6 +8,7 @@ import type { ToolResponse } from "@/client/types.gen";
import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import { Label } from "@/components/ui/label";
import { TOOLS_INTRODUCTION_DOC_URL } from "@/constants/documentation";
interface ToolSelectorProps {
value: string[];
@ -46,7 +47,8 @@ export function ToolSelector({
<Label>{label}</Label>
{description && (
<Label className="text-xs text-muted-foreground">
{description}
{description}{" "}
<a href={TOOLS_INTRODUCTION_DOC_URL} target="_blank" rel="noopener noreferrer" className="underline">Learn more</a>
</Label>
)}
</>

View file

@ -1,5 +1,5 @@
import { NodeProps, NodeToolbar, Position } from "@xyflow/react";
import { Edit, FileText, Play, PlusIcon, Trash2Icon, Wrench } from "lucide-react";
import { ChevronRight, Edit, FileText, Play, PlusIcon, Settings, Trash2Icon, Wrench } from "lucide-react";
import { memo, useCallback, useEffect, useMemo, useState } from "react";
import { useWorkflow } from "@/app/workflow/[workflowId]/contexts/WorkflowContext";
@ -11,12 +11,14 @@ import { MentionTextarea } from "@/components/flow/MentionTextarea";
import { ToolBadges } from "@/components/flow/ToolBadges";
import { ToolSelector } from "@/components/flow/ToolSelector";
import { ExtractionVariable, FlowNodeData } from "@/components/flow/types";
import { CredentialSelector, UrlInput, validateUrl } from "@/components/http";
import { Button } from "@/components/ui/button";
import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Switch } from "@/components/ui/switch";
import { Textarea } from "@/components/ui/textarea";
import { CONTEXT_VARIABLES_DOC_URL, NODE_DOCUMENTATION_URLS } from "@/constants/documentation";
import { CONTEXT_VARIABLES_DOC_URL, NODE_DOCUMENTATION_URLS, PRE_CALL_DATA_FETCH_DOC_URL } from "@/constants/documentation";
import { NodeContent } from "./common/NodeContent";
import { NodeEditDialog } from "./common/NodeEditDialog";
@ -48,6 +50,12 @@ interface StartCallEditFormProps {
setToolUuids: (value: string[]) => void;
documentUuids: string[];
setDocumentUuids: (value: string[]) => void;
preCallFetchEnabled: boolean;
setPreCallFetchEnabled: (value: boolean) => void;
preCallFetchUrl: string;
setPreCallFetchUrl: (value: string) => void;
preCallFetchCredentialUuid: string;
setPreCallFetchCredentialUuid: (value: string) => void;
tools: ToolResponse[];
documents: DocumentResponseSchema[];
recordings: RecordingResponseSchema[];
@ -77,6 +85,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
const [variables, setVariables] = useState<ExtractionVariable[]>(data.extraction_variables ?? []);
const [toolUuids, setToolUuids] = useState<string[]>(data.tool_uuids ?? []);
const [documentUuids, setDocumentUuids] = useState<string[]>(data.document_uuids ?? []);
const [preCallFetchEnabled, setPreCallFetchEnabled] = useState(data.pre_call_fetch_enabled ?? false);
const [preCallFetchUrl, setPreCallFetchUrl] = useState(data.pre_call_fetch_url ?? "");
const [preCallFetchCredentialUuid, setPreCallFetchCredentialUuid] = useState(data.pre_call_fetch_credential_uuid ?? "");
// Compute if form has unsaved changes (only check prompt, name, greeting)
const isDirty = useMemo(() => {
@ -88,6 +99,14 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
}, [greeting, prompt, name, data]);
const handleSave = async () => {
// Validate pre-call fetch URL if enabled.
// The check only runs when the URL is non-empty, so an enabled fetch with a
// blank URL passes straight through to the save below.
// NOTE(review): confirm whether "enabled but no URL" should be saveable.
if (preCallFetchEnabled && preCallFetchUrl) {
const urlValidation = validateUrl(preCallFetchUrl);
if (!urlValidation.valid) {
// Abort before handleSaveNodeData runs; nothing is reported from here.
// NOTE(review): presumably the UrlInput's own validation display shows the
// error to the user — confirm.
return;
}
}
handleSaveNodeData({
...data,
greeting: greeting || undefined,
@ -102,6 +121,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
extraction_variables: variables,
tool_uuids: toolUuids.length > 0 ? toolUuids : undefined,
document_uuids: documentUuids.length > 0 ? documentUuids : undefined,
pre_call_fetch_enabled: preCallFetchEnabled,
pre_call_fetch_url: preCallFetchEnabled ? preCallFetchUrl || undefined : undefined,
pre_call_fetch_credential_uuid: preCallFetchEnabled && preCallFetchCredentialUuid ? preCallFetchCredentialUuid : undefined,
});
setOpen(false);
await saveWorkflow();
@ -122,6 +144,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
setVariables(data.extraction_variables ?? []);
setToolUuids(data.tool_uuids ?? []);
setDocumentUuids(data.document_uuids ?? []);
setPreCallFetchEnabled(data.pre_call_fetch_enabled ?? false);
setPreCallFetchUrl(data.pre_call_fetch_url ?? "");
setPreCallFetchCredentialUuid(data.pre_call_fetch_credential_uuid ?? "");
}
setOpen(newOpen);
};
@ -141,6 +166,9 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
setVariables(data.extraction_variables ?? []);
setToolUuids(data.tool_uuids ?? []);
setDocumentUuids(data.document_uuids ?? []);
setPreCallFetchEnabled(data.pre_call_fetch_enabled ?? false);
setPreCallFetchUrl(data.pre_call_fetch_url ?? "");
setPreCallFetchCredentialUuid(data.pre_call_fetch_credential_uuid ?? "");
}
}, [data, open]);
@ -243,6 +271,12 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
setToolUuids={setToolUuids}
documentUuids={documentUuids}
setDocumentUuids={setDocumentUuids}
preCallFetchEnabled={preCallFetchEnabled}
setPreCallFetchEnabled={setPreCallFetchEnabled}
preCallFetchUrl={preCallFetchUrl}
setPreCallFetchUrl={setPreCallFetchUrl}
preCallFetchCredentialUuid={preCallFetchCredentialUuid}
setPreCallFetchCredentialUuid={setPreCallFetchCredentialUuid}
tools={tools ?? []}
documents={documents ?? []}
recordings={recordings ?? []}
@ -278,6 +312,12 @@ const StartCallEditForm = ({
setToolUuids,
documentUuids,
setDocumentUuids,
preCallFetchEnabled,
setPreCallFetchEnabled,
preCallFetchUrl,
setPreCallFetchUrl,
preCallFetchCredentialUuid,
setPreCallFetchCredentialUuid,
tools,
documents,
recordings,
@ -475,6 +515,57 @@ const StartCallEditForm = ({
description="Select documents from the knowledge base that the agent can reference during this conversation step."
/>
</div>
{/* Advanced Settings */}
<div className="pt-4 border-t mt-4">
<Collapsible>
<CollapsibleTrigger className="flex items-center gap-2 w-full text-sm font-medium hover:text-foreground text-muted-foreground">
<Settings className="h-4 w-4" />
<span>Advanced Settings</span>
<ChevronRight className="h-4 w-4 ml-auto transition-transform [[data-state=open]>svg&]:rotate-90" />
</CollapsibleTrigger>
<CollapsibleContent className="mt-4 space-y-4">
{/* Pre-Call Data Fetch */}
<div className="flex items-center space-x-2">
<Switch
id="pre-call-fetch"
checked={preCallFetchEnabled}
onCheckedChange={setPreCallFetchEnabled}
/>
<Label htmlFor="pre-call-fetch">Pre-Call Data Fetch</Label>
</div>
<p className="text-xs text-muted-foreground">
Fetch data from an external API before the call starts. A standardized POST request with caller/called numbers will be sent. The JSON response fields will be merged into the call context and available as template variables in your prompts.{" "}
<a href={PRE_CALL_DATA_FETCH_DOC_URL} target="_blank" rel="noopener noreferrer" className="underline">Learn more</a>
</p>
{preCallFetchEnabled && (
<div className="border rounded-md p-4 space-y-4 bg-muted/20">
<div className="grid gap-2">
<Label>Endpoint URL</Label>
<Label className="text-xs text-muted-foreground">
The URL to send the pre-call data fetch request to.
</Label>
<UrlInput
value={preCallFetchUrl}
onChange={setPreCallFetchUrl}
placeholder="https://api.example.com/customer-lookup"
showValidation
/>
</div>
<div className="grid gap-2">
<Label>Authentication</Label>
<CredentialSelector
value={preCallFetchCredentialUuid}
onChange={setPreCallFetchCredentialUuid}
/>
</div>
</div>
)}
</CollapsibleContent>
</Collapsible>
</div>
</div>
);
};

View file

@ -28,6 +28,10 @@ export type FlowNodeData = {
detect_voicemail?: boolean;
delayed_start?: boolean;
delayed_start_duration?: number;
// Pre-call data fetch (StartCall only)
// Toggle for the feature; the StartCall form treats a missing value as false.
pre_call_fetch_enabled?: boolean;
// Endpoint URL the pre-call fetch request is sent to. The StartCall form
// leaves this undefined when the feature is disabled or the URL is blank.
pre_call_fetch_url?: string;
// UUID of the credential chosen under "Authentication" in the StartCall form;
// undefined when the feature is disabled or no credential is selected.
pre_call_fetch_credential_uuid?: string;
// Trigger node specific
trigger_path?: string;
// Webhook node specific

View file

@ -12,6 +12,12 @@ export const NODE_DOCUMENTATION_URLS: Record<string, string> = {
// Links to individual documentation pages; each is DOCS_BASE plus a fixed path.
export const CONTEXT_VARIABLES_DOC_URL = `${DOCS_BASE}/core-concepts/context-and-variables`;
export const TOOLS_INTRODUCTION_DOC_URL = `${DOCS_BASE}/voice-agent/tools/introduction`;
export const KNOWLEDGE_BASE_DOC_URL = `${DOCS_BASE}/voice-agent/knowledge-base`;
// Docs page for the pre-call data fetch feature.
export const PRE_CALL_DATA_FETCH_DOC_URL = `${DOCS_BASE}/voice-agent/pre-call-data-fetch`;
export const TOOL_DOCUMENTATION_URLS: Record<string, string> = {
http_api: `${DOCS_BASE}/voice-agent/tools/http-api`,
end_call: `${DOCS_BASE}/voice-agent/tools/end-call`,