mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
feat: add AWS Bedrock support
This commit is contained in:
parent
1604e306ec
commit
fe84f086ba
30 changed files with 546 additions and 195 deletions
|
|
@ -206,7 +206,7 @@ class WorkflowClient(BaseDBClient):
|
|||
async def update_workflow(
|
||||
self,
|
||||
workflow_id: int,
|
||||
name: str,
|
||||
name: str | None,
|
||||
workflow_definition: dict | None,
|
||||
template_context_variables: dict | None,
|
||||
workflow_configurations: dict | None,
|
||||
|
|
@ -249,7 +249,8 @@ class WorkflowClient(BaseDBClient):
|
|||
if not workflow:
|
||||
raise ValueError(f"Workflow with ID {workflow_id} not found")
|
||||
|
||||
workflow.name = name
|
||||
if name is not None:
|
||||
workflow.name = name
|
||||
|
||||
if template_context_variables is not None:
|
||||
workflow.template_context_variables = template_context_variables
|
||||
|
|
|
|||
|
|
@ -108,9 +108,7 @@ async def get_mps_credits(user: UserModel = Depends(get_user)):
|
|||
)
|
||||
else:
|
||||
if not user.selected_organization_id:
|
||||
raise HTTPException(
|
||||
status_code=400, detail="No organization selected"
|
||||
)
|
||||
raise HTTPException(status_code=400, detail="No organization selected")
|
||||
usage = await mps_service_key_client.get_usage_by_organization(
|
||||
user.selected_organization_id
|
||||
)
|
||||
|
|
|
|||
|
|
@ -71,10 +71,10 @@ async def get_auth_user(
|
|||
|
||||
|
||||
class UserConfigurationRequestResponseSchema(BaseModel):
|
||||
llm: dict[str, Union[str, float, list[str]]] | None = None
|
||||
tts: dict[str, Union[str, float, list[str]]] | None = None
|
||||
stt: dict[str, Union[str, float, list[str]]] | None = None
|
||||
embeddings: dict[str, Union[str, float, list[str]]] | None = None
|
||||
llm: dict[str, Union[str, float, list[str], None]] | None = None
|
||||
tts: dict[str, Union[str, float, list[str], None]] | None = None
|
||||
stt: dict[str, Union[str, float, list[str], None]] | None = None
|
||||
embeddings: dict[str, Union[str, float, list[str], None]] | None = None
|
||||
test_phone_number: str | None = None
|
||||
timezone: str | None = None
|
||||
organization_pricing: dict[str, Union[float, str, bool]] | None = None
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ class DuplicateTemplateRequest(BaseModel):
|
|||
|
||||
|
||||
class UpdateWorkflowRequest(BaseModel):
|
||||
name: str
|
||||
name: str | None = None
|
||||
workflow_definition: dict | None = None
|
||||
template_context_variables: dict | None = None
|
||||
workflow_configurations: dict | None = None
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ class UserConfigurationValidator:
|
|||
ServiceProviders.DOGRAH.value: self._check_dograh_api_key,
|
||||
ServiceProviders.SARVAM.value: self._check_sarvam_api_key,
|
||||
ServiceProviders.SPEECHMATICS.value: self._check_speechmatics_api_key,
|
||||
ServiceProviders.AWS_BEDROCK.value: self._check_aws_bedrock_api_key,
|
||||
}
|
||||
|
||||
async def validate(self, configuration: UserConfiguration) -> APIKeyStatusResponse:
|
||||
|
|
@ -71,6 +72,21 @@ class UserConfigurationValidator:
|
|||
return [] # Optional service not configured is OK
|
||||
|
||||
provider = service_config.provider
|
||||
|
||||
# AWS Bedrock uses AWS credentials instead of api_key
|
||||
if provider == ServiceProviders.AWS_BEDROCK.value:
|
||||
try:
|
||||
if not self._check_aws_bedrock_api_key(provider, service_config):
|
||||
return [
|
||||
{
|
||||
"model": service_name,
|
||||
"message": f"Invalid {provider} credentials",
|
||||
}
|
||||
]
|
||||
except ValueError as e:
|
||||
return [{"model": service_name, "message": str(e)}]
|
||||
return []
|
||||
|
||||
api_key = service_config.api_key
|
||||
|
||||
try:
|
||||
|
|
@ -143,3 +159,8 @@ class UserConfigurationValidator:
|
|||
|
||||
def _check_speechmatics_api_key(self, model: str, api_key: str) -> bool:
|
||||
return True
|
||||
|
||||
def _check_aws_bedrock_api_key(self, model: str, service_config) -> bool:
|
||||
if not service_config.aws_access_key or not service_config.aws_secret_key:
|
||||
raise ValueError("AWS access key and secret key are required for Bedrock")
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ class ServiceProviders(str, Enum):
|
|||
DOGRAH = "dograh"
|
||||
SARVAM = "sarvam"
|
||||
SPEECHMATICS = "speechmatics"
|
||||
AWS_BEDROCK = "aws_bedrock"
|
||||
|
||||
|
||||
class BaseServiceConfiguration(BaseModel):
|
||||
|
|
@ -37,6 +38,7 @@ class BaseServiceConfiguration(BaseModel):
|
|||
ServiceProviders.GOOGLE,
|
||||
ServiceProviders.AZURE,
|
||||
ServiceProviders.DOGRAH,
|
||||
ServiceProviders.AWS_BEDROCK,
|
||||
# ServiceProviders.SARVAM,
|
||||
]
|
||||
api_key: str | list[str]
|
||||
|
|
@ -44,6 +46,8 @@ class BaseServiceConfiguration(BaseModel):
|
|||
@field_validator("api_key")
@classmethod
def validate_api_key(cls, v):
    """Reject an empty ``api_key`` list; pass every other value through.

    ``None`` and plain strings are returned unchanged, as is any
    non-empty list of keys.

    Raises:
        ValueError: If ``v`` is a list with no elements.
    """
    # None is not a list, so it falls straight through to the return.
    empty_key_list = isinstance(v, list) and len(v) == 0
    if empty_key_list:
        raise ValueError("api_key list must not be empty")
    return v
|
||||
|
|
@ -51,6 +55,8 @@ class BaseServiceConfiguration(BaseModel):
|
|||
def __getattribute__(self, name: str):
|
||||
if name == "api_key":
|
||||
value = super().__getattribute__(name)
|
||||
if value is None:
|
||||
return value
|
||||
if isinstance(value, list):
|
||||
return random.choice(value)
|
||||
return value
|
||||
|
|
@ -59,6 +65,8 @@ class BaseServiceConfiguration(BaseModel):
|
|||
def get_all_api_keys(self) -> list[str]:
    """Return every configured API key as a list.

    Reads the stored ``api_key`` through the parent class's attribute
    lookup so that the random single-key selection applied to normal
    ``api_key`` access is bypassed.

    Returns:
        An empty list when no key is set, a copy of the stored list when
        several keys are configured, or a one-element list for a single key.
    """
    stored = super().__getattribute__("api_key")
    if stored is None:
        return []
    # Copy list values so callers cannot mutate the stored configuration.
    return list(stored) if isinstance(stored, list) else [stored]
|
||||
|
|
@ -167,6 +175,14 @@ OPENROUTER_MODELS = [
|
|||
]
|
||||
AZURE_MODELS = ["gpt-4.1-mini"]
|
||||
DOGRAH_LLM_MODELS = ["default", "accurate", "fast", "lite", "zen"]
|
||||
AWS_BEDROCK_MODELS = [
|
||||
"us.amazon.nova-pro-v1:0",
|
||||
"us.amazon.nova-lite-v1:0",
|
||||
"us.amazon.nova-micro-v1:0",
|
||||
"us.anthropic.claude-sonnet-4-20250514-v1:0",
|
||||
"us.anthropic.claude-3-5-sonnet-20241022-v2:0",
|
||||
"us.anthropic.claude-haiku-4-5-20251001-v1:0",
|
||||
]
|
||||
|
||||
|
||||
@register_llm
|
||||
|
|
@ -219,6 +235,19 @@ class DograhLLMService(BaseLLMConfiguration):
|
|||
)
|
||||
|
||||
|
||||
@register_llm
class AWSBedrockLLMConfiguration(BaseLLMConfiguration):
    """LLM configuration for the AWS Bedrock provider.

    Carries AWS credential fields alongside an ``api_key`` that is
    optional for this provider and defaults to ``None``.
    """

    # Discriminator value used to select this configuration by provider.
    provider: Literal[ServiceProviders.AWS_BEDROCK] = ServiceProviders.AWS_BEDROCK
    # Bedrock model identifier; AWS_BEDROCK_MODELS is exposed as schema examples.
    model: str = Field(
        default="us.amazon.nova-pro-v1:0",
        json_schema_extra={"examples": AWS_BEDROCK_MODELS},
    )
    # AWS credentials; they default to empty strings.
    aws_access_key: str = ""
    aws_secret_key: str = ""
    aws_region: str = "us-east-1"
    # Optional for this provider; defaults to None.
    api_key: str | list[str] | None = None
|
||||
|
||||
|
||||
LLMConfig = Annotated[
|
||||
Union[
|
||||
OpenAILLMService,
|
||||
|
|
@ -227,6 +256,7 @@ LLMConfig = Annotated[
|
|||
GoogleLLMService,
|
||||
AzureLLMService,
|
||||
DograhLLMService,
|
||||
AWSBedrockLLMConfiguration,
|
||||
],
|
||||
Field(discriminator="provider"),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggr
|
|||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
from api.tasks.arq import enqueue_job
|
||||
from api.tasks.function_names import FunctionNames
|
||||
from pipecat.frames.frames import Frame, LLMContextFrame
|
||||
from pipecat.frames.frames import Frame, LLMContextFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.utils.enums import EndTaskReason
|
||||
|
|
@ -47,32 +47,44 @@ def register_event_handlers(
|
|||
sample_rate=sample_rate,
|
||||
num_channels=num_channels,
|
||||
)
|
||||
# Track both events to ensure LLM is only triggered after both occur
|
||||
# Track both events to ensure the initial response is only triggered after both occur
|
||||
ready_state = {
|
||||
"pipeline_started": False,
|
||||
"client_connected": False,
|
||||
"llm_triggered": False,
|
||||
"initial_response_triggered": False,
|
||||
}
|
||||
|
||||
async def maybe_trigger_llm():
|
||||
"""Trigger LLM only after both pipeline_started and client_connected events."""
|
||||
async def maybe_trigger_initial_response():
|
||||
"""Start the conversation after both pipeline_started and client_connected events.
|
||||
|
||||
If the start node has a greeting configured, play it directly via TTS.
|
||||
Otherwise, trigger an LLM generation for the opening message.
|
||||
"""
|
||||
if (
|
||||
ready_state["pipeline_started"]
|
||||
and ready_state["client_connected"]
|
||||
and not ready_state["llm_triggered"]
|
||||
and not ready_state["initial_response_triggered"]
|
||||
):
|
||||
ready_state["llm_triggered"] = True
|
||||
logger.debug(
|
||||
"Both pipeline_started and client_connected received - triggering initial LLM generation"
|
||||
)
|
||||
await engine.llm.queue_frame(LLMContextFrame(engine.context))
|
||||
ready_state["initial_response_triggered"] = True
|
||||
|
||||
greeting = engine.get_start_greeting()
|
||||
if greeting:
|
||||
logger.debug(
|
||||
"Both pipeline_started and client_connected received - playing greeting via TTS"
|
||||
)
|
||||
await task.queue_frame(TTSSpeakFrame(greeting))
|
||||
else:
|
||||
logger.debug(
|
||||
"Both pipeline_started and client_connected received - triggering initial LLM generation"
|
||||
)
|
||||
await engine.llm.queue_frame(LLMContextFrame(engine.context))
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(_transport, _participant):
|
||||
logger.debug("In on_client_connected callback handler")
|
||||
await audio_buffer.start_recording()
|
||||
ready_state["client_connected"] = True
|
||||
await maybe_trigger_llm()
|
||||
await maybe_trigger_initial_response()
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(_transport, _participant):
|
||||
|
|
@ -93,7 +105,7 @@ def register_event_handlers(
|
|||
async def on_pipeline_started(_task: PipelineTask, _frame: Frame):
|
||||
logger.debug("In on_pipeline_started callback handler")
|
||||
ready_state["pipeline_started"] = True
|
||||
await maybe_trigger_llm()
|
||||
await maybe_trigger_initial_response()
|
||||
|
||||
@task.event_handler("on_pipeline_error")
|
||||
async def on_pipeline_error(_task: PipelineTask, frame: Frame):
|
||||
|
|
|
|||
|
|
@ -74,9 +74,16 @@ def build_pipeline(
|
|||
if recording_router:
|
||||
post_llm.append(recording_router)
|
||||
|
||||
processors.append(user_context_aggregator)
|
||||
|
||||
# Insert LLM gate before the main LLM when voicemail detection is enabled.
|
||||
# This prevents the main LLM from being triggered until classification
|
||||
# determines whether a human or voicemail answered the call.
|
||||
if voicemail_detector:
|
||||
processors.append(voicemail_detector.llm_gate())
|
||||
|
||||
processors.extend(
|
||||
[
|
||||
user_context_aggregator,
|
||||
llm, # LLM
|
||||
*post_llm,
|
||||
tts, # TTS
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ from pipecat.frames.frames import (
|
|||
MetricsFrame,
|
||||
StopFrame,
|
||||
TranscriptionFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.metrics.metrics import TTFBMetricsData
|
||||
from pipecat.observers.base_observer import BaseObserver, FramePushed
|
||||
|
|
@ -205,6 +206,17 @@ class RealtimeFeedbackObserver(BaseObserver):
|
|||
},
|
||||
}
|
||||
)
|
||||
# Handle TTSSpeakFrame (e.g. greeting) - send immediately via WS only
|
||||
# Final turn text is persisted via on_assistant_turn_stopped to avoid duplication
|
||||
elif isinstance(frame, TTSSpeakFrame):
|
||||
await self._send_ws(
|
||||
{
|
||||
"type": RealtimeFeedbackType.BOT_TEXT.value,
|
||||
"payload": {
|
||||
"text": frame.text,
|
||||
},
|
||||
}
|
||||
)
|
||||
# Handle bot TTS text - respect pts timing, WebSocket only
|
||||
# Complete turn text is persisted via register_turn_handlers
|
||||
elif isinstance(frame, LLMTextFrame):
|
||||
|
|
|
|||
|
|
@ -173,7 +173,9 @@ async def _download_and_convert(
|
|||
Returns the processed PCM bytes, or None on failure.
|
||||
"""
|
||||
ext = _ext_from_key(recording.storage_key)
|
||||
fd, tmp_path = tempfile.mkstemp(suffix=ext, prefix=f"dograh_dl_{recording.recording_id}_")
|
||||
fd, tmp_path = tempfile.mkstemp(
|
||||
suffix=ext, prefix=f"dograh_dl_{recording.recording_id}_"
|
||||
)
|
||||
os.close(fd)
|
||||
try:
|
||||
storage = get_storage_fn(recording.storage_backend)
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ from api.services.pipecat.recording_audio_cache import (
|
|||
from api.services.pipecat.recording_router_processor import RecordingRouterProcessor
|
||||
from api.services.pipecat.service_factory import (
|
||||
create_llm_service,
|
||||
create_llm_service_from_provider,
|
||||
create_stt_service,
|
||||
create_tts_service,
|
||||
)
|
||||
|
|
@ -669,18 +670,31 @@ async def _run_pipeline(
|
|||
async def on_user_turn_started(aggregator, strategy):
|
||||
user_idle_handler.reset()
|
||||
|
||||
# Create voicemail detector if enabled in the workflow's start node
|
||||
# Create voicemail detector if enabled in workflow configurations
|
||||
voicemail_detector = None
|
||||
start_node = workflow_graph.nodes.get(workflow_graph.start_node_id)
|
||||
if start_node and start_node.detect_voicemail:
|
||||
voicemail_config = (workflow.workflow_configurations or {}).get(
|
||||
"voicemail_detection", {}
|
||||
)
|
||||
if voicemail_config.get("enabled", False):
|
||||
logger.info(f"Voicemail detection enabled for workflow run {workflow_run_id}")
|
||||
# Create a separate LLM instance for the voicemail sub-pipeline
|
||||
# (can't share with main pipeline as it would mess up frame linking)
|
||||
voicemail_llm = create_llm_service(user_config)
|
||||
if voicemail_config.get("use_workflow_llm", True):
|
||||
voicemail_llm = create_llm_service(user_config)
|
||||
else:
|
||||
voicemail_llm = create_llm_service_from_provider(
|
||||
provider=voicemail_config.get("provider", "openai"),
|
||||
model=voicemail_config.get("model", "gpt-4.1"),
|
||||
api_key=voicemail_config.get("api_key", ""),
|
||||
)
|
||||
|
||||
long_speech_timeout = voicemail_config.get("long_speech_timeout", 8.0)
|
||||
custom_system_prompt = voicemail_config.get("system_prompt") or None
|
||||
|
||||
voicemail_detector = VoicemailDetector(
|
||||
llm=voicemail_llm,
|
||||
voicemail_response_delay=1.0,
|
||||
long_speech_timeout=8.0,
|
||||
long_speech_timeout=long_speech_timeout,
|
||||
custom_system_prompt=custom_system_prompt,
|
||||
)
|
||||
|
||||
# Register event handler to end task when voicemail is detected
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from loguru import logger
|
|||
|
||||
from api.constants import MPS_API_URL
|
||||
from api.services.configuration.registry import ServiceProviders
|
||||
from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
|
||||
from pipecat.services.azure.llm import AzureLLMService, AzureLLMSettings
|
||||
from pipecat.services.cartesia.stt import CartesiaSTTService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService, CartesiaTTSSettings
|
||||
|
|
@ -268,56 +269,91 @@ def create_tts_service(user_config, audio_config: "AudioConfig"):
|
|||
)
|
||||
|
||||
|
||||
def create_llm_service(user_config):
|
||||
"""Create and return appropriate LLM service based on user configuration"""
|
||||
model = user_config.llm.model
|
||||
logger.info(
|
||||
f"Creating LLM service: provider={user_config.llm.provider}, model={model}"
|
||||
)
|
||||
if user_config.llm.provider == ServiceProviders.OPENAI.value:
|
||||
def create_llm_service_from_provider(
|
||||
provider: str,
|
||||
model: str,
|
||||
api_key: str,
|
||||
*,
|
||||
base_url: str | None = None,
|
||||
endpoint: str | None = None,
|
||||
aws_access_key: str | None = None,
|
||||
aws_secret_key: str | None = None,
|
||||
aws_region: str | None = None,
|
||||
):
|
||||
"""Create an LLM service from explicit provider/model/api_key.
|
||||
|
||||
Also used by create_llm_service which extracts these from user_config.
|
||||
"""
|
||||
logger.info(f"Creating LLM service: provider={provider}, model={model}")
|
||||
if provider == ServiceProviders.OPENAI.value:
|
||||
if "gpt-5" in model:
|
||||
return OpenAILLMService(
|
||||
api_key=user_config.llm.api_key,
|
||||
api_key=api_key,
|
||||
settings=OpenAILLMSettings(
|
||||
model=model,
|
||||
extra={"reasoning_effort": "minimal", "verbosity": "low"},
|
||||
),
|
||||
)
|
||||
else:
|
||||
return OpenAILLMService(
|
||||
api_key=user_config.llm.api_key,
|
||||
settings=OpenAILLMSettings(model=model, temperature=0.1),
|
||||
)
|
||||
elif user_config.llm.provider == ServiceProviders.GROQ.value:
|
||||
print(
|
||||
f"Creating Groq LLM service with API key: {user_config.llm.api_key} and model: {model}"
|
||||
return OpenAILLMService(
|
||||
api_key=api_key,
|
||||
settings=OpenAILLMSettings(model=model, temperature=0.1),
|
||||
)
|
||||
elif provider == ServiceProviders.GROQ.value:
|
||||
return GroqLLMService(
|
||||
api_key=user_config.llm.api_key,
|
||||
api_key=api_key,
|
||||
settings=GroqLLMSettings(model=model, temperature=0.1),
|
||||
)
|
||||
elif user_config.llm.provider == ServiceProviders.OPENROUTER.value:
|
||||
elif provider == ServiceProviders.OPENROUTER.value:
|
||||
kwargs = {}
|
||||
if base_url:
|
||||
kwargs["base_url"] = base_url
|
||||
return OpenRouterLLMService(
|
||||
api_key=user_config.llm.api_key,
|
||||
base_url=user_config.llm.base_url,
|
||||
api_key=api_key,
|
||||
settings=OpenRouterLLMSettings(model=model, temperature=0.1),
|
||||
**kwargs,
|
||||
)
|
||||
elif user_config.llm.provider == ServiceProviders.GOOGLE.value:
|
||||
elif provider == ServiceProviders.GOOGLE.value:
|
||||
return GoogleLLMService(
|
||||
api_key=user_config.llm.api_key,
|
||||
api_key=api_key,
|
||||
settings=GoogleLLMSettings(model=model, temperature=0.1),
|
||||
)
|
||||
elif user_config.llm.provider == ServiceProviders.AZURE.value:
|
||||
elif provider == ServiceProviders.AZURE.value:
|
||||
return AzureLLMService(
|
||||
api_key=user_config.llm.api_key,
|
||||
endpoint=user_config.llm.endpoint,
|
||||
api_key=api_key,
|
||||
endpoint=endpoint,
|
||||
settings=AzureLLMSettings(model=model, temperature=0.1),
|
||||
)
|
||||
elif user_config.llm.provider == ServiceProviders.DOGRAH.value:
|
||||
elif provider == ServiceProviders.DOGRAH.value:
|
||||
return DograhLLMService(
|
||||
base_url=f"{MPS_API_URL}/api/v1/llm",
|
||||
api_key=user_config.llm.api_key,
|
||||
api_key=api_key,
|
||||
settings=OpenAILLMSettings(model=model),
|
||||
)
|
||||
elif provider == ServiceProviders.AWS_BEDROCK.value:
|
||||
return AWSBedrockLLMService(
|
||||
aws_access_key=aws_access_key,
|
||||
aws_secret_key=aws_secret_key,
|
||||
aws_region=aws_region,
|
||||
settings=AWSBedrockLLMSettings(model=model),
|
||||
)
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="Invalid LLM provider")
|
||||
raise HTTPException(status_code=400, detail=f"Invalid LLM provider {provider}")
|
||||
|
||||
|
||||
def create_llm_service(user_config):
    """Build the LLM service described by ``user_config.llm``.

    Reads provider, model and API key from the configuration, gathers the
    provider-specific settings (``base_url`` for OpenRouter, ``endpoint``
    for Azure, the three ``aws_*`` fields for AWS Bedrock) and delegates
    construction to ``create_llm_service_from_provider``.
    """
    llm_config = user_config.llm
    provider = llm_config.provider
    model = llm_config.model
    api_key = llm_config.api_key

    # Names of the configuration attributes forwarded for each provider.
    if provider == ServiceProviders.OPENROUTER.value:
        forwarded = ("base_url",)
    elif provider == ServiceProviders.AZURE.value:
        forwarded = ("endpoint",)
    elif provider == ServiceProviders.AWS_BEDROCK.value:
        forwarded = ("aws_access_key", "aws_secret_key", "aws_region")
    else:
        forwarded = ()

    extras = {field: getattr(llm_config, field) for field in forwarded}
    return create_llm_service_from_provider(provider, model, api_key, **extras)
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ class NodeDataDTO(BaseModel):
|
|||
extraction_prompt: Optional[str] = None
|
||||
extraction_variables: Optional[list[ExtractionVariableDTO]] = None
|
||||
add_global_prompt: bool = True
|
||||
greeting: Optional[str] = None
|
||||
wait_for_user_response: bool = False
|
||||
wait_for_user_response_timeout: Optional[float] = None
|
||||
detect_voicemail: bool = False
|
||||
|
|
|
|||
|
|
@ -554,6 +554,13 @@ class PipecatEngine:
|
|||
# Setup LLM Context with Prompts and Functions
|
||||
await self._setup_llm_context(node)
|
||||
|
||||
def get_start_greeting(self) -> Optional[str]:
    """Return the start node's greeting after prompt formatting.

    Returns:
        The greeting passed through ``self._format_prompt``, or ``None``
        when the start node is missing or has no greeting set.
    """
    node = self.workflow.nodes.get(self.workflow.start_node_id)
    # Guard clause: nothing to say without a start node and a greeting.
    if not node or not node.greeting:
        return None
    return self._format_prompt(node.greeting)
|
||||
|
||||
async def _handle_end_node(self, node: Node) -> None:
|
||||
"""Handle end node execution."""
|
||||
if node.is_static:
|
||||
|
|
|
|||
|
|
@ -4,19 +4,16 @@ import json
|
|||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from api.db.models import WorkflowRunModel
|
||||
from api.services.gen_ai.json_parser import parse_llm_json
|
||||
from api.services.pipecat.service_factory import create_llm_service_from_provider
|
||||
from api.services.workflow.qa.conversation import (
|
||||
build_conversation_structure,
|
||||
format_transcript,
|
||||
split_events_by_node,
|
||||
)
|
||||
from api.services.workflow.qa.llm_config import (
|
||||
accumulate_token_usage,
|
||||
resolve_llm_config,
|
||||
)
|
||||
from api.services.workflow.qa.llm_config import resolve_llm_config
|
||||
from api.services.workflow.qa.metrics import compute_call_metrics
|
||||
from api.services.workflow.qa.node_summary import (
|
||||
CONVERSATION_SUMMARY_SYSTEM_PROMPT,
|
||||
|
|
@ -28,15 +25,22 @@ from api.services.workflow.qa.tracing import (
|
|||
setup_langfuse_parent_context,
|
||||
)
|
||||
from api.utils.template_renderer import render_template
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
|
||||
|
||||
async def _run_llm_inference(llm, messages: list[dict]) -> str | None:
    """Perform a single, non-streaming inference call on ``llm``.

    Wraps ``messages`` in a fresh ``LLMContext`` and returns whatever the
    service's ``run_inference`` produces for it.
    """
    inference_context = LLMContext()
    inference_context.set_messages(messages)
    reply = await llm.run_inference(inference_context)
    return reply
|
||||
|
||||
|
||||
async def _generate_conversation_summary(
|
||||
client: AsyncOpenAI,
|
||||
llm,
|
||||
model: str,
|
||||
transcript: str,
|
||||
parent_ctx,
|
||||
node_name: str,
|
||||
total_token_usage: dict,
|
||||
) -> str:
|
||||
"""Generate a summary of the conversation so far (before the current node).
|
||||
|
||||
|
|
@ -48,13 +52,7 @@ async def _generate_conversation_summary(
|
|||
]
|
||||
|
||||
try:
|
||||
response = await client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=0,
|
||||
)
|
||||
summary = response.choices[0].message.content or ""
|
||||
accumulate_token_usage(total_token_usage, response)
|
||||
summary = await _run_llm_inference(llm, messages) or ""
|
||||
|
||||
span_name = f"conversation-summary-before-{node_name}"
|
||||
add_qa_span_to_trace(parent_ctx, model, messages, summary, span_name)
|
||||
|
|
@ -82,7 +80,7 @@ async def run_per_node_qa_analysis(
|
|||
Falls back to whole-call QA if events lack node_id.
|
||||
|
||||
Returns:
|
||||
Dict with node_results, token_usage, model
|
||||
Dict with node_results, model
|
||||
"""
|
||||
logs = workflow_run.logs or {}
|
||||
rtf_events = logs.get("realtime_feedback_events", [])
|
||||
|
|
@ -107,7 +105,9 @@ async def run_per_node_qa_analysis(
|
|||
return {"error": "no_system_prompt", "node_results": {}}
|
||||
|
||||
# Resolve LLM config
|
||||
model, api_key, base_url = await resolve_llm_config(qa_node_data, workflow_run)
|
||||
provider, model, api_key, service_kwargs = await resolve_llm_config(
|
||||
qa_node_data, workflow_run
|
||||
)
|
||||
if not api_key:
|
||||
logger.warning(
|
||||
f"No LLM API key configured for QA analysis on run {workflow_run_id}"
|
||||
|
|
@ -122,13 +122,9 @@ async def run_per_node_qa_analysis(
|
|||
# Set up Langfuse tracing
|
||||
parent_ctx = setup_langfuse_parent_context(workflow_run)
|
||||
|
||||
# Build LLM client
|
||||
client_kwargs: dict[str, Any] = {"api_key": api_key}
|
||||
if base_url:
|
||||
client_kwargs["base_url"] = base_url
|
||||
client = AsyncOpenAI(**client_kwargs)
|
||||
# Build LLM service
|
||||
llm = create_llm_service_from_provider(provider, model, api_key, **service_kwargs)
|
||||
|
||||
total_token_usage: dict[str, int] = {}
|
||||
node_results: dict[str, Any] = {}
|
||||
prior_conversation: list[dict] = [] # Running accumulation of all prior nodes
|
||||
|
||||
|
|
@ -150,12 +146,11 @@ async def run_per_node_qa_analysis(
|
|||
if idx > 0 and prior_conversation:
|
||||
prior_transcript = format_transcript(prior_conversation)
|
||||
previous_conversation_summary = await _generate_conversation_summary(
|
||||
client,
|
||||
llm,
|
||||
model,
|
||||
prior_transcript,
|
||||
parent_ctx,
|
||||
node_name,
|
||||
total_token_usage,
|
||||
)
|
||||
|
||||
# Substitute placeholders in the user's system prompt
|
||||
|
|
@ -174,14 +169,7 @@ async def run_per_node_qa_analysis(
|
|||
|
||||
# Call QA LLM
|
||||
try:
|
||||
response = await client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=0,
|
||||
extra_body={"stream": False},
|
||||
)
|
||||
raw_response = response.choices[0].message.content
|
||||
accumulate_token_usage(total_token_usage, response)
|
||||
raw_response = await _run_llm_inference(llm, messages)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"QA LLM call failed for node '{node_name}' on run {workflow_run_id}: {e}"
|
||||
|
|
@ -221,13 +209,10 @@ async def run_per_node_qa_analysis(
|
|||
# Append this node's conversation to running total
|
||||
prior_conversation.extend(node_conversation)
|
||||
|
||||
result: dict[str, Any] = {
|
||||
return {
|
||||
"node_results": node_results,
|
||||
"model": model,
|
||||
}
|
||||
if total_token_usage:
|
||||
result["token_usage"] = total_token_usage
|
||||
return result
|
||||
|
||||
|
||||
async def _run_whole_call_qa_analysis(
|
||||
|
|
@ -262,7 +247,9 @@ async def _run_whole_call_qa_analysis(
|
|||
logger.warning("No system prompt defined for QA Node")
|
||||
return {"error": "no_system_prompt", "node_results": {}}
|
||||
|
||||
model, api_key, base_url = await resolve_llm_config(qa_node_data, workflow_run)
|
||||
provider, model, api_key, service_kwargs = await resolve_llm_config(
|
||||
qa_node_data, workflow_run
|
||||
)
|
||||
|
||||
if not api_key:
|
||||
logger.warning(
|
||||
|
|
@ -284,27 +271,14 @@ async def _run_whole_call_qa_analysis(
|
|||
]
|
||||
|
||||
# Call LLM
|
||||
client_kwargs: dict[str, Any] = {"api_key": api_key}
|
||||
if base_url:
|
||||
client_kwargs["base_url"] = base_url
|
||||
|
||||
client = AsyncOpenAI(**client_kwargs)
|
||||
llm = create_llm_service_from_provider(provider, model, api_key, **service_kwargs)
|
||||
|
||||
try:
|
||||
response = await client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=0,
|
||||
)
|
||||
raw_response = response.choices[0].message.content
|
||||
raw_response = await _run_llm_inference(llm, messages)
|
||||
except Exception as e:
|
||||
logger.error(f"QA LLM call failed for run {workflow_run_id}: {e}")
|
||||
return {"error": str(e), "node_results": {}}
|
||||
|
||||
# Extract token usage
|
||||
token_usage: dict[str, int] = {}
|
||||
accumulate_token_usage(token_usage, response)
|
||||
|
||||
# Parse response
|
||||
node_result: dict[str, Any] = {
|
||||
"node_name": "whole_call",
|
||||
|
|
@ -325,10 +299,7 @@ async def _run_whole_call_qa_analysis(
|
|||
parent_ctx = setup_langfuse_parent_context(workflow_run)
|
||||
add_qa_span_to_trace(parent_ctx, model, messages, raw_response, "qa-analysis")
|
||||
|
||||
result: dict[str, Any] = {
|
||||
return {
|
||||
"node_results": {"whole_call": node_result},
|
||||
"model": model,
|
||||
}
|
||||
if token_usage:
|
||||
result["token_usage"] = token_usage
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -1,63 +1,50 @@
|
|||
"""LLM configuration resolution and token usage accumulation."""
|
||||
|
||||
from api.constants import MPS_API_URL
|
||||
from api.db import db_client
|
||||
from api.db.models import WorkflowRunModel
|
||||
|
||||
|
||||
def _provider_base_url(provider: str | None, endpoint: str = "") -> str | None:
|
||||
"""Return the base URL for a given LLM provider."""
|
||||
if provider == "openrouter":
|
||||
return "https://openrouter.ai/api/v1"
|
||||
if provider == "groq":
|
||||
return "https://api.groq.com/openai/v1"
|
||||
if provider == "google":
|
||||
return "https://generativelanguage.googleapis.com/v1beta/openai/"
|
||||
if provider == "azure":
|
||||
return endpoint or None
|
||||
if provider == "dograh":
|
||||
return f"{MPS_API_URL}/api/v1/llm"
|
||||
return None
|
||||
|
||||
|
||||
async def resolve_llm_config(
|
||||
qa_node_data: dict, workflow_run: WorkflowRunModel
|
||||
) -> tuple[str, str, str | None]:
|
||||
"""Resolve the LLM model, API key, and base URL for QA analysis.
|
||||
) -> tuple[str, str, str, dict]:
|
||||
"""Resolve the LLM provider, model, API key, and extra kwargs for QA analysis.
|
||||
|
||||
If the QA node has its own LLM configuration (qa_use_workflow_llm=False),
|
||||
use those settings directly. Otherwise, fall back to the user's configured LLM.
|
||||
|
||||
Returns:
|
||||
(model, api_key, base_url) tuple
|
||||
(provider, model, api_key, service_kwargs) tuple — service_kwargs can be
|
||||
passed directly to create_llm_service_from_provider as keyword arguments.
|
||||
"""
|
||||
if not qa_node_data.get("qa_use_workflow_llm", True):
|
||||
provider = qa_node_data.get("qa_provider", "openai")
|
||||
kwargs = {}
|
||||
if provider == "azure":
|
||||
kwargs["endpoint"] = qa_node_data.get("qa_endpoint", "")
|
||||
return (
|
||||
provider,
|
||||
qa_node_data.get("qa_model"),
|
||||
qa_node_data.get("qa_api_key"),
|
||||
_provider_base_url(
|
||||
qa_node_data.get("qa_provider"),
|
||||
qa_node_data.get("qa_endpoint", ""),
|
||||
),
|
||||
kwargs,
|
||||
)
|
||||
|
||||
# Fall back to user's configured LLM
|
||||
model, api_key, base_url = await resolve_user_llm_config(workflow_run)
|
||||
provider, model, api_key, kwargs = await resolve_user_llm_config(workflow_run)
|
||||
|
||||
qa_model = qa_node_data.get("qa_model", "default")
|
||||
if qa_model and qa_model != "default":
|
||||
model = qa_model
|
||||
|
||||
return model, api_key, base_url
|
||||
return provider, model, api_key, kwargs
|
||||
|
||||
|
||||
async def resolve_user_llm_config(
|
||||
workflow_run: WorkflowRunModel,
|
||||
) -> tuple[str, str, str | None]:
|
||||
) -> tuple[str, str, str, dict]:
|
||||
"""Resolve the user's configured LLM (from UserConfiguration).
|
||||
|
||||
Returns:
|
||||
(model, api_key, base_url) tuple
|
||||
(provider, model, api_key, service_kwargs) tuple
|
||||
"""
|
||||
user_id = None
|
||||
if workflow_run.workflow and workflow_run.workflow.user:
|
||||
|
|
@ -71,11 +58,14 @@ async def resolve_user_llm_config(
|
|||
provider = llm_config.get("provider", "openai")
|
||||
api_key = llm_config.get("api_key", "")
|
||||
model = llm_config.get("model", "gpt-4.1")
|
||||
base_url = _provider_base_url(provider, llm_config.get("endpoint", ""))
|
||||
if provider == "openrouter" and llm_config.get("base_url"):
|
||||
base_url = llm_config["base_url"]
|
||||
|
||||
return model, api_key, base_url
|
||||
kwargs = {}
|
||||
if provider == "azure":
|
||||
kwargs["endpoint"] = llm_config.get("endpoint", "")
|
||||
elif provider == "openrouter" and llm_config.get("base_url"):
|
||||
kwargs["base_url"] = llm_config["base_url"]
|
||||
|
||||
return provider, model, api_key, kwargs
|
||||
|
||||
|
||||
def accumulate_token_usage(total: dict, response) -> None:
|
||||
|
|
|
|||
|
|
@ -3,13 +3,14 @@
|
|||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from api.db import db_client
|
||||
from api.db.models import WorkflowRunModel
|
||||
from api.services.pipecat.service_factory import create_llm_service_from_provider
|
||||
from api.services.workflow.dto import NodeType
|
||||
from api.services.workflow.qa.llm_config import resolve_llm_config
|
||||
from api.services.workflow.qa.tracing import create_node_summary_trace
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
|
||||
NODE_SUMMARY_SYSTEM_PROMPT = (
|
||||
"You are analyzing a voice AI agent script. This is only a part of a larger script. "
|
||||
|
|
@ -67,15 +68,14 @@ async def ensure_node_summaries(
|
|||
if not nodes_needing_summary:
|
||||
return existing_summaries
|
||||
|
||||
model, api_key, base_url = await resolve_llm_config(qa_node_data, workflow_run)
|
||||
provider, model, api_key, service_kwargs = await resolve_llm_config(
|
||||
qa_node_data, workflow_run
|
||||
)
|
||||
if not api_key:
|
||||
logger.warning("No API key for node summary generation, skipping")
|
||||
return existing_summaries
|
||||
|
||||
client_kwargs: dict[str, Any] = {"api_key": api_key}
|
||||
if base_url:
|
||||
client_kwargs["base_url"] = base_url
|
||||
client = AsyncOpenAI(**client_kwargs)
|
||||
llm = create_llm_service_from_provider(provider, model, api_key, **service_kwargs)
|
||||
|
||||
updated_summaries = dict(existing_summaries)
|
||||
|
||||
|
|
@ -153,12 +153,9 @@ async def ensure_node_summaries(
|
|||
]
|
||||
|
||||
try:
|
||||
response = await client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=0,
|
||||
)
|
||||
summary_text = response.choices[0].message.content or ""
|
||||
context = LLMContext()
|
||||
context.set_messages(messages)
|
||||
summary_text = await llm.run_inference(context) or ""
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to generate summary for node {node_id}: {e}")
|
||||
updated_summaries[node_id] = {"summary": ""}
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ class Node:
|
|||
self.extraction_prompt = data.extraction_prompt
|
||||
self.extraction_variables = data.extraction_variables
|
||||
self.add_global_prompt = data.add_global_prompt
|
||||
self.greeting = data.greeting
|
||||
self.detect_voicemail = data.detect_voicemail
|
||||
self.delayed_start = data.delayed_start
|
||||
self.delayed_start_duration = data.delayed_start_duration
|
||||
|
|
|
|||
|
|
@ -139,7 +139,6 @@ class TestVoicemailDetectorWithUserAggregator:
|
|||
# Create voicemail detector with the classification LLM
|
||||
voicemail_detector = VoicemailDetector(
|
||||
llm=voicemail_llm,
|
||||
voicemail_response_delay=0,
|
||||
)
|
||||
|
||||
# Set up frame counter to track UserStoppedSpeakingFrame in voicemail detector's user aggregator
|
||||
|
|
|
|||
|
|
@ -18,11 +18,11 @@ def generate_transcript_text(events: List[dict]) -> str:
|
|||
event_type == RealtimeFeedbackType.USER_TRANSCRIPTION.value
|
||||
and payload.get("final") is True
|
||||
):
|
||||
timestamp = payload.get("timestamp", "")
|
||||
timestamp = payload.get("timestamp") or event.get("timestamp", "")
|
||||
prefix = f"[{timestamp}] " if timestamp else ""
|
||||
lines.append(f"{prefix}user: {payload.get('text', '')}\n")
|
||||
elif event_type == RealtimeFeedbackType.BOT_TEXT.value:
|
||||
timestamp = payload.get("timestamp", "")
|
||||
timestamp = payload.get("timestamp") or event.get("timestamp", "")
|
||||
prefix = f"[{timestamp}] " if timestamp else ""
|
||||
lines.append(f"{prefix}assistant: {payload.get('text', '')}\n")
|
||||
|
||||
|
|
|
|||
2
pipecat
2
pipecat
|
|
@ -1 +1 @@
|
|||
Subproject commit 9118901168e176fd30c46d9521b85eac3f1d7aae
|
||||
Subproject commit bc87f917cbdd1aeb681a75af0ce0fb8f7b816e12
|
||||
|
|
@ -38,7 +38,6 @@ ui/
|
|||
- Tailwind CSS with shadcn/ui components
|
||||
- Zustand for state management
|
||||
- @xyflow/react for workflow builder
|
||||
- LiveKit for WebRTC voice
|
||||
|
||||
## API Client
|
||||
|
||||
|
|
|
|||
|
|
@ -125,6 +125,7 @@ export default function CampaignsPage() {
|
|||
<Table>
|
||||
<TableHeader>
|
||||
<TableRow>
|
||||
<TableHead>ID</TableHead>
|
||||
<TableHead>Name</TableHead>
|
||||
<TableHead>Workflow</TableHead>
|
||||
<TableHead>State</TableHead>
|
||||
|
|
@ -139,6 +140,7 @@ export default function CampaignsPage() {
|
|||
className="cursor-pointer hover:bg-muted/50"
|
||||
onClick={() => handleRowClick(campaign.id)}
|
||||
>
|
||||
<TableCell>{campaign.id}</TableCell>
|
||||
<TableCell className="font-medium">{campaign.name}</TableCell>
|
||||
<TableCell>{campaign.workflow_name}</TableCell>
|
||||
<TableCell>
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import {
|
|||
Panel,
|
||||
ReactFlow,
|
||||
} from "@xyflow/react";
|
||||
import { BookA, BrushCleaning, Maximize2, Mic, Minus, Plus, Rocket, Settings, Variable } from 'lucide-react';
|
||||
import { BookA, BrushCleaning, Maximize2, Mic, Minus, PhoneOff, Plus, Rocket, Settings, Variable } from 'lucide-react';
|
||||
import React, { useEffect, useMemo, useState } from 'react';
|
||||
|
||||
import { listDocumentsApiV1KnowledgeBaseDocumentsGet, listRecordingsApiV1WorkflowRecordingsGet, listToolsApiV1ToolsGet } from '@/client';
|
||||
|
|
@ -25,6 +25,7 @@ import { EmbedDialog } from './components/EmbedDialog';
|
|||
import { PhoneCallDialog } from './components/PhoneCallDialog';
|
||||
import { RecordingsDialog } from './components/RecordingsDialog';
|
||||
import { TemplateContextVariablesDialog } from './components/TemplateContextVariablesDialog';
|
||||
import { VoicemailDetectionDialog } from './components/VoicemailDetectionDialog';
|
||||
import { WorkflowEditorHeader } from "./components/WorkflowEditorHeader";
|
||||
import { WorkflowProvider } from "./contexts/WorkflowContext";
|
||||
import { useWorkflowState } from "./hooks/useWorkflowState";
|
||||
|
|
@ -69,6 +70,7 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
|
|||
const [isEmbedDialogOpen, setIsEmbedDialogOpen] = useState(false);
|
||||
const [isPhoneCallDialogOpen, setIsPhoneCallDialogOpen] = useState(false);
|
||||
const [isRecordingsDialogOpen, setIsRecordingsDialogOpen] = useState(false);
|
||||
const [isVoicemailDialogOpen, setIsVoicemailDialogOpen] = useState(false);
|
||||
const [documents, setDocuments] = useState<DocumentResponseSchema[] | undefined>(undefined);
|
||||
const [tools, setTools] = useState<ToolResponse[] | undefined>(undefined);
|
||||
const [recordings, setRecordings] = useState<RecordingResponseSchema[]>([]);
|
||||
|
|
@ -283,6 +285,22 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
|
|||
</TooltipContent>
|
||||
</Tooltip>
|
||||
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="icon"
|
||||
onClick={() => setIsVoicemailDialogOpen(true)}
|
||||
className="bg-white shadow-sm hover:shadow-md"
|
||||
>
|
||||
<PhoneOff className="h-4 w-4" />
|
||||
</Button>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="left">
|
||||
<p>Voicemail Detection</p>
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
|
||||
<Tooltip>
|
||||
<TooltipTrigger asChild>
|
||||
<Button
|
||||
|
|
@ -428,6 +446,13 @@ function RenderWorkflow({ initialWorkflowName, workflowId, initialFlow, initialT
|
|||
workflowId={workflowId}
|
||||
onRecordingsChange={setRecordings}
|
||||
/>
|
||||
|
||||
<VoicemailDetectionDialog
|
||||
open={isVoicemailDialogOpen}
|
||||
onOpenChange={setIsVoicemailDialogOpen}
|
||||
workflowConfigurations={workflowConfigurations}
|
||||
onSave={saveWorkflowConfigurations}
|
||||
/>
|
||||
</div>
|
||||
</WorkflowProvider>
|
||||
);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,205 @@
|
|||
import { useEffect, useState } from "react";
|
||||
|
||||
import { LLMConfigSelector } from "@/components/LLMConfigSelector";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogDescription,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from "@/components/ui/dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Switch } from "@/components/ui/switch";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import {
|
||||
DEFAULT_VOICEMAIL_DETECTION_CONFIGURATION,
|
||||
type VoicemailDetectionConfiguration,
|
||||
type WorkflowConfigurations,
|
||||
} from "@/types/workflow-configurations";
|
||||
|
||||
// Must match VoicemailDetector.DEFAULT_SYSTEM_PROMPT in pipecat
|
||||
const DEFAULT_VOICEMAIL_SYSTEM_PROMPT = `You are a voicemail detection classifier for an OUTBOUND calling system. A bot has called a phone number and you need to determine if a human answered or if the call went to voicemail based on the provided text.
|
||||
|
||||
HUMAN ANSWERED - LIVE CONVERSATION (respond "CONVERSATION"):
|
||||
- Personal greetings: "Hello?", "Hi", "Yeah?", "John speaking"
|
||||
- Interactive responses: "Who is this?", "What do you want?", "Can I help you?"
|
||||
- Conversational tone expecting back-and-forth dialogue
|
||||
- Questions directed at the caller: "Hello? Anyone there?"
|
||||
- Informal responses: "Yep", "What's up?", "Speaking"
|
||||
- Natural, spontaneous speech patterns
|
||||
- Immediate acknowledgment of the call
|
||||
|
||||
VOICEMAIL SYSTEM (respond "VOICEMAIL"):
|
||||
- Automated voicemail greetings: "Hi, you've reached [name], please leave a message"
|
||||
- Phone carrier messages: "The number you have dialed is not in service", "Please leave a message", "All circuits are busy"
|
||||
- Professional voicemail: "This is [name], I'm not available right now"
|
||||
- Instructions about leaving messages: "leave a message", "leave your name and number"
|
||||
- References to callback or messaging: "call me back", "I'll get back to you"
|
||||
- Carrier system messages: "mailbox is full", "has not been set up"
|
||||
- Business hours messages: "our office is currently closed"
|
||||
|
||||
Respond with ONLY "CONVERSATION" if a person answered, or "VOICEMAIL" if it's voicemail/recording.`;
|
||||
|
||||
interface VoicemailDetectionDialogProps {
|
||||
open: boolean;
|
||||
onOpenChange: (open: boolean) => void;
|
||||
workflowConfigurations: WorkflowConfigurations;
|
||||
onSave: (configurations: WorkflowConfigurations) => void;
|
||||
}
|
||||
|
||||
export const VoicemailDetectionDialog = ({
|
||||
open,
|
||||
onOpenChange,
|
||||
workflowConfigurations,
|
||||
onSave,
|
||||
}: VoicemailDetectionDialogProps) => {
|
||||
const getConfig = (): VoicemailDetectionConfiguration => ({
|
||||
...DEFAULT_VOICEMAIL_DETECTION_CONFIGURATION,
|
||||
...workflowConfigurations.voicemail_detection,
|
||||
});
|
||||
|
||||
const [enabled, setEnabled] = useState(getConfig().enabled);
|
||||
const [useWorkflowLlm, setUseWorkflowLlm] = useState(getConfig().use_workflow_llm);
|
||||
const [provider, setProvider] = useState(getConfig().provider || "openai");
|
||||
const [model, setModel] = useState(getConfig().model || "gpt-4.1");
|
||||
const [apiKey, setApiKey] = useState(getConfig().api_key || "");
|
||||
const [systemPrompt, setSystemPrompt] = useState(getConfig().system_prompt || DEFAULT_VOICEMAIL_SYSTEM_PROMPT);
|
||||
const [longSpeechTimeout, setLongSpeechTimeout] = useState(getConfig().long_speech_timeout);
|
||||
|
||||
// Sync state from props whenever the dialog opens
|
||||
useEffect(() => {
|
||||
if (open) {
|
||||
const config = {
|
||||
...DEFAULT_VOICEMAIL_DETECTION_CONFIGURATION,
|
||||
...workflowConfigurations.voicemail_detection,
|
||||
};
|
||||
setEnabled(config.enabled);
|
||||
setUseWorkflowLlm(config.use_workflow_llm);
|
||||
setProvider(config.provider || "openai");
|
||||
setModel(config.model || "gpt-4.1");
|
||||
setApiKey(config.api_key || "");
|
||||
setSystemPrompt(config.system_prompt || DEFAULT_VOICEMAIL_SYSTEM_PROMPT);
|
||||
setLongSpeechTimeout(config.long_speech_timeout);
|
||||
}
|
||||
}, [open, workflowConfigurations]);
|
||||
|
||||
const handleOpenChange = (newOpen: boolean) => {
|
||||
onOpenChange(newOpen);
|
||||
};
|
||||
|
||||
const handleSave = () => {
|
||||
const voicemailConfig: VoicemailDetectionConfiguration = {
|
||||
enabled,
|
||||
use_workflow_llm: useWorkflowLlm,
|
||||
provider: useWorkflowLlm ? undefined : provider,
|
||||
model: useWorkflowLlm ? undefined : model,
|
||||
api_key: useWorkflowLlm ? undefined : apiKey,
|
||||
system_prompt: systemPrompt && systemPrompt !== DEFAULT_VOICEMAIL_SYSTEM_PROMPT ? systemPrompt : undefined,
|
||||
long_speech_timeout: longSpeechTimeout,
|
||||
};
|
||||
|
||||
onSave({
|
||||
...workflowConfigurations,
|
||||
voicemail_detection: voicemailConfig,
|
||||
});
|
||||
onOpenChange(false);
|
||||
};
|
||||
|
||||
return (
|
||||
<Dialog open={open} onOpenChange={handleOpenChange}>
|
||||
<DialogContent className="max-w-lg max-h-[80vh] overflow-y-auto">
|
||||
<DialogHeader>
|
||||
<DialogTitle>Voicemail Detection</DialogTitle>
|
||||
<DialogDescription>
|
||||
Configure voicemail detection to automatically detect and end calls
|
||||
when a voicemail system is reached.
|
||||
</DialogDescription>
|
||||
</DialogHeader>
|
||||
|
||||
<div className="space-y-4">
|
||||
<div className="flex items-center space-x-2 p-2 border rounded-md bg-muted/20">
|
||||
<Switch
|
||||
id="voicemail-enabled"
|
||||
checked={enabled}
|
||||
onCheckedChange={setEnabled}
|
||||
/>
|
||||
<Label htmlFor="voicemail-enabled">Enable Voicemail Detection</Label>
|
||||
</div>
|
||||
|
||||
{enabled && (
|
||||
<>
|
||||
{/* LLM Configuration */}
|
||||
<div className="space-y-3">
|
||||
<div className="flex items-center space-x-2 p-2 border rounded-md bg-muted/20">
|
||||
<Switch
|
||||
id="voicemail-use-workflow-llm"
|
||||
checked={useWorkflowLlm}
|
||||
onCheckedChange={setUseWorkflowLlm}
|
||||
/>
|
||||
<Label htmlFor="voicemail-use-workflow-llm">Use Workflow LLM</Label>
|
||||
<Label className="text-xs text-muted-foreground ml-2">
|
||||
Use the LLM configured in your account settings.
|
||||
</Label>
|
||||
</div>
|
||||
|
||||
{!useWorkflowLlm && (
|
||||
<LLMConfigSelector
|
||||
provider={provider}
|
||||
onProviderChange={setProvider}
|
||||
model={model}
|
||||
onModelChange={setModel}
|
||||
apiKey={apiKey}
|
||||
onApiKeyChange={setApiKey}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* System Prompt */}
|
||||
<div className="grid gap-2">
|
||||
<Label>System Prompt</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Prompt for voicemail classification.
|
||||
The LLM must respond with either "CONVERSATION" or "VOICEMAIL".
|
||||
</Label>
|
||||
<Textarea
|
||||
value={systemPrompt}
|
||||
onChange={(e) => setSystemPrompt(e.target.value)}
|
||||
className="min-h-[200px] font-mono text-xs"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Timing Configuration */}
|
||||
<div className="grid gap-4 p-3 border rounded-md bg-muted/10">
|
||||
<Label className="font-medium">Timing</Label>
|
||||
<div className="space-y-2">
|
||||
<Label className="text-sm">Speech Cutoff (seconds)</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Trigger classification early if first turn speech exceeds this duration.
|
||||
</Label>
|
||||
<Input
|
||||
type="number"
|
||||
step="0.5"
|
||||
min="1"
|
||||
max="30"
|
||||
value={longSpeechTimeout}
|
||||
onChange={(e) => setLongSpeechTimeout(parseFloat(e.target.value) || 8.0)}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<DialogFooter>
|
||||
<Button variant="outline" onClick={() => onOpenChange(false)}>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button onClick={handleSave}>Save</Button>
|
||||
</DialogFooter>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
);
|
||||
};
|
||||
|
|
@ -1242,7 +1242,10 @@ export const getCurrentPeriodUsageApiV1OrganizationsUsageCurrentPeriodGet = <Thr
|
|||
|
||||
/**
|
||||
* Get Mps Credits
|
||||
* Get usage and quota from MPS for the user's configured Dograh service keys.
|
||||
* Get aggregated usage and quota from MPS.
|
||||
*
|
||||
* OSS users: queries by provider_id (created_by).
|
||||
* Hosted users: queries by organization_id.
|
||||
*/
|
||||
export const getMpsCreditsApiV1OrganizationsUsageMpsCreditsGet = <ThrowOnError extends boolean = false>(options?: Options<GetMpsCreditsApiV1OrganizationsUsageMpsCreditsGetData, ThrowOnError>) => {
|
||||
return (options?.client ?? _heyApiClient).get<GetMpsCreditsApiV1OrganizationsUsageMpsCreditsGetResponse, GetMpsCreditsApiV1OrganizationsUsageMpsCreditsGetError, ThrowOnError>({
|
||||
|
|
|
|||
|
|
@ -1238,7 +1238,7 @@ export type UpdateToolRequest = {
|
|||
};
|
||||
|
||||
export type UpdateWorkflowRequest = {
|
||||
name: string;
|
||||
name?: string | null;
|
||||
workflow_definition?: {
|
||||
[key: string]: unknown;
|
||||
} | null;
|
||||
|
|
@ -1266,16 +1266,16 @@ export type UsageHistoryResponse = {
|
|||
|
||||
export type UserConfigurationRequestResponseSchema = {
|
||||
llm?: {
|
||||
[key: string]: string | number | Array<string>;
|
||||
[key: string]: string | number | Array<string> | null;
|
||||
} | null;
|
||||
tts?: {
|
||||
[key: string]: string | number | Array<string>;
|
||||
[key: string]: string | number | Array<string> | null;
|
||||
} | null;
|
||||
stt?: {
|
||||
[key: string]: string | number | Array<string>;
|
||||
[key: string]: string | number | Array<string> | null;
|
||||
} | null;
|
||||
embeddings?: {
|
||||
[key: string]: string | number | Array<string>;
|
||||
[key: string]: string | number | Array<string> | null;
|
||||
} | null;
|
||||
test_phone_number?: string | null;
|
||||
timezone?: string | null;
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@ import { useNodeHandlers } from "./common/useNodeHandlers";
|
|||
|
||||
interface StartCallEditFormProps {
|
||||
nodeData: FlowNodeData;
|
||||
greeting: string;
|
||||
setGreeting: (value: string) => void;
|
||||
prompt: string;
|
||||
setPrompt: (value: string) => void;
|
||||
name: string;
|
||||
|
|
@ -32,8 +34,6 @@ interface StartCallEditFormProps {
|
|||
setAllowInterrupt: (value: boolean) => void;
|
||||
addGlobalPrompt: boolean;
|
||||
setAddGlobalPrompt: (value: boolean) => void;
|
||||
detectVoicemail: boolean;
|
||||
setDetectVoicemail: (value: boolean) => void;
|
||||
delayedStart: boolean;
|
||||
setDelayedStart: (value: boolean) => void;
|
||||
delayedStartDuration: number;
|
||||
|
|
@ -65,11 +65,11 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
const { saveWorkflow, tools, documents, recordings } = useWorkflow();
|
||||
|
||||
// Form state
|
||||
const [greeting, setGreeting] = useState(data.greeting ?? "");
|
||||
const [prompt, setPrompt] = useState(data.prompt ?? "");
|
||||
const [name, setName] = useState(data.name);
|
||||
const [allowInterrupt, setAllowInterrupt] = useState(data.allow_interrupt ?? true);
|
||||
const [addGlobalPrompt, setAddGlobalPrompt] = useState(data.add_global_prompt ?? true);
|
||||
const [detectVoicemail, setDetectVoicemail] = useState(data.detect_voicemail ?? false);
|
||||
const [delayedStart, setDelayedStart] = useState(data.delayed_start ?? false);
|
||||
const [delayedStartDuration, setDelayedStartDuration] = useState(data.delayed_start_duration ?? 2);
|
||||
const [extractionEnabled, setExtractionEnabled] = useState(data.extraction_enabled ?? false);
|
||||
|
|
@ -78,22 +78,23 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
const [toolUuids, setToolUuids] = useState<string[]>(data.tool_uuids ?? []);
|
||||
const [documentUuids, setDocumentUuids] = useState<string[]>(data.document_uuids ?? []);
|
||||
|
||||
// Compute if form has unsaved changes (only check prompt, name)
|
||||
// Compute if form has unsaved changes (only check prompt, name, greeting)
|
||||
const isDirty = useMemo(() => {
|
||||
return (
|
||||
greeting !== (data.greeting ?? "") ||
|
||||
prompt !== (data.prompt ?? "") ||
|
||||
name !== (data.name ?? "")
|
||||
);
|
||||
}, [prompt, name, data]);
|
||||
}, [greeting, prompt, name, data]);
|
||||
|
||||
const handleSave = async () => {
|
||||
handleSaveNodeData({
|
||||
...data,
|
||||
greeting: greeting || undefined,
|
||||
prompt,
|
||||
name,
|
||||
allow_interrupt: allowInterrupt,
|
||||
add_global_prompt: addGlobalPrompt,
|
||||
detect_voicemail: detectVoicemail,
|
||||
delayed_start: delayedStart,
|
||||
delayed_start_duration: delayedStart ? delayedStartDuration : undefined,
|
||||
extraction_enabled: extractionEnabled,
|
||||
|
|
@ -112,11 +113,11 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
// Reset form state when dialog opens
|
||||
const handleOpenChange = (newOpen: boolean) => {
|
||||
if (newOpen) {
|
||||
setGreeting(data.greeting ?? "");
|
||||
setPrompt(data.prompt ?? "");
|
||||
setName(data.name);
|
||||
setAllowInterrupt(data.allow_interrupt ?? true);
|
||||
setAddGlobalPrompt(data.add_global_prompt ?? true);
|
||||
setDetectVoicemail(data.detect_voicemail ?? false);
|
||||
setDelayedStart(data.delayed_start ?? false);
|
||||
setDelayedStartDuration(data.delayed_start_duration ?? 3);
|
||||
setExtractionEnabled(data.extraction_enabled ?? false);
|
||||
|
|
@ -131,11 +132,11 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
// Update form state when data changes (e.g., from undo/redo)
|
||||
useEffect(() => {
|
||||
if (open) {
|
||||
setGreeting(data.greeting ?? "");
|
||||
setPrompt(data.prompt ?? "");
|
||||
setName(data.name);
|
||||
setAllowInterrupt(data.allow_interrupt ?? true);
|
||||
setAddGlobalPrompt(data.add_global_prompt ?? true);
|
||||
setDetectVoicemail(data.detect_voicemail ?? false);
|
||||
setDelayedStart(data.delayed_start ?? false);
|
||||
setDelayedStartDuration(data.delayed_start_duration ?? 3);
|
||||
setExtractionEnabled(data.extraction_enabled ?? false);
|
||||
|
|
@ -225,6 +226,8 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
{open && (
|
||||
<StartCallEditForm
|
||||
nodeData={data}
|
||||
greeting={greeting}
|
||||
setGreeting={setGreeting}
|
||||
prompt={prompt}
|
||||
setPrompt={setPrompt}
|
||||
name={name}
|
||||
|
|
@ -233,8 +236,6 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
setAllowInterrupt={setAllowInterrupt}
|
||||
addGlobalPrompt={addGlobalPrompt}
|
||||
setAddGlobalPrompt={setAddGlobalPrompt}
|
||||
detectVoicemail={detectVoicemail}
|
||||
setDetectVoicemail={setDetectVoicemail}
|
||||
delayedStart={delayedStart}
|
||||
setDelayedStart={setDelayedStart}
|
||||
delayedStartDuration={delayedStartDuration}
|
||||
|
|
@ -260,6 +261,8 @@ export const StartCall = memo(({ data, selected, id }: StartCallNodeProps) => {
|
|||
});
|
||||
|
||||
const StartCallEditForm = ({
|
||||
greeting,
|
||||
setGreeting,
|
||||
prompt,
|
||||
setPrompt,
|
||||
name,
|
||||
|
|
@ -268,8 +271,6 @@ const StartCallEditForm = ({
|
|||
setAllowInterrupt,
|
||||
addGlobalPrompt,
|
||||
setAddGlobalPrompt,
|
||||
detectVoicemail,
|
||||
setDetectVoicemail,
|
||||
delayedStart,
|
||||
setDelayedStart,
|
||||
delayedStartDuration,
|
||||
|
|
@ -326,6 +327,18 @@ const StartCallEditForm = ({
|
|||
onChange={(e) => setName(e.target.value)}
|
||||
/>
|
||||
|
||||
<Label>Greeting</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Optional greeting message played via TTS when the call starts. If set, this will be spoken directly instead of generating a response from the LLM. Supports template variables like {"{{variable_name}}"}.
|
||||
</Label>
|
||||
<MentionTextarea
|
||||
value={greeting}
|
||||
onChange={setGreeting}
|
||||
className="min-h-[60px] max-h-[200px] resize-none overflow-y-auto"
|
||||
placeholder="e.g. Hello {{first_name}}, this is Sarah calling from Acme Corp."
|
||||
recordings={recordings}
|
||||
/>
|
||||
|
||||
<Label>Prompt</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Enter the prompt for the agent. This will be used to generate the agent's response. Prompt engineering's best practices apply.
|
||||
|
|
@ -354,19 +367,6 @@ const StartCallEditForm = ({
|
|||
Add Global Prompt
|
||||
</Label>
|
||||
</div>
|
||||
<div className="flex items-center space-x-2">
|
||||
<Switch
|
||||
id="detect-voicemail"
|
||||
checked={detectVoicemail}
|
||||
onCheckedChange={setDetectVoicemail}
|
||||
/>
|
||||
<Label htmlFor="detect-voicemail">
|
||||
Detect Voicemail
|
||||
</Label>
|
||||
<Label className="text-xs text-muted-foreground">
|
||||
Automatically detect and end call if voicemail is reached.
|
||||
</Label>
|
||||
</div>
|
||||
<div className="flex flex-col space-y-2">
|
||||
<div className="flex items-center space-x-2">
|
||||
<Switch
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ export type FlowNodeData = {
|
|||
extraction_prompt?: string;
|
||||
extraction_variables?: ExtractionVariable[];
|
||||
add_global_prompt?: boolean;
|
||||
greeting?: string;
|
||||
wait_for_user_greeting?: boolean;
|
||||
detect_voicemail?: boolean;
|
||||
delayed_start?: boolean;
|
||||
|
|
|
|||
|
|
@ -12,6 +12,22 @@ export interface AmbientNoiseConfiguration {
|
|||
|
||||
export type TurnStopStrategy = 'transcription' | 'turn_analyzer';
|
||||
|
||||
export interface VoicemailDetectionConfiguration {
|
||||
enabled: boolean;
|
||||
use_workflow_llm: boolean;
|
||||
provider?: string;
|
||||
model?: string;
|
||||
api_key?: string;
|
||||
system_prompt?: string;
|
||||
long_speech_timeout: number; // seconds cutoff for long speech detection
|
||||
}
|
||||
|
||||
export const DEFAULT_VOICEMAIL_DETECTION_CONFIGURATION: VoicemailDetectionConfiguration = {
|
||||
enabled: false,
|
||||
use_workflow_llm: true,
|
||||
long_speech_timeout: 8.0,
|
||||
};
|
||||
|
||||
export interface WorkflowConfigurations {
|
||||
vad_configuration?: VADConfiguration;
|
||||
ambient_noise_configuration: AmbientNoiseConfiguration;
|
||||
|
|
@ -20,6 +36,7 @@ export interface WorkflowConfigurations {
|
|||
smart_turn_stop_secs: number; // Timeout in seconds for incomplete turn detection
|
||||
turn_stop_strategy: TurnStopStrategy; // Strategy for detecting end of user turn
|
||||
dictionary?: string; // Comma-separated words for voice agent to listen for
|
||||
voicemail_detection?: VoicemailDetectionConfiguration;
|
||||
[key: string]: unknown; // Allow additional properties for future configurations
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue